/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2023 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "explow.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
#include "attribs.h"
#include "optabs-libfuncs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

static unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind,
		  stmt_vec_info stmt_info, slp_tree node,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si
    = { count, kind, where, stmt_info, node, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
      (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
			   vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, slp_tree node,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
			   vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind,
		  enum vect_cost_model_location where)
{
  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
	      || kind == scalar_stmt);
  return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
			   NULL_TREE, 0, where);
}
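
/* For instance (illustration only), costing two unaligned vector loads in
   the loop body and one broadcast of an invariant operand in the prologue
   would look like

     record_stmt_cost (cost_vec, 2, unaligned_load, stmt_info,
		       vectype, misalign, vect_body);
     record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
		       vectype, 0, vect_prologue);

   each call pushing a stmt_info_for_cost entry that the target cost model
   sums up later.  */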
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
		       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");

      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);

      if (live_p && relevant == vect_unused_in_scope)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vec_stmt_relevant_p: forcing live pattern stmt "
			     "relevant.\n");
	  relevant = vect_used_only_live;
	}

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "mark relevant %d, live %d: %G", relevant, live_p,
			 stmt_info->stmt);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
	      gcc_assert (bb == single_exit (loop)->dest);

	      *live_p = true;
	    }
	}
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
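
/* For instance, in

     for (i = 0; i < n; i++)
       {
	 t = a[i] + b[i];
	 c[i] = t;
       }

   the store c[i] = t is flagged as relevant here because it has a vdef,
   while t = a[i] + b[i] is not; it only becomes relevant later, when the
   worklist in vect_mark_stmts_to_be_vectorized propagates relevance from
   the store to the statements defining its uses.  A value used after the
   loop would additionally be marked live.  */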
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it is of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
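
/* For instance, given the store a[i_5] = x_3, the use x_3 is a
   non-indexing operand (it is the stored value), whereas i_5 only feeds
   the address computation and so has no non-indexing use in the statement.
   Likewise, for an internal call such as .MASK_STORE (ptr, align, mask_7,
   x_3) both the mask and the stored value are non-indexing uses (SSA
   names made up for the example).  */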
/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if the exist_non_indexing_operands_for_use_p check
     shouldn't be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction
     stmt, we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
     "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
506 static opt_result
507 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
508 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
509 bool force)
511 stmt_vec_info dstmt_vinfo;
512 enum vect_def_type dt;
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
517 return opt_result::success ();
519 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
520 return opt_result::failure_at (stmt_vinfo->stmt,
521 "not vectorized:"
522 " unsupported use in stmt.\n");
524 if (!dstmt_vinfo)
525 return opt_result::success ();
527 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
528 basic_block bb = gimple_bb (stmt_vinfo->stmt);
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
537 && bb->loop_father == def_bb->loop_father)
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
542 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
543 return opt_result::success ();
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
548 d = dstmt_vinfo
549 inner-loop:
550 stmt # use (d)
551 outer-loop-tail-bb:
552 ... */
553 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
559 switch (relevant)
561 case vect_unused_in_scope:
562 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
563 vect_used_in_scope : vect_unused_in_scope;
564 break;
566 case vect_used_in_outer_by_reduction:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_by_reduction;
569 break;
571 case vect_used_in_outer:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
573 relevant = vect_used_in_scope;
574 break;
576 case vect_used_in_scope:
577 break;
579 default:
580 gcc_unreachable ();
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
587 inner-loop:
588 d = dstmt_vinfo
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
590 stmt # use (d) */
591 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE, vect_location,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
597 switch (relevant)
599 case vect_unused_in_scope:
600 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
602 vect_used_in_outer_by_reduction : vect_unused_in_scope;
603 break;
605 case vect_used_by_reduction:
606 case vect_used_only_live:
607 relevant = vect_used_in_outer_by_reduction;
608 break;
610 case vect_used_in_scope:
611 relevant = vect_used_in_outer;
612 break;
614 default:
615 gcc_unreachable ();
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
621 of course. */
622 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
626 loop_latch_edge (bb->loop_father))
627 == use))
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location,
631 "induction value on backedge.\n");
632 return opt_result::success ();
636 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
637 return opt_result::success ();
641 /* Function vect_mark_stmts_to_be_vectorized.
643 Not all stmts in the loop need to be vectorized. For example:
645 for i...
646 for j...
647 1. T0 = i + j
648 2. T1 = a[T0]
650 3. j = j + 1
652 Stmt 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
655 This pass detects such stmts. */
657 opt_result
658 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
660 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
661 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
662 unsigned int nbbs = loop->num_nodes;
663 gimple_stmt_iterator si;
664 unsigned int i;
665 basic_block bb;
666 bool live_p;
667 enum vect_relevant relevant;
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
671 auto_vec<stmt_vec_info, 64> worklist;
673 /* 1. Init worklist. */
674 for (i = 0; i < nbbs; i++)
676 bb = bbs[i];
677 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
679 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
682 phi_info->stmt);
684 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
685 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
687 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
689 if (is_gimple_debug (gsi_stmt (si)))
690 continue;
691 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "init: stmt relevant? %G", stmt_info->stmt);
696 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
697 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
701 /* 2. Process_worklist */
702 while (worklist.length () > 0)
704 use_operand_p use_p;
705 ssa_op_iter iter;
707 stmt_vec_info stmt_vinfo = worklist.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "worklist: examine stmt: %G", stmt_vinfo->stmt);
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
714 of STMT. */
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
730 case vect_reduction_def:
731 gcc_assert (relevant != vect_unused_in_scope);
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
736 return opt_result::failure_at
737 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
738 break;
740 case vect_nested_cycle:
741 if (relevant != vect_unused_in_scope
742 && relevant != vect_used_in_outer_by_reduction
743 && relevant != vect_used_in_outer)
744 return opt_result::failure_at
745 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
746 break;
748 case vect_double_reduction_def:
749 if (relevant != vect_unused_in_scope
750 && relevant != vect_used_by_reduction
751 && relevant != vect_used_only_live)
752 return opt_result::failure_at
753 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
754 break;
756 default:
757 break;
760 if (is_pattern_stmt_p (stmt_vinfo))
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
767 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
768 tree op = gimple_assign_rhs1 (assign);
770 i = 1;
771 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
773 opt_result res
774 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
775 loop_vinfo, relevant, &worklist, false);
776 if (!res)
777 return res;
778 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
779 loop_vinfo, relevant, &worklist, false);
780 if (!res)
781 return res;
782 i = 2;
784 for (; i < gimple_num_ops (assign); i++)
786 op = gimple_op (assign, i);
787 if (TREE_CODE (op) == SSA_NAME)
789 opt_result res
790 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
791 &worklist, false);
792 if (!res)
793 return res;
797 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
799 for (i = 0; i < gimple_call_num_args (call); i++)
801 tree arg = gimple_call_arg (call, i);
802 opt_result res
803 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
804 &worklist, false);
805 if (!res)
806 return res;
810 else
811 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
813 tree op = USE_FROM_PTR (use_p);
814 opt_result res
815 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
816 &worklist, false);
817 if (!res)
818 return res;
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
823 gather_scatter_info gs_info;
824 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
825 gcc_unreachable ();
826 opt_result res
827 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
828 &worklist, true);
829 if (!res)
831 if (fatal)
832 *fatal = false;
833 return res;
836 } /* while worklist */
838 return opt_result::success ();
841 /* Function vect_model_simple_cost.
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
847 static void
848 vect_model_simple_cost (vec_info *,
849 stmt_vec_info stmt_info, int ncopies,
850 enum vect_def_type *dt,
851 int ndts,
852 slp_tree node,
853 stmt_vector_for_cost *cost_vec,
854 vect_cost_for_stmt kind = vector_stmt)
856 int inside_cost = 0, prologue_cost = 0;
858 gcc_assert (cost_vec != NULL);
860 /* ??? Somehow we need to fix this at the callers. */
861 if (node)
862 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
864 if (!node)
865 /* Cost the "broadcast" of a scalar operand in to a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
867 cost model. */
868 for (int i = 0; i < ndts; i++)
869 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
870 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
871 stmt_info, 0, vect_prologue);
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
875 stmt_info, 0, vect_body);
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost, prologue_cost);
/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt,
				    unsigned int ncopies, int pwr,
				    stmt_vector_for_cost *cost_vec,
				    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
				       widen_arith
				       ? vector_stmt : vec_promote_demote,
				       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmt.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
923 /* Returns true if the current function returns DECL. */
925 static bool
926 cfun_returns (tree decl)
928 edge_iterator ei;
929 edge e;
930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
932 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
933 if (!ret)
934 continue;
935 if (gimple_return_retval (ret) == decl)
936 return true;
937 /* We often end up with an aggregate copy to the result decl,
938 handle that case as well. First skip intermediate clobbers
939 though. */
940 gimple *def = ret;
943 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
945 while (gimple_clobber_p (def));
946 if (is_a <gassign *> (def)
947 && gimple_assign_lhs (def) == gimple_return_retval (ret)
948 && gimple_assign_rhs1 (def) == decl)
949 return true;
951 return false;
954 /* Calculate cost of DR's memory access. */
955 void
956 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
957 dr_alignment_support alignment_support_scheme,
958 int misalignment,
959 unsigned int *inside_cost,
960 stmt_vector_for_cost *body_cost_vec)
962 switch (alignment_support_scheme)
964 case dr_aligned:
966 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
967 vector_store, stmt_info, 0,
968 vect_body);
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE, vect_location,
972 "vect_model_store_cost: aligned.\n");
973 break;
976 case dr_unaligned_supported:
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
980 unaligned_store, stmt_info,
981 misalignment, vect_body);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE, vect_location,
984 "vect_model_store_cost: unaligned supported by "
985 "hardware.\n");
986 break;
989 case dr_unaligned_unsupported:
991 *inside_cost = VECT_MAX_COST;
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
995 "vect_model_store_cost: unsupported access.\n");
996 break;
999 default:
1000 gcc_unreachable ();
1004 /* Calculate cost of DR's memory access. */
1005 void
1006 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1007 dr_alignment_support alignment_support_scheme,
1008 int misalignment,
1009 bool add_realign_cost, unsigned int *inside_cost,
1010 unsigned int *prologue_cost,
1011 stmt_vector_for_cost *prologue_cost_vec,
1012 stmt_vector_for_cost *body_cost_vec,
1013 bool record_prologue_costs)
1015 switch (alignment_support_scheme)
1017 case dr_aligned:
1019 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1020 stmt_info, 0, vect_body);
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_NOTE, vect_location,
1024 "vect_model_load_cost: aligned.\n");
1026 break;
1028 case dr_unaligned_supported:
1030 /* Here, we assign an additional cost for the unaligned load. */
1031 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1032 unaligned_load, stmt_info,
1033 misalignment, vect_body);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_load_cost: unaligned supported by "
1038 "hardware.\n");
1040 break;
1042 case dr_explicit_realign:
1044 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1045 vector_load, stmt_info, 0, vect_body);
1046 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1047 vec_perm, stmt_info, 0, vect_body);
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1051 prologue costs. */
1052 if (targetm.vectorize.builtin_mask_for_load)
1053 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1054 stmt_info, 0, vect_body);
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_NOTE, vect_location,
1058 "vect_model_load_cost: explicit realign\n");
1060 break;
1062 case dr_explicit_realign_optimized:
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_NOTE, vect_location,
1066 "vect_model_load_cost: unaligned software "
1067 "pipelined.\n");
1069 /* Unaligned software pipeline has a load of an address, an initial
1070 load, and possibly a mask operation to "prime" the loop. However,
1071 if this is an access in a group of loads, which provide grouped
1072 access, then the above cost should only be considered for one
1073 access in the group. Inside the loop, there is a load op
1074 and a realignment op. */
1076 if (add_realign_cost && record_prologue_costs)
1078 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1079 vector_stmt, stmt_info,
1080 0, vect_prologue);
1081 if (targetm.vectorize.builtin_mask_for_load)
1082 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1083 vector_stmt, stmt_info,
1084 0, vect_prologue);
1087 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1088 stmt_info, 0, vect_body);
1089 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1090 stmt_info, 0, vect_body);
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE, vect_location,
1094 "vect_model_load_cost: explicit realign optimized"
1095 "\n");
1097 break;
1100 case dr_unaligned_unsupported:
1102 *inside_cost = VECT_MAX_COST;
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1106 "vect_model_load_cost: unsupported access.\n");
1107 break;
1110 default:
1111 gcc_unreachable ();
1115 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1116 the loop preheader for the vectorized stmt STMT_VINFO. */
1118 static void
1119 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1120 gimple_stmt_iterator *gsi)
1122 if (gsi)
1123 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1124 else
1125 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE, vect_location,
1129 "created new init_stmt: %G", new_stmt);
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
		  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push something to an SSA name with
     initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (VECTOR_TYPE_P (type));
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ??? Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		}
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}
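
/* For instance (illustration only), asking for a V4SI vector initialized
   from the scalar constant 17 builds { 17, 17, 17, 17 } via
   build_vector_from_val and emits

     cst_1 = { 17, 17, 17, 17 };

   in the loop preheader (GSI == NULL), returning the SSA name cst_1
   (name made up for the example).  */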
1202 /* Function vect_get_vec_defs_for_operand.
1204 OP is an operand in STMT_VINFO. This function returns a vector of
1205 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1207 In the case that OP is an SSA_NAME which is defined in the loop, then
1208 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1210 In case OP is an invariant or constant, a new stmt that creates a vector def
1211 needs to be introduced. VECTYPE may be used to specify a required type for
1212 vector invariant. */
1214 void
1215 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1216 unsigned ncopies,
1217 tree op, vec<tree> *vec_oprnds, tree vectype)
1219 gimple *def_stmt;
1220 enum vect_def_type dt;
1221 bool is_simple_use;
1222 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_get_vec_defs_for_operand: %T\n", op);
1228 stmt_vec_info def_stmt_info;
1229 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1230 &def_stmt_info, &def_stmt);
1231 gcc_assert (is_simple_use);
1232 if (def_stmt && dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1235 vec_oprnds->create (ncopies);
1236 if (dt == vect_constant_def || dt == vect_external_def)
1238 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1239 tree vector_type;
1241 if (vectype)
1242 vector_type = vectype;
1243 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1244 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1245 vector_type = truth_type_for (stmt_vectype);
1246 else
1247 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1249 gcc_assert (vector_type);
1250 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1251 while (ncopies--)
1252 vec_oprnds->quick_push (vop);
1254 else
1256 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1257 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1258 for (unsigned i = 0; i < ncopies; ++i)
1259 vec_oprnds->quick_push (gimple_get_lhs
1260 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1265 /* Get vectorized definitions for OP0 and OP1. */
1267 void
1268 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1269 unsigned ncopies,
1270 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1271 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1272 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1273 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1275 if (slp_node)
1277 if (op0)
1278 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1279 if (op1)
1280 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1281 if (op2)
1282 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1283 if (op3)
1284 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1286 else
1288 if (op0)
1289 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1290 op0, vec_oprnds0, vectype0);
1291 if (op1)
1292 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1293 op1, vec_oprnds1, vectype1);
1294 if (op2)
1295 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1296 op2, vec_oprnds2, vectype2);
1297 if (op3)
1298 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1299 op3, vec_oprnds3, vectype3);
1303 void
1304 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1305 unsigned ncopies,
1306 tree op0, vec<tree> *vec_oprnds0,
1307 tree op1, vec<tree> *vec_oprnds1,
1308 tree op2, vec<tree> *vec_oprnds2,
1309 tree op3, vec<tree> *vec_oprnds3)
1311 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1312 op0, vec_oprnds0, NULL_TREE,
1313 op1, vec_oprnds1, NULL_TREE,
1314 op2, vec_oprnds2, NULL_TREE,
1315 op3, vec_oprnds3, NULL_TREE);
1318 /* Helper function called by vect_finish_replace_stmt and
1319 vect_finish_stmt_generation. Set the location of the new
1320 statement and create and return a stmt_vec_info for it. */
1322 static void
1323 vect_finish_stmt_generation_1 (vec_info *,
1324 stmt_vec_info stmt_info, gimple *vec_stmt)
1326 if (dump_enabled_p ())
1327 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1329 if (stmt_info)
1331 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1333 /* While EH edges will generally prevent vectorization, stmt might
1334 e.g. be in a must-not-throw region. Ensure newly created stmts
1335 that could throw are part of the same region. */
1336 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1337 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1338 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1340 else
1341 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1344 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1345 which sets the same scalar result as STMT_INFO did. Create and return a
1346 stmt_vec_info for VEC_STMT. */
1348 void
1349 vect_finish_replace_stmt (vec_info *vinfo,
1350 stmt_vec_info stmt_info, gimple *vec_stmt)
1352 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1353 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1355 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1356 gsi_replace (&gsi, vec_stmt, true);
1358 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1361 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1362 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1364 void
1365 vect_finish_stmt_generation (vec_info *vinfo,
1366 stmt_vec_info stmt_info, gimple *vec_stmt,
1367 gimple_stmt_iterator *gsi)
1369 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1371 if (!gsi_end_p (*gsi)
1372 && gimple_has_mem_ops (vec_stmt))
1374 gimple *at_stmt = gsi_stmt (*gsi);
1375 tree vuse = gimple_vuse (at_stmt);
1376 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1378 tree vdef = gimple_vdef (at_stmt);
1379 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1380 gimple_set_modified (vec_stmt, true);
1381 /* If we have an SSA vuse and insert a store, update virtual
1382 SSA form to avoid triggering the renamer. Do so only
1383 if we can easily see all uses - which is what almost always
1384 happens with the way vectorized stmts are inserted. */
1385 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1386 && ((is_gimple_assign (vec_stmt)
1387 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1388 || (is_gimple_call (vec_stmt)
1389 && (!(gimple_call_flags (vec_stmt)
1390 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1391 || (gimple_call_lhs (vec_stmt)
1392 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1394 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1395 gimple_set_vdef (vec_stmt, new_vdef);
1396 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1400 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1401 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1404 /* We want to vectorize a call to combined function CFN with function
1405 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1406 as the types of all inputs. Check whether this is possible using
1407 an internal function, returning its code if so or IFN_LAST if not. */
1409 static internal_fn
1410 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1411 tree vectype_out, tree vectype_in)
1413 internal_fn ifn;
1414 if (internal_fn_p (cfn))
1415 ifn = as_internal_fn (cfn);
1416 else
1417 ifn = associated_internal_fn (fndecl);
1418 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1420 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1421 if (info.vectorizable)
1423 bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
1424 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1425 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1427 /* The type size of both the vectype_in and vectype_out should be
1428 exactly the same when vectype_out isn't participating the optab.
1429 While there is no restriction for type size when vectype_out
1430 is part of the optab query. */
1431 if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
1432 return IFN_LAST;
1434 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1435 OPTIMIZE_FOR_SPEED))
1436 return ifn;
1439 return IFN_LAST;
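
/* For example (illustration only), a call to the sqrtf built-in is
   associated with IFN_SQRT; the function above returns IFN_SQRT when the
   target supports it directly for the chosen vector types (say V4SF in
   and out), and IFN_LAST otherwise.  */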
1443 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1444 gimple_stmt_iterator *);
1446 /* Check whether a load or store statement in the loop described by
1447 LOOP_VINFO is possible in a loop using partial vectors. This is
1448 testing whether the vectorizer pass has the appropriate support,
1449 as well as whether the target does.
1451 VLS_TYPE says whether the statement is a load or store and VECTYPE
1452 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1453 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1454 says how the load or store is going to be implemented and GROUP_SIZE
1455 is the number of load or store statements in the containing group.
1456 If the access is a gather load or scatter store, GS_INFO describes
1457 its arguments. If the load or store is conditional, SCALAR_MASK is the
1458 condition under which it occurs.
1460 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1461 vectors is not supported, otherwise record the required rgroup control
1462 types. */
1464 static void
1465 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1466 slp_tree slp_node,
1467 vec_load_store_type vls_type,
1468 int group_size,
1469 vect_memory_access_type
1470 memory_access_type,
1471 gather_scatter_info *gs_info,
1472 tree scalar_mask)
1474 /* Invariant loads need no special support. */
1475 if (memory_access_type == VMAT_INVARIANT)
1476 return;
1478 unsigned int nvectors;
1479 if (slp_node)
1480 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1481 else
1482 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1484 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1485 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1486 machine_mode vecmode = TYPE_MODE (vectype);
1487 bool is_load = (vls_type == VLS_LOAD);
1488 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1490 internal_fn ifn
1491 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1492 : vect_store_lanes_supported (vectype, group_size, true));
1493 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1494 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1495 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1496 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1497 scalar_mask);
1498 else
1500 if (dump_enabled_p ())
1501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1502 "can't operate on partial vectors because"
1503 " the target doesn't have an appropriate"
1504 " load/store-lanes instruction.\n");
1505 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1507 return;
1510 if (memory_access_type == VMAT_GATHER_SCATTER)
1512 internal_fn ifn = (is_load
1513 ? IFN_MASK_GATHER_LOAD
1514 : IFN_MASK_SCATTER_STORE);
1515 internal_fn len_ifn = (is_load
1516 ? IFN_MASK_LEN_GATHER_LOAD
1517 : IFN_MASK_LEN_SCATTER_STORE);
1518 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1519 gs_info->memory_type,
1520 gs_info->offset_vectype,
1521 gs_info->scale))
1522 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1523 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1524 gs_info->memory_type,
1525 gs_info->offset_vectype,
1526 gs_info->scale))
1527 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1528 scalar_mask);
1529 else
1531 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1533 "can't operate on partial vectors because"
1534 " the target doesn't have an appropriate"
1535 " gather load or scatter store instruction.\n");
1536 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1538 return;
1541 if (memory_access_type != VMAT_CONTIGUOUS
1542 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1544 /* Element X of the data must come from iteration i * VF + X of the
1545 scalar loop. We need more work to support other mappings. */
1546 if (dump_enabled_p ())
1547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1548 "can't operate on partial vectors because an"
1549 " access isn't contiguous.\n");
1550 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1551 return;
1554 if (!VECTOR_MODE_P (vecmode))
1556 if (dump_enabled_p ())
1557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1558 "can't operate on partial vectors when emulating"
1559 " vector operations.\n");
1560 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1561 return;
1564 /* We might load more scalars than we need for permuting SLP loads.
1565 We checked in get_group_load_store_type that the extra elements
1566 don't leak into a new vector. */
1567 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1569 unsigned int nvectors;
1570 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1571 return nvectors;
1572 gcc_unreachable ();
1575 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1576 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1577 machine_mode mask_mode;
1578 machine_mode vmode;
1579 bool using_partial_vectors_p = false;
1580 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1582 nvectors = group_memory_nvectors (group_size * vf, nunits);
1583 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1584 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1585 using_partial_vectors_p = true;
1587 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1588 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1590 nvectors = group_memory_nvectors (group_size * vf, nunits);
1591 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1592 using_partial_vectors_p = true;
1595 if (!using_partial_vectors_p)
1597 if (dump_enabled_p ())
1598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1599 "can't operate on partial vectors because the"
1600 " target doesn't have the appropriate partial"
1601 " vectorization load or store.\n");
1602 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
   otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
		  tree vec_mask, gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);

  if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
    return vec_mask;

  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
					  vec_mask, loop_mask);

  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
  return and_res;
}
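
/* E.g. (illustration only) for a conditional store in a fully-masked loop,
   with vec_mask_34 the vectorized condition and loop_mask_12 the
   active-lane mask (SSA names made up for the example), this emits

     vec_mask_and_35 = vec_mask_34 & loop_mask_12;

   unless the pair is already known to be ANDed, and the combined mask is
   then fed to the masked load or store.  */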
1636 /* Determine whether we can use a gather load or scatter store to vectorize
1637 strided load or store STMT_INFO by truncating the current offset to a
1638 smaller width. We need to be able to construct an offset vector:
1640 { 0, X, X*2, X*3, ... }
1642 without loss of precision, where X is STMT_INFO's DR_STEP.
1644 Return true if this is possible, describing the gather load or scatter
1645 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1647 static bool
1648 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1649 loop_vec_info loop_vinfo, bool masked_p,
1650 gather_scatter_info *gs_info)
1652 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1653 data_reference *dr = dr_info->dr;
1654 tree step = DR_STEP (dr);
1655 if (TREE_CODE (step) != INTEGER_CST)
1657 /* ??? Perhaps we could use range information here? */
1658 if (dump_enabled_p ())
1659 dump_printf_loc (MSG_NOTE, vect_location,
1660 "cannot truncate variable step.\n");
1661 return false;
1664 /* Get the number of bits in an element. */
1665 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1666 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1667 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1669 /* Set COUNT to the upper limit on the number of elements - 1.
1670 Start with the maximum vectorization factor. */
1671 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1673 /* Try lowering COUNT to the number of scalar latch iterations. */
1674 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1675 widest_int max_iters;
1676 if (max_loop_iterations (loop, &max_iters)
1677 && max_iters < count)
1678 count = max_iters.to_shwi ();
1680 /* Try scales of 1 and the element size. */
1681 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1682 wi::overflow_type overflow = wi::OVF_NONE;
1683 for (int i = 0; i < 2; ++i)
1685 int scale = scales[i];
1686 widest_int factor;
1687 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1688 continue;
1690 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1691 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1692 if (overflow)
1693 continue;
1694 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1695 unsigned int min_offset_bits = wi::min_precision (range, sign);
1697 /* Find the narrowest viable offset type. */
1698 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1699 tree offset_type = build_nonstandard_integer_type (offset_bits,
1700 sign == UNSIGNED);
1702 /* See whether the target supports the operation with an offset
1703 no narrower than OFFSET_TYPE. */
1704 tree memory_type = TREE_TYPE (DR_REF (dr));
1705 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1706 vectype, memory_type, offset_type, scale,
1707 &gs_info->ifn, &gs_info->offset_vectype)
1708 || gs_info->ifn == IFN_LAST)
1709 continue;
1711 gs_info->decl = NULL_TREE;
1712 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1713 but we don't need to store that here. */
1714 gs_info->base = NULL_TREE;
1715 gs_info->element_type = TREE_TYPE (vectype);
1716 gs_info->offset = fold_convert (offset_type, step);
1717 gs_info->offset_dt = vect_constant_def;
1718 gs_info->scale = scale;
1719 gs_info->memory_type = memory_type;
1720 return true;
1723 if (overflow && dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE, vect_location,
1725 "truncating gather/scatter offset to %d bits"
1726 " might change its value.\n", element_bits);
1728 return false;
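
/* E.g. (illustration only) for a strided access with DR_STEP 32 in a loop
   known to iterate at most 255 times, the required offsets are
   { 0, 32, 64, ..., 255 * 32 }, which fit in 16 bits, so a gather/scatter
   using a 16-bit unsigned offset type with scale 1 can be used (or smaller
   offsets with the element-size scale, if the target supports that).  */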
1731 /* Return true if we can use gather/scatter internal functions to
1732 vectorize STMT_INFO, which is a grouped or strided load or store.
1733 MASKED_P is true if load or store is conditional. When returning
1734 true, fill in GS_INFO with the information required to perform the
1735 operation. */
1737 static bool
1738 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1739 loop_vec_info loop_vinfo, bool masked_p,
1740 gather_scatter_info *gs_info)
1742 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1743 || gs_info->ifn == IFN_LAST)
1744 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1745 masked_p, gs_info);
1747 tree old_offset_type = TREE_TYPE (gs_info->offset);
1748 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1750 gcc_assert (TYPE_PRECISION (new_offset_type)
1751 >= TYPE_PRECISION (old_offset_type));
1752 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1754 if (dump_enabled_p ())
1755 dump_printf_loc (MSG_NOTE, vect_location,
1756 "using gather/scatter for strided/grouped access,"
1757 " scale = %d\n", gs_info->scale);
1759 return true;
1762 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1763 elements with a known constant step. Return -1 if that step
1764 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1766 static int
1767 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1769 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1770 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1771 size_zero_node);
1774 /* If the target supports a permute mask that reverses the elements in
1775 a vector of type VECTYPE, return that mask, otherwise return null. */
1777 static tree
1778 perm_mask_for_reverse (tree vectype)
1780 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1782 /* The encoding has a single stepped pattern. */
1783 vec_perm_builder sel (nunits, 1, 3);
1784 for (int i = 0; i < 3; ++i)
1785 sel.quick_push (nunits - 1 - i);
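/* Illustrative example: with NUNITS = 8 the three encoded elements are
   { 7, 6, 5 }, which the single stepped pattern extends to the full
   reversal { 7, 6, 5, 4, 3, 2, 1, 0 }.  */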
1787 vec_perm_indices indices (sel, 1, nunits);
1788 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1789 indices))
1790 return NULL_TREE;
1791 return vect_gen_perm_mask_checked (vectype, indices);
1794 /* A subroutine of get_load_store_type, with a subset of the same
1795 arguments. Handle the case where STMT_INFO is a load or store that
1796 accesses consecutive elements with a negative step. Sets *POFFSET
1797 to the offset to be applied to the DR for the first access. */
1799 static vect_memory_access_type
1800 get_negative_load_store_type (vec_info *vinfo,
1801 stmt_vec_info stmt_info, tree vectype,
1802 vec_load_store_type vls_type,
1803 unsigned int ncopies, poly_int64 *poffset)
1805 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1806 dr_alignment_support alignment_support_scheme;
1808 if (ncopies > 1)
1810 if (dump_enabled_p ())
1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1812 "multiple types with negative step.\n");
1813 return VMAT_ELEMENTWISE;
1816 /* For backward running DRs the first access in vectype actually is
1817 N-1 elements before the address of the DR. */
1818 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1819 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
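/* Illustrative example: for a V4SI vector with 4-byte elements *POFFSET
   is (-4 + 1) * 4 = -12, i.e. the first vector access starts three
   elements before the DR address.  */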
1821 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1822 alignment_support_scheme
1823 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1824 if (alignment_support_scheme != dr_aligned
1825 && alignment_support_scheme != dr_unaligned_supported)
1827 if (dump_enabled_p ())
1828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1829 "negative step but alignment required.\n");
1830 *poffset = 0;
1831 return VMAT_ELEMENTWISE;
1834 if (vls_type == VLS_STORE_INVARIANT)
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_NOTE, vect_location,
1838 "negative step with invariant source;"
1839 " no permute needed.\n");
1840 return VMAT_CONTIGUOUS_DOWN;
1843 if (!perm_mask_for_reverse (vectype))
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1847 "negative step and reversing not supported.\n");
1848 *poffset = 0;
1849 return VMAT_ELEMENTWISE;
1852 return VMAT_CONTIGUOUS_REVERSE;
1855 /* STMT_INFO is either a masked or unconditional store. Return the value
1856 being stored. */
1858 tree
1859 vect_get_store_rhs (stmt_vec_info stmt_info)
1861 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1863 gcc_assert (gimple_assign_single_p (assign));
1864 return gimple_assign_rhs1 (assign);
1866 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1868 internal_fn ifn = gimple_call_internal_fn (call);
1869 int index = internal_fn_stored_value_index (ifn);
1870 gcc_assert (index >= 0);
1871 return gimple_call_arg (call, index);
1873 gcc_unreachable ();
1876 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1878 This function returns a vector type which can be composed from NELTS pieces,
1879 whose type is recorded in PTYPE. VTYPE should be a vector type and have the
1880 same vector size as the returned vector. It first checks whether the target
1881 supports a pieces-sized vector mode for the construction; if not, it checks
1882 a pieces-sized scalar mode instead. It returns NULL_TREE if it fails to find
1883 an available composition.
1885 For example, for (vtype=V16QI, nelts=4), we can probably get:
1886 - V16QI with PTYPE V4QI.
1887 - V4SI with PTYPE SI.
1888 - NULL_TREE. */
1890 static tree
1891 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1893 gcc_assert (VECTOR_TYPE_P (vtype));
1894 gcc_assert (known_gt (nelts, 0U));
1896 machine_mode vmode = TYPE_MODE (vtype);
1897 if (!VECTOR_MODE_P (vmode))
1898 return NULL_TREE;
1900 /* When we are asked to compose the vector from its components let
1901 that happen directly. */
1902 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1904 *ptype = TREE_TYPE (vtype);
1905 return vtype;
1908 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
1909 unsigned int pbsize;
1910 if (constant_multiple_p (vbsize, nelts, &pbsize))
1912 /* First check if vec_init optab supports construction from
1913 vector pieces directly. */
1914 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1915 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
1916 machine_mode rmode;
1917 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
1918 && (convert_optab_handler (vec_init_optab, vmode, rmode)
1919 != CODE_FOR_nothing))
1921 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1922 return vtype;
1925 /* Otherwise check whether an integer type of the same piece size exists and
1926 if vec_init optab supports construction from it directly. */
1927 if (int_mode_for_size (pbsize, 0).exists (&elmode)
1928 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
1929 && (convert_optab_handler (vec_init_optab, rmode, elmode)
1930 != CODE_FOR_nothing))
1932 *ptype = build_nonstandard_integer_type (pbsize, 1);
1933 return build_vector_type (*ptype, nelts);
1937 return NULL_TREE;
1940 /* A subroutine of get_load_store_type, with a subset of the same
1941 arguments. Handle the case where STMT_INFO is part of a grouped load
1942 or store.
1944 For stores, the statements in the group are all consecutive
1945 and there is no gap at the end. For loads, the statements in the
1946 group might not be consecutive; there can be gaps between statements
1947 as well as at the end. */
1949 static bool
1950 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1951 tree vectype, slp_tree slp_node,
1952 bool masked_p, vec_load_store_type vls_type,
1953 vect_memory_access_type *memory_access_type,
1954 poly_int64 *poffset,
1955 dr_alignment_support *alignment_support_scheme,
1956 int *misalignment,
1957 gather_scatter_info *gs_info,
1958 internal_fn *lanes_ifn)
1960 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1961 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1962 stmt_vec_info first_stmt_info;
1963 unsigned int group_size;
1964 unsigned HOST_WIDE_INT gap;
1965 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1967 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1968 group_size = DR_GROUP_SIZE (first_stmt_info);
1969 gap = DR_GROUP_GAP (first_stmt_info);
1971 else
1973 first_stmt_info = stmt_info;
1974 group_size = 1;
1975 gap = 0;
1977 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
1978 bool single_element_p = (stmt_info == first_stmt_info
1979 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
1980 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1982 /* True if the vectorized statements would access beyond the last
1983 statement in the group. */
1984 bool overrun_p = false;
1986 /* True if we can cope with such overrun by peeling for gaps, so that
1987 there is at least one final scalar iteration after the vector loop. */
1988 bool can_overrun_p = (!masked_p
1989 && vls_type == VLS_LOAD
1990 && loop_vinfo
1991 && !loop->inner);
1993 /* There can only be a gap at the end of the group if the stride is
1994 known at compile time. */
1995 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
1997 /* Stores can't yet have gaps. */
1998 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2000 if (slp_node)
2002 /* For SLP vectorization we directly vectorize a subchain
2003 without permutation. */
2004 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2005 first_dr_info
2006 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2007 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2009 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2010 separated by the stride, until we have a complete vector.
2011 Fall back to scalar accesses if that isn't possible. */
2012 if (multiple_p (nunits, group_size))
2013 *memory_access_type = VMAT_STRIDED_SLP;
2014 else
2015 *memory_access_type = VMAT_ELEMENTWISE;
2017 else
2019 overrun_p = loop_vinfo && gap != 0;
2020 if (overrun_p && vls_type != VLS_LOAD)
2022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2023 "Grouped store with gaps requires"
2024 " non-consecutive accesses\n");
2025 return false;
2027 /* An overrun is fine if the trailing elements are smaller
2028 than the alignment boundary B. Every vector access will
2029 be a multiple of B and so we are guaranteed to access a
2030 non-gap element in the same B-sized block. */
2031 if (overrun_p
2032 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2033 vectype)
2034 / vect_get_scalar_dr_size (first_dr_info)))
2035 overrun_p = false;
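/* Illustrative example: with a known alignment of 16 bytes and 4-byte
   elements the bound is 4, so a gap of at most 3 trailing elements keeps
   every vector access within a 16-byte block that also contains a
   non-gap element.  */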
2037 /* If the gap splits the vector in half and the target
2038 can do half-vector operations, avoid the epilogue peeling
2039 by simply loading half of the vector only. Usually
2040 the construction with an upper zero half will be elided. */
2041 dr_alignment_support alss;
2042 int misalign = dr_misalignment (first_dr_info, vectype);
2043 tree half_vtype;
2044 if (overrun_p
2045 && !masked_p
2046 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2047 vectype, misalign)))
2048 == dr_aligned
2049 || alss == dr_unaligned_supported)
2050 && known_eq (nunits, (group_size - gap) * 2)
2051 && known_eq (nunits, group_size)
2052 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2053 != NULL_TREE))
2054 overrun_p = false;
2056 if (overrun_p && !can_overrun_p)
2058 if (dump_enabled_p ())
2059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2060 "Peeling for outer loop is not supported\n");
2061 return false;
2063 int cmp = compare_step_with_zero (vinfo, stmt_info);
2064 if (cmp < 0)
2066 if (single_element_p)
2067 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2068 only correct for single element "interleaving" SLP. */
2069 *memory_access_type = get_negative_load_store_type
2070 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2071 else
2073 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2074 separated by the stride, until we have a complete vector.
2075 Fall back to scalar accesses if that isn't possible. */
2076 if (multiple_p (nunits, group_size))
2077 *memory_access_type = VMAT_STRIDED_SLP;
2078 else
2079 *memory_access_type = VMAT_ELEMENTWISE;
2082 else if (cmp == 0 && loop_vinfo)
2084 gcc_assert (vls_type == VLS_LOAD);
2085 *memory_access_type = VMAT_INVARIANT;
2086 /* Invariant accesses perform only component accesses, alignment
2087 is irrelevant for them. */
2088 *alignment_support_scheme = dr_unaligned_supported;
2090 else
2091 *memory_access_type = VMAT_CONTIGUOUS;
2093 /* When we have a contiguous access across loop iterations
2094 but the access in the loop doesn't cover the full vector
2095 we can end up with no gap recorded but still excess
2096 elements accessed, see PR103116. Make sure we peel for
2097 gaps if necessary and sufficient and give up if not.
2099 If there is a combination of the access not covering the full
2100 vector and a gap recorded then we may need to peel twice. */
2101 if (loop_vinfo
2102 && *memory_access_type == VMAT_CONTIGUOUS
2103 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2104 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2105 nunits))
2107 unsigned HOST_WIDE_INT cnunits, cvf;
2108 if (!can_overrun_p
2109 || !nunits.is_constant (&cnunits)
2110 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2111 /* Peeling for gaps assumes that a single scalar iteration
2112 is enough to make sure the last vector iteration doesn't
2113 access excess elements.
2114 ??? Enhancements include peeling multiple iterations
2115 or using masked loads with a static mask. */
2116 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2118 if (dump_enabled_p ())
2119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2120 "peeling for gaps insufficient for "
2121 "access\n");
2122 return false;
2124 overrun_p = true;
2128 else
2130 /* We can always handle this case using elementwise accesses,
2131 but see if something more efficient is available. */
2132 *memory_access_type = VMAT_ELEMENTWISE;
2134 /* If there is a gap at the end of the group then these optimizations
2135 would access excess elements in the last iteration. */
2136 bool would_overrun_p = (gap != 0);
2137 /* An overrun is fine if the trailing elements are smaller than the
2138 alignment boundary B. Every vector access will be a multiple of B
2139 and so we are guaranteed to access a non-gap element in the
2140 same B-sized block. */
2141 if (would_overrun_p
2142 && !masked_p
2143 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2144 / vect_get_scalar_dr_size (first_dr_info)))
2145 would_overrun_p = false;
2147 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2148 && (can_overrun_p || !would_overrun_p)
2149 && compare_step_with_zero (vinfo, stmt_info) > 0)
2151 /* First cope with the degenerate case of a single-element
2152 vector. */
2153 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2154 *memory_access_type = VMAT_CONTIGUOUS;
2156 else
2158 /* Otherwise try using LOAD/STORE_LANES. */
2159 *lanes_ifn
2160 = vls_type == VLS_LOAD
2161 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2162 : vect_store_lanes_supported (vectype, group_size,
2163 masked_p);
2164 if (*lanes_ifn != IFN_LAST)
2166 *memory_access_type = VMAT_LOAD_STORE_LANES;
2167 overrun_p = would_overrun_p;
2170 /* If that fails, try using permuting loads. */
2171 else if (vls_type == VLS_LOAD
2172 ? vect_grouped_load_supported (vectype,
2173 single_element_p,
2174 group_size)
2175 : vect_grouped_store_supported (vectype, group_size))
2177 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2178 overrun_p = would_overrun_p;
2183 /* As a last resort, try using a gather load or scatter store.
2185 ??? Although the code can handle all group sizes correctly,
2186 it probably isn't a win to use separate strided accesses based
2187 on nearby locations. Or, even if it's a win over scalar code,
2188 it might not be a win over vectorizing at a lower VF, if that
2189 allows us to use contiguous accesses. */
2190 if (*memory_access_type == VMAT_ELEMENTWISE
2191 && single_element_p
2192 && loop_vinfo
2193 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2194 masked_p, gs_info))
2195 *memory_access_type = VMAT_GATHER_SCATTER;
2198 if (*memory_access_type == VMAT_GATHER_SCATTER
2199 || *memory_access_type == VMAT_ELEMENTWISE)
2201 *alignment_support_scheme = dr_unaligned_supported;
2202 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2204 else
2206 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2207 *alignment_support_scheme
2208 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2209 *misalignment);
2212 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2214 /* STMT is the leader of the group. Check the operands of all the
2215 stmts of the group. */
2216 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2217 while (next_stmt_info)
2219 tree op = vect_get_store_rhs (next_stmt_info);
2220 enum vect_def_type dt;
2221 if (!vect_is_simple_use (op, vinfo, &dt))
2223 if (dump_enabled_p ())
2224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2225 "use not simple.\n");
2226 return false;
2228 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2232 if (overrun_p)
2234 gcc_assert (can_overrun_p);
2235 if (dump_enabled_p ())
2236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2237 "Data access with gaps requires scalar "
2238 "epilogue loop\n");
2239 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2242 return true;
2245 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2246 if there is a memory access type that the vectorized form can use,
2247 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2248 or scatters, fill in GS_INFO accordingly. In addition
2249 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2250 the target does not support the alignment scheme. *MISALIGNMENT
2251 is set according to the alignment of the access (including
2252 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2254 SLP says whether we're performing SLP rather than loop vectorization.
2255 MASKED_P is true if the statement is conditional on a vectorized mask.
2256 VECTYPE is the vector type that the vectorized statements will use.
2257 NCOPIES is the number of vector statements that will be needed. */
2259 static bool
2260 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2261 tree vectype, slp_tree slp_node,
2262 bool masked_p, vec_load_store_type vls_type,
2263 unsigned int ncopies,
2264 vect_memory_access_type *memory_access_type,
2265 poly_int64 *poffset,
2266 dr_alignment_support *alignment_support_scheme,
2267 int *misalignment,
2268 gather_scatter_info *gs_info,
2269 internal_fn *lanes_ifn)
2271 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2272 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2273 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2274 *poffset = 0;
2275 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2277 *memory_access_type = VMAT_GATHER_SCATTER;
2278 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2279 gcc_unreachable ();
2280 /* When using internal functions, we rely on pattern recognition
2281 to convert the type of the offset to the type that the target
2282 requires, with the result being a call to an internal function.
2283 If that failed for some reason (e.g. because another pattern
2284 took priority), just handle cases in which the offset already
2285 has the right type. */
2286 else if (gs_info->ifn != IFN_LAST
2287 && !is_gimple_call (stmt_info->stmt)
2288 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2289 TREE_TYPE (gs_info->offset_vectype)))
2291 if (dump_enabled_p ())
2292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2293 "%s offset requires a conversion\n",
2294 vls_type == VLS_LOAD ? "gather" : "scatter");
2295 return false;
2297 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2298 &gs_info->offset_dt,
2299 &gs_info->offset_vectype))
2301 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2303 "%s index use not simple.\n",
2304 vls_type == VLS_LOAD ? "gather" : "scatter");
2305 return false;
2307 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2309 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2310 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2311 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2312 (gs_info->offset_vectype),
2313 TYPE_VECTOR_SUBPARTS (vectype)))
2315 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2317 "unsupported vector types for emulated "
2318 "gather.\n");
2319 return false;
2322 /* Gather-scatter accesses perform only component accesses, alignment
2323 is irrelevant for them. */
2324 *alignment_support_scheme = dr_unaligned_supported;
2326 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2328 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2329 masked_p,
2330 vls_type, memory_access_type, poffset,
2331 alignment_support_scheme,
2332 misalignment, gs_info, lanes_ifn))
2333 return false;
2335 else if (STMT_VINFO_STRIDED_P (stmt_info))
2337 gcc_assert (!slp_node);
2338 if (loop_vinfo
2339 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2340 masked_p, gs_info))
2341 *memory_access_type = VMAT_GATHER_SCATTER;
2342 else
2343 *memory_access_type = VMAT_ELEMENTWISE;
2344 /* Alignment is irrelevant here. */
2345 *alignment_support_scheme = dr_unaligned_supported;
2347 else
2349 int cmp = compare_step_with_zero (vinfo, stmt_info);
2350 if (cmp == 0)
2352 gcc_assert (vls_type == VLS_LOAD);
2353 *memory_access_type = VMAT_INVARIANT;
2354 /* Invariant accesses perform only component accesses, alignment
2355 is irrelevant for them. */
2356 *alignment_support_scheme = dr_unaligned_supported;
2358 else
2360 if (cmp < 0)
2361 *memory_access_type = get_negative_load_store_type
2362 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2363 else
2364 *memory_access_type = VMAT_CONTIGUOUS;
2365 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2366 vectype, *poffset);
2367 *alignment_support_scheme
2368 = vect_supportable_dr_alignment (vinfo,
2369 STMT_VINFO_DR_INFO (stmt_info),
2370 vectype, *misalignment);
2374 if ((*memory_access_type == VMAT_ELEMENTWISE
2375 || *memory_access_type == VMAT_STRIDED_SLP)
2376 && !nunits.is_constant ())
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2380 "Not using elementwise accesses due to variable "
2381 "vectorization factor.\n");
2382 return false;
2385 if (*alignment_support_scheme == dr_unaligned_unsupported)
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2389 "unsupported unaligned access\n");
2390 return false;
2393 /* FIXME: At the moment the cost model seems to underestimate the
2394 cost of using elementwise accesses. This check preserves the
2395 traditional behavior until that can be fixed. */
2396 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2397 if (!first_stmt_info)
2398 first_stmt_info = stmt_info;
2399 if (*memory_access_type == VMAT_ELEMENTWISE
2400 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2401 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2402 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2403 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "not falling back to elementwise accesses\n");
2408 return false;
2410 return true;
2413 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2414 conditional operation STMT_INFO. When returning true, store the mask
2415 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2416 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2417 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2419 static bool
2420 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2421 slp_tree slp_node, unsigned mask_index,
2422 tree *mask, slp_tree *mask_node,
2423 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2425 enum vect_def_type mask_dt;
2426 tree mask_vectype;
2427 slp_tree mask_node_1;
2428 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2429 mask, &mask_node_1, &mask_dt, &mask_vectype))
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2433 "mask use not simple.\n");
2434 return false;
2437 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2439 if (dump_enabled_p ())
2440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2441 "mask argument is not a boolean.\n");
2442 return false;
2445 /* If the caller is not prepared for adjusting an external/constant
2446 SLP mask vector type, fail. */
2447 if (slp_node
2448 && !mask_node
2449 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2451 if (dump_enabled_p ())
2452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2453 "SLP mask argument is not vectorized.\n");
2454 return false;
2457 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2458 if (!mask_vectype)
2459 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
2460 mask_node_1);
2462 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2464 if (dump_enabled_p ())
2465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2466 "could not find an appropriate vector mask type.\n");
2467 return false;
2470 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2471 TYPE_VECTOR_SUBPARTS (vectype)))
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2475 "vector mask type %T"
2476 " does not match vector data type %T.\n",
2477 mask_vectype, vectype);
2479 return false;
2482 *mask_dt_out = mask_dt;
2483 *mask_vectype_out = mask_vectype;
2484 if (mask_node)
2485 *mask_node = mask_node_1;
2486 return true;
2489 /* Return true if stored value is suitable for vectorizing store
2490 statement STMT_INFO. When returning true, store the scalar stored
2491 in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2492 the type of the vectorized store value in
2493 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2495 static bool
2496 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2497 slp_tree slp_node, tree *rhs, slp_tree *rhs_node,
2498 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2499 vec_load_store_type *vls_type_out)
2501 int op_no = 0;
2502 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2504 if (gimple_call_internal_p (call)
2505 && internal_store_fn_p (gimple_call_internal_fn (call)))
2506 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2508 if (slp_node)
2509 op_no = vect_slp_child_index_for_operand
2510 (stmt_info->stmt, op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
2512 enum vect_def_type rhs_dt;
2513 tree rhs_vectype;
2514 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2515 rhs, rhs_node, &rhs_dt, &rhs_vectype))
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "use not simple.\n");
2520 return false;
2523 /* In case this is a store from a constant, make sure
2524 native_encode_expr can handle it. */
2525 if (CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
2527 if (dump_enabled_p ())
2528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2529 "cannot encode constant as a byte sequence.\n");
2530 return false;
2533 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2534 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2536 if (dump_enabled_p ())
2537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2538 "incompatible vector types.\n");
2539 return false;
2542 *rhs_dt_out = rhs_dt;
2543 *rhs_vectype_out = rhs_vectype;
2544 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2545 *vls_type_out = VLS_STORE_INVARIANT;
2546 else
2547 *vls_type_out = VLS_STORE;
2548 return true;
2551 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2552 Note that we support masks with floating-point type, in which case the
2553 floats are interpreted as a bitmask. */
2555 static tree
2556 vect_build_all_ones_mask (vec_info *vinfo,
2557 stmt_vec_info stmt_info, tree masktype)
2559 if (TREE_CODE (masktype) == INTEGER_TYPE)
2560 return build_int_cst (masktype, -1);
2561 else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2562 || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2564 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2565 mask = build_vector_from_val (masktype, mask);
2566 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2568 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2570 REAL_VALUE_TYPE r;
2571 long tmp[6];
2572 for (int j = 0; j < 6; ++j)
2573 tmp[j] = -1;
2574 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2575 tree mask = build_real (TREE_TYPE (masktype), r);
2576 mask = build_vector_from_val (masktype, mask);
2577 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2579 gcc_unreachable ();
2582 /* Build an all-zero merge value of type VECTYPE while vectorizing
2583 STMT_INFO as a gather load. */
2585 static tree
2586 vect_build_zero_merge_argument (vec_info *vinfo,
2587 stmt_vec_info stmt_info, tree vectype)
2589 tree merge;
2590 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2591 merge = build_int_cst (TREE_TYPE (vectype), 0);
2592 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2594 REAL_VALUE_TYPE r;
2595 long tmp[6];
2596 for (int j = 0; j < 6; ++j)
2597 tmp[j] = 0;
2598 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2599 merge = build_real (TREE_TYPE (vectype), r);
2601 else
2602 gcc_unreachable ();
2603 merge = build_vector_from_val (vectype, merge);
2604 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2607 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2608 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2609 the gather load operation. If the load is conditional, MASK is the
2610 vectorized condition, otherwise MASK is null. PTR is the base
2611 pointer and OFFSET is the vectorized offset. */
2613 static gimple *
2614 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2615 gimple_stmt_iterator *gsi,
2616 gather_scatter_info *gs_info,
2617 tree ptr, tree offset, tree mask)
2619 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2620 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2621 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2622 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2623 /* ptrtype */ arglist = TREE_CHAIN (arglist);
2624 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2625 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2626 tree scaletype = TREE_VALUE (arglist);
2627 tree var;
2628 gcc_checking_assert (types_compatible_p (srctype, rettype)
2629 && (!mask
2630 || TREE_CODE (masktype) == INTEGER_TYPE
2631 || types_compatible_p (srctype, masktype)));
2633 tree op = offset;
2634 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2636 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2637 TYPE_VECTOR_SUBPARTS (idxtype)));
2638 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2639 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2640 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2641 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2642 op = var;
2645 tree src_op = NULL_TREE;
2646 tree mask_op = NULL_TREE;
2647 if (mask)
2649 if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2651 tree utype, optype = TREE_TYPE (mask);
2652 if (VECTOR_TYPE_P (masktype)
2653 || TYPE_MODE (masktype) == TYPE_MODE (optype))
2654 utype = masktype;
2655 else
2656 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2657 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2658 tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
2659 gassign *new_stmt
2660 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2661 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2662 mask_arg = var;
2663 if (!useless_type_conversion_p (masktype, utype))
2665 gcc_assert (TYPE_PRECISION (utype)
2666 <= TYPE_PRECISION (masktype));
2667 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2668 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2669 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2670 mask_arg = var;
2672 src_op = build_zero_cst (srctype);
2673 mask_op = mask_arg;
2675 else
2677 src_op = mask;
2678 mask_op = mask;
2681 else
2683 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2684 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2687 tree scale = build_int_cst (scaletype, gs_info->scale);
2688 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2689 mask_op, scale);
2691 if (!useless_type_conversion_p (vectype, rettype))
2693 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2694 TYPE_VECTOR_SUBPARTS (rettype)));
2695 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2696 gimple_call_set_lhs (new_stmt, op);
2697 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2698 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2699 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
2702 return new_stmt;
2705 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2706 instructions before GSI. GS_INFO describes the scatter store operation.
2707 PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
2708 vectorized data to store.
2709 If the store is conditional, MASK is the vectorized condition, otherwise
2710 MASK is null. */
2712 static gimple *
2713 vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
2714 gimple_stmt_iterator *gsi,
2715 gather_scatter_info *gs_info,
2716 tree ptr, tree offset, tree oprnd, tree mask)
2718 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2719 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2720 /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
2721 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2722 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2723 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2724 tree scaletype = TREE_VALUE (arglist);
2725 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2726 && TREE_CODE (rettype) == VOID_TYPE);
2728 tree mask_arg = NULL_TREE;
2729 if (mask)
2731 mask_arg = mask;
2732 tree optype = TREE_TYPE (mask_arg);
2733 tree utype;
2734 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
2735 utype = masktype;
2736 else
2737 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2738 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
2739 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
2740 gassign *new_stmt
2741 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2742 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2743 mask_arg = var;
2744 if (!useless_type_conversion_p (masktype, utype))
2746 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
2747 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2748 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2749 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2750 mask_arg = var;
2753 else
2755 mask_arg = build_int_cst (masktype, -1);
2756 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
2759 tree src = oprnd;
2760 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
2762 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
2763 TYPE_VECTOR_SUBPARTS (srctype)));
2764 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
2765 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
2766 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
2767 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2768 src = var;
2771 tree op = offset;
2772 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2774 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2775 TYPE_VECTOR_SUBPARTS (idxtype)));
2776 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2777 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2778 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2779 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2780 op = var;
2783 tree scale = build_int_cst (scaletype, gs_info->scale);
2784 gcall *new_stmt
2785 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
2786 return new_stmt;
2789 /* Prepare the base and offset in GS_INFO for vectorization.
2790 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2791 to the vectorized offset argument for the first copy of STMT_INFO.
2792 STMT_INFO is the statement described by GS_INFO and LOOP is the
2793 containing loop. */
2795 static void
2796 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2797 class loop *loop, stmt_vec_info stmt_info,
2798 slp_tree slp_node, gather_scatter_info *gs_info,
2799 tree *dataref_ptr, vec<tree> *vec_offset)
2801 gimple_seq stmts = NULL;
2802 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2803 if (stmts != NULL)
2805 basic_block new_bb;
2806 edge pe = loop_preheader_edge (loop);
2807 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2808 gcc_assert (!new_bb);
2810 if (slp_node)
2811 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2812 else
2814 unsigned ncopies
2815 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2816 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2817 gs_info->offset, vec_offset,
2818 gs_info->offset_vectype);
2822 /* Prepare to implement a grouped or strided load or store using
2823 the gather load or scatter store operation described by GS_INFO.
2824 STMT_INFO is the load or store statement.
2826 Set *DATAREF_BUMP to the amount that should be added to the base
2827 address after each copy of the vectorized statement. Set *VEC_OFFSET
2828 to an invariant offset vector in which element I has the value
2829 I * DR_STEP / SCALE. */
2831 static void
2832 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2833 loop_vec_info loop_vinfo,
2834 gimple_stmt_iterator *gsi,
2835 gather_scatter_info *gs_info,
2836 tree *dataref_bump, tree *vec_offset,
2837 vec_loop_lens *loop_lens)
2839 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2840 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2842 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2844 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2845 ivtmp_8 = _31 * 16 (step in bytes);
2846 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2847 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2848 tree loop_len
2849 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
2850 tree tmp
2851 = fold_build2 (MULT_EXPR, sizetype,
2852 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2853 loop_len);
2854 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
2855 GSI_SAME_STMT);
2857 else
2859 tree bump
2860 = size_binop (MULT_EXPR,
2861 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2862 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2863 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
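/* Illustrative example: with DR_STEP = 16 bytes and a 4-element vector,
   each copy of the vectorized statement advances the base address by
   64 bytes.  */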
2866 /* The offset given in GS_INFO can have pointer type, so use the element
2867 type of the vector instead. */
2868 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2870 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2871 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2872 ssize_int (gs_info->scale));
2873 step = fold_convert (offset_type, step);
2875 /* Create {0, X, X*2, X*3, ...}. */
2876 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2877 build_zero_cst (offset_type), step);
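/* Illustrative example: with DR_STEP = 16 bytes and SCALE = 4, X is 4
   and the series is { 0, 4, 8, 12, ... }; the gather/scatter then
   multiplies these offsets by SCALE again to reach elements 16 bytes
   apart.  */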
2878 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2881 /* Prepare the pointer IVs which need to be updated by a variable amount.
2882 Such a variable amount is the outcome of .SELECT_VL. In this case, each
2883 iteration may process a flexible number of elements, as long as that
2884 number is <= VF elements.
2886 Return the data reference increment according to .SELECT_VL.
2887 If new statements are needed, insert them before GSI. */
2889 static tree
2890 vect_get_loop_variant_data_ptr_increment (
2891 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
2892 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
2893 vect_memory_access_type memory_access_type)
2895 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2896 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2898 /* Gather/scatter accesses never reach here. */
2899 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
2901 /* When the SELECT_VL pattern is in use, we dynamically adjust
2902 the memory address by the .SELECT_VL result.
2904 The result of .SELECT_VL is the number of elements to
2905 be processed in each iteration. So the memory address
2906 adjustment operation should be:
2908 addr = addr + .SELECT_VL (ARG..) * step;
2910 tree loop_len
2911 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
2912 tree len_type = TREE_TYPE (loop_len);
2913 /* Since the outcome of .SELECT_VL is a number of elements, scale it
2914 by the step in bytes so that it can be used to adjust the pointer
2915 IVs by a variable amount. */
2916 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
2917 wide_int_to_tree (len_type, wi::to_widest (step)));
2918 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
2919 gassign *assign = gimple_build_assign (bump, tmp);
2920 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
2921 return bump;
2924 /* Return the amount that should be added to a vector pointer to move
2925 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2926 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2927 vectorization. */
2929 static tree
2930 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
2931 dr_vec_info *dr_info, tree aggr_type,
2932 vect_memory_access_type memory_access_type,
2933 vec_loop_lens *loop_lens = nullptr)
2935 if (memory_access_type == VMAT_INVARIANT)
2936 return size_zero_node;
2938 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2939 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2940 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
2941 loop_lens, dr_info,
2942 memory_access_type);
2944 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
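/* Illustrative example: when AGGR_TYPE is a 16-byte vector type the
   increment is 16 bytes; the negation below handles data references
   whose step runs backwards.  */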
2945 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2946 if (tree_int_cst_sgn (step) == -1)
2947 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2948 return iv_step;
2951 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2953 static bool
2954 vectorizable_bswap (vec_info *vinfo,
2955 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2956 gimple **vec_stmt, slp_tree slp_node,
2957 slp_tree *slp_op,
2958 tree vectype_in, stmt_vector_for_cost *cost_vec)
2960 tree op, vectype;
2961 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2962 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2963 unsigned ncopies;
2965 op = gimple_call_arg (stmt, 0);
2966 vectype = STMT_VINFO_VECTYPE (stmt_info);
2967 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2969 /* Multiple types in SLP are handled by creating the appropriate number of
2970 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2971 case of SLP. */
2972 if (slp_node)
2973 ncopies = 1;
2974 else
2975 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2977 gcc_assert (ncopies >= 1);
2979 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2980 if (! char_vectype)
2981 return false;
2983 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2984 unsigned word_bytes;
2985 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2986 return false;
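/* Illustrative example: for a V4SI input to __builtin_bswap32,
   CHAR_VECTYPE is V16QI, NUM_BYTES = 16, NUNITS = 4 and WORD_BYTES = 4.  */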
2988 /* The encoding uses one stepped pattern for each byte in the word. */
2989 vec_perm_builder elts (num_bytes, word_bytes, 3);
2990 for (unsigned i = 0; i < 3; ++i)
2991 for (unsigned j = 0; j < word_bytes; ++j)
2992 elts.quick_push ((i + 1) * word_bytes - j - 1);
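/* Illustrative example, continuing the V4SI case: the pushed selector
   elements start { 3, 2, 1, 0, 7, 6, 5, 4, ... } and the stepped
   encoding continues that pattern, so the permutation reverses the
   bytes within each 4-byte word.  */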
2994 vec_perm_indices indices (elts, 1, num_bytes);
2995 machine_mode vmode = TYPE_MODE (char_vectype);
2996 if (!can_vec_perm_const_p (vmode, vmode, indices))
2997 return false;
2999 if (! vec_stmt)
3001 if (slp_node
3002 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3004 if (dump_enabled_p ())
3005 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3006 "incompatible vector types for invariants\n");
3007 return false;
3010 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3011 DUMP_VECT_SCOPE ("vectorizable_bswap");
3012 record_stmt_cost (cost_vec,
3013 1, vector_stmt, stmt_info, 0, vect_prologue);
3014 record_stmt_cost (cost_vec,
3015 slp_node
3016 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3017 vec_perm, stmt_info, 0, vect_body);
3018 return true;
3021 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3023 /* Transform. */
3024 vec<tree> vec_oprnds = vNULL;
3025 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3026 op, &vec_oprnds);
3027 /* Arguments are ready. Create the new vector stmt. */
3028 unsigned i;
3029 tree vop;
3030 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3032 gimple *new_stmt;
3033 tree tem = make_ssa_name (char_vectype);
3034 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3035 char_vectype, vop));
3036 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3037 tree tem2 = make_ssa_name (char_vectype);
3038 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3039 tem, tem, bswap_vconst);
3040 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3041 tem = make_ssa_name (vectype);
3042 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3043 vectype, tem2));
3044 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3045 if (slp_node)
3046 slp_node->push_vec_def (new_stmt);
3047 else
3048 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3051 if (!slp_node)
3052 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3054 vec_oprnds.release ();
3055 return true;
3058 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3059 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3060 in a single step. On success, store the binary pack code in
3061 *CONVERT_CODE. */
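/* An illustrative example (not an exhaustive list): narrowing V4SI
   inputs to V8HI results can typically be done in one step with a
   vector pack operation such as VEC_PACK_TRUNC_EXPR, which would be
   stored in *CONVERT_CODE, whereas a narrowing that needs more than
   one step is rejected.  */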
3063 static bool
3064 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3065 code_helper *convert_code)
3067 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3068 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3069 return false;
3071 code_helper code;
3072 int multi_step_cvt = 0;
3073 auto_vec <tree, 8> interm_types;
3074 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3075 &code, &multi_step_cvt, &interm_types)
3076 || multi_step_cvt)
3077 return false;
3079 *convert_code = code;
3080 return true;
3083 /* Function vectorizable_call.
3085 Check if STMT_INFO performs a function call that can be vectorized.
3086 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3087 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3088 Return true if STMT_INFO is vectorizable in this way. */
3090 static bool
3091 vectorizable_call (vec_info *vinfo,
3092 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3093 gimple **vec_stmt, slp_tree slp_node,
3094 stmt_vector_for_cost *cost_vec)
3096 gcall *stmt;
3097 tree vec_dest;
3098 tree scalar_dest;
3099 tree op;
3100 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3101 tree vectype_out, vectype_in;
3102 poly_uint64 nunits_in;
3103 poly_uint64 nunits_out;
3104 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3105 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3106 tree fndecl, new_temp, rhs_type;
3107 enum vect_def_type dt[4]
3108 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3109 vect_unknown_def_type };
3110 tree vectypes[ARRAY_SIZE (dt)] = {};
3111 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3112 int ndts = ARRAY_SIZE (dt);
3113 int ncopies, j;
3114 auto_vec<tree, 8> vargs;
3115 enum { NARROW, NONE, WIDEN } modifier;
3116 size_t i, nargs;
3117 tree lhs;
3118 tree clz_ctz_arg1 = NULL_TREE;
3120 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3121 return false;
3123 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3124 && ! vec_stmt)
3125 return false;
3127 /* Is STMT_INFO a vectorizable call? */
3128 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3129 if (!stmt)
3130 return false;
3132 if (gimple_call_internal_p (stmt)
3133 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3134 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3135 /* Handled by vectorizable_load and vectorizable_store. */
3136 return false;
3138 if (gimple_call_lhs (stmt) == NULL_TREE
3139 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3140 return false;
3142 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3144 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3146 /* Process function arguments. */
3147 rhs_type = NULL_TREE;
3148 vectype_in = NULL_TREE;
3149 nargs = gimple_call_num_args (stmt);
3151 /* Bail out if the function has more than four arguments; we do not have
3152 interesting builtin functions to vectorize with more than two arguments
3153 except for fma. No arguments is also not good. */
3154 if (nargs == 0 || nargs > 4)
3155 return false;
3157 /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. */
3158 combined_fn cfn = gimple_call_combined_fn (stmt);
3159 if (cfn == CFN_GOMP_SIMD_LANE)
3161 nargs = 0;
3162 rhs_type = unsigned_type_node;
3164 /* Similarly pretend IFN_CLZ and IFN_CTZ only have one argument; the second
3165 argument just says whether the call is well-defined at zero and what
3166 value should be returned for it. */
3167 if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
3169 nargs = 1;
3170 clz_ctz_arg1 = gimple_call_arg (stmt, 1);
3173 int mask_opno = -1;
3174 if (internal_fn_p (cfn))
3175 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3177 for (i = 0; i < nargs; i++)
3179 if ((int) i == mask_opno)
3181 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3182 &op, &slp_op[i], &dt[i], &vectypes[i]))
3183 return false;
3184 continue;
3187 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3188 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3190 if (dump_enabled_p ())
3191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3192 "use not simple.\n");
3193 return false;
3196 /* We can only handle calls with arguments of the same type. */
3197 if (rhs_type
3198 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3200 if (dump_enabled_p ())
3201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3202 "argument types differ.\n");
3203 return false;
3205 if (!rhs_type)
3206 rhs_type = TREE_TYPE (op);
3208 if (!vectype_in)
3209 vectype_in = vectypes[i];
3210 else if (vectypes[i]
3211 && !types_compatible_p (vectypes[i], vectype_in))
3213 if (dump_enabled_p ())
3214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3215 "argument vector types differ.\n");
3216 return false;
3219 /* If all arguments are external or constant defs, infer the vector type
3220 from the scalar type. */
3221 if (!vectype_in)
3222 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3223 if (vec_stmt)
3224 gcc_assert (vectype_in);
3225 if (!vectype_in)
3227 if (dump_enabled_p ())
3228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3229 "no vectype for scalar type %T\n", rhs_type);
3231 return false;
3234 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3235 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3237 if (dump_enabled_p ())
3238 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3239 "mixed mask and nonmask vector types\n");
3240 return false;
3243 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3245 if (dump_enabled_p ())
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3247 "use emulated vector type for call\n");
3248 return false;
3251 /* FORNOW */
3252 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3253 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3254 if (known_eq (nunits_in * 2, nunits_out))
3255 modifier = NARROW;
3256 else if (known_eq (nunits_out, nunits_in))
3257 modifier = NONE;
3258 else if (known_eq (nunits_out * 2, nunits_in))
3259 modifier = WIDEN;
3260 else
3261 return false;
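/* Illustrative example of the ratios above: V4SI arguments with V8HI
   results give nunits_in = 4 and nunits_out = 8, so MODIFIER is NARROW;
   equal element counts give NONE and the inverse ratio gives WIDEN.  */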
3263 /* We only handle functions that do not read or clobber memory. */
3264 if (gimple_vuse (stmt))
3266 if (dump_enabled_p ())
3267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3268 "function reads from or writes to memory.\n");
3269 return false;
3272 /* For now, we only vectorize functions if a target specific builtin
3273 is available. TODO -- in some cases, it might be profitable to
3274 insert the calls for pieces of the vector, in order to be able
3275 to vectorize other operations in the loop. */
3276 fndecl = NULL_TREE;
3277 internal_fn ifn = IFN_LAST;
3278 tree callee = gimple_call_fndecl (stmt);
3280 /* First try using an internal function. */
3281 code_helper convert_code = MAX_TREE_CODES;
3282 if (cfn != CFN_LAST
3283 && (modifier == NONE
3284 || (modifier == NARROW
3285 && simple_integer_narrowing (vectype_out, vectype_in,
3286 &convert_code))))
3287 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3288 vectype_in);
3290 /* If that fails, try asking for a target-specific built-in function. */
3291 if (ifn == IFN_LAST)
3293 if (cfn != CFN_LAST)
3294 fndecl = targetm.vectorize.builtin_vectorized_function
3295 (cfn, vectype_out, vectype_in);
3296 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3297 fndecl = targetm.vectorize.builtin_md_vectorized_function
3298 (callee, vectype_out, vectype_in);
3301 if (ifn == IFN_LAST && !fndecl)
3303 if (cfn == CFN_GOMP_SIMD_LANE
3304 && !slp_node
3305 && loop_vinfo
3306 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3307 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3308 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3309 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3311 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3312 { 0, 1, 2, ... vf - 1 } vector. */
3313 gcc_assert (nargs == 0);
3315 else if (modifier == NONE
3316 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3317 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3318 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3319 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3320 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3321 slp_op, vectype_in, cost_vec);
3322 else
3324 if (dump_enabled_p ())
3325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3326 "function is not vectorizable.\n");
3327 return false;
3331 if (slp_node)
3332 ncopies = 1;
3333 else if (modifier == NARROW && ifn == IFN_LAST)
3334 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3335 else
3336 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3338 /* Sanity check: make sure that at least one copy of the vectorized stmt
3339 needs to be generated. */
3340 gcc_assert (ncopies >= 1);
3342 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3343 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3344 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3345 int len_opno = internal_fn_len_index (cond_len_fn);
3346 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3347 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3348 if (!vec_stmt) /* transformation not required. */
3350 if (slp_node)
3351 for (i = 0; i < nargs; ++i)
3352 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3353 vectypes[i]
3354 ? vectypes[i] : vectype_in))
3356 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3358 "incompatible vector types for invariants\n");
3359 return false;
3361 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3362 DUMP_VECT_SCOPE ("vectorizable_call");
3363 vect_model_simple_cost (vinfo, stmt_info,
3364 ncopies, dt, ndts, slp_node, cost_vec);
3365 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3366 record_stmt_cost (cost_vec, ncopies / 2,
3367 vec_promote_demote, stmt_info, 0, vect_body);
3369 if (loop_vinfo
3370 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3371 && (reduc_idx >= 0 || mask_opno >= 0))
3373 if (reduc_idx >= 0
3374 && (cond_fn == IFN_LAST
3375 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3376 OPTIMIZE_FOR_SPEED))
3377 && (cond_len_fn == IFN_LAST
3378 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3379 OPTIMIZE_FOR_SPEED)))
3381 if (dump_enabled_p ())
3382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3383 "can't use a fully-masked loop because no"
3384 " conditional operation is available.\n");
3385 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3387 else
3389 unsigned int nvectors
3390 = (slp_node
3391 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3392 : ncopies);
3393 tree scalar_mask = NULL_TREE;
3394 if (mask_opno >= 0)
3395 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3396 if (cond_len_fn != IFN_LAST
3397 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3398 OPTIMIZE_FOR_SPEED))
3399 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3400 1);
3401 else
3402 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3403 scalar_mask);
3406 return true;
3409 /* Transform. */
3411 if (dump_enabled_p ())
3412 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3414 /* Handle def. */
3415 scalar_dest = gimple_call_lhs (stmt);
3416 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3418 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3419 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3420 unsigned int vect_nargs = nargs;
3421 if (len_loop_p)
3423 if (len_opno >= 0)
3425 ifn = cond_len_fn;
3426 /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS. */
3427 vect_nargs += 2;
3429 else if (reduc_idx >= 0)
3430 gcc_unreachable ();
3432 else if (masked_loop_p && reduc_idx >= 0)
3434 ifn = cond_fn;
3435 vect_nargs += 2;
3437 if (clz_ctz_arg1)
3438 ++vect_nargs;
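/* Illustration (added; not part of the original source): with loop
   lengths a conditional internal call such as
       COND_ADD (mask, a, b, else)
   becomes
       COND_LEN_ADD (mask, a, b, else, len, bias)
   so two extra trailing operand slots are reserved above; with full
   masking and a reduction the unconditional IFN is swapped for its
   COND_* variant, which likewise adds a mask and an else operand.  */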
3440 if (modifier == NONE || ifn != IFN_LAST)
3442 tree prev_res = NULL_TREE;
3443 vargs.safe_grow (vect_nargs, true);
3444 auto_vec<vec<tree> > vec_defs (nargs);
3445 for (j = 0; j < ncopies; ++j)
3447 /* Build argument list for the vectorized call. */
3448 if (slp_node)
3450 vec<tree> vec_oprnds0;
3452 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3453 vec_oprnds0 = vec_defs[0];
3455 /* Arguments are ready. Create the new vector stmt. */
3456 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3458 int varg = 0;
3459 if (masked_loop_p && reduc_idx >= 0)
3461 unsigned int vec_num = vec_oprnds0.length ();
3462 /* Always true for SLP. */
3463 gcc_assert (ncopies == 1);
3464 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3465 gsi, masks, vec_num,
3466 vectype_out, i);
3468 size_t k;
3469 for (k = 0; k < nargs; k++)
3471 vec<tree> vec_oprndsk = vec_defs[k];
3472 vargs[varg++] = vec_oprndsk[i];
3474 if (masked_loop_p && reduc_idx >= 0)
3475 vargs[varg++] = vargs[reduc_idx + 1];
3476 if (clz_ctz_arg1)
3477 vargs[varg++] = clz_ctz_arg1;
3479 gimple *new_stmt;
3480 if (modifier == NARROW)
3482 /* We don't define any narrowing conditional functions
3483 at present. */
3484 gcc_assert (mask_opno < 0);
3485 tree half_res = make_ssa_name (vectype_in);
3486 gcall *call
3487 = gimple_build_call_internal_vec (ifn, vargs);
3488 gimple_call_set_lhs (call, half_res);
3489 gimple_call_set_nothrow (call, true);
3490 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3491 if ((i & 1) == 0)
3493 prev_res = half_res;
3494 continue;
3496 new_temp = make_ssa_name (vec_dest);
3497 new_stmt = vect_gimple_build (new_temp, convert_code,
3498 prev_res, half_res);
3499 vect_finish_stmt_generation (vinfo, stmt_info,
3500 new_stmt, gsi);
3502 else
3504 if (len_opno >= 0 && len_loop_p)
3506 unsigned int vec_num = vec_oprnds0.length ();
3507 /* Always true for SLP. */
3508 gcc_assert (ncopies == 1);
3509 tree len
3510 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3511 vectype_out, i, 1);
3512 signed char biasval
3513 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3514 tree bias = build_int_cst (intQI_type_node, biasval);
3515 vargs[len_opno] = len;
3516 vargs[len_opno + 1] = bias;
3518 else if (mask_opno >= 0 && masked_loop_p)
3520 unsigned int vec_num = vec_oprnds0.length ();
3521 /* Always true for SLP. */
3522 gcc_assert (ncopies == 1);
3523 tree mask = vect_get_loop_mask (loop_vinfo,
3524 gsi, masks, vec_num,
3525 vectype_out, i);
3526 vargs[mask_opno] = prepare_vec_mask
3527 (loop_vinfo, TREE_TYPE (mask), mask,
3528 vargs[mask_opno], gsi);
3531 gcall *call;
3532 if (ifn != IFN_LAST)
3533 call = gimple_build_call_internal_vec (ifn, vargs);
3534 else
3535 call = gimple_build_call_vec (fndecl, vargs);
3536 new_temp = make_ssa_name (vec_dest, call);
3537 gimple_call_set_lhs (call, new_temp);
3538 gimple_call_set_nothrow (call, true);
3539 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3540 new_stmt = call;
3542 slp_node->push_vec_def (new_stmt);
3544 continue;
3547 int varg = 0;
3548 if (masked_loop_p && reduc_idx >= 0)
3549 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3550 vectype_out, j);
3551 for (i = 0; i < nargs; i++)
3553 op = gimple_call_arg (stmt, i);
3554 if (j == 0)
3556 vec_defs.quick_push (vNULL);
3557 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3558 op, &vec_defs[i],
3559 vectypes[i]);
3561 vargs[varg++] = vec_defs[i][j];
3563 if (masked_loop_p && reduc_idx >= 0)
3564 vargs[varg++] = vargs[reduc_idx + 1];
3565 if (clz_ctz_arg1)
3566 vargs[varg++] = clz_ctz_arg1;
3568 if (len_opno >= 0 && len_loop_p)
3570 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3571 vectype_out, j, 1);
3572 signed char biasval
3573 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3574 tree bias = build_int_cst (intQI_type_node, biasval);
3575 vargs[len_opno] = len;
3576 vargs[len_opno + 1] = bias;
3578 else if (mask_opno >= 0 && masked_loop_p)
3580 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3581 vectype_out, j);
3582 vargs[mask_opno]
3583 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3584 vargs[mask_opno], gsi);
3587 gimple *new_stmt;
3588 if (cfn == CFN_GOMP_SIMD_LANE)
3590 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3591 tree new_var
3592 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3593 gimple *init_stmt = gimple_build_assign (new_var, cst);
3594 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3595 new_temp = make_ssa_name (vec_dest);
3596 new_stmt = gimple_build_assign (new_temp, new_var);
3597 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3599 else if (modifier == NARROW)
3601 /* We don't define any narrowing conditional functions at
3602 present. */
3603 gcc_assert (mask_opno < 0);
3604 tree half_res = make_ssa_name (vectype_in);
3605 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3606 gimple_call_set_lhs (call, half_res);
3607 gimple_call_set_nothrow (call, true);
3608 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3609 if ((j & 1) == 0)
3611 prev_res = half_res;
3612 continue;
3614 new_temp = make_ssa_name (vec_dest);
3615 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3616 half_res);
3617 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3619 else
3621 gcall *call;
3622 if (ifn != IFN_LAST)
3623 call = gimple_build_call_internal_vec (ifn, vargs);
3624 else
3625 call = gimple_build_call_vec (fndecl, vargs);
3626 new_temp = make_ssa_name (vec_dest, call);
3627 gimple_call_set_lhs (call, new_temp);
3628 gimple_call_set_nothrow (call, true);
3629 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3630 new_stmt = call;
3633 if (j == (modifier == NARROW ? 1 : 0))
3634 *vec_stmt = new_stmt;
3635 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3637 for (i = 0; i < nargs; i++)
3639 vec<tree> vec_oprndsi = vec_defs[i];
3640 vec_oprndsi.release ();
3643 else if (modifier == NARROW)
3645 auto_vec<vec<tree> > vec_defs (nargs);
3646 /* We don't define any narrowing conditional functions at present. */
3647 gcc_assert (mask_opno < 0);
3648 for (j = 0; j < ncopies; ++j)
3650 /* Build argument list for the vectorized call. */
3651 if (j == 0)
3652 vargs.create (nargs * 2);
3653 else
3654 vargs.truncate (0);
3656 if (slp_node)
3658 vec<tree> vec_oprnds0;
3660 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3661 vec_oprnds0 = vec_defs[0];
3663 /* Arguments are ready. Create the new vector stmt. */
3664 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3666 size_t k;
3667 vargs.truncate (0);
3668 for (k = 0; k < nargs; k++)
3670 vec<tree> vec_oprndsk = vec_defs[k];
3671 vargs.quick_push (vec_oprndsk[i]);
3672 vargs.quick_push (vec_oprndsk[i + 1]);
3674 gcall *call;
3675 if (ifn != IFN_LAST)
3676 call = gimple_build_call_internal_vec (ifn, vargs);
3677 else
3678 call = gimple_build_call_vec (fndecl, vargs);
3679 new_temp = make_ssa_name (vec_dest, call);
3680 gimple_call_set_lhs (call, new_temp);
3681 gimple_call_set_nothrow (call, true);
3682 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3683 slp_node->push_vec_def (call);
3685 continue;
3688 for (i = 0; i < nargs; i++)
3690 op = gimple_call_arg (stmt, i);
3691 if (j == 0)
3693 vec_defs.quick_push (vNULL);
3694 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3695 op, &vec_defs[i], vectypes[i]);
3697 vec_oprnd0 = vec_defs[i][2*j];
3698 vec_oprnd1 = vec_defs[i][2*j+1];
3700 vargs.quick_push (vec_oprnd0);
3701 vargs.quick_push (vec_oprnd1);
3704 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3705 new_temp = make_ssa_name (vec_dest, new_stmt);
3706 gimple_call_set_lhs (new_stmt, new_temp);
3707 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3709 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3712 if (!slp_node)
3713 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3715 for (i = 0; i < nargs; i++)
3717 vec<tree> vec_oprndsi = vec_defs[i];
3718 vec_oprndsi.release ();
3721 else
3722 /* No current target implements this case. */
3723 return false;
3725 vargs.release ();
3727 /* The call in STMT might prevent it from being removed in DCE.
3728 We, however, cannot remove it here, due to the way the SSA name
3729 it defines is mapped to the new definition.  So just replace the
3730 rhs of the statement with something harmless. */
3732 if (slp_node)
3733 return true;
3735 stmt_info = vect_orig_stmt (stmt_info);
3736 lhs = gimple_get_lhs (stmt_info->stmt);
3738 gassign *new_stmt
3739 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3740 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3742 return true;
3746 struct simd_call_arg_info
3748 tree vectype;
3749 tree op;
3750 HOST_WIDE_INT linear_step;
3751 enum vect_def_type dt;
3752 unsigned int align;
3753 bool simd_lane_linear;
3756 /* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
3757 is linear within a simd lane (but not within the whole loop), note it in
3758 *ARGINFO. */
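/* Illustrative example (added; hypothetical GIMPLE, not from the source):

     _1 = .GOMP_SIMD_LANE (simduid.0);
     _2 = _1 * 16;
     p_3 = &a[0] + _2;

   Here p_3 is linear within a simd lane with base &a[0] and step 16, so
   this helper records op = &a[0], linear_step = 16 and
   simd_lane_linear = true in *ARGINFO.  */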
3760 static void
3761 vect_simd_lane_linear (tree op, class loop *loop,
3762 struct simd_call_arg_info *arginfo)
3764 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3766 if (!is_gimple_assign (def_stmt)
3767 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3768 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3769 return;
3771 tree base = gimple_assign_rhs1 (def_stmt);
3772 HOST_WIDE_INT linear_step = 0;
3773 tree v = gimple_assign_rhs2 (def_stmt);
3774 while (TREE_CODE (v) == SSA_NAME)
3776 tree t;
3777 def_stmt = SSA_NAME_DEF_STMT (v);
3778 if (is_gimple_assign (def_stmt))
3779 switch (gimple_assign_rhs_code (def_stmt))
3781 case PLUS_EXPR:
3782 t = gimple_assign_rhs2 (def_stmt);
3783 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3784 return;
3785 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3786 v = gimple_assign_rhs1 (def_stmt);
3787 continue;
3788 case MULT_EXPR:
3789 t = gimple_assign_rhs2 (def_stmt);
3790 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3791 return;
3792 linear_step = tree_to_shwi (t);
3793 v = gimple_assign_rhs1 (def_stmt);
3794 continue;
3795 CASE_CONVERT:
3796 t = gimple_assign_rhs1 (def_stmt);
3797 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3798 || (TYPE_PRECISION (TREE_TYPE (v))
3799 < TYPE_PRECISION (TREE_TYPE (t))))
3800 return;
3801 if (!linear_step)
3802 linear_step = 1;
3803 v = t;
3804 continue;
3805 default:
3806 return;
3808 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3809 && loop->simduid
3810 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3811 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3812 == loop->simduid))
3814 if (!linear_step)
3815 linear_step = 1;
3816 arginfo->linear_step = linear_step;
3817 arginfo->op = base;
3818 arginfo->simd_lane_linear = true;
3819 return;
3824 /* Function vectorizable_simd_clone_call.
3826 Check if STMT_INFO performs a function call that can be vectorized
3827 by calling a simd clone of the function.
3828 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3829 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3830 Return true if STMT_INFO is vectorizable in this way. */
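/* Illustration (added; not part of the original source): given

     #pragma omp declare simd notinbranch
     float foo (float x);

   and a vectorized loop calling foo (a[i]), the compiler can replace the
   scalar call with a call to one of foo's SIMD clones, e.g. a clone named
   _ZGVbN4v_foo under the x86 vector ABI, operating on whole vectors.  */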
3832 static bool
3833 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3834 gimple_stmt_iterator *gsi,
3835 gimple **vec_stmt, slp_tree slp_node,
3836 stmt_vector_for_cost *)
3838 tree vec_dest;
3839 tree scalar_dest;
3840 tree op, type;
3841 tree vec_oprnd0 = NULL_TREE;
3842 tree vectype;
3843 poly_uint64 nunits;
3844 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3845 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3846 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3847 tree fndecl, new_temp;
3848 int ncopies, j;
3849 auto_vec<simd_call_arg_info> arginfo;
3850 vec<tree> vargs = vNULL;
3851 size_t i, nargs;
3852 tree lhs, rtype, ratype;
3853 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3854 int masked_call_offset = 0;
3856 /* Is STMT a vectorizable call? */
3857 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3858 if (!stmt)
3859 return false;
3861 fndecl = gimple_call_fndecl (stmt);
3862 if (fndecl == NULL_TREE
3863 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
3865 fndecl = gimple_call_arg (stmt, 0);
3866 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
3867 fndecl = TREE_OPERAND (fndecl, 0);
3868 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
3869 masked_call_offset = 1;
3871 if (fndecl == NULL_TREE)
3872 return false;
3874 struct cgraph_node *node = cgraph_node::get (fndecl);
3875 if (node == NULL || node->simd_clones == NULL)
3876 return false;
3878 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3879 return false;
3881 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3882 && ! vec_stmt)
3883 return false;
3885 if (gimple_call_lhs (stmt)
3886 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3887 return false;
3889 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3891 vectype = STMT_VINFO_VECTYPE (stmt_info);
3893 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3894 return false;
3896 /* Process function arguments. */
3897 nargs = gimple_call_num_args (stmt) - masked_call_offset;
3899 /* Bail out if the function has zero arguments. */
3900 if (nargs == 0)
3901 return false;
3903 vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
3904 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
3905 arginfo.reserve (nargs, true);
3906 auto_vec<slp_tree> slp_op;
3907 slp_op.safe_grow_cleared (nargs);
3909 for (i = 0; i < nargs; i++)
3911 simd_call_arg_info thisarginfo;
3912 affine_iv iv;
3914 thisarginfo.linear_step = 0;
3915 thisarginfo.align = 0;
3916 thisarginfo.op = NULL_TREE;
3917 thisarginfo.simd_lane_linear = false;
3919 int op_no = i + masked_call_offset;
3920 if (slp_node)
3921 op_no = vect_slp_child_index_for_operand (stmt, op_no, false);
3922 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3923 op_no, &op, &slp_op[i],
3924 &thisarginfo.dt, &thisarginfo.vectype)
3925 || thisarginfo.dt == vect_uninitialized_def)
3927 if (dump_enabled_p ())
3928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3929 "use not simple.\n");
3930 return false;
3933 if (thisarginfo.dt == vect_constant_def
3934 || thisarginfo.dt == vect_external_def)
3936 /* With SLP we determine the vector type of constants/externals
3937 at analysis time, handling conflicts via
3938 vect_maybe_update_slp_op_vectype. At transform time
3939 we have a vector type recorded for SLP. */
3940 gcc_assert (!vec_stmt
3941 || !slp_node
3942 || thisarginfo.vectype != NULL_TREE);
3943 if (!vec_stmt)
3944 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
3945 TREE_TYPE (op),
3946 slp_node);
3948 else
3949 gcc_assert (thisarginfo.vectype != NULL_TREE);
3951 /* For linear arguments, the analyze phase should have saved
3952 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
3953 if (i * 3 + 4 <= simd_clone_info.length ()
3954 && simd_clone_info[i * 3 + 2])
3956 gcc_assert (vec_stmt);
3957 thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
3958 thisarginfo.op = simd_clone_info[i * 3 + 1];
3959 thisarginfo.simd_lane_linear
3960 = (simd_clone_info[i * 3 + 3] == boolean_true_node);
3961 /* If the loop has been peeled for alignment, we need to adjust it. */
3962 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3963 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3964 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3966 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3967 tree step = simd_clone_info[i * 3 + 2];
3968 tree opt = TREE_TYPE (thisarginfo.op);
3969 bias = fold_convert (TREE_TYPE (step), bias);
3970 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3971 thisarginfo.op
3972 = fold_build2 (POINTER_TYPE_P (opt)
3973 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3974 thisarginfo.op, bias);
3977 else if (!vec_stmt
3978 && thisarginfo.dt != vect_constant_def
3979 && thisarginfo.dt != vect_external_def
3980 && loop_vinfo
3981 && TREE_CODE (op) == SSA_NAME
3982 && simple_iv (loop, loop_containing_stmt (stmt), op,
3983 &iv, false)
3984 && tree_fits_shwi_p (iv.step))
3986 thisarginfo.linear_step = tree_to_shwi (iv.step);
3987 thisarginfo.op = iv.base;
3989 else if ((thisarginfo.dt == vect_constant_def
3990 || thisarginfo.dt == vect_external_def)
3991 && POINTER_TYPE_P (TREE_TYPE (op)))
3992 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3993 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3994 linear too. */
3995 if (POINTER_TYPE_P (TREE_TYPE (op))
3996 && !thisarginfo.linear_step
3997 && !vec_stmt
3998 && thisarginfo.dt != vect_constant_def
3999 && thisarginfo.dt != vect_external_def
4000 && loop_vinfo
4001 && TREE_CODE (op) == SSA_NAME)
4002 vect_simd_lane_linear (op, loop, &thisarginfo);
4004 arginfo.quick_push (thisarginfo);
4007 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4008 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
4009 unsigned int badness = 0;
4010 struct cgraph_node *bestn = NULL;
4011 if (simd_clone_info.exists ())
4012 bestn = cgraph_node::get (simd_clone_info[0]);
4013 else
4014 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4015 n = n->simdclone->next_clone)
4017 unsigned int this_badness = 0;
4018 unsigned int num_calls;
4019 /* The number of arguments in the call and the number of parameters in
4020 the simdclone should match.  However, when the simdclone is
4021 'inbranch', it could have one more parameter than nargs when using
4022 an inbranch simdclone for a non-inbranch call, either in a
4023 non-masked loop using an all-true constant mask, or inside a masked
4024 loop using its mask. */
4025 size_t simd_nargs = n->simdclone->nargs;
4026 if (!masked_call_offset && n->simdclone->inbranch)
4027 simd_nargs--;
4028 if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
4029 &num_calls)
4030 || (!n->simdclone->inbranch && (masked_call_offset > 0))
4031 || (nargs != simd_nargs))
4032 continue;
4033 if (num_calls != 1)
4034 this_badness += exact_log2 (num_calls) * 4096;
4035 if (n->simdclone->inbranch)
4036 this_badness += 8192;
4037 int target_badness = targetm.simd_clone.usable (n);
4038 if (target_badness < 0)
4039 continue;
4040 this_badness += target_badness * 512;
4041 for (i = 0; i < nargs; i++)
4043 switch (n->simdclone->args[i].arg_type)
4045 case SIMD_CLONE_ARG_TYPE_VECTOR:
4046 if (!useless_type_conversion_p
4047 (n->simdclone->args[i].orig_type,
4048 TREE_TYPE (gimple_call_arg (stmt,
4049 i + masked_call_offset))))
4050 i = -1;
4051 else if (arginfo[i].dt == vect_constant_def
4052 || arginfo[i].dt == vect_external_def
4053 || arginfo[i].linear_step)
4054 this_badness += 64;
4055 break;
4056 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4057 if (arginfo[i].dt != vect_constant_def
4058 && arginfo[i].dt != vect_external_def)
4059 i = -1;
4060 break;
4061 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4062 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4063 if (arginfo[i].dt == vect_constant_def
4064 || arginfo[i].dt == vect_external_def
4065 || (arginfo[i].linear_step
4066 != n->simdclone->args[i].linear_step))
4067 i = -1;
4068 break;
4069 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4070 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4071 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4072 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4073 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4074 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4075 /* FORNOW */
4076 i = -1;
4077 break;
4078 case SIMD_CLONE_ARG_TYPE_MASK:
4079 /* While we can create a traditional data vector from
4080 an incoming integer mode mask, we have no good way to
4081 force the generation of an integer mode mask from a traditional
4082 boolean vector input. */
4083 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4084 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4085 i = -1;
4086 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4087 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4088 this_badness += 2048;
4089 break;
4091 if (i == (size_t) -1)
4092 break;
4093 if (n->simdclone->args[i].alignment > arginfo[i].align)
4095 i = -1;
4096 break;
4098 if (arginfo[i].align)
4099 this_badness += (exact_log2 (arginfo[i].align)
4100 - exact_log2 (n->simdclone->args[i].alignment));
4102 if (i == (size_t) -1)
4103 continue;
4104 if (masked_call_offset == 0
4105 && n->simdclone->inbranch
4106 && n->simdclone->nargs > nargs)
4108 gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4109 SIMD_CLONE_ARG_TYPE_MASK);
4110 /* Penalize using a masked SIMD clone for a call that is not in a branch,
4111 in a non-masked loop, as we'd have to construct an all-true mask. */
4112 if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4113 this_badness += 64;
4115 if (bestn == NULL || this_badness < badness)
4117 bestn = n;
4118 badness = this_badness;
4122 if (bestn == NULL)
4123 return false;
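/* Worked example (added for illustration, not from the source): with
   vf = 8 and group_size = 1, a clone with simdlen 8 needs num_calls = 1,
   while a simdlen-4 clone needs two calls and collects
   exact_log2 (2) * 4096 = 4096 badness; an inbranch clone pays another
   8192, so the cheapest matching clone from the selection above wins.  */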
4125 unsigned int num_mask_args = 0;
4126 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4127 for (i = 0; i < nargs; i++)
4128 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4129 num_mask_args++;
4131 for (i = 0; i < nargs; i++)
4133 if ((arginfo[i].dt == vect_constant_def
4134 || arginfo[i].dt == vect_external_def)
4135 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4137 tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
4138 i + masked_call_offset));
4139 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4140 slp_node);
4141 if (arginfo[i].vectype == NULL
4142 || !constant_multiple_p (bestn->simdclone->simdlen,
4143 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4144 return false;
4147 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4148 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4150 if (dump_enabled_p ())
4151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4152 "vector mask arguments are not supported.\n");
4153 return false;
4156 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4158 tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
4159 if (bestn->simdclone->mask_mode == VOIDmode)
4161 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype),
4162 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4164 /* FORNOW we only have partial support for vector-type masks
4165 that can't hold all of simdlen. */
4166 if (dump_enabled_p ())
4167 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4168 vect_location,
4169 "in-branch vector clones are not yet"
4170 " supported for mismatched vector sizes.\n");
4171 return false;
4174 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4176 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4177 || maybe_ne (exact_div (bestn->simdclone->simdlen,
4178 num_mask_args),
4179 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4181 /* FORNOW we only have partial support for integer-type masks
4182 that represent the same number of lanes as the
4183 vectorized mask inputs. */
4184 if (dump_enabled_p ())
4185 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4186 vect_location,
4187 "in-branch vector clones are not yet "
4188 "supported for mismatched vector sizes.\n");
4189 return false;
4192 else
4194 if (dump_enabled_p ())
4195 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4196 vect_location,
4197 "in-branch vector clones not supported"
4198 " on this target.\n");
4199 return false;
4204 fndecl = bestn->decl;
4205 nunits = bestn->simdclone->simdlen;
4206 if (slp_node)
4207 ncopies = vector_unroll_factor (vf * group_size, nunits);
4208 else
4209 ncopies = vector_unroll_factor (vf, nunits);
4211 /* If the function isn't const, only allow it in simd loops where the
4212 user has asserted that at least nunits consecutive iterations can be
4213 performed using SIMD instructions. */
4214 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4215 && gimple_vuse (stmt))
4216 return false;
4218 /* Sanity check: make sure that at least one copy of the vectorized stmt
4219 needs to be generated. */
4220 gcc_assert (ncopies >= 1);
4222 if (!vec_stmt) /* transformation not required. */
4224 if (slp_node)
4225 for (unsigned i = 0; i < nargs; ++i)
4226 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4228 if (dump_enabled_p ())
4229 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4230 "incompatible vector types for invariants\n");
4231 return false;
4233 /* When the original call is pure or const but the SIMD ABI dictates
4234 an aggregate return we will have to use a virtual definition and
4235 in a loop eventually even need to add a virtual PHI.  That's
4236 not straightforward, so allow this to be fixed up via renaming. */
4237 if (gimple_call_lhs (stmt)
4238 && !gimple_vdef (stmt)
4239 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4240 vinfo->any_known_not_updated_vssa = true;
4241 /* ??? For SLP code-gen we end up inserting after the last
4242 vector argument def rather than at the original call position,
4243 so automagic virtual operand updating doesn't work. */
4244 if (gimple_vuse (stmt) && slp_node)
4245 vinfo->any_known_not_updated_vssa = true;
4246 simd_clone_info.safe_push (bestn->decl);
4247 for (i = 0; i < bestn->simdclone->nargs; i++)
4249 switch (bestn->simdclone->args[i].arg_type)
4251 default:
4252 continue;
4253 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4254 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4256 simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
4257 simd_clone_info.safe_push (arginfo[i].op);
4258 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4259 ? size_type_node : TREE_TYPE (arginfo[i].op);
4260 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4261 simd_clone_info.safe_push (ls);
4262 tree sll = arginfo[i].simd_lane_linear
4263 ? boolean_true_node : boolean_false_node;
4264 simd_clone_info.safe_push (sll);
4266 break;
4267 case SIMD_CLONE_ARG_TYPE_MASK:
4268 if (loop_vinfo
4269 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4270 vect_record_loop_mask (loop_vinfo,
4271 &LOOP_VINFO_MASKS (loop_vinfo),
4272 ncopies, vectype, op);
4274 break;
4278 if (!bestn->simdclone->inbranch && loop_vinfo)
4280 if (dump_enabled_p ()
4281 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4282 dump_printf_loc (MSG_NOTE, vect_location,
4283 "can't use a fully-masked loop because a"
4284 " non-masked simd clone was selected.\n");
4285 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4288 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4289 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4290 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4291 dt, slp_node, cost_vec); */
4292 return true;
4295 /* Transform. */
4297 if (dump_enabled_p ())
4298 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4300 /* Handle def. */
4301 scalar_dest = gimple_call_lhs (stmt);
4302 vec_dest = NULL_TREE;
4303 rtype = NULL_TREE;
4304 ratype = NULL_TREE;
4305 if (scalar_dest)
4307 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4308 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4309 if (TREE_CODE (rtype) == ARRAY_TYPE)
4311 ratype = rtype;
4312 rtype = TREE_TYPE (ratype);
4316 auto_vec<vec<tree> > vec_oprnds;
4317 auto_vec<unsigned> vec_oprnds_i;
4318 vec_oprnds_i.safe_grow_cleared (nargs, true);
4319 if (slp_node)
4321 vec_oprnds.reserve_exact (nargs);
4322 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4324 else
4325 vec_oprnds.safe_grow_cleared (nargs, true);
4326 for (j = 0; j < ncopies; ++j)
4328 poly_uint64 callee_nelements;
4329 poly_uint64 caller_nelements;
4330 /* Build argument list for the vectorized call. */
4331 if (j == 0)
4332 vargs.create (nargs);
4333 else
4334 vargs.truncate (0);
4336 for (i = 0; i < nargs; i++)
4338 unsigned int k, l, m, o;
4339 tree atype;
4340 op = gimple_call_arg (stmt, i + masked_call_offset);
4341 switch (bestn->simdclone->args[i].arg_type)
4343 case SIMD_CLONE_ARG_TYPE_VECTOR:
4344 atype = bestn->simdclone->args[i].vector_type;
4345 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4346 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4347 o = vector_unroll_factor (nunits, callee_nelements);
4348 for (m = j * o; m < (j + 1) * o; m++)
4350 if (known_lt (callee_nelements, caller_nelements))
4352 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4353 if (!constant_multiple_p (caller_nelements,
4354 callee_nelements, &k))
4355 gcc_unreachable ();
4357 gcc_assert ((k & (k - 1)) == 0);
4358 if (m == 0)
4360 if (!slp_node)
4361 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4362 ncopies * o / k, op,
4363 &vec_oprnds[i]);
4364 vec_oprnds_i[i] = 0;
4365 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4367 else
4369 vec_oprnd0 = arginfo[i].op;
4370 if ((m & (k - 1)) == 0)
4371 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4373 arginfo[i].op = vec_oprnd0;
4374 vec_oprnd0
4375 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4376 bitsize_int (prec),
4377 bitsize_int ((m & (k - 1)) * prec));
4378 gassign *new_stmt
4379 = gimple_build_assign (make_ssa_name (atype),
4380 vec_oprnd0);
4381 vect_finish_stmt_generation (vinfo, stmt_info,
4382 new_stmt, gsi);
4383 vargs.safe_push (gimple_assign_lhs (new_stmt));
4385 else
4387 if (!constant_multiple_p (callee_nelements,
4388 caller_nelements, &k))
4389 gcc_unreachable ();
4390 gcc_assert ((k & (k - 1)) == 0);
4391 vec<constructor_elt, va_gc> *ctor_elts;
4392 if (k != 1)
4393 vec_alloc (ctor_elts, k);
4394 else
4395 ctor_elts = NULL;
4396 for (l = 0; l < k; l++)
4398 if (m == 0 && l == 0)
4400 if (!slp_node)
4401 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4402 k * o * ncopies,
4404 &vec_oprnds[i]);
4405 vec_oprnds_i[i] = 0;
4406 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4408 else
4409 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4410 arginfo[i].op = vec_oprnd0;
4411 if (k == 1)
4412 break;
4413 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4414 vec_oprnd0);
4416 if (k == 1)
4417 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4418 atype))
4420 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4421 vec_oprnd0);
4422 gassign *new_stmt
4423 = gimple_build_assign (make_ssa_name (atype),
4424 vec_oprnd0);
4425 vect_finish_stmt_generation (vinfo, stmt_info,
4426 new_stmt, gsi);
4427 vargs.safe_push (gimple_get_lhs (new_stmt));
4429 else
4430 vargs.safe_push (vec_oprnd0);
4431 else
4433 vec_oprnd0 = build_constructor (atype, ctor_elts);
4434 gassign *new_stmt
4435 = gimple_build_assign (make_ssa_name (atype),
4436 vec_oprnd0);
4437 vect_finish_stmt_generation (vinfo, stmt_info,
4438 new_stmt, gsi);
4439 vargs.safe_push (gimple_assign_lhs (new_stmt));
4443 break;
4444 case SIMD_CLONE_ARG_TYPE_MASK:
4445 if (bestn->simdclone->mask_mode == VOIDmode)
4447 atype = bestn->simdclone->args[i].vector_type;
4448 tree elt_type = TREE_TYPE (atype);
4449 tree one = fold_convert (elt_type, integer_one_node);
4450 tree zero = fold_convert (elt_type, integer_zero_node);
4451 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4452 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4453 o = vector_unroll_factor (nunits, callee_nelements);
4454 for (m = j * o; m < (j + 1) * o; m++)
4456 if (maybe_lt (callee_nelements, caller_nelements))
4458 /* The mask type has fewer elements than simdlen. */
4460 /* FORNOW */
4461 gcc_unreachable ();
4463 else if (known_eq (callee_nelements, caller_nelements))
4465 /* The SIMD clone function has the same number of
4466 elements as the current function. */
4467 if (m == 0)
4469 if (!slp_node)
4470 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4471 o * ncopies,
4473 &vec_oprnds[i]);
4474 vec_oprnds_i[i] = 0;
4476 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4477 if (loop_vinfo
4478 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4480 vec_loop_masks *loop_masks
4481 = &LOOP_VINFO_MASKS (loop_vinfo);
4482 tree loop_mask
4483 = vect_get_loop_mask (loop_vinfo, gsi,
4484 loop_masks, ncopies,
4485 vectype, j);
4486 vec_oprnd0
4487 = prepare_vec_mask (loop_vinfo,
4488 TREE_TYPE (loop_mask),
4489 loop_mask, vec_oprnd0,
4490 gsi);
4491 loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
4492 loop_mask });
4495 vec_oprnd0
4496 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4497 build_vector_from_val (atype, one),
4498 build_vector_from_val (atype, zero));
4499 gassign *new_stmt
4500 = gimple_build_assign (make_ssa_name (atype),
4501 vec_oprnd0);
4502 vect_finish_stmt_generation (vinfo, stmt_info,
4503 new_stmt, gsi);
4504 vargs.safe_push (gimple_assign_lhs (new_stmt));
4506 else
4508 /* The mask type has more elements than simdlen. */
4510 /* FORNOW */
4511 gcc_unreachable ();
4515 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4517 atype = bestn->simdclone->args[i].vector_type;
4518 /* Guess the number of lanes represented by atype. */
4519 poly_uint64 atype_subparts
4520 = exact_div (bestn->simdclone->simdlen,
4521 num_mask_args);
4522 o = vector_unroll_factor (nunits, atype_subparts);
4523 for (m = j * o; m < (j + 1) * o; m++)
4525 if (m == 0)
4527 if (!slp_node)
4528 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4529 o * ncopies,
4531 &vec_oprnds[i]);
4532 vec_oprnds_i[i] = 0;
4534 if (maybe_lt (atype_subparts,
4535 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4537 /* The mask argument has fewer elements than the
4538 input vector. */
4539 /* FORNOW */
4540 gcc_unreachable ();
4542 else if (known_eq (atype_subparts,
4543 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4545 /* The vector mask argument matches the input
4546 in the number of lanes, but not necessarily
4547 in the mode. */
4548 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4549 tree st = lang_hooks.types.type_for_mode
4550 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4551 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4552 vec_oprnd0);
4553 gassign *new_stmt
4554 = gimple_build_assign (make_ssa_name (st),
4555 vec_oprnd0);
4556 vect_finish_stmt_generation (vinfo, stmt_info,
4557 new_stmt, gsi);
4558 if (!types_compatible_p (atype, st))
4560 new_stmt
4561 = gimple_build_assign (make_ssa_name (atype),
4562 NOP_EXPR,
4563 gimple_assign_lhs
4564 (new_stmt));
4565 vect_finish_stmt_generation (vinfo, stmt_info,
4566 new_stmt, gsi);
4568 vargs.safe_push (gimple_assign_lhs (new_stmt));
4570 else
4572 /* The mask argument has more elements than the
4573 input vector. */
4574 /* FORNOW */
4575 gcc_unreachable ();
4579 else
4580 gcc_unreachable ();
4581 break;
4582 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4583 vargs.safe_push (op);
4584 break;
4585 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4586 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4587 if (j == 0)
4589 gimple_seq stmts;
4590 arginfo[i].op
4591 = force_gimple_operand (unshare_expr (arginfo[i].op),
4592 &stmts, true, NULL_TREE);
4593 if (stmts != NULL)
4595 basic_block new_bb;
4596 edge pe = loop_preheader_edge (loop);
4597 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4598 gcc_assert (!new_bb);
4600 if (arginfo[i].simd_lane_linear)
4602 vargs.safe_push (arginfo[i].op);
4603 break;
4605 tree phi_res = copy_ssa_name (op);
4606 gphi *new_phi = create_phi_node (phi_res, loop->header);
4607 add_phi_arg (new_phi, arginfo[i].op,
4608 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4609 enum tree_code code
4610 = POINTER_TYPE_P (TREE_TYPE (op))
4611 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4612 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4613 ? sizetype : TREE_TYPE (op);
4614 poly_widest_int cst
4615 = wi::mul (bestn->simdclone->args[i].linear_step,
4616 ncopies * nunits);
4617 tree tcst = wide_int_to_tree (type, cst);
4618 tree phi_arg = copy_ssa_name (op);
4619 gassign *new_stmt
4620 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4621 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4622 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4623 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4624 UNKNOWN_LOCATION);
4625 arginfo[i].op = phi_res;
4626 vargs.safe_push (phi_res);
4628 else
4630 enum tree_code code
4631 = POINTER_TYPE_P (TREE_TYPE (op))
4632 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4633 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4634 ? sizetype : TREE_TYPE (op);
4635 poly_widest_int cst
4636 = wi::mul (bestn->simdclone->args[i].linear_step,
4637 j * nunits);
4638 tree tcst = wide_int_to_tree (type, cst);
4639 new_temp = make_ssa_name (TREE_TYPE (op));
4640 gassign *new_stmt
4641 = gimple_build_assign (new_temp, code,
4642 arginfo[i].op, tcst);
4643 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4644 vargs.safe_push (new_temp);
4646 break;
4647 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4648 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4649 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4650 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4651 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4652 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4653 default:
4654 gcc_unreachable ();
4658 if (masked_call_offset == 0
4659 && bestn->simdclone->inbranch
4660 && bestn->simdclone->nargs > nargs)
4662 unsigned long m, o;
4663 size_t mask_i = bestn->simdclone->nargs - 1;
4664 tree mask;
4665 gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4666 SIMD_CLONE_ARG_TYPE_MASK);
4668 tree masktype = bestn->simdclone->args[mask_i].vector_type;
4669 callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4670 o = vector_unroll_factor (nunits, callee_nelements);
4671 for (m = j * o; m < (j + 1) * o; m++)
4673 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4675 vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4676 mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4677 ncopies, vectype, j);
4679 else
4680 mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
4682 gassign *new_stmt;
4683 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4685 /* This means we are dealing with integer mask modes.
4686 First convert to an integer type with the same size as
4687 the current vector type. */
4688 unsigned HOST_WIDE_INT intermediate_size
4689 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4690 tree mid_int_type =
4691 build_nonstandard_integer_type (intermediate_size, 1);
4692 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4693 new_stmt
4694 = gimple_build_assign (make_ssa_name (mid_int_type),
4695 mask);
4696 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4697 /* Then zero-extend to the mask mode. */
4698 mask = fold_build1 (NOP_EXPR, masktype,
4699 gimple_get_lhs (new_stmt));
4701 else if (bestn->simdclone->mask_mode == VOIDmode)
4703 tree one = fold_convert (TREE_TYPE (masktype),
4704 integer_one_node);
4705 tree zero = fold_convert (TREE_TYPE (masktype),
4706 integer_zero_node);
4707 mask = build3 (VEC_COND_EXPR, masktype, mask,
4708 build_vector_from_val (masktype, one),
4709 build_vector_from_val (masktype, zero));
4711 else
4712 gcc_unreachable ();
4714 new_stmt = gimple_build_assign (make_ssa_name (masktype), mask);
4715 vect_finish_stmt_generation (vinfo, stmt_info,
4716 new_stmt, gsi);
4717 mask = gimple_assign_lhs (new_stmt);
4718 vargs.safe_push (mask);
4722 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4723 if (vec_dest)
4725 gcc_assert (ratype
4726 || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4727 if (ratype)
4728 new_temp = create_tmp_var (ratype);
4729 else if (useless_type_conversion_p (vectype, rtype))
4730 new_temp = make_ssa_name (vec_dest, new_call);
4731 else
4732 new_temp = make_ssa_name (rtype, new_call);
4733 gimple_call_set_lhs (new_call, new_temp);
4735 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4736 gimple *new_stmt = new_call;
4738 if (vec_dest)
4740 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
4742 unsigned int k, l;
4743 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4744 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4745 k = vector_unroll_factor (nunits,
4746 TYPE_VECTOR_SUBPARTS (vectype));
4747 gcc_assert ((k & (k - 1)) == 0);
4748 for (l = 0; l < k; l++)
4750 tree t;
4751 if (ratype)
4753 t = build_fold_addr_expr (new_temp);
4754 t = build2 (MEM_REF, vectype, t,
4755 build_int_cst (TREE_TYPE (t), l * bytes));
4757 else
4758 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4759 bitsize_int (prec), bitsize_int (l * prec));
4760 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4761 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4763 if (j == 0 && l == 0)
4764 *vec_stmt = new_stmt;
4765 if (slp_node)
4766 SLP_TREE_VEC_DEFS (slp_node)
4767 .quick_push (gimple_assign_lhs (new_stmt));
4768 else
4769 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4772 if (ratype)
4773 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4774 continue;
4776 else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
4778 unsigned int k;
4779 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
4780 TYPE_VECTOR_SUBPARTS (rtype), &k))
4781 gcc_unreachable ();
4782 gcc_assert ((k & (k - 1)) == 0);
4783 if ((j & (k - 1)) == 0)
4784 vec_alloc (ret_ctor_elts, k);
4785 if (ratype)
4787 unsigned int m, o;
4788 o = vector_unroll_factor (nunits,
4789 TYPE_VECTOR_SUBPARTS (rtype));
4790 for (m = 0; m < o; m++)
4792 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4793 size_int (m), NULL_TREE, NULL_TREE);
4794 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4795 tem);
4796 vect_finish_stmt_generation (vinfo, stmt_info,
4797 new_stmt, gsi);
4798 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4799 gimple_assign_lhs (new_stmt));
4801 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4803 else
4804 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4805 if ((j & (k - 1)) != k - 1)
4806 continue;
4807 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4808 new_stmt
4809 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4810 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4812 if ((unsigned) j == k - 1)
4813 *vec_stmt = new_stmt;
4814 if (slp_node)
4815 SLP_TREE_VEC_DEFS (slp_node)
4816 .quick_push (gimple_assign_lhs (new_stmt));
4817 else
4818 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4819 continue;
4821 else if (ratype)
4823 tree t = build_fold_addr_expr (new_temp);
4824 t = build2 (MEM_REF, vectype, t,
4825 build_int_cst (TREE_TYPE (t), 0));
4826 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4827 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4828 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4830 else if (!useless_type_conversion_p (vectype, rtype))
4832 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4833 new_stmt
4834 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4835 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4839 if (j == 0)
4840 *vec_stmt = new_stmt;
4841 if (slp_node)
4842 SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
4843 else
4844 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4847 for (i = 0; i < nargs; ++i)
4849 vec<tree> oprndsi = vec_oprnds[i];
4850 oprndsi.release ();
4852 vargs.release ();
4854 /* Mark the clone as no longer being a candidate for GC. */
4855 bestn->gc_candidate = false;
4857 /* The call in STMT might prevent it from being removed in DCE.
4858 We, however, cannot remove it here, due to the way the SSA name
4859 it defines is mapped to the new definition.  So just replace the
4860 rhs of the statement with something harmless. */
4862 if (slp_node)
4863 return true;
4865 gimple *new_stmt;
4866 if (scalar_dest)
4868 type = TREE_TYPE (scalar_dest);
4869 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4870 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4872 else
4873 new_stmt = gimple_build_nop ();
4874 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4875 unlink_stmt_vdef (stmt);
4877 return true;
4881 /* Function vect_gen_widened_results_half
4883 Create a vector stmt whose code, type, number of arguments, and result
4884 variable are CH, OP_TYPE, and VEC_DEST, and its arguments are
4885 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4886 CH may be either a tree code or an internal function code, in which
4887 case an internal-function call is built instead of an assignment.
4888 STMT_INFO is the original scalar stmt that we are vectorizing. */
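/* Added illustration (not part of the original source): widening a V8HI
   multiplication to V4SI results is done in two halves, e.g. via
   VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR; each call of this
   helper emits one of those two half statements.  */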
4890 static gimple *
4891 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
4892 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4893 tree vec_dest, gimple_stmt_iterator *gsi,
4894 stmt_vec_info stmt_info)
4896 gimple *new_stmt;
4897 tree new_temp;
4899 /* Generate half of the widened result: */
4900 if (op_type != binary_op)
4901 vec_oprnd1 = NULL;
4902 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
4903 new_temp = make_ssa_name (vec_dest, new_stmt);
4904 gimple_set_lhs (new_stmt, new_temp);
4905 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4907 return new_stmt;
4911 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4912 For multi-step conversions store the resulting vectors and call the function
4913 recursively.  When NARROW_SRC_P is true, there is still a conversion after
4914 narrowing, so don't store the vectors in the SLP_NODE or in the vector info
4915 of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
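/* Added illustration (not from the original source): narrowing V4SI
   operands to V16QI results goes through an intermediate V8HI step;
   each recursion level packs pairs of vectors with VEC_PACK_TRUNC_EXPR,
   halving the number of vectors until the destination type is reached.  */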
4917 static void
4918 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4919 int multi_step_cvt,
4920 stmt_vec_info stmt_info,
4921 vec<tree> &vec_dsts,
4922 gimple_stmt_iterator *gsi,
4923 slp_tree slp_node, code_helper code,
4924 bool narrow_src_p)
4926 unsigned int i;
4927 tree vop0, vop1, new_tmp, vec_dest;
4929 vec_dest = vec_dsts.pop ();
4931 for (i = 0; i < vec_oprnds->length (); i += 2)
4933 /* Create demotion operation. */
4934 vop0 = (*vec_oprnds)[i];
4935 vop1 = (*vec_oprnds)[i + 1];
4936 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
4937 new_tmp = make_ssa_name (vec_dest, new_stmt);
4938 gimple_set_lhs (new_stmt, new_tmp);
4939 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4940 if (multi_step_cvt || narrow_src_p)
4941 /* Store the resulting vector for next recursive call,
4942 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
4943 (*vec_oprnds)[i/2] = new_tmp;
4944 else
4946 /* This is the last step of the conversion sequence. Store the
4947 vectors in SLP_NODE or in vector info of the scalar statement
4948 (or in STMT_VINFO_RELATED_STMT chain). */
4949 if (slp_node)
4950 slp_node->push_vec_def (new_stmt);
4951 else
4952 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4956 /* For multi-step demotion operations we first generate demotion operations
4957 from the source type to the intermediate types, and then combine the
4958 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4959 type. */
4960 if (multi_step_cvt)
4962 /* At each level of recursion we have half of the operands we had at the
4963 previous level. */
4964 vec_oprnds->truncate ((i+1)/2);
4965 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4966 multi_step_cvt - 1,
4967 stmt_info, vec_dsts, gsi,
4968 slp_node, VEC_PACK_TRUNC_EXPR,
4969 narrow_src_p);
4972 vec_dsts.quick_push (vec_dest);
4976 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4977 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4978 STMT_INFO. For multi-step conversions store the resulting vectors and
4979 call the function recursively. */
4981 static void
4982 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4983 vec<tree> *vec_oprnds0,
4984 vec<tree> *vec_oprnds1,
4985 stmt_vec_info stmt_info, tree vec_dest,
4986 gimple_stmt_iterator *gsi,
4987 code_helper ch1,
4988 code_helper ch2, int op_type)
4990 int i;
4991 tree vop0, vop1, new_tmp1, new_tmp2;
4992 gimple *new_stmt1, *new_stmt2;
4993 vec<tree> vec_tmp = vNULL;
4995 vec_tmp.create (vec_oprnds0->length () * 2);
4996 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4998 if (op_type == binary_op)
4999 vop1 = (*vec_oprnds1)[i];
5000 else
5001 vop1 = NULL_TREE;
5003 /* Generate the two halves of promotion operation. */
5004 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5005 op_type, vec_dest, gsi,
5006 stmt_info);
5007 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5008 op_type, vec_dest, gsi,
5009 stmt_info);
5010 if (is_gimple_call (new_stmt1))
5012 new_tmp1 = gimple_call_lhs (new_stmt1);
5013 new_tmp2 = gimple_call_lhs (new_stmt2);
5015 else
5017 new_tmp1 = gimple_assign_lhs (new_stmt1);
5018 new_tmp2 = gimple_assign_lhs (new_stmt2);
5021 /* Store the results for the next step. */
5022 vec_tmp.quick_push (new_tmp1);
5023 vec_tmp.quick_push (new_tmp2);
5026 vec_oprnds0->release ();
5027 *vec_oprnds0 = vec_tmp;
5030 /* Create vectorized promotion stmts for widening stmts using only half the
5031 potential vector size for input. */
5032 static void
5033 vect_create_half_widening_stmts (vec_info *vinfo,
5034 vec<tree> *vec_oprnds0,
5035 vec<tree> *vec_oprnds1,
5036 stmt_vec_info stmt_info, tree vec_dest,
5037 gimple_stmt_iterator *gsi,
5038 code_helper code1,
5039 int op_type)
5041 int i;
5042 tree vop0, vop1;
5043 gimple *new_stmt1;
5044 gimple *new_stmt2;
5045 gimple *new_stmt3;
5046 vec<tree> vec_tmp = vNULL;
5048 vec_tmp.create (vec_oprnds0->length ());
5049 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5051 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5053 gcc_assert (op_type == binary_op);
5054 vop1 = (*vec_oprnds1)[i];
5056 /* Widen the first vector input. */
5057 out_type = TREE_TYPE (vec_dest);
5058 new_tmp1 = make_ssa_name (out_type);
5059 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5060 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5061 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5063 /* Widen the second vector input. */
5064 new_tmp2 = make_ssa_name (out_type);
5065 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5066 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5067 /* Perform the operation with both vector inputs widened. */
5068 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5070 else
5072 /* Perform the operation with the single vector input widened. */
5073 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5076 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5077 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5078 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5080 /* Store the results for the next step. */
5081 vec_tmp.quick_push (new_tmp3);
5084 vec_oprnds0->release ();
5085 *vec_oprnds0 = vec_tmp;
5089 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5090 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5091 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5092 Return true if STMT_INFO is vectorizable in this way. */
5094 static bool
5095 vectorizable_conversion (vec_info *vinfo,
5096 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5097 gimple **vec_stmt, slp_tree slp_node,
5098 stmt_vector_for_cost *cost_vec)
5100 tree vec_dest, cvt_op = NULL_TREE;
5101 tree scalar_dest;
5102 tree op0, op1 = NULL_TREE;
5103 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5104 tree_code tc1, tc2;
5105 code_helper code, code1, code2;
5106 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5107 tree new_temp;
5108 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5109 int ndts = 2;
5110 poly_uint64 nunits_in;
5111 poly_uint64 nunits_out;
5112 tree vectype_out, vectype_in;
5113 int ncopies, i;
5114 tree lhs_type, rhs_type;
5115 /* For conversions between floating point and integer, there are two NARROW
5116 cases.  NARROW_SRC is for FLOAT_EXPR, meaning
5117 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5118 This is safe when the range of the source integer fits into the lower
5119 precision.  NARROW_DST is for FIX_TRUNC_EXPR, meaning
5120 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5121 For other conversions, when there is narrowing, NARROW_DST is used by
5122 default. */
5123 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
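/* Added illustration (not from the original source): for
       short s = (short) d;
   (double -> short) NARROW_DST is used: FIX_TRUNC_EXPR to int followed
   by a demotion to short.  For
       float f = (float) ll;
   (long long -> float) NARROW_SRC can be used when ll's value range fits
   in int: demote ll to int first, then apply FLOAT_EXPR.  */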
5124 vec<tree> vec_oprnds0 = vNULL;
5125 vec<tree> vec_oprnds1 = vNULL;
5126 tree vop0;
5127 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5128 int multi_step_cvt = 0;
5129 vec<tree> interm_types = vNULL;
5130 tree intermediate_type, cvt_type = NULL_TREE;
5131 int op_type;
5132 unsigned short fltsz;
5134 /* Is STMT a vectorizable conversion? */
5136 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5137 return false;
5139 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5140 && ! vec_stmt)
5141 return false;
5143 gimple* stmt = stmt_info->stmt;
5144 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5145 return false;
5147 if (gimple_get_lhs (stmt) == NULL_TREE
5148 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5149 return false;
5154 if (is_gimple_assign (stmt))
5156 code = gimple_assign_rhs_code (stmt);
5157 op_type = TREE_CODE_LENGTH ((tree_code) code);
5159 else if (gimple_call_internal_p (stmt))
5161 code = gimple_call_internal_fn (stmt);
5162 op_type = gimple_call_num_args (stmt);
5164 else
5165 return false;
5167 bool widen_arith = (code == WIDEN_MULT_EXPR
5168 || code == WIDEN_LSHIFT_EXPR
5169 || widening_fn_p (code));
5171 if (!widen_arith
5172 && !CONVERT_EXPR_CODE_P (code)
5173 && code != FIX_TRUNC_EXPR
5174 && code != FLOAT_EXPR)
5175 return false;
5177 /* Check types of lhs and rhs. */
5178 scalar_dest = gimple_get_lhs (stmt);
5179 lhs_type = TREE_TYPE (scalar_dest);
5180 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5182 /* Check the operands of the operation. */
5183 slp_tree slp_op0, slp_op1 = NULL;
5184 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5185 0, &op0, &slp_op0, &dt[0], &vectype_in))
5187 if (dump_enabled_p ())
5188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5189 "use not simple.\n");
5190 return false;
5193 rhs_type = TREE_TYPE (op0);
5194 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5195 && !((INTEGRAL_TYPE_P (lhs_type)
5196 && INTEGRAL_TYPE_P (rhs_type))
5197 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5198 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5199 return false;
5201 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5202 && ((INTEGRAL_TYPE_P (lhs_type)
5203 && !type_has_mode_precision_p (lhs_type))
5204 || (INTEGRAL_TYPE_P (rhs_type)
5205 && !type_has_mode_precision_p (rhs_type))))
5207 if (dump_enabled_p ())
5208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5209 "type conversion to/from bit-precision unsupported."
5210 "\n");
5211 return false;
5214 if (op_type == binary_op)
5216 gcc_assert (code == WIDEN_MULT_EXPR
5217 || code == WIDEN_LSHIFT_EXPR
5218 || widening_fn_p (code));
5220 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5221 gimple_call_arg (stmt, 0);
5222 tree vectype1_in;
5223 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5224 &op1, &slp_op1, &dt[1], &vectype1_in))
5226 if (dump_enabled_p ())
5227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5228 "use not simple.\n");
5229 return false;
5231 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5232 OP1. */
5233 if (!vectype_in)
5234 vectype_in = vectype1_in;
5237 /* If op0 is an external or constant def, infer the vector type
5238 from the scalar type. */
5239 if (!vectype_in)
5240 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5241 if (vec_stmt)
5242 gcc_assert (vectype_in);
5243 if (!vectype_in)
5245 if (dump_enabled_p ())
5246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5247 "no vectype for scalar type %T\n", rhs_type);
5249 return false;
5252 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5253 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5255 if (dump_enabled_p ())
5256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5257 "can't convert between boolean and non "
5258 "boolean vectors %T\n", rhs_type);
5260 return false;
5263 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5264 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5265 if (known_eq (nunits_out, nunits_in))
5266 if (widen_arith)
5267 modifier = WIDEN;
5268 else
5269 modifier = NONE;
5270 else if (multiple_p (nunits_out, nunits_in))
5271 modifier = NARROW_DST;
5272 else
5274 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5275 modifier = WIDEN;
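/* For example, with 128-bit vectors a conversion from int to long has
   vectype_in V4SI and vectype_out V2DI; nunits_in (4) is a multiple of
   nunits_out (2), so the conversion is classified as WIDEN, while the
   reverse long-to-int conversion is classified as NARROW_DST. */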
5278 /* Multiple types in SLP are handled by creating the appropriate number of
5279 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5280 case of SLP. */
5281 if (slp_node)
5282 ncopies = 1;
5283 else if (modifier == NARROW_DST)
5284 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5285 else
5286 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5288 /* Sanity check: make sure that at least one copy of the vectorized stmt
5289 needs to be generated. */
5290 gcc_assert (ncopies >= 1);
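/* E.g. with 128-bit vectors and a vectorization factor of 8, a WIDEN
   conversion from V4SI inputs needs ncopies = 8 / 4 = 2 (based on
   vectype_in), whereas a NARROW_DST conversion producing V8HI results
   needs ncopies = 8 / 8 = 1 (based on vectype_out). */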
5292 bool found_mode = false;
5293 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5294 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5295 opt_scalar_mode rhs_mode_iter;
5297 /* Supportable by target? */
5298 switch (modifier)
5300 case NONE:
5301 if (code != FIX_TRUNC_EXPR
5302 && code != FLOAT_EXPR
5303 && !CONVERT_EXPR_CODE_P (code))
5304 return false;
5305 gcc_assert (code.is_tree_code ());
5306 if (supportable_convert_operation ((tree_code) code, vectype_out,
5307 vectype_in, &tc1))
5309 code1 = tc1;
5310 break;
5313 /* For conversions between float and integer types try whether
5314 we can use intermediate signed integer types to support the
5315 conversion. */
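/* That is, a FLOAT_EXPR can be emitted as a NOP_EXPR to an intermediate
   integer type followed by the FLOAT_EXPR, and (with -fno-trapping-math)
   a FIX_TRUNC_EXPR as the FIX_TRUNC_EXPR to an intermediate integer type
   followed by a NOP_EXPR to the final integer type. */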
5316 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
5317 && (code == FLOAT_EXPR ||
5318 (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5320 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5321 bool float_expr_p = code == FLOAT_EXPR;
5322 unsigned short target_size;
5323 scalar_mode intermediate_mode;
5324 if (demotion)
5326 intermediate_mode = lhs_mode;
5327 target_size = GET_MODE_SIZE (rhs_mode);
5329 else
5331 target_size = GET_MODE_SIZE (lhs_mode);
5332 if (!int_mode_for_size
5333 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5334 goto unsupported;
5336 code1 = float_expr_p ? code : NOP_EXPR;
5337 codecvt1 = float_expr_p ? NOP_EXPR : code;
5338 opt_scalar_mode mode_iter;
5339 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5341 intermediate_mode = mode_iter.require ();
5343 if (GET_MODE_SIZE (intermediate_mode) > target_size)
5344 break;
5346 scalar_mode cvt_mode;
5347 if (!int_mode_for_size
5348 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
5349 break;
5351 cvt_type = build_nonstandard_integer_type
5352 (GET_MODE_BITSIZE (cvt_mode), 0);
5354 /* Check if the intermediate type can hold OP0's range.
5355 When converting from float to integer this is not necessary
5356 because values that do not fit the (smaller) target type are
5357 unspecified anyway. */
5358 if (demotion && float_expr_p)
5360 wide_int op_min_value, op_max_value;
5361 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5362 break;
5364 if (cvt_type == NULL_TREE
5365 || (wi::min_precision (op_max_value, SIGNED)
5366 > TYPE_PRECISION (cvt_type))
5367 || (wi::min_precision (op_min_value, SIGNED)
5368 > TYPE_PRECISION (cvt_type)))
5369 continue;
5372 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5373 /* This should only happen for SLP, as long as the loop vectorizer
5374 only supports same-sized vectors. */
5375 if (cvt_type == NULL_TREE
5376 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5377 || !supportable_convert_operation ((tree_code) code1,
5378 vectype_out,
5379 cvt_type, &tc1)
5380 || !supportable_convert_operation ((tree_code) codecvt1,
5381 cvt_type,
5382 vectype_in, &tc2))
5383 continue;
5385 found_mode = true;
5386 break;
5389 if (found_mode)
5391 multi_step_cvt++;
5392 interm_types.safe_push (cvt_type);
5393 cvt_type = NULL_TREE;
5394 code1 = tc1;
5395 codecvt1 = tc2;
5396 break;
5399 /* FALLTHRU */
5400 unsupported:
5401 if (dump_enabled_p ())
5402 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5403 "conversion not supported by target.\n");
5404 return false;
5406 case WIDEN:
5407 if (known_eq (nunits_in, nunits_out))
5409 if (!(code.is_tree_code ()
5410 && supportable_half_widening_operation ((tree_code) code,
5411 vectype_out, vectype_in,
5412 &tc1)))
5413 goto unsupported;
5414 code1 = tc1;
5415 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5416 break;
5418 if (supportable_widening_operation (vinfo, code, stmt_info,
5419 vectype_out, vectype_in, &code1,
5420 &code2, &multi_step_cvt,
5421 &interm_types))
5423 /* A binary widening operation can only be supported directly by the
5424 architecture. */
5425 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5426 break;
5429 if (code != FLOAT_EXPR
5430 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5431 goto unsupported;
5433 fltsz = GET_MODE_SIZE (lhs_mode);
5434 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5436 rhs_mode = rhs_mode_iter.require ();
5437 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5438 break;
5440 cvt_type
5441 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5442 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5443 if (cvt_type == NULL_TREE)
5444 goto unsupported;
5446 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5448 tc1 = ERROR_MARK;
5449 gcc_assert (code.is_tree_code ());
5450 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5451 cvt_type, &tc1))
5452 goto unsupported;
5453 codecvt1 = tc1;
5455 else if (!supportable_widening_operation (vinfo, code,
5456 stmt_info, vectype_out,
5457 cvt_type, &codecvt1,
5458 &codecvt2, &multi_step_cvt,
5459 &interm_types))
5460 continue;
5461 else
5462 gcc_assert (multi_step_cvt == 0);
5464 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5465 cvt_type,
5466 vectype_in, &code1,
5467 &code2, &multi_step_cvt,
5468 &interm_types))
5470 found_mode = true;
5471 break;
5475 if (!found_mode)
5476 goto unsupported;
5478 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5479 codecvt2 = ERROR_MARK;
5480 else
5482 multi_step_cvt++;
5483 interm_types.safe_push (cvt_type);
5484 cvt_type = NULL_TREE;
5486 break;
5488 case NARROW_DST:
5489 gcc_assert (op_type == unary_op);
5490 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5491 &code1, &multi_step_cvt,
5492 &interm_types))
5493 break;
5495 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5496 goto unsupported;
5498 if (code == FIX_TRUNC_EXPR)
5500 cvt_type
5501 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5502 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5503 if (cvt_type == NULL_TREE)
5504 goto unsupported;
5505 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5506 &tc1))
5507 codecvt1 = tc1;
5508 else
5509 goto unsupported;
5510 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5511 &code1, &multi_step_cvt,
5512 &interm_types))
5513 break;
5515 /* If op0 can be represented with a low-precision integer,
5516 truncate it to cvt_type and then do the FLOAT_EXPR. */
5517 else if (code == FLOAT_EXPR)
5519 wide_int op_min_value, op_max_value;
5520 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5521 goto unsupported;
5523 cvt_type
5524 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5525 if (cvt_type == NULL_TREE
5526 || (wi::min_precision (op_max_value, SIGNED)
5527 > TYPE_PRECISION (cvt_type))
5528 || (wi::min_precision (op_min_value, SIGNED)
5529 > TYPE_PRECISION (cvt_type)))
5530 goto unsupported;
5532 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5533 if (cvt_type == NULL_TREE)
5534 goto unsupported;
5535 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5536 &code1, &multi_step_cvt,
5537 &interm_types))
5538 goto unsupported;
5539 if (supportable_convert_operation ((tree_code) code, vectype_out,
5540 cvt_type, &tc1))
5542 codecvt1 = tc1;
5543 modifier = NARROW_SRC;
5544 break;
5548 goto unsupported;
5550 default:
5551 gcc_unreachable ();
5554 if (!vec_stmt) /* transformation not required. */
5556 if (slp_node
5557 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5558 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5560 if (dump_enabled_p ())
5561 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5562 "incompatible vector types for invariants\n");
5563 return false;
5565 DUMP_VECT_SCOPE ("vectorizable_conversion");
5566 if (modifier == NONE)
5568 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5569 vect_model_simple_cost (vinfo, stmt_info,
5570 ncopies * (1 + multi_step_cvt),
5571 dt, ndts, slp_node, cost_vec);
5573 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5575 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5576 /* The final packing step produces one vector result per copy. */
5577 unsigned int nvectors
5578 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5579 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5580 multi_step_cvt, cost_vec,
5581 widen_arith);
5583 else
5585 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5586 /* The initial unpacking step produces two vector results
5587 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5588 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5589 unsigned int nvectors
5590 = (slp_node
5591 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5592 : ncopies * 2);
5593 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5594 multi_step_cvt, cost_vec,
5595 widen_arith);
5597 interm_types.release ();
5598 return true;
5601 /* Transform. */
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_NOTE, vect_location,
5604 "transform conversion. ncopies = %d.\n", ncopies);
5606 if (op_type == binary_op)
5608 if (CONSTANT_CLASS_P (op0))
5609 op0 = fold_convert (TREE_TYPE (op1), op0);
5610 else if (CONSTANT_CLASS_P (op1))
5611 op1 = fold_convert (TREE_TYPE (op0), op1);
5614 /* In case of multi-step conversion, we first generate conversion operations
5615 to the intermediate types, and then from those types to the final one.
5616 We create vector destinations for the intermediate types (TYPES) received
5617 from supportable_*_operation, and store them in the correct order
5618 for future use in vect_create_vectorized_*_stmts (). */
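/* For example, for a char-to-int promotion on a target that only widens
   by one step at a time, vec_dsts[0] holds the destination for the final
   int vectors and vec_dsts[1] the destination for the intermediate short
   vectors; the transform code below walks the array backwards so that
   the intermediate results are created first. */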
5619 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5620 bool widen_or_narrow_float_p
5621 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5622 vec_dest = vect_create_destination_var (scalar_dest,
5623 widen_or_narrow_float_p
5624 ? cvt_type : vectype_out);
5625 vec_dsts.quick_push (vec_dest);
5627 if (multi_step_cvt)
5629 for (i = interm_types.length () - 1;
5630 interm_types.iterate (i, &intermediate_type); i--)
5632 vec_dest = vect_create_destination_var (scalar_dest,
5633 intermediate_type);
5634 vec_dsts.quick_push (vec_dest);
5638 if (cvt_type)
5639 vec_dest = vect_create_destination_var (scalar_dest,
5640 widen_or_narrow_float_p
5641 ? vectype_out : cvt_type);
5643 int ninputs = 1;
5644 if (!slp_node)
5646 if (modifier == WIDEN)
5648 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5650 if (multi_step_cvt)
5651 ninputs = vect_pow2 (multi_step_cvt);
5652 ninputs *= 2;
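/* Each narrowing step packs two input vectors into one result, so a
   single-step narrowing consumes two input vectors per final vector and
   a conversion with MULTI_STEP_CVT intermediate steps consumes
   2^(MULTI_STEP_CVT + 1) of them, as computed above. */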
5656 switch (modifier)
5658 case NONE:
5659 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5660 op0, &vec_oprnds0);
5661 /* With multi_step_cvt, vec_dest is the intermediate-type destination. */
5662 if (multi_step_cvt)
5664 cvt_op = vec_dest;
5665 vec_dest = vec_dsts[0];
5668 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5670 /* Arguments are ready, create the new vector stmt. */
5671 gimple* new_stmt;
5672 if (multi_step_cvt)
5674 gcc_assert (multi_step_cvt == 1);
5675 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5676 new_temp = make_ssa_name (cvt_op, new_stmt);
5677 gimple_assign_set_lhs (new_stmt, new_temp);
5678 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5679 vop0 = new_temp;
5681 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5682 new_temp = make_ssa_name (vec_dest, new_stmt);
5683 gimple_set_lhs (new_stmt, new_temp);
5684 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5686 if (slp_node)
5687 slp_node->push_vec_def (new_stmt);
5688 else
5689 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5691 break;
5693 case WIDEN:
5694 /* In case the vectorization factor (VF) is bigger than the number
5695 of elements that we can fit in a vectype (nunits), we have to
5696 generate more than one vector stmt, i.e. we need to "unroll"
5697 the vector stmt by a factor VF/nunits. */
5698 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5699 op0, &vec_oprnds0,
5700 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5701 &vec_oprnds1);
5702 if (code == WIDEN_LSHIFT_EXPR)
5704 int oprnds_size = vec_oprnds0.length ();
5705 vec_oprnds1.create (oprnds_size);
5706 for (i = 0; i < oprnds_size; ++i)
5707 vec_oprnds1.quick_push (op1);
5709 /* Arguments are ready. Create the new vector stmts. */
5710 for (i = multi_step_cvt; i >= 0; i--)
5712 tree this_dest = vec_dsts[i];
5713 code_helper c1 = code1, c2 = code2;
5714 if (i == 0 && codecvt2 != ERROR_MARK)
5716 c1 = codecvt1;
5717 c2 = codecvt2;
5719 if (known_eq (nunits_out, nunits_in))
5720 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5721 stmt_info, this_dest, gsi, c1,
5722 op_type);
5723 else
5724 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5725 &vec_oprnds1, stmt_info,
5726 this_dest, gsi,
5727 c1, c2, op_type);
5730 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5732 gimple *new_stmt;
5733 if (cvt_type)
5735 new_temp = make_ssa_name (vec_dest);
5736 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5737 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5739 else
5740 new_stmt = SSA_NAME_DEF_STMT (vop0);
5742 if (slp_node)
5743 slp_node->push_vec_def (new_stmt);
5744 else
5745 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5747 break;
5749 case NARROW_SRC:
5750 case NARROW_DST:
5751 /* In case the vectorization factor (VF) is bigger than the number
5752 of elements that we can fit in a vectype (nunits), we have to
5753 generate more than one vector stmt, i.e. we need to "unroll"
5754 the vector stmt by a factor VF/nunits. */
5755 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5756 op0, &vec_oprnds0);
5757 /* Arguments are ready. Create the new vector stmts. */
5758 if (cvt_type && modifier == NARROW_DST)
5759 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5761 new_temp = make_ssa_name (vec_dest);
5762 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5763 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5764 vec_oprnds0[i] = new_temp;
5767 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5768 multi_step_cvt,
5769 stmt_info, vec_dsts, gsi,
5770 slp_node, code1,
5771 modifier == NARROW_SRC);
5772 /* After demoting op0 to cvt_type, convert it to dest. */
5773 if (cvt_type && code == FLOAT_EXPR)
5775 for (unsigned int i = 0; i != vec_oprnds0.length () / 2; i++)
5777 /* Arguments are ready, create the new vector stmt. */
5778 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5779 gimple *new_stmt
5780 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5781 new_temp = make_ssa_name (vec_dest, new_stmt);
5782 gimple_set_lhs (new_stmt, new_temp);
5783 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5785 /* This is the last step of the conversion sequence. Store the
5786 vectors in SLP_NODE or in vector info of the scalar statement
5787 (or in STMT_VINFO_RELATED_STMT chain). */
5788 if (slp_node)
5789 slp_node->push_vec_def (new_stmt);
5790 else
5791 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5794 break;
5796 if (!slp_node)
5797 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5799 vec_oprnds0.release ();
5800 vec_oprnds1.release ();
5801 interm_types.release ();
5803 return true;
5806 /* Return true if we can assume from the scalar form of STMT_INFO that
5807 neither the scalar nor the vector forms will generate code. STMT_INFO
5808 is known not to involve a data reference. */
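/* E.g. a plain copy, a VIEW_CONVERT_EXPR, or a conversion between int
   and unsigned int generates no code in either form. */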
5810 bool
5811 vect_nop_conversion_p (stmt_vec_info stmt_info)
5813 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5814 if (!stmt)
5815 return false;
5817 tree lhs = gimple_assign_lhs (stmt);
5818 tree_code code = gimple_assign_rhs_code (stmt);
5819 tree rhs = gimple_assign_rhs1 (stmt);
5821 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5822 return true;
5824 if (CONVERT_EXPR_CODE_P (code))
5825 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5827 return false;
5830 /* Function vectorizable_assignment.
5832 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5833 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5834 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5835 Return true if STMT_INFO is vectorizable in this way. */
5837 static bool
5838 vectorizable_assignment (vec_info *vinfo,
5839 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5840 gimple **vec_stmt, slp_tree slp_node,
5841 stmt_vector_for_cost *cost_vec)
5843 tree vec_dest;
5844 tree scalar_dest;
5845 tree op;
5846 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5847 tree new_temp;
5848 enum vect_def_type dt[1] = {vect_unknown_def_type};
5849 int ndts = 1;
5850 int ncopies;
5851 int i;
5852 vec<tree> vec_oprnds = vNULL;
5853 tree vop;
5854 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5855 enum tree_code code;
5856 tree vectype_in;
5858 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5859 return false;
5861 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5862 && ! vec_stmt)
5863 return false;
5865 /* Is vectorizable assignment? */
5866 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5867 if (!stmt)
5868 return false;
5870 scalar_dest = gimple_assign_lhs (stmt);
5871 if (TREE_CODE (scalar_dest) != SSA_NAME)
5872 return false;
5874 if (STMT_VINFO_DATA_REF (stmt_info))
5875 return false;
5877 code = gimple_assign_rhs_code (stmt);
5878 if (!(gimple_assign_single_p (stmt)
5879 || code == PAREN_EXPR
5880 || CONVERT_EXPR_CODE_P (code)))
5881 return false;
5883 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5884 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5886 /* Multiple types in SLP are handled by creating the appropriate number of
5887 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5888 case of SLP. */
5889 if (slp_node)
5890 ncopies = 1;
5891 else
5892 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5894 gcc_assert (ncopies >= 1);
5896 slp_tree slp_op;
5897 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5898 &dt[0], &vectype_in))
5900 if (dump_enabled_p ())
5901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5902 "use not simple.\n");
5903 return false;
5905 if (!vectype_in)
5906 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5908 /* We can handle NOP_EXPR conversions that do not change the number
5909 of elements or the vector size. */
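/* E.g. a conversion between int and unsigned int, or a VIEW_CONVERT_EXPR
   between equally sized vector types, is emitted below as a single
   VIEW_CONVERT_EXPR on each vector operand. */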
5910 if ((CONVERT_EXPR_CODE_P (code)
5911 || code == VIEW_CONVERT_EXPR)
5912 && (!vectype_in
5913 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5914 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5915 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5916 return false;
5918 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5920 if (dump_enabled_p ())
5921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5922 "can't convert between boolean and non "
5923 "boolean vectors %T\n", TREE_TYPE (op));
5925 return false;
5928 /* We do not handle bit-precision changes. */
5929 if ((CONVERT_EXPR_CODE_P (code)
5930 || code == VIEW_CONVERT_EXPR)
5931 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5932 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5933 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
5934 && !type_has_mode_precision_p (TREE_TYPE (op))))
5935 /* But a conversion that does not change the bit-pattern is ok. */
5936 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5937 && INTEGRAL_TYPE_P (TREE_TYPE (op))
5938 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5939 > TYPE_PRECISION (TREE_TYPE (op)))
5940 && TYPE_UNSIGNED (TREE_TYPE (op)))
5941 || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5942 == TYPE_PRECISION (TREE_TYPE (op))))))
5944 if (dump_enabled_p ())
5945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5946 "type conversion to/from bit-precision "
5947 "unsupported.\n");
5948 return false;
5951 if (!vec_stmt) /* transformation not required. */
5953 if (slp_node
5954 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5956 if (dump_enabled_p ())
5957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5958 "incompatible vector types for invariants\n");
5959 return false;
5961 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5962 DUMP_VECT_SCOPE ("vectorizable_assignment");
5963 if (!vect_nop_conversion_p (stmt_info))
5964 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5965 cost_vec);
5966 return true;
5969 /* Transform. */
5970 if (dump_enabled_p ())
5971 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5973 /* Handle def. */
5974 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5976 /* Handle use. */
5977 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5979 /* Arguments are ready. Create the new vector stmt. */
5980 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5982 if (CONVERT_EXPR_CODE_P (code)
5983 || code == VIEW_CONVERT_EXPR)
5984 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5985 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5986 new_temp = make_ssa_name (vec_dest, new_stmt);
5987 gimple_assign_set_lhs (new_stmt, new_temp);
5988 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5989 if (slp_node)
5990 slp_node->push_vec_def (new_stmt);
5991 else
5992 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5994 if (!slp_node)
5995 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5997 vec_oprnds.release ();
5998 return true;
6002 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6003 either as shift by a scalar or by a vector. */
6005 bool
6006 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6009 machine_mode vec_mode;
6010 optab optab;
6011 int icode;
6012 tree vectype;
6014 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6015 if (!vectype)
6016 return false;
6018 optab = optab_for_tree_code (code, vectype, optab_scalar);
6019 if (!optab
6020 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6022 optab = optab_for_tree_code (code, vectype, optab_vector);
6023 if (!optab
6024 || (optab_handler (optab, TYPE_MODE (vectype))
6025 == CODE_FOR_nothing))
6026 return false;
6029 vec_mode = TYPE_MODE (vectype);
6030 icode = (int) optab_handler (optab, vec_mode);
6031 if (icode == CODE_FOR_nothing)
6032 return false;
6034 return true;
6038 /* Function vectorizable_shift.
6040 Check if STMT_INFO performs a shift operation that can be vectorized.
6041 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6042 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6043 Return true if STMT_INFO is vectorizable in this way. */
6045 static bool
6046 vectorizable_shift (vec_info *vinfo,
6047 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6048 gimple **vec_stmt, slp_tree slp_node,
6049 stmt_vector_for_cost *cost_vec)
6051 tree vec_dest;
6052 tree scalar_dest;
6053 tree op0, op1 = NULL;
6054 tree vec_oprnd1 = NULL_TREE;
6055 tree vectype;
6056 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6057 enum tree_code code;
6058 machine_mode vec_mode;
6059 tree new_temp;
6060 optab optab;
6061 int icode;
6062 machine_mode optab_op2_mode;
6063 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6064 int ndts = 2;
6065 poly_uint64 nunits_in;
6066 poly_uint64 nunits_out;
6067 tree vectype_out;
6068 tree op1_vectype;
6069 int ncopies;
6070 int i;
6071 vec<tree> vec_oprnds0 = vNULL;
6072 vec<tree> vec_oprnds1 = vNULL;
6073 tree vop0, vop1;
6074 unsigned int k;
6075 bool scalar_shift_arg = true;
6076 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6077 bool incompatible_op1_vectype_p = false;
6079 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6080 return false;
6082 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6083 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6084 && ! vec_stmt)
6085 return false;
6087 /* Is STMT a vectorizable shift operation? */
6088 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6089 if (!stmt)
6090 return false;
6092 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6093 return false;
6095 code = gimple_assign_rhs_code (stmt);
6097 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6098 || code == RROTATE_EXPR))
6099 return false;
6101 scalar_dest = gimple_assign_lhs (stmt);
6102 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6103 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6105 if (dump_enabled_p ())
6106 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6107 "bit-precision shifts not supported.\n");
6108 return false;
6111 slp_tree slp_op0;
6112 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6113 0, &op0, &slp_op0, &dt[0], &vectype))
6115 if (dump_enabled_p ())
6116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6117 "use not simple.\n");
6118 return false;
6120 /* If op0 is an external or constant def, infer the vector type
6121 from the scalar type. */
6122 if (!vectype)
6123 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6124 if (vec_stmt)
6125 gcc_assert (vectype);
6126 if (!vectype)
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6130 "no vectype for scalar type\n");
6131 return false;
6134 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6135 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6136 if (maybe_ne (nunits_out, nunits_in))
6137 return false;
6139 stmt_vec_info op1_def_stmt_info;
6140 slp_tree slp_op1;
6141 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6142 &dt[1], &op1_vectype, &op1_def_stmt_info))
6144 if (dump_enabled_p ())
6145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6146 "use not simple.\n");
6147 return false;
6150 /* Multiple types in SLP are handled by creating the appropriate number of
6151 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6152 case of SLP. */
6153 if (slp_node)
6154 ncopies = 1;
6155 else
6156 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6158 gcc_assert (ncopies >= 1);
6160 /* Determine whether the shift amount is a vector, or scalar. If the
6161 shift/rotate amount is a vector, use the vector/vector shift optabs. */
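/* E.g. in "a[i] << 3" the shift amount is loop-invariant and the
   vector-by-scalar shift optab can be used, whereas "a[i] << b[i]"
   requires the vector-by-vector variant. */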
6163 if ((dt[1] == vect_internal_def
6164 || dt[1] == vect_induction_def
6165 || dt[1] == vect_nested_cycle)
6166 && !slp_node)
6167 scalar_shift_arg = false;
6168 else if (dt[1] == vect_constant_def
6169 || dt[1] == vect_external_def
6170 || dt[1] == vect_internal_def)
6172 /* In SLP, we need to check whether the shift count is the same in
6173 all the scalar stmts; in loops, a constant or invariant shift
6174 count is always a scalar shift. */
6175 if (slp_node)
6177 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6178 stmt_vec_info slpstmt_info;
6180 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6182 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6183 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6184 scalar_shift_arg = false;
6187 /* For internal SLP defs we have to make sure we see scalar stmts
6188 for all vector elements.
6189 ??? For different vectors we could resort to a different
6190 scalar shift operand but code-generation below simply always
6191 takes the first. */
6192 if (dt[1] == vect_internal_def
6193 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6194 stmts.length ()))
6195 scalar_shift_arg = false;
6198 /* If the shift amount is computed by a pattern stmt we cannot
6199 use the scalar amount directly thus give up and use a vector
6200 shift. */
6201 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6202 scalar_shift_arg = false;
6204 else
6206 if (dump_enabled_p ())
6207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6208 "operand mode requires invariant argument.\n");
6209 return false;
6212 /* Vector shifted by vector. */
6213 bool was_scalar_shift_arg = scalar_shift_arg;
6214 if (!scalar_shift_arg)
6216 optab = optab_for_tree_code (code, vectype, optab_vector);
6217 if (dump_enabled_p ())
6218 dump_printf_loc (MSG_NOTE, vect_location,
6219 "vector/vector shift/rotate found.\n");
6221 if (!op1_vectype)
6222 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6223 slp_op1);
6224 incompatible_op1_vectype_p
6225 = (op1_vectype == NULL_TREE
6226 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6227 TYPE_VECTOR_SUBPARTS (vectype))
6228 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6229 if (incompatible_op1_vectype_p
6230 && (!slp_node
6231 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6232 || slp_op1->refcnt != 1))
6234 if (dump_enabled_p ())
6235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6236 "unusable type for last operand in"
6237 " vector/vector shift/rotate.\n");
6238 return false;
6241 /* See if the machine has a vector shifted by scalar insn and if not
6242 then see if it has a vector shifted by vector insn. */
6243 else
6245 optab = optab_for_tree_code (code, vectype, optab_scalar);
6246 if (optab
6247 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6249 if (dump_enabled_p ())
6250 dump_printf_loc (MSG_NOTE, vect_location,
6251 "vector/scalar shift/rotate found.\n");
6253 else
6255 optab = optab_for_tree_code (code, vectype, optab_vector);
6256 if (optab
6257 && (optab_handler (optab, TYPE_MODE (vectype))
6258 != CODE_FOR_nothing))
6260 scalar_shift_arg = false;
6262 if (dump_enabled_p ())
6263 dump_printf_loc (MSG_NOTE, vect_location,
6264 "vector/vector shift/rotate found.\n");
6266 if (!op1_vectype)
6267 op1_vectype = get_vectype_for_scalar_type (vinfo,
6268 TREE_TYPE (op1),
6269 slp_op1);
6271 /* Unlike the other binary operators, shifts/rotates take an int
6272 rhs rather than an rhs of the same type as the lhs, so make
6273 sure the scalar amount has the right type if we are dealing
6274 with vectors of long long/long/short/char. */
6275 incompatible_op1_vectype_p
6276 = (!op1_vectype
6277 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6278 TREE_TYPE (op1)));
6279 if (incompatible_op1_vectype_p
6280 && dt[1] == vect_internal_def)
6282 if (dump_enabled_p ())
6283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6284 "unusable type for last operand in"
6285 " vector/vector shift/rotate.\n");
6286 return false;
6292 /* Supportable by target? */
6293 if (!optab)
6295 if (dump_enabled_p ())
6296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6297 "no optab.\n");
6298 return false;
6300 vec_mode = TYPE_MODE (vectype);
6301 icode = (int) optab_handler (optab, vec_mode);
6302 if (icode == CODE_FOR_nothing)
6304 if (dump_enabled_p ())
6305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6306 "op not supported by target.\n");
6307 return false;
6309 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6310 if (vect_emulated_vector_p (vectype))
6311 return false;
6313 if (!vec_stmt) /* transformation not required. */
6315 if (slp_node
6316 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6317 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6318 && (!incompatible_op1_vectype_p
6319 || dt[1] == vect_constant_def)
6320 && !vect_maybe_update_slp_op_vectype
6321 (slp_op1,
6322 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6324 if (dump_enabled_p ())
6325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6326 "incompatible vector types for invariants\n");
6327 return false;
6329 /* Now adjust the constant shift amount in place. */
6330 if (slp_node
6331 && incompatible_op1_vectype_p
6332 && dt[1] == vect_constant_def)
6334 for (unsigned i = 0;
6335 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6337 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6338 = fold_convert (TREE_TYPE (vectype),
6339 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6340 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6341 == INTEGER_CST));
6344 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6345 DUMP_VECT_SCOPE ("vectorizable_shift");
6346 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6347 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6348 return true;
6351 /* Transform. */
6353 if (dump_enabled_p ())
6354 dump_printf_loc (MSG_NOTE, vect_location,
6355 "transform binary/unary operation.\n");
6357 if (incompatible_op1_vectype_p && !slp_node)
6359 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6360 op1 = fold_convert (TREE_TYPE (vectype), op1);
6361 if (dt[1] != vect_constant_def)
6362 op1 = vect_init_vector (vinfo, stmt_info, op1,
6363 TREE_TYPE (vectype), NULL);
6366 /* Handle def. */
6367 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6369 if (scalar_shift_arg && dt[1] != vect_internal_def)
6371 /* Vector shl and shr insn patterns can be defined with scalar
6372 operand 2 (shift operand). In this case, use constant or loop
6373 invariant op1 directly, without extending it to vector mode
6374 first. */
6375 optab_op2_mode = insn_data[icode].operand[2].mode;
6376 if (!VECTOR_MODE_P (optab_op2_mode))
6378 if (dump_enabled_p ())
6379 dump_printf_loc (MSG_NOTE, vect_location,
6380 "operand 1 using scalar mode.\n");
6381 vec_oprnd1 = op1;
6382 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6383 vec_oprnds1.quick_push (vec_oprnd1);
6384 /* Store vec_oprnd1 for every vector stmt to be created.
6385 We check during the analysis that all the shift arguments
6386 are the same.
6387 TODO: Allow different constants for different vector
6388 stmts generated for an SLP instance. */
6389 for (k = 0;
6390 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6391 vec_oprnds1.quick_push (vec_oprnd1);
6394 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6396 if (was_scalar_shift_arg)
6398 /* If the argument was the same in all lanes create
6399 the correctly typed vector shift amount directly. */
6400 op1 = fold_convert (TREE_TYPE (vectype), op1);
6401 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6402 !loop_vinfo ? gsi : NULL);
6403 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6404 !loop_vinfo ? gsi : NULL);
6405 vec_oprnds1.create (slp_node->vec_stmts_size);
6406 for (k = 0; k < slp_node->vec_stmts_size; k++)
6407 vec_oprnds1.quick_push (vec_oprnd1);
6409 else if (dt[1] == vect_constant_def)
6410 /* The constant shift amount has been adjusted in place. */
6412 else
6413 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6416 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6417 (a special case for certain kind of vector shifts); otherwise,
6418 operand 1 should be of a vector type (the usual case). */
6419 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6420 op0, &vec_oprnds0,
6421 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6423 /* Arguments are ready. Create the new vector stmt. */
6424 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6426 /* For internal defs where we need to use a scalar shift arg,
6427 extract the first lane. */
6428 if (scalar_shift_arg && dt[1] == vect_internal_def)
6430 vop1 = vec_oprnds1[0];
6431 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6432 gassign *new_stmt
6433 = gimple_build_assign (new_temp,
6434 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6435 vop1,
6436 TYPE_SIZE (TREE_TYPE (new_temp)),
6437 bitsize_zero_node));
6438 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6439 vop1 = new_temp;
6441 else
6442 vop1 = vec_oprnds1[i];
6443 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6444 new_temp = make_ssa_name (vec_dest, new_stmt);
6445 gimple_assign_set_lhs (new_stmt, new_temp);
6446 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6447 if (slp_node)
6448 slp_node->push_vec_def (new_stmt);
6449 else
6450 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6453 if (!slp_node)
6454 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6456 vec_oprnds0.release ();
6457 vec_oprnds1.release ();
6459 return true;
6462 /* Function vectorizable_operation.
6464 Check if STMT_INFO performs a binary, unary or ternary operation that can
6465 be vectorized.
6466 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6467 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6468 Return true if STMT_INFO is vectorizable in this way. */
6470 static bool
6471 vectorizable_operation (vec_info *vinfo,
6472 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6473 gimple **vec_stmt, slp_tree slp_node,
6474 stmt_vector_for_cost *cost_vec)
6476 tree vec_dest;
6477 tree scalar_dest;
6478 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6479 tree vectype;
6480 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6481 enum tree_code code, orig_code;
6482 machine_mode vec_mode;
6483 tree new_temp;
6484 int op_type;
6485 optab optab;
6486 bool target_support_p;
6487 enum vect_def_type dt[3]
6488 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6489 int ndts = 3;
6490 poly_uint64 nunits_in;
6491 poly_uint64 nunits_out;
6492 tree vectype_out;
6493 int ncopies, vec_num;
6494 int i;
6495 vec<tree> vec_oprnds0 = vNULL;
6496 vec<tree> vec_oprnds1 = vNULL;
6497 vec<tree> vec_oprnds2 = vNULL;
6498 tree vop0, vop1, vop2;
6499 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6501 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6502 return false;
6504 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6505 && ! vec_stmt)
6506 return false;
6508 /* Is STMT a vectorizable unary/binary/ternary operation? */
6509 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6510 if (!stmt)
6511 return false;
6513 /* Loads and stores are handled in vectorizable_{load,store}. */
6514 if (STMT_VINFO_DATA_REF (stmt_info))
6515 return false;
6517 orig_code = code = gimple_assign_rhs_code (stmt);
6519 /* Shifts are handled in vectorizable_shift. */
6520 if (code == LSHIFT_EXPR
6521 || code == RSHIFT_EXPR
6522 || code == LROTATE_EXPR
6523 || code == RROTATE_EXPR)
6524 return false;
6526 /* Comparisons are handled in vectorizable_comparison. */
6527 if (TREE_CODE_CLASS (code) == tcc_comparison)
6528 return false;
6530 /* Conditions are handled in vectorizable_condition. */
6531 if (code == COND_EXPR)
6532 return false;
6534 /* For pointer addition and subtraction, we should use the normal
6535 plus and minus for the vector operation. */
6536 if (code == POINTER_PLUS_EXPR)
6537 code = PLUS_EXPR;
6538 if (code == POINTER_DIFF_EXPR)
6539 code = MINUS_EXPR;
6541 /* Support only unary or binary operations. */
6542 op_type = TREE_CODE_LENGTH (code);
6543 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6545 if (dump_enabled_p ())
6546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6547 "num. args = %d (not unary/binary/ternary op).\n",
6548 op_type);
6549 return false;
6552 scalar_dest = gimple_assign_lhs (stmt);
6553 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6555 /* Most operations cannot handle bit-precision types without extra
6556 truncations. */
6557 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6558 if (!mask_op_p
6559 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6560 /* Exception are bitwise binary operations. */
6561 && code != BIT_IOR_EXPR
6562 && code != BIT_XOR_EXPR
6563 && code != BIT_AND_EXPR)
6565 if (dump_enabled_p ())
6566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6567 "bit-precision arithmetic not supported.\n");
6568 return false;
6571 slp_tree slp_op0;
6572 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6573 0, &op0, &slp_op0, &dt[0], &vectype))
6575 if (dump_enabled_p ())
6576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6577 "use not simple.\n");
6578 return false;
6580 bool is_invariant = (dt[0] == vect_external_def
6581 || dt[0] == vect_constant_def);
6582 /* If op0 is an external or constant def, infer the vector type
6583 from the scalar type. */
6584 if (!vectype)
6586 /* For boolean type we cannot determine vectype by
6587 invariant value (don't know whether it is a vector
6588 of booleans or vector of integers). We use output
6589 vectype because operations on booleans don't change the
6590 type. */
6591 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6593 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6595 if (dump_enabled_p ())
6596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6597 "not supported operation on bool value.\n");
6598 return false;
6600 vectype = vectype_out;
6602 else
6603 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6604 slp_node);
6606 if (vec_stmt)
6607 gcc_assert (vectype);
6608 if (!vectype)
6610 if (dump_enabled_p ())
6611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6612 "no vectype for scalar type %T\n",
6613 TREE_TYPE (op0));
6615 return false;
6618 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6619 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6620 if (maybe_ne (nunits_out, nunits_in))
6621 return false;
6623 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6624 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6625 if (op_type == binary_op || op_type == ternary_op)
6627 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6628 1, &op1, &slp_op1, &dt[1], &vectype2))
6630 if (dump_enabled_p ())
6631 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6632 "use not simple.\n");
6633 return false;
6635 is_invariant &= (dt[1] == vect_external_def
6636 || dt[1] == vect_constant_def);
6637 if (vectype2
6638 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6639 return false;
6641 if (op_type == ternary_op)
6643 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6644 2, &op2, &slp_op2, &dt[2], &vectype3))
6646 if (dump_enabled_p ())
6647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6648 "use not simple.\n");
6649 return false;
6651 is_invariant &= (dt[2] == vect_external_def
6652 || dt[2] == vect_constant_def);
6653 if (vectype3
6654 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6655 return false;
6658 /* Multiple types in SLP are handled by creating the appropriate number of
6659 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6660 case of SLP. */
6661 if (slp_node)
6663 ncopies = 1;
6664 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6666 else
6668 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6669 vec_num = 1;
6672 gcc_assert (ncopies >= 1);
6674 /* Reject attempts to combine mask types with nonmask types, e.g. if
6675 we have an AND between a (nonmask) boolean loaded from memory and
6676 a (mask) boolean result of a comparison.
6678 TODO: We could easily fix these cases up using pattern statements. */
6679 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6680 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6681 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6683 if (dump_enabled_p ())
6684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6685 "mixed mask and nonmask vector types\n");
6686 return false;
6689 /* Supportable by target? */
6691 vec_mode = TYPE_MODE (vectype);
6692 if (code == MULT_HIGHPART_EXPR)
6693 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6694 else
6696 optab = optab_for_tree_code (code, vectype, optab_default);
6697 if (!optab)
6699 if (dump_enabled_p ())
6700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6701 "no optab.\n");
6702 return false;
6704 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6705 || optab_libfunc (optab, vec_mode));
6708 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6709 if (!target_support_p || using_emulated_vectors_p)
6711 if (dump_enabled_p ())
6712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6713 "op not supported by target.\n");
6714 /* When vec_mode is not a vector mode and we have verified that the
6715 ops we do not have to lower (like AND) are natively supported,
6716 let those through even when the mode isn't word_mode. For ops
6717 we do have to lower, the lowering code assumes we are dealing
6718 with word_mode. */
6719 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6720 || !target_support_p)
6721 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6722 /* Check only during analysis. */
6723 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6725 if (dump_enabled_p ())
6726 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6727 return false;
6729 if (dump_enabled_p ())
6730 dump_printf_loc (MSG_NOTE, vect_location,
6731 "proceeding using word mode.\n");
6732 using_emulated_vectors_p = true;
6735 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6736 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6737 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6738 internal_fn cond_fn = get_conditional_internal_fn (code);
6739 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
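/* E.g. for PLUS_EXPR these are IFN_COND_ADD and IFN_COND_LEN_ADD. */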
6741 /* If operating on inactive elements could generate spurious traps,
6742 we need to restrict the operation to active lanes. Note that this
6743 specifically doesn't apply to unhoisted invariants, since they
6744 operate on the same value for every lane.
6746 Similarly, if this operation is part of a reduction, a fully-masked
6747 loop should only change the active lanes of the reduction chain,
6748 keeping the inactive lanes as-is. */
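/* E.g. an integral division could trap on a zero divisor sitting in an
   inactive lane, so it must be restricted to the active lanes. */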
6749 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6750 || reduc_idx >= 0);
6752 if (!vec_stmt) /* transformation not required. */
6754 if (loop_vinfo
6755 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6756 && mask_out_inactive)
6758 if (cond_len_fn != IFN_LAST
6759 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6760 OPTIMIZE_FOR_SPEED))
6761 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6763 else if (cond_fn != IFN_LAST
6764 && direct_internal_fn_supported_p (cond_fn, vectype,
6765 OPTIMIZE_FOR_SPEED))
6766 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6767 vectype, NULL);
6768 else
6770 if (dump_enabled_p ())
6771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6772 "can't use a fully-masked loop because no"
6773 " conditional operation is available.\n");
6774 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6778 /* Put types on constant and invariant SLP children. */
6779 if (slp_node
6780 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6781 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6782 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6784 if (dump_enabled_p ())
6785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6786 "incompatible vector types for invariants\n");
6787 return false;
6790 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6791 DUMP_VECT_SCOPE ("vectorizable_operation");
6792 vect_model_simple_cost (vinfo, stmt_info,
6793 ncopies, dt, ndts, slp_node, cost_vec);
6794 if (using_emulated_vectors_p)
6796 /* The above vect_model_simple_cost call handles constants
6797 in the prologue and (mis-)costs one of the stmts as
6798 vector stmt. See below for the actual lowering that will
6799 be applied. */
6800 unsigned n
6801 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6802 switch (code)
6804 case PLUS_EXPR:
6805 n *= 5;
6806 break;
6807 case MINUS_EXPR:
6808 n *= 6;
6809 break;
6810 case NEGATE_EXPR:
6811 n *= 4;
6812 break;
6813 default:
6814 /* Bit operations do not have extra cost and are accounted
6815 as vector stmt by vect_model_simple_cost. */
6816 n = 0;
6817 break;
6819 if (n != 0)
6821 /* We also need to materialize two large constants. */
6822 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6823 0, vect_prologue);
6824 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6825 0, vect_body);
6828 return true;
6831 /* Transform. */
6833 if (dump_enabled_p ())
6834 dump_printf_loc (MSG_NOTE, vect_location,
6835 "transform binary/unary operation.\n");
6837 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6838 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6840 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6841 vectors with unsigned elements, but the result is signed. So, we
6842 need to compute the MINUS_EXPR into vectype temporary and
6843 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6844 tree vec_cvt_dest = NULL_TREE;
6845 if (orig_code == POINTER_DIFF_EXPR)
6847 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6848 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6850 /* Handle def. */
6851 else
6852 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6854 /* In case the vectorization factor (VF) is bigger than the number
6855 of elements that we can fit in a vectype (nunits), we have to generate
6856 more than one vector stmt, i.e. we need to "unroll" the
6857 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6858 from one copy of the vector stmt to the next, in the field
6859 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6860 stages to find the correct vector defs to be used when vectorizing
6861 stmts that use the defs of the current stmt. The example below
6862 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6863 we need to create 4 vectorized stmts):
6865 before vectorization:
6866 RELATED_STMT VEC_STMT
6867 S1: x = memref - -
6868 S2: z = x + 1 - -
6870 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6871 there):
6872 RELATED_STMT VEC_STMT
6873 VS1_0: vx0 = memref0 VS1_1 -
6874 VS1_1: vx1 = memref1 VS1_2 -
6875 VS1_2: vx2 = memref2 VS1_3 -
6876 VS1_3: vx3 = memref3 - -
6877 S1: x = load - VS1_0
6878 S2: z = x + 1 - -
6880 step2: vectorize stmt S2 (done here):
6881 To vectorize stmt S2 we first need to find the relevant vector
6882 def for the first operand 'x'. This is, as usual, obtained from
6883 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6884 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6885 relevant vector def 'vx0'. Having found 'vx0' we can generate
6886 the vector stmt VS2_0, and as usual, record it in the
6887 STMT_VINFO_VEC_STMT of stmt S2.
6888 When creating the second copy (VS2_1), we obtain the relevant vector
6889 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6890 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6891 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6892 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6893 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6894 chain of stmts and pointers:
6895 RELATED_STMT VEC_STMT
6896 VS1_0: vx0 = memref0 VS1_1 -
6897 VS1_1: vx1 = memref1 VS1_2 -
6898 VS1_2: vx2 = memref2 VS1_3 -
6899 VS1_3: vx3 = memref3 - -
6900 S1: x = load - VS1_0
6901 VS2_0: vz0 = vx0 + v1 VS2_1 -
6902 VS2_1: vz1 = vx1 + v1 VS2_2 -
6903 VS2_2: vz2 = vx2 + v1 VS2_3 -
6904 VS2_3: vz3 = vx3 + v1 - -
6905 S2: z = x + 1 - VS2_0 */
6907 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6908 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6909 /* Arguments are ready. Create the new vector stmt. */
6910 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6912 gimple *new_stmt = NULL;
6913 vop1 = ((op_type == binary_op || op_type == ternary_op)
6914 ? vec_oprnds1[i] : NULL_TREE);
6915 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6916 if (using_emulated_vectors_p
6917 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6919 /* Lower the operation. This follows vector lowering. */
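/* The vector operands are reinterpreted as one word_mode integer each.
   LOW_BITS below has all bits of every element set except the most
   significant one, HIGH_BITS only the most significant bit of every
   element. Masking with LOW_BITS keeps carries from propagating across
   element boundaries; the effect on each element's sign bit is then
   recomputed separately and XORed back into the result. */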
6920 unsigned int width = vector_element_bits (vectype);
6921 tree inner_type = TREE_TYPE (vectype);
6922 tree word_type
6923 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
6924 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6925 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
6926 tree high_bits
6927 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
6928 tree wvop0 = make_ssa_name (word_type);
6929 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6930 build1 (VIEW_CONVERT_EXPR,
6931 word_type, vop0));
6932 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6933 tree result_low, signs;
6934 if (code == PLUS_EXPR || code == MINUS_EXPR)
6936 tree wvop1 = make_ssa_name (word_type);
6937 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6938 build1 (VIEW_CONVERT_EXPR,
6939 word_type, vop1));
6940 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6941 signs = make_ssa_name (word_type);
6942 new_stmt = gimple_build_assign (signs,
6943 BIT_XOR_EXPR, wvop0, wvop1);
6944 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6945 tree b_low = make_ssa_name (word_type);
6946 new_stmt = gimple_build_assign (b_low,
6947 BIT_AND_EXPR, wvop1, low_bits);
6948 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6949 tree a_low = make_ssa_name (word_type);
6950 if (code == PLUS_EXPR)
6951 new_stmt = gimple_build_assign (a_low,
6952 BIT_AND_EXPR, wvop0, low_bits);
6953 else
6954 new_stmt = gimple_build_assign (a_low,
6955 BIT_IOR_EXPR, wvop0, high_bits);
6956 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6957 if (code == MINUS_EXPR)
6959 new_stmt = gimple_build_assign (NULL_TREE,
6960 BIT_NOT_EXPR, signs);
6961 signs = make_ssa_name (word_type);
6962 gimple_assign_set_lhs (new_stmt, signs);
6963 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6965 new_stmt = gimple_build_assign (NULL_TREE,
6966 BIT_AND_EXPR, signs, high_bits);
6967 signs = make_ssa_name (word_type);
6968 gimple_assign_set_lhs (new_stmt, signs);
6969 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6970 result_low = make_ssa_name (word_type);
6971 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
6972 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6974 else
6976 tree a_low = make_ssa_name (word_type);
6977 new_stmt = gimple_build_assign (a_low,
6978 BIT_AND_EXPR, wvop0, low_bits);
6979 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6980 signs = make_ssa_name (word_type);
6981 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
6982 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6983 new_stmt = gimple_build_assign (NULL_TREE,
6984 BIT_AND_EXPR, signs, high_bits);
6985 signs = make_ssa_name (word_type);
6986 gimple_assign_set_lhs (new_stmt, signs);
6987 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6988 result_low = make_ssa_name (word_type);
6989 new_stmt = gimple_build_assign (result_low,
6990 MINUS_EXPR, high_bits, a_low);
6991 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6993 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
6994 signs);
6995 result_low = make_ssa_name (word_type);
6996 gimple_assign_set_lhs (new_stmt, result_low);
6997 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6998 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
6999 build1 (VIEW_CONVERT_EXPR,
7000 vectype, result_low));
7001 new_temp = make_ssa_name (vectype);
7002 gimple_assign_set_lhs (new_stmt, new_temp);
7003 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7005 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7007 tree mask;
7008 if (masked_loop_p)
7009 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7010 vec_num * ncopies, vectype, i);
7011 else
7012 /* Dummy mask. */
7013 mask = build_minus_one_cst (truth_type_for (vectype));
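/* For a length-controlled loop the mask operand of the COND_LEN_*
   internal function is a dummy all-ones mask; the LEN and BIAS operands
   pushed below do the actual limiting to the active lanes. */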
7014 auto_vec<tree> vops (6);
7015 vops.quick_push (mask);
7016 vops.quick_push (vop0);
7017 if (vop1)
7018 vops.quick_push (vop1);
7019 if (vop2)
7020 vops.quick_push (vop2);
7021 if (reduc_idx >= 0)
7023 /* Perform the operation on active elements only and take
7024 inactive elements from the reduction chain input. */
7025 gcc_assert (!vop2);
7026 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7028 else
7030 auto else_value = targetm.preferred_else_value
7031 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7032 vops.quick_push (else_value);
7034 if (len_loop_p)
7036 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7037 vec_num * ncopies, vectype, i, 1);
7038 signed char biasval
7039 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7040 tree bias = build_int_cst (intQI_type_node, biasval);
7041 vops.quick_push (len);
7042 vops.quick_push (bias);
7044 gcall *call
7045 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7046 : cond_len_fn,
7047 vops);
7048 new_temp = make_ssa_name (vec_dest, call);
7049 gimple_call_set_lhs (call, new_temp);
7050 gimple_call_set_nothrow (call, true);
7051 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7052 new_stmt = call;
7054 else
7056 tree mask = NULL_TREE;
7057 /* When combining two masks, check whether either of them is elsewhere
7058 combined with a loop mask; if so, we can mark that the new combined
7059 mask doesn't need to be combined with a loop mask again. */
7060 if (masked_loop_p
7061 && code == BIT_AND_EXPR
7062 && VECTOR_BOOLEAN_TYPE_P (vectype))
7064 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7065 ncopies}))
7067 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7068 vec_num * ncopies, vectype, i);
7070 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7071 vop0, gsi);
7074 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7075 ncopies }))
7077 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7078 vec_num * ncopies, vectype, i);
7080 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7081 vop1, gsi);
7085 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7086 new_temp = make_ssa_name (vec_dest, new_stmt);
7087 gimple_assign_set_lhs (new_stmt, new_temp);
7088 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7089 if (using_emulated_vectors_p)
7090 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7092 /* Enter the combined value into the vector cond hash so we don't
7093 AND it with a loop mask again. */
7094 if (mask)
7095 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7098 if (vec_cvt_dest)
7100 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7101 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7102 new_temp);
7103 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7104 gimple_assign_set_lhs (new_stmt, new_temp);
7105 vect_finish_stmt_generation (vinfo, stmt_info,
7106 new_stmt, gsi);
7109 if (slp_node)
7110 slp_node->push_vec_def (new_stmt);
7111 else
7112 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7115 if (!slp_node)
7116 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7118 vec_oprnds0.release ();
7119 vec_oprnds1.release ();
7120 vec_oprnds2.release ();
7122 return true;
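/* Illustrative sketch only (not part of the vectorizer): the word_mode
   lowering above corresponds to the following scalar SWAR addition of four
   8-bit lanes packed in a uint32_t.  The identifiers are made up for the
   example.

     #include <stdint.h>

     static uint32_t
     swar_add_u8x4 (uint32_t a, uint32_t b)
     {
       const uint32_t high_bits = 0x80808080u;   // each lane's sign bit
       const uint32_t low_bits = ~high_bits;
       uint32_t signs = (a ^ b) & high_bits;     // correct sign bits modulo carry
       uint32_t sum_low = (a & low_bits) + (b & low_bits);  // no cross-lane carry
       return sum_low ^ signs;                   // restore each lane's sign bit
     }
*/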
7125 /* A helper function to ensure data reference DR_INFO's base alignment. */
7127 static void
7128 ensure_base_align (dr_vec_info *dr_info)
7130 /* Alignment is only analyzed for the first element of a DR group;
7131 use that to determine the base alignment we need to enforce. */
7132 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7133 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7135 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7137 if (dr_info->base_misaligned)
7139 tree base_decl = dr_info->base_decl;
7141 // We should only be able to increase the alignment of a base object if
7142 // we know what its new alignment should be at compile time.
7143 unsigned HOST_WIDE_INT align_base_to =
7144 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7146 if (decl_in_symtab_p (base_decl))
7147 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7148 else if (DECL_ALIGN (base_decl) < align_base_to)
7150 SET_DECL_ALIGN (base_decl, align_base_to);
7151 DECL_USER_ALIGN (base_decl) = 1;
7153 dr_info->base_misaligned = false;
7158 /* Function get_group_alias_ptr_type.
7160 Return the alias type for the group starting at FIRST_STMT_INFO. */
7162 static tree
7163 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7165 struct data_reference *first_dr, *next_dr;
7167 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7168 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7169 while (next_stmt_info)
7171 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7172 if (get_alias_set (DR_REF (first_dr))
7173 != get_alias_set (DR_REF (next_dr)))
7175 if (dump_enabled_p ())
7176 dump_printf_loc (MSG_NOTE, vect_location,
7177 "conflicting alias set types.\n");
7178 return ptr_type_node;
7180 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7182 return reference_alias_ptr_type (DR_REF (first_dr));
7186 /* Function scan_operand_equal_p.
7188 Helper function for check_scan_store. Compare two references
7189 with .GOMP_SIMD_LANE bases. */
7191 static bool
7192 scan_operand_equal_p (tree ref1, tree ref2)
7194 tree ref[2] = { ref1, ref2 };
7195 poly_int64 bitsize[2], bitpos[2];
7196 tree offset[2], base[2];
7197 for (int i = 0; i < 2; ++i)
7199 machine_mode mode;
7200 int unsignedp, reversep, volatilep = 0;
7201 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7202 &offset[i], &mode, &unsignedp,
7203 &reversep, &volatilep);
7204 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7205 return false;
7206 if (TREE_CODE (base[i]) == MEM_REF
7207 && offset[i] == NULL_TREE
7208 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7210 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7211 if (is_gimple_assign (def_stmt)
7212 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7213 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7214 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7216 if (maybe_ne (mem_ref_offset (base[i]), 0))
7217 return false;
7218 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7219 offset[i] = gimple_assign_rhs2 (def_stmt);
7224 if (!operand_equal_p (base[0], base[1], 0))
7225 return false;
7226 if (maybe_ne (bitsize[0], bitsize[1]))
7227 return false;
7228 if (offset[0] != offset[1])
7230 if (!offset[0] || !offset[1])
7231 return false;
7232 if (!operand_equal_p (offset[0], offset[1], 0))
7234 tree step[2];
7235 for (int i = 0; i < 2; ++i)
7237 step[i] = integer_one_node;
7238 if (TREE_CODE (offset[i]) == SSA_NAME)
7240 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7241 if (is_gimple_assign (def_stmt)
7242 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7243 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7244 == INTEGER_CST))
7246 step[i] = gimple_assign_rhs2 (def_stmt);
7247 offset[i] = gimple_assign_rhs1 (def_stmt);
7250 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7252 step[i] = TREE_OPERAND (offset[i], 1);
7253 offset[i] = TREE_OPERAND (offset[i], 0);
7255 tree rhs1 = NULL_TREE;
7256 if (TREE_CODE (offset[i]) == SSA_NAME)
7258 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7259 if (gimple_assign_cast_p (def_stmt))
7260 rhs1 = gimple_assign_rhs1 (def_stmt);
7262 else if (CONVERT_EXPR_P (offset[i]))
7263 rhs1 = TREE_OPERAND (offset[i], 0);
7264 if (rhs1
7265 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7266 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7267 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7268 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7269 offset[i] = rhs1;
7271 if (!operand_equal_p (offset[0], offset[1], 0)
7272 || !operand_equal_p (step[0], step[1], 0))
7273 return false;
7276 return true;
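/* For instance (illustrative only), offsets like _25 * 4 and
   ((sizetype) _25) * 4 against the same "omp simd array" base compare equal
   above, because the constant MULT_EXPR step is peeled off and widening
   conversions of the remaining offset are stripped before the final
   operand_equal_p checks.  */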
7280 enum scan_store_kind {
7281 /* Normal permutation. */
7282 scan_store_kind_perm,
7284 /* Whole vector left shift permutation with zero init. */
7285 scan_store_kind_lshift_zero,
7287 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7288 scan_store_kind_lshift_cond
7291 /* Function scan_store_can_perm_p.
7293 Verify if we can perform the needed permutations or whole vector shifts.
7294 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
7295 USE_WHOLE_VECTOR is a vector of enum scan_store_kind indicating which
7296 operation to do at each step. */
7298 static int
7299 scan_store_can_perm_p (tree vectype, tree init,
7300 vec<enum scan_store_kind> *use_whole_vector = NULL)
7302 enum machine_mode vec_mode = TYPE_MODE (vectype);
7303 unsigned HOST_WIDE_INT nunits;
7304 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7305 return -1;
7306 int units_log2 = exact_log2 (nunits);
7307 if (units_log2 <= 0)
7308 return -1;
7310 int i;
7311 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
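/* Step I (0 <= I < UNITS_LOG2) needs a permutation that shifts the input up
   by 2**I lanes, filling the vacated low lanes from the initializer vector;
   the final step broadcasts the last lane.  If the target cannot do the
   permutation directly, fall back to a whole vector shift (plus a
   VEC_COND_EXPR when the initializer is not an all-zeros constant).  */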
7312 for (i = 0; i <= units_log2; ++i)
7314 unsigned HOST_WIDE_INT j, k;
7315 enum scan_store_kind kind = scan_store_kind_perm;
7316 vec_perm_builder sel (nunits, nunits, 1);
7317 sel.quick_grow (nunits);
7318 if (i == units_log2)
7320 for (j = 0; j < nunits; ++j)
7321 sel[j] = nunits - 1;
7323 else
7325 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7326 sel[j] = j;
7327 for (k = 0; j < nunits; ++j, ++k)
7328 sel[j] = nunits + k;
7330 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7331 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7333 if (i == units_log2)
7334 return -1;
7336 if (whole_vector_shift_kind == scan_store_kind_perm)
7338 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7339 return -1;
7340 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7341 /* Whole vector shifts shift in zeros, so if init is an all-zeros
7342 constant, there is no need to do anything further. */
7343 if ((TREE_CODE (init) != INTEGER_CST
7344 && TREE_CODE (init) != REAL_CST)
7345 || !initializer_zerop (init))
7347 tree masktype = truth_type_for (vectype);
7348 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7349 return -1;
7350 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7353 kind = whole_vector_shift_kind;
7355 if (use_whole_vector)
7357 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7358 use_whole_vector->safe_grow_cleared (i, true);
7359 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7360 use_whole_vector->safe_push (kind);
7364 return units_log2;
7368 /* Function check_scan_store.
7370 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7372 static bool
7373 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7374 enum vect_def_type rhs_dt, bool slp, tree mask,
7375 vect_memory_access_type memory_access_type)
7377 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7378 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7379 tree ref_type;
7381 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7382 if (slp
7383 || mask
7384 || memory_access_type != VMAT_CONTIGUOUS
7385 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7386 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7387 || loop_vinfo == NULL
7388 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7389 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7390 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7391 || !integer_zerop (DR_INIT (dr_info->dr))
7392 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7393 || !alias_sets_conflict_p (get_alias_set (vectype),
7394 get_alias_set (TREE_TYPE (ref_type))))
7396 if (dump_enabled_p ())
7397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7398 "unsupported OpenMP scan store.\n");
7399 return false;
7402 /* We need to pattern match code built by OpenMP lowering and simplified
7403 by subsequent optimizations into something we can handle.
7404 #pragma omp simd reduction(inscan,+:r)
7405 for (...)
7407 r += something ();
7408 #pragma omp scan inclusive (r)
7409 use (r);
7411 shall have body with:
7412 // Initialization for input phase, store the reduction initializer:
7413 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7414 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7415 D.2042[_21] = 0;
7416 // Actual input phase:
7418 r.0_5 = D.2042[_20];
7419 _6 = _4 + r.0_5;
7420 D.2042[_20] = _6;
7421 // Initialization for scan phase:
7422 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7423 _26 = D.2043[_25];
7424 _27 = D.2042[_25];
7425 _28 = _26 + _27;
7426 D.2043[_25] = _28;
7427 D.2042[_25] = _28;
7428 // Actual scan phase:
7430 r.1_8 = D.2042[_20];
7432 The "omp simd array" variable D.2042 holds the privatized copy used
7433 inside of the loop and D.2043 is another one that holds copies of
7434 the current original list item. The separate GOMP_SIMD_LANE ifn
7435 kinds are there in order to allow optimizing the initializer store
7436 and combiner sequence, e.g. if it is originally some C++ish user
7437 defined reduction, but allow the vectorizer to pattern recognize it
7438 and turn into the appropriate vectorized scan.
7440 For exclusive scan, this is slightly different:
7441 #pragma omp simd reduction(inscan,+:r)
7442 for (...)
7444 use (r);
7445 #pragma omp scan exclusive (r)
7446 r += something ();
7448 shall have body with:
7449 // Initialization for input phase, store the reduction initializer:
7450 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7451 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7452 D.2042[_21] = 0;
7453 // Actual input phase:
7455 r.0_5 = D.2042[_20];
7456 _6 = _4 + r.0_5;
7457 D.2042[_20] = _6;
7458 // Initialization for scan phase:
7459 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7460 _26 = D.2043[_25];
7461 D.2044[_25] = _26;
7462 _27 = D.2042[_25];
7463 _28 = _26 + _27;
7464 D.2043[_25] = _28;
7465 // Actual scan phase:
7467 r.1_8 = D.2044[_20];
7468 ... */
7470 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7472 /* Match the D.2042[_21] = 0; store above. Just require that
7473 it is a constant or external definition store. */
7474 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7476 fail_init:
7477 if (dump_enabled_p ())
7478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7479 "unsupported OpenMP scan initializer store.\n");
7480 return false;
7483 if (! loop_vinfo->scan_map)
7484 loop_vinfo->scan_map = new hash_map<tree, tree>;
7485 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7486 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7487 if (cached)
7488 goto fail_init;
7489 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7491 /* These stores can be vectorized normally. */
7492 return true;
7495 if (rhs_dt != vect_internal_def)
7497 fail:
7498 if (dump_enabled_p ())
7499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7500 "unsupported OpenMP scan combiner pattern.\n");
7501 return false;
7504 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7505 tree rhs = gimple_assign_rhs1 (stmt);
7506 if (TREE_CODE (rhs) != SSA_NAME)
7507 goto fail;
7509 gimple *other_store_stmt = NULL;
7510 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7511 bool inscan_var_store
7512 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7514 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7516 if (!inscan_var_store)
7518 use_operand_p use_p;
7519 imm_use_iterator iter;
7520 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7522 gimple *use_stmt = USE_STMT (use_p);
7523 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7524 continue;
7525 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7526 || !is_gimple_assign (use_stmt)
7527 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7528 || other_store_stmt
7529 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7530 goto fail;
7531 other_store_stmt = use_stmt;
7533 if (other_store_stmt == NULL)
7534 goto fail;
7535 rhs = gimple_assign_lhs (other_store_stmt);
7536 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7537 goto fail;
7540 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7542 use_operand_p use_p;
7543 imm_use_iterator iter;
7544 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7546 gimple *use_stmt = USE_STMT (use_p);
7547 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7548 continue;
7549 if (other_store_stmt)
7550 goto fail;
7551 other_store_stmt = use_stmt;
7554 else
7555 goto fail;
7557 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7558 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7559 || !is_gimple_assign (def_stmt)
7560 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7561 goto fail;
7563 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7564 /* For pointer addition, we should use the normal plus for the vector
7565 operation. */
7566 switch (code)
7568 case POINTER_PLUS_EXPR:
7569 code = PLUS_EXPR;
7570 break;
7571 case MULT_HIGHPART_EXPR:
7572 goto fail;
7573 default:
7574 break;
7576 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7577 goto fail;
7579 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7580 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7581 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7582 goto fail;
7584 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7585 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7586 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7587 || !gimple_assign_load_p (load1_stmt)
7588 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7589 || !gimple_assign_load_p (load2_stmt))
7590 goto fail;
7592 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7593 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7594 if (load1_stmt_info == NULL
7595 || load2_stmt_info == NULL
7596 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7597 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7598 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7599 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7600 goto fail;
7602 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7604 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7605 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7606 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7607 goto fail;
7608 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7609 tree lrhs;
7610 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7611 lrhs = rhs1;
7612 else
7613 lrhs = rhs2;
7614 use_operand_p use_p;
7615 imm_use_iterator iter;
7616 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7618 gimple *use_stmt = USE_STMT (use_p);
7619 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7620 continue;
7621 if (other_store_stmt)
7622 goto fail;
7623 other_store_stmt = use_stmt;
7627 if (other_store_stmt == NULL)
7628 goto fail;
7629 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7630 || !gimple_store_p (other_store_stmt))
7631 goto fail;
7633 stmt_vec_info other_store_stmt_info
7634 = loop_vinfo->lookup_stmt (other_store_stmt);
7635 if (other_store_stmt_info == NULL
7636 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7637 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7638 goto fail;
7640 gimple *stmt1 = stmt;
7641 gimple *stmt2 = other_store_stmt;
7642 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7643 std::swap (stmt1, stmt2);
7644 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7645 gimple_assign_rhs1 (load2_stmt)))
7647 std::swap (rhs1, rhs2);
7648 std::swap (load1_stmt, load2_stmt);
7649 std::swap (load1_stmt_info, load2_stmt_info);
7651 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7652 gimple_assign_rhs1 (load1_stmt)))
7653 goto fail;
7655 tree var3 = NULL_TREE;
7656 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7657 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7658 gimple_assign_rhs1 (load2_stmt)))
7659 goto fail;
7660 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7662 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7663 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7664 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7665 goto fail;
7666 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7667 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7668 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7669 || lookup_attribute ("omp simd inscan exclusive",
7670 DECL_ATTRIBUTES (var3)))
7671 goto fail;
7674 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7675 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7676 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7677 goto fail;
7679 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7680 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7681 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7682 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7683 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7684 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7685 goto fail;
7687 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7688 std::swap (var1, var2);
7690 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7692 if (!lookup_attribute ("omp simd inscan exclusive",
7693 DECL_ATTRIBUTES (var1)))
7694 goto fail;
7695 var1 = var3;
7698 if (loop_vinfo->scan_map == NULL)
7699 goto fail;
7700 tree *init = loop_vinfo->scan_map->get (var1);
7701 if (init == NULL)
7702 goto fail;
7704 /* The IL is as expected; now check whether we can actually vectorize it.
7705 Inclusive scan:
7706 _26 = D.2043[_25];
7707 _27 = D.2042[_25];
7708 _28 = _26 + _27;
7709 D.2043[_25] = _28;
7710 D.2042[_25] = _28;
7711 should be vectorized as (where _40 is the vectorized rhs
7712 from the D.2042[_21] = 0; store):
7713 _30 = MEM <vector(8) int> [(int *)&D.2043];
7714 _31 = MEM <vector(8) int> [(int *)&D.2042];
7715 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7716 _33 = _31 + _32;
7717 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7718 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7719 _35 = _33 + _34;
7720 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7721 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7722 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7723 _37 = _35 + _36;
7724 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7725 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7726 _38 = _30 + _37;
7727 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7728 MEM <vector(8) int> [(int *)&D.2043] = _39;
7729 MEM <vector(8) int> [(int *)&D.2042] = _38;
7730 Exclusive scan:
7731 _26 = D.2043[_25];
7732 D.2044[_25] = _26;
7733 _27 = D.2042[_25];
7734 _28 = _26 + _27;
7735 D.2043[_25] = _28;
7736 should be vectorized as (where _40 is the vectorized rhs
7737 from the D.2042[_21] = 0; store):
7738 _30 = MEM <vector(8) int> [(int *)&D.2043];
7739 _31 = MEM <vector(8) int> [(int *)&D.2042];
7740 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7741 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7742 _34 = _32 + _33;
7743 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7744 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7745 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7746 _36 = _34 + _35;
7747 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7748 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7749 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7750 _38 = _36 + _37;
7751 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7752 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7753 _39 = _30 + _38;
7754 _50 = _31 + _39;
7755 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7756 MEM <vector(8) int> [(int *)&D.2044] = _39;
7757 MEM <vector(8) int> [(int *)&D.2042] = _51; */
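/* Illustrative sketch only: the permute/add ladders above are the usual
   log2-step (Hillis-Steele) prefix sum.  A scalar model of the inclusive
   case for eight lanes, with made-up names:

     void
     inclusive_scan8 (int v[8])
     {
       for (int step = 1; step < 8; step *= 2)
         for (int j = 7; j >= step; j--)
           v[j] += v[j - step];   // descending j reads not-yet-updated lanes
     }

   After the loop v[j] holds v[0] + ... + v[j] of the original contents; the
   exclusive variant shifts the lanes up by one once more before the first
   addition, as the comment above shows.  */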
7758 enum machine_mode vec_mode = TYPE_MODE (vectype);
7759 optab optab = optab_for_tree_code (code, vectype, optab_default);
7760 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7761 goto fail;
7763 int units_log2 = scan_store_can_perm_p (vectype, *init);
7764 if (units_log2 == -1)
7765 goto fail;
7767 return true;
7771 /* Function vectorizable_scan_store.
7773 Helper of vectorizable_store; arguments as for vectorizable_store.
7774 Handle only the transformation; the checking is done in check_scan_store. */
7776 static bool
7777 vectorizable_scan_store (vec_info *vinfo,
7778 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7779 gimple **vec_stmt, int ncopies)
7781 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7782 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7783 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7784 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7786 if (dump_enabled_p ())
7787 dump_printf_loc (MSG_NOTE, vect_location,
7788 "transform scan store. ncopies = %d\n", ncopies);
7790 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7791 tree rhs = gimple_assign_rhs1 (stmt);
7792 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7794 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7795 bool inscan_var_store
7796 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7798 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7800 use_operand_p use_p;
7801 imm_use_iterator iter;
7802 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7804 gimple *use_stmt = USE_STMT (use_p);
7805 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7806 continue;
7807 rhs = gimple_assign_lhs (use_stmt);
7808 break;
7812 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7813 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7814 if (code == POINTER_PLUS_EXPR)
7815 code = PLUS_EXPR;
7816 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7817 && commutative_tree_code (code));
7818 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7819 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7820 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7821 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7822 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7823 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7824 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7825 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7826 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7827 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7828 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7830 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7832 std::swap (rhs1, rhs2);
7833 std::swap (var1, var2);
7834 std::swap (load1_dr_info, load2_dr_info);
7837 tree *init = loop_vinfo->scan_map->get (var1);
7838 gcc_assert (init);
7840 unsigned HOST_WIDE_INT nunits;
7841 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7842 gcc_unreachable ();
7843 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7844 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7845 gcc_assert (units_log2 > 0);
7846 auto_vec<tree, 16> perms;
7847 perms.quick_grow (units_log2 + 1);
7848 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7849 for (int i = 0; i <= units_log2; ++i)
7851 unsigned HOST_WIDE_INT j, k;
7852 vec_perm_builder sel (nunits, nunits, 1);
7853 sel.quick_grow (nunits);
7854 if (i == units_log2)
7855 for (j = 0; j < nunits; ++j)
7856 sel[j] = nunits - 1;
7857 else
7859 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7860 sel[j] = j;
7861 for (k = 0; j < nunits; ++j, ++k)
7862 sel[j] = nunits + k;
7864 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7865 if (!use_whole_vector.is_empty ()
7866 && use_whole_vector[i] != scan_store_kind_perm)
7868 if (zero_vec == NULL_TREE)
7869 zero_vec = build_zero_cst (vectype);
7870 if (masktype == NULL_TREE
7871 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7872 masktype = truth_type_for (vectype);
7873 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7875 else
7876 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7879 tree vec_oprnd1 = NULL_TREE;
7880 tree vec_oprnd2 = NULL_TREE;
7881 tree vec_oprnd3 = NULL_TREE;
7882 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7883 tree dataref_offset = build_int_cst (ref_type, 0);
7884 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7885 vectype, VMAT_CONTIGUOUS);
7886 tree ldataref_ptr = NULL_TREE;
7887 tree orig = NULL_TREE;
7888 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7889 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7890 auto_vec<tree> vec_oprnds1;
7891 auto_vec<tree> vec_oprnds2;
7892 auto_vec<tree> vec_oprnds3;
7893 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7894 *init, &vec_oprnds1,
7895 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7896 rhs2, &vec_oprnds3);
7897 for (int j = 0; j < ncopies; j++)
7899 vec_oprnd1 = vec_oprnds1[j];
7900 if (ldataref_ptr == NULL)
7901 vec_oprnd2 = vec_oprnds2[j];
7902 vec_oprnd3 = vec_oprnds3[j];
7903 if (j == 0)
7904 orig = vec_oprnd3;
7905 else if (!inscan_var_store)
7906 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7908 if (ldataref_ptr)
7910 vec_oprnd2 = make_ssa_name (vectype);
7911 tree data_ref = fold_build2 (MEM_REF, vectype,
7912 unshare_expr (ldataref_ptr),
7913 dataref_offset);
7914 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7915 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7916 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7917 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7918 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7921 tree v = vec_oprnd2;
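/* Build the log2-step scan ladder: at step I shift V up by 2**I lanes
   (filling the low lanes from the initializer, or from a zero vector with a
   VEC_COND_EXPR fixup for the whole-vector-shift fallback) and combine the
   shifted value with V using CODE.  For exclusive scan the first permutation
   is applied twice before the first combination.  */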
7922 for (int i = 0; i < units_log2; ++i)
7924 tree new_temp = make_ssa_name (vectype);
7925 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7926 (zero_vec
7927 && (use_whole_vector[i]
7928 != scan_store_kind_perm))
7929 ? zero_vec : vec_oprnd1, v,
7930 perms[i]);
7931 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7932 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7933 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7935 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7937 /* The whole-vector shift shifted in zero bits, but if *init
7938 is not initializer_zerop, we need to replace those elements
7939 with the corresponding elements from vec_oprnd1. */
7940 tree_vector_builder vb (masktype, nunits, 1);
7941 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7942 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7943 ? boolean_false_node : boolean_true_node);
7945 tree new_temp2 = make_ssa_name (vectype);
7946 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7947 new_temp, vec_oprnd1);
7948 vect_finish_stmt_generation (vinfo, stmt_info,
7949 g, gsi);
7950 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7951 new_temp = new_temp2;
7954 /* For exclusive scan, perform the perms[i] permutation once
7955 more. */
7956 if (i == 0
7957 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7958 && v == vec_oprnd2)
7960 v = new_temp;
7961 --i;
7962 continue;
7965 tree new_temp2 = make_ssa_name (vectype);
7966 g = gimple_build_assign (new_temp2, code, v, new_temp);
7967 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7968 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7970 v = new_temp2;
7973 tree new_temp = make_ssa_name (vectype);
7974 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7975 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7976 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7978 tree last_perm_arg = new_temp;
7979 /* For exclusive scan, new_temp computed above is the exclusive scan
7980 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7981 of the last element into orig. */
7982 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7984 last_perm_arg = make_ssa_name (vectype);
7985 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7986 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7987 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7990 orig = make_ssa_name (vectype);
7991 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7992 last_perm_arg, perms[units_log2]);
7993 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7994 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7996 if (!inscan_var_store)
7998 tree data_ref = fold_build2 (MEM_REF, vectype,
7999 unshare_expr (dataref_ptr),
8000 dataref_offset);
8001 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8002 g = gimple_build_assign (data_ref, new_temp);
8003 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8004 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8008 if (inscan_var_store)
8009 for (int j = 0; j < ncopies; j++)
8011 if (j != 0)
8012 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8014 tree data_ref = fold_build2 (MEM_REF, vectype,
8015 unshare_expr (dataref_ptr),
8016 dataref_offset);
8017 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8018 gimple *g = gimple_build_assign (data_ref, orig);
8019 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8020 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8022 return true;
8026 /* Function vectorizable_store.
8028 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
8029 that can be vectorized.
8030 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8031 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8032 Return true if STMT_INFO is vectorizable in this way. */
8034 static bool
8035 vectorizable_store (vec_info *vinfo,
8036 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8037 gimple **vec_stmt, slp_tree slp_node,
8038 stmt_vector_for_cost *cost_vec)
8040 tree data_ref;
8041 tree vec_oprnd = NULL_TREE;
8042 tree elem_type;
8043 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8044 class loop *loop = NULL;
8045 machine_mode vec_mode;
8046 tree dummy;
8047 enum vect_def_type rhs_dt = vect_unknown_def_type;
8048 enum vect_def_type mask_dt = vect_unknown_def_type;
8049 tree dataref_ptr = NULL_TREE;
8050 tree dataref_offset = NULL_TREE;
8051 gimple *ptr_incr = NULL;
8052 int ncopies;
8053 int j;
8054 stmt_vec_info first_stmt_info;
8055 bool grouped_store;
8056 unsigned int group_size, i;
8057 bool slp = (slp_node != NULL);
8058 unsigned int vec_num;
8059 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8060 tree aggr_type;
8061 gather_scatter_info gs_info;
8062 poly_uint64 vf;
8063 vec_load_store_type vls_type;
8064 tree ref_type;
8066 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8067 return false;
8069 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8070 && ! vec_stmt)
8071 return false;
8073 /* Is vectorizable store? */
8075 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8076 slp_tree mask_node = NULL;
8077 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8079 tree scalar_dest = gimple_assign_lhs (assign);
8080 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8081 && is_pattern_stmt_p (stmt_info))
8082 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8083 if (TREE_CODE (scalar_dest) != ARRAY_REF
8084 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8085 && TREE_CODE (scalar_dest) != INDIRECT_REF
8086 && TREE_CODE (scalar_dest) != COMPONENT_REF
8087 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8088 && TREE_CODE (scalar_dest) != REALPART_EXPR
8089 && TREE_CODE (scalar_dest) != MEM_REF)
8090 return false;
8092 else
8094 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8095 if (!call || !gimple_call_internal_p (call))
8096 return false;
8098 internal_fn ifn = gimple_call_internal_fn (call);
8099 if (!internal_store_fn_p (ifn))
8100 return false;
8102 int mask_index = internal_fn_mask_index (ifn);
8103 if (mask_index >= 0 && slp_node)
8104 mask_index = vect_slp_child_index_for_operand
8105 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8106 if (mask_index >= 0
8107 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8108 &mask, &mask_node, &mask_dt,
8109 &mask_vectype))
8110 return false;
8113 /* Cannot have hybrid store SLP -- that would mean storing to the
8114 same location twice. */
8115 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8117 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8118 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8120 if (loop_vinfo)
8122 loop = LOOP_VINFO_LOOP (loop_vinfo);
8123 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8125 else
8126 vf = 1;
8128 /* Multiple types in SLP are handled by creating the appropriate number of
8129 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8130 case of SLP. */
8131 if (slp)
8132 ncopies = 1;
8133 else
8134 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8136 gcc_assert (ncopies >= 1);
8138 /* FORNOW. This restriction should be relaxed. */
8139 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8141 if (dump_enabled_p ())
8142 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8143 "multiple types in nested loop.\n");
8144 return false;
8147 tree op;
8148 slp_tree op_node;
8149 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8150 &op, &op_node, &rhs_dt, &rhs_vectype, &vls_type))
8151 return false;
8153 elem_type = TREE_TYPE (vectype);
8154 vec_mode = TYPE_MODE (vectype);
8156 if (!STMT_VINFO_DATA_REF (stmt_info))
8157 return false;
8159 vect_memory_access_type memory_access_type;
8160 enum dr_alignment_support alignment_support_scheme;
8161 int misalignment;
8162 poly_int64 poffset;
8163 internal_fn lanes_ifn;
8164 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8165 ncopies, &memory_access_type, &poffset,
8166 &alignment_support_scheme, &misalignment, &gs_info,
8167 &lanes_ifn))
8168 return false;
8170 if (mask)
8172 if (memory_access_type == VMAT_CONTIGUOUS)
8174 if (!VECTOR_MODE_P (vec_mode)
8175 || !can_vec_mask_load_store_p (vec_mode,
8176 TYPE_MODE (mask_vectype), false))
8177 return false;
8179 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8180 && (memory_access_type != VMAT_GATHER_SCATTER
8181 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8183 if (dump_enabled_p ())
8184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8185 "unsupported access type for masked store.\n");
8186 return false;
8188 else if (memory_access_type == VMAT_GATHER_SCATTER
8189 && gs_info.ifn == IFN_LAST
8190 && !gs_info.decl)
8192 if (dump_enabled_p ())
8193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8194 "unsupported masked emulated scatter.\n");
8195 return false;
8198 else
8200 /* FORNOW. In some cases we can vectorize even if the data type is not
8201 supported (e.g. array initialization with 0). */
8202 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8203 return false;
8206 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8207 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8208 && memory_access_type != VMAT_GATHER_SCATTER
8209 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8210 if (grouped_store)
8212 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8213 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8214 group_size = DR_GROUP_SIZE (first_stmt_info);
8216 else
8218 first_stmt_info = stmt_info;
8219 first_dr_info = dr_info;
8220 group_size = vec_num = 1;
8223 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8225 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8226 memory_access_type))
8227 return false;
8230 bool costing_p = !vec_stmt;
8231 if (costing_p) /* transformation not required. */
8233 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8235 if (loop_vinfo
8236 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8237 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8238 vls_type, group_size,
8239 memory_access_type, &gs_info,
8240 mask);
8242 if (slp_node
8243 && (!vect_maybe_update_slp_op_vectype (op_node, vectype)
8244 || (mask
8245 && !vect_maybe_update_slp_op_vectype (mask_node,
8246 mask_vectype))))
8248 if (dump_enabled_p ())
8249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8250 "incompatible vector types for invariants\n");
8251 return false;
8254 if (dump_enabled_p ()
8255 && memory_access_type != VMAT_ELEMENTWISE
8256 && memory_access_type != VMAT_GATHER_SCATTER
8257 && alignment_support_scheme != dr_aligned)
8258 dump_printf_loc (MSG_NOTE, vect_location,
8259 "Vectorizing an unaligned access.\n");
8261 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8263 /* As function vect_transform_stmt shows, for interleaving stores
8264 the whole chain is vectorized when the last store in the chain
8265 is reached; the other stores in the group are skipped. So we
8266 want to cost only the last one here, but it's not trivial to
8267 get at the last one, and since costing the first one is
8268 equivalent, use the first one instead. */
8269 if (grouped_store
8270 && !slp
8271 && first_stmt_info != stmt_info)
8272 return true;
8274 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8276 /* Transform. */
8278 ensure_base_align (dr_info);
8280 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8282 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8283 gcc_assert (!slp);
8284 if (costing_p)
8286 unsigned int inside_cost = 0, prologue_cost = 0;
8287 if (vls_type == VLS_STORE_INVARIANT)
8288 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8289 stmt_info, 0, vect_prologue);
8290 vect_get_store_cost (vinfo, stmt_info, ncopies,
8291 alignment_support_scheme, misalignment,
8292 &inside_cost, cost_vec);
8294 if (dump_enabled_p ())
8295 dump_printf_loc (MSG_NOTE, vect_location,
8296 "vect_model_store_cost: inside_cost = %d, "
8297 "prologue_cost = %d .\n",
8298 inside_cost, prologue_cost);
8300 return true;
8302 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8305 if (grouped_store)
8307 /* FORNOW */
8308 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8310 if (slp)
8312 grouped_store = false;
8313 /* VEC_NUM is the number of vect stmts to be created for this
8314 group. */
8315 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8316 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8317 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8318 == first_stmt_info);
8319 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8320 op = vect_get_store_rhs (first_stmt_info);
8322 else
8323 /* VEC_NUM is the number of vect stmts to be created for this
8324 group. */
8325 vec_num = group_size;
8327 ref_type = get_group_alias_ptr_type (first_stmt_info);
8329 else
8330 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8332 if (!costing_p && dump_enabled_p ())
8333 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8334 ncopies);
8336 /* Check if we need to update the prologue cost for an invariant,
8337 and update it accordingly if so. If it's not for an
8338 interleaving store, we can just check vls_type; but if it's
8339 for an interleaving store, we need to check the def_type of
8340 the stored value since the current vls_type is just for
8341 first_stmt_info. */
8342 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8344 gcc_assert (costing_p);
8345 if (slp)
8346 return;
8347 if (grouped_store)
8349 gcc_assert (store_rhs);
8350 enum vect_def_type cdt;
8351 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8352 if (cdt != vect_constant_def && cdt != vect_external_def)
8353 return;
8355 else if (vls_type != VLS_STORE_INVARIANT)
8356 return;
8357 *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
8358 0, vect_prologue);
8361 if (memory_access_type == VMAT_ELEMENTWISE
8362 || memory_access_type == VMAT_STRIDED_SLP)
8364 unsigned inside_cost = 0, prologue_cost = 0;
8365 gimple_stmt_iterator incr_gsi;
8366 bool insert_after;
8367 gimple *incr;
8368 tree offvar;
8369 tree ivstep;
8370 tree running_off;
8371 tree stride_base, stride_step, alias_off;
8372 tree vec_oprnd = NULL_TREE;
8373 tree dr_offset;
8374 unsigned int g;
8375 /* Checked by get_load_store_type. */
8376 unsigned int const_nunits = nunits.to_constant ();
8378 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8379 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8381 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8382 stride_base
8383 = fold_build_pointer_plus
8384 (DR_BASE_ADDRESS (first_dr_info->dr),
8385 size_binop (PLUS_EXPR,
8386 convert_to_ptrofftype (dr_offset),
8387 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8388 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8390 /* For a store with loop-invariant (but other than power-of-2)
8391 stride (i.e. not a grouped access) like so:
8393 for (i = 0; i < n; i += stride)
8394 array[i] = ...;
8396 we generate a new induction variable and new stores from
8397 the components of the (vectorized) rhs:
8399 for (j = 0; ; j += VF*stride)
8400 vectemp = ...;
8401 tmp1 = vectemp[0];
8402 array[j] = tmp1;
8403 tmp2 = vectemp[1];
8404 array[j + stride] = tmp2;
8408 unsigned nstores = const_nunits;
8409 unsigned lnel = 1;
8410 tree ltype = elem_type;
8411 tree lvectype = vectype;
8412 if (slp)
8414 if (group_size < const_nunits
8415 && const_nunits % group_size == 0)
8417 nstores = const_nunits / group_size;
8418 lnel = group_size;
8419 ltype = build_vector_type (elem_type, group_size);
8420 lvectype = vectype;
8422 /* First check whether the vec_extract optab cannot extract
8423 the vector elements directly. */
8424 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8425 machine_mode vmode;
8426 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8427 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8428 group_size).exists (&vmode)
8429 || (convert_optab_handler (vec_extract_optab,
8430 TYPE_MODE (vectype), vmode)
8431 == CODE_FOR_nothing))
8433 /* Try to avoid emitting an extract of vector elements
8434 by performing the extracts using an integer type of the
8435 same size, extracting from a vector of those and then
8436 re-interpreting it as the original vector type if
8437 supported. */
8438 unsigned lsize
8439 = group_size * GET_MODE_BITSIZE (elmode);
8440 unsigned int lnunits = const_nunits / group_size;
8441 /* If we can't construct such a vector fall back to
8442 element extracts from the original vector type and
8443 element size stores. */
8444 if (int_mode_for_size (lsize, 0).exists (&elmode)
8445 && VECTOR_MODE_P (TYPE_MODE (vectype))
8446 && related_vector_mode (TYPE_MODE (vectype), elmode,
8447 lnunits).exists (&vmode)
8448 && (convert_optab_handler (vec_extract_optab,
8449 vmode, elmode)
8450 != CODE_FOR_nothing))
8452 nstores = lnunits;
8453 lnel = group_size;
8454 ltype = build_nonstandard_integer_type (lsize, 1);
8455 lvectype = build_vector_type (ltype, nstores);
8457 /* Else fall back to vector extraction anyway.
8458 Fewer stores are more important than avoiding spilling
8459 of the vector we extract from. Compared to the
8460 construction case in vectorizable_load, no store-forwarding
8461 issue exists here for reasonable archs. */
8464 else if (group_size >= const_nunits
8465 && group_size % const_nunits == 0)
8467 int mis_align = dr_misalignment (first_dr_info, vectype);
8468 dr_alignment_support dr_align
8469 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8470 mis_align);
8471 if (dr_align == dr_aligned
8472 || dr_align == dr_unaligned_supported)
8474 nstores = 1;
8475 lnel = const_nunits;
8476 ltype = vectype;
8477 lvectype = vectype;
8478 alignment_support_scheme = dr_align;
8479 misalignment = mis_align;
8482 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8483 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8486 if (!costing_p)
8488 ivstep = stride_step;
8489 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8490 build_int_cst (TREE_TYPE (ivstep), vf));
8492 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8494 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8495 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8496 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8497 insert_after, &offvar, NULL);
8498 incr = gsi_stmt (incr_gsi);
8500 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8503 alias_off = build_int_cst (ref_type, 0);
8504 stmt_vec_info next_stmt_info = first_stmt_info;
8505 auto_vec<tree> vec_oprnds (ncopies);
8506 /* For costing some adjacent vector stores, we'd like to cost them
8507 once with their total number instead of costing each one by one. */
8508 unsigned int n_adjacent_stores = 0;
8509 for (g = 0; g < group_size; g++)
8511 running_off = offvar;
8512 if (!costing_p)
8514 if (g)
8516 tree size = TYPE_SIZE_UNIT (ltype);
8517 tree pos
8518 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8519 tree newoff = copy_ssa_name (running_off, NULL);
8520 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8521 running_off, pos);
8522 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8523 running_off = newoff;
8526 if (!slp)
8527 op = vect_get_store_rhs (next_stmt_info);
8528 if (!costing_p)
8529 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
8530 &vec_oprnds);
8531 else
8532 update_prologue_cost (&prologue_cost, op);
8533 unsigned int group_el = 0;
8534 unsigned HOST_WIDE_INT
8535 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8536 for (j = 0; j < ncopies; j++)
8538 if (!costing_p)
8540 vec_oprnd = vec_oprnds[j];
8541 /* Pun the vector to extract from if necessary. */
8542 if (lvectype != vectype)
8544 tree tem = make_ssa_name (lvectype);
8545 tree cvt
8546 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8547 gimple *pun = gimple_build_assign (tem, cvt);
8548 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8549 vec_oprnd = tem;
8552 for (i = 0; i < nstores; i++)
8554 if (costing_p)
8556 /* We only need vector extraction when there is more
8557 than one store. */
8558 if (nstores > 1)
8559 inside_cost
8560 += record_stmt_cost (cost_vec, 1, vec_to_scalar,
8561 stmt_info, 0, vect_body);
8562 /* Treat a single-lane vector type store as a scalar
8563 store to avoid an ICE like PR110776. */
8564 if (VECTOR_TYPE_P (ltype)
8565 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8566 n_adjacent_stores++;
8567 else
8568 inside_cost
8569 += record_stmt_cost (cost_vec, 1, scalar_store,
8570 stmt_info, 0, vect_body);
8571 continue;
8573 tree newref, newoff;
8574 gimple *incr, *assign;
8575 tree size = TYPE_SIZE (ltype);
8576 /* Extract the i'th component. */
8577 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8578 bitsize_int (i), size);
8579 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8580 size, pos);
8582 elem = force_gimple_operand_gsi (gsi, elem, true,
8583 NULL_TREE, true,
8584 GSI_SAME_STMT);
8586 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8587 group_el * elsz);
8588 newref = build2 (MEM_REF, ltype,
8589 running_off, this_off);
8590 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8592 /* And store it to *running_off. */
8593 assign = gimple_build_assign (newref, elem);
8594 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8596 group_el += lnel;
8597 if (! slp
8598 || group_el == group_size)
8600 newoff = copy_ssa_name (running_off, NULL);
8601 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8602 running_off, stride_step);
8603 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8605 running_off = newoff;
8606 group_el = 0;
8608 if (g == group_size - 1
8609 && !slp)
8611 if (j == 0 && i == 0)
8612 *vec_stmt = assign;
8613 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8617 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8618 vec_oprnds.truncate(0);
8619 if (slp)
8620 break;
8623 if (costing_p)
8625 if (n_adjacent_stores > 0)
8626 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8627 alignment_support_scheme, misalignment,
8628 &inside_cost, cost_vec);
8629 if (dump_enabled_p ())
8630 dump_printf_loc (MSG_NOTE, vect_location,
8631 "vect_model_store_cost: inside_cost = %d, "
8632 "prologue_cost = %d .\n",
8633 inside_cost, prologue_cost);
8636 return true;
8639 gcc_assert (alignment_support_scheme);
8640 vec_loop_masks *loop_masks
8641 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8642 ? &LOOP_VINFO_MASKS (loop_vinfo)
8643 : NULL);
8644 vec_loop_lens *loop_lens
8645 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8646 ? &LOOP_VINFO_LENS (loop_vinfo)
8647 : NULL);
8649 /* We shouldn't use the length-based approach if the loop is fully masked. */
8650 gcc_assert (!loop_lens || !loop_masks);
8652 /* Targets with store-lane instructions must not require explicit
8653 realignment. vect_supportable_dr_alignment always returns either
8654 dr_aligned or dr_unaligned_supported for masked operations. */
8655 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8656 && !mask
8657 && !loop_masks)
8658 || alignment_support_scheme == dr_aligned
8659 || alignment_support_scheme == dr_unaligned_supported);
8661 tree offset = NULL_TREE;
8662 if (!known_eq (poffset, 0))
8663 offset = size_int (poffset);
8665 tree bump;
8666 tree vec_offset = NULL_TREE;
8667 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8669 aggr_type = NULL_TREE;
8670 bump = NULL_TREE;
8672 else if (memory_access_type == VMAT_GATHER_SCATTER)
8674 aggr_type = elem_type;
8675 if (!costing_p)
8676 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8677 &bump, &vec_offset, loop_lens);
8679 else
8681 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8682 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8683 else
8684 aggr_type = vectype;
8685 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8686 memory_access_type, loop_lens);
8689 if (mask && !costing_p)
8690 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8692 /* In case the vectorization factor (VF) is bigger than the number
8693 of elements that we can fit in a vectype (nunits), we have to generate
8694 more than one vector stmt - i.e. we need to "unroll" the
8695 vector stmt by a factor VF/nunits. */
8697 /* In case of interleaving (non-unit grouped access):
8699 S1: &base + 2 = x2
8700 S2: &base = x0
8701 S3: &base + 1 = x1
8702 S4: &base + 3 = x3
8704 We create vectorized stores starting from base address (the access of the
8705 first stmt in the chain (S2 in the above example), when the last store stmt
8706 of the chain (S4) is reached:
8708 VS1: &base = vx2
8709 VS2: &base + vec_size*1 = vx0
8710 VS3: &base + vec_size*2 = vx1
8711 VS4: &base + vec_size*3 = vx3
8713 Then permutation statements are generated:
8715 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8716 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8719 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8720 (the order of the data-refs in the output of vect_permute_store_chain
8721 corresponds to the order of scalar stmts in the interleaving chain - see
8722 the documentation of vect_permute_store_chain()).
8724 In case of both multiple types and interleaving, above vector stores and
8725 permutation stmts are created for every copy. The result vector stmts are
8726 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8727 STMT_VINFO_RELATED_STMT for the next copies.
8730 auto_vec<tree> dr_chain (group_size);
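/* GVEC_OPRNDS[I] will hold the NCOPIES vectorized defs of group member I,
   while DR_CHAIN holds one vectorized def per group member for the copy
   currently being emitted and feeds vect_permute_store_chain or the
   store-lanes array below.  */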
8731 auto_vec<tree> vec_masks;
8732 tree vec_mask = NULL;
8733 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8734 for (i = 0; i < group_size; i++)
8735 gvec_oprnds.quick_push (new auto_vec<tree> (ncopies));
8737 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8739 gcc_assert (!slp && grouped_store);
8740 unsigned inside_cost = 0, prologue_cost = 0;
8741 /* For costing some adjacent vector stores, we'd like to cost them
8742 once with their total number instead of costing each one by one. */
8743 unsigned int n_adjacent_stores = 0;
8744 for (j = 0; j < ncopies; j++)
8746 gimple *new_stmt;
8747 if (j == 0)
8749 /* For interleaved stores we collect vectorized defs for all
8750 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8751 as an input to vect_permute_store_chain(). */
8752 stmt_vec_info next_stmt_info = first_stmt_info;
8753 for (i = 0; i < group_size; i++)
8755 /* Since gaps are not supported for interleaved stores,
8756 DR_GROUP_SIZE is the exact number of stmts in the
8757 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8758 op = vect_get_store_rhs (next_stmt_info);
8759 if (costing_p)
8760 update_prologue_cost (&prologue_cost, op);
8761 else
8763 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8764 ncopies, op,
8765 gvec_oprnds[i]);
8766 vec_oprnd = (*gvec_oprnds[i])[0];
8767 dr_chain.quick_push (vec_oprnd);
8769 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8772 if (!costing_p)
8774 if (mask)
8776 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8777 mask, &vec_masks,
8778 mask_vectype);
8779 vec_mask = vec_masks[0];
8782 /* We should have caught mismatched types earlier. */
8783 gcc_assert (
8784 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8785 dataref_ptr
8786 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8787 aggr_type, NULL, offset, &dummy,
8788 gsi, &ptr_incr, false, bump);
8791 else if (!costing_p)
8793 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8794 /* DR_CHAIN is then used as an input to
8795 vect_permute_store_chain(). */
8796 for (i = 0; i < group_size; i++)
8798 vec_oprnd = (*gvec_oprnds[i])[j];
8799 dr_chain[i] = vec_oprnd;
8801 if (mask)
8802 vec_mask = vec_masks[j];
8803 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8804 stmt_info, bump);
8807 if (costing_p)
8809 n_adjacent_stores += vec_num;
8810 continue;
8813 /* Get an array into which we can store the individual vectors. */
8814 tree vec_array = create_vector_array (vectype, vec_num);
8816 /* Invalidate the current contents of VEC_ARRAY. This should
8817 become an RTL clobber too, which prevents the vector registers
8818 from being upward-exposed. */
8819 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
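/* The STORE_LANES-style internal calls emitted below take the VEC_NUM
   vectors collected in VEC_ARRAY and store them with interleaving in one
   operation (for example AArch64 st2/st3/st4), so no separate
   vect_permute_store_chain permutation is needed for this access type.  */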
8821 /* Store the individual vectors into the array. */
8822 for (i = 0; i < vec_num; i++)
8824 vec_oprnd = dr_chain[i];
8825 write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array, i);
8829 tree final_mask = NULL;
8830 tree final_len = NULL;
8831 tree bias = NULL;
8832 if (loop_masks)
8833 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8834 ncopies, vectype, j);
8835 if (vec_mask)
8836 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8837 vec_mask, gsi);
8839 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8841 if (loop_lens)
8842 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8843 ncopies, vectype, j, 1);
8844 else
8845 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8846 signed char biasval
8847 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8848 bias = build_int_cst (intQI_type_node, biasval);
8849 if (!final_mask)
8851 mask_vectype = truth_type_for (vectype);
8852 final_mask = build_minus_one_cst (mask_vectype);
8856 gcall *call;
8857 if (final_len && final_mask)
8859 /* Emit:
8860 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8861 LEN, BIAS, VEC_ARRAY). */
8862 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8863 tree alias_ptr = build_int_cst (ref_type, align);
8864 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8865 dataref_ptr, alias_ptr,
8866 final_mask, final_len, bias,
8867 vec_array);
8869 else if (final_mask)
8871 /* Emit:
8872 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8873 VEC_ARRAY). */
8874 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8875 tree alias_ptr = build_int_cst (ref_type, align);
8876 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8877 dataref_ptr, alias_ptr,
8878 final_mask, vec_array);
8880 else
8882 /* Emit:
8883 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8884 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8885 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
8886 gimple_call_set_lhs (call, data_ref);
8888 gimple_call_set_nothrow (call, true);
8889 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8890 new_stmt = call;
8892 /* Record that VEC_ARRAY is now dead. */
8893 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8894 if (j == 0)
8895 *vec_stmt = new_stmt;
8896 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8899 if (costing_p)
8901 if (n_adjacent_stores > 0)
8902 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8903 alignment_support_scheme, misalignment,
8904 &inside_cost, cost_vec);
8905 if (dump_enabled_p ())
8906 dump_printf_loc (MSG_NOTE, vect_location,
8907 "vect_model_store_cost: inside_cost = %d, "
8908 "prologue_cost = %d .\n",
8909 inside_cost, prologue_cost);
8912 return true;
8915 if (memory_access_type == VMAT_GATHER_SCATTER)
8917 gcc_assert (!grouped_store);
8918 auto_vec<tree> vec_offsets;
8919 unsigned int inside_cost = 0, prologue_cost = 0;
8920 for (j = 0; j < ncopies; j++)
8922 gimple *new_stmt;
8923 if (j == 0)
8925 if (costing_p && vls_type == VLS_STORE_INVARIANT)
8926 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8927 stmt_info, 0, vect_prologue);
8928 else if (!costing_p)
8930 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8931 DR_CHAIN is of size 1. */
8932 gcc_assert (group_size == 1);
8933 if (slp_node)
8934 vect_get_slp_defs (op_node, gvec_oprnds[0]);
8935 else
8936 vect_get_vec_defs_for_operand (vinfo, first_stmt_info,
8937 ncopies, op, gvec_oprnds[0]);
8938 if (mask)
8940 if (slp_node)
8941 vect_get_slp_defs (mask_node, &vec_masks);
8942 else
8943 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8944 ncopies,
8945 mask, &vec_masks,
8946 mask_vectype);
8949 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8950 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8951 slp_node, &gs_info,
8952 &dataref_ptr, &vec_offsets);
8953 else
8954 dataref_ptr
8955 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8956 aggr_type, NULL, offset,
8957 &dummy, gsi, &ptr_incr, false,
8958 bump);
8961 else if (!costing_p)
8963 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8964 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8965 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8966 gsi, stmt_info, bump);
8969 new_stmt = NULL;
8970 for (i = 0; i < vec_num; ++i)
8972 if (!costing_p)
8974 vec_oprnd = (*gvec_oprnds[0])[vec_num * j + i];
8975 if (mask)
8976 vec_mask = vec_masks[vec_num * j + i];
8977 /* We should have caught mismatched types earlier. */
8978 gcc_assert (useless_type_conversion_p (vectype,
8979 TREE_TYPE (vec_oprnd)));
8981 unsigned HOST_WIDE_INT align;
8982 tree final_mask = NULL_TREE;
8983 tree final_len = NULL_TREE;
8984 tree bias = NULL_TREE;
8985 if (!costing_p)
8987 if (loop_masks)
8988 final_mask = vect_get_loop_mask (loop_vinfo, gsi,
8989 loop_masks, ncopies,
8990 vectype, j);
8991 if (vec_mask)
8992 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8993 final_mask, vec_mask, gsi);
8996 if (gs_info.ifn != IFN_LAST)
8998 if (costing_p)
9000 unsigned int cnunits = vect_nunits_for_cost (vectype);
9001 inside_cost
9002 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9003 stmt_info, 0, vect_body);
9004 continue;
9007 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9008 vec_offset = vec_offsets[vec_num * j + i];
9009 tree scale = size_int (gs_info.scale);
9011 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9013 if (loop_lens)
9014 final_len = vect_get_loop_len (loop_vinfo, gsi,
9015 loop_lens, ncopies,
9016 vectype, j, 1);
9017 else
9018 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9019 signed char biasval
9020 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9021 bias = build_int_cst (intQI_type_node, biasval);
9022 if (!final_mask)
9024 mask_vectype = truth_type_for (vectype);
9025 final_mask = build_minus_one_cst (mask_vectype);
9029 gcall *call;
9030 if (final_len && final_mask)
9031 call = gimple_build_call_internal
9032 (IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
9033 vec_offset, scale, vec_oprnd, final_mask,
9034 final_len, bias);
9035 else if (final_mask)
9036 call = gimple_build_call_internal
9037 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
9038 vec_offset, scale, vec_oprnd, final_mask);
9039 else
9040 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9041 dataref_ptr, vec_offset,
9042 scale, vec_oprnd);
9043 gimple_call_set_nothrow (call, true);
9044 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9045 new_stmt = call;
9047 else if (gs_info.decl)
9049 /* The builtin decls path for scatter is legacy, x86 only. */
9050 gcc_assert (nunits.is_constant ()
9051 && (!final_mask
9052 || SCALAR_INT_MODE_P
9053 (TYPE_MODE (TREE_TYPE (final_mask)))));
9054 if (costing_p)
9056 unsigned int cnunits = vect_nunits_for_cost (vectype);
9057 inside_cost
9058 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9059 stmt_info, 0, vect_body);
9060 continue;
9062 poly_uint64 offset_nunits
9063 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
9064 if (known_eq (nunits, offset_nunits))
9066 new_stmt = vect_build_one_scatter_store_call
9067 (vinfo, stmt_info, gsi, &gs_info,
9068 dataref_ptr, vec_offsets[vec_num * j + i],
9069 vec_oprnd, final_mask);
9070 vect_finish_stmt_generation (vinfo, stmt_info,
9071 new_stmt, gsi);
9073 else if (known_eq (nunits, offset_nunits * 2))
9075 /* We have an offset vector with half the number of
9076 lanes but the builtins will store full vectype
9077 data from the lower lanes. */
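/* As an illustration (the modes are only an example): with V8SF data and
   V4DI offsets, COUNT below is 8 and the selector becomes
   { 4, 5, 6, 7, 4, 5, 6, 7 } (sel[i] = i | count/2).  The first call
   scatters data lanes 0-3 with one offset vector, the VEC_PERM_EXPR moves
   lanes 4-7 into the low half, the mask (if any) for the second call is
   its high half obtained via VEC_UNPACK_HI_EXPR, and the second call
   scatters those lanes with the following offset vector.  */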
9078 new_stmt = vect_build_one_scatter_store_call
9079 (vinfo, stmt_info, gsi, &gs_info,
9080 dataref_ptr,
9081 vec_offsets[2 * vec_num * j + 2 * i],
9082 vec_oprnd, final_mask);
9083 vect_finish_stmt_generation (vinfo, stmt_info,
9084 new_stmt, gsi);
9085 int count = nunits.to_constant ();
9086 vec_perm_builder sel (count, count, 1);
9087 sel.quick_grow (count);
9088 for (int i = 0; i < count; ++i)
9089 sel[i] = i | (count / 2);
9090 vec_perm_indices indices (sel, 2, count);
9091 tree perm_mask
9092 = vect_gen_perm_mask_checked (vectype, indices);
9093 new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
9094 vec_oprnd, vec_oprnd,
9095 perm_mask);
9096 vec_oprnd = make_ssa_name (vectype);
9097 gimple_set_lhs (new_stmt, vec_oprnd);
9098 vect_finish_stmt_generation (vinfo, stmt_info,
9099 new_stmt, gsi);
9100 if (final_mask)
9102 new_stmt = gimple_build_assign (NULL_TREE,
9103 VEC_UNPACK_HI_EXPR,
9104 final_mask);
9105 final_mask = make_ssa_name
9106 (truth_type_for (gs_info.offset_vectype));
9107 gimple_set_lhs (new_stmt, final_mask);
9108 vect_finish_stmt_generation (vinfo, stmt_info,
9109 new_stmt, gsi);
9111 new_stmt = vect_build_one_scatter_store_call
9112 (vinfo, stmt_info, gsi, &gs_info,
9113 dataref_ptr,
9114 vec_offsets[2 * vec_num * j + 2 * i + 1],
9115 vec_oprnd, final_mask);
9116 vect_finish_stmt_generation (vinfo, stmt_info,
9117 new_stmt, gsi);
9119 else if (known_eq (nunits * 2, offset_nunits))
9121 /* We have an offset vector with double the number of
9122 lanes. Select the low/high part accordingly. */
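/* As an illustration (the modes are only an example): with V4DF data and
   V8SI offsets, two data vectors share one offset vector.  The even
   instance passes it unchanged (only its low lanes are consumed); for the
   odd instance the selector built below is { 4, 5, 6, 7, 4, 5, 6, 7 },
   moving the high offsets into the low lanes before the call.  */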
9123 vec_offset = vec_offsets[(vec_num * j + i) / 2];
9124 if ((vec_num * j + i) & 1)
9126 int count = offset_nunits.to_constant ();
9127 vec_perm_builder sel (count, count, 1);
9128 sel.quick_grow (count);
9129 for (int i = 0; i < count; ++i)
9130 sel[i] = i | (count / 2);
9131 vec_perm_indices indices (sel, 2, count);
9132 tree perm_mask = vect_gen_perm_mask_checked
9133 (TREE_TYPE (vec_offset), indices);
9134 new_stmt = gimple_build_assign (NULL_TREE,
9135 VEC_PERM_EXPR,
9136 vec_offset,
9137 vec_offset,
9138 perm_mask);
9139 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
9140 gimple_set_lhs (new_stmt, vec_offset);
9141 vect_finish_stmt_generation (vinfo, stmt_info,
9142 new_stmt, gsi);
9144 new_stmt = vect_build_one_scatter_store_call
9145 (vinfo, stmt_info, gsi, &gs_info,
9146 dataref_ptr, vec_offset,
9147 vec_oprnd, final_mask);
9148 vect_finish_stmt_generation (vinfo, stmt_info,
9149 new_stmt, gsi);
9151 else
9152 gcc_unreachable ();
9154 else
9156 /* Emulated scatter. */
9157 gcc_assert (!final_mask);
9158 if (costing_p)
9160 unsigned int cnunits = vect_nunits_for_cost (vectype);
9161 /* For an emulated scatter, N offset vector element extracts
9162 (we assume the scalar scaling and the ptr + offset add are
9163 consumed by the store). */
9164 inside_cost
9165 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9166 stmt_info, 0, vect_body);
9167 /* N scalar stores plus extracting the elements. */
9168 inside_cost
9169 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9170 stmt_info, 0, vect_body);
9171 inside_cost
9172 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9173 stmt_info, 0, vect_body);
9174 continue;
9177 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9178 unsigned HOST_WIDE_INT const_offset_nunits
9179 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
9180 vec<constructor_elt, va_gc> *ctor_elts;
9181 vec_alloc (ctor_elts, const_nunits);
9182 gimple_seq stmts = NULL;
9183 tree elt_type = TREE_TYPE (vectype);
9184 unsigned HOST_WIDE_INT elt_size
9185 = tree_to_uhwi (TYPE_SIZE (elt_type));
9186 /* We support offset vectors with more elements
9187 than the data vector for now. */
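/* As an illustration (types are only an example, assuming one vector per
   copy): with V4SI data and V8SI offsets FACTOR is 2, so copies J = 0 and
   J = 1 share vec_offsets[0] and ELT_OFFSET selects its lanes 0-3
   resp. 4-7 for the scalar stores generated below.  */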
9188 unsigned HOST_WIDE_INT factor
9189 = const_offset_nunits / const_nunits;
9190 vec_offset = vec_offsets[(vec_num * j + i) / factor];
9191 unsigned elt_offset = (j % factor) * const_nunits;
9192 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9193 tree scale = size_int (gs_info.scale);
9194 align = get_object_alignment (DR_REF (first_dr_info->dr));
9195 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
9196 for (unsigned k = 0; k < const_nunits; ++k)
9198 /* Compute the offsetted pointer. */
9199 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9200 bitsize_int (k + elt_offset));
9201 tree idx
9202 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
9203 vec_offset, TYPE_SIZE (idx_type), boff);
9204 idx = gimple_convert (&stmts, sizetype, idx);
9205 idx = gimple_build (&stmts, MULT_EXPR, sizetype,
9206 idx, scale);
9207 tree ptr
9208 = gimple_build (&stmts, PLUS_EXPR,
9209 TREE_TYPE (dataref_ptr),
9210 dataref_ptr, idx);
9211 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9212 /* Extract the element to be stored. */
9213 tree elt
9214 = gimple_build (&stmts, BIT_FIELD_REF,
9215 TREE_TYPE (vectype),
9216 vec_oprnd, TYPE_SIZE (elt_type),
9217 bitsize_int (k * elt_size));
9218 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9219 stmts = NULL;
9220 tree ref
9221 = build2 (MEM_REF, ltype, ptr,
9222 build_int_cst (ref_type, 0));
9223 new_stmt = gimple_build_assign (ref, elt);
9224 vect_finish_stmt_generation (vinfo, stmt_info,
9225 new_stmt, gsi);
9227 if (slp)
9228 slp_node->push_vec_def (new_stmt);
9231 if (!slp && !costing_p)
9232 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9235 if (!slp && !costing_p)
9236 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9238 if (costing_p && dump_enabled_p ())
9239 dump_printf_loc (MSG_NOTE, vect_location,
9240 "vect_model_store_cost: inside_cost = %d, "
9241 "prologue_cost = %d .\n",
9242 inside_cost, prologue_cost);
9244 return true;
9247 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9248 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9249 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9250 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9252 unsigned inside_cost = 0, prologue_cost = 0;
9253 /* When costing some adjacent vector stores, we'd like to cost them
9254 once with their total number instead of costing each one by one. */
9255 unsigned int n_adjacent_stores = 0;
9256 auto_vec<tree> result_chain (group_size);
9257 auto_vec<tree, 1> vec_oprnds;
9258 for (j = 0; j < ncopies; j++)
9260 gimple *new_stmt;
9261 if (j == 0)
9263 if (slp && !costing_p)
9265 /* Get vectorized arguments for SLP_NODE. */
9266 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
9267 &vec_oprnds, mask, &vec_masks);
9268 vec_oprnd = vec_oprnds[0];
9269 if (mask)
9270 vec_mask = vec_masks[0];
9272 else
9274 /* For interleaved stores we collect vectorized defs for all the
9275 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9276 input to vect_permute_store_chain().
9278 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9279 is of size 1. */
9280 stmt_vec_info next_stmt_info = first_stmt_info;
9281 for (i = 0; i < group_size; i++)
9283 /* Since gaps are not supported for interleaved stores,
9284 DR_GROUP_SIZE is the exact number of stmts in the chain.
9285 Therefore, NEXT_STMT_INFO can't be NULL. In case
9286 there is no interleaving, DR_GROUP_SIZE is 1,
9287 and only one iteration of the loop will be executed. */
9288 op = vect_get_store_rhs (next_stmt_info);
9289 if (costing_p)
9290 update_prologue_cost (&prologue_cost, op);
9291 else
9293 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
9294 ncopies, op,
9295 gvec_oprnds[i]);
9296 vec_oprnd = (*gvec_oprnds[i])[0];
9297 dr_chain.quick_push (vec_oprnd);
9299 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9301 if (mask && !costing_p)
9303 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9304 mask, &vec_masks,
9305 mask_vectype);
9306 vec_mask = vec_masks[0];
9310 /* We should have caught mismatched types earlier. */
9311 gcc_assert (costing_p
9312 || useless_type_conversion_p (vectype,
9313 TREE_TYPE (vec_oprnd)));
9314 bool simd_lane_access_p
9315 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9316 if (!costing_p
9317 && simd_lane_access_p
9318 && !loop_masks
9319 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9320 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9321 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9322 && integer_zerop (DR_INIT (first_dr_info->dr))
9323 && alias_sets_conflict_p (get_alias_set (aggr_type),
9324 get_alias_set (TREE_TYPE (ref_type))))
9326 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9327 dataref_offset = build_int_cst (ref_type, 0);
9329 else if (!costing_p)
9330 dataref_ptr
9331 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9332 simd_lane_access_p ? loop : NULL,
9333 offset, &dummy, gsi, &ptr_incr,
9334 simd_lane_access_p, bump);
9336 else if (!costing_p)
9338 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9339 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9340 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9341 of size 1. */
9342 for (i = 0; i < group_size; i++)
9344 vec_oprnd = (*gvec_oprnds[i])[j];
9345 dr_chain[i] = vec_oprnd;
9347 if (mask)
9348 vec_mask = vec_masks[j];
9349 if (dataref_offset)
9350 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9351 else
9352 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9353 stmt_info, bump);
9356 new_stmt = NULL;
9357 if (grouped_store)
9359 /* Permute. */
9360 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9361 if (costing_p)
9363 int group_size = DR_GROUP_SIZE (first_stmt_info);
9364 int nstmts = ceil_log2 (group_size) * group_size;
9365 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
9366 stmt_info, 0, vect_body);
9367 if (dump_enabled_p ())
9368 dump_printf_loc (MSG_NOTE, vect_location,
9369 "vect_model_store_cost: "
9370 "strided group_size = %d .\n",
9371 group_size);
9373 else
9374 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9375 gsi, &result_chain);
9378 stmt_vec_info next_stmt_info = first_stmt_info;
9379 for (i = 0; i < vec_num; i++)
9381 if (!costing_p)
9383 if (slp)
9384 vec_oprnd = vec_oprnds[i];
9385 else if (grouped_store)
9386 /* For grouped stores vectorized defs are interleaved in
9387 vect_permute_store_chain(). */
9388 vec_oprnd = result_chain[i];
9391 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9393 if (costing_p)
9394 inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9395 stmt_info, 0, vect_body);
9396 else
9398 tree perm_mask = perm_mask_for_reverse (vectype);
9399 tree perm_dest = vect_create_destination_var (
9400 vect_get_store_rhs (stmt_info), vectype);
9401 tree new_temp = make_ssa_name (perm_dest);
9403 /* Generate the permute statement. */
9404 gimple *perm_stmt
9405 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9406 vec_oprnd, perm_mask);
9407 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
9408 gsi);
9410 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9411 vec_oprnd = new_temp;
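/* The permutation above reverses the lanes, e.g. with the selector
   { 3, 2, 1, 0 } for a four-lane vector, matching the downward direction
   of the VMAT_CONTIGUOUS_REVERSE access.  */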
9415 if (costing_p)
9417 n_adjacent_stores++;
9419 if (!slp)
9421 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9422 if (!next_stmt_info)
9423 break;
9426 continue;
9429 tree final_mask = NULL_TREE;
9430 tree final_len = NULL_TREE;
9431 tree bias = NULL_TREE;
9432 if (loop_masks)
9433 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9434 vec_num * ncopies, vectype,
9435 vec_num * j + i);
9436 if (slp && vec_mask)
9437 vec_mask = vec_masks[i];
9438 if (vec_mask)
9439 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9440 vec_mask, gsi);
9442 if (i > 0)
9443 /* Bump the vector pointer. */
9444 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9445 stmt_info, bump);
9447 unsigned misalign;
9448 unsigned HOST_WIDE_INT align;
9449 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9450 if (alignment_support_scheme == dr_aligned)
9451 misalign = 0;
9452 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9454 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9455 misalign = 0;
9457 else
9458 misalign = misalignment;
9459 if (dataref_offset == NULL_TREE
9460 && TREE_CODE (dataref_ptr) == SSA_NAME)
9461 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9462 misalign);
9463 align = least_bit_hwi (misalign | align);
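/* For example: with a 16-byte target alignment and a known misalignment
   of 4 bytes, least_bit_hwi (4 | 16) is 4, so the access is emitted with
   4-byte alignment.  */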
9465 /* Compute IFN when LOOP_LENS or final_mask valid. */
9466 machine_mode vmode = TYPE_MODE (vectype);
9467 machine_mode new_vmode = vmode;
9468 internal_fn partial_ifn = IFN_LAST;
9469 if (loop_lens)
9471 opt_machine_mode new_ovmode
9472 = get_len_load_store_mode (vmode, false, &partial_ifn);
9473 new_vmode = new_ovmode.require ();
9474 unsigned factor
9475 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9476 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9477 vec_num * ncopies, vectype,
9478 vec_num * j + i, factor);
9480 else if (final_mask)
9482 if (!can_vec_mask_load_store_p (
9483 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9484 &partial_ifn))
9485 gcc_unreachable ();
9488 if (partial_ifn == IFN_MASK_LEN_STORE)
9490 if (!final_len)
9492 /* Pass VF value to 'len' argument of
9493 MASK_LEN_STORE if LOOP_LENS is invalid. */
9494 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9496 if (!final_mask)
9498 /* Pass all ones value to 'mask' argument of
9499 MASK_LEN_STORE if final_mask is invalid. */
9500 mask_vectype = truth_type_for (vectype);
9501 final_mask = build_minus_one_cst (mask_vectype);
9504 if (final_len)
9506 signed char biasval
9507 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9509 bias = build_int_cst (intQI_type_node, biasval);
9512 /* Arguments are ready. Create the new vector stmt. */
9513 if (final_len)
9515 gcall *call;
9516 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9517 /* Need conversion if it's wrapped with VnQI. */
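/* A sketch of that case (modes are only an example): if the target
   provides a length-based store only for byte vectors (e.g. a byte-length
   store such as PowerPC's stxvl), NEW_VMODE is the corresponding VnQI
   mode, FINAL_LEN above was computed in bytes (FACTOR equal to the
   element size), and a V4SI operand is viewed as a 16-byte unsigned char
   vector before the call is emitted.  */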
9518 if (vmode != new_vmode)
9520 tree new_vtype
9521 = build_vector_type_for_mode (unsigned_intQI_type_node,
9522 new_vmode);
9523 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9524 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9525 gassign *new_stmt
9526 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9527 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9528 vec_oprnd = var;
9531 if (partial_ifn == IFN_MASK_LEN_STORE)
9532 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9533 dataref_ptr, ptr, final_mask,
9534 final_len, bias, vec_oprnd);
9535 else
9536 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9537 dataref_ptr, ptr, final_len,
9538 bias, vec_oprnd);
9539 gimple_call_set_nothrow (call, true);
9540 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9541 new_stmt = call;
9543 else if (final_mask)
9545 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9546 gcall *call
9547 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9548 ptr, final_mask, vec_oprnd);
9549 gimple_call_set_nothrow (call, true);
9550 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9551 new_stmt = call;
9553 else
9555 data_ref
9556 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9557 dataref_offset ? dataref_offset
9558 : build_int_cst (ref_type, 0));
9559 if (alignment_support_scheme == dr_aligned)
9561 else
9562 TREE_TYPE (data_ref)
9563 = build_aligned_type (TREE_TYPE (data_ref),
9564 align * BITS_PER_UNIT);
9565 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9566 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9567 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9570 if (slp)
9571 continue;
9573 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9574 if (!next_stmt_info)
9575 break;
9577 if (!slp && !costing_p)
9579 if (j == 0)
9580 *vec_stmt = new_stmt;
9581 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9585 if (costing_p)
9587 if (n_adjacent_stores > 0)
9588 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
9589 alignment_support_scheme, misalignment,
9590 &inside_cost, cost_vec);
9592 /* When vectorizing a store into the function result, assign
9593 a penalty if the function returns in a multi-register location.
9594 In this case we assume we'll end up having to spill the
9595 vector result and do piecewise loads, as a conservative estimate. */
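/* For example (illustrative numbers): if the value is returned in two
   registers, NREGS is 2 and we add NCOPIES vector_store entries (the
   spill) plus 2 * NCOPIES scalar_load entries to the epilogue cost.  */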
9596 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9597 if (base
9598 && (TREE_CODE (base) == RESULT_DECL
9599 || (DECL_P (base) && cfun_returns (base)))
9600 && !aggregate_value_p (base, cfun->decl))
9602 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9603 /* ??? Handle PARALLEL in some way. */
9604 if (REG_P (reg))
9606 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9607 /* Assume that a single reg-reg move is possible and cheap;
9608 do not account for the vector to gp register move cost. */
9609 if (nregs > 1)
9611 /* Spill. */
9612 prologue_cost
9613 += record_stmt_cost (cost_vec, ncopies, vector_store,
9614 stmt_info, 0, vect_epilogue);
9615 /* Loads. */
9616 prologue_cost
9617 += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
9618 stmt_info, 0, vect_epilogue);
9622 if (dump_enabled_p ())
9623 dump_printf_loc (MSG_NOTE, vect_location,
9624 "vect_model_store_cost: inside_cost = %d, "
9625 "prologue_cost = %d .\n",
9626 inside_cost, prologue_cost);
9629 return true;
9632 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9633 VECTOR_CST mask. No checks are made that the target platform supports the
9634 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9635 vect_gen_perm_mask_checked. */
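/* For example (illustrative): for a four-lane vectype and
   SEL = { 1, 0, 3, 2 } this returns the VECTOR_CST { 1, 0, 3, 2 } with
   ssizetype elements, usable as the third operand of a VEC_PERM_EXPR on
   that vectype.  */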
9637 tree
9638 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9640 tree mask_type;
9642 poly_uint64 nunits = sel.length ();
9643 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9645 mask_type = build_vector_type (ssizetype, nunits);
9646 return vec_perm_indices_to_tree (mask_type, sel);
9649 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9650 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9652 tree
9653 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9655 machine_mode vmode = TYPE_MODE (vectype);
9656 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9657 return vect_gen_perm_mask_any (vectype, sel);
9660 /* Given vector variables X and Y that were generated for the scalar
9661 stmt STMT_INFO, generate instructions to permute the vector elements of X
9662 and Y using the permutation mask MASK_VEC, insert them at *GSI and return
9663 the permuted vector variable. */
9665 static tree
9666 permute_vec_elements (vec_info *vinfo,
9667 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9668 gimple_stmt_iterator *gsi)
9670 tree vectype = TREE_TYPE (x);
9671 tree perm_dest, data_ref;
9672 gimple *perm_stmt;
9674 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9675 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9676 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9677 else
9678 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9679 data_ref = make_ssa_name (perm_dest);
9681 /* Generate the permute statement. */
9682 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9683 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9685 return data_ref;
9688 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9689 inserting them on the loop's preheader edge. Returns true if we
9690 were successful in doing so (and thus STMT_INFO can then be moved),
9691 otherwise returns false. HOIST_P indicates whether we actually want to
9692 hoist the definitions of all SSA uses; it is false when we are costing. */
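/* A sketch of the intent (hypothetical GIMPLE): for an invariant load
   _2 = MEM[_1] where _1 = base_10 + 16 is defined inside LOOP but only
   from loop-invariant operands, _1 is moved to the preheader so the load
   itself can be hoisted as well; if _1 depended on another in-loop
   definition or on a PHI we give up instead of recursing.  */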
9694 static bool
9695 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9697 ssa_op_iter i;
9698 tree op;
9699 bool any = false;
9701 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9703 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9704 if (!gimple_nop_p (def_stmt)
9705 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9707 /* Make sure we don't need to recurse. While we could do
9708 so in simple cases, for more complex use webs we don't
9709 have an easy way to preserve stmt order so as to fulfil
9710 dependencies within them. */
9711 tree op2;
9712 ssa_op_iter i2;
9713 if (gimple_code (def_stmt) == GIMPLE_PHI)
9714 return false;
9715 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9717 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9718 if (!gimple_nop_p (def_stmt2)
9719 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9720 return false;
9722 any = true;
9726 if (!any)
9727 return true;
9729 if (!hoist_p)
9730 return true;
9732 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9734 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9735 if (!gimple_nop_p (def_stmt)
9736 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9738 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9739 gsi_remove (&gsi, false);
9740 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9744 return true;
9747 /* vectorizable_load.
9749 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9750 that can be vectorized.
9751 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9752 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9753 Return true if STMT_INFO is vectorizable in this way. */
9755 static bool
9756 vectorizable_load (vec_info *vinfo,
9757 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9758 gimple **vec_stmt, slp_tree slp_node,
9759 stmt_vector_for_cost *cost_vec)
9761 tree scalar_dest;
9762 tree vec_dest = NULL;
9763 tree data_ref = NULL;
9764 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9765 class loop *loop = NULL;
9766 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9767 bool nested_in_vect_loop = false;
9768 tree elem_type;
9769 /* Avoid false positive uninitialized warning, see PR110652. */
9770 tree new_temp = NULL_TREE;
9771 machine_mode mode;
9772 tree dummy;
9773 tree dataref_ptr = NULL_TREE;
9774 tree dataref_offset = NULL_TREE;
9775 gimple *ptr_incr = NULL;
9776 int ncopies;
9777 int i, j;
9778 unsigned int group_size;
9779 poly_uint64 group_gap_adj;
9780 tree msq = NULL_TREE, lsq;
9781 tree realignment_token = NULL_TREE;
9782 gphi *phi = NULL;
9783 vec<tree> dr_chain = vNULL;
9784 bool grouped_load = false;
9785 stmt_vec_info first_stmt_info;
9786 stmt_vec_info first_stmt_info_for_drptr = NULL;
9787 bool compute_in_loop = false;
9788 class loop *at_loop;
9789 int vec_num;
9790 bool slp = (slp_node != NULL);
9791 bool slp_perm = false;
9792 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9793 poly_uint64 vf;
9794 tree aggr_type;
9795 gather_scatter_info gs_info;
9796 tree ref_type;
9797 enum vect_def_type mask_dt = vect_unknown_def_type;
9799 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9800 return false;
9802 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9803 && ! vec_stmt)
9804 return false;
9806 if (!STMT_VINFO_DATA_REF (stmt_info))
9807 return false;
9809 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9810 int mask_index = -1;
9811 slp_tree slp_op = NULL;
9812 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9814 scalar_dest = gimple_assign_lhs (assign);
9815 if (TREE_CODE (scalar_dest) != SSA_NAME)
9816 return false;
9818 tree_code code = gimple_assign_rhs_code (assign);
9819 if (code != ARRAY_REF
9820 && code != BIT_FIELD_REF
9821 && code != INDIRECT_REF
9822 && code != COMPONENT_REF
9823 && code != IMAGPART_EXPR
9824 && code != REALPART_EXPR
9825 && code != MEM_REF
9826 && TREE_CODE_CLASS (code) != tcc_declaration)
9827 return false;
9829 else
9831 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9832 if (!call || !gimple_call_internal_p (call))
9833 return false;
9835 internal_fn ifn = gimple_call_internal_fn (call);
9836 if (!internal_load_fn_p (ifn))
9837 return false;
9839 scalar_dest = gimple_call_lhs (call);
9840 if (!scalar_dest)
9841 return false;
9843 mask_index = internal_fn_mask_index (ifn);
9844 if (mask_index >= 0 && slp_node)
9845 mask_index = vect_slp_child_index_for_operand
9846 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9847 if (mask_index >= 0
9848 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9849 &mask, &slp_op, &mask_dt, &mask_vectype))
9850 return false;
9853 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9854 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9856 if (loop_vinfo)
9858 loop = LOOP_VINFO_LOOP (loop_vinfo);
9859 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9860 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9862 else
9863 vf = 1;
9865 /* Multiple types in SLP are handled by creating the appropriate number of
9866 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9867 case of SLP. */
9868 if (slp)
9869 ncopies = 1;
9870 else
9871 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9873 gcc_assert (ncopies >= 1);
9875 /* FORNOW. This restriction should be relaxed. */
9876 if (nested_in_vect_loop && ncopies > 1)
9878 if (dump_enabled_p ())
9879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9880 "multiple types in nested loop.\n");
9881 return false;
9884 /* Invalidate assumptions made by dependence analysis when vectorization
9885 on the unrolled body effectively re-orders stmts. */
9886 if (ncopies > 1
9887 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9888 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9889 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9891 if (dump_enabled_p ())
9892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9893 "cannot perform implicit CSE when unrolling "
9894 "with negative dependence distance\n");
9895 return false;
9898 elem_type = TREE_TYPE (vectype);
9899 mode = TYPE_MODE (vectype);
9901 /* FORNOW. In some cases we can vectorize even if the data type is
9902 not supported (e.g. data copies). */
9903 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9905 if (dump_enabled_p ())
9906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9907 "Aligned load, but unsupported type.\n");
9908 return false;
9911 /* Check if the load is a part of an interleaving chain. */
9912 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9914 grouped_load = true;
9915 /* FORNOW */
9916 gcc_assert (!nested_in_vect_loop);
9917 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9919 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9920 group_size = DR_GROUP_SIZE (first_stmt_info);
9922 /* Refuse non-SLP vectorization of SLP-only groups. */
9923 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9925 if (dump_enabled_p ())
9926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9927 "cannot vectorize load in non-SLP mode.\n");
9928 return false;
9931 /* Invalidate assumptions made by dependence analysis when vectorization
9932 on the unrolled body effectively re-orders stmts. */
9933 if (!PURE_SLP_STMT (stmt_info)
9934 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9935 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9936 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9938 if (dump_enabled_p ())
9939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9940 "cannot perform implicit CSE when performing "
9941 "group loads with negative dependence distance\n");
9942 return false;
9945 else
9946 group_size = 1;
9948 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9950 slp_perm = true;
9952 if (!loop_vinfo)
9954 /* In BB vectorization we must not use a loaded vector that
9955 accesses elements in excess of DR_GROUP_SIZE. */
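/* For example (illustrative numbers): with DR_GROUP_SIZE 6 and a
   four-lane vectype, 6 & ~3 is 4, so a load permutation referencing
   element 4 or 5 is rejected below because the vector holding those
   elements would also read the gap past the end of the group.  */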
9956 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9957 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9958 unsigned HOST_WIDE_INT nunits;
9959 unsigned j, k, maxk = 0;
9960 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9961 if (k > maxk)
9962 maxk = k;
9963 tree vectype = SLP_TREE_VECTYPE (slp_node);
9964 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9965 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9967 if (dump_enabled_p ())
9968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9969 "BB vectorization with gaps at the end of "
9970 "a load is not supported\n");
9971 return false;
9975 auto_vec<tree> tem;
9976 unsigned n_perms;
9977 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9978 true, &n_perms))
9980 if (dump_enabled_p ())
9981 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9982 vect_location,
9983 "unsupported load permutation\n");
9984 return false;
9988 vect_memory_access_type memory_access_type;
9989 enum dr_alignment_support alignment_support_scheme;
9990 int misalignment;
9991 poly_int64 poffset;
9992 internal_fn lanes_ifn;
9993 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
9994 ncopies, &memory_access_type, &poffset,
9995 &alignment_support_scheme, &misalignment, &gs_info,
9996 &lanes_ifn))
9997 return false;
9999 if (mask)
10001 if (memory_access_type == VMAT_CONTIGUOUS)
10003 machine_mode vec_mode = TYPE_MODE (vectype);
10004 if (!VECTOR_MODE_P (vec_mode)
10005 || !can_vec_mask_load_store_p (vec_mode,
10006 TYPE_MODE (mask_vectype), true))
10007 return false;
10009 else if (memory_access_type != VMAT_LOAD_STORE_LANES
10010 && memory_access_type != VMAT_GATHER_SCATTER)
10012 if (dump_enabled_p ())
10013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10014 "unsupported access type for masked load.\n");
10015 return false;
10017 else if (memory_access_type == VMAT_GATHER_SCATTER
10018 && gs_info.ifn == IFN_LAST
10019 && !gs_info.decl)
10021 if (dump_enabled_p ())
10022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10023 "unsupported masked emulated gather.\n");
10024 return false;
10028 bool costing_p = !vec_stmt;
10030 if (costing_p) /* transformation not required. */
10032 if (slp_node
10033 && mask
10034 && !vect_maybe_update_slp_op_vectype (slp_op,
10035 mask_vectype))
10037 if (dump_enabled_p ())
10038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10039 "incompatible vector types for invariants\n");
10040 return false;
10043 if (!slp)
10044 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10046 if (loop_vinfo
10047 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10048 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10049 VLS_LOAD, group_size,
10050 memory_access_type, &gs_info,
10051 mask);
10053 if (dump_enabled_p ()
10054 && memory_access_type != VMAT_ELEMENTWISE
10055 && memory_access_type != VMAT_GATHER_SCATTER
10056 && alignment_support_scheme != dr_aligned)
10057 dump_printf_loc (MSG_NOTE, vect_location,
10058 "Vectorizing an unaligned access.\n");
10060 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10061 vinfo->any_known_not_updated_vssa = true;
10063 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10066 if (!slp)
10067 gcc_assert (memory_access_type
10068 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10070 if (dump_enabled_p () && !costing_p)
10071 dump_printf_loc (MSG_NOTE, vect_location,
10072 "transform load. ncopies = %d\n", ncopies);
10074 /* Transform. */
10076 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10077 ensure_base_align (dr_info);
10079 if (memory_access_type == VMAT_INVARIANT)
10081 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10082 /* If we have versioned for aliasing or the loop doesn't
10083 have any data dependencies that would preclude this,
10084 then we are sure this is a loop invariant load and
10085 thus we can insert it on the preheader edge.
10086 TODO: hoist_defs_of_uses should ideally be computed
10087 once at analysis time, remembered and used at
10088 transform time. */
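/* A sketch of the result when hoisting is possible (hypothetical
   GIMPLE): the scalar load x_1 = *p_2 is re-issued on the preheader
   edge and broadcast there by vect_init_vector, so the vectorized loop
   body contains neither the load nor the splat; otherwise the broadcast
   is emitted in the loop body right after GSI.  */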
10089 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10090 && !nested_in_vect_loop
10091 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
10092 if (costing_p)
10094 enum vect_cost_model_location cost_loc
10095 = hoist_p ? vect_prologue : vect_body;
10096 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10097 stmt_info, 0, cost_loc);
10098 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
10099 cost_loc);
10100 unsigned int prologue_cost = hoist_p ? cost : 0;
10101 unsigned int inside_cost = hoist_p ? 0 : cost;
10102 if (dump_enabled_p ())
10103 dump_printf_loc (MSG_NOTE, vect_location,
10104 "vect_model_load_cost: inside_cost = %d, "
10105 "prologue_cost = %d .\n",
10106 inside_cost, prologue_cost);
10107 return true;
10109 if (hoist_p)
10111 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
10112 if (dump_enabled_p ())
10113 dump_printf_loc (MSG_NOTE, vect_location,
10114 "hoisting out of the vectorized loop: %G",
10115 (gimple *) stmt);
10116 scalar_dest = copy_ssa_name (scalar_dest);
10117 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10118 edge pe = loop_preheader_edge (loop);
10119 gphi *vphi = get_virtual_phi (loop->header);
10120 tree vuse;
10121 if (vphi)
10122 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10123 else
10124 vuse = gimple_vuse (gsi_stmt (*gsi));
10125 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10126 gimple_set_vuse (new_stmt, vuse);
10127 gsi_insert_on_edge_immediate (pe, new_stmt);
10129 /* These copies are all equivalent. */
10130 if (hoist_p)
10131 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10132 vectype, NULL);
10133 else
10135 gimple_stmt_iterator gsi2 = *gsi;
10136 gsi_next (&gsi2);
10137 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10138 vectype, &gsi2);
10140 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10141 if (slp)
10142 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10143 slp_node->push_vec_def (new_stmt);
10144 else
10146 for (j = 0; j < ncopies; ++j)
10147 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10148 *vec_stmt = new_stmt;
10150 return true;
10153 if (memory_access_type == VMAT_ELEMENTWISE
10154 || memory_access_type == VMAT_STRIDED_SLP)
10156 gimple_stmt_iterator incr_gsi;
10157 bool insert_after;
10158 tree offvar;
10159 tree ivstep;
10160 tree running_off;
10161 vec<constructor_elt, va_gc> *v = NULL;
10162 tree stride_base, stride_step, alias_off;
10163 /* Checked by get_load_store_type. */
10164 unsigned int const_nunits = nunits.to_constant ();
10165 unsigned HOST_WIDE_INT cst_offset = 0;
10166 tree dr_offset;
10167 unsigned int inside_cost = 0;
10169 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10170 gcc_assert (!nested_in_vect_loop);
10172 if (grouped_load)
10174 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10175 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10177 else
10179 first_stmt_info = stmt_info;
10180 first_dr_info = dr_info;
10183 if (slp && grouped_load)
10185 group_size = DR_GROUP_SIZE (first_stmt_info);
10186 ref_type = get_group_alias_ptr_type (first_stmt_info);
10188 else
10190 if (grouped_load)
10191 cst_offset
10192 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10193 * vect_get_place_in_interleaving_chain (stmt_info,
10194 first_stmt_info));
10195 group_size = 1;
10196 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10199 if (!costing_p)
10201 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10202 stride_base = fold_build_pointer_plus (
10203 DR_BASE_ADDRESS (first_dr_info->dr),
10204 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10205 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10206 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10208 /* For a load with loop-invariant (but other than power-of-2)
10209 stride (i.e. not a grouped access) like so:
10211 for (i = 0; i < n; i += stride)
10212 ... = array[i];
10214 we generate a new induction variable and new accesses to
10215 form a new vector (or vectors, depending on ncopies):
10217 for (j = 0; ; j += VF*stride)
10218 tmp1 = array[j];
10219 tmp2 = array[j + stride];
10221 vectemp = {tmp1, tmp2, ...}
10224 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10225 build_int_cst (TREE_TYPE (stride_step), vf));
10227 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10229 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10230 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10231 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10232 loop, &incr_gsi, insert_after,
10233 &offvar, NULL);
10235 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10238 running_off = offvar;
10239 alias_off = build_int_cst (ref_type, 0);
10240 int nloads = const_nunits;
10241 int lnel = 1;
10242 tree ltype = TREE_TYPE (vectype);
10243 tree lvectype = vectype;
10244 auto_vec<tree> dr_chain;
10245 if (memory_access_type == VMAT_STRIDED_SLP)
10247 if (group_size < const_nunits)
10249 /* First check if vec_init optab supports construction from vector
10250 elts directly. Otherwise avoid emitting a constructor of
10251 vector elements by performing the loads using an integer type
10252 of the same size, constructing a vector of those and then
10253 re-interpreting it as the original vector type. This avoids a
10254 huge runtime penalty due to the general inability to perform
10255 store forwarding from smaller stores to a larger load. */
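/* As an illustration (types are only an example, assuming the vector
   cannot be built directly from two-element subvectors): for GROUP_SIZE 2
   and a V8HI vectype, each of the four loads below reads one group as a
   single 32-bit integer, the integers are collected in a V4SI
   constructor, and the result is VIEW_CONVERT_EXPRed back to V8HI.  */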
10256 tree ptype;
10257 tree vtype
10258 = vector_vector_composition_type (vectype,
10259 const_nunits / group_size,
10260 &ptype);
10261 if (vtype != NULL_TREE)
10263 nloads = const_nunits / group_size;
10264 lnel = group_size;
10265 lvectype = vtype;
10266 ltype = ptype;
10269 else
10271 nloads = 1;
10272 lnel = const_nunits;
10273 ltype = vectype;
10275 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10277 /* Load vector(1) scalar_type if the vectype is a single-element vector. */
10278 else if (nloads == 1)
10279 ltype = vectype;
10281 if (slp)
10283 /* For SLP permutation support we need to load the whole group,
10284 not only the number of vector stmts the permutation result
10285 fits in. */
10286 if (slp_perm)
10288 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10289 variable VF. */
10290 unsigned int const_vf = vf.to_constant ();
10291 ncopies = CEIL (group_size * const_vf, const_nunits);
10292 dr_chain.create (ncopies);
10294 else
10295 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10297 unsigned int group_el = 0;
10298 unsigned HOST_WIDE_INT
10299 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10300 unsigned int n_groups = 0;
10301 /* When costing some adjacent vector loads, we'd like to cost them
10302 once with their total number instead of costing each one by one. */
10303 unsigned int n_adjacent_loads = 0;
10304 for (j = 0; j < ncopies; j++)
10306 if (nloads > 1 && !costing_p)
10307 vec_alloc (v, nloads);
10308 gimple *new_stmt = NULL;
10309 for (i = 0; i < nloads; i++)
10311 if (costing_p)
10313 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10314 avoid ICE, see PR110776. */
10315 if (VECTOR_TYPE_P (ltype)
10316 && memory_access_type != VMAT_ELEMENTWISE)
10317 n_adjacent_loads++;
10318 else
10319 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10320 stmt_info, 0, vect_body);
10321 continue;
10323 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10324 group_el * elsz + cst_offset);
10325 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10326 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10327 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
10328 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10329 if (nloads > 1)
10330 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10331 gimple_assign_lhs (new_stmt));
10333 group_el += lnel;
10334 if (! slp
10335 || group_el == group_size)
10337 n_groups++;
10338 /* When doing SLP make sure not to load elements from
10339 the next vector iteration; those will not be accessed,
10340 so just use the last element again. See PR107451. */
10341 if (!slp || known_lt (n_groups, vf))
10343 tree newoff = copy_ssa_name (running_off);
10344 gimple *incr
10345 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10346 running_off, stride_step);
10347 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10348 running_off = newoff;
10350 group_el = 0;
10354 if (nloads > 1)
10356 if (costing_p)
10357 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10358 stmt_info, 0, vect_body);
10359 else
10361 tree vec_inv = build_constructor (lvectype, v);
10362 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10363 lvectype, gsi);
10364 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10365 if (lvectype != vectype)
10367 new_stmt
10368 = gimple_build_assign (make_ssa_name (vectype),
10369 VIEW_CONVERT_EXPR,
10370 build1 (VIEW_CONVERT_EXPR,
10371 vectype, new_temp));
10372 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10373 gsi);
10378 if (!costing_p)
10380 if (slp)
10382 if (slp_perm)
10383 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10384 else
10385 slp_node->push_vec_def (new_stmt);
10387 else
10389 if (j == 0)
10390 *vec_stmt = new_stmt;
10391 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10395 if (slp_perm)
10397 unsigned n_perms;
10398 if (costing_p)
10400 unsigned n_loads;
10401 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10402 true, &n_perms, &n_loads);
10403 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10404 first_stmt_info, 0, vect_body);
10406 else
10407 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10408 false, &n_perms);
10411 if (costing_p)
10413 if (n_adjacent_loads > 0)
10414 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10415 alignment_support_scheme, misalignment, false,
10416 &inside_cost, nullptr, cost_vec, cost_vec,
10417 true);
10418 if (dump_enabled_p ())
10419 dump_printf_loc (MSG_NOTE, vect_location,
10420 "vect_model_load_cost: inside_cost = %u, "
10421 "prologue_cost = 0 .\n",
10422 inside_cost);
10425 return true;
10428 if (memory_access_type == VMAT_GATHER_SCATTER
10429 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10430 grouped_load = false;
10432 if (grouped_load
10433 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10435 if (grouped_load)
10437 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10438 group_size = DR_GROUP_SIZE (first_stmt_info);
10440 else
10442 first_stmt_info = stmt_info;
10443 group_size = 1;
10445 /* For SLP vectorization we directly vectorize a subchain
10446 without permutation. */
10447 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10448 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10449 /* For BB vectorization always use the first stmt to base
10450 the data ref pointer on. */
10451 if (bb_vinfo)
10452 first_stmt_info_for_drptr
10453 = vect_find_first_scalar_stmt_in_slp (slp_node);
10455 /* Check if the chain of loads is already vectorized. */
10456 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10457 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10458 ??? But we can only do so if there is exactly one
10459 as we have no way to get at the rest. Leave the CSE
10460 opportunity alone.
10461 ??? With the group load eventually participating
10462 in multiple different permutations (having multiple
10463 slp nodes which refer to the same group) the CSE
10464 would even produce wrong code. See PR56270. */
10465 && !slp)
10467 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10468 return true;
10470 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10471 group_gap_adj = 0;
10473 /* VEC_NUM is the number of vect stmts to be created for this group. */
10474 if (slp)
10476 grouped_load = false;
10477 /* If an SLP permutation is from N elements to N elements,
10478 and if one vector holds a whole number of N, we can load
10479 the inputs to the permutation in the same way as an
10480 unpermuted sequence. In other cases we need to load the
10481 whole group, not only the number of vector stmts the
10482 permutation result fits in. */
10483 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10484 if (slp_perm
10485 && (group_size != scalar_lanes
10486 || !multiple_p (nunits, group_size)))
10488 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10489 variable VF; see vect_transform_slp_perm_load. */
10490 unsigned int const_vf = vf.to_constant ();
10491 unsigned int const_nunits = nunits.to_constant ();
10492 vec_num = CEIL (group_size * const_vf, const_nunits);
10493 group_gap_adj = vf * group_size - nunits * vec_num;
10495 else
10497 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10498 group_gap_adj
10499 = group_size - scalar_lanes;
10502 else
10503 vec_num = group_size;
10505 ref_type = get_group_alias_ptr_type (first_stmt_info);
10507 else
10509 first_stmt_info = stmt_info;
10510 first_dr_info = dr_info;
10511 group_size = vec_num = 1;
10512 group_gap_adj = 0;
10513 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10514 if (slp)
10515 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10518 gcc_assert (alignment_support_scheme);
10519 vec_loop_masks *loop_masks
10520 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10521 ? &LOOP_VINFO_MASKS (loop_vinfo)
10522 : NULL);
10523 vec_loop_lens *loop_lens
10524 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10525 ? &LOOP_VINFO_LENS (loop_vinfo)
10526 : NULL);
10528 /* We shouldn't use the length-based approach if fully masked. */
10529 gcc_assert (!loop_lens || !loop_masks);
10531 /* Targets with load-lane instructions must not require explicit
10532 realignment. vect_supportable_dr_alignment always returns either
10533 dr_aligned or dr_unaligned_supported for masked operations. */
10534 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10535 && !mask
10536 && !loop_masks)
10537 || alignment_support_scheme == dr_aligned
10538 || alignment_support_scheme == dr_unaligned_supported);
10540 /* In case the vectorization factor (VF) is bigger than the number
10541 of elements that we can fit in a vectype (nunits), we have to generate
10542 more than one vector stmt - i.e - we need to "unroll" the
10543 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10544 from one copy of the vector stmt to the next, in the field
10545 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10546 stages to find the correct vector defs to be used when vectorizing
10547 stmts that use the defs of the current stmt. The example below
10548 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10549 need to create 4 vectorized stmts):
10551 before vectorization:
10552 RELATED_STMT VEC_STMT
10553 S1: x = memref - -
10554 S2: z = x + 1 - -
10556 step 1: vectorize stmt S1:
10557 We first create the vector stmt VS1_0, and, as usual, record a
10558 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10559 Next, we create the vector stmt VS1_1, and record a pointer to
10560 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10561 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10562 stmts and pointers:
10563 RELATED_STMT VEC_STMT
10564 VS1_0: vx0 = memref0 VS1_1 -
10565 VS1_1: vx1 = memref1 VS1_2 -
10566 VS1_2: vx2 = memref2 VS1_3 -
10567 VS1_3: vx3 = memref3 - -
10568 S1: x = load - VS1_0
10569 S2: z = x + 1 - -
10572 /* In case of interleaving (non-unit grouped access):
10574 S1: x2 = &base + 2
10575 S2: x0 = &base
10576 S3: x1 = &base + 1
10577 S4: x3 = &base + 3
10579 Vectorized loads are created in the order of memory accesses
10580 starting from the access of the first stmt of the chain:
10582 VS1: vx0 = &base
10583 VS2: vx1 = &base + vec_size*1
10584 VS3: vx2 = &base + vec_size*2
10585 VS4: vx3 = &base + vec_size*3
10587 Then permutation statements are generated:
10589 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10590 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10593 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10594 (the order of the data-refs in the output of vect_permute_load_chain
10595 corresponds to the order of scalar stmts in the interleaving chain - see
10596 the documentation of vect_permute_load_chain()).
10597 The generation of permutation stmts and recording them in
10598 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10600 In case of both multiple types and interleaving, the vector loads and
10601 permutation stmts above are created for every copy. The result vector
10602 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10603 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10605 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10606 on a target that supports unaligned accesses (dr_unaligned_supported)
10607 we generate the following code:
10608 p = initial_addr;
10609 indx = 0;
10610 loop {
10611 p = p + indx * vectype_size;
10612 vec_dest = *(p);
10613 indx = indx + 1;
10616 Otherwise, the data reference is potentially unaligned on a target that
10617 does not support unaligned accesses (dr_explicit_realign_optimized) -
10618 then generate the following code, in which the data in each iteration is
10619 obtained by two vector loads, one from the previous iteration, and one
10620 from the current iteration:
10621 p1 = initial_addr;
10622 msq_init = *(floor(p1))
10623 p2 = initial_addr + VS - 1;
10624 realignment_token = call target_builtin;
10625 indx = 0;
10626 loop {
10627 p2 = p2 + indx * vectype_size
10628 lsq = *(floor(p2))
10629 vec_dest = realign_load (msq, lsq, realignment_token)
10630 indx = indx + 1;
10631 msq = lsq;
10632 } */
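/* (Illustrative note: REALIGN_LOAD conceptually selects the misaligned
   vector out of the concatenation of msq and lsq.  E.g. with 16-byte
   vectors and an address that is 4 bytes past an alignment boundary it
   yields bytes 4..15 of msq followed by bytes 0..3 of lsq, the selection
   being controlled by realignment_token.)  */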
10634 /* If the misalignment remains the same throughout the execution of the
10635 loop, we can create the init_addr and permutation mask at the loop
10636 preheader. Otherwise, it needs to be created inside the loop.
10637 This can only occur when vectorizing memory accesses in the inner-loop
10638 nested within an outer-loop that is being vectorized. */
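/* Illustrative example: when vectorizing an outer loop, a load in the
   nested inner loop executes once per inner iteration and its address
   advances by the inner-loop step each time; unless that step is known
   to be a multiple of the vector size, the misalignment varies inside
   the vectorized loop body, so the realignment data (init_addr and the
   permutation mask) must be computed in the loop, not the preheader.  */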
10640 if (nested_in_vect_loop
10641 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10642 GET_MODE_SIZE (TYPE_MODE (vectype))))
10644 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10645 compute_in_loop = true;
10648 bool diff_first_stmt_info
10649 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10651 tree offset = NULL_TREE;
10652 if ((alignment_support_scheme == dr_explicit_realign_optimized
10653 || alignment_support_scheme == dr_explicit_realign)
10654 && !compute_in_loop)
10656 /* If we have a different first_stmt_info, we can't set up the
10657 realignment here, since we can't guarantee that the first_stmt_info
10658 DR has been initialized yet; instead use the first_stmt_info_for_drptr
10659 DR, bumping by its distance from the first_stmt_info DR, as below. */
10660 if (!costing_p)
10662 if (!diff_first_stmt_info)
10663 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10664 &realignment_token,
10665 alignment_support_scheme, NULL_TREE,
10666 &at_loop);
10667 if (alignment_support_scheme == dr_explicit_realign_optimized)
10669 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10670 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10671 size_one_node);
10672 gcc_assert (!first_stmt_info_for_drptr);
10676 else
10677 at_loop = loop;
10679 if (!known_eq (poffset, 0))
10680 offset = (offset
10681 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10682 : size_int (poffset));
10684 tree bump;
10685 tree vec_offset = NULL_TREE;
10686 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10688 aggr_type = NULL_TREE;
10689 bump = NULL_TREE;
10691 else if (memory_access_type == VMAT_GATHER_SCATTER)
10693 aggr_type = elem_type;
10694 if (!costing_p)
10695 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10696 &bump, &vec_offset, loop_lens);
10698 else
10700 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10701 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10702 else
10703 aggr_type = vectype;
10704 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10705 memory_access_type, loop_lens);
10708 auto_vec<tree> vec_offsets;
10709 auto_vec<tree> vec_masks;
10710 if (mask && !costing_p)
10712 if (slp_node)
10713 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10714 &vec_masks);
10715 else
10716 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10717 &vec_masks, mask_vectype);
10720 tree vec_mask = NULL_TREE;
10721 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10723 gcc_assert (alignment_support_scheme == dr_aligned
10724 || alignment_support_scheme == dr_unaligned_supported);
10725 gcc_assert (grouped_load && !slp);
10727 unsigned int inside_cost = 0, prologue_cost = 0;
10728 /* For costing some adjacent vector loads, we'd like to cost them
10729 once with their total number instead of costing each one by one. */
10730 unsigned int n_adjacent_loads = 0;
10731 for (j = 0; j < ncopies; j++)
10733 if (costing_p)
10735 /* An IFN_LOAD_LANES will load all its vector results,
10736 regardless of which ones we actually need. Account
10737 for the cost of unused results. */
10738 if (first_stmt_info == stmt_info)
10740 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10741 stmt_vec_info next_stmt_info = first_stmt_info;
10744 gaps -= 1;
10745 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10747 while (next_stmt_info);
10748 if (gaps)
10750 if (dump_enabled_p ())
10751 dump_printf_loc (MSG_NOTE, vect_location,
10752 "vect_model_load_cost: %d "
10753 "unused vectors.\n",
10754 gaps);
10755 vect_get_load_cost (vinfo, stmt_info, gaps,
10756 alignment_support_scheme,
10757 misalignment, false, &inside_cost,
10758 &prologue_cost, cost_vec, cost_vec,
10759 true);
10762 n_adjacent_loads++;
10763 continue;
10766 /* 1. Create the vector or array pointer update chain. */
10767 if (j == 0)
10768 dataref_ptr
10769 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10770 at_loop, offset, &dummy, gsi,
10771 &ptr_incr, false, bump);
10772 else
10774 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10775 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10776 stmt_info, bump);
10778 if (mask)
10779 vec_mask = vec_masks[j];
10781 tree vec_array = create_vector_array (vectype, vec_num);
10783 tree final_mask = NULL_TREE;
10784 tree final_len = NULL_TREE;
10785 tree bias = NULL_TREE;
10786 if (loop_masks)
10787 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10788 ncopies, vectype, j);
10789 if (vec_mask)
10790 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10791 vec_mask, gsi);
10793 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10795 if (loop_lens)
10796 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10797 ncopies, vectype, j, 1);
10798 else
10799 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10800 signed char biasval
10801 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10802 bias = build_int_cst (intQI_type_node, biasval);
10803 if (!final_mask)
10805 mask_vectype = truth_type_for (vectype);
10806 final_mask = build_minus_one_cst (mask_vectype);
10810 gcall *call;
10811 if (final_len && final_mask)
10813 /* Emit:
10814 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10815 VEC_MASK, LEN, BIAS). */
10816 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10817 tree alias_ptr = build_int_cst (ref_type, align);
10818 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10819 dataref_ptr, alias_ptr,
10820 final_mask, final_len, bias);
10822 else if (final_mask)
10824 /* Emit:
10825 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10826 VEC_MASK). */
10827 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10828 tree alias_ptr = build_int_cst (ref_type, align);
10829 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10830 dataref_ptr, alias_ptr,
10831 final_mask);
10833 else
10835 /* Emit:
10836 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10837 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10838 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10840 gimple_call_set_lhs (call, vec_array);
10841 gimple_call_set_nothrow (call, true);
10842 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10844 dr_chain.create (vec_num);
10845 /* Extract each vector into an SSA_NAME. */
10846 for (i = 0; i < vec_num; i++)
10848 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10849 vec_array, i);
10850 dr_chain.quick_push (new_temp);
10853 /* Record the mapping between SSA_NAMEs and statements. */
10854 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10856 /* Record that VEC_ARRAY is now dead. */
10857 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10859 dr_chain.release ();
10861 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10864 if (costing_p)
10866 if (n_adjacent_loads > 0)
10867 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10868 alignment_support_scheme, misalignment, false,
10869 &inside_cost, &prologue_cost, cost_vec,
10870 cost_vec, true);
10871 if (dump_enabled_p ())
10872 dump_printf_loc (MSG_NOTE, vect_location,
10873 "vect_model_load_cost: inside_cost = %u, "
10874 "prologue_cost = %u .\n",
10875 inside_cost, prologue_cost);
10878 return true;
10881 if (memory_access_type == VMAT_GATHER_SCATTER)
10883 gcc_assert (alignment_support_scheme == dr_aligned
10884 || alignment_support_scheme == dr_unaligned_supported);
10885 gcc_assert (!grouped_load && !slp_perm);
10887 unsigned int inside_cost = 0, prologue_cost = 0;
10888 for (j = 0; j < ncopies; j++)
10890 /* 1. Create the vector or array pointer update chain. */
10891 if (j == 0 && !costing_p)
10893 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10894 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10895 slp_node, &gs_info, &dataref_ptr,
10896 &vec_offsets);
10897 else
10898 dataref_ptr
10899 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10900 at_loop, offset, &dummy, gsi,
10901 &ptr_incr, false, bump);
10903 else if (!costing_p)
10905 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10906 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10907 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10908 gsi, stmt_info, bump);
10911 gimple *new_stmt = NULL;
10912 for (i = 0; i < vec_num; i++)
10914 tree final_mask = NULL_TREE;
10915 tree final_len = NULL_TREE;
10916 tree bias = NULL_TREE;
10917 if (!costing_p)
10919 if (mask)
10920 vec_mask = vec_masks[vec_num * j + i];
10921 if (loop_masks)
10922 final_mask
10923 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10924 vec_num * ncopies, vectype,
10925 vec_num * j + i);
10926 if (vec_mask)
10927 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10928 final_mask, vec_mask, gsi);
10930 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10931 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10932 gsi, stmt_info, bump);
10935 /* 2. Create the vector-load in the loop. */
10936 unsigned HOST_WIDE_INT align;
10937 if (gs_info.ifn != IFN_LAST)
10939 if (costing_p)
10941 unsigned int cnunits = vect_nunits_for_cost (vectype);
10942 inside_cost
10943 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10944 stmt_info, 0, vect_body);
10945 continue;
10947 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10948 vec_offset = vec_offsets[vec_num * j + i];
10949 tree zero = build_zero_cst (vectype);
10950 tree scale = size_int (gs_info.scale);
10952 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10954 if (loop_lens)
10955 final_len
10956 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10957 vec_num * ncopies, vectype,
10958 vec_num * j + i, 1);
10959 else
10960 final_len
10961 = build_int_cst (sizetype,
10962 TYPE_VECTOR_SUBPARTS (vectype));
10963 signed char biasval
10964 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10965 bias = build_int_cst (intQI_type_node, biasval);
10966 if (!final_mask)
10968 mask_vectype = truth_type_for (vectype);
10969 final_mask = build_minus_one_cst (mask_vectype);
10973 gcall *call;
10974 if (final_len && final_mask)
10975 call
10976 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
10977 dataref_ptr, vec_offset,
10978 scale, zero, final_mask,
10979 final_len, bias);
10980 else if (final_mask)
10981 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
10982 dataref_ptr, vec_offset,
10983 scale, zero, final_mask);
10984 else
10985 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
10986 dataref_ptr, vec_offset,
10987 scale, zero);
10988 gimple_call_set_nothrow (call, true);
10989 new_stmt = call;
10990 data_ref = NULL_TREE;
10992 else if (gs_info.decl)
10994 /* The builtin decls path for gather is legacy, x86 only. */
10995 gcc_assert (!final_len && nunits.is_constant ());
10996 if (costing_p)
10998 unsigned int cnunits = vect_nunits_for_cost (vectype);
10999 inside_cost
11000 = record_stmt_cost (cost_vec, cnunits, scalar_load,
11001 stmt_info, 0, vect_body);
11002 continue;
11004 poly_uint64 offset_nunits
11005 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
11006 if (known_eq (nunits, offset_nunits))
11008 new_stmt = vect_build_one_gather_load_call
11009 (vinfo, stmt_info, gsi, &gs_info,
11010 dataref_ptr, vec_offsets[vec_num * j + i],
11011 final_mask);
11012 data_ref = NULL_TREE;
11014 else if (known_eq (nunits, offset_nunits * 2))
11016 /* We have an offset vector with half the number of
11017 lanes, but the builtins will produce full vectype
11018 data with just the lower lanes filled. */
11019 new_stmt = vect_build_one_gather_load_call
11020 (vinfo, stmt_info, gsi, &gs_info,
11021 dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
11022 final_mask);
11023 tree low = make_ssa_name (vectype);
11024 gimple_set_lhs (new_stmt, low);
11025 vect_finish_stmt_generation (vinfo, stmt_info,
11026 new_stmt, gsi);
11028 /* Now put the upper half of final_mask into its lower half. */
11029 if (final_mask
11030 && !SCALAR_INT_MODE_P
11031 (TYPE_MODE (TREE_TYPE (final_mask))))
11033 int count = nunits.to_constant ();
11034 vec_perm_builder sel (count, count, 1);
11035 sel.quick_grow (count);
11036 for (int i = 0; i < count; ++i)
11037 sel[i] = i | (count / 2);
11038 vec_perm_indices indices (sel, 2, count);
11039 tree perm_mask = vect_gen_perm_mask_checked
11040 (TREE_TYPE (final_mask), indices);
11041 new_stmt = gimple_build_assign (NULL_TREE,
11042 VEC_PERM_EXPR,
11043 final_mask,
11044 final_mask,
11045 perm_mask);
11046 final_mask = make_ssa_name (TREE_TYPE (final_mask));
11047 gimple_set_lhs (new_stmt, final_mask);
11048 vect_finish_stmt_generation (vinfo, stmt_info,
11049 new_stmt, gsi);
11051 else if (final_mask)
11053 new_stmt = gimple_build_assign (NULL_TREE,
11054 VEC_UNPACK_HI_EXPR,
11055 final_mask);
11056 final_mask = make_ssa_name
11057 (truth_type_for (gs_info.offset_vectype));
11058 gimple_set_lhs (new_stmt, final_mask);
11059 vect_finish_stmt_generation (vinfo, stmt_info,
11060 new_stmt, gsi);
11063 new_stmt = vect_build_one_gather_load_call
11064 (vinfo, stmt_info, gsi, &gs_info,
11065 dataref_ptr,
11066 vec_offsets[2 * vec_num * j + 2 * i + 1],
11067 final_mask);
11068 tree high = make_ssa_name (vectype);
11069 gimple_set_lhs (new_stmt, high);
11070 vect_finish_stmt_generation (vinfo, stmt_info,
11071 new_stmt, gsi);
11073 /* Compose the low and high halves. */
11074 int count = nunits.to_constant ();
11075 vec_perm_builder sel (count, count, 1);
11076 sel.quick_grow (count);
11077 for (int i = 0; i < count; ++i)
11078 sel[i] = i < count / 2 ? i : i + count / 2;
11079 vec_perm_indices indices (sel, 2, count);
11080 tree perm_mask
11081 = vect_gen_perm_mask_checked (vectype, indices);
11082 new_stmt = gimple_build_assign (NULL_TREE,
11083 VEC_PERM_EXPR,
11084 low, high, perm_mask);
11085 data_ref = NULL_TREE;
11087 else if (known_eq (nunits * 2, offset_nunits))
11089 /* We have an offset vector with twice the number of
11090 lanes. Select the low/high part accordingly. */
11091 vec_offset = vec_offsets[(vec_num * j + i) / 2];
11092 if ((vec_num * j + i) & 1)
11094 int count = offset_nunits.to_constant ();
11095 vec_perm_builder sel (count, count, 1);
11096 sel.quick_grow (count);
11097 for (int i = 0; i < count; ++i)
11098 sel[i] = i | (count / 2);
11099 vec_perm_indices indices (sel, 2, count);
11100 tree perm_mask = vect_gen_perm_mask_checked
11101 (TREE_TYPE (vec_offset), indices);
11102 new_stmt = gimple_build_assign (NULL_TREE,
11103 VEC_PERM_EXPR,
11104 vec_offset,
11105 vec_offset,
11106 perm_mask);
11107 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11108 gimple_set_lhs (new_stmt, vec_offset);
11109 vect_finish_stmt_generation (vinfo, stmt_info,
11110 new_stmt, gsi);
11112 new_stmt = vect_build_one_gather_load_call
11113 (vinfo, stmt_info, gsi, &gs_info,
11114 dataref_ptr, vec_offset, final_mask);
11115 data_ref = NULL_TREE;
11117 else
11118 gcc_unreachable ();
11120 else
11122 /* Emulated gather-scatter. */
11123 gcc_assert (!final_mask);
11124 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11125 if (costing_p)
11127 /* For emulated gathers N offset vector element extracts (we assume
11128 the scalar scaling and ptr + offset add is consumed by the load). */
11129 inside_cost = record_stmt_cost (cost_vec, const_nunits,
11130 vec_to_scalar, stmt_info,
11131 0, vect_body);
11132 /* N scalar loads plus gathering them into a
11133 vector. */
11134 inside_cost
11135 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11136 stmt_info, 0, vect_body);
11137 inside_cost
11138 = record_stmt_cost (cost_vec, 1, vec_construct,
11139 stmt_info, 0, vect_body);
11140 continue;
11142 unsigned HOST_WIDE_INT const_offset_nunits
11143 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11144 .to_constant ();
11145 vec<constructor_elt, va_gc> *ctor_elts;
11146 vec_alloc (ctor_elts, const_nunits);
11147 gimple_seq stmts = NULL;
11148 /* We support offset vectors with more elements
11149 than the data vector for now. */
11150 unsigned HOST_WIDE_INT factor
11151 = const_offset_nunits / const_nunits;
11152 vec_offset = vec_offsets[(vec_num * j + i) / factor];
11153 unsigned elt_offset = (j % factor) * const_nunits;
11154 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11155 tree scale = size_int (gs_info.scale);
11156 align = get_object_alignment (DR_REF (first_dr_info->dr));
11157 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
11158 for (unsigned k = 0; k < const_nunits; ++k)
11160 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11161 bitsize_int (k + elt_offset));
11162 tree idx
11163 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11164 vec_offset, TYPE_SIZE (idx_type), boff);
11165 idx = gimple_convert (&stmts, sizetype, idx);
11166 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
11167 scale);
11168 tree ptr = gimple_build (&stmts, PLUS_EXPR,
11169 TREE_TYPE (dataref_ptr),
11170 dataref_ptr, idx);
11171 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11172 tree elt = make_ssa_name (TREE_TYPE (vectype));
11173 tree ref = build2 (MEM_REF, ltype, ptr,
11174 build_int_cst (ref_type, 0));
11175 new_stmt = gimple_build_assign (elt, ref);
11176 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11177 gimple_seq_add_stmt (&stmts, new_stmt);
11178 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11180 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11181 new_stmt = gimple_build_assign (
11182 NULL_TREE, build_constructor (vectype, ctor_elts));
11183 data_ref = NULL_TREE;
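/* Illustrative sketch of the sequence built above, assuming
   const_nunits == 2, elt_offset == 0 and 32-bit offset elements
   (hypothetical SSA names):
     off0 = BIT_FIELD_REF <vec_offset, 32, 0>;
     ptr0 = dataref_ptr + (sizetype) off0 * scale;
     elt0 = MEM[(ltype *) ptr0];
     off1 = BIT_FIELD_REF <vec_offset, 32, 32>;
     ptr1 = dataref_ptr + (sizetype) off1 * scale;
     elt1 = MEM[(ltype *) ptr1];
     vect = {elt0, elt1};  */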
11186 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11187 /* DATA_REF is null if we've already built the statement. */
11188 if (data_ref)
11190 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11191 new_stmt = gimple_build_assign (vec_dest, data_ref);
11193 new_temp = make_ssa_name (vec_dest, new_stmt);
11194 gimple_set_lhs (new_stmt, new_temp);
11195 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11197 /* Store vector loads in the corresponding SLP_NODE. */
11198 if (slp)
11199 slp_node->push_vec_def (new_stmt);
11202 if (!slp && !costing_p)
11203 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11206 if (!slp && !costing_p)
11207 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11209 if (costing_p && dump_enabled_p ())
11210 dump_printf_loc (MSG_NOTE, vect_location,
11211 "vect_model_load_cost: inside_cost = %u, "
11212 "prologue_cost = %u .\n",
11213 inside_cost, prologue_cost);
11214 return true;
11217 poly_uint64 group_elt = 0;
11218 unsigned int inside_cost = 0, prologue_cost = 0;
11219 /* For costing some adjacent vector loads, we'd like to cost them
11220 once with their total number instead of costing each one by one. */
11221 unsigned int n_adjacent_loads = 0;
11222 for (j = 0; j < ncopies; j++)
11224 /* 1. Create the vector or array pointer update chain. */
11225 if (j == 0 && !costing_p)
11227 bool simd_lane_access_p
11228 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11229 if (simd_lane_access_p
11230 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11231 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11232 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11233 && integer_zerop (DR_INIT (first_dr_info->dr))
11234 && alias_sets_conflict_p (get_alias_set (aggr_type),
11235 get_alias_set (TREE_TYPE (ref_type)))
11236 && (alignment_support_scheme == dr_aligned
11237 || alignment_support_scheme == dr_unaligned_supported))
11239 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11240 dataref_offset = build_int_cst (ref_type, 0);
11242 else if (diff_first_stmt_info)
11244 dataref_ptr
11245 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11246 aggr_type, at_loop, offset, &dummy,
11247 gsi, &ptr_incr, simd_lane_access_p,
11248 bump);
11249 /* Adjust the pointer by the difference to first_stmt. */
11250 data_reference_p ptrdr
11251 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11252 tree diff
11253 = fold_convert (sizetype,
11254 size_binop (MINUS_EXPR,
11255 DR_INIT (first_dr_info->dr),
11256 DR_INIT (ptrdr)));
11257 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11258 stmt_info, diff);
11259 if (alignment_support_scheme == dr_explicit_realign)
11261 msq = vect_setup_realignment (vinfo,
11262 first_stmt_info_for_drptr, gsi,
11263 &realignment_token,
11264 alignment_support_scheme,
11265 dataref_ptr, &at_loop);
11266 gcc_assert (!compute_in_loop);
11269 else
11270 dataref_ptr
11271 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11272 at_loop,
11273 offset, &dummy, gsi, &ptr_incr,
11274 simd_lane_access_p, bump);
11276 else if (!costing_p)
11278 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11279 if (dataref_offset)
11280 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11281 bump);
11282 else
11283 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11284 stmt_info, bump);
11287 if (grouped_load || slp_perm)
11288 dr_chain.create (vec_num);
11290 gimple *new_stmt = NULL;
11291 for (i = 0; i < vec_num; i++)
11293 tree final_mask = NULL_TREE;
11294 tree final_len = NULL_TREE;
11295 tree bias = NULL_TREE;
11296 if (!costing_p)
11298 if (mask)
11299 vec_mask = vec_masks[vec_num * j + i];
11300 if (loop_masks)
11301 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11302 vec_num * ncopies, vectype,
11303 vec_num * j + i);
11304 if (vec_mask)
11305 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11306 final_mask, vec_mask, gsi);
11308 if (i > 0)
11309 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11310 gsi, stmt_info, bump);
11313 /* 2. Create the vector-load in the loop. */
11314 switch (alignment_support_scheme)
11316 case dr_aligned:
11317 case dr_unaligned_supported:
11319 if (costing_p)
11320 break;
11322 unsigned int misalign;
11323 unsigned HOST_WIDE_INT align;
11324 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11325 if (alignment_support_scheme == dr_aligned)
11326 misalign = 0;
11327 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11329 align
11330 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11331 misalign = 0;
11333 else
11334 misalign = misalignment;
11335 if (dataref_offset == NULL_TREE
11336 && TREE_CODE (dataref_ptr) == SSA_NAME)
11337 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11338 misalign);
11339 align = least_bit_hwi (misalign | align);
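/* The alignment that can still be guaranteed for the access is the
   least set bit of the target alignment combined with the known
   misalignment, e.g. a 16-byte target alignment with misalignment 4
   yields a 4-byte guarantee.  */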
11341 /* Compute the IFN to use when LOOP_LENS or FINAL_MASK is valid. */
11342 machine_mode vmode = TYPE_MODE (vectype);
11343 machine_mode new_vmode = vmode;
11344 internal_fn partial_ifn = IFN_LAST;
11345 if (loop_lens)
11347 opt_machine_mode new_ovmode
11348 = get_len_load_store_mode (vmode, true, &partial_ifn);
11349 new_vmode = new_ovmode.require ();
11350 unsigned factor
11351 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
11352 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11353 vec_num * ncopies, vectype,
11354 vec_num * j + i, factor);
11356 else if (final_mask)
11358 if (!can_vec_mask_load_store_p (
11359 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11360 &partial_ifn))
11361 gcc_unreachable ();
11364 if (partial_ifn == IFN_MASK_LEN_LOAD)
11366 if (!final_len)
11368 /* Pass VF value to 'len' argument of
11369 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11370 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11372 if (!final_mask)
11374 /* Pass all ones value to 'mask' argument of
11375 MASK_LEN_LOAD if final_mask is invalid. */
11376 mask_vectype = truth_type_for (vectype);
11377 final_mask = build_minus_one_cst (mask_vectype);
11380 if (final_len)
11382 signed char biasval
11383 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11385 bias = build_int_cst (intQI_type_node, biasval);
11388 if (final_len)
11390 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11391 gcall *call;
11392 if (partial_ifn == IFN_MASK_LEN_LOAD)
11393 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11394 dataref_ptr, ptr,
11395 final_mask, final_len,
11396 bias);
11397 else
11398 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11399 dataref_ptr, ptr,
11400 final_len, bias);
11401 gimple_call_set_nothrow (call, true);
11402 new_stmt = call;
11403 data_ref = NULL_TREE;
11405 /* Need conversion if it's wrapped with VnQI. */
11406 if (vmode != new_vmode)
11408 tree new_vtype = build_vector_type_for_mode (
11409 unsigned_intQI_type_node, new_vmode);
11410 tree var
11411 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11412 gimple_set_lhs (call, var);
11413 vect_finish_stmt_generation (vinfo, stmt_info, call,
11414 gsi);
11415 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11416 new_stmt = gimple_build_assign (vec_dest,
11417 VIEW_CONVERT_EXPR, op);
11420 else if (final_mask)
11422 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11423 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11424 dataref_ptr, ptr,
11425 final_mask);
11426 gimple_call_set_nothrow (call, true);
11427 new_stmt = call;
11428 data_ref = NULL_TREE;
11430 else
11432 tree ltype = vectype;
11433 tree new_vtype = NULL_TREE;
11434 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11435 unsigned int vect_align
11436 = vect_known_alignment_in_bytes (first_dr_info, vectype);
11437 unsigned int scalar_dr_size
11438 = vect_get_scalar_dr_size (first_dr_info);
11439 /* If there's no peeling for gaps but we have a gap
11440 with slp loads then load the lower half of the
11441 vector only. See get_group_load_store_type for
11442 when we apply this optimization. */
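/* (Illustrative: with nunits == group_size == 4 and a trailing gap of 2
   only the first two elements are actually accessed, so a half-width
   load through the half vector type suffices; the remaining half of the
   constructed vector is filled with zeros below.)  */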
11443 if (slp
11444 && loop_vinfo
11445 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
11446 && known_eq (nunits, (group_size - gap) * 2)
11447 && known_eq (nunits, group_size)
11448 && gap >= (vect_align / scalar_dr_size))
11450 tree half_vtype;
11451 new_vtype
11452 = vector_vector_composition_type (vectype, 2,
11453 &half_vtype);
11454 if (new_vtype != NULL_TREE)
11455 ltype = half_vtype;
11457 tree offset
11458 = (dataref_offset ? dataref_offset
11459 : build_int_cst (ref_type, 0));
11460 if (ltype != vectype
11461 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11463 unsigned HOST_WIDE_INT gap_offset
11464 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
11465 tree gapcst = build_int_cst (ref_type, gap_offset);
11466 offset = size_binop (PLUS_EXPR, offset, gapcst);
11468 data_ref
11469 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11470 if (alignment_support_scheme == dr_aligned)
11472 else
11473 TREE_TYPE (data_ref)
11474 = build_aligned_type (TREE_TYPE (data_ref),
11475 align * BITS_PER_UNIT);
11476 if (ltype != vectype)
11478 vect_copy_ref_info (data_ref,
11479 DR_REF (first_dr_info->dr));
11480 tree tem = make_ssa_name (ltype);
11481 new_stmt = gimple_build_assign (tem, data_ref);
11482 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11483 gsi);
11484 data_ref = NULL;
11485 vec<constructor_elt, va_gc> *v;
11486 vec_alloc (v, 2);
11487 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11489 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11490 build_zero_cst (ltype));
11491 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11493 else
11495 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11496 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11497 build_zero_cst (ltype));
11499 gcc_assert (new_vtype != NULL_TREE);
11500 if (new_vtype == vectype)
11501 new_stmt = gimple_build_assign (
11502 vec_dest, build_constructor (vectype, v));
11503 else
11505 tree new_vname = make_ssa_name (new_vtype);
11506 new_stmt = gimple_build_assign (
11507 new_vname, build_constructor (new_vtype, v));
11508 vect_finish_stmt_generation (vinfo, stmt_info,
11509 new_stmt, gsi);
11510 new_stmt = gimple_build_assign (
11511 vec_dest,
11512 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11516 break;
11518 case dr_explicit_realign:
11520 if (costing_p)
11521 break;
11522 tree ptr, bump;
11524 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11526 if (compute_in_loop)
11527 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11528 &realignment_token,
11529 dr_explicit_realign,
11530 dataref_ptr, NULL);
11532 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11533 ptr = copy_ssa_name (dataref_ptr);
11534 else
11535 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11536 // For explicit realign the target alignment should be
11537 // known at compile time.
11538 unsigned HOST_WIDE_INT align
11539 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11540 new_stmt = gimple_build_assign (
11541 ptr, BIT_AND_EXPR, dataref_ptr,
11542 build_int_cst (TREE_TYPE (dataref_ptr),
11543 -(HOST_WIDE_INT) align));
11544 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11545 data_ref
11546 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11547 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11548 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11549 new_stmt = gimple_build_assign (vec_dest, data_ref);
11550 new_temp = make_ssa_name (vec_dest, new_stmt);
11551 gimple_assign_set_lhs (new_stmt, new_temp);
11552 gimple_move_vops (new_stmt, stmt_info->stmt);
11553 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11554 msq = new_temp;
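/* Compute the address for the second vector load of the realignment
   scheme documented above, i.e. the "lsq" load from
   floor (dataref_ptr + VS - 1); the load itself is emitted from
   DATA_REF by the common code after this switch.  */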
11556 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11557 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11558 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11559 bump);
11560 new_stmt = gimple_build_assign (
11561 NULL_TREE, BIT_AND_EXPR, ptr,
11562 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11563 if (TREE_CODE (ptr) == SSA_NAME)
11564 ptr = copy_ssa_name (ptr, new_stmt);
11565 else
11566 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11567 gimple_assign_set_lhs (new_stmt, ptr);
11568 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11569 data_ref
11570 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11571 break;
11573 case dr_explicit_realign_optimized:
11575 if (costing_p)
11576 break;
11577 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11578 new_temp = copy_ssa_name (dataref_ptr);
11579 else
11580 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11581 // We should only be doing this if we know the target
11582 // alignment at compile time.
11583 unsigned HOST_WIDE_INT align
11584 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11585 new_stmt = gimple_build_assign (
11586 new_temp, BIT_AND_EXPR, dataref_ptr,
11587 build_int_cst (TREE_TYPE (dataref_ptr),
11588 -(HOST_WIDE_INT) align));
11589 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11590 data_ref = build2 (MEM_REF, vectype, new_temp,
11591 build_int_cst (ref_type, 0));
11592 break;
11594 default:
11595 gcc_unreachable ();
11598 /* One common place to cost the above vect load for different
11599 alignment support schemes. */
11600 if (costing_p)
11602 /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load, we only need
11603 to take care of the first stmt, whose stmt_info is
11604 first_stmt_info; iterating vec_num times on it covers the cost
11605 of the remaining stmts, which is consistent with the transform
11606 phase. The prologue cost for realignment only needs to be
11607 counted once for the whole group. */
11608 bool first_stmt_info_p = first_stmt_info == stmt_info;
11609 bool add_realign_cost = first_stmt_info_p && i == 0;
11610 if (memory_access_type == VMAT_CONTIGUOUS
11611 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11612 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11613 && (!grouped_load || first_stmt_info_p)))
11615 /* Leave realign cases alone to keep them simple. */
11616 if (alignment_support_scheme == dr_explicit_realign_optimized
11617 || alignment_support_scheme == dr_explicit_realign)
11618 vect_get_load_cost (vinfo, stmt_info, 1,
11619 alignment_support_scheme, misalignment,
11620 add_realign_cost, &inside_cost,
11621 &prologue_cost, cost_vec, cost_vec,
11622 true);
11623 else
11624 n_adjacent_loads++;
11627 else
11629 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11630 /* DATA_REF is null if we've already built the statement. */
11631 if (data_ref)
11633 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11634 new_stmt = gimple_build_assign (vec_dest, data_ref);
11636 new_temp = make_ssa_name (vec_dest, new_stmt);
11637 gimple_set_lhs (new_stmt, new_temp);
11638 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11641 /* 3. Handle explicit realignment if necessary/supported.
11642 Create in loop:
11643 vec_dest = realign_load (msq, lsq, realignment_token) */
11644 if (!costing_p
11645 && (alignment_support_scheme == dr_explicit_realign_optimized
11646 || alignment_support_scheme == dr_explicit_realign))
11648 lsq = gimple_assign_lhs (new_stmt);
11649 if (!realignment_token)
11650 realignment_token = dataref_ptr;
11651 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11652 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11653 lsq, realignment_token);
11654 new_temp = make_ssa_name (vec_dest, new_stmt);
11655 gimple_assign_set_lhs (new_stmt, new_temp);
11656 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11658 if (alignment_support_scheme == dr_explicit_realign_optimized)
11660 gcc_assert (phi);
11661 if (i == vec_num - 1 && j == ncopies - 1)
11662 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11663 UNKNOWN_LOCATION);
11664 msq = lsq;
11668 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11670 if (costing_p)
11671 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11672 stmt_info, 0, vect_body);
11673 else
11675 tree perm_mask = perm_mask_for_reverse (vectype);
11676 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11677 perm_mask, stmt_info, gsi);
11678 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11682 /* Collect vector loads and later create their permutation in
11683 vect_transform_grouped_load (). */
11684 if (!costing_p && (grouped_load || slp_perm))
11685 dr_chain.quick_push (new_temp);
11687 /* Store vector loads in the corresponding SLP_NODE. */
11688 if (!costing_p && slp && !slp_perm)
11689 slp_node->push_vec_def (new_stmt);
11691 /* With an SLP permutation we load the gaps as well; without one
11692 we need to skip the gaps once we have fully loaded all the
11693 elements. group_gap_adj is DR_GROUP_SIZE here. */
11694 group_elt += nunits;
11695 if (!costing_p
11696 && maybe_ne (group_gap_adj, 0U)
11697 && !slp_perm
11698 && known_eq (group_elt, group_size - group_gap_adj))
11700 poly_wide_int bump_val
11701 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11702 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11703 == -1)
11704 bump_val = -bump_val;
11705 tree bump = wide_int_to_tree (sizetype, bump_val);
11706 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11707 stmt_info, bump);
11708 group_elt = 0;
11711 /* Bump the vector pointer to account for a gap or for excess
11712 elements loaded for a permuted SLP load. */
11713 if (!costing_p
11714 && maybe_ne (group_gap_adj, 0U)
11715 && slp_perm)
11717 poly_wide_int bump_val
11718 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11719 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11720 bump_val = -bump_val;
11721 tree bump = wide_int_to_tree (sizetype, bump_val);
11722 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11723 stmt_info, bump);
11726 if (slp && !slp_perm)
11727 continue;
11729 if (slp_perm)
11731 unsigned n_perms;
11732 /* For SLP we know we've seen all possible uses of dr_chain so
11733 direct vect_transform_slp_perm_load to DCE the unused parts.
11734 ??? This is a hack to prevent compile-time issues as seen
11735 in PR101120 and friends. */
11736 if (costing_p)
11738 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11739 true, &n_perms, nullptr);
11740 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11741 stmt_info, 0, vect_body);
11743 else
11745 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11746 gsi, vf, false, &n_perms,
11747 nullptr, true);
11748 gcc_assert (ok);
11751 else
11753 if (grouped_load)
11755 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11756 /* We assume that the cost of a single load-lanes instruction
11757 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11758 If a grouped access is instead being provided by a
11759 load-and-permute operation, include the cost of the
11760 permutes. */
11761 if (costing_p && first_stmt_info == stmt_info)
11763 /* Uses even and odd extract operations, or shuffle operations,
11764 for each needed permute. */
11765 int group_size = DR_GROUP_SIZE (first_stmt_info);
11766 int nstmts = ceil_log2 (group_size) * group_size;
11767 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11768 stmt_info, 0, vect_body);
11770 if (dump_enabled_p ())
11771 dump_printf_loc (MSG_NOTE, vect_location,
11772 "vect_model_load_cost:"
11773 "strided group_size = %d .\n",
11774 group_size);
11776 else if (!costing_p)
11778 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11779 group_size, gsi);
11780 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11783 else if (!costing_p)
11784 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11786 dr_chain.release ();
11788 if (!slp && !costing_p)
11789 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11791 if (costing_p)
11793 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11794 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11795 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11796 if (n_adjacent_loads > 0)
11797 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
11798 alignment_support_scheme, misalignment, false,
11799 &inside_cost, &prologue_cost, cost_vec, cost_vec,
11800 true);
11801 if (dump_enabled_p ())
11802 dump_printf_loc (MSG_NOTE, vect_location,
11803 "vect_model_load_cost: inside_cost = %u, "
11804 "prologue_cost = %u .\n",
11805 inside_cost, prologue_cost);
11808 return true;
11811 /* Function vect_is_simple_cond.
11813 Input:
11814 LOOP - the loop that is being vectorized.
11815 COND - Condition that is checked for simple use.
11817 Output:
11818 *COMP_VECTYPE - the vector type for the comparison.
11819 *DTS - The def types for the arguments of the comparison
11821 Returns whether a COND can be vectorized. Checks whether
11822 condition operands are supportable using vect_is_simple_use. */
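/* For example (illustrative SSA names): COND may be a scalar boolean
   SSA_NAME such as b_1, in which case *COMP_VECTYPE is the corresponding
   vector mask type, or a comparison such as a_2 < b_3.  */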
11824 static bool
11825 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11826 slp_tree slp_node, tree *comp_vectype,
11827 enum vect_def_type *dts, tree vectype)
11829 tree lhs, rhs;
11830 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11831 slp_tree slp_op;
11833 /* Mask case. */
11834 if (TREE_CODE (cond) == SSA_NAME
11835 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11837 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11838 &slp_op, &dts[0], comp_vectype)
11839 || !*comp_vectype
11840 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11841 return false;
11842 return true;
11845 if (!COMPARISON_CLASS_P (cond))
11846 return false;
11848 lhs = TREE_OPERAND (cond, 0);
11849 rhs = TREE_OPERAND (cond, 1);
11851 if (TREE_CODE (lhs) == SSA_NAME)
11853 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11854 &lhs, &slp_op, &dts[0], &vectype1))
11855 return false;
11857 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11858 || TREE_CODE (lhs) == FIXED_CST)
11859 dts[0] = vect_constant_def;
11860 else
11861 return false;
11863 if (TREE_CODE (rhs) == SSA_NAME)
11865 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11866 &rhs, &slp_op, &dts[1], &vectype2))
11867 return false;
11869 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11870 || TREE_CODE (rhs) == FIXED_CST)
11871 dts[1] = vect_constant_def;
11872 else
11873 return false;
11875 if (vectype1 && vectype2
11876 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11877 TYPE_VECTOR_SUBPARTS (vectype2)))
11878 return false;
11880 *comp_vectype = vectype1 ? vectype1 : vectype2;
11881 /* Invariant comparison. */
11882 if (! *comp_vectype)
11884 tree scalar_type = TREE_TYPE (lhs);
11885 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11886 *comp_vectype = truth_type_for (vectype);
11887 else
11889 /* If we can widen the comparison to match vectype do so. */
11890 if (INTEGRAL_TYPE_P (scalar_type)
11891 && !slp_node
11892 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11893 TYPE_SIZE (TREE_TYPE (vectype))))
11894 scalar_type = build_nonstandard_integer_type
11895 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
11896 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11897 slp_node);
11901 return true;
11904 /* vectorizable_condition.
11906 Check if STMT_INFO is a conditional modify expression that can be vectorized.
11907 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11908 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11909 at GSI.
11911 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11913 Return true if STMT_INFO is vectorizable in this way. */
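/* Illustrative sketch (hypothetical SSA names): a scalar statement like
     x_1 = a_2 < b_3 ? c_4 : d_5;
   is roughly vectorized as
     mask_6 = vect_a < vect_b;
     vect_x = VEC_COND_EXPR <mask_6, vect_c, vect_d>;
   with the comparison possibly replaced by mask bit operations or ANDed
   with a loop mask, as handled below.  */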
11915 static bool
11916 vectorizable_condition (vec_info *vinfo,
11917 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11918 gimple **vec_stmt,
11919 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11921 tree scalar_dest = NULL_TREE;
11922 tree vec_dest = NULL_TREE;
11923 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
11924 tree then_clause, else_clause;
11925 tree comp_vectype = NULL_TREE;
11926 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
11927 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
11928 tree vec_compare;
11929 tree new_temp;
11930 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11931 enum vect_def_type dts[4]
11932 = {vect_unknown_def_type, vect_unknown_def_type,
11933 vect_unknown_def_type, vect_unknown_def_type};
11934 int ndts = 4;
11935 int ncopies;
11936 int vec_num;
11937 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11938 int i;
11939 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11940 vec<tree> vec_oprnds0 = vNULL;
11941 vec<tree> vec_oprnds1 = vNULL;
11942 vec<tree> vec_oprnds2 = vNULL;
11943 vec<tree> vec_oprnds3 = vNULL;
11944 tree vec_cmp_type;
11945 bool masked = false;
11947 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11948 return false;
11950 /* Is vectorizable conditional operation? */
11951 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11952 if (!stmt)
11953 return false;
11955 code = gimple_assign_rhs_code (stmt);
11956 if (code != COND_EXPR)
11957 return false;
11959 stmt_vec_info reduc_info = NULL;
11960 int reduc_index = -1;
11961 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
11962 bool for_reduction
11963 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
11964 if (for_reduction)
11966 if (slp_node)
11967 return false;
11968 reduc_info = info_for_reduction (vinfo, stmt_info);
11969 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
11970 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
11971 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
11972 || reduc_index != -1);
11974 else
11976 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11977 return false;
11980 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11981 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11983 if (slp_node)
11985 ncopies = 1;
11986 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
11988 else
11990 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11991 vec_num = 1;
11994 gcc_assert (ncopies >= 1);
11995 if (for_reduction && ncopies > 1)
11996 return false; /* FORNOW */
11998 cond_expr = gimple_assign_rhs1 (stmt);
12000 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
12001 &comp_vectype, &dts[0], vectype)
12002 || !comp_vectype)
12003 return false;
12005 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
12006 slp_tree then_slp_node, else_slp_node;
12007 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
12008 &then_clause, &then_slp_node, &dts[2], &vectype1))
12009 return false;
12010 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
12011 &else_clause, &else_slp_node, &dts[3], &vectype2))
12012 return false;
12014 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
12015 return false;
12017 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
12018 return false;
12020 masked = !COMPARISON_CLASS_P (cond_expr);
12021 vec_cmp_type = truth_type_for (comp_vectype);
12023 if (vec_cmp_type == NULL_TREE)
12024 return false;
12026 cond_code = TREE_CODE (cond_expr);
12027 if (!masked)
12029 cond_expr0 = TREE_OPERAND (cond_expr, 0);
12030 cond_expr1 = TREE_OPERAND (cond_expr, 1);
12033 /* For conditional reductions, the "then" value needs to be the candidate
12034 value calculated by this iteration while the "else" value needs to be
12035 the result carried over from previous iterations. If the COND_EXPR
12036 is the other way around, we need to swap it. */
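/* E.g. (illustrative): for
     last_1 = cond_2 ? val_3 : last_4;
   the candidate value val_3 already sits in the "then" slot; for
     last_1 = cond_2 ? last_4 : val_3;
   the clauses must be swapped and the comparison inverted.  */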
12037 bool must_invert_cmp_result = false;
12038 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
12040 if (masked)
12041 must_invert_cmp_result = true;
12042 else
12044 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
12045 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
12046 if (new_code == ERROR_MARK)
12047 must_invert_cmp_result = true;
12048 else
12050 cond_code = new_code;
12051 /* Make sure we don't accidentally use the old condition. */
12052 cond_expr = NULL_TREE;
12055 std::swap (then_clause, else_clause);
12058 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12060 /* Boolean values may have another representation in vectors
12061 and therefore we prefer bit operations over comparison for
12062 them (which also works for scalar masks). We store opcodes
12063 to use in bitop1 and bitop2. Statement is vectorized as
12064 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12065 depending on bitop1 and bitop2 arity. */
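/* For example, with boolean operands a GT_EXPR "rhs1 > rhs2" becomes
   "rhs1 & ~rhs2": bitop1 = BIT_NOT_EXPR is applied to rhs2 and
   bitop2 = BIT_AND_EXPR combines rhs1 with that result.  */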
12066 switch (cond_code)
12068 case GT_EXPR:
12069 bitop1 = BIT_NOT_EXPR;
12070 bitop2 = BIT_AND_EXPR;
12071 break;
12072 case GE_EXPR:
12073 bitop1 = BIT_NOT_EXPR;
12074 bitop2 = BIT_IOR_EXPR;
12075 break;
12076 case LT_EXPR:
12077 bitop1 = BIT_NOT_EXPR;
12078 bitop2 = BIT_AND_EXPR;
12079 std::swap (cond_expr0, cond_expr1);
12080 break;
12081 case LE_EXPR:
12082 bitop1 = BIT_NOT_EXPR;
12083 bitop2 = BIT_IOR_EXPR;
12084 std::swap (cond_expr0, cond_expr1);
12085 break;
12086 case NE_EXPR:
12087 bitop1 = BIT_XOR_EXPR;
12088 break;
12089 case EQ_EXPR:
12090 bitop1 = BIT_XOR_EXPR;
12091 bitop2 = BIT_NOT_EXPR;
12092 break;
12093 default:
12094 return false;
12096 cond_code = SSA_NAME;
12099 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12100 && reduction_type == EXTRACT_LAST_REDUCTION
12101 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12103 if (dump_enabled_p ())
12104 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12105 "reduction comparison operation not supported.\n");
12106 return false;
12109 if (!vec_stmt)
12111 if (bitop1 != NOP_EXPR)
12113 machine_mode mode = TYPE_MODE (comp_vectype);
12114 optab optab;
12116 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12117 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12118 return false;
12120 if (bitop2 != NOP_EXPR)
12122 optab = optab_for_tree_code (bitop2, comp_vectype,
12123 optab_default);
12124 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12125 return false;
12129 vect_cost_for_stmt kind = vector_stmt;
12130 if (reduction_type == EXTRACT_LAST_REDUCTION)
12131 /* Count one reduction-like operation per vector. */
12132 kind = vec_to_scalar;
12133 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12134 && (masked
12135 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12136 cond_code)
12137 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12138 ERROR_MARK))))
12139 return false;
12141 if (slp_node
12142 && (!vect_maybe_update_slp_op_vectype
12143 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12144 || (op_adjust == 1
12145 && !vect_maybe_update_slp_op_vectype
12146 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12147 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12148 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12150 if (dump_enabled_p ())
12151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12152 "incompatible vector types for invariants\n");
12153 return false;
12156 if (loop_vinfo && for_reduction
12157 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12159 if (reduction_type == EXTRACT_LAST_REDUCTION)
12161 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12162 vectype, OPTIMIZE_FOR_SPEED))
12163 vect_record_loop_len (loop_vinfo,
12164 &LOOP_VINFO_LENS (loop_vinfo),
12165 ncopies * vec_num, vectype, 1);
12166 else
12167 vect_record_loop_mask (loop_vinfo,
12168 &LOOP_VINFO_MASKS (loop_vinfo),
12169 ncopies * vec_num, vectype, NULL);
12171 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12172 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12174 if (dump_enabled_p ())
12175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12176 "conditional reduction prevents the use"
12177 " of partial vectors.\n");
12178 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12182 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12183 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
12184 cost_vec, kind);
12185 return true;
12188 /* Transform. */
12190 /* Handle def. */
12191 scalar_dest = gimple_assign_lhs (stmt);
12192 if (reduction_type != EXTRACT_LAST_REDUCTION)
12193 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12195 bool swap_cond_operands = false;
12197 /* See whether another part of the vectorized code applies a loop
12198 mask to the condition, or to its inverse. */
12200 vec_loop_masks *masks = NULL;
12201 vec_loop_lens *lens = NULL;
12202 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12204 if (reduction_type == EXTRACT_LAST_REDUCTION)
12205 lens = &LOOP_VINFO_LENS (loop_vinfo);
12207 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12209 if (reduction_type == EXTRACT_LAST_REDUCTION)
12210 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12211 else
12213 scalar_cond_masked_key cond (cond_expr, ncopies);
12214 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12215 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12216 else
12218 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12219 tree_code orig_code = cond.code;
12220 cond.code = invert_tree_comparison (cond.code, honor_nans);
12221 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12223 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12224 cond_code = cond.code;
12225 swap_cond_operands = true;
12227 else
12229 /* Try the inverse of the current mask. We check if the
12230 inverse mask is live and if so we generate a negate of
12231 the current mask such that we still honor NaNs. */
12232 cond.inverted_p = true;
12233 cond.code = orig_code;
12234 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12236 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12237 cond_code = cond.code;
12238 swap_cond_operands = true;
12239 must_invert_cmp_result = true;
12246 /* Handle cond expr. */
12247 if (masked)
12248 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12249 cond_expr, &vec_oprnds0, comp_vectype,
12250 then_clause, &vec_oprnds2, vectype,
12251 reduction_type != EXTRACT_LAST_REDUCTION
12252 ? else_clause : NULL, &vec_oprnds3, vectype);
12253 else
12254 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12255 cond_expr0, &vec_oprnds0, comp_vectype,
12256 cond_expr1, &vec_oprnds1, comp_vectype,
12257 then_clause, &vec_oprnds2, vectype,
12258 reduction_type != EXTRACT_LAST_REDUCTION
12259 ? else_clause : NULL, &vec_oprnds3, vectype);
12261 /* Arguments are ready. Create the new vector stmt. */
12262 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12264 vec_then_clause = vec_oprnds2[i];
12265 if (reduction_type != EXTRACT_LAST_REDUCTION)
12266 vec_else_clause = vec_oprnds3[i];
12268 if (swap_cond_operands)
12269 std::swap (vec_then_clause, vec_else_clause);
12271 if (masked)
12272 vec_compare = vec_cond_lhs;
12273 else
12275 vec_cond_rhs = vec_oprnds1[i];
12276 if (bitop1 == NOP_EXPR)
12278 gimple_seq stmts = NULL;
12279 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12280 vec_cond_lhs, vec_cond_rhs);
12281 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12283 else
12285 new_temp = make_ssa_name (vec_cmp_type);
12286 gassign *new_stmt;
12287 if (bitop1 == BIT_NOT_EXPR)
12288 new_stmt = gimple_build_assign (new_temp, bitop1,
12289 vec_cond_rhs);
12290 else
12291 new_stmt
12292 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12293 vec_cond_rhs);
12294 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12295 if (bitop2 == NOP_EXPR)
12296 vec_compare = new_temp;
12297 else if (bitop2 == BIT_NOT_EXPR
12298 && reduction_type != EXTRACT_LAST_REDUCTION)
12300 /* Instead of doing ~x ? y : z do x ? z : y. */
12301 vec_compare = new_temp;
12302 std::swap (vec_then_clause, vec_else_clause);
12304 else
12306 vec_compare = make_ssa_name (vec_cmp_type);
12307 if (bitop2 == BIT_NOT_EXPR)
12308 new_stmt
12309 = gimple_build_assign (vec_compare, bitop2, new_temp);
12310 else
12311 new_stmt
12312 = gimple_build_assign (vec_compare, bitop2,
12313 vec_cond_lhs, new_temp);
12314 vect_finish_stmt_generation (vinfo, stmt_info,
12315 new_stmt, gsi);
12320 /* If we decided to apply a loop mask to the result of the vector
12321 comparison, AND the comparison with the mask now. Later passes
12322 should then be able to reuse the AND results between multiple
12323 vector statements.
12325 For example:
12326 for (int i = 0; i < 100; ++i)
12327 x[i] = y[i] ? z[i] : 10;
12329 results in following optimized GIMPLE:
12331 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12332 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12333 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12334 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12335 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12336 vect_iftmp.11_47, { 10, ... }>;
12338 instead of using masked and unmasked forms of
12339 vec != { 0, ... } (masked in the MASK_LOAD,
12340 unmasked in the VEC_COND_EXPR). */
12342 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12343 in cases where that's necessary. */
12345 tree len = NULL_TREE, bias = NULL_TREE;
12346 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12348 if (!is_gimple_val (vec_compare))
12350 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12351 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12352 vec_compare);
12353 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12354 vec_compare = vec_compare_name;
12357 if (must_invert_cmp_result)
12359 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12360 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12361 BIT_NOT_EXPR,
12362 vec_compare);
12363 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12364 vec_compare = vec_compare_name;
12367 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12368 vectype, OPTIMIZE_FOR_SPEED))
12370 if (lens)
12372 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12373 vec_num * ncopies, vectype, i, 1);
12374 signed char biasval
12375 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12376 bias = build_int_cst (intQI_type_node, biasval);
12378 else
12380 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12381 bias = build_int_cst (intQI_type_node, 0);
12384 if (masks)
12386 tree loop_mask
12387 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12388 vectype, i);
12389 tree tmp2 = make_ssa_name (vec_cmp_type);
12390 gassign *g
12391 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12392 loop_mask);
12393 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12394 vec_compare = tmp2;
12398 gimple *new_stmt;
12399 if (reduction_type == EXTRACT_LAST_REDUCTION)
12401 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12402 tree lhs = gimple_get_lhs (old_stmt);
12403 if (len)
12404 new_stmt = gimple_build_call_internal
12405 (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
12406 vec_then_clause, len, bias);
12407 else
12408 new_stmt = gimple_build_call_internal
12409 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
12410 vec_then_clause);
12411 gimple_call_set_lhs (new_stmt, lhs);
12412 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12413 if (old_stmt == gsi_stmt (*gsi))
12414 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12415 else
12417 /* In this case we're moving the definition to later in the
12418 block. That doesn't matter because the only uses of the
12419 lhs are in phi statements. */
12420 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12421 gsi_remove (&old_gsi, true);
12422 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12425 else
12427 new_temp = make_ssa_name (vec_dest);
12428 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12429 vec_then_clause, vec_else_clause);
12430 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12432 if (slp_node)
12433 slp_node->push_vec_def (new_stmt);
12434 else
12435 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12438 if (!slp_node)
12439 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12441 vec_oprnds0.release ();
12442 vec_oprnds1.release ();
12443 vec_oprnds2.release ();
12444 vec_oprnds3.release ();
12446 return true;
12449 /* Helper of vectorizable_comparison.
12451 Check if STMT_INFO is a comparison expression with code CODE that can be vectorized.
12452 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12453 comparison, put it in VEC_STMT, and insert it at GSI.
12455 Return true if STMT_INFO is vectorizable in this way. */
12457 static bool
12458 vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12459 stmt_vec_info stmt_info, tree_code code,
12460 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12461 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12463 tree lhs, rhs1, rhs2;
12464 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12465 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12466 tree new_temp;
12467 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12468 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12469 int ndts = 2;
12470 poly_uint64 nunits;
12471 int ncopies;
12472 enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12473 int i;
12474 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12475 vec<tree> vec_oprnds0 = vNULL;
12476 vec<tree> vec_oprnds1 = vNULL;
12477 tree mask_type;
12478 tree mask;
12480 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12481 return false;
12483 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12484 return false;
12486 mask_type = vectype;
12487 nunits = TYPE_VECTOR_SUBPARTS (vectype);
12489 if (slp_node)
12490 ncopies = 1;
12491 else
12492 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12494 gcc_assert (ncopies >= 1);
12496 if (TREE_CODE_CLASS (code) != tcc_comparison)
12497 return false;
12499 slp_tree slp_rhs1, slp_rhs2;
12500 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12501 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12502 return false;
12504 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12505 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12506 return false;
12508 if (vectype1 && vectype2
12509 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12510 TYPE_VECTOR_SUBPARTS (vectype2)))
12511 return false;
12513 vectype = vectype1 ? vectype1 : vectype2;
12515 /* Invariant comparison. */
12516 if (!vectype)
12518 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
12519 vectype = mask_type;
12520 else
12521 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
12522 slp_node);
12523 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12524 return false;
12526 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12527 return false;
12529 /* Can't compare mask and non-mask types. */
12530 if (vectype1 && vectype2
12531 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12532 return false;
12534 /* Boolean values may have another representation in vectors
12535 and therefore we prefer bit operations over comparison for
12536 them (which also works for scalar masks). We store opcodes
12537 to use in bitop1 and bitop2. Statement is vectorized as
12538 BITOP2 (rhs1 BITOP1 rhs2) or
12539 rhs1 BITOP2 (BITOP1 rhs2)
12540 depending on bitop1 and bitop2 arity. */
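/* As an illustration only (not part of the transform below): for 0/1 mask
   values a and b, the lowerings chosen here correspond to the scalar
   identities

     a >  b   ==   ~b & a     (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR)
     a >= b   ==   ~b | a     (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR)
     a != b   ==    a ^ b     (bitop1 = BIT_XOR_EXPR)
     a == b   ==  ~(a ^ b)    (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR)

   where only the low bit of each result is meaningful; LT and LE reuse
   the GT and GE forms with the operands swapped (see swap_p).  */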
12541 bool swap_p = false;
12542 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12544 if (code == GT_EXPR)
12546 bitop1 = BIT_NOT_EXPR;
12547 bitop2 = BIT_AND_EXPR;
12549 else if (code == GE_EXPR)
12551 bitop1 = BIT_NOT_EXPR;
12552 bitop2 = BIT_IOR_EXPR;
12554 else if (code == LT_EXPR)
12556 bitop1 = BIT_NOT_EXPR;
12557 bitop2 = BIT_AND_EXPR;
12558 swap_p = true;
12560 else if (code == LE_EXPR)
12562 bitop1 = BIT_NOT_EXPR;
12563 bitop2 = BIT_IOR_EXPR;
12564 swap_p = true;
12566 else
12568 bitop1 = BIT_XOR_EXPR;
12569 if (code == EQ_EXPR)
12570 bitop2 = BIT_NOT_EXPR;
12574 if (!vec_stmt)
12576 if (bitop1 == NOP_EXPR)
12578 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12579 return false;
12581 else
12583 machine_mode mode = TYPE_MODE (vectype);
12584 optab optab;
12586 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12587 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12588 return false;
12590 if (bitop2 != NOP_EXPR)
12592 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12593 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12594 return false;
12598 /* Put types on constant and invariant SLP children. */
12599 if (slp_node
12600 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12601 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12603 if (dump_enabled_p ())
12604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12605 "incompatible vector types for invariants\n");
12606 return false;
12609 vect_model_simple_cost (vinfo, stmt_info,
12610 ncopies * (1 + (bitop2 != NOP_EXPR)),
12611 dts, ndts, slp_node, cost_vec);
12612 return true;
12615 /* Transform. */
12617 /* Handle def. */
12618 lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info));
12619 mask = vect_create_destination_var (lhs, mask_type);
12621 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12622 rhs1, &vec_oprnds0, vectype,
12623 rhs2, &vec_oprnds1, vectype);
12624 if (swap_p)
12625 std::swap (vec_oprnds0, vec_oprnds1);
12627 /* Arguments are ready. Create the new vector stmt. */
12628 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12630 gimple *new_stmt;
12631 vec_rhs2 = vec_oprnds1[i];
12633 new_temp = make_ssa_name (mask);
12634 if (bitop1 == NOP_EXPR)
12636 new_stmt = gimple_build_assign (new_temp, code,
12637 vec_rhs1, vec_rhs2);
12638 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12640 else
12642 if (bitop1 == BIT_NOT_EXPR)
12643 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12644 else
12645 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12646 vec_rhs2);
12647 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12648 if (bitop2 != NOP_EXPR)
12650 tree res = make_ssa_name (mask);
12651 if (bitop2 == BIT_NOT_EXPR)
12652 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12653 else
12654 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12655 new_temp);
12656 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12659 if (slp_node)
12660 slp_node->push_vec_def (new_stmt);
12661 else
12662 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12665 if (!slp_node)
12666 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12668 vec_oprnds0.release ();
12669 vec_oprnds1.release ();
12671 return true;
12674 /* vectorizable_comparison.
12676 Check if STMT_INFO is a comparison expression that can be vectorized.
12677 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12678 comparison, put it in VEC_STMT, and insert it at GSI.
12680 Return true if STMT_INFO is vectorizable in this way. */
12682 static bool
12683 vectorizable_comparison (vec_info *vinfo,
12684 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12685 gimple **vec_stmt,
12686 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12688 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12690 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12691 return false;
12693 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12694 return false;
12696 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12697 if (!stmt)
12698 return false;
12700 enum tree_code code = gimple_assign_rhs_code (stmt);
12701 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12702 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12703 vec_stmt, slp_node, cost_vec))
12704 return false;
12706 if (!vec_stmt)
12707 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12709 return true;
12712 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12713 can handle all live statements in the node. Otherwise return true
12714 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12715 VEC_STMT_P is as for vectorizable_live_operation. */
12717 static bool
12718 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
12719 slp_tree slp_node, slp_instance slp_node_instance,
12720 bool vec_stmt_p,
12721 stmt_vector_for_cost *cost_vec)
12723 if (slp_node)
12725 stmt_vec_info slp_stmt_info;
12726 unsigned int i;
12727 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
12729 if (STMT_VINFO_LIVE_P (slp_stmt_info)
12730 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
12731 slp_node_instance, i,
12732 vec_stmt_p, cost_vec))
12733 return false;
12736 else if (STMT_VINFO_LIVE_P (stmt_info)
12737 && !vectorizable_live_operation (vinfo, stmt_info,
12738 slp_node, slp_node_instance, -1,
12739 vec_stmt_p, cost_vec))
12740 return false;
12742 return true;
12745 /* Make sure the statement is vectorizable. */
12747 opt_result
12748 vect_analyze_stmt (vec_info *vinfo,
12749 stmt_vec_info stmt_info, bool *need_to_vectorize,
12750 slp_tree node, slp_instance node_instance,
12751 stmt_vector_for_cost *cost_vec)
12753 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12754 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
12755 bool ok;
12756 gimple_seq pattern_def_seq;
12758 if (dump_enabled_p ())
12759 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
12760 stmt_info->stmt);
12762 if (gimple_has_volatile_ops (stmt_info->stmt))
12763 return opt_result::failure_at (stmt_info->stmt,
12764 "not vectorized:"
12765 " stmt has volatile operands: %G\n",
12766 stmt_info->stmt);
12768 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12769 && node == NULL
12770 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
12772 gimple_stmt_iterator si;
12774 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
12776 stmt_vec_info pattern_def_stmt_info
12777 = vinfo->lookup_stmt (gsi_stmt (si));
12778 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
12779 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
12781 /* Analyze def stmt of STMT if it's a pattern stmt. */
12782 if (dump_enabled_p ())
12783 dump_printf_loc (MSG_NOTE, vect_location,
12784 "==> examining pattern def statement: %G",
12785 pattern_def_stmt_info->stmt);
12787 opt_result res
12788 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
12789 need_to_vectorize, node, node_instance,
12790 cost_vec);
12791 if (!res)
12792 return res;
12797 /* Skip stmts that do not need to be vectorized. In loops this is expected
12798 to include:
12799 - the COND_EXPR which is the loop exit condition
12800 - any LABEL_EXPRs in the loop
12801 - computations that are used only for array indexing or loop control.
12802 In basic blocks we only analyze statements that are a part of some SLP
12803 instance, therefore, all the statements are relevant.
12805 A pattern statement needs to be analyzed instead of the original statement
12806 if the original statement is not relevant. Otherwise, we analyze both
12807 statements. In basic blocks we are called from some SLP instance
12808 traversal; don't analyze pattern stmts there, since the pattern stmts
12809 will already be part of the SLP instance. */
12811 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
12812 if (!STMT_VINFO_RELEVANT_P (stmt_info)
12813 && !STMT_VINFO_LIVE_P (stmt_info))
12815 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12816 && pattern_stmt_info
12817 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12818 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12820 /* Analyze PATTERN_STMT instead of the original stmt. */
12821 stmt_info = pattern_stmt_info;
12822 if (dump_enabled_p ())
12823 dump_printf_loc (MSG_NOTE, vect_location,
12824 "==> examining pattern statement: %G",
12825 stmt_info->stmt);
12827 else
12829 if (dump_enabled_p ())
12830 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
12832 return opt_result::success ();
12835 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12836 && node == NULL
12837 && pattern_stmt_info
12838 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12839 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12841 /* Analyze PATTERN_STMT too. */
12842 if (dump_enabled_p ())
12843 dump_printf_loc (MSG_NOTE, vect_location,
12844 "==> examining pattern statement: %G",
12845 pattern_stmt_info->stmt);
12847 opt_result res
12848 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
12849 node_instance, cost_vec);
12850 if (!res)
12851 return res;
12854 switch (STMT_VINFO_DEF_TYPE (stmt_info))
12856 case vect_internal_def:
12857 break;
12859 case vect_reduction_def:
12860 case vect_nested_cycle:
12861 gcc_assert (!bb_vinfo
12862 && (relevance == vect_used_in_outer
12863 || relevance == vect_used_in_outer_by_reduction
12864 || relevance == vect_used_by_reduction
12865 || relevance == vect_unused_in_scope
12866 || relevance == vect_used_only_live));
12867 break;
12869 case vect_induction_def:
12870 case vect_first_order_recurrence:
12871 gcc_assert (!bb_vinfo);
12872 break;
12874 case vect_constant_def:
12875 case vect_external_def:
12876 case vect_unknown_def_type:
12877 default:
12878 gcc_unreachable ();
12881 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12882 if (node)
12883 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
12885 if (STMT_VINFO_RELEVANT_P (stmt_info))
12887 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
12888 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12889 || (call && gimple_call_lhs (call) == NULL_TREE));
12890 *need_to_vectorize = true;
12893 if (PURE_SLP_STMT (stmt_info) && !node)
12895 if (dump_enabled_p ())
12896 dump_printf_loc (MSG_NOTE, vect_location,
12897 "handled only by SLP analysis\n");
12898 return opt_result::success ();
12901 ok = true;
12902 if (!bb_vinfo
12903 && (STMT_VINFO_RELEVANT_P (stmt_info)
12904 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
12905 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12906 -mveclibabi= takes preference over library functions with
12907 the simd attribute. */
12908 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12909 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
12910 cost_vec)
12911 || vectorizable_conversion (vinfo, stmt_info,
12912 NULL, NULL, node, cost_vec)
12913 || vectorizable_operation (vinfo, stmt_info,
12914 NULL, NULL, node, cost_vec)
12915 || vectorizable_assignment (vinfo, stmt_info,
12916 NULL, NULL, node, cost_vec)
12917 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12918 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12919 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12920 node, node_instance, cost_vec)
12921 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
12922 NULL, node, cost_vec)
12923 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12924 || vectorizable_condition (vinfo, stmt_info,
12925 NULL, NULL, node, cost_vec)
12926 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12927 cost_vec)
12928 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12929 stmt_info, NULL, node)
12930 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12931 stmt_info, NULL, node, cost_vec));
12932 else
12934 if (bb_vinfo)
12935 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12936 || vectorizable_simd_clone_call (vinfo, stmt_info,
12937 NULL, NULL, node, cost_vec)
12938 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
12939 cost_vec)
12940 || vectorizable_shift (vinfo, stmt_info,
12941 NULL, NULL, node, cost_vec)
12942 || vectorizable_operation (vinfo, stmt_info,
12943 NULL, NULL, node, cost_vec)
12944 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
12945 cost_vec)
12946 || vectorizable_load (vinfo, stmt_info,
12947 NULL, NULL, node, cost_vec)
12948 || vectorizable_store (vinfo, stmt_info,
12949 NULL, NULL, node, cost_vec)
12950 || vectorizable_condition (vinfo, stmt_info,
12951 NULL, NULL, node, cost_vec)
12952 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12953 cost_vec)
12954 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12957 if (node)
12958 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12960 if (!ok)
12961 return opt_result::failure_at (stmt_info->stmt,
12962 "not vectorized:"
12963 " relevant stmt not supported: %G",
12964 stmt_info->stmt);
12966 /* Stmts that are (also) "live" (i.e. used outside the loop)
12967 need extra handling, except for vectorizable reductions. */
12968 if (!bb_vinfo
12969 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
12970 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
12971 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
12972 stmt_info, node, node_instance,
12973 false, cost_vec))
12974 return opt_result::failure_at (stmt_info->stmt,
12975 "not vectorized:"
12976 " live stmt not supported: %G",
12977 stmt_info->stmt);
12979 return opt_result::success ();
12983 /* Function vect_transform_stmt.
12985 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
12987 bool
12988 vect_transform_stmt (vec_info *vinfo,
12989 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12990 slp_tree slp_node, slp_instance slp_node_instance)
12992 bool is_store = false;
12993 gimple *vec_stmt = NULL;
12994 bool done;
12996 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
12998 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12999 if (slp_node)
13000 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
13002 switch (STMT_VINFO_TYPE (stmt_info))
13004 case type_demotion_vec_info_type:
13005 case type_promotion_vec_info_type:
13006 case type_conversion_vec_info_type:
13007 done = vectorizable_conversion (vinfo, stmt_info,
13008 gsi, &vec_stmt, slp_node, NULL);
13009 gcc_assert (done);
13010 break;
13012 case induc_vec_info_type:
13013 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
13014 stmt_info, &vec_stmt, slp_node,
13015 NULL);
13016 gcc_assert (done);
13017 break;
13019 case shift_vec_info_type:
13020 done = vectorizable_shift (vinfo, stmt_info,
13021 gsi, &vec_stmt, slp_node, NULL);
13022 gcc_assert (done);
13023 break;
13025 case op_vec_info_type:
13026 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13027 NULL);
13028 gcc_assert (done);
13029 break;
13031 case assignment_vec_info_type:
13032 done = vectorizable_assignment (vinfo, stmt_info,
13033 gsi, &vec_stmt, slp_node, NULL);
13034 gcc_assert (done);
13035 break;
13037 case load_vec_info_type:
13038 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13039 NULL);
13040 gcc_assert (done);
13041 break;
13043 case store_vec_info_type:
13044 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
13045 && !slp_node
13046 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
13047 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
13048 /* In case of interleaving, the whole chain is vectorized when the
13049 last store in the chain is reached. Store stmts before the last
13050 one are skipped, and their vec_stmt_info shouldn't be freed
13051 in the meantime. */
13053 else
13055 done = vectorizable_store (vinfo, stmt_info,
13056 gsi, &vec_stmt, slp_node, NULL);
13057 gcc_assert (done);
13058 is_store = true;
13060 break;
13062 case condition_vec_info_type:
13063 done = vectorizable_condition (vinfo, stmt_info,
13064 gsi, &vec_stmt, slp_node, NULL);
13065 gcc_assert (done);
13066 break;
13068 case comparison_vec_info_type:
13069 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
13070 slp_node, NULL);
13071 gcc_assert (done);
13072 break;
13074 case call_vec_info_type:
13075 done = vectorizable_call (vinfo, stmt_info,
13076 gsi, &vec_stmt, slp_node, NULL);
13077 break;
13079 case call_simd_clone_vec_info_type:
13080 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
13081 slp_node, NULL);
13082 break;
13084 case reduc_vec_info_type:
13085 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13086 gsi, &vec_stmt, slp_node);
13087 gcc_assert (done);
13088 break;
13090 case cycle_phi_info_type:
13091 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
13092 &vec_stmt, slp_node, slp_node_instance);
13093 gcc_assert (done);
13094 break;
13096 case lc_phi_info_type:
13097 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13098 stmt_info, &vec_stmt, slp_node);
13099 gcc_assert (done);
13100 break;
13102 case recurr_info_type:
13103 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13104 stmt_info, &vec_stmt, slp_node, NULL);
13105 gcc_assert (done);
13106 break;
13108 case phi_info_type:
13109 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
13110 gcc_assert (done);
13111 break;
13113 default:
13114 if (!STMT_VINFO_LIVE_P (stmt_info))
13116 if (dump_enabled_p ())
13117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13118 "stmt not supported.\n");
13119 gcc_unreachable ();
13121 done = true;
13124 if (!slp_node && vec_stmt)
13125 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
13127 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
13129 /* Handle stmts whose DEF is used outside the loop-nest that is
13130 being vectorized. */
13131 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13132 slp_node_instance, true, NULL);
13133 gcc_assert (done);
13136 if (slp_node)
13137 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13139 return is_store;
13143 /* Remove a group of stores (for SLP or interleaving), free their
13144 stmt_vec_info. */
13146 void
13147 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13149 stmt_vec_info next_stmt_info = first_stmt_info;
13151 while (next_stmt_info)
13153 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13154 next_stmt_info = vect_orig_stmt (next_stmt_info);
13155 /* Free the attached stmt_vec_info and remove the stmt. */
13156 vinfo->remove_stmt (next_stmt_info);
13157 next_stmt_info = tmp;
13161 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13162 elements of type SCALAR_TYPE, or null if the target doesn't support
13163 such a type.
13165 If NUNITS is zero, return a vector type that contains elements of
13166 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13168 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13169 for this vectorization region and want to "autodetect" the best choice.
13170 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13171 and we want the new type to be interoperable with it. PREVAILING_MODE
13172 in this case can be a scalar integer mode or a vector mode; when it
13173 is a vector mode, the function acts like a tree-level version of
13174 related_vector_mode. */
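/* For illustration (a sketch, not a call taken from this file): on a
   hypothetical target whose preferred SIMD width is 128 bits, requesting
   NUNITS == 4 elements of a 32-bit integer SCALAR_TYPE yields a
   4 x int vector type, whereas PREVAILING_MODE == VOIDmode together with
   NUNITS == 0 lets targetm.vectorize.preferred_simd_mode pick the vector
   size, as handled below.  */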
13176 tree
13177 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13178 tree scalar_type, poly_uint64 nunits)
13180 tree orig_scalar_type = scalar_type;
13181 scalar_mode inner_mode;
13182 machine_mode simd_mode;
13183 tree vectype;
13185 if ((!INTEGRAL_TYPE_P (scalar_type)
13186 && !POINTER_TYPE_P (scalar_type)
13187 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13188 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13189 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13190 return NULL_TREE;
13192 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13194 /* Interoperability between modes requires one to be a constant multiple
13195 of the other, so that the number of vectors required for each operation
13196 is a compile-time constant. */
13197 if (prevailing_mode != VOIDmode
13198 && !constant_multiple_p (nunits * nbytes,
13199 GET_MODE_SIZE (prevailing_mode))
13200 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13201 nunits * nbytes))
13202 return NULL_TREE;
13204 /* For vector types of elements whose mode precision doesn't
13205 match their type's precision we use an element type of mode
13206 precision. The vectorization routines will have to make sure
13207 they support the proper result truncation/extension.
13208 We also make sure to build vector types with INTEGER_TYPE
13209 component type only. */
13210 if (INTEGRAL_TYPE_P (scalar_type)
13211 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13212 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13213 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13214 TYPE_UNSIGNED (scalar_type));
13216 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13217 When the component mode passes the above test simply use a type
13218 corresponding to that mode. The theory is that any use that
13219 would cause problems with this will disable vectorization anyway. */
13220 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13221 && !INTEGRAL_TYPE_P (scalar_type))
13222 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13224 /* We can't build a vector type of elements with alignment bigger than
13225 their size. */
13226 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13227 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13228 TYPE_UNSIGNED (scalar_type));
13230 /* If we fell back to using the mode, fail if there was
13231 no scalar type for it. */
13232 if (scalar_type == NULL_TREE)
13233 return NULL_TREE;
13235 /* If no prevailing mode was supplied, use the mode the target prefers.
13236 Otherwise lookup a vector mode based on the prevailing mode. */
13237 if (prevailing_mode == VOIDmode)
13239 gcc_assert (known_eq (nunits, 0U));
13240 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13241 if (SCALAR_INT_MODE_P (simd_mode))
13243 /* Traditional behavior is not to take the integer mode
13244 literally, but simply to use it as a way of determining
13245 the vector size. It is up to mode_for_vector to decide
13246 what the TYPE_MODE should be.
13248 Note that nunits == 1 is allowed in order to support single
13249 element vector types. */
13250 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
13251 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13252 return NULL_TREE;
13255 else if (SCALAR_INT_MODE_P (prevailing_mode)
13256 || !related_vector_mode (prevailing_mode,
13257 inner_mode, nunits).exists (&simd_mode))
13259 /* Fall back to using mode_for_vector, mostly in the hope of being
13260 able to use an integer mode. */
13261 if (known_eq (nunits, 0U)
13262 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13263 return NULL_TREE;
13265 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13266 return NULL_TREE;
13269 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13271 /* In cases where the mode was chosen by mode_for_vector, check that
13272 the target actually supports the chosen mode, or that it at least
13273 allows the vector mode to be replaced by a like-sized integer. */
13274 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13275 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13276 return NULL_TREE;
13278 /* Re-attach the address-space qualifier if we canonicalized the scalar
13279 type. */
13280 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13281 return build_qualified_type
13282 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13284 return vectype;
13287 /* Function get_vectype_for_scalar_type.
13289 Returns the vector type corresponding to SCALAR_TYPE as supported
13290 by the target. If GROUP_SIZE is nonzero and we're performing BB
13291 vectorization, make sure that the number of elements in the vector
13292 is no bigger than GROUP_SIZE. */
13294 tree
13295 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13296 unsigned int group_size)
13298 /* For BB vectorization, we should always have a group size once we've
13299 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13300 are tentative requests during things like early data reference
13301 analysis and pattern recognition. */
13302 if (is_a <bb_vec_info> (vinfo))
13303 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13304 else
13305 group_size = 0;
13307 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13308 scalar_type);
13309 if (vectype && vinfo->vector_mode == VOIDmode)
13310 vinfo->vector_mode = TYPE_MODE (vectype);
13312 /* Register the natural choice of vector type, before the group size
13313 has been applied. */
13314 if (vectype)
13315 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13317 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13318 try again with an explicit number of elements. */
13319 if (vectype
13320 && group_size
13321 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13323 /* Start with the biggest number of units that fits within
13324 GROUP_SIZE and halve it until we find a valid vector type.
13325 Usually either the first attempt will succeed or all will
13326 fail (in the latter case because GROUP_SIZE is too small
13327 for the target), but it's possible that a target could have
13328 a hole between supported vector types.
13330 If GROUP_SIZE is not a power of 2, this has the effect of
13331 trying the largest power of 2 that fits within the group,
13332 even though the group is not a multiple of that vector size.
13333 The BB vectorizer will then try to carve up the group into
13334 smaller pieces. */
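      /* As a worked example (illustration only): for GROUP_SIZE == 6 the
	 loop below starts at nunits == 1 << floor_log2 (6) == 4 and, if no
	 4-element vector of SCALAR_TYPE exists, retries with nunits == 2
	 before giving up.  */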
13335 unsigned int nunits = 1 << floor_log2 (group_size);
13338 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13339 scalar_type, nunits);
13340 nunits /= 2;
13342 while (nunits > 1 && !vectype);
13345 return vectype;
13348 /* Return the vector type corresponding to SCALAR_TYPE as supported
13349 by the target. NODE, if nonnull, is the SLP tree node that will
13350 use the returned vector type. */
13352 tree
13353 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13355 unsigned int group_size = 0;
13356 if (node)
13357 group_size = SLP_TREE_LANES (node);
13358 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13361 /* Function get_mask_type_for_scalar_type.
13363 Returns the mask type corresponding to a result of comparison
13364 of vectors of specified SCALAR_TYPE as supported by target.
13365 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13366 make sure that the number of elements in the vector is no bigger
13367 than GROUP_SIZE. */
13369 tree
13370 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13371 unsigned int group_size)
13373 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13375 if (!vectype)
13376 return NULL;
13378 return truth_type_for (vectype);
13381 /* Function get_mask_type_for_scalar_type.
13383 Returns the mask type corresponding to a result of comparison
13384 of vectors of specified SCALAR_TYPE as supported by target.
13385 NODE, if nonnull, is the SLP tree node that will use the returned
13386 vector type. */
13388 tree
13389 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13390 slp_tree node)
13392 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
13394 if (!vectype)
13395 return NULL;
13397 return truth_type_for (vectype);
13400 /* Function get_same_sized_vectype
13402 Returns a vector type corresponding to SCALAR_TYPE of size
13403 VECTOR_TYPE if supported by the target. */
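/* For instance (illustration only): given a 16-byte VECTOR_TYPE and a
   4-byte SCALAR_TYPE, the division of mode sizes below yields nunits == 4
   and the result is a 4-element vector of SCALAR_TYPE.  */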
13405 tree
13406 get_same_sized_vectype (tree scalar_type, tree vector_type)
13408 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13409 return truth_type_for (vector_type);
13411 poly_uint64 nunits;
13412 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
13413 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13414 return NULL_TREE;
13416 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13417 scalar_type, nunits);
13420 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13421 would not change the chosen vector modes. */
13423 bool
13424 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13426 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13427 i != vinfo->used_vector_modes.end (); ++i)
13428 if (!VECTOR_MODE_P (*i)
13429 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13430 return false;
13431 return true;
13434 /* Function vect_is_simple_use.
13436 Input:
13437 VINFO - the vect info of the loop or basic block that is being vectorized.
13438 OPERAND - operand in the loop or bb.
13439 Output:
13440 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13441 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13442 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13443 the definition could be anywhere in the function
13444 DT - the type of definition
13446 Returns whether a stmt with OPERAND can be vectorized.
13447 For loops, supportable operands are constants, loop invariants, and operands
13448 that are defined by the current iteration of the loop. Unsupportable
13449 operands are those that are defined by a previous iteration of the loop (as
13450 is the case in reduction/induction computations).
13451 For basic blocks, supportable operands are constants and bb invariants.
13452 For now, operands defined outside the basic block are not supported. */
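/* A typical (purely illustrative) call from a vectorizable_* routine,
   passing explicit NULLs for the outputs that are not needed:

     enum vect_def_type dt;
     if (!vect_is_simple_use (op, vinfo, &dt, NULL, NULL))
       return false;

   The overloads further below additionally return the operand's vector
   type and, for SLP, the defining SLP child node.  */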
13454 bool
13455 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13456 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
13458 if (def_stmt_info_out)
13459 *def_stmt_info_out = NULL;
13460 if (def_stmt_out)
13461 *def_stmt_out = NULL;
13462 *dt = vect_unknown_def_type;
13464 if (dump_enabled_p ())
13466 dump_printf_loc (MSG_NOTE, vect_location,
13467 "vect_is_simple_use: operand ");
13468 if (TREE_CODE (operand) == SSA_NAME
13469 && !SSA_NAME_IS_DEFAULT_DEF (operand))
13470 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13471 else
13472 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13475 if (CONSTANT_CLASS_P (operand))
13476 *dt = vect_constant_def;
13477 else if (is_gimple_min_invariant (operand))
13478 *dt = vect_external_def;
13479 else if (TREE_CODE (operand) != SSA_NAME)
13480 *dt = vect_unknown_def_type;
13481 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13482 *dt = vect_external_def;
13483 else
13485 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13486 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13487 if (!stmt_vinfo)
13488 *dt = vect_external_def;
13489 else
13491 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
13492 def_stmt = stmt_vinfo->stmt;
13493 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13494 if (def_stmt_info_out)
13495 *def_stmt_info_out = stmt_vinfo;
13497 if (def_stmt_out)
13498 *def_stmt_out = def_stmt;
13501 if (dump_enabled_p ())
13503 dump_printf (MSG_NOTE, ", type of def: ");
13504 switch (*dt)
13506 case vect_uninitialized_def:
13507 dump_printf (MSG_NOTE, "uninitialized\n");
13508 break;
13509 case vect_constant_def:
13510 dump_printf (MSG_NOTE, "constant\n");
13511 break;
13512 case vect_external_def:
13513 dump_printf (MSG_NOTE, "external\n");
13514 break;
13515 case vect_internal_def:
13516 dump_printf (MSG_NOTE, "internal\n");
13517 break;
13518 case vect_induction_def:
13519 dump_printf (MSG_NOTE, "induction\n");
13520 break;
13521 case vect_reduction_def:
13522 dump_printf (MSG_NOTE, "reduction\n");
13523 break;
13524 case vect_double_reduction_def:
13525 dump_printf (MSG_NOTE, "double reduction\n");
13526 break;
13527 case vect_nested_cycle:
13528 dump_printf (MSG_NOTE, "nested cycle\n");
13529 break;
13530 case vect_first_order_recurrence:
13531 dump_printf (MSG_NOTE, "first order recurrence\n");
13532 break;
13533 case vect_unknown_def_type:
13534 dump_printf (MSG_NOTE, "unknown\n");
13535 break;
13539 if (*dt == vect_unknown_def_type)
13541 if (dump_enabled_p ())
13542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13543 "Unsupported pattern.\n");
13544 return false;
13547 return true;
13550 /* Function vect_is_simple_use.
13552 Same as vect_is_simple_use but also determines the vector operand
13553 type of OPERAND and stores it to *VECTYPE. If the definition of
13554 OPERAND is vect_uninitialized_def, vect_constant_def or
13555 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
13556 is responsible for computing the best suited vector type for the
13557 scalar operand. */
13559 bool
13560 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13561 tree *vectype, stmt_vec_info *def_stmt_info_out,
13562 gimple **def_stmt_out)
13564 stmt_vec_info def_stmt_info;
13565 gimple *def_stmt;
13566 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
13567 return false;
13569 if (def_stmt_out)
13570 *def_stmt_out = def_stmt;
13571 if (def_stmt_info_out)
13572 *def_stmt_info_out = def_stmt_info;
13574 /* Now get a vector type if the def is internal, otherwise supply
13575 NULL_TREE and leave it up to the caller to figure out a proper
13576 type for the use stmt. */
13577 if (*dt == vect_internal_def
13578 || *dt == vect_induction_def
13579 || *dt == vect_reduction_def
13580 || *dt == vect_double_reduction_def
13581 || *dt == vect_nested_cycle
13582 || *dt == vect_first_order_recurrence)
13584 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
13585 gcc_assert (*vectype != NULL_TREE);
13586 if (dump_enabled_p ())
13587 dump_printf_loc (MSG_NOTE, vect_location,
13588 "vect_is_simple_use: vectype %T\n", *vectype);
13590 else if (*dt == vect_uninitialized_def
13591 || *dt == vect_constant_def
13592 || *dt == vect_external_def)
13593 *vectype = NULL_TREE;
13594 else
13595 gcc_unreachable ();
13597 return true;
13600 /* Function vect_is_simple_use.
13602 Same as vect_is_simple_use but determines the operand by operand
13603 position OPERAND from either STMT or SLP_NODE, filling in *OP
13604 and *SLP_DEF (when SLP_NODE is not NULL). */
13606 bool
13607 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13608 unsigned operand, tree *op, slp_tree *slp_def,
13609 enum vect_def_type *dt,
13610 tree *vectype, stmt_vec_info *def_stmt_info_out)
13612 if (slp_node)
13614 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13615 *slp_def = child;
13616 *vectype = SLP_TREE_VECTYPE (child);
13617 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13619 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13620 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
13622 else
13624 if (def_stmt_info_out)
13625 *def_stmt_info_out = NULL;
13626 *op = SLP_TREE_SCALAR_OPS (child)[0];
13627 *dt = SLP_TREE_DEF_TYPE (child);
13628 return true;
13631 else
13633 *slp_def = NULL;
13634 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
13636 if (gimple_assign_rhs_code (ass) == COND_EXPR
13637 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
13639 if (operand < 2)
13640 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
13641 else
13642 *op = gimple_op (ass, operand);
13644 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
13645 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
13646 else
13647 *op = gimple_op (ass, operand + 1);
13649 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
13650 *op = gimple_call_arg (call, operand);
13651 else
13652 gcc_unreachable ();
13653 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
13657 /* If OP is not NULL and is external or constant update its vector
13658 type with VECTYPE. Returns true if successful or false if not,
13659 for example when conflicting vector types are present. */
13661 bool
13662 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13664 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13665 return true;
13666 if (SLP_TREE_VECTYPE (op))
13667 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
13668 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13669 should be handled by patterns. Allow vect_constant_def for now. */
13670 if (VECTOR_BOOLEAN_TYPE_P (vectype)
13671 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
13672 return false;
13673 SLP_TREE_VECTYPE (op) = vectype;
13674 return true;
13677 /* Function supportable_widening_operation
13679 Check whether an operation represented by the code CODE is a
13680 widening operation that is supported by the target platform in
13681 vector form (i.e., when operating on arguments of type VECTYPE_IN
13682 producing a result of type VECTYPE_OUT).
13684 Widening operations we currently support are NOP (CONVERT), FLOAT,
13685 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13686 are supported by the target platform either directly (via vector
13687 tree-codes), or via target builtins.
13689 Output:
13690 - CODE1 and CODE2 are codes of vector operations to be used when
13691 vectorizing the operation, if available.
13692 - MULTI_STEP_CVT determines the number of required intermediate steps in
13693 case of multi-step conversion (like char->short->int - in that case
13694 MULTI_STEP_CVT will be 1).
13695 - INTERM_TYPES contains the intermediate type required to perform the
13696 widening operation (short in the above example). */
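/* Worked example (illustration only): widening a vector of chars to a
   vector of ints as a CONVERT cannot usually be done in one step, so the
   function would return the VEC_UNPACK_LO/HI pair in CODE1/CODE2, set
   *MULTI_STEP_CVT to 1 and push the intermediate short vector type onto
   INTERM_TYPES, matching the char->short->int sequence described above.  */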
13698 bool
13699 supportable_widening_operation (vec_info *vinfo,
13700 code_helper code,
13701 stmt_vec_info stmt_info,
13702 tree vectype_out, tree vectype_in,
13703 code_helper *code1,
13704 code_helper *code2,
13705 int *multi_step_cvt,
13706 vec<tree> *interm_types)
13708 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
13709 class loop *vect_loop = NULL;
13710 machine_mode vec_mode;
13711 enum insn_code icode1, icode2;
13712 optab optab1 = unknown_optab, optab2 = unknown_optab;
13713 tree vectype = vectype_in;
13714 tree wide_vectype = vectype_out;
13715 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
13716 int i;
13717 tree prev_type, intermediate_type;
13718 machine_mode intermediate_mode, prev_mode;
13719 optab optab3, optab4;
13721 *multi_step_cvt = 0;
13722 if (loop_info)
13723 vect_loop = LOOP_VINFO_LOOP (loop_info);
13725 switch (code.safe_as_tree_code ())
13727 case MAX_TREE_CODES:
13728 /* Don't set c1 and c2 if code is not a tree_code. */
13729 break;
13731 case WIDEN_MULT_EXPR:
13732 /* The result of a vectorized widening operation usually requires
13733 two vectors (because the widened results do not fit into one vector).
13734 The generated vector results would normally be expected to be
13735 generated in the same order as in the original scalar computation,
13736 i.e. if 8 results are generated in each vector iteration, they are
13737 to be organized as follows:
13738 vect1: [res1,res2,res3,res4],
13739 vect2: [res5,res6,res7,res8].
13741 However, in the special case that the result of the widening
13742 operation is used in a reduction computation only, the order doesn't
13743 matter (because when vectorizing a reduction we change the order of
13744 the computation). Some targets can take advantage of this and
13745 generate more efficient code. For example, targets like Altivec,
13746 that support widen_mult using a sequence of {mult_even,mult_odd}
13747 generate the following vectors:
13748 vect1: [res1,res3,res5,res7],
13749 vect2: [res2,res4,res6,res8].
13751 When vectorizing outer-loops, we execute the inner-loop sequentially
13752 (each vectorized inner-loop iteration contributes to VF outer-loop
13753 iterations in parallel). We therefore don't allow changing the
13754 order of the computation in the inner-loop during outer-loop
13755 vectorization. */
13756 /* TODO: Another case in which order doesn't *really* matter is when we
13757 widen and then contract again, e.g. (short)((int)x * y >> 8).
13758 Normally, pack_trunc performs an even/odd permute, whereas the
13759 repack from an even/odd expansion would be an interleave, which
13760 would be significantly simpler for e.g. AVX2. */
13761 /* In any case, in order to avoid duplicating the code below, recurse
13762 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13763 are properly set up for the caller. If we fail, we'll continue with
13764 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13765 if (vect_loop
13766 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
13767 && !nested_in_vect_loop_p (vect_loop, stmt_info)
13768 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
13769 stmt_info, vectype_out,
13770 vectype_in, code1,
13771 code2, multi_step_cvt,
13772 interm_types))
13774 /* Elements in a vector with vect_used_by_reduction property cannot
13775 be reordered if the use chain with this property does not have the
13776 same operation. One such example is s += a * b, where elements
13777 in a and b cannot be reordered. Here we check if the vector defined
13778 by STMT is only directly used in the reduction statement. */
13779 tree lhs = gimple_assign_lhs (stmt_info->stmt);
13780 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
13781 if (use_stmt_info
13782 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
13783 return true;
13785 c1 = VEC_WIDEN_MULT_LO_EXPR;
13786 c2 = VEC_WIDEN_MULT_HI_EXPR;
13787 break;
13789 case DOT_PROD_EXPR:
13790 c1 = DOT_PROD_EXPR;
13791 c2 = DOT_PROD_EXPR;
13792 break;
13794 case SAD_EXPR:
13795 c1 = SAD_EXPR;
13796 c2 = SAD_EXPR;
13797 break;
13799 case VEC_WIDEN_MULT_EVEN_EXPR:
13800 /* Support the recursion induced just above. */
13801 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
13802 c2 = VEC_WIDEN_MULT_ODD_EXPR;
13803 break;
13805 case WIDEN_LSHIFT_EXPR:
13806 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
13807 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
13808 break;
13810 CASE_CONVERT:
13811 c1 = VEC_UNPACK_LO_EXPR;
13812 c2 = VEC_UNPACK_HI_EXPR;
13813 break;
13815 case FLOAT_EXPR:
13816 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
13817 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
13818 break;
13820 case FIX_TRUNC_EXPR:
13821 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
13822 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
13823 break;
13825 default:
13826 gcc_unreachable ();
13829 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
13830 std::swap (c1, c2);
13832 if (code == FIX_TRUNC_EXPR)
13834 /* The signedness is determined from the output operand. */
13835 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13836 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13838 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
13839 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13840 && VECTOR_BOOLEAN_TYPE_P (vectype)
13841 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13842 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13844 /* If the input and result modes are the same, a different optab
13845 is needed where we pass in the number of units in vectype. */
13846 optab1 = vec_unpacks_sbool_lo_optab;
13847 optab2 = vec_unpacks_sbool_hi_optab;
13850 vec_mode = TYPE_MODE (vectype);
13851 if (widening_fn_p (code))
13853 /* If this is an internal fn then we must check whether the target
13854 supports either a low-high split or an even-odd split. */
13855 internal_fn ifn = as_internal_fn ((combined_fn) code);
13857 internal_fn lo, hi, even, odd;
13858 lookup_hilo_internal_fn (ifn, &lo, &hi);
13859 *code1 = as_combined_fn (lo);
13860 *code2 = as_combined_fn (hi);
13861 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
13862 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
13864 /* If we don't support low-high, then check for even-odd. */
13865 if (!optab1
13866 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13867 || !optab2
13868 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13870 lookup_evenodd_internal_fn (ifn, &even, &odd);
13871 *code1 = as_combined_fn (even);
13872 *code2 = as_combined_fn (odd);
13873 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
13874 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
13877 else if (code.is_tree_code ())
13879 if (code == FIX_TRUNC_EXPR)
13881 /* The signedness is determined from the output operand. */
13882 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13883 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13885 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
13886 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13887 && VECTOR_BOOLEAN_TYPE_P (vectype)
13888 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13889 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13891 /* If the input and result modes are the same, a different optab
13892 is needed where we pass in the number of units in vectype. */
13893 optab1 = vec_unpacks_sbool_lo_optab;
13894 optab2 = vec_unpacks_sbool_hi_optab;
13896 else
13898 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13899 optab2 = optab_for_tree_code (c2, vectype, optab_default);
13901 *code1 = c1;
13902 *code2 = c2;
13905 if (!optab1 || !optab2)
13906 return false;
13908 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13909 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13910 return false;
13913 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13914 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13916 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13917 return true;
13918 /* For scalar masks we may have different boolean
13919 vector types having the same QImode. Thus we
13920 add an additional check on the number of elements. */
13921 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
13922 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13923 return true;
13926 /* Check if it's a multi-step conversion that can be done using intermediate
13927 types. */
13929 prev_type = vectype;
13930 prev_mode = vec_mode;
13932 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
13933 return false;
13935 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13936 intermediate steps in the promotion sequence. We try
13937 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
13938 not. */
13939 interm_types->create (MAX_INTERM_CVT_STEPS);
13940 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13942 intermediate_mode = insn_data[icode1].operand[0].mode;
13943 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13944 intermediate_type
13945 = vect_halve_mask_nunits (prev_type, intermediate_mode);
13946 else if (VECTOR_MODE_P (intermediate_mode))
13948 tree intermediate_element_type
13949 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
13950 TYPE_UNSIGNED (prev_type));
13951 intermediate_type
13952 = build_vector_type_for_mode (intermediate_element_type,
13953 intermediate_mode);
13955 else
13956 intermediate_type
13957 = lang_hooks.types.type_for_mode (intermediate_mode,
13958 TYPE_UNSIGNED (prev_type));
13960 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13961 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13962 && intermediate_mode == prev_mode
13963 && SCALAR_INT_MODE_P (prev_mode))
13965 /* If the input and result modes are the same, a different optab
13966 is needed where we pass in the number of units in vectype. */
13967 optab3 = vec_unpacks_sbool_lo_optab;
13968 optab4 = vec_unpacks_sbool_hi_optab;
13970 else
13972 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
13973 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
13976 if (!optab3 || !optab4
13977 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
13978 || insn_data[icode1].operand[0].mode != intermediate_mode
13979 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
13980 || insn_data[icode2].operand[0].mode != intermediate_mode
13981 || ((icode1 = optab_handler (optab3, intermediate_mode))
13982 == CODE_FOR_nothing)
13983 || ((icode2 = optab_handler (optab4, intermediate_mode))
13984 == CODE_FOR_nothing))
13985 break;
13987 interm_types->quick_push (intermediate_type);
13988 (*multi_step_cvt)++;
13990 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13991 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13993 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13994 return true;
13995 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
13996 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13997 return true;
14000 prev_type = intermediate_type;
14001 prev_mode = intermediate_mode;
14004 interm_types->release ();
14005 return false;
14009 /* Function supportable_narrowing_operation
14011 Check whether an operation represented by the code CODE is a
14012 narrowing operation that is supported by the target platform in
14013 vector form (i.e., when operating on arguments of type VECTYPE_IN
14014 and producing a result of type VECTYPE_OUT).
14016 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14017 and FLOAT. This function checks if these operations are supported by
14018 the target platform directly via vector tree-codes.
14020 Output:
14021 - CODE1 is the code of a vector operation to be used when
14022 vectorizing the operation, if available.
14023 - MULTI_STEP_CVT determines the number of required intermediate steps in
14024 case of multi-step conversion (like int->short->char - in that case
14025 MULTI_STEP_CVT will be 1).
14026 - INTERM_TYPES contains the intermediate type required to perform the
14027 narrowing operation (short in the above example). */
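/* Worked example (illustration only): narrowing a vector of ints to a
   vector of chars as a CONVERT would return VEC_PACK_TRUNC_EXPR in CODE1,
   set *MULTI_STEP_CVT to 1 and record the intermediate short vector type
   in INTERM_TYPES, matching the int->short->char sequence described
   above.  */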
14029 bool
14030 supportable_narrowing_operation (code_helper code,
14031 tree vectype_out, tree vectype_in,
14032 code_helper *code1, int *multi_step_cvt,
14033 vec<tree> *interm_types)
14035 machine_mode vec_mode;
14036 enum insn_code icode1;
14037 optab optab1, interm_optab;
14038 tree vectype = vectype_in;
14039 tree narrow_vectype = vectype_out;
14040 enum tree_code c1;
14041 tree intermediate_type, prev_type;
14042 machine_mode intermediate_mode, prev_mode;
14043 int i;
14044 unsigned HOST_WIDE_INT n_elts;
14045 bool uns;
14047 if (!code.is_tree_code ())
14048 return false;
14050 *multi_step_cvt = 0;
14051 switch ((tree_code) code)
14053 CASE_CONVERT:
14054 c1 = VEC_PACK_TRUNC_EXPR;
14055 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
14056 && VECTOR_BOOLEAN_TYPE_P (vectype)
14057 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
14058 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
14059 && n_elts < BITS_PER_UNIT)
14060 optab1 = vec_pack_sbool_trunc_optab;
14061 else
14062 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14063 break;
14065 case FIX_TRUNC_EXPR:
14066 c1 = VEC_PACK_FIX_TRUNC_EXPR;
14067 /* The signedness is determined from the output operand. */
14068 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14069 break;
14071 case FLOAT_EXPR:
14072 c1 = VEC_PACK_FLOAT_EXPR;
14073 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14074 break;
14076 default:
14077 gcc_unreachable ();
14080 if (!optab1)
14081 return false;
14083 vec_mode = TYPE_MODE (vectype);
14084 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
14085 return false;
14087 *code1 = c1;
14089 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14091 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14092 return true;
14093 /* For scalar masks we may have different boolean
14094 vector types having the same QImode.  Thus we
14095 add an additional check on the number of elements. */
14096 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14097 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14098 return true;
14101 if (code == FLOAT_EXPR)
14102 return false;
14104 /* Check if it's a multi-step conversion that can be done using intermediate
14105 types. */
14106 prev_mode = vec_mode;
14107 prev_type = vectype;
14108 if (code == FIX_TRUNC_EXPR)
14109 uns = TYPE_UNSIGNED (vectype_out);
14110 else
14111 uns = TYPE_UNSIGNED (vectype);
14113 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14114 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14115 costly than signed. */
14116 if (code == FIX_TRUNC_EXPR && uns)
14118 enum insn_code icode2;
14120 intermediate_type
14121 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14122 interm_optab
14123 = optab_for_tree_code (c1, intermediate_type, optab_default);
14124 if (interm_optab != unknown_optab
14125 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
14126 && insn_data[icode1].operand[0].mode
14127 == insn_data[icode2].operand[0].mode)
14129 uns = false;
14130 optab1 = interm_optab;
14131 icode1 = icode2;
14135 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14136 intermediate steps in the narrowing sequence.  We try
14137 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14138 interm_types->create (MAX_INTERM_CVT_STEPS);
14139 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14141 intermediate_mode = insn_data[icode1].operand[0].mode;
14142 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14143 intermediate_type
14144 = vect_double_mask_nunits (prev_type, intermediate_mode);
14145 else
14146 intermediate_type
14147 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14148 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14149 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14150 && SCALAR_INT_MODE_P (prev_mode)
14151 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
14152 && n_elts < BITS_PER_UNIT)
14153 interm_optab = vec_pack_sbool_trunc_optab;
14154 else
14155 interm_optab
14156 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14157 optab_default);
14158 if (!interm_optab
14159 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
14160 || insn_data[icode1].operand[0].mode != intermediate_mode
14161 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
14162 == CODE_FOR_nothing))
14163 break;
14165 interm_types->quick_push (intermediate_type);
14166 (*multi_step_cvt)++;
14168 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14170 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14171 return true;
14172 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14173 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14174 return true;
14177 prev_mode = intermediate_mode;
14178 prev_type = intermediate_type;
14179 optab1 = interm_optab;
14182 interm_types->release ();
14183 return false;
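
/* Illustrative sketch, not part of tree-vect-stmts.cc: one way a caller
   might query supportable_narrowing_operation for an int -> char
   narrowing.  The vector types are assumed to come from
   get_vectype_for_scalar_type; for a two-step narrowing
   (int -> short -> char), MULTI_STEP_CVT is returned as 1 and
   INTERM_TYPES holds the intermediate short vector type.  */

static bool
example_check_int_to_char_narrowing (tree char_vectype, tree int_vectype)
{
  code_helper code1;
  int multi_step_cvt = 0;
  auto_vec<tree> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, char_vectype, int_vectype,
					&code1, &multi_step_cvt,
					&interm_types))
    return false;
  /* CODE1 is VEC_PACK_TRUNC_EXPR here; each step packs two input vectors
     into one vector with half-width elements.  */
  return true;
}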
14186 /* Generate and return a vector mask of MASK_TYPE such that
14187 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14188 Add the statements to SEQ. */
14190 tree
14191 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14192 tree end_index, const char *name)
14194 tree cmp_type = TREE_TYPE (start_index);
14195 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14196 cmp_type, mask_type,
14197 OPTIMIZE_FOR_SPEED));
14198 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14199 start_index, end_index,
14200 build_zero_cst (mask_type));
14201 tree tmp;
14202 if (name)
14203 tmp = make_temp_ssa_name (mask_type, NULL, name);
14204 else
14205 tmp = make_ssa_name (mask_type);
14206 gimple_call_set_lhs (call, tmp);
14207 gimple_seq_add_stmt (seq, call);
14208 return tmp;
14211 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14212 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
14214 tree
14215 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14216 tree end_index)
14218 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14219 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
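
/* Illustrative sketch, not part of tree-vect-stmts.cc: using vect_gen_while
   to build the mask for a partially-populated final iteration.  The mask
   type and index values would normally come from the loop-masking code;
   the constants 12 and 16 are just example values, and the comparison type
   (here size_type_node) is assumed to be supported by IFN_WHILE_ULT on the
   target.  */

static tree
example_build_tail_mask (gimple_seq *seq, tree mask_type)
{
  tree start = build_int_cst (size_type_node, 12);
  tree end = build_int_cst (size_type_node, 16);
  /* mask[I] is true while 12 + I < 16, i.e. for the first four lanes.  */
  return vect_gen_while (seq, mask_type, start, end, "tail_mask");
}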
14222 /* Try to compute the vector types required to vectorize STMT_INFO,
14223 returning true on success and false if vectorization isn't possible.
14224 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14225 make sure that the number of elements in the vectors is no bigger
14226 than GROUP_SIZE.
14228 On success:
14230 - Set *STMT_VECTYPE_OUT to:
14231 - NULL_TREE if the statement doesn't need to be vectorized;
14232 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14234 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14235 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14236 statement does not help to determine the overall number of units. */
14238 opt_result
14239 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14240 tree *stmt_vectype_out,
14241 tree *nunits_vectype_out,
14242 unsigned int group_size)
14244 gimple *stmt = stmt_info->stmt;
14246 /* For BB vectorization, we should always have a group size once we've
14247 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14248 are tentative requests during things like early data reference
14249 analysis and pattern recognition. */
14250 if (is_a <bb_vec_info> (vinfo))
14251 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
14252 else
14253 group_size = 0;
14255 *stmt_vectype_out = NULL_TREE;
14256 *nunits_vectype_out = NULL_TREE;
14258 if (gimple_get_lhs (stmt) == NULL_TREE
14259 /* MASK_STORE has no lhs, but is ok. */
14260 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
14262 if (is_a <gcall *> (stmt))
14264 /* Ignore calls with no lhs.  These must be calls to
14265 #pragma omp simd functions, and the vectorization factor
14266 they really need can't be determined until
14267 vectorizable_simd_clone_call. */
14268 if (dump_enabled_p ())
14269 dump_printf_loc (MSG_NOTE, vect_location,
14270 "defer to SIMD clone analysis.\n");
14271 return opt_result::success ();
14274 return opt_result::failure_at (stmt,
14275 "not vectorized: irregular stmt.%G", stmt);
14278 tree vectype;
14279 tree scalar_type = NULL_TREE;
14280 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
14282 vectype = STMT_VINFO_VECTYPE (stmt_info);
14283 if (dump_enabled_p ())
14284 dump_printf_loc (MSG_NOTE, vect_location,
14285 "precomputed vectype: %T\n", vectype);
14287 else if (vect_use_mask_type_p (stmt_info))
14289 unsigned int precision = stmt_info->mask_precision;
14290 scalar_type = build_nonstandard_integer_type (precision, 1);
14291 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
14292 if (!vectype)
14293 return opt_result::failure_at (stmt, "not vectorized: unsupported"
14294 " data-type %T\n", scalar_type);
14295 if (dump_enabled_p ())
14296 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14298 else
14300 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
14301 scalar_type = TREE_TYPE (DR_REF (dr));
14302 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
14303 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
14304 else
14305 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
14307 if (dump_enabled_p ())
14309 if (group_size)
14310 dump_printf_loc (MSG_NOTE, vect_location,
14311 "get vectype for scalar type (group size %d):"
14312 " %T\n", group_size, scalar_type);
14313 else
14314 dump_printf_loc (MSG_NOTE, vect_location,
14315 "get vectype for scalar type: %T\n", scalar_type);
14317 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
14318 if (!vectype)
14319 return opt_result::failure_at (stmt,
14320 "not vectorized:"
14321 " unsupported data-type %T\n",
14322 scalar_type);
14324 if (dump_enabled_p ())
14325 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14328 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
14329 return opt_result::failure_at (stmt,
14330 "not vectorized: vector stmt in loop:%G",
14331 stmt);
14333 *stmt_vectype_out = vectype;
14335 /* Don't try to compute scalar types if the stmt produces a boolean
14336 vector; use the existing vector type instead. */
14337 tree nunits_vectype = vectype;
14338 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14340 /* The number of units is set according to the smallest scalar
14341 type (or the largest vector size, but we only support one
14342 vector size per vectorization). */
14343 scalar_type = vect_get_smallest_scalar_type (stmt_info,
14344 TREE_TYPE (vectype));
14345 if (scalar_type != TREE_TYPE (vectype))
14347 if (dump_enabled_p ())
14348 dump_printf_loc (MSG_NOTE, vect_location,
14349 "get vectype for smallest scalar type: %T\n",
14350 scalar_type);
14351 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
14352 group_size);
14353 if (!nunits_vectype)
14354 return opt_result::failure_at
14355 (stmt, "not vectorized: unsupported data-type %T\n",
14356 scalar_type);
14357 if (dump_enabled_p ())
14358 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
14359 nunits_vectype);
14363 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
14364 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
14365 return opt_result::failure_at (stmt,
14366 "Not vectorized: Incompatible number "
14367 "of vector subparts between %T and %T\n",
14368 nunits_vectype, *stmt_vectype_out);
14370 if (dump_enabled_p ())
14372 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
14373 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
14374 dump_printf (MSG_NOTE, "\n");
14377 *nunits_vectype_out = nunits_vectype;
14378 return opt_result::success ();
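
/* Illustrative sketch, not part of tree-vect-stmts.cc: roughly how analysis
   code consumes the function above, recording the statement vectype and
   letting *NUNITS_VECTYPE_OUT drive the vectorization factor.  Error
   handling and mask-statement details are omitted.  */

static opt_result
example_record_vectype (vec_info *vinfo, stmt_vec_info stmt_info)
{
  tree stmt_vectype, nunits_vectype;
  opt_result res
    = vect_get_vector_types_for_stmt (vinfo, stmt_info, &stmt_vectype,
				      &nunits_vectype, 0);
  if (!res)
    return res;
  if (stmt_vectype && !STMT_VINFO_VECTYPE (stmt_info))
    STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
  return opt_result::success ();
}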
14381 /* Generate and return a statement sequence that sets the vector length LEN to:
14383 min_of_start_and_end = min (START_INDEX, END_INDEX);
14384 left_len = END_INDEX - min_of_start_and_end;
14385 rhs = min (left_len, LEN_LIMIT);
14386 LEN = rhs;
14388 Note: the cost of the code generated by this function is modeled
14389 by vect_estimate_min_profitable_iters, so changes here may need
14390 corresponding changes there. */
14392 gimple_seq
14393 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
14395 gimple_seq stmts = NULL;
14396 tree len_type = TREE_TYPE (len);
14397 gcc_assert (TREE_TYPE (start_index) == len_type);
14399 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
14400 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
14401 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
14402 gimple* stmt = gimple_build_assign (len, rhs);
14403 gimple_seq_add_stmt (&stmts, stmt);
14405 return stmts;
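
/* Illustrative sketch, not part of tree-vect-stmts.cc: a worked example of
   the sequence above.  With START_INDEX = 28, END_INDEX = 40 and
   LEN_LIMIT = 16, the generated statements compute
   min = MIN (28, 40) = 28, left_len = 40 - 28 = 12, LEN = MIN (12, 16) = 12,
   i.e. the final, partial vector processes 12 elements.  */

static gimple_seq
example_build_len (tree len)
{
  tree len_type = TREE_TYPE (len);
  tree start = build_int_cst (len_type, 28);
  tree end = build_int_cst (len_type, 40);
  tree limit = build_int_cst (len_type, 16);
  return vect_gen_len (len, start, end, limit);
}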