tree-optimization/112450 - avoid AVX512 style masking for BImode masks
[official-gcc.git] / gcc / tree-vect-stmts.cc
blob 8cd02afdeab56de475266131eb6aa110bb0e1b21
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
57 #include "regs.h"
58 #include "attribs.h"
59 #include "optabs-libfuncs.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
64 /* Return the vectorized type for the given statement. */
66 tree
67 stmt_vectype (class _stmt_vec_info *stmt_info)
69 return STMT_VINFO_VECTYPE (stmt_info);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74 bool
75 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
80 class loop* loop;
82 if (!loop_vinfo)
83 return false;
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
87 return (bb->loop_father == loop->inner);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
94 static unsigned
95 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
108 stmt_info_for_cost si
109 = { count, kind, where, stmt_info, node, vectype, misalign };
110 body_cost_vec->safe_push (si);
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
116 unsigned
117 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
126 unsigned
127 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
136 unsigned
137 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, 0, where);
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
149 static tree
150 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
161 static tree
162 read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (vect, new_stmt);
178 gimple_assign_set_lhs (new_stmt, vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
181 return vect_name;
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
188 static void
189 write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
193 tree array_ref;
194 gimple *new_stmt;
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
208 static tree
209 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
211 tree mem_ref;
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
222 static void
223 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
233 /* Function vect_mark_relevant.
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
237 static void
238 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern; in such cases the
252 stmt itself should be marked.
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
271 if (live_p && relevant == vect_unused_in_scope)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
299 worklist->safe_push (stmt_info);
303 /* Function is_simple_and_all_uses_invariant
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
307 bool
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
311 tree op;
312 ssa_op_iter iter;
314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
315 if (!stmt)
316 return false;
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
320 enum vect_def_type dt = vect_uninitialized_def;
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
333 return true;
336 /* Function vect_stmt_relevant_p.
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - control stmts in the loop (except for the exit condition).
346 CHECKME: what other side effects would the vectorizer allow? */
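/* For illustration: in a loop like
     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;
   the store to a[i] alters memory (it has a vdef) and is therefore
   relevant, whereas the increment of i only feeds addressing and the
   exit test.  A value computed in the loop but used after it, such as
   a reduction result, is marked live instead.  */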
348 static bool
349 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
350 enum vect_relevant *relevant, bool *live_p)
352 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
353 ssa_op_iter op_iter;
354 imm_use_iterator imm_iter;
355 use_operand_p use_p;
356 def_operand_p def_p;
358 *relevant = vect_unused_in_scope;
359 *live_p = false;
361 /* cond stmt other than loop exit cond. */
362 if (is_ctrl_stmt (stmt_info->stmt)
363 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
364 *relevant = vect_used_in_scope;
366 /* changing memory. */
367 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
368 if (gimple_vdef (stmt_info->stmt)
369 && !gimple_clobber_p (stmt_info->stmt))
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: stmt has vdefs.\n");
374 *relevant = vect_used_in_scope;
377 /* uses outside the loop. */
378 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
380 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
382 basic_block bb = gimple_bb (USE_STMT (use_p));
383 if (!flow_bb_inside_loop_p (loop, bb))
385 if (is_gimple_debug (USE_STMT (use_p)))
386 continue;
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: used out of loop.\n");
392 /* We expect all such uses to be in the loop exit phis
393 (because of loop closed form) */
394 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
395 gcc_assert (bb == single_exit (loop)->dest);
397 *live_p = true;
402 if (*live_p && *relevant == vect_unused_in_scope
403 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_NOTE, vect_location,
407 "vec_stmt_relevant_p: stmt live but not relevant.\n");
408 *relevant = vect_used_only_live;
411 return (*live_p || *relevant);
415 /* Function exist_non_indexing_operands_for_use_p
417 USE is one of the uses attached to STMT_INFO. Check if USE is
418 used in STMT_INFO for anything other than indexing an array. */
420 static bool
421 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
423 tree operand;
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info))
429 return true;
431 /* STMT has a data_ref. FORNOW this means that it's of one of
432 the following forms:
433 -1- ARRAY_REF = var
434 -2- var = ARRAY_REF
435 (This should have been verified in analyze_data_refs).
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
439 for array indexing.
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
444 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
445 if (!assign || !gimple_assign_copy_p (assign))
447 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
448 if (call && gimple_call_internal_p (call))
450 internal_fn ifn = gimple_call_internal_fn (call);
451 int mask_index = internal_fn_mask_index (ifn);
452 if (mask_index >= 0
453 && use == gimple_call_arg (call, mask_index))
454 return true;
455 int stored_value_index = internal_fn_stored_value_index (ifn);
456 if (stored_value_index >= 0
457 && use == gimple_call_arg (call, stored_value_index))
458 return true;
459 if (internal_gather_scatter_fn_p (ifn)
460 && use == gimple_call_arg (call, 1))
461 return true;
463 return false;
466 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
467 return false;
468 operand = gimple_assign_rhs1 (assign);
469 if (TREE_CODE (operand) != SSA_NAME)
470 return false;
472 if (operand == use)
473 return true;
475 return false;
480 /* Function process_use.
482 Inputs:
483 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
484 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
485 that defined USE. This is done by calling mark_relevant and passing it
486 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
487 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
488 be performed.
490 Outputs:
491 Generally, LIVE_P and RELEVANT are used to define the liveness and
492 relevance info of the DEF_STMT of this USE:
493 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
494 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
495 Exceptions:
496 - case 1: If USE is used only for address computations (e.g. array indexing),
497 which does not need to be directly vectorized, then the liveness/relevance
498 of the respective DEF_STMT is left unchanged.
499 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
500 we skip DEF_STMT because it has already been processed.
501 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
502 "relevant" will be modified accordingly.
504 Return true if everything is as expected. Return false otherwise. */
506 static opt_result
507 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
508 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
509 bool force)
511 stmt_vec_info dstmt_vinfo;
512 enum vect_def_type dt;
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
517 return opt_result::success ();
519 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
520 return opt_result::failure_at (stmt_vinfo->stmt,
521 "not vectorized:"
522 " unsupported use in stmt.\n");
524 if (!dstmt_vinfo)
525 return opt_result::success ();
527 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
528 basic_block bb = gimple_bb (stmt_vinfo->stmt);
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
537 && bb->loop_father == def_bb->loop_father)
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
542 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
543 return opt_result::success ();
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
548 d = dstmt_vinfo
549 inner-loop:
550 stmt # use (d)
551 outer-loop-tail-bb:
552 ... */
553 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
559 switch (relevant)
561 case vect_unused_in_scope:
562 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
563 vect_used_in_scope : vect_unused_in_scope;
564 break;
566 case vect_used_in_outer_by_reduction:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_by_reduction;
569 break;
571 case vect_used_in_outer:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
573 relevant = vect_used_in_scope;
574 break;
576 case vect_used_in_scope:
577 break;
579 default:
580 gcc_unreachable ();
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
587 inner-loop:
588 d = dstmt_vinfo
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
590 stmt # use (d) */
591 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE, vect_location,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
597 switch (relevant)
599 case vect_unused_in_scope:
600 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
602 vect_used_in_outer_by_reduction : vect_unused_in_scope;
603 break;
605 case vect_used_by_reduction:
606 case vect_used_only_live:
607 relevant = vect_used_in_outer_by_reduction;
608 break;
610 case vect_used_in_scope:
611 relevant = vect_used_in_outer;
612 break;
614 default:
615 gcc_unreachable ();
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
621 of course. */
622 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
626 loop_latch_edge (bb->loop_father))
627 == use))
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location,
631 "induction value on backedge.\n");
632 return opt_result::success ();
636 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
637 return opt_result::success ();
641 /* Function vect_mark_stmts_to_be_vectorized.
643 Not all stmts in the loop need to be vectorized. For example:
645 for i...
646 for j...
647 1. T0 = i + j
648 2. T1 = a[T0]
650 3. j = j + 1
652 Stmts 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
655 This pass detects such stmts. */
657 opt_result
658 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
660 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
661 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
662 unsigned int nbbs = loop->num_nodes;
663 gimple_stmt_iterator si;
664 unsigned int i;
665 basic_block bb;
666 bool live_p;
667 enum vect_relevant relevant;
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
671 auto_vec<stmt_vec_info, 64> worklist;
673 /* 1. Init worklist. */
674 for (i = 0; i < nbbs; i++)
676 bb = bbs[i];
677 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
679 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
682 phi_info->stmt);
684 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
685 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
687 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
689 if (is_gimple_debug (gsi_stmt (si)))
690 continue;
691 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "init: stmt relevant? %G", stmt_info->stmt);
696 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
697 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
701 /* 2. Process_worklist */
702 while (worklist.length () > 0)
704 use_operand_p use_p;
705 ssa_op_iter iter;
707 stmt_vec_info stmt_vinfo = worklist.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "worklist: examine stmt: %G", stmt_vinfo->stmt);
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
714 of STMT. */
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
730 case vect_reduction_def:
731 gcc_assert (relevant != vect_unused_in_scope);
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
736 return opt_result::failure_at
737 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
738 break;
740 case vect_nested_cycle:
741 if (relevant != vect_unused_in_scope
742 && relevant != vect_used_in_outer_by_reduction
743 && relevant != vect_used_in_outer)
744 return opt_result::failure_at
745 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
746 break;
748 case vect_double_reduction_def:
749 if (relevant != vect_unused_in_scope
750 && relevant != vect_used_by_reduction
751 && relevant != vect_used_only_live)
752 return opt_result::failure_at
753 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
754 break;
756 default:
757 break;
760 if (is_pattern_stmt_p (stmt_vinfo))
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
767 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
768 tree op = gimple_assign_rhs1 (assign);
770 i = 1;
771 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
773 opt_result res
774 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
775 loop_vinfo, relevant, &worklist, false);
776 if (!res)
777 return res;
778 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
779 loop_vinfo, relevant, &worklist, false);
780 if (!res)
781 return res;
782 i = 2;
784 for (; i < gimple_num_ops (assign); i++)
786 op = gimple_op (assign, i);
787 if (TREE_CODE (op) == SSA_NAME)
789 opt_result res
790 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
791 &worklist, false);
792 if (!res)
793 return res;
797 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
799 for (i = 0; i < gimple_call_num_args (call); i++)
801 tree arg = gimple_call_arg (call, i);
802 opt_result res
803 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
804 &worklist, false);
805 if (!res)
806 return res;
810 else
811 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
813 tree op = USE_FROM_PTR (use_p);
814 opt_result res
815 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
816 &worklist, false);
817 if (!res)
818 return res;
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
823 gather_scatter_info gs_info;
824 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
825 gcc_unreachable ();
826 opt_result res
827 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
828 &worklist, true);
829 if (!res)
831 if (fatal)
832 *fatal = false;
833 return res;
836 } /* while worklist */
838 return opt_result::success ();
841 /* Function vect_model_simple_cost.
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
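/* For illustration, assuming a non-SLP statement with NCOPIES == 2 and
   one constant or external operand: the code below records a single
   scalar_to_vec prologue cost for broadcasting that operand and two
   body costs of KIND (vector_stmt by default).  */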
847 static void
848 vect_model_simple_cost (vec_info *,
849 stmt_vec_info stmt_info, int ncopies,
850 enum vect_def_type *dt,
851 int ndts,
852 slp_tree node,
853 stmt_vector_for_cost *cost_vec,
854 vect_cost_for_stmt kind = vector_stmt)
856 int inside_cost = 0, prologue_cost = 0;
858 gcc_assert (cost_vec != NULL);
860 /* ??? Somehow we need to fix this at the callers. */
861 if (node)
862 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
864 if (!node)
865 /* Cost the "broadcast" of a scalar operand into a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
867 cost model. */
868 for (int i = 0; i < ndts; i++)
869 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
870 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
871 stmt_info, 0, vect_prologue);
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
875 stmt_info, 0, vect_body);
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost, prologue_cost);
884 /* Model cost for type demotion and promotion operations. PWR is
885 normally zero for single-step promotions and demotions. It will be
886 one if two-step promotion/demotion is required, and so on. NCOPIES
887 is the number of vector results (and thus number of instructions)
888 for the narrowest end of the operation chain. Each additional
889 step doubles the number of instructions required. If WIDEN_ARITH
890 is true the stmt is doing widening arithmetic. */
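/* Worked example: with NCOPIES == 2 and PWR == 1 the loop below records
   2 + 4 = 6 inside-of-loop statements, because the number of copies
   doubles for every additional promotion/demotion step.  */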
892 static void
893 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
894 enum vect_def_type *dt,
895 unsigned int ncopies, int pwr,
896 stmt_vector_for_cost *cost_vec,
897 bool widen_arith)
899 int i;
900 int inside_cost = 0, prologue_cost = 0;
902 for (i = 0; i < pwr + 1; i++)
904 inside_cost += record_stmt_cost (cost_vec, ncopies,
905 widen_arith
906 ? vector_stmt : vec_promote_demote,
907 stmt_info, 0, vect_body);
908 ncopies *= 2;
911 /* FORNOW: Assuming maximum 2 args per stmts. */
912 for (i = 0; i < 2; i++)
913 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
914 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
915 stmt_info, 0, vect_prologue);
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE, vect_location,
919 "vect_model_promotion_demotion_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost, prologue_cost);
923 /* Returns true if the current function returns DECL. */
925 static bool
926 cfun_returns (tree decl)
928 edge_iterator ei;
929 edge e;
930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
932 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
933 if (!ret)
934 continue;
935 if (gimple_return_retval (ret) == decl)
936 return true;
937 /* We often end up with an aggregate copy to the result decl;
938 handle that case as well. First skip intermediate clobbers
939 though. */
940 gimple *def = ret;
943 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
945 while (gimple_clobber_p (def));
946 if (is_a <gassign *> (def)
947 && gimple_assign_lhs (def) == gimple_return_retval (ret)
948 && gimple_assign_rhs1 (def) == decl)
949 return true;
951 return false;
954 /* Calculate cost of DR's memory access. */
955 void
956 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
957 dr_alignment_support alignment_support_scheme,
958 int misalignment,
959 unsigned int *inside_cost,
960 stmt_vector_for_cost *body_cost_vec)
962 switch (alignment_support_scheme)
964 case dr_aligned:
966 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
967 vector_store, stmt_info, 0,
968 vect_body);
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE, vect_location,
972 "vect_model_store_cost: aligned.\n");
973 break;
976 case dr_unaligned_supported:
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
980 unaligned_store, stmt_info,
981 misalignment, vect_body);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE, vect_location,
984 "vect_model_store_cost: unaligned supported by "
985 "hardware.\n");
986 break;
989 case dr_unaligned_unsupported:
991 *inside_cost = VECT_MAX_COST;
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
995 "vect_model_store_cost: unsupported access.\n");
996 break;
999 default:
1000 gcc_unreachable ();
1004 /* Calculate cost of DR's memory access. */
1005 void
1006 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1007 dr_alignment_support alignment_support_scheme,
1008 int misalignment,
1009 bool add_realign_cost, unsigned int *inside_cost,
1010 unsigned int *prologue_cost,
1011 stmt_vector_for_cost *prologue_cost_vec,
1012 stmt_vector_for_cost *body_cost_vec,
1013 bool record_prologue_costs)
1015 switch (alignment_support_scheme)
1017 case dr_aligned:
1019 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1020 stmt_info, 0, vect_body);
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_NOTE, vect_location,
1024 "vect_model_load_cost: aligned.\n");
1026 break;
1028 case dr_unaligned_supported:
1030 /* Here, we assign an additional cost for the unaligned load. */
1031 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1032 unaligned_load, stmt_info,
1033 misalignment, vect_body);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_load_cost: unaligned supported by "
1038 "hardware.\n");
1040 break;
1042 case dr_explicit_realign:
1044 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1045 vector_load, stmt_info, 0, vect_body);
1046 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1047 vec_perm, stmt_info, 0, vect_body);
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1051 prologue costs. */
1052 if (targetm.vectorize.builtin_mask_for_load)
1053 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1054 stmt_info, 0, vect_body);
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_NOTE, vect_location,
1058 "vect_model_load_cost: explicit realign\n");
1060 break;
1062 case dr_explicit_realign_optimized:
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_NOTE, vect_location,
1066 "vect_model_load_cost: unaligned software "
1067 "pipelined.\n");
1069 /* Unaligned software pipeline has a load of an address, an initial
1070 load, and possibly a mask operation to "prime" the loop. However,
1071 if this is an access in a group of loads, which provide grouped
1072 access, then the above cost should only be considered for one
1073 access in the group. Inside the loop, there is a load op
1074 and a realignment op. */
1076 if (add_realign_cost && record_prologue_costs)
1078 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1079 vector_stmt, stmt_info,
1080 0, vect_prologue);
1081 if (targetm.vectorize.builtin_mask_for_load)
1082 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1083 vector_stmt, stmt_info,
1084 0, vect_prologue);
1087 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1088 stmt_info, 0, vect_body);
1089 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1090 stmt_info, 0, vect_body);
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE, vect_location,
1094 "vect_model_load_cost: explicit realign optimized"
1095 "\n");
1097 break;
1100 case dr_unaligned_unsupported:
1102 *inside_cost = VECT_MAX_COST;
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1106 "vect_model_load_cost: unsupported access.\n");
1107 break;
1110 default:
1111 gcc_unreachable ();
1115 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1116 the loop preheader for the vectorized stmt STMT_VINFO. */
1118 static void
1119 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1120 gimple_stmt_iterator *gsi)
1122 if (gsi)
1123 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1124 else
1125 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE, vect_location,
1129 "created new init_stmt: %G", new_stmt);
1132 /* Function vect_init_vector.
1134 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1135 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1136 vector type a vector with all elements equal to VAL is created first.
1137 Place the initialization at GSI if it is not NULL. Otherwise, place the
1138 initialization at the loop preheader.
1139 Return the DEF of INIT_STMT.
1140 It will be used in the vectorization of STMT_INFO. */
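/* For illustration, assuming VAL is the integer constant 5 and TYPE is a
   vector of four ints: the code builds { 5, 5, 5, 5 }, emits an init
   stmt of the form
     cst_N = { 5, 5, 5, 5 };
   at GSI (or in the loop preheader when GSI is null) and returns the new
   SSA name; "cst_N" here only stands for the generated name.  */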
1142 tree
1143 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1144 gimple_stmt_iterator *gsi)
1146 gimple *init_stmt;
1147 tree new_temp;
1149 /* We abuse this function to push something to an SSA name with initial 'val'. */
1150 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1152 gcc_assert (VECTOR_TYPE_P (type));
1153 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1155 /* A scalar boolean value should be transformed into an
1156 all-zeros or all-ones value before building a vector. */
1157 if (VECTOR_BOOLEAN_TYPE_P (type))
1159 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1160 tree false_val = build_zero_cst (TREE_TYPE (type));
1162 if (CONSTANT_CLASS_P (val))
1163 val = integer_zerop (val) ? false_val : true_val;
1164 else
1166 new_temp = make_ssa_name (TREE_TYPE (type));
1167 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1168 val, true_val, false_val);
1169 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1170 val = new_temp;
1173 else
1175 gimple_seq stmts = NULL;
1176 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1177 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1178 TREE_TYPE (type), val);
1179 else
1180 /* ??? Condition vectorization expects us to do
1181 promotion of invariant/external defs. */
1182 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1183 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1184 !gsi_end_p (gsi2); )
1186 init_stmt = gsi_stmt (gsi2);
1187 gsi_remove (&gsi2, false);
1188 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1192 val = build_vector_from_val (type, val);
1195 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1196 init_stmt = gimple_build_assign (new_temp, val);
1197 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1198 return new_temp;
1202 /* Function vect_get_vec_defs_for_operand.
1204 OP is an operand in STMT_VINFO. This function returns a vector of
1205 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1207 In the case that OP is an SSA_NAME which is defined in the loop, then
1208 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1210 In case OP is an invariant or constant, a new stmt that creates a vector def
1211 needs to be introduced. VECTYPE may be used to specify a required type for
1212 vector invariant. */
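/* In short: for a constant or external OP a single splatted vector def
   is created and pushed NCOPIES times into VEC_OPRNDS, while for an OP
   defined inside the loop the LHS of each of the NCOPIES vectorized
   definition statements is pushed.  */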
1214 void
1215 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1216 unsigned ncopies,
1217 tree op, vec<tree> *vec_oprnds, tree vectype)
1219 gimple *def_stmt;
1220 enum vect_def_type dt;
1221 bool is_simple_use;
1222 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_get_vec_defs_for_operand: %T\n", op);
1228 stmt_vec_info def_stmt_info;
1229 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1230 &def_stmt_info, &def_stmt);
1231 gcc_assert (is_simple_use);
1232 if (def_stmt && dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1235 vec_oprnds->create (ncopies);
1236 if (dt == vect_constant_def || dt == vect_external_def)
1238 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1239 tree vector_type;
1241 if (vectype)
1242 vector_type = vectype;
1243 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1244 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1245 vector_type = truth_type_for (stmt_vectype);
1246 else
1247 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1249 gcc_assert (vector_type);
1250 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1251 while (ncopies--)
1252 vec_oprnds->quick_push (vop);
1254 else
1256 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1257 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1258 for (unsigned i = 0; i < ncopies; ++i)
1259 vec_oprnds->quick_push (gimple_get_lhs
1260 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1265 /* Get vectorized definitions for OP0, OP1, OP2 and OP3. */
1267 void
1268 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1269 unsigned ncopies,
1270 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1271 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1272 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1273 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1275 if (slp_node)
1277 if (op0)
1278 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1279 if (op1)
1280 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1281 if (op2)
1282 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1283 if (op3)
1284 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1286 else
1288 if (op0)
1289 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1290 op0, vec_oprnds0, vectype0);
1291 if (op1)
1292 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1293 op1, vec_oprnds1, vectype1);
1294 if (op2)
1295 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1296 op2, vec_oprnds2, vectype2);
1297 if (op3)
1298 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1299 op3, vec_oprnds3, vectype3);
1303 void
1304 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1305 unsigned ncopies,
1306 tree op0, vec<tree> *vec_oprnds0,
1307 tree op1, vec<tree> *vec_oprnds1,
1308 tree op2, vec<tree> *vec_oprnds2,
1309 tree op3, vec<tree> *vec_oprnds3)
1311 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1312 op0, vec_oprnds0, NULL_TREE,
1313 op1, vec_oprnds1, NULL_TREE,
1314 op2, vec_oprnds2, NULL_TREE,
1315 op3, vec_oprnds3, NULL_TREE);
1318 /* Helper function called by vect_finish_replace_stmt and
1319 vect_finish_stmt_generation. Set the location of the new
1320 statement and create and return a stmt_vec_info for it. */
1322 static void
1323 vect_finish_stmt_generation_1 (vec_info *,
1324 stmt_vec_info stmt_info, gimple *vec_stmt)
1326 if (dump_enabled_p ())
1327 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1329 if (stmt_info)
1331 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1333 /* While EH edges will generally prevent vectorization, stmt might
1334 e.g. be in a must-not-throw region. Ensure newly created stmts
1335 that could throw are part of the same region. */
1336 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1337 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1338 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1340 else
1341 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1344 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1345 which sets the same scalar result as STMT_INFO did. Create and return a
1346 stmt_vec_info for VEC_STMT. */
1348 void
1349 vect_finish_replace_stmt (vec_info *vinfo,
1350 stmt_vec_info stmt_info, gimple *vec_stmt)
1352 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1353 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1355 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1356 gsi_replace (&gsi, vec_stmt, true);
1358 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1361 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1362 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1364 void
1365 vect_finish_stmt_generation (vec_info *vinfo,
1366 stmt_vec_info stmt_info, gimple *vec_stmt,
1367 gimple_stmt_iterator *gsi)
1369 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1371 if (!gsi_end_p (*gsi)
1372 && gimple_has_mem_ops (vec_stmt))
1374 gimple *at_stmt = gsi_stmt (*gsi);
1375 tree vuse = gimple_vuse (at_stmt);
1376 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1378 tree vdef = gimple_vdef (at_stmt);
1379 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1380 gimple_set_modified (vec_stmt, true);
1381 /* If we have an SSA vuse and insert a store, update virtual
1382 SSA form to avoid triggering the renamer. Do so only
1383 if we can easily see all uses - which is what almost always
1384 happens with the way vectorized stmts are inserted. */
1385 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1386 && ((is_gimple_assign (vec_stmt)
1387 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1388 || (is_gimple_call (vec_stmt)
1389 && (!(gimple_call_flags (vec_stmt)
1390 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1391 || (gimple_call_lhs (vec_stmt)
1392 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1394 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1395 gimple_set_vdef (vec_stmt, new_vdef);
1396 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1400 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1401 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1404 /* We want to vectorize a call to combined function CFN with function
1405 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1406 as the types of all inputs. Check whether this is possible using
1407 an internal function, returning its code if so or IFN_LAST if not. */
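/* For example (assuming target support), a call to the sqrt builtin with
   V2DF input and output would be mapped to the vectorizable internal
   function IFN_SQRT; when direct_internal_fn_supported_p rejects the
   mode pair, IFN_LAST is returned instead.  */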
1409 static internal_fn
1410 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1411 tree vectype_out, tree vectype_in)
1413 internal_fn ifn;
1414 if (internal_fn_p (cfn))
1415 ifn = as_internal_fn (cfn);
1416 else
1417 ifn = associated_internal_fn (fndecl);
1418 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1420 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1421 if (info.vectorizable)
1423 bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
1424 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1425 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1427 /* The type sizes of both vectype_in and vectype_out should be
1428 exactly the same when vectype_out isn't participating in the
1429 optab query, while there is no size restriction when vectype_out
1430 is part of the optab query. */
1431 if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
1432 return IFN_LAST;
1434 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1435 OPTIMIZE_FOR_SPEED))
1436 return ifn;
1439 return IFN_LAST;
1443 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1444 gimple_stmt_iterator *);
1446 /* Check whether a load or store statement in the loop described by
1447 LOOP_VINFO is possible in a loop using partial vectors. This is
1448 testing whether the vectorizer pass has the appropriate support,
1449 as well as whether the target does.
1451 VLS_TYPE says whether the statement is a load or store and VECTYPE
1452 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1453 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1454 says how the load or store is going to be implemented and GROUP_SIZE
1455 is the number of load or store statements in the containing group.
1456 If the access is a gather load or scatter store, GS_INFO describes
1457 its arguments. If the load or store is conditional, SCALAR_MASK is the
1458 condition under which it occurs.
1460 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1461 vectors is not supported, otherwise record the required rgroup control
1462 types. */
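/* Outcome sketch: for a masked contiguous access on a target that
   provides masked loads/stores for VECMODE (AVX-512- or SVE-style
   masking, for instance) an rgroup mask is recorded via
   vect_record_loop_mask; on targets with length-based partial vectors a
   length is recorded via vect_record_loop_len; otherwise
   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P is cleared.  */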
1464 static void
1465 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1466 slp_tree slp_node,
1467 vec_load_store_type vls_type,
1468 int group_size,
1469 vect_memory_access_type
1470 memory_access_type,
1471 gather_scatter_info *gs_info,
1472 tree scalar_mask)
1474 /* Invariant loads need no special support. */
1475 if (memory_access_type == VMAT_INVARIANT)
1476 return;
1478 unsigned int nvectors;
1479 if (slp_node)
1480 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1481 else
1482 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1484 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1485 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1486 machine_mode vecmode = TYPE_MODE (vectype);
1487 bool is_load = (vls_type == VLS_LOAD);
1488 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1490 internal_fn ifn
1491 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1492 : vect_store_lanes_supported (vectype, group_size, true));
1493 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1494 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1495 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1496 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1497 scalar_mask);
1498 else
1500 if (dump_enabled_p ())
1501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1502 "can't operate on partial vectors because"
1503 " the target doesn't have an appropriate"
1504 " load/store-lanes instruction.\n");
1505 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1507 return;
1510 if (memory_access_type == VMAT_GATHER_SCATTER)
1512 internal_fn ifn = (is_load
1513 ? IFN_MASK_GATHER_LOAD
1514 : IFN_MASK_SCATTER_STORE);
1515 internal_fn len_ifn = (is_load
1516 ? IFN_MASK_LEN_GATHER_LOAD
1517 : IFN_MASK_LEN_SCATTER_STORE);
1518 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1519 gs_info->memory_type,
1520 gs_info->offset_vectype,
1521 gs_info->scale))
1522 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1523 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1524 gs_info->memory_type,
1525 gs_info->offset_vectype,
1526 gs_info->scale))
1527 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1528 scalar_mask);
1529 else
1531 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1533 "can't operate on partial vectors because"
1534 " the target doesn't have an appropriate"
1535 " gather load or scatter store instruction.\n");
1536 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1538 return;
1541 if (memory_access_type != VMAT_CONTIGUOUS
1542 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1544 /* Element X of the data must come from iteration i * VF + X of the
1545 scalar loop. We need more work to support other mappings. */
1546 if (dump_enabled_p ())
1547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1548 "can't operate on partial vectors because an"
1549 " access isn't contiguous.\n");
1550 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1551 return;
1554 if (!VECTOR_MODE_P (vecmode))
1556 if (dump_enabled_p ())
1557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1558 "can't operate on partial vectors when emulating"
1559 " vector operations.\n");
1560 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1561 return;
1564 /* We might load more scalars than we need for permuting SLP loads.
1565 We checked in get_group_load_store_type that the extra elements
1566 don't leak into a new vector. */
1567 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1569 unsigned int nvectors;
1570 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1571 return nvectors;
1572 gcc_unreachable ();
1575 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1576 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1577 machine_mode mask_mode;
1578 machine_mode vmode;
1579 bool using_partial_vectors_p = false;
1580 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1582 nvectors = group_memory_nvectors (group_size * vf, nunits);
1583 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1584 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1585 using_partial_vectors_p = true;
1587 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1588 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1590 nvectors = group_memory_nvectors (group_size * vf, nunits);
1591 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1592 using_partial_vectors_p = true;
1595 if (!using_partial_vectors_p)
1597 if (dump_enabled_p ())
1598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1599 "can't operate on partial vectors because the"
1600 " target doesn't have the appropriate partial"
1601 " vectorization load or store.\n");
1602 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1606 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1607 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1608 that needs to be applied to all loads and stores in a vectorized loop.
1609 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1610 otherwise return VEC_MASK & LOOP_MASK.
1612 MASK_TYPE is the type of both masks. If new statements are needed,
1613 insert them before GSI. */
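/* For illustration: when both masks are needed the function emits a
   statement of the form
     vec_mask_and_N = vec_mask & loop_mask;
   and returns its LHS; "vec_mask_and_N" stands for whatever temporary
   SSA name gets created.  */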
1615 static tree
1616 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1617 tree vec_mask, gimple_stmt_iterator *gsi)
1619 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1620 if (!loop_mask)
1621 return vec_mask;
1623 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1625 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1626 return vec_mask;
1628 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1629 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1630 vec_mask, loop_mask);
1632 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1633 return and_res;
1636 /* Determine whether we can use a gather load or scatter store to vectorize
1637 strided load or store STMT_INFO by truncating the current offset to a
1638 smaller width. We need to be able to construct an offset vector:
1640 { 0, X, X*2, X*3, ... }
1642 without loss of precision, where X is STMT_INFO's DR_STEP.
1644 Return true if this is possible, describing the gather load or scatter
1645 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
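/* Worked example with assumed numbers: for DR_STEP == 16 bytes and
   4-byte elements, trying SCALE == 4 means the offset elements are
   multiples of 16/4 == 4, i.e. { 0, 4, 8, 12, ... }; if the loop runs
   at most about a thousand iterations these values fit in a 16-bit
   offset type, which is the kind of narrowing the code below tries to
   prove safe.  */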
1647 static bool
1648 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1649 loop_vec_info loop_vinfo, bool masked_p,
1650 gather_scatter_info *gs_info)
1652 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1653 data_reference *dr = dr_info->dr;
1654 tree step = DR_STEP (dr);
1655 if (TREE_CODE (step) != INTEGER_CST)
1657 /* ??? Perhaps we could use range information here? */
1658 if (dump_enabled_p ())
1659 dump_printf_loc (MSG_NOTE, vect_location,
1660 "cannot truncate variable step.\n");
1661 return false;
1664 /* Get the number of bits in an element. */
1665 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1666 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1667 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1669 /* Set COUNT to the upper limit on the number of elements - 1.
1670 Start with the maximum vectorization factor. */
1671 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1673 /* Try lowering COUNT to the number of scalar latch iterations. */
1674 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1675 widest_int max_iters;
1676 if (max_loop_iterations (loop, &max_iters)
1677 && max_iters < count)
1678 count = max_iters.to_shwi ();
1680 /* Try scales of 1 and the element size. */
1681 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1682 wi::overflow_type overflow = wi::OVF_NONE;
1683 for (int i = 0; i < 2; ++i)
1685 int scale = scales[i];
1686 widest_int factor;
1687 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1688 continue;
1690 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1691 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1692 if (overflow)
1693 continue;
1694 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1695 unsigned int min_offset_bits = wi::min_precision (range, sign);
1697 /* Find the narrowest viable offset type. */
1698 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1699 tree offset_type = build_nonstandard_integer_type (offset_bits,
1700 sign == UNSIGNED);
1702 /* See whether the target supports the operation with an offset
1703 no narrower than OFFSET_TYPE. */
1704 tree memory_type = TREE_TYPE (DR_REF (dr));
1705 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1706 vectype, memory_type, offset_type, scale,
1707 &gs_info->ifn, &gs_info->offset_vectype)
1708 || gs_info->ifn == IFN_LAST)
1709 continue;
1711 gs_info->decl = NULL_TREE;
1712 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1713 but we don't need to store that here. */
1714 gs_info->base = NULL_TREE;
1715 gs_info->element_type = TREE_TYPE (vectype);
1716 gs_info->offset = fold_convert (offset_type, step);
1717 gs_info->offset_dt = vect_constant_def;
1718 gs_info->scale = scale;
1719 gs_info->memory_type = memory_type;
1720 return true;
1723 if (overflow && dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE, vect_location,
1725 "truncating gather/scatter offset to %d bits"
1726 " might change its value.\n", element_bits);
1728 return false;
1731 /* Return true if we can use gather/scatter internal functions to
1732 vectorize STMT_INFO, which is a grouped or strided load or store.
1733 MASKED_P is true if the load or store is conditional. When returning
1734 true, fill in GS_INFO with the information required to perform the
1735 operation. */
1737 static bool
1738 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1739 loop_vec_info loop_vinfo, bool masked_p,
1740 gather_scatter_info *gs_info)
1742 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1743 || gs_info->ifn == IFN_LAST)
1744 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1745 masked_p, gs_info);
1747 tree old_offset_type = TREE_TYPE (gs_info->offset);
1748 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1750 gcc_assert (TYPE_PRECISION (new_offset_type)
1751 >= TYPE_PRECISION (old_offset_type));
1752 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1754 if (dump_enabled_p ())
1755 dump_printf_loc (MSG_NOTE, vect_location,
1756 "using gather/scatter for strided/grouped access,"
1757 " scale = %d\n", gs_info->scale);
1759 return true;
1762 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1763 elements with a known constant step. Return -1 if that step
1764 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1766 static int
1767 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1769 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1770 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1771 size_zero_node);
1774 /* If the target supports a permute mask that reverses the elements in
1775 a vector of type VECTYPE, return that mask, otherwise return null. */
1777 static tree
1778 perm_mask_for_reverse (tree vectype)
1780 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1782 /* The encoding has a single stepped pattern. */
1783 vec_perm_builder sel (nunits, 1, 3);
1784 for (int i = 0; i < 3; ++i)
1785 sel.quick_push (nunits - 1 - i);
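/* E.g. for a 4-element vector this yields the selector { 3, 2, 1, 0 };
   the single stepped pattern extends the series for longer vectors.  */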
1787 vec_perm_indices indices (sel, 1, nunits);
1788 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1789 indices))
1790 return NULL_TREE;
1791 return vect_gen_perm_mask_checked (vectype, indices);
1794 /* A subroutine of get_load_store_type, with a subset of the same
1795 arguments. Handle the case where STMT_INFO is a load or store that
1796 accesses consecutive elements with a negative step. Sets *POFFSET
1797 to the offset to be applied to the DR for the first access. */
1799 static vect_memory_access_type
1800 get_negative_load_store_type (vec_info *vinfo,
1801 stmt_vec_info stmt_info, tree vectype,
1802 vec_load_store_type vls_type,
1803 unsigned int ncopies, poly_int64 *poffset)
1805 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1806 dr_alignment_support alignment_support_scheme;
1808 if (ncopies > 1)
1810 if (dump_enabled_p ())
1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1812 "multiple types with negative step.\n");
1813 return VMAT_ELEMENTWISE;
1816 /* For backward running DRs the first access in vectype actually is
1817 N-1 elements before the address of the DR. */
1818 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1819 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
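/* E.g. for a 4-element vector of 4-byte elements this makes the first
   access start 3 elements (12 bytes) before the DR address.  */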
1821 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1822 alignment_support_scheme
1823 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1824 if (alignment_support_scheme != dr_aligned
1825 && alignment_support_scheme != dr_unaligned_supported)
1827 if (dump_enabled_p ())
1828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1829 "negative step but alignment required.\n");
1830 *poffset = 0;
1831 return VMAT_ELEMENTWISE;
1834 if (vls_type == VLS_STORE_INVARIANT)
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_NOTE, vect_location,
1838 "negative step with invariant source;"
1839 " no permute needed.\n");
1840 return VMAT_CONTIGUOUS_DOWN;
1843 if (!perm_mask_for_reverse (vectype))
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1847 "negative step and reversing not supported.\n");
1848 *poffset = 0;
1849 return VMAT_ELEMENTWISE;
1852 return VMAT_CONTIGUOUS_REVERSE;
1855 /* STMT_INFO is either a masked or unconditional store. Return the value
1856 being stored. */
1858 tree
1859 vect_get_store_rhs (stmt_vec_info stmt_info)
1861 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1863 gcc_assert (gimple_assign_single_p (assign));
1864 return gimple_assign_rhs1 (assign);
1866 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1868 internal_fn ifn = gimple_call_internal_fn (call);
1869 int index = internal_fn_stored_value_index (ifn);
1870 gcc_assert (index >= 0);
1871 return gimple_call_arg (call, index);
1873 gcc_unreachable ();
1876 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1878 This function returns a vector type which can be composed from NELTS pieces,
1879 whose type is recorded in PTYPE. VTYPE should be a vector type and has the
1880 same vector size as the returned vector. It first checks whether the target
1881 supports a pieces-sized vector mode for the construction; if not, it then
1882 checks a pieces-sized scalar mode. It returns NULL_TREE if no suitable
1883 composition can be found.
1885 For example, for (vtype=V16QI, nelts=4), we can probably get:
1886 - V16QI with PTYPE V4QI.
1887 - V4SI with PTYPE SI.
1888 - NULL_TREE. */
1890 static tree
1891 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1893 gcc_assert (VECTOR_TYPE_P (vtype));
1894 gcc_assert (known_gt (nelts, 0U));
1896 machine_mode vmode = TYPE_MODE (vtype);
1897 if (!VECTOR_MODE_P (vmode))
1898 return NULL_TREE;
1900 /* When we are asked to compose the vector from its components let
1901 that happen directly. */
1902 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1904 *ptype = TREE_TYPE (vtype);
1905 return vtype;
1908 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
1909 unsigned int pbsize;
1910 if (constant_multiple_p (vbsize, nelts, &pbsize))
1912 /* First check if vec_init optab supports construction from
1913 vector pieces directly. */
1914 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1915 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
1916 machine_mode rmode;
1917 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
1918 && (convert_optab_handler (vec_init_optab, vmode, rmode)
1919 != CODE_FOR_nothing))
1921 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1922 return vtype;
1925 /* Otherwise check whether an integer type of the same piece size exists
1926 and whether the vec_init optab supports construction from it directly. */
1927 if (int_mode_for_size (pbsize, 0).exists (&elmode)
1928 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
1929 && (convert_optab_handler (vec_init_optab, rmode, elmode)
1930 != CODE_FOR_nothing))
1932 *ptype = build_nonstandard_integer_type (pbsize, 1);
1933 return build_vector_type (*ptype, nelts);
1937 return NULL_TREE;
1940 /* A subroutine of get_load_store_type, with a subset of the same
1941 arguments. Handle the case where STMT_INFO is part of a grouped load
1942 or store.
1944 For stores, the statements in the group are all consecutive
1945 and there is no gap at the end. For loads, the statements in the
1946 group might not be consecutive; there can be gaps between statements
1947 as well as at the end. */
1949 static bool
1950 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1951 tree vectype, slp_tree slp_node,
1952 bool masked_p, vec_load_store_type vls_type,
1953 vect_memory_access_type *memory_access_type,
1954 poly_int64 *poffset,
1955 dr_alignment_support *alignment_support_scheme,
1956 int *misalignment,
1957 gather_scatter_info *gs_info,
1958 internal_fn *lanes_ifn)
1960 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1961 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1962 stmt_vec_info first_stmt_info;
1963 unsigned int group_size;
1964 unsigned HOST_WIDE_INT gap;
1965 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1967 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1968 group_size = DR_GROUP_SIZE (first_stmt_info);
1969 gap = DR_GROUP_GAP (first_stmt_info);
1971 else
1973 first_stmt_info = stmt_info;
1974 group_size = 1;
1975 gap = 0;
1977 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
1978 bool single_element_p = (stmt_info == first_stmt_info
1979 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
1980 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1982 /* True if the vectorized statements would access beyond the last
1983 statement in the group. */
1984 bool overrun_p = false;
1986 /* True if we can cope with such overrun by peeling for gaps, so that
1987 there is at least one final scalar iteration after the vector loop. */
1988 bool can_overrun_p = (!masked_p
1989 && vls_type == VLS_LOAD
1990 && loop_vinfo
1991 && !loop->inner);
1993 /* There can only be a gap at the end of the group if the stride is
1994 known at compile time. */
1995 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
1997 /* Stores can't yet have gaps. */
1998 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2000 if (slp_node)
2002 /* For SLP vectorization we directly vectorize a subchain
2003 without permutation. */
2004 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2005 first_dr_info
2006 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2007 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2009 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2010 separated by the stride, until we have a complete vector.
2011 Fall back to scalar accesses if that isn't possible. */
2012 if (multiple_p (nunits, group_size))
2013 *memory_access_type = VMAT_STRIDED_SLP;
2014 else
2015 *memory_access_type = VMAT_ELEMENTWISE;
2017 else
2019 overrun_p = loop_vinfo && gap != 0;
2020 if (overrun_p && vls_type != VLS_LOAD)
2022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2023 "Grouped store with gaps requires"
2024 " non-consecutive accesses\n");
2025 return false;
2027 /* An overrun is fine if the trailing elements are smaller
2028 than the alignment boundary B. Every vector access will
2029 be a multiple of B and so we are guaranteed to access a
2030 non-gap element in the same B-sized block. */
2031 if (overrun_p
2032 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2033 vectype)
2034 / vect_get_scalar_dr_size (first_dr_info)))
2035 overrun_p = false;
2037 /* If the gap splits the vector in half and the target
2038 can do half-vector operations avoid the epilogue peeling
2039 by simply loading half of the vector only. Usually
2040 the construction with an upper zero half will be elided. */
2041 dr_alignment_support alss;
2042 int misalign = dr_misalignment (first_dr_info, vectype);
2043 tree half_vtype;
2044 if (overrun_p
2045 && !masked_p
2046 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2047 vectype, misalign)))
2048 == dr_aligned
2049 || alss == dr_unaligned_supported)
2050 && known_eq (nunits, (group_size - gap) * 2)
2051 && known_eq (nunits, group_size)
2052 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2053 != NULL_TREE))
2054 overrun_p = false;
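/* E.g. for a single group of size 8 with a trailing gap of 4 and an
   8-element vector, the 4 accessed elements can be loaded as one half
   vector (or an equally sized integer) with the upper half zeroed,
   avoiding peeling for gaps.  */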
2056 if (overrun_p && !can_overrun_p)
2058 if (dump_enabled_p ())
2059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2060 "Peeling for outer loop is not supported\n");
2061 return false;
2063 int cmp = compare_step_with_zero (vinfo, stmt_info);
2064 if (cmp < 0)
2066 if (single_element_p)
2067 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2068 only correct for single element "interleaving" SLP. */
2069 *memory_access_type = get_negative_load_store_type
2070 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2071 else
2073 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2074 separated by the stride, until we have a complete vector.
2075 Fall back to scalar accesses if that isn't possible. */
2076 if (multiple_p (nunits, group_size))
2077 *memory_access_type = VMAT_STRIDED_SLP;
2078 else
2079 *memory_access_type = VMAT_ELEMENTWISE;
2082 else if (cmp == 0 && loop_vinfo)
2084 gcc_assert (vls_type == VLS_LOAD);
2085 *memory_access_type = VMAT_INVARIANT;
2086 /* Invariant accesses perform only component accesses, alignment
2087 is irrelevant for them. */
2088 *alignment_support_scheme = dr_unaligned_supported;
2090 else
2091 *memory_access_type = VMAT_CONTIGUOUS;
2093 /* When we have a contiguous access across loop iterations
2094 but the access in the loop doesn't cover the full vector
2095 we can end up with no gap recorded but still excess
2096 elements accessed, see PR103116. Make sure we peel for
2097 gaps if necessary and sufficient and give up if not.
2099 If there is a combination of the access not covering the full
2100 vector and a gap recorded then we may need to peel twice. */
2101 if (loop_vinfo
2102 && *memory_access_type == VMAT_CONTIGUOUS
2103 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2104 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2105 nunits))
2107 unsigned HOST_WIDE_INT cnunits, cvf;
2108 if (!can_overrun_p
2109 || !nunits.is_constant (&cnunits)
2110 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2111 /* Peeling for gaps assumes that a single scalar iteration
2112 is enough to make sure the last vector iteration doesn't
2113 access excess elements.
2114 ??? Enhancements include peeling multiple iterations
2115 or using masked loads with a static mask. */
2116 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2118 if (dump_enabled_p ())
2119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2120 "peeling for gaps insufficient for "
2121 "access\n");
2122 return false;
2124 overrun_p = true;
2128 else
2130 /* We can always handle this case using elementwise accesses,
2131 but see if something more efficient is available. */
2132 *memory_access_type = VMAT_ELEMENTWISE;
2134 /* If there is a gap at the end of the group then these optimizations
2135 would access excess elements in the last iteration. */
2136 bool would_overrun_p = (gap != 0);
2137 /* An overrun is fine if the trailing elements are smaller than the
2138 alignment boundary B. Every vector access will be a multiple of B
2139 and so we are guaranteed to access a non-gap element in the
2140 same B-sized block. */
2141 if (would_overrun_p
2142 && !masked_p
2143 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2144 / vect_get_scalar_dr_size (first_dr_info)))
2145 would_overrun_p = false;
2147 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2148 && (can_overrun_p || !would_overrun_p)
2149 && compare_step_with_zero (vinfo, stmt_info) > 0)
2151 /* First cope with the degenerate case of a single-element
2152 vector. */
2153 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2156 else
2158 /* Otherwise try using LOAD/STORE_LANES. */
2159 *lanes_ifn
2160 = vls_type == VLS_LOAD
2161 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2162 : vect_store_lanes_supported (vectype, group_size,
2163 masked_p);
2164 if (*lanes_ifn != IFN_LAST)
2166 *memory_access_type = VMAT_LOAD_STORE_LANES;
2167 overrun_p = would_overrun_p;
2170 /* If that fails, try using permuting loads. */
2171 else if (vls_type == VLS_LOAD
2172 ? vect_grouped_load_supported (vectype,
2173 single_element_p,
2174 group_size)
2175 : vect_grouped_store_supported (vectype, group_size))
2177 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2178 overrun_p = would_overrun_p;
2183 /* As a last resort, try using a gather load or scatter store.
2185 ??? Although the code can handle all group sizes correctly,
2186 it probably isn't a win to use separate strided accesses based
2187 on nearby locations. Or, even if it's a win over scalar code,
2188 it might not be a win over vectorizing at a lower VF, if that
2189 allows us to use contiguous accesses. */
2190 if (*memory_access_type == VMAT_ELEMENTWISE
2191 && single_element_p
2192 && loop_vinfo
2193 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2194 masked_p, gs_info))
2195 *memory_access_type = VMAT_GATHER_SCATTER;
2198 if (*memory_access_type == VMAT_GATHER_SCATTER
2199 || *memory_access_type == VMAT_ELEMENTWISE)
2201 *alignment_support_scheme = dr_unaligned_supported;
2202 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2204 else
2206 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2207 *alignment_support_scheme
2208 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2209 *misalignment);
2212 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2214 /* STMT is the leader of the group. Check the operands of all the
2215 stmts of the group. */
2216 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2217 while (next_stmt_info)
2219 tree op = vect_get_store_rhs (next_stmt_info);
2220 enum vect_def_type dt;
2221 if (!vect_is_simple_use (op, vinfo, &dt))
2223 if (dump_enabled_p ())
2224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2225 "use not simple.\n");
2226 return false;
2228 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2232 if (overrun_p)
2234 gcc_assert (can_overrun_p);
2235 if (dump_enabled_p ())
2236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2237 "Data access with gaps requires scalar "
2238 "epilogue loop\n");
2239 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2242 return true;
2245 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2246 if there is a memory access type that the vectorized form can use,
2247 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2248 or scatters, fill in GS_INFO accordingly. In addition
2249 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2250 the target does not support the alignment scheme. *MISALIGNMENT
2251 is set according to the alignment of the access (including
2252 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2254 SLP says whether we're performing SLP rather than loop vectorization.
2255 MASKED_P is true if the statement is conditional on a vectorized mask.
2256 VECTYPE is the vector type that the vectorized statements will use.
2257 NCOPIES is the number of vector statements that will be needed. */
2259 static bool
2260 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2261 tree vectype, slp_tree slp_node,
2262 bool masked_p, vec_load_store_type vls_type,
2263 unsigned int ncopies,
2264 vect_memory_access_type *memory_access_type,
2265 poly_int64 *poffset,
2266 dr_alignment_support *alignment_support_scheme,
2267 int *misalignment,
2268 gather_scatter_info *gs_info,
2269 internal_fn *lanes_ifn)
2271 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2272 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2273 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2274 *poffset = 0;
2275 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2277 *memory_access_type = VMAT_GATHER_SCATTER;
2278 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2279 gcc_unreachable ();
2280 /* When using internal functions, we rely on pattern recognition
2281 to convert the type of the offset to the type that the target
2282 requires, with the result being a call to an internal function.
2283 If that failed for some reason (e.g. because another pattern
2284 took priority), just handle cases in which the offset already
2285 has the right type. */
2286 else if (gs_info->ifn != IFN_LAST
2287 && !is_gimple_call (stmt_info->stmt)
2288 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2289 TREE_TYPE (gs_info->offset_vectype)))
2291 if (dump_enabled_p ())
2292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2293 "%s offset requires a conversion\n",
2294 vls_type == VLS_LOAD ? "gather" : "scatter");
2295 return false;
2297 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2298 &gs_info->offset_dt,
2299 &gs_info->offset_vectype))
2301 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2303 "%s index use not simple.\n",
2304 vls_type == VLS_LOAD ? "gather" : "scatter");
2305 return false;
2307 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2309 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2310 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2311 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2312 (gs_info->offset_vectype),
2313 TYPE_VECTOR_SUBPARTS (vectype)))
2315 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2317 "unsupported vector types for emulated "
2318 "gather.\n");
2319 return false;
2322 /* Gather-scatter accesses perform only component accesses, alignment
2323 is irrelevant for them. */
2324 *alignment_support_scheme = dr_unaligned_supported;
2326 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2328 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2329 masked_p,
2330 vls_type, memory_access_type, poffset,
2331 alignment_support_scheme,
2332 misalignment, gs_info, lanes_ifn))
2333 return false;
2335 else if (STMT_VINFO_STRIDED_P (stmt_info))
2337 gcc_assert (!slp_node);
2338 if (loop_vinfo
2339 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2340 masked_p, gs_info))
2341 *memory_access_type = VMAT_GATHER_SCATTER;
2342 else
2343 *memory_access_type = VMAT_ELEMENTWISE;
2344 /* Alignment is irrelevant here. */
2345 *alignment_support_scheme = dr_unaligned_supported;
2347 else
2349 int cmp = compare_step_with_zero (vinfo, stmt_info);
2350 if (cmp == 0)
2352 gcc_assert (vls_type == VLS_LOAD);
2353 *memory_access_type = VMAT_INVARIANT;
2354 /* Invariant accesses perform only component accesses, alignment
2355 is irrelevant for them. */
2356 *alignment_support_scheme = dr_unaligned_supported;
2358 else
2360 if (cmp < 0)
2361 *memory_access_type = get_negative_load_store_type
2362 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2363 else
2364 *memory_access_type = VMAT_CONTIGUOUS;
2365 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2366 vectype, *poffset);
2367 *alignment_support_scheme
2368 = vect_supportable_dr_alignment (vinfo,
2369 STMT_VINFO_DR_INFO (stmt_info),
2370 vectype, *misalignment);
2374 if ((*memory_access_type == VMAT_ELEMENTWISE
2375 || *memory_access_type == VMAT_STRIDED_SLP)
2376 && !nunits.is_constant ())
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2380 "Not using elementwise accesses due to variable "
2381 "vectorization factor.\n");
2382 return false;
2385 if (*alignment_support_scheme == dr_unaligned_unsupported)
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2389 "unsupported unaligned access\n");
2390 return false;
2393 /* FIXME: At the moment the cost model seems to underestimate the
2394 cost of using elementwise accesses. This check preserves the
2395 traditional behavior until that can be fixed. */
2396 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2397 if (!first_stmt_info)
2398 first_stmt_info = stmt_info;
2399 if (*memory_access_type == VMAT_ELEMENTWISE
2400 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2401 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2402 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2403 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "not falling back to elementwise accesses\n");
2408 return false;
2410 return true;
2413 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2414 conditional operation STMT_INFO. When returning true, store the mask
2415 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2416 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2417 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2419 static bool
2420 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2421 slp_tree slp_node, unsigned mask_index,
2422 tree *mask, slp_tree *mask_node,
2423 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2425 enum vect_def_type mask_dt;
2426 tree mask_vectype;
2427 slp_tree mask_node_1;
2428 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2429 mask, &mask_node_1, &mask_dt, &mask_vectype))
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2433 "mask use not simple.\n");
2434 return false;
2437 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2439 if (dump_enabled_p ())
2440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2441 "mask argument is not a boolean.\n");
2442 return false;
2445 /* If the caller is not prepared for adjusting an external/constant
2446 SLP mask vector type, fail. */
2447 if (slp_node
2448 && !mask_node
2449 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2451 if (dump_enabled_p ())
2452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2453 "SLP mask argument is not vectorized.\n");
2454 return false;
2457 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2458 if (!mask_vectype)
2459 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
2460 mask_node_1);
2462 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2464 if (dump_enabled_p ())
2465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2466 "could not find an appropriate vector mask type.\n");
2467 return false;
2470 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2471 TYPE_VECTOR_SUBPARTS (vectype)))
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2475 "vector mask type %T"
2476 " does not match vector data type %T.\n",
2477 mask_vectype, vectype);
2479 return false;
2482 *mask_dt_out = mask_dt;
2483 *mask_vectype_out = mask_vectype;
2484 if (mask_node)
2485 *mask_node = mask_node_1;
2486 return true;
2489 /* Return true if stored value is suitable for vectorizing store
2490 statement STMT_INFO. When returning true, store the scalar stored
2491 in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2492 the type of the vectorized store value in
2493 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2495 static bool
2496 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2497 slp_tree slp_node, tree *rhs, slp_tree *rhs_node,
2498 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2499 vec_load_store_type *vls_type_out)
2501 int op_no = 0;
2502 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2504 if (gimple_call_internal_p (call)
2505 && internal_store_fn_p (gimple_call_internal_fn (call)))
2506 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2508 if (slp_node)
2509 op_no = vect_slp_child_index_for_operand
2510 (stmt_info->stmt, op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
2512 enum vect_def_type rhs_dt;
2513 tree rhs_vectype;
2514 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2515 rhs, rhs_node, &rhs_dt, &rhs_vectype))
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "use not simple.\n");
2520 return false;
2523 /* In case this is a store from a constant, make sure
2524 native_encode_expr can handle it. */
2525 if (CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
2527 if (dump_enabled_p ())
2528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2529 "cannot encode constant as a byte sequence.\n");
2530 return false;
2533 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2534 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2536 if (dump_enabled_p ())
2537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2538 "incompatible vector types.\n");
2539 return false;
2542 *rhs_dt_out = rhs_dt;
2543 *rhs_vectype_out = rhs_vectype;
2544 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2545 *vls_type_out = VLS_STORE_INVARIANT;
2546 else
2547 *vls_type_out = VLS_STORE;
2548 return true;
2551 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2552 Note that we support masks with floating-point type, in which case the
2553 floats are interpreted as a bitmask. */
2555 static tree
2556 vect_build_all_ones_mask (vec_info *vinfo,
2557 stmt_vec_info stmt_info, tree masktype)
2559 if (TREE_CODE (masktype) == INTEGER_TYPE)
2560 return build_int_cst (masktype, -1);
2561 else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2562 || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2564 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2565 mask = build_vector_from_val (masktype, mask);
2566 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2568 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2570 REAL_VALUE_TYPE r;
2571 long tmp[6];
2572 for (int j = 0; j < 6; ++j)
2573 tmp[j] = -1;
2574 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2575 tree mask = build_real (TREE_TYPE (masktype), r);
2576 mask = build_vector_from_val (masktype, mask);
2577 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2579 gcc_unreachable ();
2582 /* Build an all-zero merge value of type VECTYPE while vectorizing
2583 STMT_INFO as a gather load. */
2585 static tree
2586 vect_build_zero_merge_argument (vec_info *vinfo,
2587 stmt_vec_info stmt_info, tree vectype)
2589 tree merge;
2590 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2591 merge = build_int_cst (TREE_TYPE (vectype), 0);
2592 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2594 REAL_VALUE_TYPE r;
2595 long tmp[6];
2596 for (int j = 0; j < 6; ++j)
2597 tmp[j] = 0;
2598 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2599 merge = build_real (TREE_TYPE (vectype), r);
2601 else
2602 gcc_unreachable ();
2603 merge = build_vector_from_val (vectype, merge);
2604 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2607 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2608 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2609 the gather load operation. If the load is conditional, MASK is the
2610 vectorized condition, otherwise MASK is null. PTR is the base
2611 pointer and OFFSET is the vectorized offset. */
2613 static gimple *
2614 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2615 gimple_stmt_iterator *gsi,
2616 gather_scatter_info *gs_info,
2617 tree ptr, tree offset, tree mask)
2619 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2620 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2621 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2622 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2623 /* ptrtype */ arglist = TREE_CHAIN (arglist);
2624 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2625 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2626 tree scaletype = TREE_VALUE (arglist);
2627 tree var;
2628 gcc_checking_assert (types_compatible_p (srctype, rettype)
2629 && (!mask
2630 || TREE_CODE (masktype) == INTEGER_TYPE
2631 || types_compatible_p (srctype, masktype)));
2633 tree op = offset;
2634 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2636 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2637 TYPE_VECTOR_SUBPARTS (idxtype)));
2638 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2639 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2640 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2641 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2642 op = var;
2645 tree src_op = NULL_TREE;
2646 tree mask_op = NULL_TREE;
2647 if (mask)
2649 if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2651 tree utype, optype = TREE_TYPE (mask);
2652 if (VECTOR_TYPE_P (masktype)
2653 || TYPE_MODE (masktype) == TYPE_MODE (optype))
2654 utype = masktype;
2655 else
2656 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2657 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2658 tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
2659 gassign *new_stmt
2660 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2661 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2662 mask_arg = var;
2663 if (!useless_type_conversion_p (masktype, utype))
2665 gcc_assert (TYPE_PRECISION (utype)
2666 <= TYPE_PRECISION (masktype));
2667 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2668 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2669 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2670 mask_arg = var;
2672 src_op = build_zero_cst (srctype);
2673 mask_op = mask_arg;
2675 else
2677 src_op = mask;
2678 mask_op = mask;
2681 else
2683 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2684 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2687 tree scale = build_int_cst (scaletype, gs_info->scale);
2688 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2689 mask_op, scale);
2691 if (!useless_type_conversion_p (vectype, rettype))
2693 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2694 TYPE_VECTOR_SUBPARTS (rettype)));
2695 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2696 gimple_call_set_lhs (new_stmt, op);
2697 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2698 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2699 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
2702 return new_stmt;
2705 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2706 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2707 the scatter store operation. If the store is conditional, MASK is the
2708 unvectorized condition, otherwise MASK is null. */
2710 static void
2711 vect_build_scatter_store_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2712 gimple_stmt_iterator *gsi, gimple **vec_stmt,
2713 gather_scatter_info *gs_info, tree mask,
2714 stmt_vector_for_cost *cost_vec)
2716 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2717 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2718 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2719 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2720 enum { NARROW, NONE, WIDEN } modifier;
2721 poly_uint64 scatter_off_nunits
2722 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2724 /* FIXME: Keep the previous costing approach from vect_model_store_cost,
2725 costing N scalar stores; it should be tweaked to use target-specific
2726 costs for the related scatter store calls. */
2727 if (cost_vec)
2729 tree op = vect_get_store_rhs (stmt_info);
2730 enum vect_def_type dt;
2731 gcc_assert (vect_is_simple_use (op, vinfo, &dt));
2732 unsigned int inside_cost, prologue_cost = 0;
2733 if (dt == vect_constant_def || dt == vect_external_def)
2734 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
2735 stmt_info, 0, vect_prologue);
2736 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
2737 inside_cost = record_stmt_cost (cost_vec, ncopies * assumed_nunits,
2738 scalar_store, stmt_info, 0, vect_body);
2740 if (dump_enabled_p ())
2741 dump_printf_loc (MSG_NOTE, vect_location,
2742 "vect_model_store_cost: inside_cost = %d, "
2743 "prologue_cost = %d .\n",
2744 inside_cost, prologue_cost);
2745 return;
2748 tree perm_mask = NULL_TREE, mask_halfvectype = NULL_TREE;
2749 if (known_eq (nunits, scatter_off_nunits))
2750 modifier = NONE;
2751 else if (known_eq (nunits * 2, scatter_off_nunits))
2753 modifier = WIDEN;
2755 /* Currently gathers and scatters are only supported for
2756 fixed-length vectors. */
2757 unsigned int count = scatter_off_nunits.to_constant ();
2758 vec_perm_builder sel (count, count, 1);
2759 for (unsigned i = 0; i < (unsigned int) count; ++i)
2760 sel.quick_push (i | (count / 2));
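/* E.g. for COUNT == 4 this builds the selector { 2, 3, 2, 3 }, which
   brings the high half of the offset vector into the low positions for
   the second scatter call of each pair.  */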
2762 vec_perm_indices indices (sel, 1, count);
2763 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype, indices);
2764 gcc_assert (perm_mask != NULL_TREE);
2766 else if (known_eq (nunits, scatter_off_nunits * 2))
2768 modifier = NARROW;
2770 /* Currently gathers and scatters are only supported for
2771 fixed-length vectors. */
2772 unsigned int count = nunits.to_constant ();
2773 vec_perm_builder sel (count, count, 1);
2774 for (unsigned i = 0; i < (unsigned int) count; ++i)
2775 sel.quick_push (i | (count / 2));
2777 vec_perm_indices indices (sel, 2, count);
2778 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2779 gcc_assert (perm_mask != NULL_TREE);
2780 ncopies *= 2;
2782 if (mask)
2783 mask_halfvectype = truth_type_for (gs_info->offset_vectype);
2785 else
2786 gcc_unreachable ();
2788 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2789 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2790 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2791 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2792 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2793 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2794 tree scaletype = TREE_VALUE (arglist);
2796 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2797 && TREE_CODE (rettype) == VOID_TYPE);
2799 tree ptr = fold_convert (ptrtype, gs_info->base);
2800 if (!is_gimple_min_invariant (ptr))
2802 gimple_seq seq;
2803 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2804 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2805 edge pe = loop_preheader_edge (loop);
2806 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2807 gcc_assert (!new_bb);
2810 tree mask_arg = NULL_TREE;
2811 if (mask == NULL_TREE)
2813 mask_arg = build_int_cst (masktype, -1);
2814 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
2817 tree scale = build_int_cst (scaletype, gs_info->scale);
2819 auto_vec<tree> vec_oprnds0;
2820 auto_vec<tree> vec_oprnds1;
2821 auto_vec<tree> vec_masks;
2822 if (mask)
2824 tree mask_vectype = truth_type_for (vectype);
2825 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2826 modifier == NARROW ? ncopies / 2 : ncopies,
2827 mask, &vec_masks, mask_vectype);
2829 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2830 modifier == WIDEN ? ncopies / 2 : ncopies,
2831 gs_info->offset, &vec_oprnds0);
2832 tree op = vect_get_store_rhs (stmt_info);
2833 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2834 modifier == NARROW ? ncopies / 2 : ncopies, op,
2835 &vec_oprnds1);
2837 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2838 tree mask_op = NULL_TREE;
2839 tree src, vec_mask;
2840 for (int j = 0; j < ncopies; ++j)
2842 if (modifier == WIDEN)
2844 if (j & 1)
2845 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0, perm_mask,
2846 stmt_info, gsi);
2847 else
2848 op = vec_oprnd0 = vec_oprnds0[j / 2];
2849 src = vec_oprnd1 = vec_oprnds1[j];
2850 if (mask)
2851 mask_op = vec_mask = vec_masks[j];
2853 else if (modifier == NARROW)
2855 if (j & 1)
2856 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
2857 perm_mask, stmt_info, gsi);
2858 else
2859 src = vec_oprnd1 = vec_oprnds1[j / 2];
2860 op = vec_oprnd0 = vec_oprnds0[j];
2861 if (mask)
2862 mask_op = vec_mask = vec_masks[j / 2];
2864 else
2866 op = vec_oprnd0 = vec_oprnds0[j];
2867 src = vec_oprnd1 = vec_oprnds1[j];
2868 if (mask)
2869 mask_op = vec_mask = vec_masks[j];
2872 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
2874 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
2875 TYPE_VECTOR_SUBPARTS (srctype)));
2876 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
2877 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
2878 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
2879 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2880 src = var;
2883 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2885 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2886 TYPE_VECTOR_SUBPARTS (idxtype)));
2887 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2888 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2889 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2890 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2891 op = var;
2894 if (mask)
2896 tree utype;
2897 mask_arg = mask_op;
2898 if (modifier == NARROW)
2900 tree var
2901 = vect_get_new_ssa_name (mask_halfvectype, vect_simple_var);
2902 gassign *new_stmt
2903 = gimple_build_assign (var,
2904 (j & 1) ? VEC_UNPACK_HI_EXPR
2905 : VEC_UNPACK_LO_EXPR,
2906 mask_op);
2907 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2908 mask_arg = var;
2910 tree optype = TREE_TYPE (mask_arg);
2911 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
2912 utype = masktype;
2913 else
2914 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2915 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
2916 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
2917 gassign *new_stmt
2918 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2919 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2920 mask_arg = var;
2921 if (!useless_type_conversion_p (masktype, utype))
2923 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
2924 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2925 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2926 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2927 mask_arg = var;
2931 gcall *new_stmt
2932 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
2933 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2935 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2937 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2940 /* Prepare the base and offset in GS_INFO for vectorization.
2941 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2942 to the vectorized offset argument for the first copy of STMT_INFO.
2943 STMT_INFO is the statement described by GS_INFO and LOOP is the
2944 containing loop. */
2946 static void
2947 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2948 class loop *loop, stmt_vec_info stmt_info,
2949 slp_tree slp_node, gather_scatter_info *gs_info,
2950 tree *dataref_ptr, vec<tree> *vec_offset)
2952 gimple_seq stmts = NULL;
2953 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2954 if (stmts != NULL)
2956 basic_block new_bb;
2957 edge pe = loop_preheader_edge (loop);
2958 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2959 gcc_assert (!new_bb);
2961 if (slp_node)
2962 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2963 else
2965 unsigned ncopies
2966 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2967 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2968 gs_info->offset, vec_offset,
2969 gs_info->offset_vectype);
2973 /* Prepare to implement a grouped or strided load or store using
2974 the gather load or scatter store operation described by GS_INFO.
2975 STMT_INFO is the load or store statement.
2977 Set *DATAREF_BUMP to the amount that should be added to the base
2978 address after each copy of the vectorized statement. Set *VEC_OFFSET
2979 to an invariant offset vector in which element I has the value
2980 I * DR_STEP / SCALE. */
2982 static void
2983 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2984 loop_vec_info loop_vinfo,
2985 gimple_stmt_iterator *gsi,
2986 gather_scatter_info *gs_info,
2987 tree *dataref_bump, tree *vec_offset,
2988 vec_loop_lens *loop_lens)
2990 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2991 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2993 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2995 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2996 ivtmp_8 = _31 * 16 (step in bytes);
2997 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2998 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2999 tree loop_len
3000 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
3001 tree tmp
3002 = fold_build2 (MULT_EXPR, sizetype,
3003 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3004 loop_len);
3005 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
3006 GSI_SAME_STMT);
3008 else
3010 tree bump
3011 = size_binop (MULT_EXPR,
3012 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3013 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3014 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
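/* E.g. with a scalar DR_STEP of 16 bytes and a 4-element vector, the
   base address advances by 64 bytes per vectorized copy.  */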
3017 /* The offset given in GS_INFO can have pointer type, so use the element
3018 type of the vector instead. */
3019 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3021 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3022 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3023 ssize_int (gs_info->scale));
3024 step = fold_convert (offset_type, step);
3026 /* Create {0, X, X*2, X*3, ...}. */
3027 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3028 build_zero_cst (offset_type), step);
3029 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
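/* E.g. with DR_STEP == 16 and SCALE == 4, X is 4 and the invariant
   offset vector is { 0, 4, 8, 12 }; with the scale applied by the
   gather/scatter access itself, each lane then addresses
   base + { 0, 16, 32, 48 } as required.  */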
3032 /* Prepare the pointer IVs which need to be updated by a variable amount.
3033 That amount is the outcome of .SELECT_VL, which lets each iteration
3034 process a flexible number of elements, as long as that number is at
3035 most VF elements.
3037 Return the data pointer increment according to SELECT_VL.
3038 If new statements are needed, insert them before GSI. */
3040 static tree
3041 vect_get_loop_variant_data_ptr_increment (
3042 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
3043 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
3044 vect_memory_access_type memory_access_type)
3046 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3047 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3049 /* gather/scatter never reach here. */
3050 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
3052 /* When the SELECT_VL pattern is in use, we dynamically adjust
3053 the memory address by the .SELECT_VL result.
3055 The result of .SELECT_VL is the number of elements to
3056 be processed in each iteration. So the memory address
3057 adjustment operation should be:
3059 addr = addr + .SELECT_VL (ARG..) * step;  */
3061 tree loop_len
3062 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
3063 tree len_type = TREE_TYPE (loop_len);
3064 /* Since the outcome of .SELECT_VL is a number of elements, we adjust
3065 it to a byte size so that it can be used to bump the address pointer
3066 IVs by a variable amount.  */
3067 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
3068 wide_int_to_tree (len_type, wi::to_widest (step)));
3069 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
3070 gassign *assign = gimple_build_assign (bump, tmp);
3071 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
3072 return bump;
3075 /* Return the amount that should be added to a vector pointer to move
3076 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3077 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3078 vectorization. */
3080 static tree
3081 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
3082 dr_vec_info *dr_info, tree aggr_type,
3083 vect_memory_access_type memory_access_type,
3084 vec_loop_lens *loop_lens = nullptr)
3086 if (memory_access_type == VMAT_INVARIANT)
3087 return size_zero_node;
3089 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3090 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3091 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
3092 loop_lens, dr_info,
3093 memory_access_type);
3095 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3096 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3097 if (tree_int_cst_sgn (step) == -1)
3098 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
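/* E.g. for a 16-byte AGGR_TYPE and a negative step the increment becomes
   -16, so the pointer walks backwards by one vector per copy.  */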
3099 return iv_step;
3102 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3104 static bool
3105 vectorizable_bswap (vec_info *vinfo,
3106 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3107 gimple **vec_stmt, slp_tree slp_node,
3108 slp_tree *slp_op,
3109 tree vectype_in, stmt_vector_for_cost *cost_vec)
3111 tree op, vectype;
3112 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3113 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3114 unsigned ncopies;
3116 op = gimple_call_arg (stmt, 0);
3117 vectype = STMT_VINFO_VECTYPE (stmt_info);
3118 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3120 /* Multiple types in SLP are handled by creating the appropriate number of
3121 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3122 case of SLP. */
3123 if (slp_node)
3124 ncopies = 1;
3125 else
3126 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3128 gcc_assert (ncopies >= 1);
3130 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3131 if (! char_vectype)
3132 return false;
3134 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3135 unsigned word_bytes;
3136 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3137 return false;
3139 /* The encoding uses one stepped pattern for each byte in the word. */
3140 vec_perm_builder elts (num_bytes, word_bytes, 3);
3141 for (unsigned i = 0; i < 3; ++i)
3142 for (unsigned j = 0; j < word_bytes; ++j)
3143 elts.quick_push ((i + 1) * word_bytes - j - 1);
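/* E.g. for BUILT_IN_BSWAP32 on a 16-byte char vector (word_bytes == 4)
   this encodes { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... }, reversing
   the bytes within each 4-byte word.  */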
3145 vec_perm_indices indices (elts, 1, num_bytes);
3146 machine_mode vmode = TYPE_MODE (char_vectype);
3147 if (!can_vec_perm_const_p (vmode, vmode, indices))
3148 return false;
3150 if (! vec_stmt)
3152 if (slp_node
3153 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3155 if (dump_enabled_p ())
3156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3157 "incompatible vector types for invariants\n");
3158 return false;
3161 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3162 DUMP_VECT_SCOPE ("vectorizable_bswap");
3163 record_stmt_cost (cost_vec,
3164 1, vector_stmt, stmt_info, 0, vect_prologue);
3165 record_stmt_cost (cost_vec,
3166 slp_node
3167 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3168 vec_perm, stmt_info, 0, vect_body);
3169 return true;
3172 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3174 /* Transform. */
3175 vec<tree> vec_oprnds = vNULL;
3176 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3177 op, &vec_oprnds);
3178 /* Arguments are ready. Create the new vector stmt. */
3179 unsigned i;
3180 tree vop;
3181 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3183 gimple *new_stmt;
3184 tree tem = make_ssa_name (char_vectype);
3185 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3186 char_vectype, vop));
3187 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3188 tree tem2 = make_ssa_name (char_vectype);
3189 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3190 tem, tem, bswap_vconst);
3191 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3192 tem = make_ssa_name (vectype);
3193 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3194 vectype, tem2));
3195 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3196 if (slp_node)
3197 slp_node->push_vec_def (new_stmt);
3198 else
3199 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3202 if (!slp_node)
3203 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3205 vec_oprnds.release ();
3206 return true;
3209 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3210 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3211 in a single step. On success, store the binary pack code in
3212 *CONVERT_CODE. */
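/* E.g. narrowing V4SI to V8HI can typically be done in one step with
   VEC_PACK_TRUNC_EXPR, assuming the target provides the corresponding
   vec_pack_trunc optab; that is the code stored in *CONVERT_CODE.  */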
3214 static bool
3215 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3216 code_helper *convert_code)
3218 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3219 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3220 return false;
3222 code_helper code;
3223 int multi_step_cvt = 0;
3224 auto_vec <tree, 8> interm_types;
3225 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3226 &code, &multi_step_cvt, &interm_types)
3227 || multi_step_cvt)
3228 return false;
3230 *convert_code = code;
3231 return true;
3234 /* Function vectorizable_call.
3236 Check if STMT_INFO performs a function call that can be vectorized.
3237 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3238 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3239 Return true if STMT_INFO is vectorizable in this way. */
3241 static bool
3242 vectorizable_call (vec_info *vinfo,
3243 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3244 gimple **vec_stmt, slp_tree slp_node,
3245 stmt_vector_for_cost *cost_vec)
3247 gcall *stmt;
3248 tree vec_dest;
3249 tree scalar_dest;
3250 tree op;
3251 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3252 tree vectype_out, vectype_in;
3253 poly_uint64 nunits_in;
3254 poly_uint64 nunits_out;
3255 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3256 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3257 tree fndecl, new_temp, rhs_type;
3258 enum vect_def_type dt[4]
3259 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3260 vect_unknown_def_type };
3261 tree vectypes[ARRAY_SIZE (dt)] = {};
3262 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3263 int ndts = ARRAY_SIZE (dt);
3264 int ncopies, j;
3265 auto_vec<tree, 8> vargs;
3266 enum { NARROW, NONE, WIDEN } modifier;
3267 size_t i, nargs;
3268 tree lhs;
3270 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3271 return false;
3273 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3274 && ! vec_stmt)
3275 return false;
3277 /* Is STMT_INFO a vectorizable call? */
3278 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3279 if (!stmt)
3280 return false;
3282 if (gimple_call_internal_p (stmt)
3283 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3284 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3285 /* Handled by vectorizable_load and vectorizable_store. */
3286 return false;
3288 if (gimple_call_lhs (stmt) == NULL_TREE
3289 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3290 return false;
3292 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3294 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3296 /* Process function arguments. */
3297 rhs_type = NULL_TREE;
3298 vectype_in = NULL_TREE;
3299 nargs = gimple_call_num_args (stmt);
3301 /* Bail out if the function has more than four arguments; we do not have
3302 interesting builtin functions to vectorize with more than two arguments
3303 except for fma. Having no arguments is also not good. */
3304 if (nargs == 0 || nargs > 4)
3305 return false;
3307 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3308 combined_fn cfn = gimple_call_combined_fn (stmt);
3309 if (cfn == CFN_GOMP_SIMD_LANE)
3311 nargs = 0;
3312 rhs_type = unsigned_type_node;
3315 int mask_opno = -1;
3316 if (internal_fn_p (cfn))
3317 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3319 for (i = 0; i < nargs; i++)
3321 if ((int) i == mask_opno)
3323 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3324 &op, &slp_op[i], &dt[i], &vectypes[i]))
3325 return false;
3326 continue;
3329 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3330 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3332 if (dump_enabled_p ())
3333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3334 "use not simple.\n");
3335 return false;
3338 /* We can only handle calls with arguments of the same type. */
3339 if (rhs_type
3340 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3342 if (dump_enabled_p ())
3343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3344 "argument types differ.\n");
3345 return false;
3347 if (!rhs_type)
3348 rhs_type = TREE_TYPE (op);
3350 if (!vectype_in)
3351 vectype_in = vectypes[i];
3352 else if (vectypes[i]
3353 && !types_compatible_p (vectypes[i], vectype_in))
3355 if (dump_enabled_p ())
3356 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3357 "argument vector types differ.\n");
3358 return false;
3361 /* If all arguments are external or constant defs, infer the vector type
3362 from the scalar type. */
3363 if (!vectype_in)
3364 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3365 if (vec_stmt)
3366 gcc_assert (vectype_in);
3367 if (!vectype_in)
3369 if (dump_enabled_p ())
3370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3371 "no vectype for scalar type %T\n", rhs_type);
3373 return false;
3376 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3377 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3379 if (dump_enabled_p ())
3380 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3381 "mixed mask and nonmask vector types\n");
3382 return false;
3385 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3387 if (dump_enabled_p ())
3388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3389 "use emulated vector type for call\n");
3390 return false;
3393 /* FORNOW */
3394 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3395 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3396 if (known_eq (nunits_in * 2, nunits_out))
3397 modifier = NARROW;
3398 else if (known_eq (nunits_out, nunits_in))
3399 modifier = NONE;
3400 else if (known_eq (nunits_out * 2, nunits_in))
3401 modifier = WIDEN;
3402 else
3403 return false;
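/* E.g. V4SI inputs with a V8HI result give NARROW (nunits_out is twice
   nunits_in), equal element counts give NONE, and V8HI inputs with a
   V4SI result give WIDEN.  */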
3405 /* We only handle functions that do not read or clobber memory. */
3406 if (gimple_vuse (stmt))
3408 if (dump_enabled_p ())
3409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3410 "function reads from or writes to memory.\n");
3411 return false;
3414 /* For now, we only vectorize functions if a target specific builtin
3415 is available. TODO -- in some cases, it might be profitable to
3416 insert the calls for pieces of the vector, in order to be able
3417 to vectorize other operations in the loop. */
3418 fndecl = NULL_TREE;
3419 internal_fn ifn = IFN_LAST;
3420 tree callee = gimple_call_fndecl (stmt);
3422 /* First try using an internal function. */
3423 code_helper convert_code = MAX_TREE_CODES;
3424 if (cfn != CFN_LAST
3425 && (modifier == NONE
3426 || (modifier == NARROW
3427 && simple_integer_narrowing (vectype_out, vectype_in,
3428 &convert_code))))
3429 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3430 vectype_in);
3432 /* If that fails, try asking for a target-specific built-in function. */
3433 if (ifn == IFN_LAST)
3435 if (cfn != CFN_LAST)
3436 fndecl = targetm.vectorize.builtin_vectorized_function
3437 (cfn, vectype_out, vectype_in);
3438 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3439 fndecl = targetm.vectorize.builtin_md_vectorized_function
3440 (callee, vectype_out, vectype_in);
3443 if (ifn == IFN_LAST && !fndecl)
3445 if (cfn == CFN_GOMP_SIMD_LANE
3446 && !slp_node
3447 && loop_vinfo
3448 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3449 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3450 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3451 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3453 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3454 { 0, 1, 2, ... vf - 1 } vector. */
3455 gcc_assert (nargs == 0);
3457 else if (modifier == NONE
3458 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3459 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3460 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3461 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3462 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3463 slp_op, vectype_in, cost_vec);
3464 else
3466 if (dump_enabled_p ())
3467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3468 "function is not vectorizable.\n");
3469 return false;
3473 if (slp_node)
3474 ncopies = 1;
3475 else if (modifier == NARROW && ifn == IFN_LAST)
3476 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3477 else
3478 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3480 /* Sanity check: make sure that at least one copy of the vectorized stmt
3481 needs to be generated. */
3482 gcc_assert (ncopies >= 1);
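/* Look up the conditional (masked) and length-predicated variants of IFN,
if any, together with the loop's mask and length controls; they are needed
below when the loop may operate on partial vectors. */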
3484 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3485 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3486 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3487 int len_opno = internal_fn_len_index (cond_len_fn);
3488 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3489 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3490 if (!vec_stmt) /* transformation not required. */
3492 if (slp_node)
3493 for (i = 0; i < nargs; ++i)
3494 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3495 vectypes[i]
3496 ? vectypes[i] : vectype_in))
3498 if (dump_enabled_p ())
3499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3500 "incompatible vector types for invariants\n");
3501 return false;
3503 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3504 DUMP_VECT_SCOPE ("vectorizable_call");
3505 vect_model_simple_cost (vinfo, stmt_info,
3506 ncopies, dt, ndts, slp_node, cost_vec);
3507 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3508 record_stmt_cost (cost_vec, ncopies / 2,
3509 vec_promote_demote, stmt_info, 0, vect_body);
3511 if (loop_vinfo
3512 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3513 && (reduc_idx >= 0 || mask_opno >= 0))
3515 if (reduc_idx >= 0
3516 && (cond_fn == IFN_LAST
3517 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3518 OPTIMIZE_FOR_SPEED))
3519 && (cond_len_fn == IFN_LAST
3520 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3521 OPTIMIZE_FOR_SPEED)))
3523 if (dump_enabled_p ())
3524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3525 "can't use a fully-masked loop because no"
3526 " conditional operation is available.\n");
3527 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3529 else
3531 unsigned int nvectors
3532 = (slp_node
3533 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3534 : ncopies);
3535 tree scalar_mask = NULL_TREE;
3536 if (mask_opno >= 0)
3537 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3538 if (cond_len_fn != IFN_LAST
3539 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3540 OPTIMIZE_FOR_SPEED))
3541 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3543 else
3544 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3545 scalar_mask);
3548 return true;
3551 /* Transform. */
3553 if (dump_enabled_p ())
3554 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3556 /* Handle def. */
3557 scalar_dest = gimple_call_lhs (stmt);
3558 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3560 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3561 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3562 unsigned int vect_nargs = nargs;
3563 if (len_loop_p)
3565 if (len_opno >= 0)
3567 ifn = cond_len_fn;
3568 /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS. */
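/* For example, COND_ADD (MASK, A, B, ELSE) becomes
COND_LEN_ADD (MASK, A, B, ELSE, LEN, BIAS). */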
3569 vect_nargs += 2;
3571 else if (reduc_idx >= 0)
3572 gcc_unreachable ();
3574 else if (masked_loop_p && reduc_idx >= 0)
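/* The COND_* variant takes the loop mask as its first argument and an
'else' value as its last; the reduction input is reused as that 'else'
value so masked-off lanes pass it through unchanged, hence the two
extra arguments. */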
3576 ifn = cond_fn;
3577 vect_nargs += 2;
3580 if (modifier == NONE || ifn != IFN_LAST)
3582 tree prev_res = NULL_TREE;
3583 vargs.safe_grow (vect_nargs, true);
3584 auto_vec<vec<tree> > vec_defs (nargs);
3585 for (j = 0; j < ncopies; ++j)
3587 /* Build argument list for the vectorized call. */
3588 if (slp_node)
3590 vec<tree> vec_oprnds0;
3592 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3593 vec_oprnds0 = vec_defs[0];
3595 /* Arguments are ready. Create the new vector stmt. */
3596 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3598 int varg = 0;
3599 if (masked_loop_p && reduc_idx >= 0)
3601 unsigned int vec_num = vec_oprnds0.length ();
3602 /* Always true for SLP. */
3603 gcc_assert (ncopies == 1);
3604 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3605 gsi, masks, vec_num,
3606 vectype_out, i);
3608 size_t k;
3609 for (k = 0; k < nargs; k++)
3611 vec<tree> vec_oprndsk = vec_defs[k];
3612 vargs[varg++] = vec_oprndsk[i];
3614 if (masked_loop_p && reduc_idx >= 0)
3615 vargs[varg++] = vargs[reduc_idx + 1];
3616 gimple *new_stmt;
3617 if (modifier == NARROW)
3619 /* We don't define any narrowing conditional functions
3620 at present. */
3621 gcc_assert (mask_opno < 0);
3622 tree half_res = make_ssa_name (vectype_in);
3623 gcall *call
3624 = gimple_build_call_internal_vec (ifn, vargs);
3625 gimple_call_set_lhs (call, half_res);
3626 gimple_call_set_nothrow (call, true);
3627 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3628 if ((i & 1) == 0)
3630 prev_res = half_res;
3631 continue;
3633 new_temp = make_ssa_name (vec_dest);
3634 new_stmt = vect_gimple_build (new_temp, convert_code,
3635 prev_res, half_res);
3636 vect_finish_stmt_generation (vinfo, stmt_info,
3637 new_stmt, gsi);
3639 else
3641 if (len_opno >= 0 && len_loop_p)
3643 unsigned int vec_num = vec_oprnds0.length ();
3644 /* Always true for SLP. */
3645 gcc_assert (ncopies == 1);
3646 tree len
3647 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3648 vectype_out, i, 1);
3649 signed char biasval
3650 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3651 tree bias = build_int_cst (intQI_type_node, biasval);
3652 vargs[len_opno] = len;
3653 vargs[len_opno + 1] = bias;
3655 else if (mask_opno >= 0 && masked_loop_p)
3657 unsigned int vec_num = vec_oprnds0.length ();
3658 /* Always true for SLP. */
3659 gcc_assert (ncopies == 1);
3660 tree mask = vect_get_loop_mask (loop_vinfo,
3661 gsi, masks, vec_num,
3662 vectype_out, i);
3663 vargs[mask_opno] = prepare_vec_mask
3664 (loop_vinfo, TREE_TYPE (mask), mask,
3665 vargs[mask_opno], gsi);
3668 gcall *call;
3669 if (ifn != IFN_LAST)
3670 call = gimple_build_call_internal_vec (ifn, vargs);
3671 else
3672 call = gimple_build_call_vec (fndecl, vargs);
3673 new_temp = make_ssa_name (vec_dest, call);
3674 gimple_call_set_lhs (call, new_temp);
3675 gimple_call_set_nothrow (call, true);
3676 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3677 new_stmt = call;
3679 slp_node->push_vec_def (new_stmt);
3681 continue;
3684 int varg = 0;
3685 if (masked_loop_p && reduc_idx >= 0)
3686 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3687 vectype_out, j);
3688 for (i = 0; i < nargs; i++)
3690 op = gimple_call_arg (stmt, i);
3691 if (j == 0)
3693 vec_defs.quick_push (vNULL);
3694 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3695 op, &vec_defs[i],
3696 vectypes[i]);
3698 vargs[varg++] = vec_defs[i][j];
3700 if (masked_loop_p && reduc_idx >= 0)
3701 vargs[varg++] = vargs[reduc_idx + 1];
3703 if (len_opno >= 0 && len_loop_p)
3705 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3706 vectype_out, j, 1);
3707 signed char biasval
3708 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3709 tree bias = build_int_cst (intQI_type_node, biasval);
3710 vargs[len_opno] = len;
3711 vargs[len_opno + 1] = bias;
3713 else if (mask_opno >= 0 && masked_loop_p)
3715 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3716 vectype_out, j);
3717 vargs[mask_opno]
3718 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3719 vargs[mask_opno], gsi);
3722 gimple *new_stmt;
3723 if (cfn == CFN_GOMP_SIMD_LANE)
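/* Copy J of the GOMP_SIMD_LANE result is just the constant lane-index
vector { J * NUNITS_OUT, J * NUNITS_OUT + 1, ... }. */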
3725 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3726 tree new_var
3727 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3728 gimple *init_stmt = gimple_build_assign (new_var, cst);
3729 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3730 new_temp = make_ssa_name (vec_dest);
3731 new_stmt = gimple_build_assign (new_temp, new_var);
3732 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3734 else if (modifier == NARROW)
3736 /* We don't define any narrowing conditional functions at
3737 present. */
3738 gcc_assert (mask_opno < 0);
3739 tree half_res = make_ssa_name (vectype_in);
3740 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3741 gimple_call_set_lhs (call, half_res);
3742 gimple_call_set_nothrow (call, true);
3743 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3744 if ((j & 1) == 0)
3746 prev_res = half_res;
3747 continue;
3749 new_temp = make_ssa_name (vec_dest);
3750 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3751 half_res);
3752 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3754 else
3756 gcall *call;
3757 if (ifn != IFN_LAST)
3758 call = gimple_build_call_internal_vec (ifn, vargs);
3759 else
3760 call = gimple_build_call_vec (fndecl, vargs);
3761 new_temp = make_ssa_name (vec_dest, call);
3762 gimple_call_set_lhs (call, new_temp);
3763 gimple_call_set_nothrow (call, true);
3764 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3765 new_stmt = call;
3768 if (j == (modifier == NARROW ? 1 : 0))
3769 *vec_stmt = new_stmt;
3770 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3772 for (i = 0; i < nargs; i++)
3774 vec<tree> vec_oprndsi = vec_defs[i];
3775 vec_oprndsi.release ();
3778 else if (modifier == NARROW)
3780 auto_vec<vec<tree> > vec_defs (nargs);
3781 /* We don't define any narrowing conditional functions at present. */
3782 gcc_assert (mask_opno < 0);
3783 for (j = 0; j < ncopies; ++j)
3785 /* Build argument list for the vectorized call. */
3786 if (j == 0)
3787 vargs.create (nargs * 2);
3788 else
3789 vargs.truncate (0);
3791 if (slp_node)
3793 vec<tree> vec_oprnds0;
3795 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3796 vec_oprnds0 = vec_defs[0];
3798 /* Arguments are ready. Create the new vector stmt. */
3799 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3801 size_t k;
3802 vargs.truncate (0);
3803 for (k = 0; k < nargs; k++)
3805 vec<tree> vec_oprndsk = vec_defs[k];
3806 vargs.quick_push (vec_oprndsk[i]);
3807 vargs.quick_push (vec_oprndsk[i + 1]);
3809 gcall *call;
3810 if (ifn != IFN_LAST)
3811 call = gimple_build_call_internal_vec (ifn, vargs);
3812 else
3813 call = gimple_build_call_vec (fndecl, vargs);
3814 new_temp = make_ssa_name (vec_dest, call);
3815 gimple_call_set_lhs (call, new_temp);
3816 gimple_call_set_nothrow (call, true);
3817 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3818 slp_node->push_vec_def (call);
3820 continue;
3823 for (i = 0; i < nargs; i++)
3825 op = gimple_call_arg (stmt, i);
3826 if (j == 0)
3828 vec_defs.quick_push (vNULL);
3829 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3830 op, &vec_defs[i], vectypes[i]);
3832 vec_oprnd0 = vec_defs[i][2*j];
3833 vec_oprnd1 = vec_defs[i][2*j+1];
3835 vargs.quick_push (vec_oprnd0);
3836 vargs.quick_push (vec_oprnd1);
3839 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3840 new_temp = make_ssa_name (vec_dest, new_stmt);
3841 gimple_call_set_lhs (new_stmt, new_temp);
3842 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3844 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3847 if (!slp_node)
3848 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3850 for (i = 0; i < nargs; i++)
3852 vec<tree> vec_oprndsi = vec_defs[i];
3853 vec_oprndsi.release ();
3856 else
3857 /* No current target implements this case. */
3858 return false;
3860 vargs.release ();
3862 /* The call in STMT might prevent it from being removed in DCE.
3863 We cannot, however, remove it here, because of the way the SSA name
3864 it defines is mapped to the new definition. So just replace the
3865 rhs of the statement with something harmless. */
3867 if (slp_node)
3868 return true;
3870 stmt_info = vect_orig_stmt (stmt_info);
3871 lhs = gimple_get_lhs (stmt_info->stmt);
3873 gassign *new_stmt
3874 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3875 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3877 return true;
3881 struct simd_call_arg_info
3883 tree vectype;
3884 tree op;
3885 HOST_WIDE_INT linear_step;
3886 enum vect_def_type dt;
3887 unsigned int align;
3888 bool simd_lane_linear;
3891 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3892 is linear within a simd lane (but not within the whole loop), note it in
3893 *ARGINFO. */
3895 static void
3896 vect_simd_lane_linear (tree op, class loop *loop,
3897 struct simd_call_arg_info *arginfo)
3899 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3901 if (!is_gimple_assign (def_stmt)
3902 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3903 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3904 return;
3906 tree base = gimple_assign_rhs1 (def_stmt);
3907 HOST_WIDE_INT linear_step = 0;
3908 tree v = gimple_assign_rhs2 (def_stmt);
3909 while (TREE_CODE (v) == SSA_NAME)
3911 tree t;
3912 def_stmt = SSA_NAME_DEF_STMT (v);
3913 if (is_gimple_assign (def_stmt))
3914 switch (gimple_assign_rhs_code (def_stmt))
3916 case PLUS_EXPR:
3917 t = gimple_assign_rhs2 (def_stmt);
3918 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3919 return;
3920 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3921 v = gimple_assign_rhs1 (def_stmt);
3922 continue;
3923 case MULT_EXPR:
3924 t = gimple_assign_rhs2 (def_stmt);
3925 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3926 return;
3927 linear_step = tree_to_shwi (t);
3928 v = gimple_assign_rhs1 (def_stmt);
3929 continue;
3930 CASE_CONVERT:
3931 t = gimple_assign_rhs1 (def_stmt);
3932 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3933 || (TYPE_PRECISION (TREE_TYPE (v))
3934 < TYPE_PRECISION (TREE_TYPE (t))))
3935 return;
3936 if (!linear_step)
3937 linear_step = 1;
3938 v = t;
3939 continue;
3940 default:
3941 return;
3943 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3944 && loop->simduid
3945 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3946 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3947 == loop->simduid))
3949 if (!linear_step)
3950 linear_step = 1;
3951 arginfo->linear_step = linear_step;
3952 arginfo->op = base;
3953 arginfo->simd_lane_linear = true;
3954 return;
3959 /* Function vectorizable_simd_clone_call.
3961 Check if STMT_INFO performs a function call that can be vectorized
3962 by calling a simd clone of the function.
3963 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3964 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3965 Return true if STMT_INFO is vectorizable in this way. */
3967 static bool
3968 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3969 gimple_stmt_iterator *gsi,
3970 gimple **vec_stmt, slp_tree slp_node,
3971 stmt_vector_for_cost *)
3973 tree vec_dest;
3974 tree scalar_dest;
3975 tree op, type;
3976 tree vec_oprnd0 = NULL_TREE;
3977 tree vectype;
3978 poly_uint64 nunits;
3979 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3980 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3981 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3982 tree fndecl, new_temp;
3983 int ncopies, j;
3984 auto_vec<simd_call_arg_info> arginfo;
3985 vec<tree> vargs = vNULL;
3986 size_t i, nargs;
3987 tree lhs, rtype, ratype;
3988 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3989 int masked_call_offset = 0;
3991 /* Is STMT a vectorizable call? */
3992 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3993 if (!stmt)
3994 return false;
3996 fndecl = gimple_call_fndecl (stmt);
3997 if (fndecl == NULL_TREE
3998 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
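/* An IFN_MASK_CALL wraps a call that is executed only under a mask; the
real callee is passed as its first argument, so all original call
arguments are shifted by one position. */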
4000 fndecl = gimple_call_arg (stmt, 0);
4001 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4002 fndecl = TREE_OPERAND (fndecl, 0);
4003 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4004 masked_call_offset = 1;
4006 if (fndecl == NULL_TREE)
4007 return false;
4009 struct cgraph_node *node = cgraph_node::get (fndecl);
4010 if (node == NULL || node->simd_clones == NULL)
4011 return false;
4013 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4014 return false;
4016 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4017 && ! vec_stmt)
4018 return false;
4020 if (gimple_call_lhs (stmt)
4021 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4022 return false;
4024 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4026 vectype = STMT_VINFO_VECTYPE (stmt_info);
4028 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4029 return false;
4031 /* Process function arguments. */
4032 nargs = gimple_call_num_args (stmt) - masked_call_offset;
4034 /* Bail out if the function has zero arguments. */
4035 if (nargs == 0)
4036 return false;
4038 vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
4039 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
4040 arginfo.reserve (nargs, true);
4041 auto_vec<slp_tree> slp_op;
4042 slp_op.safe_grow_cleared (nargs);
4044 for (i = 0; i < nargs; i++)
4046 simd_call_arg_info thisarginfo;
4047 affine_iv iv;
4049 thisarginfo.linear_step = 0;
4050 thisarginfo.align = 0;
4051 thisarginfo.op = NULL_TREE;
4052 thisarginfo.simd_lane_linear = false;
4054 int op_no = i + masked_call_offset;
4055 if (slp_node)
4056 op_no = vect_slp_child_index_for_operand (stmt, op_no, false);
4057 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4058 op_no, &op, &slp_op[i],
4059 &thisarginfo.dt, &thisarginfo.vectype)
4060 || thisarginfo.dt == vect_uninitialized_def)
4062 if (dump_enabled_p ())
4063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4064 "use not simple.\n");
4065 return false;
4068 if (thisarginfo.dt == vect_constant_def
4069 || thisarginfo.dt == vect_external_def)
4071 /* With SLP we determine the vector type of constants/externals
4072 at analysis time, handling conflicts via
4073 vect_maybe_update_slp_op_vectype. At transform time
4074 we have a vector type recorded for SLP. */
4075 gcc_assert (!vec_stmt
4076 || !slp_node
4077 || thisarginfo.vectype != NULL_TREE);
4078 if (!vec_stmt)
4079 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
4080 TREE_TYPE (op),
4081 slp_node);
4083 else
4084 gcc_assert (thisarginfo.vectype != NULL_TREE);
4086 /* For linear arguments, the analyze phase should have saved
4087 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
4088 if (i * 3 + 4 <= simd_clone_info.length ()
4089 && simd_clone_info[i * 3 + 2])
4091 gcc_assert (vec_stmt);
4092 thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
4093 thisarginfo.op = simd_clone_info[i * 3 + 1];
4094 thisarginfo.simd_lane_linear
4095 = (simd_clone_info[i * 3 + 3] == boolean_true_node);
4096 /* If the loop has been peeled for alignment, we need to adjust it. */
4097 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4098 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4099 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4101 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4102 tree step = simd_clone_info[i * 3 + 2];
4103 tree opt = TREE_TYPE (thisarginfo.op);
4104 bias = fold_convert (TREE_TYPE (step), bias);
4105 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4106 thisarginfo.op
4107 = fold_build2 (POINTER_TYPE_P (opt)
4108 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4109 thisarginfo.op, bias);
4112 else if (!vec_stmt
4113 && thisarginfo.dt != vect_constant_def
4114 && thisarginfo.dt != vect_external_def
4115 && loop_vinfo
4116 && TREE_CODE (op) == SSA_NAME
4117 && simple_iv (loop, loop_containing_stmt (stmt), op,
4118 &iv, false)
4119 && tree_fits_shwi_p (iv.step))
4121 thisarginfo.linear_step = tree_to_shwi (iv.step);
4122 thisarginfo.op = iv.base;
4124 else if ((thisarginfo.dt == vect_constant_def
4125 || thisarginfo.dt == vect_external_def)
4126 && POINTER_TYPE_P (TREE_TYPE (op)))
4127 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4128 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4129 linear too. */
4130 if (POINTER_TYPE_P (TREE_TYPE (op))
4131 && !thisarginfo.linear_step
4132 && !vec_stmt
4133 && thisarginfo.dt != vect_constant_def
4134 && thisarginfo.dt != vect_external_def
4135 && loop_vinfo
4136 && TREE_CODE (op) == SSA_NAME)
4137 vect_simd_lane_linear (op, loop, &thisarginfo);
4139 arginfo.quick_push (thisarginfo);
4142 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4143 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
4144 unsigned int badness = 0;
4145 struct cgraph_node *bestn = NULL;
4146 if (simd_clone_info.exists ())
4147 bestn = cgraph_node::get (simd_clone_info[0]);
4148 else
4149 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4150 n = n->simdclone->next_clone)
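/* Choose among the available clones by accumulating a badness score:
needing several calls per vector iteration, being an inbranch clone and
a poorer target preference all add penalties, while clones whose
argument kinds or alignment requirements do not fit are skipped
entirely; the clone with the lowest score wins. */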
4152 unsigned int this_badness = 0;
4153 unsigned int num_calls;
4154 /* The number of arguments in the call and the number of parameters in
4155 the simdclone should match. However, when the simdclone is
4156 'inbranch', it could have one more parameter than nargs when using
4157 an inbranch simdclone to call a non-inbranch call, either in a
4158 non-masked loop using an all-true constant mask, or inside a masked
4159 loop using its mask. */
4160 size_t simd_nargs = n->simdclone->nargs;
4161 if (!masked_call_offset && n->simdclone->inbranch)
4162 simd_nargs--;
4163 if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
4164 &num_calls)
4165 || (!n->simdclone->inbranch && (masked_call_offset > 0))
4166 || (nargs != simd_nargs))
4167 continue;
4168 if (num_calls != 1)
4169 this_badness += exact_log2 (num_calls) * 4096;
4170 if (n->simdclone->inbranch)
4171 this_badness += 8192;
4172 int target_badness = targetm.simd_clone.usable (n);
4173 if (target_badness < 0)
4174 continue;
4175 this_badness += target_badness * 512;
4176 for (i = 0; i < nargs; i++)
4178 switch (n->simdclone->args[i].arg_type)
4180 case SIMD_CLONE_ARG_TYPE_VECTOR:
4181 if (!useless_type_conversion_p
4182 (n->simdclone->args[i].orig_type,
4183 TREE_TYPE (gimple_call_arg (stmt,
4184 i + masked_call_offset))))
4185 i = -1;
4186 else if (arginfo[i].dt == vect_constant_def
4187 || arginfo[i].dt == vect_external_def
4188 || arginfo[i].linear_step)
4189 this_badness += 64;
4190 break;
4191 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4192 if (arginfo[i].dt != vect_constant_def
4193 && arginfo[i].dt != vect_external_def)
4194 i = -1;
4195 break;
4196 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4197 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4198 if (arginfo[i].dt == vect_constant_def
4199 || arginfo[i].dt == vect_external_def
4200 || (arginfo[i].linear_step
4201 != n->simdclone->args[i].linear_step))
4202 i = -1;
4203 break;
4204 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4205 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4206 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4207 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4208 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4209 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4210 /* FORNOW */
4211 i = -1;
4212 break;
4213 case SIMD_CLONE_ARG_TYPE_MASK:
4214 /* While we can create a traditional data vector from
4215 an incoming integer-mode mask, we have no good way to
4216 force-generate an integer-mode mask from a traditional
4217 boolean vector input. */
4218 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4219 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4220 i = -1;
4221 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4222 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4223 this_badness += 2048;
4224 break;
4226 if (i == (size_t) -1)
4227 break;
4228 if (n->simdclone->args[i].alignment > arginfo[i].align)
4230 i = -1;
4231 break;
4233 if (arginfo[i].align)
4234 this_badness += (exact_log2 (arginfo[i].align)
4235 - exact_log2 (n->simdclone->args[i].alignment));
4237 if (i == (size_t) -1)
4238 continue;
4239 if (masked_call_offset == 0
4240 && n->simdclone->inbranch
4241 && n->simdclone->nargs > nargs)
4243 gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4244 SIMD_CLONE_ARG_TYPE_MASK);
4245 /* Penalize using a masked SIMD clone in a non-masked loop that is
4246 not in a branch, as we'd have to construct an all-true mask. */
4247 if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4248 this_badness += 64;
4250 if (bestn == NULL || this_badness < badness)
4252 bestn = n;
4253 badness = this_badness;
4257 if (bestn == NULL)
4258 return false;
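/* For an integer mask mode the clone's SIMDLEN lanes are divided evenly
among its mask arguments, so count those arguments first. */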
4260 unsigned int num_mask_args = 0;
4261 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4262 for (i = 0; i < nargs; i++)
4263 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4264 num_mask_args++;
4266 for (i = 0; i < nargs; i++)
4268 if ((arginfo[i].dt == vect_constant_def
4269 || arginfo[i].dt == vect_external_def)
4270 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4272 tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
4273 i + masked_call_offset));
4274 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4275 slp_node);
4276 if (arginfo[i].vectype == NULL
4277 || !constant_multiple_p (bestn->simdclone->simdlen,
4278 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4279 return false;
4282 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4283 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4285 if (dump_enabled_p ())
4286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4287 "vector mask arguments are not supported.\n");
4288 return false;
4291 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4293 tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
4294 if (bestn->simdclone->mask_mode == VOIDmode)
4296 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype),
4297 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4299 /* FORNOW we only have partial support for vector-type masks
4300 that can't hold all of simdlen. */
4301 if (dump_enabled_p ())
4302 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4303 vect_location,
4304 "in-branch vector clones are not yet"
4305 " supported for mismatched vector sizes.\n");
4306 return false;
4309 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4311 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4312 || maybe_ne (exact_div (bestn->simdclone->simdlen,
4313 num_mask_args),
4314 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4316 /* FORNOW we only have partial support for integer-type masks
4317 that represent the same number of lanes as the
4318 vectorized mask inputs. */
4319 if (dump_enabled_p ())
4320 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4321 vect_location,
4322 "in-branch vector clones are not yet "
4323 "supported for mismatched vector sizes.\n");
4324 return false;
4327 else
4329 if (dump_enabled_p ())
4330 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4331 vect_location,
4332 "in-branch vector clones not supported"
4333 " on this target.\n");
4334 return false;
4339 fndecl = bestn->decl;
4340 nunits = bestn->simdclone->simdlen;
4341 if (slp_node)
4342 ncopies = vector_unroll_factor (vf * group_size, nunits);
4343 else
4344 ncopies = vector_unroll_factor (vf, nunits);
4346 /* If the function isn't const, only allow it in simd loops where the user
4347 has asserted that at least nunits consecutive iterations can be
4348 performed using SIMD instructions. */
4349 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4350 && gimple_vuse (stmt))
4351 return false;
4353 /* Sanity check: make sure that at least one copy of the vectorized stmt
4354 needs to be generated. */
4355 gcc_assert (ncopies >= 1);
4357 if (!vec_stmt) /* transformation not required. */
4359 if (slp_node)
4360 for (unsigned i = 0; i < nargs; ++i)
4361 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4363 if (dump_enabled_p ())
4364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4365 "incompatible vector types for invariants\n");
4366 return false;
4368 /* When the original call is pure or const but the SIMD ABI dictates
4369 an aggregate return we will have to use a virtual definition and
4370 in a loop eventually even need to add a virtual PHI. That's
4371 not straightforward, so allow fixing this up via renaming. */
4372 if (gimple_call_lhs (stmt)
4373 && !gimple_vdef (stmt)
4374 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4375 vinfo->any_known_not_updated_vssa = true;
4376 /* ??? For SLP code-gen we end up inserting after the last
4377 vector argument def rather than at the original call position
4378 so automagic virtual operand updating doesn't work. */
4379 if (gimple_vuse (stmt) && slp_node)
4380 vinfo->any_known_not_updated_vssa = true;
4381 simd_clone_info.safe_push (bestn->decl);
4382 for (i = 0; i < bestn->simdclone->nargs; i++)
4384 switch (bestn->simdclone->args[i].arg_type)
4386 default:
4387 continue;
4388 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4389 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4391 simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
4392 simd_clone_info.safe_push (arginfo[i].op);
4393 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4394 ? size_type_node : TREE_TYPE (arginfo[i].op);
4395 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4396 simd_clone_info.safe_push (ls);
4397 tree sll = arginfo[i].simd_lane_linear
4398 ? boolean_true_node : boolean_false_node;
4399 simd_clone_info.safe_push (sll);
4401 break;
4402 case SIMD_CLONE_ARG_TYPE_MASK:
4403 if (loop_vinfo
4404 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4405 vect_record_loop_mask (loop_vinfo,
4406 &LOOP_VINFO_MASKS (loop_vinfo),
4407 ncopies, vectype, op);
4409 break;
4413 if (!bestn->simdclone->inbranch && loop_vinfo)
4415 if (dump_enabled_p ()
4416 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4417 dump_printf_loc (MSG_NOTE, vect_location,
4418 "can't use a fully-masked loop because a"
4419 " non-masked simd clone was selected.\n");
4420 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4423 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4424 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4425 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4426 dt, slp_node, cost_vec); */
4427 return true;
4430 /* Transform. */
4432 if (dump_enabled_p ())
4433 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4435 /* Handle def. */
4436 scalar_dest = gimple_call_lhs (stmt);
4437 vec_dest = NULL_TREE;
4438 rtype = NULL_TREE;
4439 ratype = NULL_TREE;
4440 if (scalar_dest)
4442 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4443 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4444 if (TREE_CODE (rtype) == ARRAY_TYPE)
4446 ratype = rtype;
4447 rtype = TREE_TYPE (ratype);
4451 auto_vec<vec<tree> > vec_oprnds;
4452 auto_vec<unsigned> vec_oprnds_i;
4453 vec_oprnds_i.safe_grow_cleared (nargs, true);
4454 if (slp_node)
4456 vec_oprnds.reserve_exact (nargs);
4457 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4459 else
4460 vec_oprnds.safe_grow_cleared (nargs, true);
4461 for (j = 0; j < ncopies; ++j)
4463 poly_uint64 callee_nelements;
4464 poly_uint64 caller_nelements;
4465 /* Build argument list for the vectorized call. */
4466 if (j == 0)
4467 vargs.create (nargs);
4468 else
4469 vargs.truncate (0);
4471 for (i = 0; i < nargs; i++)
4473 unsigned int k, l, m, o;
4474 tree atype;
4475 op = gimple_call_arg (stmt, i + masked_call_offset);
4476 switch (bestn->simdclone->args[i].arg_type)
4478 case SIMD_CLONE_ARG_TYPE_VECTOR:
4479 atype = bestn->simdclone->args[i].vector_type;
4480 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4481 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4482 o = vector_unroll_factor (nunits, callee_nelements);
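/* O is the number of clone-width argument vectors needed per SIMDLEN
group. When the caller's vectors are wider than the clone's, extract
pieces with BIT_FIELD_REFs; when they are narrower, combine several of
them into one argument with a CONSTRUCTOR (or a VIEW_CONVERT_EXPR when
the lane counts match). */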
4483 for (m = j * o; m < (j + 1) * o; m++)
4485 if (known_lt (callee_nelements, caller_nelements))
4487 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4488 if (!constant_multiple_p (caller_nelements,
4489 callee_nelements, &k))
4490 gcc_unreachable ();
4492 gcc_assert ((k & (k - 1)) == 0);
4493 if (m == 0)
4495 if (!slp_node)
4496 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4497 ncopies * o / k, op,
4498 &vec_oprnds[i]);
4499 vec_oprnds_i[i] = 0;
4500 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4502 else
4504 vec_oprnd0 = arginfo[i].op;
4505 if ((m & (k - 1)) == 0)
4506 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4508 arginfo[i].op = vec_oprnd0;
4509 vec_oprnd0
4510 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4511 bitsize_int (prec),
4512 bitsize_int ((m & (k - 1)) * prec));
4513 gassign *new_stmt
4514 = gimple_build_assign (make_ssa_name (atype),
4515 vec_oprnd0);
4516 vect_finish_stmt_generation (vinfo, stmt_info,
4517 new_stmt, gsi);
4518 vargs.safe_push (gimple_assign_lhs (new_stmt));
4520 else
4522 if (!constant_multiple_p (callee_nelements,
4523 caller_nelements, &k))
4524 gcc_unreachable ();
4525 gcc_assert ((k & (k - 1)) == 0);
4526 vec<constructor_elt, va_gc> *ctor_elts;
4527 if (k != 1)
4528 vec_alloc (ctor_elts, k);
4529 else
4530 ctor_elts = NULL;
4531 for (l = 0; l < k; l++)
4533 if (m == 0 && l == 0)
4535 if (!slp_node)
4536 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4537 k * o * ncopies,
4539 &vec_oprnds[i]);
4540 vec_oprnds_i[i] = 0;
4541 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4543 else
4544 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4545 arginfo[i].op = vec_oprnd0;
4546 if (k == 1)
4547 break;
4548 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4549 vec_oprnd0);
4551 if (k == 1)
4552 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4553 atype))
4555 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4556 vec_oprnd0);
4557 gassign *new_stmt
4558 = gimple_build_assign (make_ssa_name (atype),
4559 vec_oprnd0);
4560 vect_finish_stmt_generation (vinfo, stmt_info,
4561 new_stmt, gsi);
4562 vargs.safe_push (gimple_get_lhs (new_stmt));
4564 else
4565 vargs.safe_push (vec_oprnd0);
4566 else
4568 vec_oprnd0 = build_constructor (atype, ctor_elts);
4569 gassign *new_stmt
4570 = gimple_build_assign (make_ssa_name (atype),
4571 vec_oprnd0);
4572 vect_finish_stmt_generation (vinfo, stmt_info,
4573 new_stmt, gsi);
4574 vargs.safe_push (gimple_assign_lhs (new_stmt));
4578 break;
4579 case SIMD_CLONE_ARG_TYPE_MASK:
4580 if (bestn->simdclone->mask_mode == VOIDmode)
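/* A VOIDmode mask mode means the clone expects its mask as an ordinary
data vector of 0/1 values: AND the incoming boolean mask with the loop
mask if the loop is fully masked, then materialize the 0/1 lanes with a
VEC_COND_EXPR. */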
4582 atype = bestn->simdclone->args[i].vector_type;
4583 tree elt_type = TREE_TYPE (atype);
4584 tree one = fold_convert (elt_type, integer_one_node);
4585 tree zero = fold_convert (elt_type, integer_zero_node);
4586 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4587 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4588 o = vector_unroll_factor (nunits, callee_nelements);
4589 for (m = j * o; m < (j + 1) * o; m++)
4591 if (maybe_lt (callee_nelements, caller_nelements))
4593 /* The mask type has fewer elements than simdlen. */
4595 /* FORNOW */
4596 gcc_unreachable ();
4598 else if (known_eq (callee_nelements, caller_nelements))
4600 /* The SIMD clone function has the same number of
4601 elements as the current function. */
4602 if (m == 0)
4604 if (!slp_node)
4605 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4606 o * ncopies,
4608 &vec_oprnds[i]);
4609 vec_oprnds_i[i] = 0;
4611 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4612 if (loop_vinfo
4613 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4615 vec_loop_masks *loop_masks
4616 = &LOOP_VINFO_MASKS (loop_vinfo);
4617 tree loop_mask
4618 = vect_get_loop_mask (loop_vinfo, gsi,
4619 loop_masks, ncopies,
4620 vectype, j);
4621 vec_oprnd0
4622 = prepare_vec_mask (loop_vinfo,
4623 TREE_TYPE (loop_mask),
4624 loop_mask, vec_oprnd0,
4625 gsi);
4626 loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
4627 loop_mask });
4630 vec_oprnd0
4631 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4632 build_vector_from_val (atype, one),
4633 build_vector_from_val (atype, zero));
4634 gassign *new_stmt
4635 = gimple_build_assign (make_ssa_name (atype),
4636 vec_oprnd0);
4637 vect_finish_stmt_generation (vinfo, stmt_info,
4638 new_stmt, gsi);
4639 vargs.safe_push (gimple_assign_lhs (new_stmt));
4641 else
4643 /* The mask type has more elements than simdlen. */
4645 /* FORNOW */
4646 gcc_unreachable ();
4650 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
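/* For an integer mask mode, view-convert the boolean mask vector to an
integer type of the same size and then convert it to the clone's mask
type if the two differ. */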
4652 atype = bestn->simdclone->args[i].vector_type;
4653 /* Guess the number of lanes represented by atype. */
4654 poly_uint64 atype_subparts
4655 = exact_div (bestn->simdclone->simdlen,
4656 num_mask_args);
4657 o = vector_unroll_factor (nunits, atype_subparts);
4658 for (m = j * o; m < (j + 1) * o; m++)
4660 if (m == 0)
4662 if (!slp_node)
4663 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4664 o * ncopies,
4666 &vec_oprnds[i]);
4667 vec_oprnds_i[i] = 0;
4669 if (maybe_lt (atype_subparts,
4670 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4672 /* The mask argument has fewer elements than the
4673 input vector. */
4674 /* FORNOW */
4675 gcc_unreachable ();
4677 else if (known_eq (atype_subparts,
4678 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4680 /* The vector mask argument matches the input
4681 in the number of lanes, but not necessarily
4682 in the mode. */
4683 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4684 tree st = lang_hooks.types.type_for_mode
4685 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4686 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4687 vec_oprnd0);
4688 gassign *new_stmt
4689 = gimple_build_assign (make_ssa_name (st),
4690 vec_oprnd0);
4691 vect_finish_stmt_generation (vinfo, stmt_info,
4692 new_stmt, gsi);
4693 if (!types_compatible_p (atype, st))
4695 new_stmt
4696 = gimple_build_assign (make_ssa_name (atype),
4697 NOP_EXPR,
4698 gimple_assign_lhs
4699 (new_stmt));
4700 vect_finish_stmt_generation (vinfo, stmt_info,
4701 new_stmt, gsi);
4703 vargs.safe_push (gimple_assign_lhs (new_stmt));
4705 else
4707 /* The mask argument has more elements than the
4708 input vector. */
4709 /* FORNOW */
4710 gcc_unreachable ();
4714 else
4715 gcc_unreachable ();
4716 break;
4717 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4718 vargs.safe_push (op);
4719 break;
4720 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4721 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
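/* For a linear argument (unless it is linear only within a SIMD lane),
gimplify the start value into the loop preheader and create a PHI in
the loop header that advances by STEP * SIMDLEN * NCOPIES per
iteration; copy J within an iteration adds J * SIMDLEN * STEP to the
PHI result instead. */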
4722 if (j == 0)
4724 gimple_seq stmts;
4725 arginfo[i].op
4726 = force_gimple_operand (unshare_expr (arginfo[i].op),
4727 &stmts, true, NULL_TREE);
4728 if (stmts != NULL)
4730 basic_block new_bb;
4731 edge pe = loop_preheader_edge (loop);
4732 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4733 gcc_assert (!new_bb);
4735 if (arginfo[i].simd_lane_linear)
4737 vargs.safe_push (arginfo[i].op);
4738 break;
4740 tree phi_res = copy_ssa_name (op);
4741 gphi *new_phi = create_phi_node (phi_res, loop->header);
4742 add_phi_arg (new_phi, arginfo[i].op,
4743 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4744 enum tree_code code
4745 = POINTER_TYPE_P (TREE_TYPE (op))
4746 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4747 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4748 ? sizetype : TREE_TYPE (op);
4749 poly_widest_int cst
4750 = wi::mul (bestn->simdclone->args[i].linear_step,
4751 ncopies * nunits);
4752 tree tcst = wide_int_to_tree (type, cst);
4753 tree phi_arg = copy_ssa_name (op);
4754 gassign *new_stmt
4755 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4756 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4757 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4758 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4759 UNKNOWN_LOCATION);
4760 arginfo[i].op = phi_res;
4761 vargs.safe_push (phi_res);
4763 else
4765 enum tree_code code
4766 = POINTER_TYPE_P (TREE_TYPE (op))
4767 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4768 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4769 ? sizetype : TREE_TYPE (op);
4770 poly_widest_int cst
4771 = wi::mul (bestn->simdclone->args[i].linear_step,
4772 j * nunits);
4773 tree tcst = wide_int_to_tree (type, cst);
4774 new_temp = make_ssa_name (TREE_TYPE (op));
4775 gassign *new_stmt
4776 = gimple_build_assign (new_temp, code,
4777 arginfo[i].op, tcst);
4778 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4779 vargs.safe_push (new_temp);
4781 break;
4782 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4783 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4784 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4785 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4786 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4787 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4788 default:
4789 gcc_unreachable ();
4793 if (masked_call_offset == 0
4794 && bestn->simdclone->inbranch
4795 && bestn->simdclone->nargs > nargs)
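/* An inbranch clone is being used for an unconditional call, so the
trailing mask argument has to be synthesized: use the loop mask in a
fully-masked loop and an all-ones mask otherwise, converted below to
the representation the clone expects. */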
4797 unsigned long m, o;
4798 size_t mask_i = bestn->simdclone->nargs - 1;
4799 tree mask;
4800 gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4801 SIMD_CLONE_ARG_TYPE_MASK);
4803 tree masktype = bestn->simdclone->args[mask_i].vector_type;
4804 callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4805 o = vector_unroll_factor (nunits, callee_nelements);
4806 for (m = j * o; m < (j + 1) * o; m++)
4808 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4810 vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4811 mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4812 ncopies, vectype, j);
4814 else
4815 mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
4817 gassign *new_stmt;
4818 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4820 /* This means we are dealing with integer mask modes.
4821 First convert to an integer type with the same size as
4822 the current vector type. */
4823 unsigned HOST_WIDE_INT intermediate_size
4824 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4825 tree mid_int_type =
4826 build_nonstandard_integer_type (intermediate_size, 1);
4827 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4828 new_stmt
4829 = gimple_build_assign (make_ssa_name (mid_int_type),
4830 mask);
4831 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4832 /* Then zero-extend to the mask mode. */
4833 mask = fold_build1 (NOP_EXPR, masktype,
4834 gimple_get_lhs (new_stmt));
4836 else if (bestn->simdclone->mask_mode == VOIDmode)
4838 tree one = fold_convert (TREE_TYPE (masktype),
4839 integer_one_node);
4840 tree zero = fold_convert (TREE_TYPE (masktype),
4841 integer_zero_node);
4842 mask = build3 (VEC_COND_EXPR, masktype, mask,
4843 build_vector_from_val (masktype, one),
4844 build_vector_from_val (masktype, zero));
4846 else
4847 gcc_unreachable ();
4849 new_stmt = gimple_build_assign (make_ssa_name (masktype), mask);
4850 vect_finish_stmt_generation (vinfo, stmt_info,
4851 new_stmt, gsi);
4852 mask = gimple_assign_lhs (new_stmt);
4853 vargs.safe_push (mask);
4857 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4858 if (vec_dest)
4860 gcc_assert (ratype
4861 || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4862 if (ratype)
4863 new_temp = create_tmp_var (ratype);
4864 else if (useless_type_conversion_p (vectype, rtype))
4865 new_temp = make_ssa_name (vec_dest, new_call);
4866 else
4867 new_temp = make_ssa_name (rtype, new_call);
4868 gimple_call_set_lhs (new_call, new_temp);
4870 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4871 gimple *new_stmt = new_call;
4873 if (vec_dest)
4875 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
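/* The clone returns more lanes than one caller vector holds: split the
returned array or vector into caller-width pieces using MEM_REFs or
BIT_FIELD_REFs. */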
4877 unsigned int k, l;
4878 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4879 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4880 k = vector_unroll_factor (nunits,
4881 TYPE_VECTOR_SUBPARTS (vectype));
4882 gcc_assert ((k & (k - 1)) == 0);
4883 for (l = 0; l < k; l++)
4885 tree t;
4886 if (ratype)
4888 t = build_fold_addr_expr (new_temp);
4889 t = build2 (MEM_REF, vectype, t,
4890 build_int_cst (TREE_TYPE (t), l * bytes));
4892 else
4893 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4894 bitsize_int (prec), bitsize_int (l * prec));
4895 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4896 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4898 if (j == 0 && l == 0)
4899 *vec_stmt = new_stmt;
4900 if (slp_node)
4901 SLP_TREE_VEC_DEFS (slp_node)
4902 .quick_push (gimple_assign_lhs (new_stmt));
4903 else
4904 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4907 if (ratype)
4908 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4909 continue;
4911 else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
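/* The clone returns fewer lanes than a caller vector holds: collect K
return values in a CONSTRUCTOR and emit the caller-width vector once
the last piece of the group has been produced. */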
4913 unsigned int k;
4914 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
4915 TYPE_VECTOR_SUBPARTS (rtype), &k))
4916 gcc_unreachable ();
4917 gcc_assert ((k & (k - 1)) == 0);
4918 if ((j & (k - 1)) == 0)
4919 vec_alloc (ret_ctor_elts, k);
4920 if (ratype)
4922 unsigned int m, o;
4923 o = vector_unroll_factor (nunits,
4924 TYPE_VECTOR_SUBPARTS (rtype));
4925 for (m = 0; m < o; m++)
4927 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4928 size_int (m), NULL_TREE, NULL_TREE);
4929 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4930 tem);
4931 vect_finish_stmt_generation (vinfo, stmt_info,
4932 new_stmt, gsi);
4933 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4934 gimple_assign_lhs (new_stmt));
4936 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4938 else
4939 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4940 if ((j & (k - 1)) != k - 1)
4941 continue;
4942 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4943 new_stmt
4944 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4945 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4947 if ((unsigned) j == k - 1)
4948 *vec_stmt = new_stmt;
4949 if (slp_node)
4950 SLP_TREE_VEC_DEFS (slp_node)
4951 .quick_push (gimple_assign_lhs (new_stmt));
4952 else
4953 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4954 continue;
4956 else if (ratype)
4958 tree t = build_fold_addr_expr (new_temp);
4959 t = build2 (MEM_REF, vectype, t,
4960 build_int_cst (TREE_TYPE (t), 0));
4961 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4962 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4963 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4965 else if (!useless_type_conversion_p (vectype, rtype))
4967 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4968 new_stmt
4969 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4970 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4974 if (j == 0)
4975 *vec_stmt = new_stmt;
4976 if (slp_node)
4977 SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
4978 else
4979 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4982 for (i = 0; i < nargs; ++i)
4984 vec<tree> oprndsi = vec_oprnds[i];
4985 oprndsi.release ();
4987 vargs.release ();
4989 /* Mark the clone as no longer being a candidate for GC. */
4990 bestn->gc_candidate = false;
4992 /* The call in STMT might prevent it from being removed in DCE.
4993 We cannot, however, remove it here, because of the way the SSA name
4994 it defines is mapped to the new definition. So just replace the
4995 rhs of the statement with something harmless. */
4997 if (slp_node)
4998 return true;
5000 gimple *new_stmt;
5001 if (scalar_dest)
5003 type = TREE_TYPE (scalar_dest);
5004 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
5005 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
5007 else
5008 new_stmt = gimple_build_nop ();
5009 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
5010 unlink_stmt_vdef (stmt);
5012 return true;
5016 /* Function vect_gen_widened_results_half
5018 Create a vector stmt whose code or called function, operand count, and
5019 result variable are given by CH, OP_TYPE, and VEC_DEST, and whose
5020 operands are VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be
5021 inserted at GSI. When CH names an internal function rather than a tree
5022 code, an internal call is built instead of an assignment.
5023 STMT_INFO is the original scalar stmt that we are vectorizing. */
5025 static gimple *
5026 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
5027 tree vec_oprnd0, tree vec_oprnd1, int op_type,
5028 tree vec_dest, gimple_stmt_iterator *gsi,
5029 stmt_vec_info stmt_info)
5031 gimple *new_stmt;
5032 tree new_temp;
5034 /* Generate half of the widened result: */
5035 if (op_type != binary_op)
5036 vec_oprnd1 = NULL;
5037 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
5038 new_temp = make_ssa_name (vec_dest, new_stmt);
5039 gimple_set_lhs (new_stmt, new_temp);
5040 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5042 return new_stmt;
5046 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
5047 For multi-step conversions store the resulting vectors and call the function
5048 recursively. When NARROW_SRC_P is true, there is still a conversion after
5049 narrowing, so don't store the vectors in the SLP_NODE or in the vector info
5050 of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
5052 static void
5053 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
5054 int multi_step_cvt,
5055 stmt_vec_info stmt_info,
5056 vec<tree> &vec_dsts,
5057 gimple_stmt_iterator *gsi,
5058 slp_tree slp_node, code_helper code,
5059 bool narrow_src_p)
5061 unsigned int i;
5062 tree vop0, vop1, new_tmp, vec_dest;
5064 vec_dest = vec_dsts.pop ();
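/* Each demotion step packs two source vectors into one narrower
destination vector, so the operands are consumed in pairs. */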
5066 for (i = 0; i < vec_oprnds->length (); i += 2)
5068 /* Create demotion operation. */
5069 vop0 = (*vec_oprnds)[i];
5070 vop1 = (*vec_oprnds)[i + 1];
5071 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
5072 new_tmp = make_ssa_name (vec_dest, new_stmt);
5073 gimple_set_lhs (new_stmt, new_tmp);
5074 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5075 if (multi_step_cvt || narrow_src_p)
5076 /* Store the resulting vector for the next recursive call,
5077 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
5078 (*vec_oprnds)[i/2] = new_tmp;
5079 else
5081 /* This is the last step of the conversion sequence. Store the
5082 vectors in SLP_NODE or in vector info of the scalar statement
5083 (or in STMT_VINFO_RELATED_STMT chain). */
5084 if (slp_node)
5085 slp_node->push_vec_def (new_stmt);
5086 else
5087 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5091 /* For multi-step demotion operations we first generate demotion operations
5092 from the source type to the intermediate types, and then combine the
5093 results (stored in VEC_OPRNDS) in demotion operation to the destination
5094 type. */
5095 if (multi_step_cvt)
5097 /* At each level of recursion we have half of the operands we had at the
5098 previous level. */
5099 vec_oprnds->truncate ((i+1)/2);
5100 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5101 multi_step_cvt - 1,
5102 stmt_info, vec_dsts, gsi,
5103 slp_node, VEC_PACK_TRUNC_EXPR,
5104 narrow_src_p);
5107 vec_dsts.quick_push (vec_dest);
5111 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5112 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5113 STMT_INFO. For multi-step conversions store the resulting vectors and
5114 call the function recursively. */
5116 static void
5117 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5118 vec<tree> *vec_oprnds0,
5119 vec<tree> *vec_oprnds1,
5120 stmt_vec_info stmt_info, tree vec_dest,
5121 gimple_stmt_iterator *gsi,
5122 code_helper ch1,
5123 code_helper ch2, int op_type)
5125 int i;
5126 tree vop0, vop1, new_tmp1, new_tmp2;
5127 gimple *new_stmt1, *new_stmt2;
5128 vec<tree> vec_tmp = vNULL;
5130 vec_tmp.create (vec_oprnds0->length () * 2);
5131 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5133 if (op_type == binary_op)
5134 vop1 = (*vec_oprnds1)[i];
5135 else
5136 vop1 = NULL_TREE;
5138 /* Generate the two halves of promotion operation. */
5139 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5140 op_type, vec_dest, gsi,
5141 stmt_info);
5142 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5143 op_type, vec_dest, gsi,
5144 stmt_info);
5145 if (is_gimple_call (new_stmt1))
5147 new_tmp1 = gimple_call_lhs (new_stmt1);
5148 new_tmp2 = gimple_call_lhs (new_stmt2);
5150 else
5152 new_tmp1 = gimple_assign_lhs (new_stmt1);
5153 new_tmp2 = gimple_assign_lhs (new_stmt2);
5156 /* Store the results for the next step. */
5157 vec_tmp.quick_push (new_tmp1);
5158 vec_tmp.quick_push (new_tmp2);
5161 vec_oprnds0->release ();
5162 *vec_oprnds0 = vec_tmp;
5165 /* Create vectorized promotion stmts for widening stmts using only half the
5166 potential vector size for input. */
5167 static void
5168 vect_create_half_widening_stmts (vec_info *vinfo,
5169 vec<tree> *vec_oprnds0,
5170 vec<tree> *vec_oprnds1,
5171 stmt_vec_info stmt_info, tree vec_dest,
5172 gimple_stmt_iterator *gsi,
5173 code_helper code1,
5174 int op_type)
5176 int i;
5177 tree vop0, vop1;
5178 gimple *new_stmt1;
5179 gimple *new_stmt2;
5180 gimple *new_stmt3;
5181 vec<tree> vec_tmp = vNULL;
5183 vec_tmp.create (vec_oprnds0->length ());
5184 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5186 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5188 gcc_assert (op_type == binary_op);
5189 vop1 = (*vec_oprnds1)[i];
5191 /* Widen the first vector input. */
5192 out_type = TREE_TYPE (vec_dest);
5193 new_tmp1 = make_ssa_name (out_type);
5194 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5195 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5196 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5198 /* Widen the second vector input. */
5199 new_tmp2 = make_ssa_name (out_type);
5200 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5202 /* Perform the operation with both vector inputs widened. */
5203 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5205 else
5207 /* Perform the operation with the single vector input widened. */
5208 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5211 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5212 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5213 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5215 /* Store the results for the next step. */
5216 vec_tmp.quick_push (new_tmp3);
5219 vec_oprnds0->release ();
5220 *vec_oprnds0 = vec_tmp;
5224 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5225 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5226 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5227 Return true if STMT_INFO is vectorizable in this way. */
5229 static bool
5230 vectorizable_conversion (vec_info *vinfo,
5231 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5232 gimple **vec_stmt, slp_tree slp_node,
5233 stmt_vector_for_cost *cost_vec)
5235 tree vec_dest, cvt_op = NULL_TREE;
5236 tree scalar_dest;
5237 tree op0, op1 = NULL_TREE;
5238 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5239 tree_code tc1, tc2;
5240 code_helper code, code1, code2;
5241 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5242 tree new_temp;
5243 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5244 int ndts = 2;
5245 poly_uint64 nunits_in;
5246 poly_uint64 nunits_out;
5247 tree vectype_out, vectype_in;
5248 int ncopies, i;
5249 tree lhs_type, rhs_type;
5250 /* For conversions between floating point and integer, there are two NARROW
5251 cases. NARROW_SRC is for FLOAT_EXPR and means
5252 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5253 This is safe when the range of the source integer fits into the lower
5254 precision. NARROW_DST is for FIX_TRUNC_EXPR and means
5255 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5256 For other narrowing conversions, NARROW_DST is used by
5257 default. */
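/* For example, long --> float can be done as long --DEMOTION--> int
   --FLOAT_EXPR--> float when the longs are known to fit in an int
   (NARROW_SRC), while double --> int is done as double
   --FIX_TRUNC_EXPR--> long --DEMOTION--> int (NARROW_DST). */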
5258 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5259 vec<tree> vec_oprnds0 = vNULL;
5260 vec<tree> vec_oprnds1 = vNULL;
5261 tree vop0;
5262 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5263 int multi_step_cvt = 0;
5264 vec<tree> interm_types = vNULL;
5265 tree intermediate_type, cvt_type = NULL_TREE;
5266 int op_type;
5267 unsigned short fltsz;
5269 /* Is STMT a vectorizable conversion? */
5271 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5272 return false;
5274 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5275 && ! vec_stmt)
5276 return false;
5278 gimple* stmt = stmt_info->stmt;
5279 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5280 return false;
5282 if (gimple_get_lhs (stmt) == NULL_TREE
5283 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5284 return false;
5286 if (TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5287 return false;
5289 if (is_gimple_assign (stmt))
5291 code = gimple_assign_rhs_code (stmt);
5292 op_type = TREE_CODE_LENGTH ((tree_code) code);
5294 else if (gimple_call_internal_p (stmt))
5296 code = gimple_call_internal_fn (stmt);
5297 op_type = gimple_call_num_args (stmt);
5299 else
5300 return false;
5302 bool widen_arith = (code == WIDEN_MULT_EXPR
5303 || code == WIDEN_LSHIFT_EXPR
5304 || widening_fn_p (code));
5306 if (!widen_arith
5307 && !CONVERT_EXPR_CODE_P (code)
5308 && code != FIX_TRUNC_EXPR
5309 && code != FLOAT_EXPR)
5310 return false;
5312 /* Check types of lhs and rhs. */
5313 scalar_dest = gimple_get_lhs (stmt);
5314 lhs_type = TREE_TYPE (scalar_dest);
5315 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5317 /* Check the operands of the operation. */
5318 slp_tree slp_op0, slp_op1 = NULL;
5319 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5320 0, &op0, &slp_op0, &dt[0], &vectype_in))
5322 if (dump_enabled_p ())
5323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5324 "use not simple.\n");
5325 return false;
5328 rhs_type = TREE_TYPE (op0);
5329 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5330 && !((INTEGRAL_TYPE_P (lhs_type)
5331 && INTEGRAL_TYPE_P (rhs_type))
5332 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5333 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5334 return false;
5336 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5337 && ((INTEGRAL_TYPE_P (lhs_type)
5338 && !type_has_mode_precision_p (lhs_type))
5339 || (INTEGRAL_TYPE_P (rhs_type)
5340 && !type_has_mode_precision_p (rhs_type))))
5342 if (dump_enabled_p ())
5343 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5344 "type conversion to/from bit-precision unsupported."
5345 "\n");
5346 return false;
5349 if (op_type == binary_op)
5351 gcc_assert (code == WIDEN_MULT_EXPR
5352 || code == WIDEN_LSHIFT_EXPR
5353 || widening_fn_p (code));
5355 op1 = (is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt)
5356 : gimple_call_arg (stmt, 0));
5357 tree vectype1_in;
5358 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5359 &op1, &slp_op1, &dt[1], &vectype1_in))
5361 if (dump_enabled_p ())
5362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5363 "use not simple.\n");
5364 return false;
5366 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5367 OP1. */
5368 if (!vectype_in)
5369 vectype_in = vectype1_in;
5372 /* If op0 is an external or constant def, infer the vector type
5373 from the scalar type. */
5374 if (!vectype_in)
5375 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5376 if (vec_stmt)
5377 gcc_assert (vectype_in);
5378 if (!vectype_in)
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5382 "no vectype for scalar type %T\n", rhs_type);
5384 return false;
5387 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5388 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5390 if (dump_enabled_p ())
5391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5392 "can't convert between boolean and non "
5393 "boolean vectors %T\n", rhs_type);
5395 return false;
5398 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5399 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
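/* Equal element counts mean either a simple conversion (NONE) or a
   half-widening operation (WIDEN); more output than input elements
   means the destination elements are narrower (NARROW_DST, possibly
   turned into NARROW_SRC below); fewer means a widening conversion. */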
5400 if (known_eq (nunits_out, nunits_in))
5401 if (widen_arith)
5402 modifier = WIDEN;
5403 else
5404 modifier = NONE;
5405 else if (multiple_p (nunits_out, nunits_in))
5406 modifier = NARROW_DST;
5407 else
5409 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5410 modifier = WIDEN;
5413 /* Multiple types in SLP are handled by creating the appropriate number of
5414 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5415 case of SLP. */
5416 if (slp_node)
5417 ncopies = 1;
5418 else if (modifier == NARROW_DST)
5419 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5420 else
5421 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5423 /* Sanity check: make sure that at least one copy of the vectorized stmt
5424 needs to be generated. */
5425 gcc_assert (ncopies >= 1);
5427 bool found_mode = false;
5428 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5429 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5430 opt_scalar_mode rhs_mode_iter;
5432 /* Supportable by target? */
5433 switch (modifier)
5435 case NONE:
5436 if (code != FIX_TRUNC_EXPR
5437 && code != FLOAT_EXPR
5438 && !CONVERT_EXPR_CODE_P (code))
5439 return false;
5440 gcc_assert (code.is_tree_code ());
5441 if (supportable_convert_operation ((tree_code) code, vectype_out,
5442 vectype_in, &tc1))
5444 code1 = tc1;
5445 break;
5448 /* For conversions between float and integer types try whether
5449 we can use intermediate signed integer types to support the
5450 conversion. */
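/* For instance, with SLP a V4SI -> V4DF conversion can be implemented
   as V4SI -> V4DI (NOP_EXPR) followed by V4DI -> V4DF (FLOAT_EXPR),
   when the target supports both steps. */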
5451 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
5452 && (code == FLOAT_EXPR
5453 || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5455 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5456 bool float_expr_p = code == FLOAT_EXPR;
5457 unsigned short target_size;
5458 scalar_mode intermediate_mode;
5459 if (demotion)
5461 intermediate_mode = lhs_mode;
5462 target_size = GET_MODE_SIZE (rhs_mode);
5464 else
5466 target_size = GET_MODE_SIZE (lhs_mode);
5467 if (!int_mode_for_size
5468 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5469 goto unsupported;
5471 code1 = float_expr_p ? code : NOP_EXPR;
5472 codecvt1 = float_expr_p ? NOP_EXPR : code;
5473 opt_scalar_mode mode_iter;
5474 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5476 intermediate_mode = mode_iter.require ();
5478 if (GET_MODE_SIZE (intermediate_mode) > target_size)
5479 break;
5481 scalar_mode cvt_mode;
5482 if (!int_mode_for_size
5483 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
5484 break;
5486 cvt_type = build_nonstandard_integer_type
5487 (GET_MODE_BITSIZE (cvt_mode), 0);
5489 /* Check if the intermediate type can hold OP0's range.
5490 When converting from float to integer this is not necessary
5491 because values that do not fit the (smaller) target type are
5492 unspecified anyway. */
5493 if (demotion && float_expr_p)
5495 wide_int op_min_value, op_max_value;
5496 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5497 break;
5499 if (cvt_type == NULL_TREE
5500 || (wi::min_precision (op_max_value, SIGNED)
5501 > TYPE_PRECISION (cvt_type))
5502 || (wi::min_precision (op_min_value, SIGNED)
5503 > TYPE_PRECISION (cvt_type)))
5504 continue;
5507 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5508 /* This should only happen for SLP as long as the loop vectorizer
5509 only supports same-sized vectors. */
5510 if (cvt_type == NULL_TREE
5511 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5512 || !supportable_convert_operation ((tree_code) code1,
5513 vectype_out,
5514 cvt_type, &tc1)
5515 || !supportable_convert_operation ((tree_code) codecvt1,
5516 cvt_type,
5517 vectype_in, &tc2))
5518 continue;
5520 found_mode = true;
5521 break;
5524 if (found_mode)
5526 multi_step_cvt++;
5527 interm_types.safe_push (cvt_type);
5528 cvt_type = NULL_TREE;
5529 code1 = tc1;
5530 codecvt1 = tc2;
5531 break;
5534 /* FALLTHRU */
5535 unsupported:
5536 if (dump_enabled_p ())
5537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5538 "conversion not supported by target.\n");
5539 return false;
5541 case WIDEN:
5542 if (known_eq (nunits_in, nunits_out))
5544 if (!(code.is_tree_code ()
5545 && supportable_half_widening_operation ((tree_code) code,
5546 vectype_out, vectype_in,
5547 &tc1)))
5548 goto unsupported;
5549 code1 = tc1;
5550 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5551 break;
5553 if (supportable_widening_operation (vinfo, code, stmt_info,
5554 vectype_out, vectype_in, &code1,
5555 &code2, &multi_step_cvt,
5556 &interm_types))
5558 /* A binary widening operation can only be supported directly by the
5559 architecture. */
5560 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5561 break;
5564 if (code != FLOAT_EXPR
5565 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5566 goto unsupported;
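/* The remaining case is an integer -> float conversion where the float
   is wider than the integer, e.g. short -> double. Look for an
   intermediate integer mode so the conversion can be done as a widening
   NOP_EXPR (e.g. short -> int) followed by a (possibly widening)
   FLOAT_EXPR (int -> double). */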
5568 fltsz = GET_MODE_SIZE (lhs_mode);
5569 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5571 rhs_mode = rhs_mode_iter.require ();
5572 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5573 break;
5575 cvt_type
5576 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5577 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5578 if (cvt_type == NULL_TREE)
5579 goto unsupported;
5581 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5583 tc1 = ERROR_MARK;
5584 gcc_assert (code.is_tree_code ());
5585 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5586 cvt_type, &tc1))
5587 goto unsupported;
5588 codecvt1 = tc1;
5590 else if (!supportable_widening_operation (vinfo, code,
5591 stmt_info, vectype_out,
5592 cvt_type, &codecvt1,
5593 &codecvt2, &multi_step_cvt,
5594 &interm_types))
5595 continue;
5596 else
5597 gcc_assert (multi_step_cvt == 0);
5599 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5600 cvt_type,
5601 vectype_in, &code1,
5602 &code2, &multi_step_cvt,
5603 &interm_types))
5605 found_mode = true;
5606 break;
5610 if (!found_mode)
5611 goto unsupported;
5613 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5614 codecvt2 = ERROR_MARK;
5615 else
5617 multi_step_cvt++;
5618 interm_types.safe_push (cvt_type);
5619 cvt_type = NULL_TREE;
5621 break;
5623 case NARROW_DST:
5624 gcc_assert (op_type == unary_op);
5625 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5626 &code1, &multi_step_cvt,
5627 &interm_types))
5628 break;
5630 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5631 goto unsupported;
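/* A FIX_TRUNC_EXPR such as double -> int can be handled by first
   converting to a same-sized integer vector (double -> long) and then
   narrowing that result (long -> int). */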
5633 if (code == FIX_TRUNC_EXPR)
5635 cvt_type
5636 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5637 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5638 if (cvt_type == NULL_TREE)
5639 goto unsupported;
5640 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5641 &tc1))
5642 codecvt1 = tc1;
5643 else
5644 goto unsupported;
5645 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5646 &code1, &multi_step_cvt,
5647 &interm_types))
5648 break;
5650 /* If op0 can be represented with a low-precision integer,
5651 truncate it to cvt_type and then do the FLOAT_EXPR. */
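/* For instance, long -> float where the longs are known to fit in an
   int can be done as a narrowing long -> int followed by an
   int -> float FLOAT_EXPR. */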
5652 else if (code == FLOAT_EXPR)
5654 wide_int op_min_value, op_max_value;
5655 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5656 goto unsupported;
5658 cvt_type
5659 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5660 if (cvt_type == NULL_TREE
5661 || (wi::min_precision (op_max_value, SIGNED)
5662 > TYPE_PRECISION (cvt_type))
5663 || (wi::min_precision (op_min_value, SIGNED)
5664 > TYPE_PRECISION (cvt_type)))
5665 goto unsupported;
5667 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5668 if (cvt_type == NULL_TREE)
5669 goto unsupported;
5670 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5671 &code1, &multi_step_cvt,
5672 &interm_types))
5673 goto unsupported;
5674 if (supportable_convert_operation ((tree_code) code, vectype_out,
5675 cvt_type, &tc1))
5677 codecvt1 = tc1;
5678 modifier = NARROW_SRC;
5679 break;
5683 goto unsupported;
5685 default:
5686 gcc_unreachable ();
5689 if (!vec_stmt) /* transformation not required. */
5691 if (slp_node
5692 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5693 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5695 if (dump_enabled_p ())
5696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5697 "incompatible vector types for invariants\n");
5698 return false;
5700 DUMP_VECT_SCOPE ("vectorizable_conversion");
5701 if (modifier == NONE)
5703 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5704 vect_model_simple_cost (vinfo, stmt_info,
5705 ncopies * (1 + multi_step_cvt),
5706 dt, ndts, slp_node, cost_vec);
5708 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5710 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5711 /* The final packing step produces one vector result per copy. */
5712 unsigned int nvectors
5713 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5714 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5715 multi_step_cvt, cost_vec,
5716 widen_arith);
5718 else
5720 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5721 /* The initial unpacking step produces two vector results
5722 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5723 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5724 unsigned int nvectors
5725 = (slp_node
5726 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5727 : ncopies * 2);
5728 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5729 multi_step_cvt, cost_vec,
5730 widen_arith);
5732 interm_types.release ();
5733 return true;
5736 /* Transform. */
5737 if (dump_enabled_p ())
5738 dump_printf_loc (MSG_NOTE, vect_location,
5739 "transform conversion. ncopies = %d.\n", ncopies);
5741 if (op_type == binary_op)
5743 if (CONSTANT_CLASS_P (op0))
5744 op0 = fold_convert (TREE_TYPE (op1), op0);
5745 else if (CONSTANT_CLASS_P (op1))
5746 op1 = fold_convert (TREE_TYPE (op0), op1);
5749 /* In case of multi-step conversion, we first generate conversion operations
5750 to the intermediate types, and then from those types to the final one.
5751 We create vector destinations for the intermediate type (TYPES) received
5752 from supportable_*_operation, and store them in the correct order
5753 for future use in vect_create_vectorized_*_stmts (). */
5754 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5755 bool widen_or_narrow_float_p
5756 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5757 vec_dest = vect_create_destination_var (scalar_dest,
5758 widen_or_narrow_float_p
5759 ? cvt_type : vectype_out);
5760 vec_dsts.quick_push (vec_dest);
5762 if (multi_step_cvt)
5764 for (i = interm_types.length () - 1;
5765 interm_types.iterate (i, &intermediate_type); i--)
5767 vec_dest = vect_create_destination_var (scalar_dest,
5768 intermediate_type);
5769 vec_dsts.quick_push (vec_dest);
5773 if (cvt_type)
5774 vec_dest = vect_create_destination_var (scalar_dest,
5775 widen_or_narrow_float_p
5776 ? vectype_out : cvt_type);
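/* Without SLP, a narrowing conversion packs two input vectors (or
   2^(multi_step_cvt+1) for a multi-step conversion) into each final
   vector, so grab that many input defs per copy below; for widening,
   ninputs stays 1 since each input vector expands into several
   results. */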
5778 int ninputs = 1;
5779 if (!slp_node)
5781 if (modifier == WIDEN)
5783 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5785 if (multi_step_cvt)
5786 ninputs = vect_pow2 (multi_step_cvt);
5787 ninputs *= 2;
5791 switch (modifier)
5793 case NONE:
5794 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5795 op0, &vec_oprnds0);
5796 /* vec_dest holds the intermediate-type operand when multi_step_cvt. */
5797 if (multi_step_cvt)
5799 cvt_op = vec_dest;
5800 vec_dest = vec_dsts[0];
5803 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5805 /* Arguments are ready, create the new vector stmt. */
5806 gimple* new_stmt;
5807 if (multi_step_cvt)
5809 gcc_assert (multi_step_cvt == 1);
5810 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5811 new_temp = make_ssa_name (cvt_op, new_stmt);
5812 gimple_assign_set_lhs (new_stmt, new_temp);
5813 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5814 vop0 = new_temp;
5816 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5817 new_temp = make_ssa_name (vec_dest, new_stmt);
5818 gimple_set_lhs (new_stmt, new_temp);
5819 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5821 if (slp_node)
5822 slp_node->push_vec_def (new_stmt);
5823 else
5824 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5826 break;
5828 case WIDEN:
5829 /* In case the vectorization factor (VF) is bigger than the number
5830 of elements that we can fit in a vectype (nunits), we have to
5831 generate more than one vector stmt - i.e. - we need to "unroll"
5832 the vector stmt by a factor VF/nunits. */
5833 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5834 op0, &vec_oprnds0,
5835 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5836 &vec_oprnds1);
5837 if (code == WIDEN_LSHIFT_EXPR)
5839 int oprnds_size = vec_oprnds0.length ();
5840 vec_oprnds1.create (oprnds_size);
5841 for (i = 0; i < oprnds_size; ++i)
5842 vec_oprnds1.quick_push (op1);
5844 /* Arguments are ready. Create the new vector stmts. */
5845 for (i = multi_step_cvt; i >= 0; i--)
5847 tree this_dest = vec_dsts[i];
5848 code_helper c1 = code1, c2 = code2;
5849 if (i == 0 && codecvt2 != ERROR_MARK)
5851 c1 = codecvt1;
5852 c2 = codecvt2;
5854 if (known_eq (nunits_out, nunits_in))
5855 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5856 stmt_info, this_dest, gsi, c1,
5857 op_type);
5858 else
5859 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5860 &vec_oprnds1, stmt_info,
5861 this_dest, gsi,
5862 c1, c2, op_type);
5865 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5867 gimple *new_stmt;
5868 if (cvt_type)
5870 new_temp = make_ssa_name (vec_dest);
5871 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5872 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5874 else
5875 new_stmt = SSA_NAME_DEF_STMT (vop0);
5877 if (slp_node)
5878 slp_node->push_vec_def (new_stmt);
5879 else
5880 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5882 break;
5884 case NARROW_SRC:
5885 case NARROW_DST:
5886 /* In case the vectorization factor (VF) is bigger than the number
5887 of elements that we can fit in a vectype (nunits), we have to
5888 generate more than one vector stmt - i.e. - we need to "unroll"
5889 the vector stmt by a factor VF/nunits. */
5890 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5891 op0, &vec_oprnds0);
5892 /* Arguments are ready. Create the new vector stmts. */
5893 if (cvt_type && modifier == NARROW_DST)
5894 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5896 new_temp = make_ssa_name (vec_dest);
5897 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5898 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5899 vec_oprnds0[i] = new_temp;
5902 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5903 multi_step_cvt,
5904 stmt_info, vec_dsts, gsi,
5905 slp_node, code1,
5906 modifier == NARROW_SRC);
5907 /* After demoting op0 to cvt_type, convert it to dest. */
5908 if (cvt_type && code == FLOAT_EXPR)
5910 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5912 /* Arguments are ready, create the new vector stmt. */
5913 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5914 gimple *new_stmt
5915 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5916 new_temp = make_ssa_name (vec_dest, new_stmt);
5917 gimple_set_lhs (new_stmt, new_temp);
5918 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5920 /* This is the last step of the conversion sequence. Store the
5921 vectors in SLP_NODE or in the vector info of the scalar statement
5922 (or in the STMT_VINFO_RELATED_STMT chain). */
5923 if (slp_node)
5924 slp_node->push_vec_def (new_stmt);
5925 else
5926 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5929 break;
5931 if (!slp_node)
5932 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5934 vec_oprnds0.release ();
5935 vec_oprnds1.release ();
5936 interm_types.release ();
5938 return true;
5941 /* Return true if we can assume from the scalar form of STMT_INFO that
5942 neither the scalar nor the vector forms will generate code. STMT_INFO
5943 is known not to involve a data reference. */
5945 bool
5946 vect_nop_conversion_p (stmt_vec_info stmt_info)
5948 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5949 if (!stmt)
5950 return false;
5952 tree lhs = gimple_assign_lhs (stmt);
5953 tree_code code = gimple_assign_rhs_code (stmt);
5954 tree rhs = gimple_assign_rhs1 (stmt);
5956 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5957 return true;
5959 if (CONVERT_EXPR_CODE_P (code))
5960 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5962 return false;
5965 /* Function vectorizable_assignment.
5967 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5968 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5969 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5970 Return true if STMT_INFO is vectorizable in this way. */
5972 static bool
5973 vectorizable_assignment (vec_info *vinfo,
5974 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5975 gimple **vec_stmt, slp_tree slp_node,
5976 stmt_vector_for_cost *cost_vec)
5978 tree vec_dest;
5979 tree scalar_dest;
5980 tree op;
5981 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5982 tree new_temp;
5983 enum vect_def_type dt[1] = {vect_unknown_def_type};
5984 int ndts = 1;
5985 int ncopies;
5986 int i;
5987 vec<tree> vec_oprnds = vNULL;
5988 tree vop;
5989 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5990 enum tree_code code;
5991 tree vectype_in;
5993 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5994 return false;
5996 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5997 && ! vec_stmt)
5998 return false;
6000 /* Is vectorizable assignment? */
6001 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6002 if (!stmt)
6003 return false;
6005 scalar_dest = gimple_assign_lhs (stmt);
6006 if (TREE_CODE (scalar_dest) != SSA_NAME)
6007 return false;
6009 if (STMT_VINFO_DATA_REF (stmt_info))
6010 return false;
6012 code = gimple_assign_rhs_code (stmt);
6013 if (!(gimple_assign_single_p (stmt)
6014 || code == PAREN_EXPR
6015 || CONVERT_EXPR_CODE_P (code)))
6016 return false;
6018 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6019 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6021 /* Multiple types in SLP are handled by creating the appropriate number of
6022 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6023 case of SLP. */
6024 if (slp_node)
6025 ncopies = 1;
6026 else
6027 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6029 gcc_assert (ncopies >= 1);
6031 slp_tree slp_op;
6032 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
6033 &dt[0], &vectype_in))
6035 if (dump_enabled_p ())
6036 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6037 "use not simple.\n");
6038 return false;
6040 if (!vectype_in)
6041 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
6043 /* We can handle NOP_EXPR conversions that do not change the number
6044 of elements or the vector size. */
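/* E.g. a signed int -> unsigned int conversion qualifies; it is
   implemented below as a VIEW_CONVERT_EXPR on the vector operand. */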
6045 if ((CONVERT_EXPR_CODE_P (code)
6046 || code == VIEW_CONVERT_EXPR)
6047 && (!vectype_in
6048 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
6049 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
6050 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
6051 return false;
6053 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
6055 if (dump_enabled_p ())
6056 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6057 "can't convert between boolean and non "
6058 "boolean vectors %T\n", TREE_TYPE (op));
6060 return false;
6063 /* We do not handle bit-precision changes. */
6064 if ((CONVERT_EXPR_CODE_P (code)
6065 || code == VIEW_CONVERT_EXPR)
6066 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6067 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6068 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
6069 && !type_has_mode_precision_p (TREE_TYPE (op))))
6070 /* But a conversion that does not change the bit-pattern is ok. */
6071 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6072 && INTEGRAL_TYPE_P (TREE_TYPE (op))
6073 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
6074 > TYPE_PRECISION (TREE_TYPE (op)))
6075 && TYPE_UNSIGNED (TREE_TYPE (op)))
6076 || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
6077 == TYPE_PRECISION (TREE_TYPE (op))))))
6079 if (dump_enabled_p ())
6080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6081 "type conversion to/from bit-precision "
6082 "unsupported.\n");
6083 return false;
6086 if (!vec_stmt) /* transformation not required. */
6088 if (slp_node
6089 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
6091 if (dump_enabled_p ())
6092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6093 "incompatible vector types for invariants\n");
6094 return false;
6096 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
6097 DUMP_VECT_SCOPE ("vectorizable_assignment");
6098 if (!vect_nop_conversion_p (stmt_info))
6099 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
6100 cost_vec);
6101 return true;
6104 /* Transform. */
6105 if (dump_enabled_p ())
6106 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6108 /* Handle def. */
6109 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6111 /* Handle use. */
6112 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
6114 /* Arguments are ready. Create the new vector stmt. */
6115 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
6117 if (CONVERT_EXPR_CODE_P (code)
6118 || code == VIEW_CONVERT_EXPR)
6119 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6120 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6121 new_temp = make_ssa_name (vec_dest, new_stmt);
6122 gimple_assign_set_lhs (new_stmt, new_temp);
6123 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6124 if (slp_node)
6125 slp_node->push_vec_def (new_stmt);
6126 else
6127 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6129 if (!slp_node)
6130 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6132 vec_oprnds.release ();
6133 return true;
6137 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6138 either as shift by a scalar or by a vector. */
6140 bool
6141 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6144 machine_mode vec_mode;
6145 optab optab;
6146 int icode;
6147 tree vectype;
6149 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6150 if (!vectype)
6151 return false;
6153 optab = optab_for_tree_code (code, vectype, optab_scalar);
6154 if (!optab
6155 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6157 optab = optab_for_tree_code (code, vectype, optab_vector);
6158 if (!optab
6159 || (optab_handler (optab, TYPE_MODE (vectype))
6160 == CODE_FOR_nothing))
6161 return false;
6164 vec_mode = TYPE_MODE (vectype);
6165 icode = (int) optab_handler (optab, vec_mode);
6166 if (icode == CODE_FOR_nothing)
6167 return false;
6169 return true;
6173 /* Function vectorizable_shift.
6175 Check if STMT_INFO performs a shift operation that can be vectorized.
6176 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6177 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6178 Return true if STMT_INFO is vectorizable in this way. */
6180 static bool
6181 vectorizable_shift (vec_info *vinfo,
6182 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6183 gimple **vec_stmt, slp_tree slp_node,
6184 stmt_vector_for_cost *cost_vec)
6186 tree vec_dest;
6187 tree scalar_dest;
6188 tree op0, op1 = NULL;
6189 tree vec_oprnd1 = NULL_TREE;
6190 tree vectype;
6191 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6192 enum tree_code code;
6193 machine_mode vec_mode;
6194 tree new_temp;
6195 optab optab;
6196 int icode;
6197 machine_mode optab_op2_mode;
6198 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6199 int ndts = 2;
6200 poly_uint64 nunits_in;
6201 poly_uint64 nunits_out;
6202 tree vectype_out;
6203 tree op1_vectype;
6204 int ncopies;
6205 int i;
6206 vec<tree> vec_oprnds0 = vNULL;
6207 vec<tree> vec_oprnds1 = vNULL;
6208 tree vop0, vop1;
6209 unsigned int k;
6210 bool scalar_shift_arg = true;
6211 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6212 bool incompatible_op1_vectype_p = false;
6214 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6215 return false;
6217 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6218 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6219 && ! vec_stmt)
6220 return false;
6222 /* Is STMT a vectorizable binary/unary operation? */
6223 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6224 if (!stmt)
6225 return false;
6227 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6228 return false;
6230 code = gimple_assign_rhs_code (stmt);
6232 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6233 || code == RROTATE_EXPR))
6234 return false;
6236 scalar_dest = gimple_assign_lhs (stmt);
6237 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6238 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6240 if (dump_enabled_p ())
6241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6242 "bit-precision shifts not supported.\n");
6243 return false;
6246 slp_tree slp_op0;
6247 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6248 0, &op0, &slp_op0, &dt[0], &vectype))
6250 if (dump_enabled_p ())
6251 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6252 "use not simple.\n");
6253 return false;
6255 /* If op0 is an external or constant def, infer the vector type
6256 from the scalar type. */
6257 if (!vectype)
6258 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6259 if (vec_stmt)
6260 gcc_assert (vectype);
6261 if (!vectype)
6263 if (dump_enabled_p ())
6264 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6265 "no vectype for scalar type\n");
6266 return false;
6269 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6270 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6271 if (maybe_ne (nunits_out, nunits_in))
6272 return false;
6274 stmt_vec_info op1_def_stmt_info;
6275 slp_tree slp_op1;
6276 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6277 &dt[1], &op1_vectype, &op1_def_stmt_info))
6279 if (dump_enabled_p ())
6280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6281 "use not simple.\n");
6282 return false;
6285 /* Multiple types in SLP are handled by creating the appropriate number of
6286 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6287 case of SLP. */
6288 if (slp_node)
6289 ncopies = 1;
6290 else
6291 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6293 gcc_assert (ncopies >= 1);
6295 /* Determine whether the shift amount is a vector or a scalar. If the
6296 shift/rotate amount is a vector, use the vector/vector shift optabs. */
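/* A constant or loop-invariant shift amount can use the vector/scalar
   shift optab (every lane is shifted by the same amount), whereas a
   lane-varying amount requires the vector/vector shift optab. */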
6298 if ((dt[1] == vect_internal_def
6299 || dt[1] == vect_induction_def
6300 || dt[1] == vect_nested_cycle)
6301 && !slp_node)
6302 scalar_shift_arg = false;
6303 else if (dt[1] == vect_constant_def
6304 || dt[1] == vect_external_def
6305 || dt[1] == vect_internal_def)
6307 /* In SLP, we need to check whether the shift count is the same in
6308 all statements; in loops, if it is a constant or invariant, it is
6309 always a scalar shift. */
6310 if (slp_node)
6312 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6313 stmt_vec_info slpstmt_info;
6315 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6317 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6318 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6319 scalar_shift_arg = false;
6322 /* For internal SLP defs we have to make sure we see scalar stmts
6323 for all vector elements.
6324 ??? For different vectors we could resort to a different
6325 scalar shift operand but code-generation below simply always
6326 takes the first. */
6327 if (dt[1] == vect_internal_def
6328 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6329 stmts.length ()))
6330 scalar_shift_arg = false;
6333 /* If the shift amount is computed by a pattern stmt we cannot
6334 use the scalar amount directly thus give up and use a vector
6335 shift. */
6336 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6337 scalar_shift_arg = false;
6339 else
6341 if (dump_enabled_p ())
6342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6343 "operand mode requires invariant argument.\n");
6344 return false;
6347 /* Vector shifted by vector. */
6348 bool was_scalar_shift_arg = scalar_shift_arg;
6349 if (!scalar_shift_arg)
6351 optab = optab_for_tree_code (code, vectype, optab_vector);
6352 if (dump_enabled_p ())
6353 dump_printf_loc (MSG_NOTE, vect_location,
6354 "vector/vector shift/rotate found.\n");
6356 if (!op1_vectype)
6357 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6358 slp_op1);
6359 incompatible_op1_vectype_p
6360 = (op1_vectype == NULL_TREE
6361 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6362 TYPE_VECTOR_SUBPARTS (vectype))
6363 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6364 if (incompatible_op1_vectype_p
6365 && (!slp_node
6366 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6367 || slp_op1->refcnt != 1))
6369 if (dump_enabled_p ())
6370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6371 "unusable type for last operand in"
6372 " vector/vector shift/rotate.\n");
6373 return false;
6376 /* See if the machine has a vector-shift-by-scalar insn and, if not,
6377 whether it has a vector-shift-by-vector insn. */
6378 else
6380 optab = optab_for_tree_code (code, vectype, optab_scalar);
6381 if (optab
6382 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6384 if (dump_enabled_p ())
6385 dump_printf_loc (MSG_NOTE, vect_location,
6386 "vector/scalar shift/rotate found.\n");
6388 else
6390 optab = optab_for_tree_code (code, vectype, optab_vector);
6391 if (optab
6392 && (optab_handler (optab, TYPE_MODE (vectype))
6393 != CODE_FOR_nothing))
6395 scalar_shift_arg = false;
6397 if (dump_enabled_p ())
6398 dump_printf_loc (MSG_NOTE, vect_location,
6399 "vector/vector shift/rotate found.\n");
6401 if (!op1_vectype)
6402 op1_vectype = get_vectype_for_scalar_type (vinfo,
6403 TREE_TYPE (op1),
6404 slp_op1);
6406 /* Unlike the other binary operators, shifts/rotates have
6407 an int rhs rather than one of the same type as the lhs,
6408 so make sure the scalar has the right type when we are
6409 dealing with vectors of long long/long/short/char. */
6410 incompatible_op1_vectype_p
6411 = (!op1_vectype
6412 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6413 TREE_TYPE (op1)));
6414 if (incompatible_op1_vectype_p
6415 && dt[1] == vect_internal_def)
6417 if (dump_enabled_p ())
6418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6419 "unusable type for last operand in"
6420 " vector/vector shift/rotate.\n");
6421 return false;
6427 /* Supportable by target? */
6428 if (!optab)
6430 if (dump_enabled_p ())
6431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6432 "no optab.\n");
6433 return false;
6435 vec_mode = TYPE_MODE (vectype);
6436 icode = (int) optab_handler (optab, vec_mode);
6437 if (icode == CODE_FOR_nothing)
6439 if (dump_enabled_p ())
6440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6441 "op not supported by target.\n");
6442 return false;
6444 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6445 if (vect_emulated_vector_p (vectype))
6446 return false;
6448 if (!vec_stmt) /* transformation not required. */
6450 if (slp_node
6451 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6452 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6453 && (!incompatible_op1_vectype_p
6454 || dt[1] == vect_constant_def)
6455 && !vect_maybe_update_slp_op_vectype
6456 (slp_op1,
6457 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6459 if (dump_enabled_p ())
6460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6461 "incompatible vector types for invariants\n");
6462 return false;
6464 /* Now adjust the constant shift amount in place. */
6465 if (slp_node
6466 && incompatible_op1_vectype_p
6467 && dt[1] == vect_constant_def)
6469 for (unsigned i = 0;
6470 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6472 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6473 = fold_convert (TREE_TYPE (vectype),
6474 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6475 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6476 == INTEGER_CST));
6479 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6480 DUMP_VECT_SCOPE ("vectorizable_shift");
6481 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6482 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6483 return true;
6486 /* Transform. */
6488 if (dump_enabled_p ())
6489 dump_printf_loc (MSG_NOTE, vect_location,
6490 "transform binary/unary operation.\n");
6492 if (incompatible_op1_vectype_p && !slp_node)
6494 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6495 op1 = fold_convert (TREE_TYPE (vectype), op1);
6496 if (dt[1] != vect_constant_def)
6497 op1 = vect_init_vector (vinfo, stmt_info, op1,
6498 TREE_TYPE (vectype), NULL);
6501 /* Handle def. */
6502 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6504 if (scalar_shift_arg && dt[1] != vect_internal_def)
6506 /* Vector shl and shr insn patterns can be defined with scalar
6507 operand 2 (shift operand). In this case, use constant or loop
6508 invariant op1 directly, without extending it to vector mode
6509 first. */
6510 optab_op2_mode = insn_data[icode].operand[2].mode;
6511 if (!VECTOR_MODE_P (optab_op2_mode))
6513 if (dump_enabled_p ())
6514 dump_printf_loc (MSG_NOTE, vect_location,
6515 "operand 1 using scalar mode.\n");
6516 vec_oprnd1 = op1;
6517 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6518 vec_oprnds1.quick_push (vec_oprnd1);
6519 /* Store vec_oprnd1 for every vector stmt to be created.
6520 We check during the analysis that all the shift arguments
6521 are the same.
6522 TODO: Allow different constants for different vector
6523 stmts generated for an SLP instance. */
6524 for (k = 0;
6525 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6526 vec_oprnds1.quick_push (vec_oprnd1);
6529 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6531 if (was_scalar_shift_arg)
6533 /* If the argument was the same in all lanes, create
6534 the correctly typed vector shift amount directly. */
6535 op1 = fold_convert (TREE_TYPE (vectype), op1);
6536 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6537 !loop_vinfo ? gsi : NULL);
6538 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6539 !loop_vinfo ? gsi : NULL);
6540 vec_oprnds1.create (slp_node->vec_stmts_size);
6541 for (k = 0; k < slp_node->vec_stmts_size; k++)
6542 vec_oprnds1.quick_push (vec_oprnd1);
6544 else if (dt[1] == vect_constant_def)
6545 /* The constant shift amount has been adjusted in place. */
6547 else
6548 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6551 /* vec_oprnd1 is available if operand 1 should be of a scalar type
6552 (a special case for certain kinds of vector shifts); otherwise,
6553 operand 1 should be of a vector type (the usual case). */
6554 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6555 op0, &vec_oprnds0,
6556 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6558 /* Arguments are ready. Create the new vector stmt. */
6559 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6561 /* For internal defs where we need to use a scalar shift arg,
6562 extract the first lane. */
6563 if (scalar_shift_arg && dt[1] == vect_internal_def)
6565 vop1 = vec_oprnds1[0];
6566 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6567 gassign *new_stmt
6568 = gimple_build_assign (new_temp,
6569 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6570 vop1,
6571 TYPE_SIZE (TREE_TYPE (new_temp)),
6572 bitsize_zero_node));
6573 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6574 vop1 = new_temp;
6576 else
6577 vop1 = vec_oprnds1[i];
6578 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6579 new_temp = make_ssa_name (vec_dest, new_stmt);
6580 gimple_assign_set_lhs (new_stmt, new_temp);
6581 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6582 if (slp_node)
6583 slp_node->push_vec_def (new_stmt);
6584 else
6585 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6588 if (!slp_node)
6589 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6591 vec_oprnds0.release ();
6592 vec_oprnds1.release ();
6594 return true;
6597 /* Function vectorizable_operation.
6599 Check if STMT_INFO performs a binary, unary or ternary operation that can
6600 be vectorized.
6601 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6602 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6603 Return true if STMT_INFO is vectorizable in this way. */
6605 static bool
6606 vectorizable_operation (vec_info *vinfo,
6607 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6608 gimple **vec_stmt, slp_tree slp_node,
6609 stmt_vector_for_cost *cost_vec)
6611 tree vec_dest;
6612 tree scalar_dest;
6613 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6614 tree vectype;
6615 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6616 enum tree_code code, orig_code;
6617 machine_mode vec_mode;
6618 tree new_temp;
6619 int op_type;
6620 optab optab;
6621 bool target_support_p;
6622 enum vect_def_type dt[3]
6623 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6624 int ndts = 3;
6625 poly_uint64 nunits_in;
6626 poly_uint64 nunits_out;
6627 tree vectype_out;
6628 int ncopies, vec_num;
6629 int i;
6630 vec<tree> vec_oprnds0 = vNULL;
6631 vec<tree> vec_oprnds1 = vNULL;
6632 vec<tree> vec_oprnds2 = vNULL;
6633 tree vop0, vop1, vop2;
6634 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6636 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6637 return false;
6639 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6640 && ! vec_stmt)
6641 return false;
6643 /* Is STMT a vectorizable binary/unary operation? */
6644 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6645 if (!stmt)
6646 return false;
6648 /* Loads and stores are handled in vectorizable_{load,store}. */
6649 if (STMT_VINFO_DATA_REF (stmt_info))
6650 return false;
6652 orig_code = code = gimple_assign_rhs_code (stmt);
6654 /* Shifts are handled in vectorizable_shift. */
6655 if (code == LSHIFT_EXPR
6656 || code == RSHIFT_EXPR
6657 || code == LROTATE_EXPR
6658 || code == RROTATE_EXPR)
6659 return false;
6661 /* Comparisons are handled in vectorizable_comparison. */
6662 if (TREE_CODE_CLASS (code) == tcc_comparison)
6663 return false;
6665 /* Conditions are handled in vectorizable_condition. */
6666 if (code == COND_EXPR)
6667 return false;
6669 /* For pointer addition and subtraction, we should use the normal
6670 plus and minus for the vector operation. */
6671 if (code == POINTER_PLUS_EXPR)
6672 code = PLUS_EXPR;
6673 if (code == POINTER_DIFF_EXPR)
6674 code = MINUS_EXPR;
6676 /* Support only unary, binary or ternary operations. */
6677 op_type = TREE_CODE_LENGTH (code);
6678 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6680 if (dump_enabled_p ())
6681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6682 "num. args = %d (not unary/binary/ternary op).\n",
6683 op_type);
6684 return false;
6687 scalar_dest = gimple_assign_lhs (stmt);
6688 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6690 /* Most operations cannot handle bit-precision types without extra
6691 truncations. */
6692 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6693 if (!mask_op_p
6694 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6695 /* Exceptions are bitwise binary operations. */
6696 && code != BIT_IOR_EXPR
6697 && code != BIT_XOR_EXPR
6698 && code != BIT_AND_EXPR)
6700 if (dump_enabled_p ())
6701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6702 "bit-precision arithmetic not supported.\n");
6703 return false;
6706 slp_tree slp_op0;
6707 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6708 0, &op0, &slp_op0, &dt[0], &vectype))
6710 if (dump_enabled_p ())
6711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6712 "use not simple.\n");
6713 return false;
6715 bool is_invariant = (dt[0] == vect_external_def
6716 || dt[0] == vect_constant_def);
6717 /* If op0 is an external or constant def, infer the vector type
6718 from the scalar type. */
6719 if (!vectype)
6721 /* For a boolean type we cannot determine the vectype from an
6722 invariant value (we don't know whether it is a vector
6723 of booleans or a vector of integers). We use the output
6724 vectype because operations on booleans don't change the
6725 type. */
6726 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6728 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6730 if (dump_enabled_p ())
6731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6732 "not supported operation on bool value.\n");
6733 return false;
6735 vectype = vectype_out;
6737 else
6738 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6739 slp_node);
6741 if (vec_stmt)
6742 gcc_assert (vectype);
6743 if (!vectype)
6745 if (dump_enabled_p ())
6746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6747 "no vectype for scalar type %T\n",
6748 TREE_TYPE (op0));
6750 return false;
6753 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6754 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6755 if (maybe_ne (nunits_out, nunits_in))
6756 return false;
6758 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6759 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6760 if (op_type == binary_op || op_type == ternary_op)
6762 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6763 1, &op1, &slp_op1, &dt[1], &vectype2))
6765 if (dump_enabled_p ())
6766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6767 "use not simple.\n");
6768 return false;
6770 is_invariant &= (dt[1] == vect_external_def
6771 || dt[1] == vect_constant_def);
6772 if (vectype2
6773 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6774 return false;
6776 if (op_type == ternary_op)
6778 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6779 2, &op2, &slp_op2, &dt[2], &vectype3))
6781 if (dump_enabled_p ())
6782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6783 "use not simple.\n");
6784 return false;
6786 is_invariant &= (dt[2] == vect_external_def
6787 || dt[2] == vect_constant_def);
6788 if (vectype3
6789 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6790 return false;
6793 /* Multiple types in SLP are handled by creating the appropriate number of
6794 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6795 case of SLP. */
6796 if (slp_node)
6798 ncopies = 1;
6799 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6801 else
6803 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6804 vec_num = 1;
6807 gcc_assert (ncopies >= 1);
6809 /* Reject attempts to combine mask types with nonmask types, e.g. if
6810 we have an AND between a (nonmask) boolean loaded from memory and
6811 a (mask) boolean result of a comparison.
6813 TODO: We could easily fix these cases up using pattern statements. */
6814 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6815 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6816 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6818 if (dump_enabled_p ())
6819 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6820 "mixed mask and nonmask vector types\n");
6821 return false;
6824 /* Supportable by target? */
6826 vec_mode = TYPE_MODE (vectype);
6827 if (code == MULT_HIGHPART_EXPR)
6828 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6829 else
6831 optab = optab_for_tree_code (code, vectype, optab_default);
6832 if (!optab)
6834 if (dump_enabled_p ())
6835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6836 "no optab.\n");
6837 return false;
6839 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6840 || optab_libfunc (optab, vec_mode));
6843 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6844 if (!target_support_p || using_emulated_vectors_p)
6846 if (dump_enabled_p ())
6847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6848 "op not supported by target.\n");
6849 /* When vec_mode is not a vector mode and we verified that ops we
6850 do not have to lower (like AND) are natively supported, let
6851 those through even when the mode isn't word_mode. For
6852 ops we do have to lower, the lowering code assumes we are
6853 dealing with word_mode. */
6854 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6855 || !target_support_p)
6856 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6857 /* Check only during analysis. */
6858 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6860 if (dump_enabled_p ())
6861 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6862 return false;
6864 if (dump_enabled_p ())
6865 dump_printf_loc (MSG_NOTE, vect_location,
6866 "proceeding using word mode.\n");
6867 using_emulated_vectors_p = true;
6870 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6871 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6872 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6873 internal_fn cond_fn = get_conditional_internal_fn (code);
6874 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6876 /* If operating on inactive elements could generate spurious traps,
6877 we need to restrict the operation to active lanes. Note that this
6878 specifically doesn't apply to unhoisted invariants, since they
6879 operate on the same value for every lane.
6881 Similarly, if this operation is part of a reduction, a fully-masked
6882 loop should only change the active lanes of the reduction chain,
6883 keeping the inactive lanes as-is. */
6884 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6885 || reduc_idx >= 0);
6887 if (!vec_stmt) /* transformation not required. */
6889 if (loop_vinfo
6890 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6891 && mask_out_inactive)
6893 if (cond_len_fn != IFN_LAST
6894 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6895 OPTIMIZE_FOR_SPEED))
6896 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6898 else if (cond_fn != IFN_LAST
6899 && direct_internal_fn_supported_p (cond_fn, vectype,
6900 OPTIMIZE_FOR_SPEED))
6901 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6902 vectype, NULL);
6903 else
6905 if (dump_enabled_p ())
6906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6907 "can't use a fully-masked loop because no"
6908 " conditional operation is available.\n");
6909 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6913 /* Put types on constant and invariant SLP children. */
6914 if (slp_node
6915 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6916 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6917 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6919 if (dump_enabled_p ())
6920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6921 "incompatible vector types for invariants\n");
6922 return false;
6925 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6926 DUMP_VECT_SCOPE ("vectorizable_operation");
6927 vect_model_simple_cost (vinfo, stmt_info,
6928 ncopies, dt, ndts, slp_node, cost_vec);
6929 if (using_emulated_vectors_p)
6931 /* The above vect_model_simple_cost call handles constants
6932 in the prologue and (mis-)costs one of the stmts as
6933 vector stmt. See below for the actual lowering that will
6934 be applied. */
6935 unsigned n
6936 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6937 switch (code)
6939 case PLUS_EXPR:
6940 n *= 5;
6941 break;
6942 case MINUS_EXPR:
6943 n *= 6;
6944 break;
6945 case NEGATE_EXPR:
6946 n *= 4;
6947 break;
6948 default:
6949 /* Bit operations do not have extra cost and are accounted
6950 as vector stmt by vect_model_simple_cost. */
6951 n = 0;
6952 break;
6954 if (n != 0)
6956 /* We also need to materialize two large constants. */
6957 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6958 0, vect_prologue);
6959 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6960 0, vect_body);
6963 return true;
6966 /* Transform. */
6968 if (dump_enabled_p ())
6969 dump_printf_loc (MSG_NOTE, vect_location,
6970 "transform binary/unary operation.\n");
6972 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6973 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6975 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6976 vectors with unsigned elements, but the result is signed. So, we
6977 need to compute the MINUS_EXPR into a vectype temporary and
6978 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6979 tree vec_cvt_dest = NULL_TREE;
6980 if (orig_code == POINTER_DIFF_EXPR)
6982 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6983 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6985 /* Handle def. */
6986 else
6987 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6989 /* In case the vectorization factor (VF) is bigger than the number
6990 of elements that we can fit in a vectype (nunits), we have to generate
6991 more than one vector stmt - i.e. - we need to "unroll" the
6992 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6993 from one copy of the vector stmt to the next, in the field
6994 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6995 stages to find the correct vector defs to be used when vectorizing
6996 stmts that use the defs of the current stmt. The example below
6997 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6998 we need to create 4 vectorized stmts):
7000 before vectorization:
7001 RELATED_STMT VEC_STMT
7002 S1: x = memref - -
7003 S2: z = x + 1 - -
7005 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
7006 there):
7007 RELATED_STMT VEC_STMT
7008 VS1_0: vx0 = memref0 VS1_1 -
7009 VS1_1: vx1 = memref1 VS1_2 -
7010 VS1_2: vx2 = memref2 VS1_3 -
7011 VS1_3: vx3 = memref3 - -
7012 S1: x = load - VS1_0
7013 S2: z = x + 1 - -
7015 step2: vectorize stmt S2 (done here):
7016 To vectorize stmt S2 we first need to find the relevant vector
7017 def for the first operand 'x'. This is, as usual, obtained from
7018 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
7019 that defines 'x' (S1). This way we find the stmt VS1_0, and the
7020 relevant vector def 'vx0'. Having found 'vx0' we can generate
7021 the vector stmt VS2_0, and as usual, record it in the
7022 STMT_VINFO_VEC_STMT of stmt S2.
7023 When creating the second copy (VS2_1), we obtain the relevant vector
7024 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
7025 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
7026 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
7027 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
7028 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
7029 chain of stmts and pointers:
7030 RELATED_STMT VEC_STMT
7031 VS1_0: vx0 = memref0 VS1_1 -
7032 VS1_1: vx1 = memref1 VS1_2 -
7033 VS1_2: vx2 = memref2 VS1_3 -
7034 VS1_3: vx3 = memref3 - -
7035 S1: x = load - VS1_0
7036 VS2_0: vz0 = vx0 + v1 VS2_1 -
7037 VS2_1: vz1 = vx1 + v1 VS2_2 -
7038 VS2_2: vz2 = vx2 + v1 VS2_3 -
7039 VS2_3: vz3 = vx3 + v1 - -
7040 S2: z = x + 1 - VS2_0 */
7042 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
7043 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
7044 /* Arguments are ready. Create the new vector stmt. */
7045 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
7047 gimple *new_stmt = NULL;
7048 vop1 = ((op_type == binary_op || op_type == ternary_op)
7049 ? vec_oprnds1[i] : NULL_TREE);
7050 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
7051 if (using_emulated_vectors_p
7052 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
7054 /* Lower the operation. This follows vector lowering. */
7055 unsigned int width = vector_element_bits (vectype);
7056 tree inner_type = TREE_TYPE (vectype);
7057 tree word_type
7058 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
7059 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
7060 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
7061 tree high_bits
7062 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
7063 tree wvop0 = make_ssa_name (word_type);
7064 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
7065 build1 (VIEW_CONVERT_EXPR,
7066 word_type, vop0));
7067 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7068 tree result_low, signs;
7069 if (code == PLUS_EXPR || code == MINUS_EXPR)
7071 tree wvop1 = make_ssa_name (word_type);
7072 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
7073 build1 (VIEW_CONVERT_EXPR,
7074 word_type, vop1));
7075 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7076 signs = make_ssa_name (word_type);
7077 new_stmt = gimple_build_assign (signs,
7078 BIT_XOR_EXPR, wvop0, wvop1);
7079 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7080 tree b_low = make_ssa_name (word_type);
7081 new_stmt = gimple_build_assign (b_low,
7082 BIT_AND_EXPR, wvop1, low_bits);
7083 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7084 tree a_low = make_ssa_name (word_type);
7085 if (code == PLUS_EXPR)
7086 new_stmt = gimple_build_assign (a_low,
7087 BIT_AND_EXPR, wvop0, low_bits);
7088 else
7089 new_stmt = gimple_build_assign (a_low,
7090 BIT_IOR_EXPR, wvop0, high_bits);
7091 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7092 if (code == MINUS_EXPR)
7094 new_stmt = gimple_build_assign (NULL_TREE,
7095 BIT_NOT_EXPR, signs);
7096 signs = make_ssa_name (word_type);
7097 gimple_assign_set_lhs (new_stmt, signs);
7098 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7100 new_stmt = gimple_build_assign (NULL_TREE,
7101 BIT_AND_EXPR, signs, high_bits);
7102 signs = make_ssa_name (word_type);
7103 gimple_assign_set_lhs (new_stmt, signs);
7104 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7105 result_low = make_ssa_name (word_type);
7106 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
7107 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7109 else
7111 tree a_low = make_ssa_name (word_type);
7112 new_stmt = gimple_build_assign (a_low,
7113 BIT_AND_EXPR, wvop0, low_bits);
7114 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7115 signs = make_ssa_name (word_type);
7116 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7117 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7118 new_stmt = gimple_build_assign (NULL_TREE,
7119 BIT_AND_EXPR, signs, high_bits);
7120 signs = make_ssa_name (word_type);
7121 gimple_assign_set_lhs (new_stmt, signs);
7122 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7123 result_low = make_ssa_name (word_type);
7124 new_stmt = gimple_build_assign (result_low,
7125 MINUS_EXPR, high_bits, a_low);
7126 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7128 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
7129 signs);
7130 result_low = make_ssa_name (word_type);
7131 gimple_assign_set_lhs (new_stmt, result_low);
7132 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7133 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7134 build1 (VIEW_CONVERT_EXPR,
7135 vectype, result_low));
7136 new_temp = make_ssa_name (vectype);
7137 gimple_assign_set_lhs (new_stmt, new_temp);
7138 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
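	  /* Editor's note: a minimal standalone C model of the PLUS_EXPR
	     lowering above, assuming eight 8-bit elements packed into a
	     64-bit word; the constants and names are invented for the
	     sketch and are not part of this function:

		#include <stdint.h>

		#define LOW_BITS  0x7f7f7f7f7f7f7f7fULL  // all but each MSB
		#define HIGH_BITS 0x8080808080808080ULL  // each element's MSB

		static uint64_t
		emulated_v8qi_add (uint64_t a, uint64_t b)
		{
		  uint64_t signs = (a ^ b) & HIGH_BITS; // MSB of each sum, sans carry
		  uint64_t a_low = a & LOW_BITS;	// clear MSBs so carries cannot
		  uint64_t b_low = b & LOW_BITS;	// cross element boundaries
		  uint64_t low = a_low + b_low;		// low bits plus carry into MSB
		  return low ^ signs;			// patch the true MSBs back in
		}  */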
7140 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7142 tree mask;
7143 if (masked_loop_p)
7144 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7145 vec_num * ncopies, vectype, i);
7146 else
7147 /* Dummy mask. */
7148 mask = build_minus_one_cst (truth_type_for (vectype));
7149 auto_vec<tree> vops (6);
7150 vops.quick_push (mask);
7151 vops.quick_push (vop0);
7152 if (vop1)
7153 vops.quick_push (vop1);
7154 if (vop2)
7155 vops.quick_push (vop2);
7156 if (reduc_idx >= 0)
7158 /* Perform the operation on active elements only and take
7159 inactive elements from the reduction chain input. */
7160 gcc_assert (!vop2);
7161 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7163 else
7165 auto else_value = targetm.preferred_else_value
7166 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7167 vops.quick_push (else_value);
7169 if (len_loop_p)
7171 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7172 vec_num * ncopies, vectype, i, 1);
7173 signed char biasval
7174 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7175 tree bias = build_int_cst (intQI_type_node, biasval);
7176 vops.quick_push (len);
7177 vops.quick_push (bias);
7179 gcall *call
7180 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7181 : cond_len_fn,
7182 vops);
7183 new_temp = make_ssa_name (vec_dest, call);
7184 gimple_call_set_lhs (call, new_temp);
7185 gimple_call_set_nothrow (call, true);
7186 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7187 new_stmt = call;
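	  /* Editor's illustration (SSA names invented): for a fully-masked
	     PLUS reduction the internal call built above looks roughly like
	       vect_res = .COND_ADD (loop_mask_N, vect_accum, vect_a, vect_accum);
	     active lanes compute vect_accum + vect_a while inactive lanes
	     pass the reduction input through unchanged; with length-based
	     partial vectors a .COND_LEN_* variant is built instead, using an
	     all-ones dummy mask plus the length and bias as trailing
	     operands.  */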
7189 else
7191 tree mask = NULL_TREE;
7192 	  /* When combining two masks, check if either of them is elsewhere
7193 	     combined with a loop mask; if that's the case we can mark that the
7194 	     new combined mask doesn't need to be combined with a loop mask.  */
7195 if (masked_loop_p
7196 && code == BIT_AND_EXPR
7197 && VECTOR_BOOLEAN_TYPE_P (vectype))
7199 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7200 ncopies}))
7202 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7203 vec_num * ncopies, vectype, i);
7205 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7206 vop0, gsi);
7209 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7210 ncopies }))
7212 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7213 vec_num * ncopies, vectype, i);
7215 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7216 vop1, gsi);
7220 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7221 new_temp = make_ssa_name (vec_dest, new_stmt);
7222 gimple_assign_set_lhs (new_stmt, new_temp);
7223 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7224 if (using_emulated_vectors_p)
7225 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7227 /* Enter the combined value into the vector cond hash so we don't
7228 AND it with a loop mask again. */
7229 if (mask)
7230 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
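	  /* Editor's note on the hunk above (SSA names invented): when
	     e.g. op0 is known to also be combined with the loop mask
	     elsewhere, prepare_vec_mask folds the loop mask in here,
	       vec_mask_and_1 = loop_mask_2 & vop0;
	       combined_3 = vec_mask_and_1 & vop1;
	     and the pair (combined_3, loop_mask_2) is recorded in
	     vec_cond_masked_set so that a later masked use of the combined
	     value does not AND the loop mask in a second time.  */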
7233 if (vec_cvt_dest)
7235 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7236 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7237 new_temp);
7238 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7239 gimple_assign_set_lhs (new_stmt, new_temp);
7240 vect_finish_stmt_generation (vinfo, stmt_info,
7241 new_stmt, gsi);
7244 if (slp_node)
7245 slp_node->push_vec_def (new_stmt);
7246 else
7247 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7250 if (!slp_node)
7251 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7253 vec_oprnds0.release ();
7254 vec_oprnds1.release ();
7255 vec_oprnds2.release ();
7257 return true;
7260 /* A helper function to ensure data reference DR_INFO's base alignment. */
7262 static void
7263 ensure_base_align (dr_vec_info *dr_info)
7265   /* Alignment is only analyzed for the first element of a DR group;
7266      use that to look at the base alignment we need to enforce.  */
7267 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7268 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7270 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7272 if (dr_info->base_misaligned)
7274 tree base_decl = dr_info->base_decl;
7276 // We should only be able to increase the alignment of a base object if
7277 // we know what its new alignment should be at compile time.
7278 unsigned HOST_WIDE_INT align_base_to =
7279 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7281 if (decl_in_symtab_p (base_decl))
7282 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7283 else if (DECL_ALIGN (base_decl) < align_base_to)
7285 SET_DECL_ALIGN (base_decl, align_base_to);
7286 DECL_USER_ALIGN (base_decl) = 1;
7288 dr_info->base_misaligned = false;
7293 /* Function get_group_alias_ptr_type.
7295 Return the alias type for the group starting at FIRST_STMT_INFO. */
7297 static tree
7298 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7300 struct data_reference *first_dr, *next_dr;
7302 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7303 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7304 while (next_stmt_info)
7306 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7307 if (get_alias_set (DR_REF (first_dr))
7308 != get_alias_set (DR_REF (next_dr)))
7310 if (dump_enabled_p ())
7311 dump_printf_loc (MSG_NOTE, vect_location,
7312 "conflicting alias set types.\n");
7313 return ptr_type_node;
7315 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7317 return reference_alias_ptr_type (DR_REF (first_dr));
7321 /* Function scan_operand_equal_p.
7323 Helper function for check_scan_store. Compare two references
7324 with .GOMP_SIMD_LANE bases. */
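/* Editor's illustration (names invented): the two references compared here
   usually differ only in form, e.g.
     D.2042[_25]
   and
     MEM[(int *)&D.2042 + _25 * 4]
   are treated as equal; the multiplication of the offset by a constant step
   and any widening casts are stripped before comparing, and the steps of
   both references have to match.  */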
7326 static bool
7327 scan_operand_equal_p (tree ref1, tree ref2)
7329 tree ref[2] = { ref1, ref2 };
7330 poly_int64 bitsize[2], bitpos[2];
7331 tree offset[2], base[2];
7332 for (int i = 0; i < 2; ++i)
7334 machine_mode mode;
7335 int unsignedp, reversep, volatilep = 0;
7336 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7337 &offset[i], &mode, &unsignedp,
7338 &reversep, &volatilep);
7339 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7340 return false;
7341 if (TREE_CODE (base[i]) == MEM_REF
7342 && offset[i] == NULL_TREE
7343 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7345 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7346 if (is_gimple_assign (def_stmt)
7347 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7348 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7349 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7351 if (maybe_ne (mem_ref_offset (base[i]), 0))
7352 return false;
7353 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7354 offset[i] = gimple_assign_rhs2 (def_stmt);
7359 if (!operand_equal_p (base[0], base[1], 0))
7360 return false;
7361 if (maybe_ne (bitsize[0], bitsize[1]))
7362 return false;
7363 if (offset[0] != offset[1])
7365 if (!offset[0] || !offset[1])
7366 return false;
7367 if (!operand_equal_p (offset[0], offset[1], 0))
7369 tree step[2];
7370 for (int i = 0; i < 2; ++i)
7372 step[i] = integer_one_node;
7373 if (TREE_CODE (offset[i]) == SSA_NAME)
7375 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7376 if (is_gimple_assign (def_stmt)
7377 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7378 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7379 == INTEGER_CST))
7381 step[i] = gimple_assign_rhs2 (def_stmt);
7382 offset[i] = gimple_assign_rhs1 (def_stmt);
7385 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7387 step[i] = TREE_OPERAND (offset[i], 1);
7388 offset[i] = TREE_OPERAND (offset[i], 0);
7390 tree rhs1 = NULL_TREE;
7391 if (TREE_CODE (offset[i]) == SSA_NAME)
7393 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7394 if (gimple_assign_cast_p (def_stmt))
7395 rhs1 = gimple_assign_rhs1 (def_stmt);
7397 else if (CONVERT_EXPR_P (offset[i]))
7398 rhs1 = TREE_OPERAND (offset[i], 0);
7399 if (rhs1
7400 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7401 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7402 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7403 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7404 offset[i] = rhs1;
7406 if (!operand_equal_p (offset[0], offset[1], 0)
7407 || !operand_equal_p (step[0], step[1], 0))
7408 return false;
7411 return true;
7415 enum scan_store_kind {
7416 /* Normal permutation. */
7417 scan_store_kind_perm,
7419 /* Whole vector left shift permutation with zero init. */
7420 scan_store_kind_lshift_zero,
7422 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7423 scan_store_kind_lshift_cond
7426 /* Function scan_store_can_perm_p.
7428    Verify if we can perform the needed permutations or whole vector shifts.
7429    Return -1 on failure, otherwise the exact log2 of vectype's nunits.
7430    If nonnull, USE_WHOLE_VECTOR is filled with the scan_store_kind of the
7431    operation to perform at each step.  */
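/* Editor's illustration: for nunits == 8 the permutation selectors tried by
   the loop below are
     { 0, 8, 9, 10, 11, 12, 13, 14 }
     { 0, 1, 8, 9, 10, 11, 12, 13 }
     { 0, 1, 2, 3, 8, 9, 10, 11 }
   followed by the broadcast of the last element
     { 7, 7, 7, 7, 7, 7, 7, 7 }
   where indices 8..15 select lanes from the second permutation input.  */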
7433 static int
7434 scan_store_can_perm_p (tree vectype, tree init,
7435 vec<enum scan_store_kind> *use_whole_vector = NULL)
7437 enum machine_mode vec_mode = TYPE_MODE (vectype);
7438 unsigned HOST_WIDE_INT nunits;
7439 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7440 return -1;
7441 int units_log2 = exact_log2 (nunits);
7442 if (units_log2 <= 0)
7443 return -1;
7445 int i;
7446 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7447 for (i = 0; i <= units_log2; ++i)
7449 unsigned HOST_WIDE_INT j, k;
7450 enum scan_store_kind kind = scan_store_kind_perm;
7451 vec_perm_builder sel (nunits, nunits, 1);
7452 sel.quick_grow (nunits);
7453 if (i == units_log2)
7455 for (j = 0; j < nunits; ++j)
7456 sel[j] = nunits - 1;
7458 else
7460 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7461 sel[j] = j;
7462 for (k = 0; j < nunits; ++j, ++k)
7463 sel[j] = nunits + k;
7465 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7466 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7468 if (i == units_log2)
7469 return -1;
7471 if (whole_vector_shift_kind == scan_store_kind_perm)
7473 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7474 return -1;
7475 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7476 		  /* Whole vector shifts shift in zeros, so if init is an all-zero
7477 constant, there is no need to do anything further. */
7478 if ((TREE_CODE (init) != INTEGER_CST
7479 && TREE_CODE (init) != REAL_CST)
7480 || !initializer_zerop (init))
7482 tree masktype = truth_type_for (vectype);
7483 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7484 return -1;
7485 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7488 kind = whole_vector_shift_kind;
7490 if (use_whole_vector)
7492 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7493 use_whole_vector->safe_grow_cleared (i, true);
7494 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7495 use_whole_vector->safe_push (kind);
7499 return units_log2;
7503 /* Function check_scan_store.
7505 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7507 static bool
7508 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7509 enum vect_def_type rhs_dt, bool slp, tree mask,
7510 vect_memory_access_type memory_access_type)
7512 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7513 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7514 tree ref_type;
7516 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7517 if (slp
7518 || mask
7519 || memory_access_type != VMAT_CONTIGUOUS
7520 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7521 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7522 || loop_vinfo == NULL
7523 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7524 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7525 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7526 || !integer_zerop (DR_INIT (dr_info->dr))
7527 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7528 || !alias_sets_conflict_p (get_alias_set (vectype),
7529 get_alias_set (TREE_TYPE (ref_type))))
7531 if (dump_enabled_p ())
7532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7533 "unsupported OpenMP scan store.\n");
7534 return false;
7537 /* We need to pattern match code built by OpenMP lowering and simplified
7538      by subsequent optimizations into something we can handle.
7539 #pragma omp simd reduction(inscan,+:r)
7540 for (...)
7542 r += something ();
7543 #pragma omp scan inclusive (r)
7544 use (r);
7546 shall have body with:
7547 // Initialization for input phase, store the reduction initializer:
7548 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7549 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7550 D.2042[_21] = 0;
7551 // Actual input phase:
7553 r.0_5 = D.2042[_20];
7554 _6 = _4 + r.0_5;
7555 D.2042[_20] = _6;
7556 // Initialization for scan phase:
7557 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7558 _26 = D.2043[_25];
7559 _27 = D.2042[_25];
7560 _28 = _26 + _27;
7561 D.2043[_25] = _28;
7562 D.2042[_25] = _28;
7563 // Actual scan phase:
7565 r.1_8 = D.2042[_20];
7567 The "omp simd array" variable D.2042 holds the privatized copy used
7568 inside of the loop and D.2043 is another one that holds copies of
7569 the current original list item. The separate GOMP_SIMD_LANE ifn
7570 kinds are there in order to allow optimizing the initializer store
7571 and combiner sequence, e.g. if it is originally some C++ish user
7572 defined reduction, but allow the vectorizer to pattern recognize it
7573      and turn it into the appropriate vectorized scan.
7575 For exclusive scan, this is slightly different:
7576 #pragma omp simd reduction(inscan,+:r)
7577 for (...)
7579 use (r);
7580 #pragma omp scan exclusive (r)
7581 r += something ();
7583 shall have body with:
7584 // Initialization for input phase, store the reduction initializer:
7585 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7586 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7587 D.2042[_21] = 0;
7588 // Actual input phase:
7590 r.0_5 = D.2042[_20];
7591 _6 = _4 + r.0_5;
7592 D.2042[_20] = _6;
7593 // Initialization for scan phase:
7594 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7595 _26 = D.2043[_25];
7596 D.2044[_25] = _26;
7597 _27 = D.2042[_25];
7598 _28 = _26 + _27;
7599 D.2043[_25] = _28;
7600 // Actual scan phase:
7602 r.1_8 = D.2044[_20];
7603 ... */
7605 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7607 /* Match the D.2042[_21] = 0; store above. Just require that
7608 it is a constant or external definition store. */
7609 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7611 fail_init:
7612 if (dump_enabled_p ())
7613 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7614 "unsupported OpenMP scan initializer store.\n");
7615 return false;
7618 if (! loop_vinfo->scan_map)
7619 loop_vinfo->scan_map = new hash_map<tree, tree>;
7620 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7621 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7622 if (cached)
7623 goto fail_init;
7624 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7626 /* These stores can be vectorized normally. */
7627 return true;
7630 if (rhs_dt != vect_internal_def)
7632 fail:
7633 if (dump_enabled_p ())
7634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7635 "unsupported OpenMP scan combiner pattern.\n");
7636 return false;
7639 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7640 tree rhs = gimple_assign_rhs1 (stmt);
7641 if (TREE_CODE (rhs) != SSA_NAME)
7642 goto fail;
7644 gimple *other_store_stmt = NULL;
7645 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7646 bool inscan_var_store
7647 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7649 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7651 if (!inscan_var_store)
7653 use_operand_p use_p;
7654 imm_use_iterator iter;
7655 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7657 gimple *use_stmt = USE_STMT (use_p);
7658 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7659 continue;
7660 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7661 || !is_gimple_assign (use_stmt)
7662 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7663 || other_store_stmt
7664 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7665 goto fail;
7666 other_store_stmt = use_stmt;
7668 if (other_store_stmt == NULL)
7669 goto fail;
7670 rhs = gimple_assign_lhs (other_store_stmt);
7671 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7672 goto fail;
7675 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7677 use_operand_p use_p;
7678 imm_use_iterator iter;
7679 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7681 gimple *use_stmt = USE_STMT (use_p);
7682 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7683 continue;
7684 if (other_store_stmt)
7685 goto fail;
7686 other_store_stmt = use_stmt;
7689 else
7690 goto fail;
7692 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7693 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7694 || !is_gimple_assign (def_stmt)
7695 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7696 goto fail;
7698 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7699 /* For pointer addition, we should use the normal plus for the vector
7700 operation. */
7701 switch (code)
7703 case POINTER_PLUS_EXPR:
7704 code = PLUS_EXPR;
7705 break;
7706 case MULT_HIGHPART_EXPR:
7707 goto fail;
7708 default:
7709 break;
7711 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7712 goto fail;
7714 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7715 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7716 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7717 goto fail;
7719 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7720 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7721 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7722 || !gimple_assign_load_p (load1_stmt)
7723 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7724 || !gimple_assign_load_p (load2_stmt))
7725 goto fail;
7727 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7728 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7729 if (load1_stmt_info == NULL
7730 || load2_stmt_info == NULL
7731 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7732 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7733 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7734 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7735 goto fail;
7737 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7739 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7740 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7741 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7742 goto fail;
7743 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7744 tree lrhs;
7745 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7746 lrhs = rhs1;
7747 else
7748 lrhs = rhs2;
7749 use_operand_p use_p;
7750 imm_use_iterator iter;
7751 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7753 gimple *use_stmt = USE_STMT (use_p);
7754 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7755 continue;
7756 if (other_store_stmt)
7757 goto fail;
7758 other_store_stmt = use_stmt;
7762 if (other_store_stmt == NULL)
7763 goto fail;
7764 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7765 || !gimple_store_p (other_store_stmt))
7766 goto fail;
7768 stmt_vec_info other_store_stmt_info
7769 = loop_vinfo->lookup_stmt (other_store_stmt);
7770 if (other_store_stmt_info == NULL
7771 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7772 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7773 goto fail;
7775 gimple *stmt1 = stmt;
7776 gimple *stmt2 = other_store_stmt;
7777 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7778 std::swap (stmt1, stmt2);
7779 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7780 gimple_assign_rhs1 (load2_stmt)))
7782 std::swap (rhs1, rhs2);
7783 std::swap (load1_stmt, load2_stmt);
7784 std::swap (load1_stmt_info, load2_stmt_info);
7786 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7787 gimple_assign_rhs1 (load1_stmt)))
7788 goto fail;
7790 tree var3 = NULL_TREE;
7791 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7792 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7793 gimple_assign_rhs1 (load2_stmt)))
7794 goto fail;
7795 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7797 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7798 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7799 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7800 goto fail;
7801 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7802 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7803 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7804 || lookup_attribute ("omp simd inscan exclusive",
7805 DECL_ATTRIBUTES (var3)))
7806 goto fail;
7809 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7810 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7811 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7812 goto fail;
7814 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7815 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7816 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7817 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7818 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7819 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7820 goto fail;
7822 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7823 std::swap (var1, var2);
7825 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7827 if (!lookup_attribute ("omp simd inscan exclusive",
7828 DECL_ATTRIBUTES (var1)))
7829 goto fail;
7830 var1 = var3;
7833 if (loop_vinfo->scan_map == NULL)
7834 goto fail;
7835 tree *init = loop_vinfo->scan_map->get (var1);
7836 if (init == NULL)
7837 goto fail;
7839   /* The IL is as expected; now check if we can actually vectorize it.
7840 Inclusive scan:
7841 _26 = D.2043[_25];
7842 _27 = D.2042[_25];
7843 _28 = _26 + _27;
7844 D.2043[_25] = _28;
7845 D.2042[_25] = _28;
7846 should be vectorized as (where _40 is the vectorized rhs
7847 from the D.2042[_21] = 0; store):
7848 _30 = MEM <vector(8) int> [(int *)&D.2043];
7849 _31 = MEM <vector(8) int> [(int *)&D.2042];
7850 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7851 _33 = _31 + _32;
7852 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7853 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7854 _35 = _33 + _34;
7855 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7856 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7857 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7858 _37 = _35 + _36;
7859 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7860 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7861 _38 = _30 + _37;
7862 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7863 MEM <vector(8) int> [(int *)&D.2043] = _39;
7864 MEM <vector(8) int> [(int *)&D.2042] = _38;
7865 Exclusive scan:
7866 _26 = D.2043[_25];
7867 D.2044[_25] = _26;
7868 _27 = D.2042[_25];
7869 _28 = _26 + _27;
7870 D.2043[_25] = _28;
7871 should be vectorized as (where _40 is the vectorized rhs
7872 from the D.2042[_21] = 0; store):
7873 _30 = MEM <vector(8) int> [(int *)&D.2043];
7874 _31 = MEM <vector(8) int> [(int *)&D.2042];
7875 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7876 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7877 _34 = _32 + _33;
7878 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7879 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7880 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7881 _36 = _34 + _35;
7882 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7883 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7884 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7885 _38 = _36 + _37;
7886 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7887 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7888 _39 = _30 + _38;
7889 _50 = _31 + _39;
7890 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7891 MEM <vector(8) int> [(int *)&D.2044] = _39;
7892 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7893 enum machine_mode vec_mode = TYPE_MODE (vectype);
7894 optab optab = optab_for_tree_code (code, vectype, optab_default);
7895 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7896 goto fail;
7898 int units_log2 = scan_store_can_perm_p (vectype, *init);
7899 if (units_log2 == -1)
7900 goto fail;
7902 return true;
7906 /* Function vectorizable_scan_store.
7908    Helper of vectorizable_store, with arguments as for vectorizable_store.
7909    Handle only the transformation; the checking is done in check_scan_store.  */
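/* Editor's note: a minimal scalar C model of the log2-step inclusive scan
   that the transformation below builds with VEC_PERM_EXPRs and additions,
   assuming 8 elements; this is only a sketch, not GCC internals:

     static void
     inclusive_scan8 (int a[8])
     {
       for (int shift = 1; shift < 8; shift <<= 1)	// units_log2 == 3 steps
	 for (int j = 7; j >= shift; --j)		// downwards, so a[j - shift]
	   a[j] += a[j - shift];			// still holds the old value
     }

   after which a[i] holds the sum of the first i + 1 original elements.  */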
7911 static bool
7912 vectorizable_scan_store (vec_info *vinfo,
7913 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7914 gimple **vec_stmt, int ncopies)
7916 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7917 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7918 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7919 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7921 if (dump_enabled_p ())
7922 dump_printf_loc (MSG_NOTE, vect_location,
7923 "transform scan store. ncopies = %d\n", ncopies);
7925 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7926 tree rhs = gimple_assign_rhs1 (stmt);
7927 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7929 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7930 bool inscan_var_store
7931 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7933 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7935 use_operand_p use_p;
7936 imm_use_iterator iter;
7937 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7939 gimple *use_stmt = USE_STMT (use_p);
7940 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7941 continue;
7942 rhs = gimple_assign_lhs (use_stmt);
7943 break;
7947 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7948 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7949 if (code == POINTER_PLUS_EXPR)
7950 code = PLUS_EXPR;
7951 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7952 && commutative_tree_code (code));
7953 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7954 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7955 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7956 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7957 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7958 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7959 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7960 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7961 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7962 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7963 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7965 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7967 std::swap (rhs1, rhs2);
7968 std::swap (var1, var2);
7969 std::swap (load1_dr_info, load2_dr_info);
7972 tree *init = loop_vinfo->scan_map->get (var1);
7973 gcc_assert (init);
7975 unsigned HOST_WIDE_INT nunits;
7976 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7977 gcc_unreachable ();
7978 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7979 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7980 gcc_assert (units_log2 > 0);
7981 auto_vec<tree, 16> perms;
7982 perms.quick_grow (units_log2 + 1);
7983 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7984 for (int i = 0; i <= units_log2; ++i)
7986 unsigned HOST_WIDE_INT j, k;
7987 vec_perm_builder sel (nunits, nunits, 1);
7988 sel.quick_grow (nunits);
7989 if (i == units_log2)
7990 for (j = 0; j < nunits; ++j)
7991 sel[j] = nunits - 1;
7992 else
7994 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7995 sel[j] = j;
7996 for (k = 0; j < nunits; ++j, ++k)
7997 sel[j] = nunits + k;
7999 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
8000 if (!use_whole_vector.is_empty ()
8001 && use_whole_vector[i] != scan_store_kind_perm)
8003 if (zero_vec == NULL_TREE)
8004 zero_vec = build_zero_cst (vectype);
8005 if (masktype == NULL_TREE
8006 && use_whole_vector[i] == scan_store_kind_lshift_cond)
8007 masktype = truth_type_for (vectype);
8008 perms[i] = vect_gen_perm_mask_any (vectype, indices);
8010 else
8011 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
8014 tree vec_oprnd1 = NULL_TREE;
8015 tree vec_oprnd2 = NULL_TREE;
8016 tree vec_oprnd3 = NULL_TREE;
8017 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
8018 tree dataref_offset = build_int_cst (ref_type, 0);
8019 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
8020 vectype, VMAT_CONTIGUOUS);
8021 tree ldataref_ptr = NULL_TREE;
8022 tree orig = NULL_TREE;
8023 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
8024 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
8025 auto_vec<tree> vec_oprnds1;
8026 auto_vec<tree> vec_oprnds2;
8027 auto_vec<tree> vec_oprnds3;
8028 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
8029 *init, &vec_oprnds1,
8030 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
8031 rhs2, &vec_oprnds3);
8032 for (int j = 0; j < ncopies; j++)
8034 vec_oprnd1 = vec_oprnds1[j];
8035 if (ldataref_ptr == NULL)
8036 vec_oprnd2 = vec_oprnds2[j];
8037 vec_oprnd3 = vec_oprnds3[j];
8038 if (j == 0)
8039 orig = vec_oprnd3;
8040 else if (!inscan_var_store)
8041 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8043 if (ldataref_ptr)
8045 vec_oprnd2 = make_ssa_name (vectype);
8046 tree data_ref = fold_build2 (MEM_REF, vectype,
8047 unshare_expr (ldataref_ptr),
8048 dataref_offset);
8049 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
8050 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
8051 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8052 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8053 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8056 tree v = vec_oprnd2;
8057 for (int i = 0; i < units_log2; ++i)
8059 tree new_temp = make_ssa_name (vectype);
8060 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
8061 (zero_vec
8062 && (use_whole_vector[i]
8063 != scan_store_kind_perm))
8064 ? zero_vec : vec_oprnd1, v,
8065 perms[i]);
8066 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8067 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8068 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8070 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
8072 	      /* The whole-vector shift shifted in zeros, but if *init
8073 is not initializer_zerop, we need to replace those elements
8074 with elements from vec_oprnd1. */
8075 tree_vector_builder vb (masktype, nunits, 1);
8076 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
8077 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
8078 ? boolean_false_node : boolean_true_node);
8080 tree new_temp2 = make_ssa_name (vectype);
8081 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
8082 new_temp, vec_oprnd1);
8083 vect_finish_stmt_generation (vinfo, stmt_info,
8084 g, gsi);
8085 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8086 new_temp = new_temp2;
8089 /* For exclusive scan, perform the perms[i] permutation once
8090 more. */
8091 if (i == 0
8092 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
8093 && v == vec_oprnd2)
8095 v = new_temp;
8096 --i;
8097 continue;
8100 tree new_temp2 = make_ssa_name (vectype);
8101 g = gimple_build_assign (new_temp2, code, v, new_temp);
8102 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8103 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8105 v = new_temp2;
8108 tree new_temp = make_ssa_name (vectype);
8109 gimple *g = gimple_build_assign (new_temp, code, orig, v);
8110 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8111 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8113 tree last_perm_arg = new_temp;
8114 /* For exclusive scan, new_temp computed above is the exclusive scan
8115 prefix sum. Turn it into inclusive prefix sum for the broadcast
8116 of the last element into orig. */
8117 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8119 last_perm_arg = make_ssa_name (vectype);
8120 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8121 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8122 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8125 orig = make_ssa_name (vectype);
8126 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8127 last_perm_arg, perms[units_log2]);
8128 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8129 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8131 if (!inscan_var_store)
8133 tree data_ref = fold_build2 (MEM_REF, vectype,
8134 unshare_expr (dataref_ptr),
8135 dataref_offset);
8136 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8137 g = gimple_build_assign (data_ref, new_temp);
8138 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8139 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8143 if (inscan_var_store)
8144 for (int j = 0; j < ncopies; j++)
8146 if (j != 0)
8147 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8149 tree data_ref = fold_build2 (MEM_REF, vectype,
8150 unshare_expr (dataref_ptr),
8151 dataref_offset);
8152 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8153 gimple *g = gimple_build_assign (data_ref, orig);
8154 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8155 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8157 return true;
8161 /* Function vectorizable_store.
8163 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
8164 that can be vectorized.
8165 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8166 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8167 Return true if STMT_INFO is vectorizable in this way. */
8169 static bool
8170 vectorizable_store (vec_info *vinfo,
8171 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8172 gimple **vec_stmt, slp_tree slp_node,
8173 stmt_vector_for_cost *cost_vec)
8175 tree data_ref;
8176 tree vec_oprnd = NULL_TREE;
8177 tree elem_type;
8178 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8179 class loop *loop = NULL;
8180 machine_mode vec_mode;
8181 tree dummy;
8182 enum vect_def_type rhs_dt = vect_unknown_def_type;
8183 enum vect_def_type mask_dt = vect_unknown_def_type;
8184 tree dataref_ptr = NULL_TREE;
8185 tree dataref_offset = NULL_TREE;
8186 gimple *ptr_incr = NULL;
8187 int ncopies;
8188 int j;
8189 stmt_vec_info first_stmt_info;
8190 bool grouped_store;
8191 unsigned int group_size, i;
8192 bool slp = (slp_node != NULL);
8193 unsigned int vec_num;
8194 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8195 tree aggr_type;
8196 gather_scatter_info gs_info;
8197 poly_uint64 vf;
8198 vec_load_store_type vls_type;
8199 tree ref_type;
8201 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8202 return false;
8204 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8205 && ! vec_stmt)
8206 return false;
8208 /* Is vectorizable store? */
8210 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8211 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8213 tree scalar_dest = gimple_assign_lhs (assign);
8214 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8215 && is_pattern_stmt_p (stmt_info))
8216 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8217 if (TREE_CODE (scalar_dest) != ARRAY_REF
8218 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8219 && TREE_CODE (scalar_dest) != INDIRECT_REF
8220 && TREE_CODE (scalar_dest) != COMPONENT_REF
8221 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8222 && TREE_CODE (scalar_dest) != REALPART_EXPR
8223 && TREE_CODE (scalar_dest) != MEM_REF)
8224 return false;
8226 else
8228 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8229 if (!call || !gimple_call_internal_p (call))
8230 return false;
8232 internal_fn ifn = gimple_call_internal_fn (call);
8233 if (!internal_store_fn_p (ifn))
8234 return false;
8236 int mask_index = internal_fn_mask_index (ifn);
8237 if (mask_index >= 0 && slp_node)
8238 mask_index = vect_slp_child_index_for_operand
8239 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8240 if (mask_index >= 0
8241 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8242 &mask, NULL, &mask_dt, &mask_vectype))
8243 return false;
8246 /* Cannot have hybrid store SLP -- that would mean storing to the
8247 same location twice. */
8248 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8250 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8251 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8253 if (loop_vinfo)
8255 loop = LOOP_VINFO_LOOP (loop_vinfo);
8256 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8258 else
8259 vf = 1;
8261 /* Multiple types in SLP are handled by creating the appropriate number of
8262 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8263 case of SLP. */
8264 if (slp)
8265 ncopies = 1;
8266 else
8267 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8269 gcc_assert (ncopies >= 1);
8271 /* FORNOW. This restriction should be relaxed. */
8272 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8274 if (dump_enabled_p ())
8275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8276 "multiple types in nested loop.\n");
8277 return false;
8280 tree op;
8281 slp_tree op_node;
8282 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8283 &op, &op_node, &rhs_dt, &rhs_vectype, &vls_type))
8284 return false;
8286 elem_type = TREE_TYPE (vectype);
8287 vec_mode = TYPE_MODE (vectype);
8289 if (!STMT_VINFO_DATA_REF (stmt_info))
8290 return false;
8292 vect_memory_access_type memory_access_type;
8293 enum dr_alignment_support alignment_support_scheme;
8294 int misalignment;
8295 poly_int64 poffset;
8296 internal_fn lanes_ifn;
8297 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8298 ncopies, &memory_access_type, &poffset,
8299 &alignment_support_scheme, &misalignment, &gs_info,
8300 &lanes_ifn))
8301 return false;
8303 if (mask)
8305 if (memory_access_type == VMAT_CONTIGUOUS)
8307 if (!VECTOR_MODE_P (vec_mode)
8308 || !can_vec_mask_load_store_p (vec_mode,
8309 TYPE_MODE (mask_vectype), false))
8310 return false;
8312 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8313 && (memory_access_type != VMAT_GATHER_SCATTER
8314 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8316 if (dump_enabled_p ())
8317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8318 "unsupported access type for masked store.\n");
8319 return false;
8321 else if (memory_access_type == VMAT_GATHER_SCATTER
8322 && gs_info.ifn == IFN_LAST
8323 && !gs_info.decl)
8325 if (dump_enabled_p ())
8326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8327 "unsupported masked emulated scatter.\n");
8328 return false;
8331 else
8333       /* FORNOW.  In some cases we can vectorize even if the data type is
8334 	  not supported (e.g. array initialization with 0).  */
8335 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8336 return false;
8339 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8340 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8341 && memory_access_type != VMAT_GATHER_SCATTER
8342 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8343 if (grouped_store)
8345 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8346 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8347 group_size = DR_GROUP_SIZE (first_stmt_info);
8349 else
8351 first_stmt_info = stmt_info;
8352 first_dr_info = dr_info;
8353 group_size = vec_num = 1;
8356 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8358 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8359 memory_access_type))
8360 return false;
8363 bool costing_p = !vec_stmt;
8364 if (costing_p) /* transformation not required. */
8366 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8368 if (loop_vinfo
8369 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8370 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8371 vls_type, group_size,
8372 memory_access_type, &gs_info,
8373 mask);
8375 if (slp_node
8376 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8377 vectype))
8379 if (dump_enabled_p ())
8380 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8381 "incompatible vector types for invariants\n");
8382 return false;
8385 if (dump_enabled_p ()
8386 && memory_access_type != VMAT_ELEMENTWISE
8387 && memory_access_type != VMAT_GATHER_SCATTER
8388 && alignment_support_scheme != dr_aligned)
8389 dump_printf_loc (MSG_NOTE, vect_location,
8390 "Vectorizing an unaligned access.\n");
8392 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8394       /* As function vect_transform_stmt shows, for interleaving stores
8395 	  the whole chain is vectorized when the last store in the chain
8396 	  is reached; the other stores in the group are skipped.  So we
8397 	  want to cost only the last one here, but it's not trivial to
8398 	  get hold of the last one; since using the first one for costing
8399 	  is equivalent, use the first one instead.  */
8400 if (grouped_store
8401 && !slp
8402 && first_stmt_info != stmt_info)
8403 return true;
8405 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8407 /* Transform. */
8409 ensure_base_align (dr_info);
8411 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8413 vect_build_scatter_store_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info,
8414 mask, cost_vec);
8415 return true;
8417 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8419 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8420 gcc_assert (!slp);
8421 if (costing_p)
8423 unsigned int inside_cost = 0, prologue_cost = 0;
8424 if (vls_type == VLS_STORE_INVARIANT)
8425 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8426 stmt_info, 0, vect_prologue);
8427 vect_get_store_cost (vinfo, stmt_info, ncopies,
8428 alignment_support_scheme, misalignment,
8429 &inside_cost, cost_vec);
8431 if (dump_enabled_p ())
8432 dump_printf_loc (MSG_NOTE, vect_location,
8433 "vect_model_store_cost: inside_cost = %d, "
8434 "prologue_cost = %d .\n",
8435 inside_cost, prologue_cost);
8437 return true;
8439 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8442 if (grouped_store)
8444 /* FORNOW */
8445 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8447 if (slp)
8449 grouped_store = false;
8450 /* VEC_NUM is the number of vect stmts to be created for this
8451 group. */
8452 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8453 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8454 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8455 == first_stmt_info);
8456 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8457 op = vect_get_store_rhs (first_stmt_info);
8459 else
8460 /* VEC_NUM is the number of vect stmts to be created for this
8461 group. */
8462 vec_num = group_size;
8464 ref_type = get_group_alias_ptr_type (first_stmt_info);
8466 else
8467 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8469 if (!costing_p && dump_enabled_p ())
8470 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8471 ncopies);
8473   /* Check if we need to update the prologue cost for an invariant,
8474      and update it accordingly if so.  If it's not for an
8475      interleaving store, we can just check vls_type; but if
8476      it's for an interleaving store, we need to check the def_type
8477      of the stored value since the current vls_type is just
8478      for first_stmt_info.  */
8479 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8481 gcc_assert (costing_p);
8482 if (slp)
8483 return;
8484 if (grouped_store)
8486 gcc_assert (store_rhs);
8487 enum vect_def_type cdt;
8488 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8489 if (cdt != vect_constant_def && cdt != vect_external_def)
8490 return;
8492 else if (vls_type != VLS_STORE_INVARIANT)
8493 return;
8494 *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
8495 0, vect_prologue);
8498 if (memory_access_type == VMAT_ELEMENTWISE
8499 || memory_access_type == VMAT_STRIDED_SLP)
8501 unsigned inside_cost = 0, prologue_cost = 0;
8502 gimple_stmt_iterator incr_gsi;
8503 bool insert_after;
8504 gimple *incr;
8505 tree offvar;
8506 tree ivstep;
8507 tree running_off;
8508 tree stride_base, stride_step, alias_off;
8509 tree vec_oprnd = NULL_TREE;
8510 tree dr_offset;
8511 unsigned int g;
8512 /* Checked by get_load_store_type. */
8513 unsigned int const_nunits = nunits.to_constant ();
8515 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8516 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8518 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8519 stride_base
8520 = fold_build_pointer_plus
8521 (DR_BASE_ADDRESS (first_dr_info->dr),
8522 size_binop (PLUS_EXPR,
8523 convert_to_ptrofftype (dr_offset),
8524 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8525 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8527 /* For a store with loop-invariant (but other than power-of-2)
8528 stride (i.e. not a grouped access) like so:
8530 for (i = 0; i < n; i += stride)
8531 array[i] = ...;
8533 we generate a new induction variable and new stores from
8534 the components of the (vectorized) rhs:
8536 for (j = 0; ; j += VF*stride)
8537 vectemp = ...;
8538 tmp1 = vectemp[0];
8539 array[j] = tmp1;
8540 tmp2 = vectemp[1];
8541 array[j + stride] = tmp2;
8545 unsigned nstores = const_nunits;
8546 unsigned lnel = 1;
8547 tree ltype = elem_type;
8548 tree lvectype = vectype;
8549 if (slp)
8551 if (group_size < const_nunits
8552 && const_nunits % group_size == 0)
8554 nstores = const_nunits / group_size;
8555 lnel = group_size;
8556 ltype = build_vector_type (elem_type, group_size);
8557 lvectype = vectype;
8559 /* First check if vec_extract optab doesn't support extraction
8560 of vector elts directly. */
8561 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8562 machine_mode vmode;
8563 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8564 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8565 group_size).exists (&vmode)
8566 || (convert_optab_handler (vec_extract_optab,
8567 TYPE_MODE (vectype), vmode)
8568 == CODE_FOR_nothing))
8570 /* Try to avoid emitting an extract of vector elements
8571 by performing the extracts using an integer type of the
8572 same size, extracting from a vector of those and then
8573 re-interpreting it as the original vector type if
8574 supported. */
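		  /* Editor's illustration (GCC vector extensions, invented
		     types): for group_size == 2 chars out of a V8QI vector
		     this amounts to reusing 16-bit lanes, e.g.

		       typedef unsigned char  v8qi __attribute__ ((vector_size (8)));
		       typedef unsigned short v4hi __attribute__ ((vector_size (8)));

		       void
		       store_pair (v8qi v, unsigned short *p)
		       {
			 v4hi w = (v4hi) v;	// the VIEW_CONVERT_EXPR pun
			 p[0] = w[0];		// one store covers two chars
		       }

		     so each extracted lane stores a whole group at once.  */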
8575 unsigned lsize
8576 = group_size * GET_MODE_BITSIZE (elmode);
8577 unsigned int lnunits = const_nunits / group_size;
8578 /* If we can't construct such a vector fall back to
8579 element extracts from the original vector type and
8580 element size stores. */
8581 if (int_mode_for_size (lsize, 0).exists (&elmode)
8582 && VECTOR_MODE_P (TYPE_MODE (vectype))
8583 && related_vector_mode (TYPE_MODE (vectype), elmode,
8584 lnunits).exists (&vmode)
8585 && (convert_optab_handler (vec_extract_optab,
8586 vmode, elmode)
8587 != CODE_FOR_nothing))
8589 nstores = lnunits;
8590 lnel = group_size;
8591 ltype = build_nonstandard_integer_type (lsize, 1);
8592 lvectype = build_vector_type (ltype, nstores);
8594 /* Else fall back to vector extraction anyway.
8595 Fewer stores are more important than avoiding spilling
8596 of the vector we extract from. Compared to the
8597 construction case in vectorizable_load no store-forwarding
8598 issue exists here for reasonable archs. */
8601 else if (group_size >= const_nunits
8602 && group_size % const_nunits == 0)
8604 int mis_align = dr_misalignment (first_dr_info, vectype);
8605 dr_alignment_support dr_align
8606 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8607 mis_align);
8608 if (dr_align == dr_aligned
8609 || dr_align == dr_unaligned_supported)
8611 nstores = 1;
8612 lnel = const_nunits;
8613 ltype = vectype;
8614 lvectype = vectype;
8615 alignment_support_scheme = dr_align;
8616 misalignment = mis_align;
8619 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8620 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8623 if (!costing_p)
8625 ivstep = stride_step;
8626 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8627 build_int_cst (TREE_TYPE (ivstep), vf));
8629 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8631 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8632 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8633 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8634 insert_after, &offvar, NULL);
8635 incr = gsi_stmt (incr_gsi);
8637 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8640 alias_off = build_int_cst (ref_type, 0);
8641 stmt_vec_info next_stmt_info = first_stmt_info;
8642 auto_vec<tree> vec_oprnds (ncopies);
8643       /* For costing some adjacent vector stores, we'd like to cost them
8644 	  once with their total number instead of costing each one by one.  */
8645 unsigned int n_adjacent_stores = 0;
8646 for (g = 0; g < group_size; g++)
8648 running_off = offvar;
8649 if (!costing_p)
8651 if (g)
8653 tree size = TYPE_SIZE_UNIT (ltype);
8654 tree pos
8655 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8656 tree newoff = copy_ssa_name (running_off, NULL);
8657 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8658 running_off, pos);
8659 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8660 running_off = newoff;
8663 if (!slp)
8664 op = vect_get_store_rhs (next_stmt_info);
8665 if (!costing_p)
8666 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
8667 &vec_oprnds);
8668 else
8669 update_prologue_cost (&prologue_cost, op);
8670 unsigned int group_el = 0;
8671 unsigned HOST_WIDE_INT
8672 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8673 for (j = 0; j < ncopies; j++)
8675 if (!costing_p)
8677 vec_oprnd = vec_oprnds[j];
8678 /* Pun the vector to extract from if necessary. */
8679 if (lvectype != vectype)
8681 tree tem = make_ssa_name (lvectype);
8682 tree cvt
8683 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8684 gimple *pun = gimple_build_assign (tem, cvt);
8685 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8686 vec_oprnd = tem;
8689 for (i = 0; i < nstores; i++)
8691 if (costing_p)
8693 		      /* We only need vector extraction when there is more
8694 			 than one store.  */
8695 if (nstores > 1)
8696 inside_cost
8697 += record_stmt_cost (cost_vec, 1, vec_to_scalar,
8698 stmt_info, 0, vect_body);
8699 		      /* Treat a single-lane vector type store as a scalar
8700 			 store to avoid ICEs like PR 110776.  */
8701 if (VECTOR_TYPE_P (ltype)
8702 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8703 n_adjacent_stores++;
8704 else
8705 inside_cost
8706 += record_stmt_cost (cost_vec, 1, scalar_store,
8707 stmt_info, 0, vect_body);
8708 continue;
8710 tree newref, newoff;
8711 gimple *incr, *assign;
8712 tree size = TYPE_SIZE (ltype);
8713 /* Extract the i'th component. */
8714 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8715 bitsize_int (i), size);
8716 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8717 size, pos);
8719 elem = force_gimple_operand_gsi (gsi, elem, true,
8720 NULL_TREE, true,
8721 GSI_SAME_STMT);
8723 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8724 group_el * elsz);
8725 newref = build2 (MEM_REF, ltype,
8726 running_off, this_off);
8727 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8729 /* And store it to *running_off. */
8730 assign = gimple_build_assign (newref, elem);
8731 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8733 group_el += lnel;
8734 if (! slp
8735 || group_el == group_size)
8737 newoff = copy_ssa_name (running_off, NULL);
8738 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8739 running_off, stride_step);
8740 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8742 running_off = newoff;
8743 group_el = 0;
8745 if (g == group_size - 1
8746 && !slp)
8748 if (j == 0 && i == 0)
8749 *vec_stmt = assign;
8750 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8754 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8755 vec_oprnds.truncate(0);
8756 if (slp)
8757 break;
8760 if (costing_p)
8762 if (n_adjacent_stores > 0)
8763 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8764 alignment_support_scheme, misalignment,
8765 &inside_cost, cost_vec);
8766 if (dump_enabled_p ())
8767 dump_printf_loc (MSG_NOTE, vect_location,
8768 "vect_model_store_cost: inside_cost = %d, "
8769 "prologue_cost = %d .\n",
8770 inside_cost, prologue_cost);
8773 return true;
8776 gcc_assert (alignment_support_scheme);
8777 vec_loop_masks *loop_masks
8778 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8779 ? &LOOP_VINFO_MASKS (loop_vinfo)
8780 : NULL);
8781 vec_loop_lens *loop_lens
8782 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8783 ? &LOOP_VINFO_LENS (loop_vinfo)
8784 : NULL);
8786 /* Shouldn't go with length-based approach if fully masked. */
8787 gcc_assert (!loop_lens || !loop_masks);
8789 /* Targets with store-lane instructions must not require explicit
8790 realignment. vect_supportable_dr_alignment always returns either
8791 dr_aligned or dr_unaligned_supported for masked operations. */
8792 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8793 && !mask
8794 && !loop_masks)
8795 || alignment_support_scheme == dr_aligned
8796 || alignment_support_scheme == dr_unaligned_supported);
8798 tree offset = NULL_TREE;
8799 if (!known_eq (poffset, 0))
8800 offset = size_int (poffset);
8802 tree bump;
8803 tree vec_offset = NULL_TREE;
8804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8806 aggr_type = NULL_TREE;
8807 bump = NULL_TREE;
8809 else if (memory_access_type == VMAT_GATHER_SCATTER)
8811 aggr_type = elem_type;
8812 if (!costing_p)
8813 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8814 &bump, &vec_offset, loop_lens);
8816 else
8818 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8819 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8820 else
8821 aggr_type = vectype;
8822 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8823 memory_access_type, loop_lens);
8826 if (mask && !costing_p)
8827 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8829 /* In case the vectorization factor (VF) is bigger than the number
8830 of elements that we can fit in a vectype (nunits), we have to generate
8831 more than one vector stmt - i.e - we need to "unroll" the
8832 vector stmt by a factor VF/nunits. */
8834 /* In case of interleaving (non-unit grouped access):
8836 S1: &base + 2 = x2
8837 S2: &base = x0
8838 S3: &base + 1 = x1
8839 S4: &base + 3 = x3
8841 We create vectorized stores starting from the base address (the access of
8842 the first stmt in the chain, S2 in the above example) when the last store
8843 stmt of the chain (S4) is reached:
8845 VS1: &base = vx2
8846 VS2: &base + vec_size*1 = vx0
8847 VS3: &base + vec_size*2 = vx1
8848 VS4: &base + vec_size*3 = vx3
8850 Then permutation statements are generated:
8852 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8853 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8856 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8857 (the order of the data-refs in the output of vect_permute_store_chain
8858 corresponds to the order of scalar stmts in the interleaving chain - see
8859 the documentation of vect_permute_store_chain()).
8861 In case of both multiple types and interleaving, above vector stores and
8862 permutation stmts are created for every copy. The result vector stmts are
8863 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8864 STMT_VINFO_RELATED_STMT for the next copies.
8867 auto_vec<tree> dr_chain (group_size);
8868 auto_vec<tree> vec_masks;
8869 tree vec_mask = NULL;
8870 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8871 for (i = 0; i < group_size; i++)
8872 gvec_oprnds.quick_push (new auto_vec<tree> (ncopies));
8874 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8876 gcc_assert (!slp && grouped_store);
8877 unsigned inside_cost = 0, prologue_cost = 0;
8878 /* For costing some adjacent vector stores, we'd like to cost them
8879 once with their total number instead of costing each one by one.  */
8880 unsigned int n_adjacent_stores = 0;
8881 for (j = 0; j < ncopies; j++)
8883 gimple *new_stmt;
8884 if (j == 0)
8886 /* For interleaved stores we collect vectorized defs for all
8887 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8888 as an input to vect_permute_store_chain(). */
8889 stmt_vec_info next_stmt_info = first_stmt_info;
8890 for (i = 0; i < group_size; i++)
8892 /* Since gaps are not supported for interleaved stores,
8893 DR_GROUP_SIZE is the exact number of stmts in the
8894 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8895 op = vect_get_store_rhs (next_stmt_info);
8896 if (costing_p)
8897 update_prologue_cost (&prologue_cost, op);
8898 else
8900 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8901 ncopies, op,
8902 gvec_oprnds[i]);
8903 vec_oprnd = (*gvec_oprnds[i])[0];
8904 dr_chain.quick_push (vec_oprnd);
8906 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8909 if (!costing_p)
8911 if (mask)
8913 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8914 mask, &vec_masks,
8915 mask_vectype);
8916 vec_mask = vec_masks[0];
8919 /* We should have caught mismatched types earlier.  */
8920 gcc_assert (
8921 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8922 dataref_ptr
8923 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8924 aggr_type, NULL, offset, &dummy,
8925 gsi, &ptr_incr, false, bump);
8928 else if (!costing_p)
8930 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8931 /* DR_CHAIN is then used as an input to
8932 vect_permute_store_chain(). */
8933 for (i = 0; i < group_size; i++)
8935 vec_oprnd = (*gvec_oprnds[i])[j];
8936 dr_chain[i] = vec_oprnd;
8938 if (mask)
8939 vec_mask = vec_masks[j];
8940 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8941 stmt_info, bump);
8944 if (costing_p)
8946 n_adjacent_stores += vec_num;
8947 continue;
8950 /* Get an array into which we can store the individual vectors. */
8951 tree vec_array = create_vector_array (vectype, vec_num);
8953 /* Invalidate the current contents of VEC_ARRAY. This should
8954 become an RTL clobber too, which prevents the vector registers
8955 from being upward-exposed. */
8956 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8958 /* Store the individual vectors into the array. */
8959 for (i = 0; i < vec_num; i++)
8961 vec_oprnd = dr_chain[i];
8962 write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
8966 tree final_mask = NULL;
8967 tree final_len = NULL;
8968 tree bias = NULL;
8969 if (loop_masks)
8970 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8971 ncopies, vectype, j);
8972 if (vec_mask)
8973 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8974 vec_mask, gsi);
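/* If both a loop mask and a conditional mask are present, prepare_vec_mask
   combines them, so a lane is stored only when both masks have its bit set.  */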
8976 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8978 if (loop_lens)
8979 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8980 ncopies, vectype, j, 1);
8981 else
8982 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
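/* The target-reported bias is 0 or -1; the len-style IFNs operate
   on FINAL_LEN + BIAS elements.  */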
8983 signed char biasval
8984 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8985 bias = build_int_cst (intQI_type_node, biasval);
8986 if (!final_mask)
8988 mask_vectype = truth_type_for (vectype);
8989 final_mask = build_minus_one_cst (mask_vectype);
8993 gcall *call;
8994 if (final_len && final_mask)
8996 /* Emit:
8997 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8998 LEN, BIAS, VEC_ARRAY). */
8999 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9000 tree alias_ptr = build_int_cst (ref_type, align);
9001 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
9002 dataref_ptr, alias_ptr,
9003 final_mask, final_len, bias,
9004 vec_array);
9006 else if (final_mask)
9008 /* Emit:
9009 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
9010 VEC_ARRAY). */
9011 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9012 tree alias_ptr = build_int_cst (ref_type, align);
9013 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
9014 dataref_ptr, alias_ptr,
9015 final_mask, vec_array);
9017 else
9019 /* Emit:
9020 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
9021 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9022 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
9023 gimple_call_set_lhs (call, data_ref);
9025 gimple_call_set_nothrow (call, true);
9026 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9027 new_stmt = call;
9029 /* Record that VEC_ARRAY is now dead. */
9030 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9031 if (j == 0)
9032 *vec_stmt = new_stmt;
9033 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9036 if (costing_p)
9038 if (n_adjacent_stores > 0)
9039 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
9040 alignment_support_scheme, misalignment,
9041 &inside_cost, cost_vec);
9042 if (dump_enabled_p ())
9043 dump_printf_loc (MSG_NOTE, vect_location,
9044 "vect_model_store_cost: inside_cost = %d, "
9045 "prologue_cost = %d .\n",
9046 inside_cost, prologue_cost);
9049 return true;
9052 if (memory_access_type == VMAT_GATHER_SCATTER)
9054 gcc_assert (!slp && !grouped_store);
9055 auto_vec<tree> vec_offsets;
9056 unsigned int inside_cost = 0, prologue_cost = 0;
9057 for (j = 0; j < ncopies; j++)
9059 gimple *new_stmt;
9060 if (j == 0)
9062 if (costing_p && vls_type == VLS_STORE_INVARIANT)
9063 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
9064 stmt_info, 0, vect_prologue);
9065 else if (!costing_p)
9067 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
9068 DR_CHAIN is of size 1. */
9069 gcc_assert (group_size == 1);
9070 op = vect_get_store_rhs (first_stmt_info);
9071 vect_get_vec_defs_for_operand (vinfo, first_stmt_info,
9072 ncopies, op, gvec_oprnds[0]);
9073 vec_oprnd = (*gvec_oprnds[0])[0];
9074 dr_chain.quick_push (vec_oprnd);
9075 if (mask)
9077 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9078 mask, &vec_masks,
9079 mask_vectype);
9080 vec_mask = vec_masks[0];
9083 /* We should have caught mismatched types earlier.  */
9084 gcc_assert (
9085 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
9086 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9087 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9088 slp_node, &gs_info,
9089 &dataref_ptr, &vec_offsets);
9090 else
9091 dataref_ptr
9092 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
9093 aggr_type, NULL, offset,
9094 &dummy, gsi, &ptr_incr, false,
9095 bump);
9098 else if (!costing_p)
9100 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9101 vec_oprnd = (*gvec_oprnds[0])[j];
9102 dr_chain[0] = vec_oprnd;
9103 if (mask)
9104 vec_mask = vec_masks[j];
9105 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9106 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9107 gsi, stmt_info, bump);
9110 new_stmt = NULL;
9111 unsigned HOST_WIDE_INT align;
9112 tree final_mask = NULL_TREE;
9113 tree final_len = NULL_TREE;
9114 tree bias = NULL_TREE;
9115 if (!costing_p)
9117 if (loop_masks)
9118 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9119 ncopies, vectype, j);
9120 if (vec_mask)
9121 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9122 final_mask, vec_mask, gsi);
9125 if (gs_info.ifn != IFN_LAST)
9127 if (costing_p)
9129 unsigned int cnunits = vect_nunits_for_cost (vectype);
9130 inside_cost
9131 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9132 stmt_info, 0, vect_body);
9133 continue;
9136 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9137 vec_offset = vec_offsets[j];
9138 tree scale = size_int (gs_info.scale);
9140 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9142 if (loop_lens)
9143 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9144 ncopies, vectype, j, 1);
9145 else
9146 final_len = build_int_cst (sizetype,
9147 TYPE_VECTOR_SUBPARTS (vectype));
9148 signed char biasval
9149 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9150 bias = build_int_cst (intQI_type_node, biasval);
9151 if (!final_mask)
9153 mask_vectype = truth_type_for (vectype);
9154 final_mask = build_minus_one_cst (mask_vectype);
9158 gcall *call;
9159 if (final_len && final_mask)
9160 call = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
9161 7, dataref_ptr, vec_offset,
9162 scale, vec_oprnd, final_mask,
9163 final_len, bias);
9164 else if (final_mask)
9165 call
9166 = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
9167 dataref_ptr, vec_offset, scale,
9168 vec_oprnd, final_mask);
9169 else
9170 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9171 dataref_ptr, vec_offset,
9172 scale, vec_oprnd);
9173 gimple_call_set_nothrow (call, true);
9174 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9175 new_stmt = call;
9177 else
9179 /* Emulated scatter. */
9180 gcc_assert (!final_mask);
9181 if (costing_p)
9183 unsigned int cnunits = vect_nunits_for_cost (vectype);
9184 /* For an emulated scatter, N offset vector element extracts
9185 (we assume the scalar scaling and ptr + offset add is
9186 consumed by the store).  */
9187 inside_cost
9188 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9189 stmt_info, 0, vect_body);
9190 /* N scalar stores plus extracting the elements. */
9191 inside_cost
9192 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9193 stmt_info, 0, vect_body);
9194 inside_cost
9195 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9196 stmt_info, 0, vect_body);
9197 continue;
9200 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9201 unsigned HOST_WIDE_INT const_offset_nunits
9202 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
9203 vec<constructor_elt, va_gc> *ctor_elts;
9204 vec_alloc (ctor_elts, const_nunits);
9205 gimple_seq stmts = NULL;
9206 tree elt_type = TREE_TYPE (vectype);
9207 unsigned HOST_WIDE_INT elt_size
9208 = tree_to_uhwi (TYPE_SIZE (elt_type));
9209 /* We support offset vectors with more elements
9210 than the data vector for now. */
9211 unsigned HOST_WIDE_INT factor
9212 = const_offset_nunits / const_nunits;
9213 vec_offset = vec_offsets[j / factor];
9214 unsigned elt_offset = (j % factor) * const_nunits;
9215 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9216 tree scale = size_int (gs_info.scale);
9217 align = get_object_alignment (DR_REF (first_dr_info->dr));
9218 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
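/* Emulate the scatter with CONST_NUNITS scalar stores; for lane K this
   emits a sequence like (with made-up SSA names)
     _idx = (sizetype) BIT_FIELD_REF <vec_offset, ...> * scale;
     _ptr = dataref_ptr + _idx;
     _elt = BIT_FIELD_REF <vec_oprnd, ...>;
     MEM[(ltype *) _ptr] = _elt;  */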
9219 for (unsigned k = 0; k < const_nunits; ++k)
9221 /* Compute the offsetted pointer. */
9222 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9223 bitsize_int (k + elt_offset));
9224 tree idx
9225 = gimple_build (&stmts, BIT_FIELD_REF, idx_type, vec_offset,
9226 TYPE_SIZE (idx_type), boff);
9227 idx = gimple_convert (&stmts, sizetype, idx);
9228 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx, scale);
9229 tree ptr
9230 = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (dataref_ptr),
9231 dataref_ptr, idx);
9232 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9233 /* Extract the element to be stored. */
9234 tree elt
9235 = gimple_build (&stmts, BIT_FIELD_REF, TREE_TYPE (vectype),
9236 vec_oprnd, TYPE_SIZE (elt_type),
9237 bitsize_int (k * elt_size));
9238 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9239 stmts = NULL;
9240 tree ref
9241 = build2 (MEM_REF, ltype, ptr, build_int_cst (ref_type, 0));
9242 new_stmt = gimple_build_assign (ref, elt);
9243 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9246 if (j == 0)
9247 *vec_stmt = new_stmt;
9248 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9251 if (costing_p && dump_enabled_p ())
9252 dump_printf_loc (MSG_NOTE, vect_location,
9253 "vect_model_store_cost: inside_cost = %d, "
9254 "prologue_cost = %d .\n",
9255 inside_cost, prologue_cost);
9257 return true;
9260 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9261 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9262 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9263 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9265 unsigned inside_cost = 0, prologue_cost = 0;
9266 /* For costing some adjacent vector stores, we'd like to cost them
9267 once with their total number instead of costing each one by one.  */
9268 unsigned int n_adjacent_stores = 0;
9269 auto_vec<tree> result_chain (group_size);
9270 auto_vec<tree, 1> vec_oprnds;
9271 for (j = 0; j < ncopies; j++)
9273 gimple *new_stmt;
9274 if (j == 0)
9276 if (slp && !costing_p)
9278 /* Get vectorized arguments for SLP_NODE. */
9279 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
9280 &vec_oprnds, mask, &vec_masks);
9281 vec_oprnd = vec_oprnds[0];
9282 if (mask)
9283 vec_mask = vec_masks[0];
9285 else
9287 /* For interleaved stores we collect vectorized defs for all the
9288 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9289 input to vect_permute_store_chain().
9291 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9292 is of size 1. */
9293 stmt_vec_info next_stmt_info = first_stmt_info;
9294 for (i = 0; i < group_size; i++)
9296 /* Since gaps are not supported for interleaved stores,
9297 DR_GROUP_SIZE is the exact number of stmts in the chain.
9298 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9299 that there is no interleaving, DR_GROUP_SIZE is 1,
9300 and only one iteration of the loop will be executed. */
9301 op = vect_get_store_rhs (next_stmt_info);
9302 if (costing_p)
9303 update_prologue_cost (&prologue_cost, op);
9304 else
9306 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
9307 ncopies, op,
9308 gvec_oprnds[i]);
9309 vec_oprnd = (*gvec_oprnds[i])[0];
9310 dr_chain.quick_push (vec_oprnd);
9312 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9314 if (mask && !costing_p)
9316 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9317 mask, &vec_masks,
9318 mask_vectype);
9319 vec_mask = vec_masks[0];
9323 /* We should have caught mismatched types earlier.  */
9324 gcc_assert (costing_p
9325 || useless_type_conversion_p (vectype,
9326 TREE_TYPE (vec_oprnd)));
9327 bool simd_lane_access_p
9328 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9329 if (!costing_p
9330 && simd_lane_access_p
9331 && !loop_masks
9332 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9333 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9334 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9335 && integer_zerop (DR_INIT (first_dr_info->dr))
9336 && alias_sets_conflict_p (get_alias_set (aggr_type),
9337 get_alias_set (TREE_TYPE (ref_type))))
9339 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9340 dataref_offset = build_int_cst (ref_type, 0);
9342 else if (!costing_p)
9343 dataref_ptr
9344 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9345 simd_lane_access_p ? loop : NULL,
9346 offset, &dummy, gsi, &ptr_incr,
9347 simd_lane_access_p, bump);
9349 else if (!costing_p)
9351 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9352 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9353 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9354 of size 1. */
9355 for (i = 0; i < group_size; i++)
9357 vec_oprnd = (*gvec_oprnds[i])[j];
9358 dr_chain[i] = vec_oprnd;
9360 if (mask)
9361 vec_mask = vec_masks[j];
9362 if (dataref_offset)
9363 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9364 else
9365 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9366 stmt_info, bump);
9369 new_stmt = NULL;
9370 if (grouped_store)
9372 /* Permute. */
9373 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9374 if (costing_p)
9376 int group_size = DR_GROUP_SIZE (first_stmt_info);
9377 int nstmts = ceil_log2 (group_size) * group_size;
9378 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
9379 stmt_info, 0, vect_body);
9380 if (dump_enabled_p ())
9381 dump_printf_loc (MSG_NOTE, vect_location,
9382 "vect_model_store_cost: "
9383 "strided group_size = %d .\n",
9384 group_size);
9386 else
9387 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9388 gsi, &result_chain);
9391 stmt_vec_info next_stmt_info = first_stmt_info;
9392 for (i = 0; i < vec_num; i++)
9394 if (!costing_p)
9396 if (slp)
9397 vec_oprnd = vec_oprnds[i];
9398 else if (grouped_store)
9399 /* For grouped stores vectorized defs are interleaved in
9400 vect_permute_store_chain(). */
9401 vec_oprnd = result_chain[i];
9404 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9406 if (costing_p)
9407 inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9408 stmt_info, 0, vect_body);
9409 else
9411 tree perm_mask = perm_mask_for_reverse (vectype);
9412 tree perm_dest = vect_create_destination_var (
9413 vect_get_store_rhs (stmt_info), vectype);
9414 tree new_temp = make_ssa_name (perm_dest);
9416 /* Generate the permute statement. */
9417 gimple *perm_stmt
9418 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9419 vec_oprnd, perm_mask);
9420 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
9421 gsi);
9423 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9424 vec_oprnd = new_temp;
9428 if (costing_p)
9430 n_adjacent_stores++;
9432 if (!slp)
9434 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9435 if (!next_stmt_info)
9436 break;
9439 continue;
9442 tree final_mask = NULL_TREE;
9443 tree final_len = NULL_TREE;
9444 tree bias = NULL_TREE;
9445 if (loop_masks)
9446 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9447 vec_num * ncopies, vectype,
9448 vec_num * j + i);
9449 if (slp && vec_mask)
9450 vec_mask = vec_masks[i];
9451 if (vec_mask)
9452 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9453 vec_mask, gsi);
9455 if (i > 0)
9456 /* Bump the vector pointer. */
9457 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9458 stmt_info, bump);
9460 unsigned misalign;
9461 unsigned HOST_WIDE_INT align;
9462 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9463 if (alignment_support_scheme == dr_aligned)
9464 misalign = 0;
9465 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9467 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9468 misalign = 0;
9470 else
9471 misalign = misalignment;
9472 if (dataref_offset == NULL_TREE
9473 && TREE_CODE (dataref_ptr) == SSA_NAME)
9474 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9475 misalign);
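/* The lowest set bit of MISALIGN | ALIGN is the alignment we can rely
   on for this access.  */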
9476 align = least_bit_hwi (misalign | align);
9478 /* Compute the IFN to use when LOOP_LENS or FINAL_MASK is valid.  */
9479 machine_mode vmode = TYPE_MODE (vectype);
9480 machine_mode new_vmode = vmode;
9481 internal_fn partial_ifn = IFN_LAST;
9482 if (loop_lens)
9484 opt_machine_mode new_ovmode
9485 = get_len_load_store_mode (vmode, false, &partial_ifn);
9486 new_vmode = new_ovmode.require ();
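/* If the len store is only available for a byte-element mode the
   length has to be counted in bytes rather than lanes, hence the
   unit-size FACTOR below.  */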
9487 unsigned factor
9488 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9489 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9490 vec_num * ncopies, vectype,
9491 vec_num * j + i, factor);
9493 else if (final_mask)
9495 if (!can_vec_mask_load_store_p (
9496 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9497 &partial_ifn))
9498 gcc_unreachable ();
9501 if (partial_ifn == IFN_MASK_LEN_STORE)
9503 if (!final_len)
9505 /* Pass VF value to 'len' argument of
9506 MASK_LEN_STORE if LOOP_LENS is invalid. */
9507 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9509 if (!final_mask)
9511 /* Pass all ones value to 'mask' argument of
9512 MASK_LEN_STORE if final_mask is invalid. */
9513 mask_vectype = truth_type_for (vectype);
9514 final_mask = build_minus_one_cst (mask_vectype);
9517 if (final_len)
9519 signed char biasval
9520 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9522 bias = build_int_cst (intQI_type_node, biasval);
9525 /* Arguments are ready. Create the new vector stmt. */
9526 if (final_len)
9528 gcall *call;
9529 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9530 /* Need conversion if it's wrapped with VnQI. */
9531 if (vmode != new_vmode)
9533 tree new_vtype
9534 = build_vector_type_for_mode (unsigned_intQI_type_node,
9535 new_vmode);
9536 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9537 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9538 gassign *new_stmt
9539 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9540 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9541 vec_oprnd = var;
9544 if (partial_ifn == IFN_MASK_LEN_STORE)
9545 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9546 dataref_ptr, ptr, final_mask,
9547 final_len, bias, vec_oprnd);
9548 else
9549 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9550 dataref_ptr, ptr, final_len,
9551 bias, vec_oprnd);
9552 gimple_call_set_nothrow (call, true);
9553 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9554 new_stmt = call;
9556 else if (final_mask)
9558 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9559 gcall *call
9560 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9561 ptr, final_mask, vec_oprnd);
9562 gimple_call_set_nothrow (call, true);
9563 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9564 new_stmt = call;
9566 else
9568 data_ref
9569 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9570 dataref_offset ? dataref_offset
9571 : build_int_cst (ref_type, 0));
9572 if (alignment_support_scheme == dr_aligned)
9574 else
9575 TREE_TYPE (data_ref)
9576 = build_aligned_type (TREE_TYPE (data_ref),
9577 align * BITS_PER_UNIT);
9578 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9579 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9580 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9583 if (slp)
9584 continue;
9586 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9587 if (!next_stmt_info)
9588 break;
9590 if (!slp && !costing_p)
9592 if (j == 0)
9593 *vec_stmt = new_stmt;
9594 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9598 if (costing_p)
9600 if (n_adjacent_stores > 0)
9601 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
9602 alignment_support_scheme, misalignment,
9603 &inside_cost, cost_vec);
9605 /* When vectorizing a store into the function result, assign
9606 a penalty if the function returns in a multi-register location.
9607 In this case we assume we'll end up having to spill the
9608 vector result and do piecewise loads as a conservative estimate.  */
9609 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9610 if (base
9611 && (TREE_CODE (base) == RESULT_DECL
9612 || (DECL_P (base) && cfun_returns (base)))
9613 && !aggregate_value_p (base, cfun->decl))
9615 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9616 /* ??? Handle PARALLEL in some way. */
9617 if (REG_P (reg))
9619 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9620 /* Assume that a single reg-reg move is possible and cheap,
9621 do not account for vector to gp register move cost. */
9622 if (nregs > 1)
9624 /* Spill. */
9625 prologue_cost
9626 += record_stmt_cost (cost_vec, ncopies, vector_store,
9627 stmt_info, 0, vect_epilogue);
9628 /* Loads. */
9629 prologue_cost
9630 += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
9631 stmt_info, 0, vect_epilogue);
9635 if (dump_enabled_p ())
9636 dump_printf_loc (MSG_NOTE, vect_location,
9637 "vect_model_store_cost: inside_cost = %d, "
9638 "prologue_cost = %d .\n",
9639 inside_cost, prologue_cost);
9642 return true;
9645 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9646 VECTOR_CST mask. No checks are made that the target platform supports the
9647 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9648 vect_gen_perm_mask_checked. */
9650 tree
9651 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9653 tree mask_type;
9655 poly_uint64 nunits = sel.length ();
9656 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9658 mask_type = build_vector_type (ssizetype, nunits);
9659 return vec_perm_indices_to_tree (mask_type, sel);
9662 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9663 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9665 tree
9666 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9668 machine_mode vmode = TYPE_MODE (vectype);
9669 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9670 return vect_gen_perm_mask_any (vectype, sel);
9673 /* Given vector variables X and Y that were generated for the scalar
9674 STMT_INFO, generate instructions to permute the vector elements of X and Y
9675 using permutation mask MASK_VEC, insert them at *GSI and return the
9676 permuted vector variable.  */
9678 static tree
9679 permute_vec_elements (vec_info *vinfo,
9680 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9681 gimple_stmt_iterator *gsi)
9683 tree vectype = TREE_TYPE (x);
9684 tree perm_dest, data_ref;
9685 gimple *perm_stmt;
9687 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9688 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9689 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9690 else
9691 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9692 data_ref = make_ssa_name (perm_dest);
9694 /* Generate the permute statement. */
9695 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9696 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9698 return data_ref;
9701 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9702 inserting them on the loop's preheader edge.  Returns true if we
9703 were successful in doing so (and thus STMT_INFO can then be moved),
9704 otherwise returns false.  HOIST_P indicates whether we actually want to
9705 hoist the definitions of all SSA uses; it is false when we are only costing.  */
9707 static bool
9708 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9710 ssa_op_iter i;
9711 tree op;
9712 bool any = false;
9714 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9716 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9717 if (!gimple_nop_p (def_stmt)
9718 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9720 /* Make sure we don't need to recurse.  While we could do
9721 so in simple cases, for more complex use webs we don't
9722 have an easy way to preserve stmt order to fulfil
9723 dependencies within them.  */
9724 tree op2;
9725 ssa_op_iter i2;
9726 if (gimple_code (def_stmt) == GIMPLE_PHI)
9727 return false;
9728 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9730 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9731 if (!gimple_nop_p (def_stmt2)
9732 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9733 return false;
9735 any = true;
9739 if (!any)
9740 return true;
9742 if (!hoist_p)
9743 return true;
9745 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9747 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9748 if (!gimple_nop_p (def_stmt)
9749 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9751 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9752 gsi_remove (&gsi, false);
9753 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9757 return true;
9760 /* vectorizable_load.
9762 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9763 that can be vectorized.
9764 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9765 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9766 Return true if STMT_INFO is vectorizable in this way. */
9768 static bool
9769 vectorizable_load (vec_info *vinfo,
9770 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9771 gimple **vec_stmt, slp_tree slp_node,
9772 stmt_vector_for_cost *cost_vec)
9774 tree scalar_dest;
9775 tree vec_dest = NULL;
9776 tree data_ref = NULL;
9777 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9778 class loop *loop = NULL;
9779 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9780 bool nested_in_vect_loop = false;
9781 tree elem_type;
9782 /* Avoid false positive uninitialized warning, see PR110652. */
9783 tree new_temp = NULL_TREE;
9784 machine_mode mode;
9785 tree dummy;
9786 tree dataref_ptr = NULL_TREE;
9787 tree dataref_offset = NULL_TREE;
9788 gimple *ptr_incr = NULL;
9789 int ncopies;
9790 int i, j;
9791 unsigned int group_size;
9792 poly_uint64 group_gap_adj;
9793 tree msq = NULL_TREE, lsq;
9794 tree realignment_token = NULL_TREE;
9795 gphi *phi = NULL;
9796 vec<tree> dr_chain = vNULL;
9797 bool grouped_load = false;
9798 stmt_vec_info first_stmt_info;
9799 stmt_vec_info first_stmt_info_for_drptr = NULL;
9800 bool compute_in_loop = false;
9801 class loop *at_loop;
9802 int vec_num;
9803 bool slp = (slp_node != NULL);
9804 bool slp_perm = false;
9805 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9806 poly_uint64 vf;
9807 tree aggr_type;
9808 gather_scatter_info gs_info;
9809 tree ref_type;
9810 enum vect_def_type mask_dt = vect_unknown_def_type;
9812 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9813 return false;
9815 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9816 && ! vec_stmt)
9817 return false;
9819 if (!STMT_VINFO_DATA_REF (stmt_info))
9820 return false;
9822 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9823 int mask_index = -1;
9824 slp_tree slp_op = NULL;
9825 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9827 scalar_dest = gimple_assign_lhs (assign);
9828 if (TREE_CODE (scalar_dest) != SSA_NAME)
9829 return false;
9831 tree_code code = gimple_assign_rhs_code (assign);
9832 if (code != ARRAY_REF
9833 && code != BIT_FIELD_REF
9834 && code != INDIRECT_REF
9835 && code != COMPONENT_REF
9836 && code != IMAGPART_EXPR
9837 && code != REALPART_EXPR
9838 && code != MEM_REF
9839 && TREE_CODE_CLASS (code) != tcc_declaration)
9840 return false;
9842 else
9844 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9845 if (!call || !gimple_call_internal_p (call))
9846 return false;
9848 internal_fn ifn = gimple_call_internal_fn (call);
9849 if (!internal_load_fn_p (ifn))
9850 return false;
9852 scalar_dest = gimple_call_lhs (call);
9853 if (!scalar_dest)
9854 return false;
9856 mask_index = internal_fn_mask_index (ifn);
9857 if (mask_index >= 0 && slp_node)
9858 mask_index = vect_slp_child_index_for_operand
9859 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9860 if (mask_index >= 0
9861 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9862 &mask, &slp_op, &mask_dt, &mask_vectype))
9863 return false;
9866 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9867 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9869 if (loop_vinfo)
9871 loop = LOOP_VINFO_LOOP (loop_vinfo);
9872 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9873 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9875 else
9876 vf = 1;
9878 /* Multiple types in SLP are handled by creating the appropriate number of
9879 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9880 case of SLP. */
9881 if (slp)
9882 ncopies = 1;
9883 else
9884 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9886 gcc_assert (ncopies >= 1);
9888 /* FORNOW. This restriction should be relaxed. */
9889 if (nested_in_vect_loop && ncopies > 1)
9891 if (dump_enabled_p ())
9892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9893 "multiple types in nested loop.\n");
9894 return false;
9897 /* Invalidate assumptions made by dependence analysis when vectorization
9898 on the unrolled body effectively re-orders stmts. */
9899 if (ncopies > 1
9900 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9901 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9902 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9904 if (dump_enabled_p ())
9905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9906 "cannot perform implicit CSE when unrolling "
9907 "with negative dependence distance\n");
9908 return false;
9911 elem_type = TREE_TYPE (vectype);
9912 mode = TYPE_MODE (vectype);
9914 /* FORNOW. In some cases can vectorize even if data-type not supported
9915 (e.g. - data copies). */
9916 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9918 if (dump_enabled_p ())
9919 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9920 "Aligned load, but unsupported type.\n");
9921 return false;
9924 /* Check if the load is a part of an interleaving chain. */
9925 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9927 grouped_load = true;
9928 /* FORNOW */
9929 gcc_assert (!nested_in_vect_loop);
9930 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9932 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9933 group_size = DR_GROUP_SIZE (first_stmt_info);
9935 /* Refuse non-SLP vectorization of SLP-only groups. */
9936 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9938 if (dump_enabled_p ())
9939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9940 "cannot vectorize load in non-SLP mode.\n");
9941 return false;
9944 /* Invalidate assumptions made by dependence analysis when vectorization
9945 on the unrolled body effectively re-orders stmts. */
9946 if (!PURE_SLP_STMT (stmt_info)
9947 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9948 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9949 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9951 if (dump_enabled_p ())
9952 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9953 "cannot perform implicit CSE when performing "
9954 "group loads with negative dependence distance\n");
9955 return false;
9958 else
9959 group_size = 1;
9961 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9963 slp_perm = true;
9965 if (!loop_vinfo)
9967 /* In BB vectorization we may not actually use a loaded vector
9968 accessing elements in excess of DR_GROUP_SIZE. */
9969 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9970 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9971 unsigned HOST_WIDE_INT nunits;
9972 unsigned j, k, maxk = 0;
9973 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9974 if (k > maxk)
9975 maxk = k;
9976 tree vectype = SLP_TREE_VECTYPE (slp_node);
9977 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9978 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9980 if (dump_enabled_p ())
9981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9982 "BB vectorization with gaps at the end of "
9983 "a load is not supported\n");
9984 return false;
9988 auto_vec<tree> tem;
9989 unsigned n_perms;
9990 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9991 true, &n_perms))
9993 if (dump_enabled_p ())
9994 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9995 vect_location,
9996 "unsupported load permutation\n");
9997 return false;
10001 vect_memory_access_type memory_access_type;
10002 enum dr_alignment_support alignment_support_scheme;
10003 int misalignment;
10004 poly_int64 poffset;
10005 internal_fn lanes_ifn;
10006 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
10007 ncopies, &memory_access_type, &poffset,
10008 &alignment_support_scheme, &misalignment, &gs_info,
10009 &lanes_ifn))
10010 return false;
10012 if (mask)
10014 if (memory_access_type == VMAT_CONTIGUOUS)
10016 machine_mode vec_mode = TYPE_MODE (vectype);
10017 if (!VECTOR_MODE_P (vec_mode)
10018 || !can_vec_mask_load_store_p (vec_mode,
10019 TYPE_MODE (mask_vectype), true))
10020 return false;
10022 else if (memory_access_type != VMAT_LOAD_STORE_LANES
10023 && memory_access_type != VMAT_GATHER_SCATTER)
10025 if (dump_enabled_p ())
10026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10027 "unsupported access type for masked load.\n");
10028 return false;
10030 else if (memory_access_type == VMAT_GATHER_SCATTER
10031 && gs_info.ifn == IFN_LAST
10032 && !gs_info.decl)
10034 if (dump_enabled_p ())
10035 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10036 "unsupported masked emulated gather.\n");
10037 return false;
10041 bool costing_p = !vec_stmt;
10043 if (costing_p) /* transformation not required. */
10045 if (slp_node
10046 && mask
10047 && !vect_maybe_update_slp_op_vectype (slp_op,
10048 mask_vectype))
10050 if (dump_enabled_p ())
10051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10052 "incompatible vector types for invariants\n");
10053 return false;
10056 if (!slp)
10057 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10059 if (loop_vinfo
10060 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10061 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10062 VLS_LOAD, group_size,
10063 memory_access_type, &gs_info,
10064 mask);
10066 if (dump_enabled_p ()
10067 && memory_access_type != VMAT_ELEMENTWISE
10068 && memory_access_type != VMAT_GATHER_SCATTER
10069 && alignment_support_scheme != dr_aligned)
10070 dump_printf_loc (MSG_NOTE, vect_location,
10071 "Vectorizing an unaligned access.\n");
10073 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10074 vinfo->any_known_not_updated_vssa = true;
10076 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10079 if (!slp)
10080 gcc_assert (memory_access_type
10081 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10083 if (dump_enabled_p () && !costing_p)
10084 dump_printf_loc (MSG_NOTE, vect_location,
10085 "transform load. ncopies = %d\n", ncopies);
10087 /* Transform. */
10089 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10090 ensure_base_align (dr_info);
10092 if (memory_access_type == VMAT_INVARIANT)
10094 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10095 /* If we have versioned for aliasing or the loop doesn't
10096 have any data dependencies that would preclude this,
10097 then we are sure this is a loop invariant load and
10098 thus we can insert it on the preheader edge.
10099 TODO: hoist_defs_of_uses should ideally be computed
10100 once at analysis time, remembered and used at
10101 transform time.  */
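/* When hoisting, the scalar load is emitted on the preheader edge and its
   result is broadcast to a vector with vect_init_vector outside the loop,
   leaving only uses of the vector def in the loop body.  */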
10102 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10103 && !nested_in_vect_loop
10104 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
10105 if (costing_p)
10107 enum vect_cost_model_location cost_loc
10108 = hoist_p ? vect_prologue : vect_body;
10109 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10110 stmt_info, 0, cost_loc);
10111 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
10112 cost_loc);
10113 unsigned int prologue_cost = hoist_p ? cost : 0;
10114 unsigned int inside_cost = hoist_p ? 0 : cost;
10115 if (dump_enabled_p ())
10116 dump_printf_loc (MSG_NOTE, vect_location,
10117 "vect_model_load_cost: inside_cost = %d, "
10118 "prologue_cost = %d .\n",
10119 inside_cost, prologue_cost);
10120 return true;
10122 if (hoist_p)
10124 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
10125 if (dump_enabled_p ())
10126 dump_printf_loc (MSG_NOTE, vect_location,
10127 "hoisting out of the vectorized loop: %G",
10128 (gimple *) stmt);
10129 scalar_dest = copy_ssa_name (scalar_dest);
10130 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10131 edge pe = loop_preheader_edge (loop);
10132 gphi *vphi = get_virtual_phi (loop->header);
10133 tree vuse;
10134 if (vphi)
10135 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10136 else
10137 vuse = gimple_vuse (gsi_stmt (*gsi));
10138 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10139 gimple_set_vuse (new_stmt, vuse);
10140 gsi_insert_on_edge_immediate (pe, new_stmt);
10142 /* These copies are all equivalent. */
10143 if (hoist_p)
10144 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10145 vectype, NULL);
10146 else
10148 gimple_stmt_iterator gsi2 = *gsi;
10149 gsi_next (&gsi2);
10150 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10151 vectype, &gsi2);
10153 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10154 if (slp)
10155 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10156 slp_node->push_vec_def (new_stmt);
10157 else
10159 for (j = 0; j < ncopies; ++j)
10160 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10161 *vec_stmt = new_stmt;
10163 return true;
10166 if (memory_access_type == VMAT_ELEMENTWISE
10167 || memory_access_type == VMAT_STRIDED_SLP)
10169 gimple_stmt_iterator incr_gsi;
10170 bool insert_after;
10171 tree offvar;
10172 tree ivstep;
10173 tree running_off;
10174 vec<constructor_elt, va_gc> *v = NULL;
10175 tree stride_base, stride_step, alias_off;
10176 /* Checked by get_load_store_type. */
10177 unsigned int const_nunits = nunits.to_constant ();
10178 unsigned HOST_WIDE_INT cst_offset = 0;
10179 tree dr_offset;
10180 unsigned int inside_cost = 0;
10182 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10183 gcc_assert (!nested_in_vect_loop);
10185 if (grouped_load)
10187 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10188 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10190 else
10192 first_stmt_info = stmt_info;
10193 first_dr_info = dr_info;
10196 if (slp && grouped_load)
10198 group_size = DR_GROUP_SIZE (first_stmt_info);
10199 ref_type = get_group_alias_ptr_type (first_stmt_info);
10201 else
10203 if (grouped_load)
10204 cst_offset
10205 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10206 * vect_get_place_in_interleaving_chain (stmt_info,
10207 first_stmt_info));
10208 group_size = 1;
10209 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10212 if (!costing_p)
10214 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10215 stride_base = fold_build_pointer_plus (
10216 DR_BASE_ADDRESS (first_dr_info->dr),
10217 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10218 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10219 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10221 /* For a load with loop-invariant (but other than power-of-2)
10222 stride (i.e. not a grouped access) like so:
10224 for (i = 0; i < n; i += stride)
10225 ... = array[i];
10227 we generate a new induction variable and new accesses to
10228 form a new vector (or vectors, depending on ncopies):
10230 for (j = 0; ; j += VF*stride)
10231 tmp1 = array[j];
10232 tmp2 = array[j + stride];
10234 vectemp = {tmp1, tmp2, ...}
10237 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10238 build_int_cst (TREE_TYPE (stride_step), vf));
10240 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10242 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10243 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10244 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10245 loop, &incr_gsi, insert_after,
10246 &offvar, NULL);
10248 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10251 running_off = offvar;
10252 alias_off = build_int_cst (ref_type, 0);
10253 int nloads = const_nunits;
10254 int lnel = 1;
10255 tree ltype = TREE_TYPE (vectype);
10256 tree lvectype = vectype;
10257 auto_vec<tree> dr_chain;
10258 if (memory_access_type == VMAT_STRIDED_SLP)
10260 if (group_size < const_nunits)
10262 /* First check if vec_init optab supports construction from vector
10263 elts directly. Otherwise avoid emitting a constructor of
10264 vector elements by performing the loads using an integer type
10265 of the same size, constructing a vector of those and then
10266 re-interpreting it as the original vector type. This avoids a
10267 huge runtime penalty due to the general inability to perform
10268 store forwarding from smaller stores to a larger load. */
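/* For example (assuming the target supports the required modes), with a
   V8SI vectype and GROUP_SIZE 2 each of the four loads reads one 64-bit
   piece and the pieces are assembled into a four-element composition
   vector that is later VIEW_CONVERTed back to V8SI.  */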
10269 tree ptype;
10270 tree vtype
10271 = vector_vector_composition_type (vectype,
10272 const_nunits / group_size,
10273 &ptype);
10274 if (vtype != NULL_TREE)
10276 nloads = const_nunits / group_size;
10277 lnel = group_size;
10278 lvectype = vtype;
10279 ltype = ptype;
10282 else
10284 nloads = 1;
10285 lnel = const_nunits;
10286 ltype = vectype;
10288 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10290 /* Load vector(1) scalar_type directly if the vectype has just one element.  */
10291 else if (nloads == 1)
10292 ltype = vectype;
10294 if (slp)
10296 /* For SLP permutation support we need to load the whole group,
10297 not only the number of vector stmts the permutation result
10298 fits in. */
10299 if (slp_perm)
10301 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10302 variable VF. */
10303 unsigned int const_vf = vf.to_constant ();
10304 ncopies = CEIL (group_size * const_vf, const_nunits);
10305 dr_chain.create (ncopies);
10307 else
10308 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10310 unsigned int group_el = 0;
10311 unsigned HOST_WIDE_INT
10312 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10313 unsigned int n_groups = 0;
10314 /* For costing some adjacent vector loads, we'd like to cost them
10315 once with their total number instead of costing each one by one.  */
10316 unsigned int n_adjacent_loads = 0;
10317 for (j = 0; j < ncopies; j++)
10319 if (nloads > 1 && !costing_p)
10320 vec_alloc (v, nloads);
10321 gimple *new_stmt = NULL;
10322 for (i = 0; i < nloads; i++)
10324 if (costing_p)
10326 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10327 avoid an ICE; see PR110776.  */
10328 if (VECTOR_TYPE_P (ltype)
10329 && memory_access_type != VMAT_ELEMENTWISE)
10330 n_adjacent_loads++;
10331 else
10332 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10333 stmt_info, 0, vect_body);
10334 continue;
10336 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10337 group_el * elsz + cst_offset);
10338 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10339 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10340 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
10341 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10342 if (nloads > 1)
10343 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10344 gimple_assign_lhs (new_stmt));
10346 group_el += lnel;
10347 if (! slp
10348 || group_el == group_size)
10350 n_groups++;
10351 /* When doing SLP, make sure not to load elements from
10352 the next vector iteration; those will not be accessed,
10353 so just use the last element again.  See PR107451.  */
10354 if (!slp || known_lt (n_groups, vf))
10356 tree newoff = copy_ssa_name (running_off);
10357 gimple *incr
10358 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10359 running_off, stride_step);
10360 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10361 running_off = newoff;
10363 group_el = 0;
10367 if (nloads > 1)
10369 if (costing_p)
10370 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10371 stmt_info, 0, vect_body);
10372 else
10374 tree vec_inv = build_constructor (lvectype, v);
10375 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10376 lvectype, gsi);
10377 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10378 if (lvectype != vectype)
10380 new_stmt
10381 = gimple_build_assign (make_ssa_name (vectype),
10382 VIEW_CONVERT_EXPR,
10383 build1 (VIEW_CONVERT_EXPR,
10384 vectype, new_temp));
10385 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10386 gsi);
10391 if (!costing_p)
10393 if (slp)
10395 if (slp_perm)
10396 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10397 else
10398 slp_node->push_vec_def (new_stmt);
10400 else
10402 if (j == 0)
10403 *vec_stmt = new_stmt;
10404 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10408 if (slp_perm)
10410 unsigned n_perms;
10411 if (costing_p)
10413 unsigned n_loads;
10414 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10415 true, &n_perms, &n_loads);
10416 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10417 first_stmt_info, 0, vect_body);
10419 else
10420 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10421 false, &n_perms);
10424 if (costing_p)
10426 if (n_adjacent_loads > 0)
10427 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10428 alignment_support_scheme, misalignment, false,
10429 &inside_cost, nullptr, cost_vec, cost_vec,
10430 true);
10431 if (dump_enabled_p ())
10432 dump_printf_loc (MSG_NOTE, vect_location,
10433 "vect_model_load_cost: inside_cost = %u, "
10434 "prologue_cost = 0 .\n",
10435 inside_cost);
10438 return true;
10441 if (memory_access_type == VMAT_GATHER_SCATTER
10442 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10443 grouped_load = false;
10445 if (grouped_load
10446 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10448 if (grouped_load)
10450 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10451 group_size = DR_GROUP_SIZE (first_stmt_info);
10453 else
10455 first_stmt_info = stmt_info;
10456 group_size = 1;
10458 /* For SLP vectorization we directly vectorize a subchain
10459 without permutation. */
10460 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10461 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10462 /* For BB vectorization always use the first stmt to base
10463 the data ref pointer on. */
10464 if (bb_vinfo)
10465 first_stmt_info_for_drptr
10466 = vect_find_first_scalar_stmt_in_slp (slp_node);
10468 /* Check if the chain of loads is already vectorized. */
10469 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10470 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10471 ??? But we can only do so if there is exactly one
10472 as we have no way to get at the rest. Leave the CSE
10473 opportunity alone.
10474 ??? With the group load eventually participating
10475 in multiple different permutations (having multiple
10476 slp nodes which refer to the same group) the CSE
10477 is even wrong code. See PR56270. */
10478 && !slp)
10480 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10481 return true;
10483 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10484 group_gap_adj = 0;
10486 /* VEC_NUM is the number of vect stmts to be created for this group. */
10487 if (slp)
10489 grouped_load = false;
10490 /* If an SLP permutation is from N elements to N elements,
10491 and if one vector holds a whole number of N, we can load
10492 the inputs to the permutation in the same way as an
10493 unpermuted sequence. In other cases we need to load the
10494 whole group, not only the number of vector stmts the
10495 permutation result fits in. */
10496 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10497 if (slp_perm
10498 && (group_size != scalar_lanes
10499 || !multiple_p (nunits, group_size)))
10501 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10502 variable VF; see vect_transform_slp_perm_load. */
10503 unsigned int const_vf = vf.to_constant ();
10504 unsigned int const_nunits = nunits.to_constant ();
10505 vec_num = CEIL (group_size * const_vf, const_nunits);
10506 group_gap_adj = vf * group_size - nunits * vec_num;
10508 else
10510 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10511 group_gap_adj
10512 = group_size - scalar_lanes;
10515 else
10516 vec_num = group_size;
10518 ref_type = get_group_alias_ptr_type (first_stmt_info);
10520 else
10522 first_stmt_info = stmt_info;
10523 first_dr_info = dr_info;
10524 group_size = vec_num = 1;
10525 group_gap_adj = 0;
10526 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10527 if (slp)
10528 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10531 gcc_assert (alignment_support_scheme);
10532 vec_loop_masks *loop_masks
10533 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10534 ? &LOOP_VINFO_MASKS (loop_vinfo)
10535 : NULL);
10536 vec_loop_lens *loop_lens
10537 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10538 ? &LOOP_VINFO_LENS (loop_vinfo)
10539 : NULL);
10541 /* Shouldn't go with length-based approach if fully masked. */
10542 gcc_assert (!loop_lens || !loop_masks);
10544 /* Targets with store-lane instructions must not require explicit
10545 realignment. vect_supportable_dr_alignment always returns either
10546 dr_aligned or dr_unaligned_supported for masked operations. */
10547 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10548 && !mask
10549 && !loop_masks)
10550 || alignment_support_scheme == dr_aligned
10551 || alignment_support_scheme == dr_unaligned_supported);
10553 /* In case the vectorization factor (VF) is bigger than the number
10554 of elements that we can fit in a vectype (nunits), we have to generate
10555 more than one vector stmt - i.e - we need to "unroll" the
10556 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10557 from one copy of the vector stmt to the next, in the field
10558 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10559 stages to find the correct vector defs to be used when vectorizing
10560 stmts that use the defs of the current stmt. The example below
10561 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10562 need to create 4 vectorized stmts):
10564 before vectorization:
10565 RELATED_STMT VEC_STMT
10566 S1: x = memref - -
10567 S2: z = x + 1 - -
10569 step 1: vectorize stmt S1:
10570 We first create the vector stmt VS1_0, and, as usual, record a
10571 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10572 Next, we create the vector stmt VS1_1, and record a pointer to
10573 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10574 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10575 stmts and pointers:
10576 RELATED_STMT VEC_STMT
10577 VS1_0: vx0 = memref0 VS1_1 -
10578 VS1_1: vx1 = memref1 VS1_2 -
10579 VS1_2: vx2 = memref2 VS1_3 -
10580 VS1_3: vx3 = memref3 - -
10581 S1: x = load - VS1_0
10582 S2: z = x + 1 - -
10585 /* In case of interleaving (non-unit grouped access):
10587 S1: x2 = &base + 2
10588 S2: x0 = &base
10589 S3: x1 = &base + 1
10590 S4: x3 = &base + 3
10592 Vectorized loads are created in the order of memory accesses
10593 starting from the access of the first stmt of the chain:
10595 VS1: vx0 = &base
10596 VS2: vx1 = &base + vec_size*1
10597 VS3: vx3 = &base + vec_size*2
10598 VS4: vx4 = &base + vec_size*3
10600 Then permutation statements are generated:
10602 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10603 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
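         (For example, with nunits == 4 the two selectors above are
          { 0, 2, 4, 6 } and { 1, 3, 5, 7 }.)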
10606 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10607 (the order of the data-refs in the output of vect_permute_load_chain
10608 corresponds to the order of scalar stmts in the interleaving chain - see
10609 the documentation of vect_permute_load_chain()).
10610 The generation of permutation stmts and recording them in
10611 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10613 In case of both multiple types and interleaving, the vector loads and
10614 permutation stmts above are created for every copy. The result vector
10615 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10616 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10618 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10619 on a target that supports unaligned accesses (dr_unaligned_supported)
10620 we generate the following code:
10621 p = initial_addr;
10622 indx = 0;
10623 loop {
10624 p = p + indx * vectype_size;
10625 vec_dest = *(p);
10626 indx = indx + 1;
10629 Otherwise, the data reference is potentially unaligned on a target that
10630 does not support unaligned accesses (dr_explicit_realign_optimized) -
10631 then generate the following code, in which the data in each iteration is
10632 obtained by two vector loads, one from the previous iteration, and one
10633 from the current iteration:
10634 p1 = initial_addr;
10635 msq_init = *(floor(p1))
10636 p2 = initial_addr + VS - 1;
10637 realignment_token = call target_builtin;
10638 indx = 0;
10639 loop {
10640 p2 = p2 + indx * vectype_size
10641 lsq = *(floor(p2))
10642 vec_dest = realign_load (msq, lsq, realignment_token)
10643 indx = indx + 1;
10644 msq = lsq;
10645 } */
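   /* Illustrative example (not part of the original comment): for a
      four-element vector starting at element offset 2 of an aligned
      chunk { a0, a1, a2, a3 } followed by { b0, b1, b2, b3 },
      msq = *(floor(p1)) is { a0, a1, a2, a3 }, lsq = *(floor(p2)) is
      { b0, b1, b2, b3 }, and realign_load produces the wanted
      { a2, a3, b0, b1 }.  */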
10647 /* If the misalignment remains the same throughout the execution of the
10648 loop, we can create the init_addr and permutation mask at the loop
10649 preheader. Otherwise, it needs to be created inside the loop.
10650 This can only occur when vectorizing memory accesses in the inner-loop
10651 nested within an outer-loop that is being vectorized. */
10653 if (nested_in_vect_loop
10654 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10655 GET_MODE_SIZE (TYPE_MODE (vectype))))
10657 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10658 compute_in_loop = true;
10661 bool diff_first_stmt_info
10662 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10664 tree offset = NULL_TREE;
10665 if ((alignment_support_scheme == dr_explicit_realign_optimized
10666 || alignment_support_scheme == dr_explicit_realign)
10667 && !compute_in_loop)
10669 /* If we have a different first_stmt_info, we can't set up realignment
10670 here, since we can't guarantee the first_stmt_info DR has been
10671 initialized yet; instead use the first_stmt_info_for_drptr DR by
10672 bumping the distance from the first_stmt_info DR as below. */
10673 if (!costing_p)
10675 if (!diff_first_stmt_info)
10676 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10677 &realignment_token,
10678 alignment_support_scheme, NULL_TREE,
10679 &at_loop);
10680 if (alignment_support_scheme == dr_explicit_realign_optimized)
10682 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10683 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10684 size_one_node);
10685 gcc_assert (!first_stmt_info_for_drptr);
10689 else
10690 at_loop = loop;
10692 if (!known_eq (poffset, 0))
10693 offset = (offset
10694 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10695 : size_int (poffset));
10697 tree bump;
10698 tree vec_offset = NULL_TREE;
10699 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10701 aggr_type = NULL_TREE;
10702 bump = NULL_TREE;
10704 else if (memory_access_type == VMAT_GATHER_SCATTER)
10706 aggr_type = elem_type;
10707 if (!costing_p)
10708 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10709 &bump, &vec_offset, loop_lens);
10711 else
10713 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10714 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10715 else
10716 aggr_type = vectype;
10717 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10718 memory_access_type, loop_lens);
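  /* Note that for VMAT_LOAD_STORE_LANES the aggregate type chosen above
     spans the whole interleaved group (vec_num * nunits elements), so a
     single pointer bump steps over all lanes at once; otherwise the
     pointer advances one vector at a time.  */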
10721 auto_vec<tree> vec_offsets;
10722 auto_vec<tree> vec_masks;
10723 if (mask && !costing_p)
10725 if (slp_node)
10726 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10727 &vec_masks);
10728 else
10729 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10730 &vec_masks, mask_vectype);
10733 tree vec_mask = NULL_TREE;
10734 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10736 gcc_assert (alignment_support_scheme == dr_aligned
10737 || alignment_support_scheme == dr_unaligned_supported);
10738 gcc_assert (grouped_load && !slp);
10740 unsigned int inside_cost = 0, prologue_cost = 0;
10741 /* When costing several adjacent vector loads, we'd rather cost them
10742 once with their total number than cost each one individually. */
10743 unsigned int n_adjacent_loads = 0;
10744 for (j = 0; j < ncopies; j++)
10746 if (costing_p)
10748 /* An IFN_LOAD_LANES will load all its vector results,
10749 regardless of which ones we actually need. Account
10750 for the cost of unused results. */
10751 if (first_stmt_info == stmt_info)
10753 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10754 stmt_vec_info next_stmt_info = first_stmt_info;
10757 gaps -= 1;
10758 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10760 while (next_stmt_info);
10761 if (gaps)
10763 if (dump_enabled_p ())
10764 dump_printf_loc (MSG_NOTE, vect_location,
10765 "vect_model_load_cost: %d "
10766 "unused vectors.\n",
10767 gaps);
10768 vect_get_load_cost (vinfo, stmt_info, gaps,
10769 alignment_support_scheme,
10770 misalignment, false, &inside_cost,
10771 &prologue_cost, cost_vec, cost_vec,
10772 true);
10775 n_adjacent_loads++;
10776 continue;
10779 /* 1. Create the vector or array pointer update chain. */
10780 if (j == 0)
10781 dataref_ptr
10782 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10783 at_loop, offset, &dummy, gsi,
10784 &ptr_incr, false, bump);
10785 else
10787 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10788 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10789 stmt_info, bump);
10791 if (mask)
10792 vec_mask = vec_masks[j];
10794 tree vec_array = create_vector_array (vectype, vec_num);
10796 tree final_mask = NULL_TREE;
10797 tree final_len = NULL_TREE;
10798 tree bias = NULL_TREE;
10799 if (loop_masks)
10800 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10801 ncopies, vectype, j);
10802 if (vec_mask)
10803 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10804 vec_mask, gsi);
10806 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10808 if (loop_lens)
10809 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10810 ncopies, vectype, j, 1);
10811 else
10812 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10813 signed char biasval
10814 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10815 bias = build_int_cst (intQI_type_node, biasval);
10816 if (!final_mask)
10818 mask_vectype = truth_type_for (vectype);
10819 final_mask = build_minus_one_cst (mask_vectype);
10823 gcall *call;
10824 if (final_len && final_mask)
10826 /* Emit:
10827 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10828 VEC_MASK, LEN, BIAS). */
10829 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10830 tree alias_ptr = build_int_cst (ref_type, align);
10831 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10832 dataref_ptr, alias_ptr,
10833 final_mask, final_len, bias);
10835 else if (final_mask)
10837 /* Emit:
10838 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10839 VEC_MASK). */
10840 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10841 tree alias_ptr = build_int_cst (ref_type, align);
10842 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10843 dataref_ptr, alias_ptr,
10844 final_mask);
10846 else
10848 /* Emit:
10849 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10850 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10851 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10853 gimple_call_set_lhs (call, vec_array);
10854 gimple_call_set_nothrow (call, true);
10855 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10857 dr_chain.create (vec_num);
10858 /* Extract each vector into an SSA_NAME. */
10859 for (i = 0; i < vec_num; i++)
10861 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10862 vec_array, i);
10863 dr_chain.quick_push (new_temp);
10866 /* Record the mapping between SSA_NAMEs and statements. */
10867 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10869 /* Record that VEC_ARRAY is now dead. */
10870 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10872 dr_chain.release ();
10874 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10877 if (costing_p)
10879 if (n_adjacent_loads > 0)
10880 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10881 alignment_support_scheme, misalignment, false,
10882 &inside_cost, &prologue_cost, cost_vec,
10883 cost_vec, true);
10884 if (dump_enabled_p ())
10885 dump_printf_loc (MSG_NOTE, vect_location,
10886 "vect_model_load_cost: inside_cost = %u, "
10887 "prologue_cost = %u .\n",
10888 inside_cost, prologue_cost);
10891 return true;
10894 if (memory_access_type == VMAT_GATHER_SCATTER)
10896 gcc_assert (alignment_support_scheme == dr_aligned
10897 || alignment_support_scheme == dr_unaligned_supported);
10898 gcc_assert (!grouped_load && !slp_perm);
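  /* Three variants are handled below: an internal-fn gather
     ((MASK_[LEN_])GATHER_LOAD) when gs_info.ifn is available, the
     legacy x86 builtin-decl path when gs_info.decl is set, and
     otherwise a gather emulated with scalar loads.  */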
10900 unsigned int inside_cost = 0, prologue_cost = 0;
10901 for (j = 0; j < ncopies; j++)
10903 /* 1. Create the vector or array pointer update chain. */
10904 if (j == 0 && !costing_p)
10906 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10907 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10908 slp_node, &gs_info, &dataref_ptr,
10909 &vec_offsets);
10910 else
10911 dataref_ptr
10912 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10913 at_loop, offset, &dummy, gsi,
10914 &ptr_incr, false, bump);
10916 else if (!costing_p)
10918 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10919 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10920 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10921 gsi, stmt_info, bump);
10924 gimple *new_stmt = NULL;
10925 for (i = 0; i < vec_num; i++)
10927 tree final_mask = NULL_TREE;
10928 tree final_len = NULL_TREE;
10929 tree bias = NULL_TREE;
10930 if (!costing_p)
10932 if (mask)
10933 vec_mask = vec_masks[vec_num * j + i];
10934 if (loop_masks)
10935 final_mask
10936 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10937 vec_num * ncopies, vectype,
10938 vec_num * j + i);
10939 if (vec_mask)
10940 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10941 final_mask, vec_mask, gsi);
10943 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10944 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10945 gsi, stmt_info, bump);
10948 /* 2. Create the vector-load in the loop. */
10949 unsigned HOST_WIDE_INT align;
10950 if (gs_info.ifn != IFN_LAST)
10952 if (costing_p)
10954 unsigned int cnunits = vect_nunits_for_cost (vectype);
10955 inside_cost
10956 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10957 stmt_info, 0, vect_body);
10958 continue;
10960 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10961 vec_offset = vec_offsets[vec_num * j + i];
10962 tree zero = build_zero_cst (vectype);
10963 tree scale = size_int (gs_info.scale);
10965 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10967 if (loop_lens)
10968 final_len
10969 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10970 vec_num * ncopies, vectype,
10971 vec_num * j + i, 1);
10972 else
10973 final_len
10974 = build_int_cst (sizetype,
10975 TYPE_VECTOR_SUBPARTS (vectype));
10976 signed char biasval
10977 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10978 bias = build_int_cst (intQI_type_node, biasval);
10979 if (!final_mask)
10981 mask_vectype = truth_type_for (vectype);
10982 final_mask = build_minus_one_cst (mask_vectype);
10986 gcall *call;
10987 if (final_len && final_mask)
10988 call
10989 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
10990 dataref_ptr, vec_offset,
10991 scale, zero, final_mask,
10992 final_len, bias);
10993 else if (final_mask)
10994 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
10995 dataref_ptr, vec_offset,
10996 scale, zero, final_mask);
10997 else
10998 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
10999 dataref_ptr, vec_offset,
11000 scale, zero);
11001 gimple_call_set_nothrow (call, true);
11002 new_stmt = call;
11003 data_ref = NULL_TREE;
11005 else if (gs_info.decl)
11007 /* The builtin decls path for gather is legacy, x86 only. */
11008 gcc_assert (!final_len && nunits.is_constant ());
11009 if (costing_p)
11011 unsigned int cnunits = vect_nunits_for_cost (vectype);
11012 inside_cost
11013 = record_stmt_cost (cost_vec, cnunits, scalar_load,
11014 stmt_info, 0, vect_body);
11015 continue;
11017 poly_uint64 offset_nunits
11018 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
11019 if (known_eq (nunits, offset_nunits))
11021 new_stmt = vect_build_one_gather_load_call
11022 (vinfo, stmt_info, gsi, &gs_info,
11023 dataref_ptr, vec_offsets[vec_num * j + i],
11024 final_mask);
11025 data_ref = NULL_TREE;
11027 else if (known_eq (nunits, offset_nunits * 2))
11029 /* We have an offset vector with half the number of
11030 lanes, but the builtins produce full-vectype
11031 data with just the lower lanes filled. */
11032 new_stmt = vect_build_one_gather_load_call
11033 (vinfo, stmt_info, gsi, &gs_info,
11034 dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
11035 final_mask);
11036 tree low = make_ssa_name (vectype);
11037 gimple_set_lhs (new_stmt, low);
11038 vect_finish_stmt_generation (vinfo, stmt_info,
11039 new_stmt, gsi);
11041 /* Now put the upper half of FINAL_MASK into its lower half. */
11042 if (final_mask
11043 && !SCALAR_INT_MODE_P
11044 (TYPE_MODE (TREE_TYPE (final_mask))))
11046 int count = nunits.to_constant ();
11047 vec_perm_builder sel (count, count, 1);
11048 sel.quick_grow (count);
11049 for (int i = 0; i < count; ++i)
11050 sel[i] = i | (count / 2);
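                      /* E.g. for count == 8 this builds the selector
                         { 4, 5, 6, 7, 4, 5, 6, 7 }, moving the upper half
                         of FINAL_MASK into the lower lanes used by the
                         second gather call below.  */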
11051 vec_perm_indices indices (sel, 2, count);
11052 tree perm_mask = vect_gen_perm_mask_checked
11053 (TREE_TYPE (final_mask), indices);
11054 new_stmt = gimple_build_assign (NULL_TREE,
11055 VEC_PERM_EXPR,
11056 final_mask,
11057 final_mask,
11058 perm_mask);
11059 final_mask = make_ssa_name (TREE_TYPE (final_mask));
11060 gimple_set_lhs (new_stmt, final_mask);
11061 vect_finish_stmt_generation (vinfo, stmt_info,
11062 new_stmt, gsi);
11064 else if (final_mask)
11066 new_stmt = gimple_build_assign (NULL_TREE,
11067 VEC_UNPACK_HI_EXPR,
11068 final_mask);
11069 final_mask = make_ssa_name
11070 (truth_type_for (gs_info.offset_vectype));
11071 gimple_set_lhs (new_stmt, final_mask);
11072 vect_finish_stmt_generation (vinfo, stmt_info,
11073 new_stmt, gsi);
11076 new_stmt = vect_build_one_gather_load_call
11077 (vinfo, stmt_info, gsi, &gs_info,
11078 dataref_ptr,
11079 vec_offsets[2 * vec_num * j + 2 * i + 1],
11080 final_mask);
11081 tree high = make_ssa_name (vectype);
11082 gimple_set_lhs (new_stmt, high);
11083 vect_finish_stmt_generation (vinfo, stmt_info,
11084 new_stmt, gsi);
11086 /* Compose the full vector from LOW and HIGH. */
11087 int count = nunits.to_constant ();
11088 vec_perm_builder sel (count, count, 1);
11089 sel.quick_grow (count);
11090 for (int i = 0; i < count; ++i)
11091 sel[i] = i < count / 2 ? i : i + count / 2;
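                  /* E.g. for count == 8 the selector is
                     { 0, 1, 2, 3, 8, 9, 10, 11 }, concatenating the filled
                     lower halves of LOW and HIGH.  */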
11092 vec_perm_indices indices (sel, 2, count);
11093 tree perm_mask
11094 = vect_gen_perm_mask_checked (vectype, indices);
11095 new_stmt = gimple_build_assign (NULL_TREE,
11096 VEC_PERM_EXPR,
11097 low, high, perm_mask);
11098 data_ref = NULL_TREE;
11100 else if (known_eq (nunits * 2, offset_nunits))
11102 /* We have an offset vector with double the number of
11103 lanes. Select the low/high part accordingly. */
11104 vec_offset = vec_offsets[(vec_num * j + i) / 2];
11105 if ((vec_num * j + i) & 1)
11107 int count = offset_nunits.to_constant ();
11108 vec_perm_builder sel (count, count, 1);
11109 sel.quick_grow (count);
11110 for (int i = 0; i < count; ++i)
11111 sel[i] = i | (count / 2);
11112 vec_perm_indices indices (sel, 2, count);
11113 tree perm_mask = vect_gen_perm_mask_checked
11114 (TREE_TYPE (vec_offset), indices);
11115 new_stmt = gimple_build_assign (NULL_TREE,
11116 VEC_PERM_EXPR,
11117 vec_offset,
11118 vec_offset,
11119 perm_mask);
11120 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11121 gimple_set_lhs (new_stmt, vec_offset);
11122 vect_finish_stmt_generation (vinfo, stmt_info,
11123 new_stmt, gsi);
11125 new_stmt = vect_build_one_gather_load_call
11126 (vinfo, stmt_info, gsi, &gs_info,
11127 dataref_ptr, vec_offset, final_mask);
11128 data_ref = NULL_TREE;
11130 else
11131 gcc_unreachable ();
11133 else
11135 /* Emulated gather-scatter. */
11136 gcc_assert (!final_mask);
11137 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11138 if (costing_p)
11140 /* For emulated gathers, cost N offset vector element extracts
11141 (the scalar scaling and pointer-plus-offset add are assumed to be consumed by the load). */
11142 inside_cost = record_stmt_cost (cost_vec, const_nunits,
11143 vec_to_scalar, stmt_info,
11144 0, vect_body);
11145 /* N scalar loads plus gathering them into a
11146 vector. */
11147 inside_cost
11148 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11149 stmt_info, 0, vect_body);
11150 inside_cost
11151 = record_stmt_cost (cost_vec, 1, vec_construct,
11152 stmt_info, 0, vect_body);
11153 continue;
11155 unsigned HOST_WIDE_INT const_offset_nunits
11156 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11157 .to_constant ();
11158 vec<constructor_elt, va_gc> *ctor_elts;
11159 vec_alloc (ctor_elts, const_nunits);
11160 gimple_seq stmts = NULL;
11161 /* We support offset vectors with more elements
11162 than the data vector for now. */
11163 unsigned HOST_WIDE_INT factor
11164 = const_offset_nunits / const_nunits;
11165 vec_offset = vec_offsets[(vec_num * j + i) / factor];
11166 unsigned elt_offset = (j % factor) * const_nunits;
11167 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11168 tree scale = size_int (gs_info.scale);
11169 align = get_object_alignment (DR_REF (first_dr_info->dr));
11170 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
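              /* For each lane K the code below builds roughly the
                 following (illustrative GIMPLE, names made up):
                   off_k = BIT_FIELD_REF <vec_offset, ...>;
                   ptr_k = dataref_ptr + (sizetype) off_k * scale;
                   elt_k = MEM[ptr_k];
                 and collects the elements into a CONSTRUCTOR of
                 VECTYPE.  */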
11171 for (unsigned k = 0; k < const_nunits; ++k)
11173 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11174 bitsize_int (k + elt_offset));
11175 tree idx
11176 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11177 vec_offset, TYPE_SIZE (idx_type), boff);
11178 idx = gimple_convert (&stmts, sizetype, idx);
11179 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
11180 scale);
11181 tree ptr = gimple_build (&stmts, PLUS_EXPR,
11182 TREE_TYPE (dataref_ptr),
11183 dataref_ptr, idx);
11184 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11185 tree elt = make_ssa_name (TREE_TYPE (vectype));
11186 tree ref = build2 (MEM_REF, ltype, ptr,
11187 build_int_cst (ref_type, 0));
11188 new_stmt = gimple_build_assign (elt, ref);
11189 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11190 gimple_seq_add_stmt (&stmts, new_stmt);
11191 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11193 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11194 new_stmt = gimple_build_assign (
11195 NULL_TREE, build_constructor (vectype, ctor_elts));
11196 data_ref = NULL_TREE;
11199 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11200 /* DATA_REF is null if we've already built the statement. */
11201 if (data_ref)
11203 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11204 new_stmt = gimple_build_assign (vec_dest, data_ref);
11206 new_temp = make_ssa_name (vec_dest, new_stmt);
11207 gimple_set_lhs (new_stmt, new_temp);
11208 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11210 /* Store vector loads in the corresponding SLP_NODE. */
11211 if (slp)
11212 slp_node->push_vec_def (new_stmt);
11215 if (!slp && !costing_p)
11216 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11219 if (!slp && !costing_p)
11220 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11222 if (costing_p && dump_enabled_p ())
11223 dump_printf_loc (MSG_NOTE, vect_location,
11224 "vect_model_load_cost: inside_cost = %u, "
11225 "prologue_cost = %u .\n",
11226 inside_cost, prologue_cost);
11227 return true;
11230 poly_uint64 group_elt = 0;
11231 unsigned int inside_cost = 0, prologue_cost = 0;
11232 /* When costing several adjacent vector loads, we'd rather cost them
11233 once with their total number than cost each one individually. */
11234 unsigned int n_adjacent_loads = 0;
11235 for (j = 0; j < ncopies; j++)
11237 /* 1. Create the vector or array pointer update chain. */
11238 if (j == 0 && !costing_p)
11240 bool simd_lane_access_p
11241 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11242 if (simd_lane_access_p
11243 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11244 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11245 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11246 && integer_zerop (DR_INIT (first_dr_info->dr))
11247 && alias_sets_conflict_p (get_alias_set (aggr_type),
11248 get_alias_set (TREE_TYPE (ref_type)))
11249 && (alignment_support_scheme == dr_aligned
11250 || alignment_support_scheme == dr_unaligned_supported))
11252 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11253 dataref_offset = build_int_cst (ref_type, 0);
11255 else if (diff_first_stmt_info)
11257 dataref_ptr
11258 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11259 aggr_type, at_loop, offset, &dummy,
11260 gsi, &ptr_incr, simd_lane_access_p,
11261 bump);
11262 /* Adjust the pointer by the difference to first_stmt. */
11263 data_reference_p ptrdr
11264 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11265 tree diff
11266 = fold_convert (sizetype,
11267 size_binop (MINUS_EXPR,
11268 DR_INIT (first_dr_info->dr),
11269 DR_INIT (ptrdr)));
11270 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11271 stmt_info, diff);
11272 if (alignment_support_scheme == dr_explicit_realign)
11274 msq = vect_setup_realignment (vinfo,
11275 first_stmt_info_for_drptr, gsi,
11276 &realignment_token,
11277 alignment_support_scheme,
11278 dataref_ptr, &at_loop);
11279 gcc_assert (!compute_in_loop);
11282 else
11283 dataref_ptr
11284 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11285 at_loop,
11286 offset, &dummy, gsi, &ptr_incr,
11287 simd_lane_access_p, bump);
11289 else if (!costing_p)
11291 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11292 if (dataref_offset)
11293 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11294 bump);
11295 else
11296 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11297 stmt_info, bump);
11300 if (grouped_load || slp_perm)
11301 dr_chain.create (vec_num);
11303 gimple *new_stmt = NULL;
11304 for (i = 0; i < vec_num; i++)
11306 tree final_mask = NULL_TREE;
11307 tree final_len = NULL_TREE;
11308 tree bias = NULL_TREE;
11309 if (!costing_p)
11311 if (mask)
11312 vec_mask = vec_masks[vec_num * j + i];
11313 if (loop_masks)
11314 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11315 vec_num * ncopies, vectype,
11316 vec_num * j + i);
11317 if (vec_mask)
11318 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11319 final_mask, vec_mask, gsi);
11321 if (i > 0)
11322 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11323 gsi, stmt_info, bump);
11326 /* 2. Create the vector-load in the loop. */
11327 switch (alignment_support_scheme)
11329 case dr_aligned:
11330 case dr_unaligned_supported:
11332 if (costing_p)
11333 break;
11335 unsigned int misalign;
11336 unsigned HOST_WIDE_INT align;
11337 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11338 if (alignment_support_scheme == dr_aligned)
11339 misalign = 0;
11340 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11342 align
11343 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11344 misalign = 0;
11346 else
11347 misalign = misalignment;
11348 if (dataref_offset == NULL_TREE
11349 && TREE_CODE (dataref_ptr) == SSA_NAME)
11350 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11351 misalign);
11352 align = least_bit_hwi (misalign | align);
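              /* E.g. with a 16-byte target alignment and misalign == 4
                 this yields least_bit_hwi (4 | 16) == 4, i.e. a known
                 alignment of 4 bytes for the access.  */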
11354 /* Compute IFN when LOOP_LENS or final_mask valid. */
11355 machine_mode vmode = TYPE_MODE (vectype);
11356 machine_mode new_vmode = vmode;
11357 internal_fn partial_ifn = IFN_LAST;
11358 if (loop_lens)
11360 opt_machine_mode new_ovmode
11361 = get_len_load_store_mode (vmode, true, &partial_ifn);
11362 new_vmode = new_ovmode.require ();
11363 unsigned factor
11364 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
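                    /* When the target only provides the len load on a
                       VnQI view (new_vmode != vmode) the length is counted
                       in bytes, hence the element-size factor.  */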
11365 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11366 vec_num * ncopies, vectype,
11367 vec_num * j + i, factor);
11369 else if (final_mask)
11371 if (!can_vec_mask_load_store_p (
11372 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11373 &partial_ifn))
11374 gcc_unreachable ();
11377 if (partial_ifn == IFN_MASK_LEN_LOAD)
11379 if (!final_len)
11381 /* Pass VF value to 'len' argument of
11382 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11383 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11385 if (!final_mask)
11387 /* Pass all ones value to 'mask' argument of
11388 MASK_LEN_LOAD if final_mask is invalid. */
11389 mask_vectype = truth_type_for (vectype);
11390 final_mask = build_minus_one_cst (mask_vectype);
11393 if (final_len)
11395 signed char biasval
11396 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11398 bias = build_int_cst (intQI_type_node, biasval);
11401 if (final_len)
11403 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11404 gcall *call;
11405 if (partial_ifn == IFN_MASK_LEN_LOAD)
11406 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11407 dataref_ptr, ptr,
11408 final_mask, final_len,
11409 bias);
11410 else
11411 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11412 dataref_ptr, ptr,
11413 final_len, bias);
11414 gimple_call_set_nothrow (call, true);
11415 new_stmt = call;
11416 data_ref = NULL_TREE;
11418 /* Need conversion if it's wrapped with VnQI. */
11419 if (vmode != new_vmode)
11421 tree new_vtype = build_vector_type_for_mode (
11422 unsigned_intQI_type_node, new_vmode);
11423 tree var
11424 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11425 gimple_set_lhs (call, var);
11426 vect_finish_stmt_generation (vinfo, stmt_info, call,
11427 gsi);
11428 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11429 new_stmt = gimple_build_assign (vec_dest,
11430 VIEW_CONVERT_EXPR, op);
11433 else if (final_mask)
11435 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11436 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11437 dataref_ptr, ptr,
11438 final_mask);
11439 gimple_call_set_nothrow (call, true);
11440 new_stmt = call;
11441 data_ref = NULL_TREE;
11443 else
11445 tree ltype = vectype;
11446 tree new_vtype = NULL_TREE;
11447 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11448 unsigned int vect_align
11449 = vect_known_alignment_in_bytes (first_dr_info, vectype);
11450 unsigned int scalar_dr_size
11451 = vect_get_scalar_dr_size (first_dr_info);
11452 /* If there's no peeling for gaps but we have a gap
11453 with slp loads then load the lower half of the
11454 vector only. See get_group_load_store_type for
11455 when we apply this optimization. */
11456 if (slp
11457 && loop_vinfo
11458 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
11459 && known_eq (nunits, (group_size - gap) * 2)
11460 && known_eq (nunits, group_size)
11461 && gap >= (vect_align / scalar_dr_size))
11463 tree half_vtype;
11464 new_vtype
11465 = vector_vector_composition_type (vectype, 2,
11466 &half_vtype);
11467 if (new_vtype != NULL_TREE)
11468 ltype = half_vtype;
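                    /* E.g. for group_size == nunits == 4 with a trailing
                       gap of 2, LTYPE becomes a two-element half vector;
                       the low half is loaded and widened back to VECTYPE
                       with zeros below.  */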
11470 tree offset
11471 = (dataref_offset ? dataref_offset
11472 : build_int_cst (ref_type, 0));
11473 if (ltype != vectype
11474 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11476 unsigned HOST_WIDE_INT gap_offset
11477 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
11478 tree gapcst = build_int_cst (ref_type, gap_offset);
11479 offset = size_binop (PLUS_EXPR, offset, gapcst);
11481 data_ref
11482 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11483 if (alignment_support_scheme == dr_aligned)
11485 else
11486 TREE_TYPE (data_ref)
11487 = build_aligned_type (TREE_TYPE (data_ref),
11488 align * BITS_PER_UNIT);
11489 if (ltype != vectype)
11491 vect_copy_ref_info (data_ref,
11492 DR_REF (first_dr_info->dr));
11493 tree tem = make_ssa_name (ltype);
11494 new_stmt = gimple_build_assign (tem, data_ref);
11495 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11496 gsi);
11497 data_ref = NULL;
11498 vec<constructor_elt, va_gc> *v;
11499 vec_alloc (v, 2);
11500 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11502 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11503 build_zero_cst (ltype));
11504 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11506 else
11508 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11509 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11510 build_zero_cst (ltype));
11512 gcc_assert (new_vtype != NULL_TREE);
11513 if (new_vtype == vectype)
11514 new_stmt = gimple_build_assign (
11515 vec_dest, build_constructor (vectype, v));
11516 else
11518 tree new_vname = make_ssa_name (new_vtype);
11519 new_stmt = gimple_build_assign (
11520 new_vname, build_constructor (new_vtype, v));
11521 vect_finish_stmt_generation (vinfo, stmt_info,
11522 new_stmt, gsi);
11523 new_stmt = gimple_build_assign (
11524 vec_dest,
11525 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11529 break;
11531 case dr_explicit_realign:
11533 if (costing_p)
11534 break;
11535 tree ptr, bump;
11537 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11539 if (compute_in_loop)
11540 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11541 &realignment_token,
11542 dr_explicit_realign,
11543 dataref_ptr, NULL);
11545 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11546 ptr = copy_ssa_name (dataref_ptr);
11547 else
11548 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11549 /* For explicit realign the target alignment should be
11550 known at compile time. */
11551 unsigned HOST_WIDE_INT align
11552 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11553 new_stmt = gimple_build_assign (
11554 ptr, BIT_AND_EXPR, dataref_ptr,
11555 build_int_cst (TREE_TYPE (dataref_ptr),
11556 -(HOST_WIDE_INT) align));
11557 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11558 data_ref
11559 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11560 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11561 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11562 new_stmt = gimple_build_assign (vec_dest, data_ref);
11563 new_temp = make_ssa_name (vec_dest, new_stmt);
11564 gimple_assign_set_lhs (new_stmt, new_temp);
11565 gimple_move_vops (new_stmt, stmt_info->stmt);
11566 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11567 msq = new_temp;
11569 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11570 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11571 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11572 bump);
11573 new_stmt = gimple_build_assign (
11574 NULL_TREE, BIT_AND_EXPR, ptr,
11575 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11576 if (TREE_CODE (ptr) == SSA_NAME)
11577 ptr = copy_ssa_name (ptr, new_stmt);
11578 else
11579 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11580 gimple_assign_set_lhs (new_stmt, ptr);
11581 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11582 data_ref
11583 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11584 break;
11586 case dr_explicit_realign_optimized:
11588 if (costing_p)
11589 break;
11590 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11591 new_temp = copy_ssa_name (dataref_ptr);
11592 else
11593 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11594 /* We should only be doing this if we know the target
11595 alignment at compile time. */
11596 unsigned HOST_WIDE_INT align
11597 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11598 new_stmt = gimple_build_assign (
11599 new_temp, BIT_AND_EXPR, dataref_ptr,
11600 build_int_cst (TREE_TYPE (dataref_ptr),
11601 -(HOST_WIDE_INT) align));
11602 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11603 data_ref = build2 (MEM_REF, vectype, new_temp,
11604 build_int_cst (ref_type, 0));
11605 break;
11607 default:
11608 gcc_unreachable ();
11611 /* A common place to cost the vector load above for the different
11612 alignment support schemes. */
11613 if (costing_p)
11615 /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load, we only
11616 need to take care of the first stmt, whose stmt_info is
11617 first_stmt_info; iterating vec_num times on it covers the
11618 cost of the remaining ones, which is consistent with the
11619 transform. The prologue cost for realignment only needs
11620 to be counted once for the whole group. */
11621 bool first_stmt_info_p = first_stmt_info == stmt_info;
11622 bool add_realign_cost = first_stmt_info_p && i == 0;
11623 if (memory_access_type == VMAT_CONTIGUOUS
11624 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11625 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11626 && (!grouped_load || first_stmt_info_p)))
11628 /* Leave realign cases alone to keep them simple. */
11629 if (alignment_support_scheme == dr_explicit_realign_optimized
11630 || alignment_support_scheme == dr_explicit_realign)
11631 vect_get_load_cost (vinfo, stmt_info, 1,
11632 alignment_support_scheme, misalignment,
11633 add_realign_cost, &inside_cost,
11634 &prologue_cost, cost_vec, cost_vec,
11635 true);
11636 else
11637 n_adjacent_loads++;
11640 else
11642 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11643 /* DATA_REF is null if we've already built the statement. */
11644 if (data_ref)
11646 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11647 new_stmt = gimple_build_assign (vec_dest, data_ref);
11649 new_temp = make_ssa_name (vec_dest, new_stmt);
11650 gimple_set_lhs (new_stmt, new_temp);
11651 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11654 /* 3. Handle explicit realignment if necessary/supported.
11655 Create in loop:
11656 vec_dest = realign_load (msq, lsq, realignment_token) */
11657 if (!costing_p
11658 && (alignment_support_scheme == dr_explicit_realign_optimized
11659 || alignment_support_scheme == dr_explicit_realign))
11661 lsq = gimple_assign_lhs (new_stmt);
11662 if (!realignment_token)
11663 realignment_token = dataref_ptr;
11664 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11665 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11666 lsq, realignment_token);
11667 new_temp = make_ssa_name (vec_dest, new_stmt);
11668 gimple_assign_set_lhs (new_stmt, new_temp);
11669 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11671 if (alignment_support_scheme == dr_explicit_realign_optimized)
11673 gcc_assert (phi);
11674 if (i == vec_num - 1 && j == ncopies - 1)
11675 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11676 UNKNOWN_LOCATION);
11677 msq = lsq;
11681 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11683 if (costing_p)
11684 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11685 stmt_info, 0, vect_body);
11686 else
11688 tree perm_mask = perm_mask_for_reverse (vectype);
11689 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11690 perm_mask, stmt_info, gsi);
11691 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11695 /* Collect vector loads and later create their permutation in
11696 vect_transform_grouped_load (). */
11697 if (!costing_p && (grouped_load || slp_perm))
11698 dr_chain.quick_push (new_temp);
11700 /* Store vector loads in the corresponding SLP_NODE. */
11701 if (!costing_p && slp && !slp_perm)
11702 slp_node->push_vec_def (new_stmt);
11704 /* With an SLP permutation we load the gaps as well; without
11705 one we need to skip the gaps after we manage to fully load
11706 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11707 group_elt += nunits;
11708 if (!costing_p
11709 && maybe_ne (group_gap_adj, 0U)
11710 && !slp_perm
11711 && known_eq (group_elt, group_size - group_gap_adj))
11713 poly_wide_int bump_val
11714 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11715 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11716 == -1)
11717 bump_val = -bump_val;
11718 tree bump = wide_int_to_tree (sizetype, bump_val);
11719 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11720 stmt_info, bump);
11721 group_elt = 0;
11724 /* Bump the vector pointer to account for a gap or for excess
11725 elements loaded for a permuted SLP load. */
11726 if (!costing_p
11727 && maybe_ne (group_gap_adj, 0U)
11728 && slp_perm)
11730 poly_wide_int bump_val
11731 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11732 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11733 bump_val = -bump_val;
11734 tree bump = wide_int_to_tree (sizetype, bump_val);
11735 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11736 stmt_info, bump);
11739 if (slp && !slp_perm)
11740 continue;
11742 if (slp_perm)
11744 unsigned n_perms;
11745 /* For SLP we know we've seen all possible uses of dr_chain so
11746 direct vect_transform_slp_perm_load to DCE the unused parts.
11747 ??? This is a hack to prevent compile-time issues as seen
11748 in PR101120 and friends. */
11749 if (costing_p)
11751 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11752 true, &n_perms, nullptr);
11753 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11754 stmt_info, 0, vect_body);
11756 else
11758 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11759 gsi, vf, false, &n_perms,
11760 nullptr, true);
11761 gcc_assert (ok);
11764 else
11766 if (grouped_load)
11768 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11769 /* We assume that the cost of a single load-lanes instruction
11770 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11771 If a grouped access is instead being provided by a
11772 load-and-permute operation, include the cost of the
11773 permutes. */
11774 if (costing_p && first_stmt_info == stmt_info)
11776 /* Uses even and odd extract operations, or shuffle
11777 operations, for each needed permute. */
11778 int group_size = DR_GROUP_SIZE (first_stmt_info);
11779 int nstmts = ceil_log2 (group_size) * group_size;
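              /* E.g. for group_size == 4 this accounts for
                 ceil_log2 (4) * 4 == 8 permute statements.  */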
11780 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11781 stmt_info, 0, vect_body);
11783 if (dump_enabled_p ())
11784 dump_printf_loc (MSG_NOTE, vect_location,
11785 "vect_model_load_cost:"
11786 "strided group_size = %d .\n",
11787 group_size);
11789 else if (!costing_p)
11791 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11792 group_size, gsi);
11793 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11796 else if (!costing_p)
11797 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11799 dr_chain.release ();
11801 if (!slp && !costing_p)
11802 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11804 if (costing_p)
11806 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11807 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11808 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11809 if (n_adjacent_loads > 0)
11810 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
11811 alignment_support_scheme, misalignment, false,
11812 &inside_cost, &prologue_cost, cost_vec, cost_vec,
11813 true);
11814 if (dump_enabled_p ())
11815 dump_printf_loc (MSG_NOTE, vect_location,
11816 "vect_model_load_cost: inside_cost = %u, "
11817 "prologue_cost = %u .\n",
11818 inside_cost, prologue_cost);
11821 return true;
11824 /* Function vect_is_simple_cond.
11826 Input:
11827 LOOP - the loop that is being vectorized.
11828 COND - Condition that is checked for simple use.
11830 Output:
11831 *COMP_VECTYPE - the vector type for the comparison.
11832 *DTS - The def types for the arguments of the comparison
11834 Returns whether a COND can be vectorized. Checks whether the
11835 condition operands are supportable using vect_is_simple_use. */
11837 static bool
11838 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11839 slp_tree slp_node, tree *comp_vectype,
11840 enum vect_def_type *dts, tree vectype)
11842 tree lhs, rhs;
11843 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11844 slp_tree slp_op;
11846 /* Mask case. */
11847 if (TREE_CODE (cond) == SSA_NAME
11848 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11850 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11851 &slp_op, &dts[0], comp_vectype)
11852 || !*comp_vectype
11853 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11854 return false;
11855 return true;
11858 if (!COMPARISON_CLASS_P (cond))
11859 return false;
11861 lhs = TREE_OPERAND (cond, 0);
11862 rhs = TREE_OPERAND (cond, 1);
11864 if (TREE_CODE (lhs) == SSA_NAME)
11866 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11867 &lhs, &slp_op, &dts[0], &vectype1))
11868 return false;
11870 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11871 || TREE_CODE (lhs) == FIXED_CST)
11872 dts[0] = vect_constant_def;
11873 else
11874 return false;
11876 if (TREE_CODE (rhs) == SSA_NAME)
11878 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11879 &rhs, &slp_op, &dts[1], &vectype2))
11880 return false;
11882 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11883 || TREE_CODE (rhs) == FIXED_CST)
11884 dts[1] = vect_constant_def;
11885 else
11886 return false;
11888 if (vectype1 && vectype2
11889 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11890 TYPE_VECTOR_SUBPARTS (vectype2)))
11891 return false;
11893 *comp_vectype = vectype1 ? vectype1 : vectype2;
11894 /* Invariant comparison. */
11895 if (! *comp_vectype)
11897 tree scalar_type = TREE_TYPE (lhs);
11898 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11899 *comp_vectype = truth_type_for (vectype);
11900 else
11902 /* If we can widen the comparison to match vectype do so. */
11903 if (INTEGRAL_TYPE_P (scalar_type)
11904 && !slp_node
11905 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11906 TYPE_SIZE (TREE_TYPE (vectype))))
11907 scalar_type = build_nonstandard_integer_type
11908 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
11909 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11910 slp_node);
11914 return true;
11917 /* vectorizable_condition.
11919 Check if STMT_INFO is a conditional modify expression that can be vectorized.
11920 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11921 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11922 at GSI.
11924 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11926 Return true if STMT_INFO is vectorizable in this way. */
11928 static bool
11929 vectorizable_condition (vec_info *vinfo,
11930 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11931 gimple **vec_stmt,
11932 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11934 tree scalar_dest = NULL_TREE;
11935 tree vec_dest = NULL_TREE;
11936 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
11937 tree then_clause, else_clause;
11938 tree comp_vectype = NULL_TREE;
11939 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
11940 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
11941 tree vec_compare;
11942 tree new_temp;
11943 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11944 enum vect_def_type dts[4]
11945 = {vect_unknown_def_type, vect_unknown_def_type,
11946 vect_unknown_def_type, vect_unknown_def_type};
11947 int ndts = 4;
11948 int ncopies;
11949 int vec_num;
11950 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11951 int i;
11952 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11953 vec<tree> vec_oprnds0 = vNULL;
11954 vec<tree> vec_oprnds1 = vNULL;
11955 vec<tree> vec_oprnds2 = vNULL;
11956 vec<tree> vec_oprnds3 = vNULL;
11957 tree vec_cmp_type;
11958 bool masked = false;
11960 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11961 return false;
11963 /* Is vectorizable conditional operation? */
11964 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11965 if (!stmt)
11966 return false;
11968 code = gimple_assign_rhs_code (stmt);
11969 if (code != COND_EXPR)
11970 return false;
11972 stmt_vec_info reduc_info = NULL;
11973 int reduc_index = -1;
11974 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
11975 bool for_reduction
11976 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
11977 if (for_reduction)
11979 if (slp_node)
11980 return false;
11981 reduc_info = info_for_reduction (vinfo, stmt_info);
11982 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
11983 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
11984 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
11985 || reduc_index != -1);
11987 else
11989 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11990 return false;
11993 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11994 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11996 if (slp_node)
11998 ncopies = 1;
11999 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
12001 else
12003 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12004 vec_num = 1;
12007 gcc_assert (ncopies >= 1);
12008 if (for_reduction && ncopies > 1)
12009 return false; /* FORNOW */
12011 cond_expr = gimple_assign_rhs1 (stmt);
12013 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
12014 &comp_vectype, &dts[0], vectype)
12015 || !comp_vectype)
12016 return false;
12018 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
12019 slp_tree then_slp_node, else_slp_node;
12020 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
12021 &then_clause, &then_slp_node, &dts[2], &vectype1))
12022 return false;
12023 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
12024 &else_clause, &else_slp_node, &dts[3], &vectype2))
12025 return false;
12027 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
12028 return false;
12030 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
12031 return false;
12033 masked = !COMPARISON_CLASS_P (cond_expr);
12034 vec_cmp_type = truth_type_for (comp_vectype);
12036 if (vec_cmp_type == NULL_TREE)
12037 return false;
12039 cond_code = TREE_CODE (cond_expr);
12040 if (!masked)
12042 cond_expr0 = TREE_OPERAND (cond_expr, 0);
12043 cond_expr1 = TREE_OPERAND (cond_expr, 1);
12046 /* For conditional reductions, the "then" value needs to be the candidate
12047 value calculated by this iteration while the "else" value needs to be
12048 the result carried over from previous iterations. If the COND_EXPR
12049 is the other way around, we need to swap it. */
12050 bool must_invert_cmp_result = false;
12051 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
12053 if (masked)
12054 must_invert_cmp_result = true;
12055 else
12057 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
12058 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
12059 if (new_code == ERROR_MARK)
12060 must_invert_cmp_result = true;
12061 else
12063 cond_code = new_code;
12064 /* Make sure we don't accidentally use the old condition. */
12065 cond_expr = NULL_TREE;
12068 std::swap (then_clause, else_clause);
12071 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12073 /* Boolean values may have another representation in vectors
12074 and therefore we prefer bit operations over comparison for
12075 them (which also works for scalar masks). We store opcodes
12076 to use in bitop1 and bitop2. The statement is vectorized as
12077 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12078 depending on bitop1 and bitop2 arity. */
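      /* (For boolean operands A and B these use the identities
          A > B == A & ~B, A >= B == A | ~B, A != B == A ^ B and
          A == B == ~(A ^ B); LT and LE are handled by swapping the
          operands.)  */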
12079 switch (cond_code)
12081 case GT_EXPR:
12082 bitop1 = BIT_NOT_EXPR;
12083 bitop2 = BIT_AND_EXPR;
12084 break;
12085 case GE_EXPR:
12086 bitop1 = BIT_NOT_EXPR;
12087 bitop2 = BIT_IOR_EXPR;
12088 break;
12089 case LT_EXPR:
12090 bitop1 = BIT_NOT_EXPR;
12091 bitop2 = BIT_AND_EXPR;
12092 std::swap (cond_expr0, cond_expr1);
12093 break;
12094 case LE_EXPR:
12095 bitop1 = BIT_NOT_EXPR;
12096 bitop2 = BIT_IOR_EXPR;
12097 std::swap (cond_expr0, cond_expr1);
12098 break;
12099 case NE_EXPR:
12100 bitop1 = BIT_XOR_EXPR;
12101 break;
12102 case EQ_EXPR:
12103 bitop1 = BIT_XOR_EXPR;
12104 bitop2 = BIT_NOT_EXPR;
12105 break;
12106 default:
12107 return false;
12109 cond_code = SSA_NAME;
12112 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12113 && reduction_type == EXTRACT_LAST_REDUCTION
12114 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12116 if (dump_enabled_p ())
12117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12118 "reduction comparison operation not supported.\n");
12119 return false;
12122 if (!vec_stmt)
12124 if (bitop1 != NOP_EXPR)
12126 machine_mode mode = TYPE_MODE (comp_vectype);
12127 optab optab;
12129 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12130 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12131 return false;
12133 if (bitop2 != NOP_EXPR)
12135 optab = optab_for_tree_code (bitop2, comp_vectype,
12136 optab_default);
12137 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12138 return false;
12142 vect_cost_for_stmt kind = vector_stmt;
12143 if (reduction_type == EXTRACT_LAST_REDUCTION)
12144 /* Count one reduction-like operation per vector. */
12145 kind = vec_to_scalar;
12146 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12147 && (masked
12148 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12149 cond_code)
12150 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12151 ERROR_MARK))))
12152 return false;
12154 if (slp_node
12155 && (!vect_maybe_update_slp_op_vectype
12156 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12157 || (op_adjust == 1
12158 && !vect_maybe_update_slp_op_vectype
12159 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12160 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12161 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12163 if (dump_enabled_p ())
12164 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12165 "incompatible vector types for invariants\n");
12166 return false;
12169 if (loop_vinfo && for_reduction
12170 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12172 if (reduction_type == EXTRACT_LAST_REDUCTION)
12174 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12175 vectype, OPTIMIZE_FOR_SPEED))
12176 vect_record_loop_len (loop_vinfo,
12177 &LOOP_VINFO_LENS (loop_vinfo),
12178 ncopies * vec_num, vectype, 1);
12179 else
12180 vect_record_loop_mask (loop_vinfo,
12181 &LOOP_VINFO_MASKS (loop_vinfo),
12182 ncopies * vec_num, vectype, NULL);
12184 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12185 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12187 if (dump_enabled_p ())
12188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12189 "conditional reduction prevents the use"
12190 " of partial vectors.\n");
12191 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12195 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12196 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
12197 cost_vec, kind);
12198 return true;
12201 /* Transform. */
12203 /* Handle def. */
12204 scalar_dest = gimple_assign_lhs (stmt);
12205 if (reduction_type != EXTRACT_LAST_REDUCTION)
12206 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12208 bool swap_cond_operands = false;
12210 /* See whether another part of the vectorized code applies a loop
12211 mask to the condition, or to its inverse. */
12213 vec_loop_masks *masks = NULL;
12214 vec_loop_lens *lens = NULL;
12215 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12217 if (reduction_type == EXTRACT_LAST_REDUCTION)
12218 lens = &LOOP_VINFO_LENS (loop_vinfo);
12220 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12222 if (reduction_type == EXTRACT_LAST_REDUCTION)
12223 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12224 else
12226 scalar_cond_masked_key cond (cond_expr, ncopies);
12227 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12228 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12229 else
12231 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12232 tree_code orig_code = cond.code;
12233 cond.code = invert_tree_comparison (cond.code, honor_nans);
12234 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12236 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12237 cond_code = cond.code;
12238 swap_cond_operands = true;
12240 else
12242 /* Try the inverse of the current mask. We check whether the
12243 inverse mask is live, and if so we generate a negation of
12244 the current mask so that we still honor NaNs. */
12245 cond.inverted_p = true;
12246 cond.code = orig_code;
12247 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12249 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12250 cond_code = cond.code;
12251 swap_cond_operands = true;
12252 must_invert_cmp_result = true;
12259 /* Handle cond expr. */
12260 if (masked)
12261 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12262 cond_expr, &vec_oprnds0, comp_vectype,
12263 then_clause, &vec_oprnds2, vectype,
12264 reduction_type != EXTRACT_LAST_REDUCTION
12265 ? else_clause : NULL, &vec_oprnds3, vectype);
12266 else
12267 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12268 cond_expr0, &vec_oprnds0, comp_vectype,
12269 cond_expr1, &vec_oprnds1, comp_vectype,
12270 then_clause, &vec_oprnds2, vectype,
12271 reduction_type != EXTRACT_LAST_REDUCTION
12272 ? else_clause : NULL, &vec_oprnds3, vectype);
12274 /* Arguments are ready. Create the new vector stmt. */
12275 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12277 vec_then_clause = vec_oprnds2[i];
12278 if (reduction_type != EXTRACT_LAST_REDUCTION)
12279 vec_else_clause = vec_oprnds3[i];
12281 if (swap_cond_operands)
12282 std::swap (vec_then_clause, vec_else_clause);
12284 if (masked)
12285 vec_compare = vec_cond_lhs;
12286 else
12288 vec_cond_rhs = vec_oprnds1[i];
12289 if (bitop1 == NOP_EXPR)
12291 gimple_seq stmts = NULL;
12292 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12293 vec_cond_lhs, vec_cond_rhs);
12294 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12296 else
12298 new_temp = make_ssa_name (vec_cmp_type);
12299 gassign *new_stmt;
12300 if (bitop1 == BIT_NOT_EXPR)
12301 new_stmt = gimple_build_assign (new_temp, bitop1,
12302 vec_cond_rhs);
12303 else
12304 new_stmt
12305 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12306 vec_cond_rhs);
12307 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12308 if (bitop2 == NOP_EXPR)
12309 vec_compare = new_temp;
12310 else if (bitop2 == BIT_NOT_EXPR
12311 && reduction_type != EXTRACT_LAST_REDUCTION)
12313 /* Instead of doing ~x ? y : z do x ? z : y. */
12314 vec_compare = new_temp;
12315 std::swap (vec_then_clause, vec_else_clause);
12317 else
12319 vec_compare = make_ssa_name (vec_cmp_type);
12320 if (bitop2 == BIT_NOT_EXPR)
12321 new_stmt
12322 = gimple_build_assign (vec_compare, bitop2, new_temp);
12323 else
12324 new_stmt
12325 = gimple_build_assign (vec_compare, bitop2,
12326 vec_cond_lhs, new_temp);
12327 vect_finish_stmt_generation (vinfo, stmt_info,
12328 new_stmt, gsi);
12333 /* If we decided to apply a loop mask to the result of the vector
12334 comparison, AND the comparison with the mask now. Later passes
12335 should then be able to reuse the AND results between multiple
12336 vector statements.
12338 For example:
12339 for (int i = 0; i < 100; ++i)
12340 x[i] = y[i] ? z[i] : 10;
12342 results in following optimized GIMPLE:
12344 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12345 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12346 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12347 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12348 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12349 vect_iftmp.11_47, { 10, ... }>;
12351 	 instead of using masked and unmasked forms of
12352 vec != { 0, ... } (masked in the MASK_LOAD,
12353 unmasked in the VEC_COND_EXPR). */
12355 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12356 in cases where that's necessary. */
12358 tree len = NULL_TREE, bias = NULL_TREE;
12359 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12361 if (!is_gimple_val (vec_compare))
12363 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12364 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12365 vec_compare);
12366 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12367 vec_compare = vec_compare_name;
12370 if (must_invert_cmp_result)
12372 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12373 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12374 BIT_NOT_EXPR,
12375 vec_compare);
12376 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12377 vec_compare = vec_compare_name;
12380 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12381 vectype, OPTIMIZE_FOR_SPEED))
12383 if (lens)
12385 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12386 vec_num * ncopies, vectype, i, 1);
12387 signed char biasval
12388 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12389 bias = build_int_cst (intQI_type_node, biasval);
12391 else
12393 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12394 bias = build_int_cst (intQI_type_node, 0);
12397 if (masks)
12399 tree loop_mask
12400 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12401 vectype, i);
12402 tree tmp2 = make_ssa_name (vec_cmp_type);
12403 gassign *g
12404 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12405 loop_mask);
12406 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12407 vec_compare = tmp2;
12411 gimple *new_stmt;
12412 if (reduction_type == EXTRACT_LAST_REDUCTION)
12414 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12415 tree lhs = gimple_get_lhs (old_stmt);
12416 if (len)
12417 new_stmt = gimple_build_call_internal
12418 (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
12419 vec_then_clause, len, bias);
12420 else
12421 new_stmt = gimple_build_call_internal
12422 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
12423 vec_then_clause);
12424 gimple_call_set_lhs (new_stmt, lhs);
12425 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12426 if (old_stmt == gsi_stmt (*gsi))
12427 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12428 else
12430 /* In this case we're moving the definition to later in the
12431 block. That doesn't matter because the only uses of the
12432 lhs are in phi statements. */
12433 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12434 gsi_remove (&old_gsi, true);
12435 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12438 else
12440 new_temp = make_ssa_name (vec_dest);
12441 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12442 vec_then_clause, vec_else_clause);
12443 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12445 if (slp_node)
12446 slp_node->push_vec_def (new_stmt);
12447 else
12448 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12451 if (!slp_node)
12452 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12454 vec_oprnds0.release ();
12455 vec_oprnds1.release ();
12456 vec_oprnds2.release ();
12457 vec_oprnds3.release ();
12459 return true;
12462 /* Helper of vectorizable_comparison.
12464 Check if STMT_INFO is comparison expression CODE that can be vectorized.
12465 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12466 comparison, put it in VEC_STMT, and insert it at GSI.
12468 Return true if STMT_INFO is vectorizable in this way. */
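/* Illustrative sketch (not taken from an actual dump): when the target
   supports the comparison directly, i.e. bitop1 below stays NOP_EXPR, a
   scalar statement such as
     _1 = x_2 > y_3;
   becomes a single mask-producing vector statement
     mask_4 = vect_x_5 > vect_y_6;
   For vector boolean operands the comparison is instead rewritten into the
   bit operations selected via bitop1/bitop2 inside the function.  */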
12470 static bool
12471 vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12472 stmt_vec_info stmt_info, tree_code code,
12473 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12474 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12476 tree lhs, rhs1, rhs2;
12477 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12478 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12479 tree new_temp;
12480 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12481 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12482 int ndts = 2;
12483 poly_uint64 nunits;
12484 int ncopies;
12485 enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12486 int i;
12487 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12488 vec<tree> vec_oprnds0 = vNULL;
12489 vec<tree> vec_oprnds1 = vNULL;
12490 tree mask_type;
12491 tree mask;
12493 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12494 return false;
12496 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12497 return false;
12499 mask_type = vectype;
12500 nunits = TYPE_VECTOR_SUBPARTS (vectype);
12502 if (slp_node)
12503 ncopies = 1;
12504 else
12505 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12507 gcc_assert (ncopies >= 1);
12509 if (TREE_CODE_CLASS (code) != tcc_comparison)
12510 return false;
12512 slp_tree slp_rhs1, slp_rhs2;
12513 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12514 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12515 return false;
12517 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12518 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12519 return false;
12521 if (vectype1 && vectype2
12522 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12523 TYPE_VECTOR_SUBPARTS (vectype2)))
12524 return false;
12526 vectype = vectype1 ? vectype1 : vectype2;
12528 /* Invariant comparison. */
12529 if (!vectype)
12531 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
12532 vectype = mask_type;
12533 else
12534 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
12535 slp_node);
12536 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12537 return false;
12539 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12540 return false;
12542 /* Can't compare mask and non-mask types. */
12543 if (vectype1 && vectype2
12544 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12545 return false;
12547 /* Boolean values may have another representation in vectors
12548 and therefore we prefer bit operations over comparison for
12549 them (which also works for scalar masks). We store opcodes
12550 to use in bitop1 and bitop2. Statement is vectorized as
12551 BITOP2 (rhs1 BITOP1 rhs2) or
12552 rhs1 BITOP2 (BITOP1 rhs2)
12553 depending on bitop1 and bitop2 arity. */
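  /* Summary of the mapping chosen below for vector booleans (illustrative,
     derived from the bitop1/bitop2 assignments that follow):
       a >  b  ->  a & ~b
       a >= b  ->  a | ~b
       a <  b  ->  b & ~a   (operands swapped)
       a <= b  ->  b | ~a   (operands swapped)
       a != b  ->  a ^ b
       a == b  ->  ~(a ^ b)  */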
12554 bool swap_p = false;
12555 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12557 if (code == GT_EXPR)
12559 bitop1 = BIT_NOT_EXPR;
12560 bitop2 = BIT_AND_EXPR;
12562 else if (code == GE_EXPR)
12564 bitop1 = BIT_NOT_EXPR;
12565 bitop2 = BIT_IOR_EXPR;
12567 else if (code == LT_EXPR)
12569 bitop1 = BIT_NOT_EXPR;
12570 bitop2 = BIT_AND_EXPR;
12571 swap_p = true;
12573 else if (code == LE_EXPR)
12575 bitop1 = BIT_NOT_EXPR;
12576 bitop2 = BIT_IOR_EXPR;
12577 swap_p = true;
12579 else
12581 bitop1 = BIT_XOR_EXPR;
12582 if (code == EQ_EXPR)
12583 bitop2 = BIT_NOT_EXPR;
12587 if (!vec_stmt)
12589 if (bitop1 == NOP_EXPR)
12591 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12592 return false;
12594 else
12596 machine_mode mode = TYPE_MODE (vectype);
12597 optab optab;
12599 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12600 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12601 return false;
12603 if (bitop2 != NOP_EXPR)
12605 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12606 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12607 return false;
12611 /* Put types on constant and invariant SLP children. */
12612 if (slp_node
12613 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12614 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12616 if (dump_enabled_p ())
12617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12618 "incompatible vector types for invariants\n");
12619 return false;
12622 vect_model_simple_cost (vinfo, stmt_info,
12623 ncopies * (1 + (bitop2 != NOP_EXPR)),
12624 dts, ndts, slp_node, cost_vec);
12625 return true;
12628 /* Transform. */
12630 /* Handle def. */
12631 lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info));
12632 mask = vect_create_destination_var (lhs, mask_type);
12634 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12635 rhs1, &vec_oprnds0, vectype,
12636 rhs2, &vec_oprnds1, vectype);
12637 if (swap_p)
12638 std::swap (vec_oprnds0, vec_oprnds1);
12640 /* Arguments are ready. Create the new vector stmt. */
12641 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12643 gimple *new_stmt;
12644 vec_rhs2 = vec_oprnds1[i];
12646 new_temp = make_ssa_name (mask);
12647 if (bitop1 == NOP_EXPR)
12649 new_stmt = gimple_build_assign (new_temp, code,
12650 vec_rhs1, vec_rhs2);
12651 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12653 else
12655 if (bitop1 == BIT_NOT_EXPR)
12656 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12657 else
12658 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12659 vec_rhs2);
12660 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12661 if (bitop2 != NOP_EXPR)
12663 tree res = make_ssa_name (mask);
12664 if (bitop2 == BIT_NOT_EXPR)
12665 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12666 else
12667 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12668 new_temp);
12669 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12672 if (slp_node)
12673 slp_node->push_vec_def (new_stmt);
12674 else
12675 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12678 if (!slp_node)
12679 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12681 vec_oprnds0.release ();
12682 vec_oprnds1.release ();
12684 return true;
12687 /* vectorizable_comparison.
12689 Check if STMT_INFO is comparison expression that can be vectorized.
12690 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12691 comparison, put it in VEC_STMT, and insert it at GSI.
12693 Return true if STMT_INFO is vectorizable in this way. */
12695 static bool
12696 vectorizable_comparison (vec_info *vinfo,
12697 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12698 gimple **vec_stmt,
12699 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12701 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12703 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12704 return false;
12706 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12707 return false;
12709 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12710 if (!stmt)
12711 return false;
12713 enum tree_code code = gimple_assign_rhs_code (stmt);
12714 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12715 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12716 vec_stmt, slp_node, cost_vec))
12717 return false;
12719 if (!vec_stmt)
12720 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12722 return true;
12725 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12726 can handle all live statements in the node. Otherwise return true
12727 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12728 VEC_STMT_P is as for vectorizable_live_operation. */
12730 static bool
12731 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
12732 slp_tree slp_node, slp_instance slp_node_instance,
12733 bool vec_stmt_p,
12734 stmt_vector_for_cost *cost_vec)
12736 if (slp_node)
12738 stmt_vec_info slp_stmt_info;
12739 unsigned int i;
12740 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
12742 if (STMT_VINFO_LIVE_P (slp_stmt_info)
12743 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
12744 slp_node_instance, i,
12745 vec_stmt_p, cost_vec))
12746 return false;
12749 else if (STMT_VINFO_LIVE_P (stmt_info)
12750 && !vectorizable_live_operation (vinfo, stmt_info,
12751 slp_node, slp_node_instance, -1,
12752 vec_stmt_p, cost_vec))
12753 return false;
12755 return true;
12758 /* Make sure the statement is vectorizable. */
12760 opt_result
12761 vect_analyze_stmt (vec_info *vinfo,
12762 stmt_vec_info stmt_info, bool *need_to_vectorize,
12763 slp_tree node, slp_instance node_instance,
12764 stmt_vector_for_cost *cost_vec)
12766 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12767 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
12768 bool ok;
12769 gimple_seq pattern_def_seq;
12771 if (dump_enabled_p ())
12772 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
12773 stmt_info->stmt);
12775 if (gimple_has_volatile_ops (stmt_info->stmt))
12776 return opt_result::failure_at (stmt_info->stmt,
12777 "not vectorized:"
12778 " stmt has volatile operands: %G\n",
12779 stmt_info->stmt);
12781 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12782 && node == NULL
12783 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
12785 gimple_stmt_iterator si;
12787 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
12789 stmt_vec_info pattern_def_stmt_info
12790 = vinfo->lookup_stmt (gsi_stmt (si));
12791 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
12792 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
12794 /* Analyze def stmt of STMT if it's a pattern stmt. */
12795 if (dump_enabled_p ())
12796 dump_printf_loc (MSG_NOTE, vect_location,
12797 "==> examining pattern def statement: %G",
12798 pattern_def_stmt_info->stmt);
12800 opt_result res
12801 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
12802 need_to_vectorize, node, node_instance,
12803 cost_vec);
12804 if (!res)
12805 return res;
12810 /* Skip stmts that do not need to be vectorized. In loops this is expected
12811 to include:
12812 - the COND_EXPR which is the loop exit condition
12813 - any LABEL_EXPRs in the loop
12814 - computations that are used only for array indexing or loop control.
12815 In basic blocks we only analyze statements that are a part of some SLP
12816 instance, therefore, all the statements are relevant.
12818      A pattern statement needs to be analyzed instead of the original statement
12819      if the original statement is not relevant.  Otherwise, we analyze both
12820      statements.  In basic blocks we are called from some SLP instance
12821      traversal; don't analyze pattern stmts there, since the pattern stmts
12822      will already be part of an SLP instance.  */
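  /* Illustrative example: an address or induction computation used only for
     array indexing or the loop exit test is neither relevant nor live, so we
     return success for it here without analyzing it further; only when its
     pattern statement (if any) is relevant or live do we analyze that pattern
     statement instead.  */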
12824 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
12825 if (!STMT_VINFO_RELEVANT_P (stmt_info)
12826 && !STMT_VINFO_LIVE_P (stmt_info))
12828 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12829 && pattern_stmt_info
12830 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12831 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12833 /* Analyze PATTERN_STMT instead of the original stmt. */
12834 stmt_info = pattern_stmt_info;
12835 if (dump_enabled_p ())
12836 dump_printf_loc (MSG_NOTE, vect_location,
12837 "==> examining pattern statement: %G",
12838 stmt_info->stmt);
12840 else
12842 if (dump_enabled_p ())
12843 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
12845 return opt_result::success ();
12848 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12849 && node == NULL
12850 && pattern_stmt_info
12851 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12852 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12854 /* Analyze PATTERN_STMT too. */
12855 if (dump_enabled_p ())
12856 dump_printf_loc (MSG_NOTE, vect_location,
12857 "==> examining pattern statement: %G",
12858 pattern_stmt_info->stmt);
12860 opt_result res
12861 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
12862 node_instance, cost_vec);
12863 if (!res)
12864 return res;
12867 switch (STMT_VINFO_DEF_TYPE (stmt_info))
12869 case vect_internal_def:
12870 break;
12872 case vect_reduction_def:
12873 case vect_nested_cycle:
12874 gcc_assert (!bb_vinfo
12875 && (relevance == vect_used_in_outer
12876 || relevance == vect_used_in_outer_by_reduction
12877 || relevance == vect_used_by_reduction
12878 || relevance == vect_unused_in_scope
12879 || relevance == vect_used_only_live));
12880 break;
12882 case vect_induction_def:
12883 case vect_first_order_recurrence:
12884 gcc_assert (!bb_vinfo);
12885 break;
12887 case vect_constant_def:
12888 case vect_external_def:
12889 case vect_unknown_def_type:
12890 default:
12891 gcc_unreachable ();
12894 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12895 if (node)
12896 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
12898 if (STMT_VINFO_RELEVANT_P (stmt_info))
12900 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
12901 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12902 || (call && gimple_call_lhs (call) == NULL_TREE));
12903 *need_to_vectorize = true;
12906 if (PURE_SLP_STMT (stmt_info) && !node)
12908 if (dump_enabled_p ())
12909 dump_printf_loc (MSG_NOTE, vect_location,
12910 "handled only by SLP analysis\n");
12911 return opt_result::success ();
12914 ok = true;
12915 if (!bb_vinfo
12916 && (STMT_VINFO_RELEVANT_P (stmt_info)
12917 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
12918 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12919 -mveclibabi= takes preference over library functions with
12920 the simd attribute. */
12921 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12922 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
12923 cost_vec)
12924 || vectorizable_conversion (vinfo, stmt_info,
12925 NULL, NULL, node, cost_vec)
12926 || vectorizable_operation (vinfo, stmt_info,
12927 NULL, NULL, node, cost_vec)
12928 || vectorizable_assignment (vinfo, stmt_info,
12929 NULL, NULL, node, cost_vec)
12930 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12931 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12932 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12933 node, node_instance, cost_vec)
12934 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
12935 NULL, node, cost_vec)
12936 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12937 || vectorizable_condition (vinfo, stmt_info,
12938 NULL, NULL, node, cost_vec)
12939 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12940 cost_vec)
12941 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12942 stmt_info, NULL, node)
12943 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12944 stmt_info, NULL, node, cost_vec));
12945 else
12947 if (bb_vinfo)
12948 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12949 || vectorizable_simd_clone_call (vinfo, stmt_info,
12950 NULL, NULL, node, cost_vec)
12951 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
12952 cost_vec)
12953 || vectorizable_shift (vinfo, stmt_info,
12954 NULL, NULL, node, cost_vec)
12955 || vectorizable_operation (vinfo, stmt_info,
12956 NULL, NULL, node, cost_vec)
12957 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
12958 cost_vec)
12959 || vectorizable_load (vinfo, stmt_info,
12960 NULL, NULL, node, cost_vec)
12961 || vectorizable_store (vinfo, stmt_info,
12962 NULL, NULL, node, cost_vec)
12963 || vectorizable_condition (vinfo, stmt_info,
12964 NULL, NULL, node, cost_vec)
12965 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12966 cost_vec)
12967 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12970 if (node)
12971 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12973 if (!ok)
12974 return opt_result::failure_at (stmt_info->stmt,
12975 "not vectorized:"
12976 " relevant stmt not supported: %G",
12977 stmt_info->stmt);
12979   /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
12980 need extra handling, except for vectorizable reductions. */
12981 if (!bb_vinfo
12982 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
12983 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
12984 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
12985 stmt_info, node, node_instance,
12986 false, cost_vec))
12987 return opt_result::failure_at (stmt_info->stmt,
12988 "not vectorized:"
12989 " live stmt not supported: %G",
12990 stmt_info->stmt);
12992 return opt_result::success ();
12996 /* Function vect_transform_stmt.
12998 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
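/* A note on the return value (a reading of the implementation below rather
   than a documented contract): the function returns IS_STORE, i.e. true when
   STMT_INFO is a store whose vector store statements were emitted here; for
   an interleaving group this only happens at the last store of the chain.  */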
13000 bool
13001 vect_transform_stmt (vec_info *vinfo,
13002 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
13003 slp_tree slp_node, slp_instance slp_node_instance)
13005 bool is_store = false;
13006 gimple *vec_stmt = NULL;
13007 bool done;
13009 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
13011 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13012 if (slp_node)
13013 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
13015 switch (STMT_VINFO_TYPE (stmt_info))
13017 case type_demotion_vec_info_type:
13018 case type_promotion_vec_info_type:
13019 case type_conversion_vec_info_type:
13020 done = vectorizable_conversion (vinfo, stmt_info,
13021 gsi, &vec_stmt, slp_node, NULL);
13022 gcc_assert (done);
13023 break;
13025 case induc_vec_info_type:
13026 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
13027 stmt_info, &vec_stmt, slp_node,
13028 NULL);
13029 gcc_assert (done);
13030 break;
13032 case shift_vec_info_type:
13033 done = vectorizable_shift (vinfo, stmt_info,
13034 gsi, &vec_stmt, slp_node, NULL);
13035 gcc_assert (done);
13036 break;
13038 case op_vec_info_type:
13039 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13040 NULL);
13041 gcc_assert (done);
13042 break;
13044 case assignment_vec_info_type:
13045 done = vectorizable_assignment (vinfo, stmt_info,
13046 gsi, &vec_stmt, slp_node, NULL);
13047 gcc_assert (done);
13048 break;
13050 case load_vec_info_type:
13051 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13052 NULL);
13053 gcc_assert (done);
13054 break;
13056 case store_vec_info_type:
13057 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
13058 && !slp_node
13059 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
13060 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
13061 /* In case of interleaving, the whole chain is vectorized when the
13062 last store in the chain is reached. Store stmts before the last
13063 	 one are skipped, and their vec_stmt_info shouldn't be freed
13064 meanwhile. */
13066 else
13068 done = vectorizable_store (vinfo, stmt_info,
13069 gsi, &vec_stmt, slp_node, NULL);
13070 gcc_assert (done);
13071 is_store = true;
13073 break;
13075 case condition_vec_info_type:
13076 done = vectorizable_condition (vinfo, stmt_info,
13077 gsi, &vec_stmt, slp_node, NULL);
13078 gcc_assert (done);
13079 break;
13081 case comparison_vec_info_type:
13082 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
13083 slp_node, NULL);
13084 gcc_assert (done);
13085 break;
13087 case call_vec_info_type:
13088 done = vectorizable_call (vinfo, stmt_info,
13089 gsi, &vec_stmt, slp_node, NULL);
13090 break;
13092 case call_simd_clone_vec_info_type:
13093 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
13094 slp_node, NULL);
13095 break;
13097 case reduc_vec_info_type:
13098 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13099 gsi, &vec_stmt, slp_node);
13100 gcc_assert (done);
13101 break;
13103 case cycle_phi_info_type:
13104 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
13105 &vec_stmt, slp_node, slp_node_instance);
13106 gcc_assert (done);
13107 break;
13109 case lc_phi_info_type:
13110 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13111 stmt_info, &vec_stmt, slp_node);
13112 gcc_assert (done);
13113 break;
13115 case recurr_info_type:
13116 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13117 stmt_info, &vec_stmt, slp_node, NULL);
13118 gcc_assert (done);
13119 break;
13121 case phi_info_type:
13122 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
13123 gcc_assert (done);
13124 break;
13126 default:
13127 if (!STMT_VINFO_LIVE_P (stmt_info))
13129 if (dump_enabled_p ())
13130 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13131 "stmt not supported.\n");
13132 gcc_unreachable ();
13134 done = true;
13137 if (!slp_node && vec_stmt)
13138 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
13140 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
13142 /* Handle stmts whose DEF is used outside the loop-nest that is
13143 being vectorized. */
13144 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13145 slp_node_instance, true, NULL);
13146 gcc_assert (done);
13149 if (slp_node)
13150 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13152 return is_store;
13156 /* Remove a group of stores (for SLP or interleaving), free their
13157 stmt_vec_info. */
13159 void
13160 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13162 stmt_vec_info next_stmt_info = first_stmt_info;
13164 while (next_stmt_info)
13166 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13167 next_stmt_info = vect_orig_stmt (next_stmt_info);
13168 /* Free the attached stmt_vec_info and remove the stmt. */
13169 vinfo->remove_stmt (next_stmt_info);
13170 next_stmt_info = tmp;
13174 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13175 elements of type SCALAR_TYPE, or null if the target doesn't support
13176 such a type.
13178 If NUNITS is zero, return a vector type that contains elements of
13179 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13181 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13182 for this vectorization region and want to "autodetect" the best choice.
13183 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13184 and we want the new type to be interoperable with it. PREVAILING_MODE
13185 in this case can be a scalar integer mode or a vector mode; when it
13186 is a vector mode, the function acts like a tree-level version of
13187 related_vector_mode. */
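/* A hypothetical, target-dependent example: with PREVAILING_MODE V16QImode
   and SCALAR_TYPE 'int' the function acts like related_vector_mode and would
   return a 'vector(4) int' type if the target provides such a mode, whereas
   with PREVAILING_MODE VOIDmode and NUNITS 0 it uses whatever SIMD mode the
   target prefers for 'int'.  */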
13189 tree
13190 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13191 tree scalar_type, poly_uint64 nunits)
13193 tree orig_scalar_type = scalar_type;
13194 scalar_mode inner_mode;
13195 machine_mode simd_mode;
13196 tree vectype;
13198 if ((!INTEGRAL_TYPE_P (scalar_type)
13199 && !POINTER_TYPE_P (scalar_type)
13200 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13201 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13202 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13203 return NULL_TREE;
13205 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13207 /* Interoperability between modes requires one to be a constant multiple
13208 of the other, so that the number of vectors required for each operation
13209 is a compile-time constant. */
13210 if (prevailing_mode != VOIDmode
13211 && !constant_multiple_p (nunits * nbytes,
13212 GET_MODE_SIZE (prevailing_mode))
13213 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13214 nunits * nbytes))
13215 return NULL_TREE;
13217 /* For vector types of elements whose mode precision doesn't
13218      match their type's precision we use an element type of mode
13219 precision. The vectorization routines will have to make sure
13220 they support the proper result truncation/extension.
13221 We also make sure to build vector types with INTEGER_TYPE
13222 component type only. */
13223 if (INTEGRAL_TYPE_P (scalar_type)
13224 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13225 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13226 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13227 TYPE_UNSIGNED (scalar_type));
13229 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13230 When the component mode passes the above test simply use a type
13231 corresponding to that mode. The theory is that any use that
13232 would cause problems with this will disable vectorization anyway. */
13233 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13234 && !INTEGRAL_TYPE_P (scalar_type))
13235 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13237 /* We can't build a vector type of elements with alignment bigger than
13238 their size. */
13239 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13240 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13241 TYPE_UNSIGNED (scalar_type));
13243   /* If we fell back to using the mode, fail if there was
13244 no scalar type for it. */
13245 if (scalar_type == NULL_TREE)
13246 return NULL_TREE;
13248 /* If no prevailing mode was supplied, use the mode the target prefers.
13249 Otherwise lookup a vector mode based on the prevailing mode. */
13250 if (prevailing_mode == VOIDmode)
13252 gcc_assert (known_eq (nunits, 0U));
13253 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13254 if (SCALAR_INT_MODE_P (simd_mode))
13256 /* Traditional behavior is not to take the integer mode
13257 literally, but simply to use it as a way of determining
13258 the vector size. It is up to mode_for_vector to decide
13259 what the TYPE_MODE should be.
13261 Note that nunits == 1 is allowed in order to support single
13262 element vector types. */
13263 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
13264 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13265 return NULL_TREE;
13268 else if (SCALAR_INT_MODE_P (prevailing_mode)
13269 || !related_vector_mode (prevailing_mode,
13270 inner_mode, nunits).exists (&simd_mode))
13272 /* Fall back to using mode_for_vector, mostly in the hope of being
13273 able to use an integer mode. */
13274 if (known_eq (nunits, 0U)
13275 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13276 return NULL_TREE;
13278 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13279 return NULL_TREE;
13282 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13284 /* In cases where the mode was chosen by mode_for_vector, check that
13285 the target actually supports the chosen mode, or that it at least
13286 allows the vector mode to be replaced by a like-sized integer. */
13287 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13288 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13289 return NULL_TREE;
13291 /* Re-attach the address-space qualifier if we canonicalized the scalar
13292 type. */
13293 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13294 return build_qualified_type
13295 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13297 return vectype;
13300 /* Function get_vectype_for_scalar_type.
13302 Returns the vector type corresponding to SCALAR_TYPE as supported
13303 by the target. If GROUP_SIZE is nonzero and we're performing BB
13304 vectorization, make sure that the number of elements in the vector
13305 is no bigger than GROUP_SIZE. */
13307 tree
13308 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13309 unsigned int group_size)
13311 /* For BB vectorization, we should always have a group size once we've
13312 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13313 are tentative requests during things like early data reference
13314 analysis and pattern recognition. */
13315 if (is_a <bb_vec_info> (vinfo))
13316 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13317 else
13318 group_size = 0;
13320 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13321 scalar_type);
13322 if (vectype && vinfo->vector_mode == VOIDmode)
13323 vinfo->vector_mode = TYPE_MODE (vectype);
13325 /* Register the natural choice of vector type, before the group size
13326 has been applied. */
13327 if (vectype)
13328 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13330 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13331 try again with an explicit number of elements. */
13332 if (vectype
13333 && group_size
13334 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13336 /* Start with the biggest number of units that fits within
13337 GROUP_SIZE and halve it until we find a valid vector type.
13338 Usually either the first attempt will succeed or all will
13339 fail (in the latter case because GROUP_SIZE is too small
13340 for the target), but it's possible that a target could have
13341 a hole between supported vector types.
13343 If GROUP_SIZE is not a power of 2, this has the effect of
13344 trying the largest power of 2 that fits within the group,
13345 even though the group is not a multiple of that vector size.
13346 The BB vectorizer will then try to carve up the group into
13347 smaller pieces. */
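      /* A worked example (illustrative): GROUP_SIZE 6 gives an initial
	 nunits of 1 << floor_log2 (6) == 4; if no 4-element vector type
	 exists for SCALAR_TYPE, the loop retries with 2 before giving up.  */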
13348 unsigned int nunits = 1 << floor_log2 (group_size);
13351 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13352 scalar_type, nunits);
13353 nunits /= 2;
13355 while (nunits > 1 && !vectype);
13358 return vectype;
13361 /* Return the vector type corresponding to SCALAR_TYPE as supported
13362 by the target. NODE, if nonnull, is the SLP tree node that will
13363 use the returned vector type. */
13365 tree
13366 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13368 unsigned int group_size = 0;
13369 if (node)
13370 group_size = SLP_TREE_LANES (node);
13371 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13374 /* Function get_mask_type_for_scalar_type.
13376 Returns the mask type corresponding to a result of comparison
13377 of vectors of specified SCALAR_TYPE as supported by target.
13378 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13379 make sure that the number of elements in the vector is no bigger
13380 than GROUP_SIZE. */
13382 tree
13383 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13384 unsigned int group_size)
13386 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13388 if (!vectype)
13389 return NULL;
13391 return truth_type_for (vectype);
13394 /* Function get_mask_type_for_scalar_type.
13396 Returns the mask type corresponding to a result of comparison
13397 of vectors of specified SCALAR_TYPE as supported by target.
13398 NODE, if nonnull, is the SLP tree node that will use the returned
13399 vector type. */
13401 tree
13402 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13403 slp_tree node)
13405 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
13407 if (!vectype)
13408 return NULL;
13410 return truth_type_for (vectype);
13413 /* Function get_same_sized_vectype
13415 Returns a vector type corresponding to SCALAR_TYPE of size
13416 VECTOR_TYPE if supported by the target. */
13418 tree
13419 get_same_sized_vectype (tree scalar_type, tree vector_type)
13421 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13422 return truth_type_for (vector_type);
13424 poly_uint64 nunits;
13425 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
13426 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13427 return NULL_TREE;
13429 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13430 scalar_type, nunits);
13433 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13434 would not change the chosen vector modes. */
13436 bool
13437 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13439 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13440 i != vinfo->used_vector_modes.end (); ++i)
13441 if (!VECTOR_MODE_P (*i)
13442 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13443 return false;
13444 return true;
13447 /* Function vect_is_simple_use.
13449 Input:
13450 VINFO - the vect info of the loop or basic block that is being vectorized.
13451 OPERAND - operand in the loop or bb.
13452 Output:
13453 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13454 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13455 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13456 the definition could be anywhere in the function
13457 DT - the type of definition
13459 Returns whether a stmt with OPERAND can be vectorized.
13460 For loops, supportable operands are constants, loop invariants, and operands
13461 that are defined by the current iteration of the loop. Unsupportable
13462 operands are those that are defined by a previous iteration of the loop (as
13463 is the case in reduction/induction computations).
13464 For basic blocks, supportable operands are constants and bb invariants.
13465 For now, operands defined outside the basic block are not supported. */
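/* For example (illustrative): given 'x_1 = a_2 + 3' in the vectorized region,
   the operand 'a_2' typically yields vect_internal_def when its definition is
   inside the region, the constant 3 yields vect_constant_def, and an SSA name
   defined outside the region yields vect_external_def.  */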
13467 bool
13468 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13469 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
13471 if (def_stmt_info_out)
13472 *def_stmt_info_out = NULL;
13473 if (def_stmt_out)
13474 *def_stmt_out = NULL;
13475 *dt = vect_unknown_def_type;
13477 if (dump_enabled_p ())
13479 dump_printf_loc (MSG_NOTE, vect_location,
13480 "vect_is_simple_use: operand ");
13481 if (TREE_CODE (operand) == SSA_NAME
13482 && !SSA_NAME_IS_DEFAULT_DEF (operand))
13483 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13484 else
13485 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13488 if (CONSTANT_CLASS_P (operand))
13489 *dt = vect_constant_def;
13490 else if (is_gimple_min_invariant (operand))
13491 *dt = vect_external_def;
13492 else if (TREE_CODE (operand) != SSA_NAME)
13493 *dt = vect_unknown_def_type;
13494 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13495 *dt = vect_external_def;
13496 else
13498 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13499 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13500 if (!stmt_vinfo)
13501 *dt = vect_external_def;
13502 else
13504 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
13505 def_stmt = stmt_vinfo->stmt;
13506 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13507 if (def_stmt_info_out)
13508 *def_stmt_info_out = stmt_vinfo;
13510 if (def_stmt_out)
13511 *def_stmt_out = def_stmt;
13514 if (dump_enabled_p ())
13516 dump_printf (MSG_NOTE, ", type of def: ");
13517 switch (*dt)
13519 case vect_uninitialized_def:
13520 dump_printf (MSG_NOTE, "uninitialized\n");
13521 break;
13522 case vect_constant_def:
13523 dump_printf (MSG_NOTE, "constant\n");
13524 break;
13525 case vect_external_def:
13526 dump_printf (MSG_NOTE, "external\n");
13527 break;
13528 case vect_internal_def:
13529 dump_printf (MSG_NOTE, "internal\n");
13530 break;
13531 case vect_induction_def:
13532 dump_printf (MSG_NOTE, "induction\n");
13533 break;
13534 case vect_reduction_def:
13535 dump_printf (MSG_NOTE, "reduction\n");
13536 break;
13537 case vect_double_reduction_def:
13538 dump_printf (MSG_NOTE, "double reduction\n");
13539 break;
13540 case vect_nested_cycle:
13541 dump_printf (MSG_NOTE, "nested cycle\n");
13542 break;
13543 case vect_first_order_recurrence:
13544 dump_printf (MSG_NOTE, "first order recurrence\n");
13545 break;
13546 case vect_unknown_def_type:
13547 dump_printf (MSG_NOTE, "unknown\n");
13548 break;
13552 if (*dt == vect_unknown_def_type)
13554 if (dump_enabled_p ())
13555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13556 "Unsupported pattern.\n");
13557 return false;
13560 return true;
13563 /* Function vect_is_simple_use.
13565 Same as vect_is_simple_use but also determines the vector operand
13566 type of OPERAND and stores it to *VECTYPE. If the definition of
13567 OPERAND is vect_uninitialized_def, vect_constant_def or
13568 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
13569 is responsible to compute the best suited vector type for the
13570 scalar operand. */
13572 bool
13573 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13574 tree *vectype, stmt_vec_info *def_stmt_info_out,
13575 gimple **def_stmt_out)
13577 stmt_vec_info def_stmt_info;
13578 gimple *def_stmt;
13579 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
13580 return false;
13582 if (def_stmt_out)
13583 *def_stmt_out = def_stmt;
13584 if (def_stmt_info_out)
13585 *def_stmt_info_out = def_stmt_info;
13587 /* Now get a vector type if the def is internal, otherwise supply
13588 NULL_TREE and leave it up to the caller to figure out a proper
13589 type for the use stmt. */
13590 if (*dt == vect_internal_def
13591 || *dt == vect_induction_def
13592 || *dt == vect_reduction_def
13593 || *dt == vect_double_reduction_def
13594 || *dt == vect_nested_cycle
13595 || *dt == vect_first_order_recurrence)
13597 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
13598 gcc_assert (*vectype != NULL_TREE);
13599 if (dump_enabled_p ())
13600 dump_printf_loc (MSG_NOTE, vect_location,
13601 "vect_is_simple_use: vectype %T\n", *vectype);
13603 else if (*dt == vect_uninitialized_def
13604 || *dt == vect_constant_def
13605 || *dt == vect_external_def)
13606 *vectype = NULL_TREE;
13607 else
13608 gcc_unreachable ();
13610 return true;
13613 /* Function vect_is_simple_use.
13615 Same as vect_is_simple_use but determines the operand by operand
13616 position OPERAND from either STMT or SLP_NODE, filling in *OP
13617 and *SLP_DEF (when SLP_NODE is not NULL). */
13619 bool
13620 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13621 unsigned operand, tree *op, slp_tree *slp_def,
13622 enum vect_def_type *dt,
13623 tree *vectype, stmt_vec_info *def_stmt_info_out)
13625 if (slp_node)
13627 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13628 *slp_def = child;
13629 *vectype = SLP_TREE_VECTYPE (child);
13630 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13632 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13633 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
13635 else
13637 if (def_stmt_info_out)
13638 *def_stmt_info_out = NULL;
13639 *op = SLP_TREE_SCALAR_OPS (child)[0];
13640 *dt = SLP_TREE_DEF_TYPE (child);
13641 return true;
13644 else
13646 *slp_def = NULL;
13647 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
13649 if (gimple_assign_rhs_code (ass) == COND_EXPR
13650 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
13652 if (operand < 2)
13653 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
13654 else
13655 *op = gimple_op (ass, operand);
13657 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
13658 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
13659 else
13660 *op = gimple_op (ass, operand + 1);
13662 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
13663 *op = gimple_call_arg (call, operand);
13664 else
13665 gcc_unreachable ();
13666 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
13670 /* If OP is not NULL and is external or constant update its vector
13671 type with VECTYPE. Returns true if successful or false if not,
13672 for example when conflicting vector types are present. */
13674 bool
13675 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13677 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13678 return true;
13679 if (SLP_TREE_VECTYPE (op))
13680 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
13681 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13682      should be handled by patterns.  Allow vect_constant_def for now.  */
13683 if (VECTOR_BOOLEAN_TYPE_P (vectype)
13684 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
13685 return false;
13686 SLP_TREE_VECTYPE (op) = vectype;
13687 return true;
13690 /* Function supportable_widening_operation
13692 Check whether an operation represented by the code CODE is a
13693 widening operation that is supported by the target platform in
13694 vector form (i.e., when operating on arguments of type VECTYPE_IN
13695 producing a result of type VECTYPE_OUT).
13697 Widening operations we currently support are NOP (CONVERT), FLOAT,
13698 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13699 are supported by the target platform either directly (via vector
13700 tree-codes), or via target builtins.
13702 Output:
13703 - CODE1 and CODE2 are codes of vector operations to be used when
13704 vectorizing the operation, if available.
13705 - MULTI_STEP_CVT determines the number of required intermediate steps in
13706 case of multi-step conversion (like char->short->int - in that case
13707 MULTI_STEP_CVT will be 1).
13708 - INTERM_TYPES contains the intermediate type required to perform the
13709 widening operation (short in the above example). */
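/* An illustrative instance of the char->short->int case above, using modes of
   a hypothetical target: widening V16QI operands to a V4SI result when only
   single-step unpacks are available sets CODE1/CODE2 to
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR (possibly swapped on big-endian
   targets), MULTI_STEP_CVT to 1 and INTERM_TYPES to the single intermediate
   V8HI type.  */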
13711 bool
13712 supportable_widening_operation (vec_info *vinfo,
13713 code_helper code,
13714 stmt_vec_info stmt_info,
13715 tree vectype_out, tree vectype_in,
13716 code_helper *code1,
13717 code_helper *code2,
13718 int *multi_step_cvt,
13719 vec<tree> *interm_types)
13721 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
13722 class loop *vect_loop = NULL;
13723 machine_mode vec_mode;
13724 enum insn_code icode1, icode2;
13725 optab optab1 = unknown_optab, optab2 = unknown_optab;
13726 tree vectype = vectype_in;
13727 tree wide_vectype = vectype_out;
13728 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
13729 int i;
13730 tree prev_type, intermediate_type;
13731 machine_mode intermediate_mode, prev_mode;
13732 optab optab3, optab4;
13734 *multi_step_cvt = 0;
13735 if (loop_info)
13736 vect_loop = LOOP_VINFO_LOOP (loop_info);
13738 switch (code.safe_as_tree_code ())
13740 case MAX_TREE_CODES:
13741 /* Don't set c1 and c2 if code is not a tree_code. */
13742 break;
13744 case WIDEN_MULT_EXPR:
13745 /* The result of a vectorized widening operation usually requires
13746 two vectors (because the widened results do not fit into one vector).
13747 The generated vector results would normally be expected to be
13748 generated in the same order as in the original scalar computation,
13749 i.e. if 8 results are generated in each vector iteration, they are
13750 to be organized as follows:
13751 vect1: [res1,res2,res3,res4],
13752 vect2: [res5,res6,res7,res8].
13754 However, in the special case that the result of the widening
13755 operation is used in a reduction computation only, the order doesn't
13756 matter (because when vectorizing a reduction we change the order of
13757 the computation). Some targets can take advantage of this and
13758 generate more efficient code. For example, targets like Altivec,
13759 that support widen_mult using a sequence of {mult_even,mult_odd}
13760 generate the following vectors:
13761 vect1: [res1,res3,res5,res7],
13762 vect2: [res2,res4,res6,res8].
13764 When vectorizing outer-loops, we execute the inner-loop sequentially
13765 (each vectorized inner-loop iteration contributes to VF outer-loop
13766 	 iterations in parallel).  We therefore don't allow changing the
13767 order of the computation in the inner-loop during outer-loop
13768 vectorization. */
13769 /* TODO: Another case in which order doesn't *really* matter is when we
13770 widen and then contract again, e.g. (short)((int)x * y >> 8).
13771 Normally, pack_trunc performs an even/odd permute, whereas the
13772 repack from an even/odd expansion would be an interleave, which
13773 would be significantly simpler for e.g. AVX2. */
13774 /* In any case, in order to avoid duplicating the code below, recurse
13775 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13776 are properly set up for the caller. If we fail, we'll continue with
13777 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13778 if (vect_loop
13779 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
13780 && !nested_in_vect_loop_p (vect_loop, stmt_info)
13781 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
13782 stmt_info, vectype_out,
13783 vectype_in, code1,
13784 code2, multi_step_cvt,
13785 interm_types))
13787 /* Elements in a vector with vect_used_by_reduction property cannot
13788 be reordered if the use chain with this property does not have the
13789 	     same operation.  One such example is s += a * b, where elements
13790 in a and b cannot be reordered. Here we check if the vector defined
13791 by STMT is only directly used in the reduction statement. */
13792 tree lhs = gimple_assign_lhs (stmt_info->stmt);
13793 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
13794 if (use_stmt_info
13795 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
13796 return true;
13798 c1 = VEC_WIDEN_MULT_LO_EXPR;
13799 c2 = VEC_WIDEN_MULT_HI_EXPR;
13800 break;
13802 case DOT_PROD_EXPR:
13803 c1 = DOT_PROD_EXPR;
13804 c2 = DOT_PROD_EXPR;
13805 break;
13807 case SAD_EXPR:
13808 c1 = SAD_EXPR;
13809 c2 = SAD_EXPR;
13810 break;
13812 case VEC_WIDEN_MULT_EVEN_EXPR:
13813 /* Support the recursion induced just above. */
13814 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
13815 c2 = VEC_WIDEN_MULT_ODD_EXPR;
13816 break;
13818 case WIDEN_LSHIFT_EXPR:
13819 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
13820 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
13821 break;
13823 CASE_CONVERT:
13824 c1 = VEC_UNPACK_LO_EXPR;
13825 c2 = VEC_UNPACK_HI_EXPR;
13826 break;
13828 case FLOAT_EXPR:
13829 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
13830 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
13831 break;
13833 case FIX_TRUNC_EXPR:
13834 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
13835 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
13836 break;
13838 default:
13839 gcc_unreachable ();
13842 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
13843 std::swap (c1, c2);
13845 if (code == FIX_TRUNC_EXPR)
13847 /* The signedness is determined from output operand. */
13848 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13849 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13851 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
13852 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13853 && VECTOR_BOOLEAN_TYPE_P (vectype)
13854 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13855 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13857 /* If the input and result modes are the same, a different optab
13858 is needed where we pass in the number of units in vectype. */
13859 optab1 = vec_unpacks_sbool_lo_optab;
13860 optab2 = vec_unpacks_sbool_hi_optab;
13863 vec_mode = TYPE_MODE (vectype);
13864 if (widening_fn_p (code))
13866 /* If this is an internal fn then we must check whether the target
13867 supports either a low-high split or an even-odd split. */
13868 internal_fn ifn = as_internal_fn ((combined_fn) code);
13870 internal_fn lo, hi, even, odd;
13871 lookup_hilo_internal_fn (ifn, &lo, &hi);
13872 *code1 = as_combined_fn (lo);
13873 *code2 = as_combined_fn (hi);
13874 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
13875 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
13877 /* If we don't support low-high, then check for even-odd. */
13878 if (!optab1
13879 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13880 || !optab2
13881 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13883 lookup_evenodd_internal_fn (ifn, &even, &odd);
13884 *code1 = as_combined_fn (even);
13885 *code2 = as_combined_fn (odd);
13886 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
13887 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
13890 else if (code.is_tree_code ())
13892 if (code == FIX_TRUNC_EXPR)
13894 /* The signedness is determined from output operand. */
13895 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13896 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13898 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
13899 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13900 && VECTOR_BOOLEAN_TYPE_P (vectype)
13901 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13902 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13904 /* If the input and result modes are the same, a different optab
13905 is needed where we pass in the number of units in vectype. */
13906 optab1 = vec_unpacks_sbool_lo_optab;
13907 optab2 = vec_unpacks_sbool_hi_optab;
13909 else
13911 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13912 optab2 = optab_for_tree_code (c2, vectype, optab_default);
13914 *code1 = c1;
13915 *code2 = c2;
13918 if (!optab1 || !optab2)
13919 return false;
13921 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13922 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13923 return false;
13926 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13927 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13929 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13930 return true;
13931 /* For scalar masks we may have different boolean
13932 vector types having the same QImode. Thus we
13933 	 add an additional check for the number of elements.  */
13934 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
13935 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13936 return true;
13939 /* Check if it's a multi-step conversion that can be done using intermediate
13940 types. */
13942 prev_type = vectype;
13943 prev_mode = vec_mode;
13945 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
13946 return false;
13948   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13949      intermediate steps in the promotion sequence.  We try
13950      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
13951      not.  */
13952 interm_types->create (MAX_INTERM_CVT_STEPS);
13953 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13955 intermediate_mode = insn_data[icode1].operand[0].mode;
13956 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13957 intermediate_type
13958 = vect_halve_mask_nunits (prev_type, intermediate_mode);
13959 else if (VECTOR_MODE_P (intermediate_mode))
13961 tree intermediate_element_type
13962 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
13963 TYPE_UNSIGNED (prev_type));
13964 intermediate_type
13965 = build_vector_type_for_mode (intermediate_element_type,
13966 intermediate_mode);
13968 else
13969 intermediate_type
13970 = lang_hooks.types.type_for_mode (intermediate_mode,
13971 TYPE_UNSIGNED (prev_type));
13973 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13974 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13975 && intermediate_mode == prev_mode
13976 && SCALAR_INT_MODE_P (prev_mode))
13978 /* If the input and result modes are the same, a different optab
13979 is needed where we pass in the number of units in vectype. */
13980 optab3 = vec_unpacks_sbool_lo_optab;
13981 optab4 = vec_unpacks_sbool_hi_optab;
13983 else
13985 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
13986 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
13989 if (!optab3 || !optab4
13990 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
13991 || insn_data[icode1].operand[0].mode != intermediate_mode
13992 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
13993 || insn_data[icode2].operand[0].mode != intermediate_mode
13994 || ((icode1 = optab_handler (optab3, intermediate_mode))
13995 == CODE_FOR_nothing)
13996 || ((icode2 = optab_handler (optab4, intermediate_mode))
13997 == CODE_FOR_nothing))
13998 break;
14000 interm_types->quick_push (intermediate_type);
14001 (*multi_step_cvt)++;
14003 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14004 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14006 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14007 return true;
14008 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
14009 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14010 return true;
14013 prev_type = intermediate_type;
14014 prev_mode = intermediate_mode;
14017 interm_types->release ();
14018 return false;
14022 /* Function supportable_narrowing_operation
14024 Check whether an operation represented by the code CODE is a
14025 narrowing operation that is supported by the target platform in
14026 vector form (i.e., when operating on arguments of type VECTYPE_IN
14027 and producing a result of type VECTYPE_OUT).
14029 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14030 and FLOAT. This function checks if these operations are supported by
14031 the target platform directly via vector tree-codes.
14033 Output:
14034 - CODE1 is the code of a vector operation to be used when
14035 vectorizing the operation, if available.
14036 - MULTI_STEP_CVT determines the number of required intermediate steps in
14037 case of multi-step conversion (like int->short->char - in that case
14038 MULTI_STEP_CVT will be 1).
14039 - INTERM_TYPES contains the intermediate type required to perform the
14040 narrowing operation (short in the above example). */
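/* An illustrative instance of the int->short->char case above, using modes of
   a hypothetical target: narrowing V4SI operands to a V16QI result when only
   single-step packs are available sets CODE1 to VEC_PACK_TRUNC_EXPR,
   MULTI_STEP_CVT to 1 and INTERM_TYPES to the single intermediate V8HI
   type.  */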
14042 bool
14043 supportable_narrowing_operation (code_helper code,
14044 tree vectype_out, tree vectype_in,
14045 code_helper *code1, int *multi_step_cvt,
14046 vec<tree> *interm_types)
14048 machine_mode vec_mode;
14049 enum insn_code icode1;
14050 optab optab1, interm_optab;
14051 tree vectype = vectype_in;
14052 tree narrow_vectype = vectype_out;
14053 enum tree_code c1;
14054 tree intermediate_type, prev_type;
14055 machine_mode intermediate_mode, prev_mode;
14056 int i;
14057 unsigned HOST_WIDE_INT n_elts;
14058 bool uns;
14060 if (!code.is_tree_code ())
14061 return false;
14063 *multi_step_cvt = 0;
14064 switch ((tree_code) code)
14066 CASE_CONVERT:
14067 c1 = VEC_PACK_TRUNC_EXPR;
14068 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
14069 && VECTOR_BOOLEAN_TYPE_P (vectype)
14070 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
14071 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
14072 && n_elts < BITS_PER_UNIT)
14073 optab1 = vec_pack_sbool_trunc_optab;
14074 else
14075 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14076 break;
14078 case FIX_TRUNC_EXPR:
14079 c1 = VEC_PACK_FIX_TRUNC_EXPR;
14080       /* The signedness is determined from the output operand.  */
14081 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14082 break;
14084 case FLOAT_EXPR:
14085 c1 = VEC_PACK_FLOAT_EXPR;
14086 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14087 break;
14089 default:
14090 gcc_unreachable ();
14093 if (!optab1)
14094 return false;
14096 vec_mode = TYPE_MODE (vectype);
14097 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
14098 return false;
14100 *code1 = c1;
14102 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14104 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14105 return true;
14106 	  /* For scalar masks we may have different boolean
14107 	     vector types that share the same QImode.  Thus we
14108 	     add an additional check on the number of elements.  */
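	  /* On an AVX512-style target, for instance, the 2-, 4- and
	     8-element boolean vector types may all share QImode (an
	     illustrative assumption, not taken from this file).  */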
14109 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14110 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14111 return true;
14114 if (code == FLOAT_EXPR)
14115 return false;
14117 /* Check if it's a multi-step conversion that can be done using intermediate
14118 types. */
14119 prev_mode = vec_mode;
14120 prev_type = vectype;
14121 if (code == FIX_TRUNC_EXPR)
14122 uns = TYPE_UNSIGNED (vectype_out);
14123 else
14124 uns = TYPE_UNSIGNED (vectype);
14126   /* For multi-step FIX_TRUNC_EXPR prefer a signed floating-point to integer
14127      conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often more
14128      costly than signed.  */
14129 if (code == FIX_TRUNC_EXPR && uns)
14131 enum insn_code icode2;
14133 intermediate_type
14134 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14135 interm_optab
14136 = optab_for_tree_code (c1, intermediate_type, optab_default);
14137 if (interm_optab != unknown_optab
14138 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
14139 && insn_data[icode1].operand[0].mode
14140 == insn_data[icode2].operand[0].mode)
14142 uns = false;
14143 optab1 = interm_optab;
14144 icode1 = icode2;
14148   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14149      intermediate steps in the narrowing sequence.  We try
14150      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
14151 interm_types->create (MAX_INTERM_CVT_STEPS);
14152 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14154 intermediate_mode = insn_data[icode1].operand[0].mode;
14155 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14156 intermediate_type
14157 = vect_double_mask_nunits (prev_type, intermediate_mode);
14158 else
14159 intermediate_type
14160 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14161 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14162 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14163 && SCALAR_INT_MODE_P (prev_mode)
14164 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
14165 && n_elts < BITS_PER_UNIT)
14166 interm_optab = vec_pack_sbool_trunc_optab;
14167 else
14168 interm_optab
14169 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14170 optab_default);
14171 if (!interm_optab
14172 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
14173 || insn_data[icode1].operand[0].mode != intermediate_mode
14174 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
14175 == CODE_FOR_nothing))
14176 break;
14178 interm_types->quick_push (intermediate_type);
14179 (*multi_step_cvt)++;
14181 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14183 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14184 return true;
14185 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14186 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14187 return true;
14190 prev_mode = intermediate_mode;
14191 prev_type = intermediate_type;
14192 optab1 = interm_optab;
14195 interm_types->release ();
14196 return false;
14199 /* Generate and return a vector mask of MASK_TYPE such that
14200 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14201 Add the statements to SEQ. */
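/* For instance (a worked illustration, not part of the original comment):
   with START_INDEX 13, END_INDEX 16 and an 8-lane MASK_TYPE the generated
   IFN_WHILE_ULT call produces the mask { 1, 1, 1, 0, 0, 0, 0, 0 }, i.e.
   lane I is active iff 13 + I < 16.  */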
14203 tree
14204 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14205 tree end_index, const char *name)
14207 tree cmp_type = TREE_TYPE (start_index);
14208 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14209 cmp_type, mask_type,
14210 OPTIMIZE_FOR_SPEED));
14211 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14212 start_index, end_index,
14213 build_zero_cst (mask_type));
14214 tree tmp;
14215 if (name)
14216 tmp = make_temp_ssa_name (mask_type, NULL, name);
14217 else
14218 tmp = make_ssa_name (mask_type);
14219 gimple_call_set_lhs (call, tmp);
14220 gimple_seq_add_stmt (seq, call);
14221 return tmp;
14224 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14225 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
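/* Continuing the worked example above, the same operands would yield the
   complementary mask { 0, 0, 0, 1, 1, 1, 1, 1 }.  */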
14227 tree
14228 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14229 tree end_index)
14231 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14232 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
14235 /* Try to compute the vector types required to vectorize STMT_INFO,
14236 returning true on success and false if vectorization isn't possible.
14237 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14238    make sure that the number of elements in the vectors is no bigger
14239 than GROUP_SIZE.
14241 On success:
14243 - Set *STMT_VECTYPE_OUT to:
14244 - NULL_TREE if the statement doesn't need to be vectorized;
14245 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14247 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14248 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14249 statement does not help to determine the overall number of units. */
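/* As a rough illustration (hypothetical 128-bit target, not taken from this
   file): for a conversion statement such as

     int_dest = (int) short_src;

   *STMT_VECTYPE_OUT would be the int vector type (V4SImode) while
   *NUNITS_VECTYPE_OUT would be the short vector type (V8HImode), because
   the smallest scalar type in the statement determines the number of
   units.  */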
14251 opt_result
14252 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14253 tree *stmt_vectype_out,
14254 tree *nunits_vectype_out,
14255 unsigned int group_size)
14257 gimple *stmt = stmt_info->stmt;
14259 /* For BB vectorization, we should always have a group size once we've
14260 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14261 are tentative requests during things like early data reference
14262 analysis and pattern recognition. */
14263 if (is_a <bb_vec_info> (vinfo))
14264 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
14265 else
14266 group_size = 0;
14268 *stmt_vectype_out = NULL_TREE;
14269 *nunits_vectype_out = NULL_TREE;
14271 if (gimple_get_lhs (stmt) == NULL_TREE
14272 /* MASK_STORE has no lhs, but is ok. */
14273 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
14275 if (is_a <gcall *> (stmt))
14277 	  /* Ignore calls with no lhs.  These must be calls to
14278 	     #pragma omp simd functions, and the vectorization factor
14279 	     they really need can't be determined until
14280 	     vectorizable_simd_clone_call.  */
14281 if (dump_enabled_p ())
14282 dump_printf_loc (MSG_NOTE, vect_location,
14283 "defer to SIMD clone analysis.\n");
14284 return opt_result::success ();
14287 return opt_result::failure_at (stmt,
14288 "not vectorized: irregular stmt.%G", stmt);
14291 tree vectype;
14292 tree scalar_type = NULL_TREE;
14293 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
14295 vectype = STMT_VINFO_VECTYPE (stmt_info);
14296 if (dump_enabled_p ())
14297 dump_printf_loc (MSG_NOTE, vect_location,
14298 "precomputed vectype: %T\n", vectype);
14300 else if (vect_use_mask_type_p (stmt_info))
14302 unsigned int precision = stmt_info->mask_precision;
14303 scalar_type = build_nonstandard_integer_type (precision, 1);
14304 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
14305 if (!vectype)
14306 return opt_result::failure_at (stmt, "not vectorized: unsupported"
14307 " data-type %T\n", scalar_type);
14308 if (dump_enabled_p ())
14309 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14311 else
14313 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
14314 scalar_type = TREE_TYPE (DR_REF (dr));
14315 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
14316 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
14317 else
14318 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
14320 if (dump_enabled_p ())
14322 if (group_size)
14323 dump_printf_loc (MSG_NOTE, vect_location,
14324 "get vectype for scalar type (group size %d):"
14325 " %T\n", group_size, scalar_type);
14326 else
14327 dump_printf_loc (MSG_NOTE, vect_location,
14328 "get vectype for scalar type: %T\n", scalar_type);
14330 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
14331 if (!vectype)
14332 return opt_result::failure_at (stmt,
14333 "not vectorized:"
14334 " unsupported data-type %T\n",
14335 scalar_type);
14337 if (dump_enabled_p ())
14338 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14341 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
14342 return opt_result::failure_at (stmt,
14343 "not vectorized: vector stmt in loop:%G",
14344 stmt);
14346 *stmt_vectype_out = vectype;
14348 /* Don't try to compute scalar types if the stmt produces a boolean
14349 vector; use the existing vector type instead. */
14350 tree nunits_vectype = vectype;
14351 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14353 /* The number of units is set according to the smallest scalar
14354 type (or the largest vector size, but we only support one
14355 vector size per vectorization). */
14356 scalar_type = vect_get_smallest_scalar_type (stmt_info,
14357 TREE_TYPE (vectype));
14358 if (scalar_type != TREE_TYPE (vectype))
14360 if (dump_enabled_p ())
14361 dump_printf_loc (MSG_NOTE, vect_location,
14362 "get vectype for smallest scalar type: %T\n",
14363 scalar_type);
14364 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
14365 group_size);
14366 if (!nunits_vectype)
14367 return opt_result::failure_at
14368 (stmt, "not vectorized: unsupported data-type %T\n",
14369 scalar_type);
14370 if (dump_enabled_p ())
14371 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
14372 nunits_vectype);
14376 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
14377 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
14378 return opt_result::failure_at (stmt,
14379 "Not vectorized: Incompatible number "
14380 "of vector subparts between %T and %T\n",
14381 nunits_vectype, *stmt_vectype_out);
14383 if (dump_enabled_p ())
14385 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
14386 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
14387 dump_printf (MSG_NOTE, "\n");
14390 *nunits_vectype_out = nunits_vectype;
14391 return opt_result::success ();
14394 /* Generate and return a statement sequence that sets the vector length LEN as follows:
14396 min_of_start_and_end = min (START_INDEX, END_INDEX);
14397 left_len = END_INDEX - min_of_start_and_end;
14398 rhs = min (left_len, LEN_LIMIT);
14399 LEN = rhs;
14401 Note: the cost of the code generated by this function is modeled
14402 by vect_estimate_min_profitable_iters, so changes here may need
14403 corresponding changes there. */
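/* For example (a worked illustration only): with START_INDEX 3, END_INDEX 10
   and LEN_LIMIT 4 the sequence computes min_of_start_and_end = 3,
   left_len = 7 and finally LEN = min (7, 4) = 4.  */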
14405 gimple_seq
14406 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
14408 gimple_seq stmts = NULL;
14409 tree len_type = TREE_TYPE (len);
14410 gcc_assert (TREE_TYPE (start_index) == len_type);
14412 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
14413 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
14414 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
14415 gimple* stmt = gimple_build_assign (len, rhs);
14416 gimple_seq_add_stmt (&stmts, stmt);
14418 return stmts;