c++: indirect change of active union member in constexpr [PR101631,PR102286]
[official-gcc.git] / gcc / tree-vect-stmts.cc
blob8d9028654c938647727755b0705c751a61e44851
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
57 #include "regs.h"
58 #include "attribs.h"
59 #include "optabs-libfuncs.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
64 /* Return the vectorized type for the given statement. */
66 tree
67 stmt_vectype (class _stmt_vec_info *stmt_info)
69 return STMT_VINFO_VECTYPE (stmt_info);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74 bool
75 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
80 class loop* loop;
82 if (!loop_vinfo)
83 return false;
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
87 return (bb->loop_father == loop->inner);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
94 static unsigned
95 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
108 stmt_info_for_cost si
109 = { count, kind, where, stmt_info, node, vectype, misalign };
110 body_cost_vec->safe_push (si);
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
116 unsigned
117 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
126 unsigned
127 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
136 unsigned
137 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, 0, where);
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
149 static tree
150 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
161 static tree
162 read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (vect, new_stmt);
178 gimple_assign_set_lhs (new_stmt, vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
181 return vect_name;
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
188 static void
189 write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
193 tree array_ref;
194 gimple *new_stmt;
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
208 static tree
209 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
211 tree mem_ref;
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
222 static void
223 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
233 /* Function vect_mark_relevant.
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
237 static void
238 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern, in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
271 if (live_p && relevant == vect_unused_in_scope)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
299 worklist->safe_push (stmt_info);
303 /* Function is_simple_and_all_uses_invariant
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
307 bool
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
311 tree op;
312 ssa_op_iter iter;
314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
315 if (!stmt)
316 return false;
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
320 enum vect_def_type dt = vect_uninitialized_def;
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
333 return true;
336 /* Function vect_stmt_relevant_p.
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - control stmts in the loop (except for the exit condition).
346 CHECKME: what other side effects would the vectorizer allow? */
348 static bool
349 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
350 enum vect_relevant *relevant, bool *live_p)
352 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
353 ssa_op_iter op_iter;
354 imm_use_iterator imm_iter;
355 use_operand_p use_p;
356 def_operand_p def_p;
358 *relevant = vect_unused_in_scope;
359 *live_p = false;
361 /* cond stmt other than loop exit cond. */
362 if (is_ctrl_stmt (stmt_info->stmt)
363 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
364 *relevant = vect_used_in_scope;
366 /* changing memory. */
367 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
368 if (gimple_vdef (stmt_info->stmt)
369 && !gimple_clobber_p (stmt_info->stmt))
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: stmt has vdefs.\n");
374 *relevant = vect_used_in_scope;
377 /* uses outside the loop. */
378 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
380 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
382 basic_block bb = gimple_bb (USE_STMT (use_p));
383 if (!flow_bb_inside_loop_p (loop, bb))
385 if (is_gimple_debug (USE_STMT (use_p)))
386 continue;
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: used out of loop.\n");
392 /* We expect all such uses to be in the loop exit phis
393 (because of loop closed form) */
394 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
395 gcc_assert (bb == single_exit (loop)->dest);
397 *live_p = true;
402 if (*live_p && *relevant == vect_unused_in_scope
403 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_NOTE, vect_location,
407 "vec_stmt_relevant_p: stmt live but not relevant.\n");
408 *relevant = vect_used_only_live;
411 return (*live_p || *relevant);
415 /* Function exist_non_indexing_operands_for_use_p
417 USE is one of the uses attached to STMT_INFO. Check if USE is
418 used in STMT_INFO for anything other than indexing an array. */
420 static bool
421 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
423 tree operand;
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info))
429 return true;
431 /* STMT has a data_ref. FORNOW this means that its of one of
432 the following forms:
433 -1- ARRAY_REF = var
434 -2- var = ARRAY_REF
435 (This should have been verified in analyze_data_refs).
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
439 for array indexing.
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
444 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
445 if (!assign || !gimple_assign_copy_p (assign))
447 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
448 if (call && gimple_call_internal_p (call))
450 internal_fn ifn = gimple_call_internal_fn (call);
451 int mask_index = internal_fn_mask_index (ifn);
452 if (mask_index >= 0
453 && use == gimple_call_arg (call, mask_index))
454 return true;
455 int stored_value_index = internal_fn_stored_value_index (ifn);
456 if (stored_value_index >= 0
457 && use == gimple_call_arg (call, stored_value_index))
458 return true;
459 if (internal_gather_scatter_fn_p (ifn)
460 && use == gimple_call_arg (call, 1))
461 return true;
463 return false;
466 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
467 return false;
468 operand = gimple_assign_rhs1 (assign);
469 if (TREE_CODE (operand) != SSA_NAME)
470 return false;
472 if (operand == use)
473 return true;
475 return false;
480 Function process_use.
482 Inputs:
483 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
484 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
485 that defined USE. This is done by calling mark_relevant and passing it
486 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
487 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
488 be performed.
490 Outputs:
491 Generally, LIVE_P and RELEVANT are used to define the liveness and
492 relevance info of the DEF_STMT of this USE:
493 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
494 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
495 Exceptions:
496 - case 1: If USE is used only for address computations (e.g. array indexing),
497 which does not need to be directly vectorized, then the liveness/relevance
498 of the respective DEF_STMT is left unchanged.
499 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
500 we skip DEF_STMT cause it had already been processed.
501 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
502 "relevant" will be modified accordingly.
504 Return true if everything is as expected. Return false otherwise. */
506 static opt_result
507 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
508 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
509 bool force)
511 stmt_vec_info dstmt_vinfo;
512 enum vect_def_type dt;
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
517 return opt_result::success ();
519 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
520 return opt_result::failure_at (stmt_vinfo->stmt,
521 "not vectorized:"
522 " unsupported use in stmt.\n");
524 if (!dstmt_vinfo)
525 return opt_result::success ();
527 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
528 basic_block bb = gimple_bb (stmt_vinfo->stmt);
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
537 && bb->loop_father == def_bb->loop_father)
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
542 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
543 return opt_result::success ();
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
548 d = dstmt_vinfo
549 inner-loop:
550 stmt # use (d)
551 outer-loop-tail-bb:
552 ... */
553 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
559 switch (relevant)
561 case vect_unused_in_scope:
562 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
563 vect_used_in_scope : vect_unused_in_scope;
564 break;
566 case vect_used_in_outer_by_reduction:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_by_reduction;
569 break;
571 case vect_used_in_outer:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
573 relevant = vect_used_in_scope;
574 break;
576 case vect_used_in_scope:
577 break;
579 default:
580 gcc_unreachable ();
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
587 inner-loop:
588 d = dstmt_vinfo
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
590 stmt # use (d) */
591 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE, vect_location,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
597 switch (relevant)
599 case vect_unused_in_scope:
600 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
602 vect_used_in_outer_by_reduction : vect_unused_in_scope;
603 break;
605 case vect_used_by_reduction:
606 case vect_used_only_live:
607 relevant = vect_used_in_outer_by_reduction;
608 break;
610 case vect_used_in_scope:
611 relevant = vect_used_in_outer;
612 break;
614 default:
615 gcc_unreachable ();
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
621 of course. */
622 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
626 loop_latch_edge (bb->loop_father))
627 == use))
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location,
631 "induction value on backedge.\n");
632 return opt_result::success ();
636 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
637 return opt_result::success ();
641 /* Function vect_mark_stmts_to_be_vectorized.
643 Not all stmts in the loop need to be vectorized. For example:
645 for i...
646 for j...
647 1. T0 = i + j
648 2. T1 = a[T0]
650 3. j = j + 1
652 Stmt 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
655 This pass detects such stmts. */
657 opt_result
658 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
660 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
661 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
662 unsigned int nbbs = loop->num_nodes;
663 gimple_stmt_iterator si;
664 unsigned int i;
665 basic_block bb;
666 bool live_p;
667 enum vect_relevant relevant;
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
671 auto_vec<stmt_vec_info, 64> worklist;
673 /* 1. Init worklist. */
674 for (i = 0; i < nbbs; i++)
676 bb = bbs[i];
677 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
679 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
682 phi_info->stmt);
684 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
685 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
687 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
689 if (is_gimple_debug (gsi_stmt (si)))
690 continue;
691 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "init: stmt relevant? %G", stmt_info->stmt);
696 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
697 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
701 /* 2. Process_worklist */
702 while (worklist.length () > 0)
704 use_operand_p use_p;
705 ssa_op_iter iter;
707 stmt_vec_info stmt_vinfo = worklist.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "worklist: examine stmt: %G", stmt_vinfo->stmt);
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
714 of STMT. */
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
730 case vect_reduction_def:
731 gcc_assert (relevant != vect_unused_in_scope);
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
736 return opt_result::failure_at
737 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
738 break;
740 case vect_nested_cycle:
741 if (relevant != vect_unused_in_scope
742 && relevant != vect_used_in_outer_by_reduction
743 && relevant != vect_used_in_outer)
744 return opt_result::failure_at
745 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
746 break;
748 case vect_double_reduction_def:
749 if (relevant != vect_unused_in_scope
750 && relevant != vect_used_by_reduction
751 && relevant != vect_used_only_live)
752 return opt_result::failure_at
753 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
754 break;
756 default:
757 break;
760 if (is_pattern_stmt_p (stmt_vinfo))
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
767 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
768 tree op = gimple_assign_rhs1 (assign);
770 i = 1;
771 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
773 opt_result res
774 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
775 loop_vinfo, relevant, &worklist, false);
776 if (!res)
777 return res;
778 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
779 loop_vinfo, relevant, &worklist, false);
780 if (!res)
781 return res;
782 i = 2;
784 for (; i < gimple_num_ops (assign); i++)
786 op = gimple_op (assign, i);
787 if (TREE_CODE (op) == SSA_NAME)
789 opt_result res
790 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
791 &worklist, false);
792 if (!res)
793 return res;
797 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
799 for (i = 0; i < gimple_call_num_args (call); i++)
801 tree arg = gimple_call_arg (call, i);
802 opt_result res
803 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
804 &worklist, false);
805 if (!res)
806 return res;
810 else
811 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
813 tree op = USE_FROM_PTR (use_p);
814 opt_result res
815 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
816 &worklist, false);
817 if (!res)
818 return res;
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
823 gather_scatter_info gs_info;
824 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
825 gcc_unreachable ();
826 opt_result res
827 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
828 &worklist, true);
829 if (!res)
831 if (fatal)
832 *fatal = false;
833 return res;
836 } /* while worklist */
838 return opt_result::success ();
841 /* Function vect_model_simple_cost.
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
847 static void
848 vect_model_simple_cost (vec_info *,
849 stmt_vec_info stmt_info, int ncopies,
850 enum vect_def_type *dt,
851 int ndts,
852 slp_tree node,
853 stmt_vector_for_cost *cost_vec,
854 vect_cost_for_stmt kind = vector_stmt)
856 int inside_cost = 0, prologue_cost = 0;
858 gcc_assert (cost_vec != NULL);
860 /* ??? Somehow we need to fix this at the callers. */
861 if (node)
862 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
864 if (!node)
865 /* Cost the "broadcast" of a scalar operand in to a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
867 cost model. */
868 for (int i = 0; i < ndts; i++)
869 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
870 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
871 stmt_info, 0, vect_prologue);
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
875 stmt_info, 0, vect_body);
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost, prologue_cost);
884 /* Model cost for type demotion and promotion operations. PWR is
885 normally zero for single-step promotions and demotions. It will be
886 one if two-step promotion/demotion is required, and so on. NCOPIES
887 is the number of vector results (and thus number of instructions)
888 for the narrowest end of the operation chain. Each additional
889 step doubles the number of instructions required. If WIDEN_ARITH
890 is true the stmt is doing widening arithmetic. */
892 static void
893 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
894 enum vect_def_type *dt,
895 unsigned int ncopies, int pwr,
896 stmt_vector_for_cost *cost_vec,
897 bool widen_arith)
899 int i;
900 int inside_cost = 0, prologue_cost = 0;
902 for (i = 0; i < pwr + 1; i++)
904 inside_cost += record_stmt_cost (cost_vec, ncopies,
905 widen_arith
906 ? vector_stmt : vec_promote_demote,
907 stmt_info, 0, vect_body);
908 ncopies *= 2;
911 /* FORNOW: Assuming maximum 2 args per stmts. */
912 for (i = 0; i < 2; i++)
913 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
914 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
915 stmt_info, 0, vect_prologue);
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE, vect_location,
919 "vect_model_promotion_demotion_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost, prologue_cost);
923 /* Returns true if the current function returns DECL. */
925 static bool
926 cfun_returns (tree decl)
928 edge_iterator ei;
929 edge e;
930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
932 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
933 if (!ret)
934 continue;
935 if (gimple_return_retval (ret) == decl)
936 return true;
937 /* We often end up with an aggregate copy to the result decl,
938 handle that case as well. First skip intermediate clobbers
939 though. */
940 gimple *def = ret;
943 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
945 while (gimple_clobber_p (def));
946 if (is_a <gassign *> (def)
947 && gimple_assign_lhs (def) == gimple_return_retval (ret)
948 && gimple_assign_rhs1 (def) == decl)
949 return true;
951 return false;
954 /* Calculate cost of DR's memory access. */
955 void
956 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
957 dr_alignment_support alignment_support_scheme,
958 int misalignment,
959 unsigned int *inside_cost,
960 stmt_vector_for_cost *body_cost_vec)
962 switch (alignment_support_scheme)
964 case dr_aligned:
966 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
967 vector_store, stmt_info, 0,
968 vect_body);
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE, vect_location,
972 "vect_model_store_cost: aligned.\n");
973 break;
976 case dr_unaligned_supported:
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
980 unaligned_store, stmt_info,
981 misalignment, vect_body);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE, vect_location,
984 "vect_model_store_cost: unaligned supported by "
985 "hardware.\n");
986 break;
989 case dr_unaligned_unsupported:
991 *inside_cost = VECT_MAX_COST;
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
995 "vect_model_store_cost: unsupported access.\n");
996 break;
999 default:
1000 gcc_unreachable ();
1004 /* Calculate cost of DR's memory access. */
1005 void
1006 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1007 dr_alignment_support alignment_support_scheme,
1008 int misalignment,
1009 bool add_realign_cost, unsigned int *inside_cost,
1010 unsigned int *prologue_cost,
1011 stmt_vector_for_cost *prologue_cost_vec,
1012 stmt_vector_for_cost *body_cost_vec,
1013 bool record_prologue_costs)
1015 switch (alignment_support_scheme)
1017 case dr_aligned:
1019 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1020 stmt_info, 0, vect_body);
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_NOTE, vect_location,
1024 "vect_model_load_cost: aligned.\n");
1026 break;
1028 case dr_unaligned_supported:
1030 /* Here, we assign an additional cost for the unaligned load. */
1031 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1032 unaligned_load, stmt_info,
1033 misalignment, vect_body);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_load_cost: unaligned supported by "
1038 "hardware.\n");
1040 break;
1042 case dr_explicit_realign:
1044 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1045 vector_load, stmt_info, 0, vect_body);
1046 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1047 vec_perm, stmt_info, 0, vect_body);
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1051 prologue costs. */
1052 if (targetm.vectorize.builtin_mask_for_load)
1053 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1054 stmt_info, 0, vect_body);
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_NOTE, vect_location,
1058 "vect_model_load_cost: explicit realign\n");
1060 break;
1062 case dr_explicit_realign_optimized:
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_NOTE, vect_location,
1066 "vect_model_load_cost: unaligned software "
1067 "pipelined.\n");
1069 /* Unaligned software pipeline has a load of an address, an initial
1070 load, and possibly a mask operation to "prime" the loop. However,
1071 if this is an access in a group of loads, which provide grouped
1072 access, then the above cost should only be considered for one
1073 access in the group. Inside the loop, there is a load op
1074 and a realignment op. */
1076 if (add_realign_cost && record_prologue_costs)
1078 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1079 vector_stmt, stmt_info,
1080 0, vect_prologue);
1081 if (targetm.vectorize.builtin_mask_for_load)
1082 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1083 vector_stmt, stmt_info,
1084 0, vect_prologue);
1087 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1088 stmt_info, 0, vect_body);
1089 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1090 stmt_info, 0, vect_body);
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE, vect_location,
1094 "vect_model_load_cost: explicit realign optimized"
1095 "\n");
1097 break;
1100 case dr_unaligned_unsupported:
1102 *inside_cost = VECT_MAX_COST;
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1106 "vect_model_load_cost: unsupported access.\n");
1107 break;
1110 default:
1111 gcc_unreachable ();
1115 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1116 the loop preheader for the vectorized stmt STMT_VINFO. */
1118 static void
1119 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1120 gimple_stmt_iterator *gsi)
1122 if (gsi)
1123 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1124 else
1125 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE, vect_location,
1129 "created new init_stmt: %G", new_stmt);
1132 /* Function vect_init_vector.
1134 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1135 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1136 vector type a vector with all elements equal to VAL is created first.
1137 Place the initialization at GSI if it is not NULL. Otherwise, place the
1138 initialization at the loop preheader.
1139 Return the DEF of INIT_STMT.
1140 It will be used in the vectorization of STMT_INFO. */
1142 tree
1143 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1144 gimple_stmt_iterator *gsi)
1146 gimple *init_stmt;
1147 tree new_temp;
1149 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1150 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1152 gcc_assert (VECTOR_TYPE_P (type));
1153 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1155 /* Scalar boolean value should be transformed into
1156 all zeros or all ones value before building a vector. */
1157 if (VECTOR_BOOLEAN_TYPE_P (type))
1159 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1160 tree false_val = build_zero_cst (TREE_TYPE (type));
1162 if (CONSTANT_CLASS_P (val))
1163 val = integer_zerop (val) ? false_val : true_val;
1164 else
1166 new_temp = make_ssa_name (TREE_TYPE (type));
1167 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1168 val, true_val, false_val);
1169 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1170 val = new_temp;
1173 else
1175 gimple_seq stmts = NULL;
1176 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1177 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1178 TREE_TYPE (type), val);
1179 else
1180 /* ??? Condition vectorization expects us to do
1181 promotion of invariant/external defs. */
1182 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1183 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1184 !gsi_end_p (gsi2); )
1186 init_stmt = gsi_stmt (gsi2);
1187 gsi_remove (&gsi2, false);
1188 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1192 val = build_vector_from_val (type, val);
1195 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1196 init_stmt = gimple_build_assign (new_temp, val);
1197 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1198 return new_temp;
1202 /* Function vect_get_vec_defs_for_operand.
1204 OP is an operand in STMT_VINFO. This function returns a vector of
1205 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1207 In the case that OP is an SSA_NAME which is defined in the loop, then
1208 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1210 In case OP is an invariant or constant, a new stmt that creates a vector def
1211 needs to be introduced. VECTYPE may be used to specify a required type for
1212 vector invariant. */
1214 void
1215 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1216 unsigned ncopies,
1217 tree op, vec<tree> *vec_oprnds, tree vectype)
1219 gimple *def_stmt;
1220 enum vect_def_type dt;
1221 bool is_simple_use;
1222 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_get_vec_defs_for_operand: %T\n", op);
1228 stmt_vec_info def_stmt_info;
1229 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1230 &def_stmt_info, &def_stmt);
1231 gcc_assert (is_simple_use);
1232 if (def_stmt && dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1235 vec_oprnds->create (ncopies);
1236 if (dt == vect_constant_def || dt == vect_external_def)
1238 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1239 tree vector_type;
1241 if (vectype)
1242 vector_type = vectype;
1243 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1244 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1245 vector_type = truth_type_for (stmt_vectype);
1246 else
1247 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1249 gcc_assert (vector_type);
1250 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1251 while (ncopies--)
1252 vec_oprnds->quick_push (vop);
1254 else
1256 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1257 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1258 for (unsigned i = 0; i < ncopies; ++i)
1259 vec_oprnds->quick_push (gimple_get_lhs
1260 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1265 /* Get vectorized definitions for OP0 and OP1. */
1267 void
1268 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1269 unsigned ncopies,
1270 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1271 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1272 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1273 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1275 if (slp_node)
1277 if (op0)
1278 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1279 if (op1)
1280 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1281 if (op2)
1282 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1283 if (op3)
1284 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1286 else
1288 if (op0)
1289 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1290 op0, vec_oprnds0, vectype0);
1291 if (op1)
1292 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1293 op1, vec_oprnds1, vectype1);
1294 if (op2)
1295 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1296 op2, vec_oprnds2, vectype2);
1297 if (op3)
1298 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1299 op3, vec_oprnds3, vectype3);
1303 void
1304 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1305 unsigned ncopies,
1306 tree op0, vec<tree> *vec_oprnds0,
1307 tree op1, vec<tree> *vec_oprnds1,
1308 tree op2, vec<tree> *vec_oprnds2,
1309 tree op3, vec<tree> *vec_oprnds3)
1311 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1312 op0, vec_oprnds0, NULL_TREE,
1313 op1, vec_oprnds1, NULL_TREE,
1314 op2, vec_oprnds2, NULL_TREE,
1315 op3, vec_oprnds3, NULL_TREE);
1318 /* Helper function called by vect_finish_replace_stmt and
1319 vect_finish_stmt_generation. Set the location of the new
1320 statement and create and return a stmt_vec_info for it. */
1322 static void
1323 vect_finish_stmt_generation_1 (vec_info *,
1324 stmt_vec_info stmt_info, gimple *vec_stmt)
1326 if (dump_enabled_p ())
1327 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1329 if (stmt_info)
1331 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1333 /* While EH edges will generally prevent vectorization, stmt might
1334 e.g. be in a must-not-throw region. Ensure newly created stmts
1335 that could throw are part of the same region. */
1336 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1337 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1338 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1340 else
1341 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1344 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1345 which sets the same scalar result as STMT_INFO did. Create and return a
1346 stmt_vec_info for VEC_STMT. */
1348 void
1349 vect_finish_replace_stmt (vec_info *vinfo,
1350 stmt_vec_info stmt_info, gimple *vec_stmt)
1352 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1353 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1355 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1356 gsi_replace (&gsi, vec_stmt, true);
1358 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1361 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1362 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1364 void
1365 vect_finish_stmt_generation (vec_info *vinfo,
1366 stmt_vec_info stmt_info, gimple *vec_stmt,
1367 gimple_stmt_iterator *gsi)
1369 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1371 if (!gsi_end_p (*gsi)
1372 && gimple_has_mem_ops (vec_stmt))
1374 gimple *at_stmt = gsi_stmt (*gsi);
1375 tree vuse = gimple_vuse (at_stmt);
1376 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1378 tree vdef = gimple_vdef (at_stmt);
1379 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1380 gimple_set_modified (vec_stmt, true);
1381 /* If we have an SSA vuse and insert a store, update virtual
1382 SSA form to avoid triggering the renamer. Do so only
1383 if we can easily see all uses - which is what almost always
1384 happens with the way vectorized stmts are inserted. */
1385 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1386 && ((is_gimple_assign (vec_stmt)
1387 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1388 || (is_gimple_call (vec_stmt)
1389 && (!(gimple_call_flags (vec_stmt)
1390 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1391 || (gimple_call_lhs (vec_stmt)
1392 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1394 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1395 gimple_set_vdef (vec_stmt, new_vdef);
1396 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1400 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1401 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1404 /* We want to vectorize a call to combined function CFN with function
1405 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1406 as the types of all inputs. Check whether this is possible using
1407 an internal function, returning its code if so or IFN_LAST if not. */
1409 static internal_fn
1410 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1411 tree vectype_out, tree vectype_in)
1413 internal_fn ifn;
1414 if (internal_fn_p (cfn))
1415 ifn = as_internal_fn (cfn);
1416 else
1417 ifn = associated_internal_fn (fndecl);
1418 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1420 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1421 if (info.vectorizable)
1423 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1424 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1425 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1426 OPTIMIZE_FOR_SPEED))
1427 return ifn;
1430 return IFN_LAST;
1434 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1435 gimple_stmt_iterator *);
1437 /* Check whether a load or store statement in the loop described by
1438 LOOP_VINFO is possible in a loop using partial vectors. This is
1439 testing whether the vectorizer pass has the appropriate support,
1440 as well as whether the target does.
1442 VLS_TYPE says whether the statement is a load or store and VECTYPE
1443 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1444 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1445 says how the load or store is going to be implemented and GROUP_SIZE
1446 is the number of load or store statements in the containing group.
1447 If the access is a gather load or scatter store, GS_INFO describes
1448 its arguments. If the load or store is conditional, SCALAR_MASK is the
1449 condition under which it occurs.
1451 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1452 vectors is not supported, otherwise record the required rgroup control
1453 types. */
1455 static void
1456 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1457 slp_tree slp_node,
1458 vec_load_store_type vls_type,
1459 int group_size,
1460 vect_memory_access_type
1461 memory_access_type,
1462 gather_scatter_info *gs_info,
1463 tree scalar_mask)
1465 /* Invariant loads need no special support. */
1466 if (memory_access_type == VMAT_INVARIANT)
1467 return;
1469 unsigned int nvectors;
1470 if (slp_node)
1471 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1472 else
1473 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1475 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1476 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1477 machine_mode vecmode = TYPE_MODE (vectype);
1478 bool is_load = (vls_type == VLS_LOAD);
1479 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1481 internal_fn ifn
1482 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1483 : vect_store_lanes_supported (vectype, group_size, true));
1484 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1485 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1486 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1487 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1488 scalar_mask);
1489 else
1491 if (dump_enabled_p ())
1492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1493 "can't operate on partial vectors because"
1494 " the target doesn't have an appropriate"
1495 " load/store-lanes instruction.\n");
1496 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1498 return;
1501 if (memory_access_type == VMAT_GATHER_SCATTER)
1503 internal_fn ifn = (is_load
1504 ? IFN_MASK_GATHER_LOAD
1505 : IFN_MASK_SCATTER_STORE);
1506 internal_fn len_ifn = (is_load
1507 ? IFN_MASK_LEN_GATHER_LOAD
1508 : IFN_MASK_LEN_SCATTER_STORE);
1509 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1510 gs_info->memory_type,
1511 gs_info->offset_vectype,
1512 gs_info->scale))
1513 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1514 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1515 gs_info->memory_type,
1516 gs_info->offset_vectype,
1517 gs_info->scale))
1518 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1519 scalar_mask);
1520 else
1522 if (dump_enabled_p ())
1523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1524 "can't operate on partial vectors because"
1525 " the target doesn't have an appropriate"
1526 " gather load or scatter store instruction.\n");
1527 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1529 return;
1532 if (memory_access_type != VMAT_CONTIGUOUS
1533 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1535 /* Element X of the data must come from iteration i * VF + X of the
1536 scalar loop. We need more work to support other mappings. */
1537 if (dump_enabled_p ())
1538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1539 "can't operate on partial vectors because an"
1540 " access isn't contiguous.\n");
1541 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1542 return;
1545 if (!VECTOR_MODE_P (vecmode))
1547 if (dump_enabled_p ())
1548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1549 "can't operate on partial vectors when emulating"
1550 " vector operations.\n");
1551 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1552 return;
1555 /* We might load more scalars than we need for permuting SLP loads.
1556 We checked in get_group_load_store_type that the extra elements
1557 don't leak into a new vector. */
1558 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1560 unsigned int nvectors;
1561 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1562 return nvectors;
1563 gcc_unreachable ();
1566 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1567 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1568 machine_mode mask_mode;
1569 machine_mode vmode;
1570 bool using_partial_vectors_p = false;
1571 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1573 nvectors = group_memory_nvectors (group_size * vf, nunits);
1574 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1575 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1576 using_partial_vectors_p = true;
1578 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1579 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1581 nvectors = group_memory_nvectors (group_size * vf, nunits);
1582 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1583 using_partial_vectors_p = true;
1586 if (!using_partial_vectors_p)
1588 if (dump_enabled_p ())
1589 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1590 "can't operate on partial vectors because the"
1591 " target doesn't have the appropriate partial"
1592 " vectorization load or store.\n");
1593 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1597 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1598 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1599 that needs to be applied to all loads and stores in a vectorized loop.
1600 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1601 otherwise return VEC_MASK & LOOP_MASK.
1603 MASK_TYPE is the type of both masks. If new statements are needed,
1604 insert them before GSI. */
1606 static tree
1607 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1608 tree vec_mask, gimple_stmt_iterator *gsi)
1610 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1611 if (!loop_mask)
1612 return vec_mask;
1614 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1616 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1617 return vec_mask;
1619 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1620 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1621 vec_mask, loop_mask);
1623 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1624 return and_res;
1627 /* Determine whether we can use a gather load or scatter store to vectorize
1628 strided load or store STMT_INFO by truncating the current offset to a
1629 smaller width. We need to be able to construct an offset vector:
1631 { 0, X, X*2, X*3, ... }
1633 without loss of precision, where X is STMT_INFO's DR_STEP.
1635 Return true if this is possible, describing the gather load or scatter
1636 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1638 static bool
1639 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1640 loop_vec_info loop_vinfo, bool masked_p,
1641 gather_scatter_info *gs_info)
1643 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1644 data_reference *dr = dr_info->dr;
1645 tree step = DR_STEP (dr);
1646 if (TREE_CODE (step) != INTEGER_CST)
1648 /* ??? Perhaps we could use range information here? */
1649 if (dump_enabled_p ())
1650 dump_printf_loc (MSG_NOTE, vect_location,
1651 "cannot truncate variable step.\n");
1652 return false;
1655 /* Get the number of bits in an element. */
1656 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1657 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1658 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1660 /* Set COUNT to the upper limit on the number of elements - 1.
1661 Start with the maximum vectorization factor. */
1662 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1664 /* Try lowering COUNT to the number of scalar latch iterations. */
1665 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1666 widest_int max_iters;
1667 if (max_loop_iterations (loop, &max_iters)
1668 && max_iters < count)
1669 count = max_iters.to_shwi ();
1671 /* Try scales of 1 and the element size. */
1672 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1673 wi::overflow_type overflow = wi::OVF_NONE;
1674 for (int i = 0; i < 2; ++i)
1676 int scale = scales[i];
1677 widest_int factor;
1678 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1679 continue;
1681 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1682 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1683 if (overflow)
1684 continue;
1685 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1686 unsigned int min_offset_bits = wi::min_precision (range, sign);
1688 /* Find the narrowest viable offset type. */
1689 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1690 tree offset_type = build_nonstandard_integer_type (offset_bits,
1691 sign == UNSIGNED);
1693 /* See whether the target supports the operation with an offset
1694 no narrower than OFFSET_TYPE. */
1695 tree memory_type = TREE_TYPE (DR_REF (dr));
1696 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1697 vectype, memory_type, offset_type, scale,
1698 &gs_info->ifn, &gs_info->offset_vectype)
1699 || gs_info->ifn == IFN_LAST)
1700 continue;
1702 gs_info->decl = NULL_TREE;
1703 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1704 but we don't need to store that here. */
1705 gs_info->base = NULL_TREE;
1706 gs_info->element_type = TREE_TYPE (vectype);
1707 gs_info->offset = fold_convert (offset_type, step);
1708 gs_info->offset_dt = vect_constant_def;
1709 gs_info->scale = scale;
1710 gs_info->memory_type = memory_type;
1711 return true;
1714 if (overflow && dump_enabled_p ())
1715 dump_printf_loc (MSG_NOTE, vect_location,
1716 "truncating gather/scatter offset to %d bits"
1717 " might change its value.\n", element_bits);
1719 return false;
1722 /* Return true if we can use gather/scatter internal functions to
1723 vectorize STMT_INFO, which is a grouped or strided load or store.
1724 MASKED_P is true if load or store is conditional. When returning
1725 true, fill in GS_INFO with the information required to perform the
1726 operation. */
1728 static bool
1729 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1730 loop_vec_info loop_vinfo, bool masked_p,
1731 gather_scatter_info *gs_info)
1733 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1734 || gs_info->ifn == IFN_LAST)
1735 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1736 masked_p, gs_info);
1738 tree old_offset_type = TREE_TYPE (gs_info->offset);
1739 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1741 gcc_assert (TYPE_PRECISION (new_offset_type)
1742 >= TYPE_PRECISION (old_offset_type));
1743 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1745 if (dump_enabled_p ())
1746 dump_printf_loc (MSG_NOTE, vect_location,
1747 "using gather/scatter for strided/grouped access,"
1748 " scale = %d\n", gs_info->scale);
1750 return true;
1753 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1754 elements with a known constant step. Return -1 if that step
1755 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1757 static int
1758 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1760 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1761 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1762 size_zero_node);
1765 /* If the target supports a permute mask that reverses the elements in
1766 a vector of type VECTYPE, return that mask, otherwise return null. */
1768 static tree
1769 perm_mask_for_reverse (tree vectype)
1771 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1773 /* The encoding has a single stepped pattern. */
1774 vec_perm_builder sel (nunits, 1, 3);
1775 for (int i = 0; i < 3; ++i)
1776 sel.quick_push (nunits - 1 - i);
1778 vec_perm_indices indices (sel, 1, nunits);
1779 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1780 indices))
1781 return NULL_TREE;
1782 return vect_gen_perm_mask_checked (vectype, indices);
1785 /* A subroutine of get_load_store_type, with a subset of the same
1786 arguments. Handle the case where STMT_INFO is a load or store that
1787 accesses consecutive elements with a negative step. Sets *POFFSET
1788 to the offset to be applied to the DR for the first access. */
1790 static vect_memory_access_type
1791 get_negative_load_store_type (vec_info *vinfo,
1792 stmt_vec_info stmt_info, tree vectype,
1793 vec_load_store_type vls_type,
1794 unsigned int ncopies, poly_int64 *poffset)
1796 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1797 dr_alignment_support alignment_support_scheme;
1799 if (ncopies > 1)
1801 if (dump_enabled_p ())
1802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1803 "multiple types with negative step.\n");
1804 return VMAT_ELEMENTWISE;
1807 /* For backward running DRs the first access in vectype actually is
1808 N-1 elements before the address of the DR. */
1809 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1810 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1812 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1813 alignment_support_scheme
1814 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1815 if (alignment_support_scheme != dr_aligned
1816 && alignment_support_scheme != dr_unaligned_supported)
1818 if (dump_enabled_p ())
1819 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1820 "negative step but alignment required.\n");
1821 *poffset = 0;
1822 return VMAT_ELEMENTWISE;
1825 if (vls_type == VLS_STORE_INVARIANT)
1827 if (dump_enabled_p ())
1828 dump_printf_loc (MSG_NOTE, vect_location,
1829 "negative step with invariant source;"
1830 " no permute needed.\n");
1831 return VMAT_CONTIGUOUS_DOWN;
1834 if (!perm_mask_for_reverse (vectype))
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1838 "negative step and reversing not supported.\n");
1839 *poffset = 0;
1840 return VMAT_ELEMENTWISE;
1843 return VMAT_CONTIGUOUS_REVERSE;
1846 /* STMT_INFO is either a masked or unconditional store. Return the value
1847 being stored. */
1849 tree
1850 vect_get_store_rhs (stmt_vec_info stmt_info)
1852 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1854 gcc_assert (gimple_assign_single_p (assign));
1855 return gimple_assign_rhs1 (assign);
1857 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1859 internal_fn ifn = gimple_call_internal_fn (call);
1860 int index = internal_fn_stored_value_index (ifn);
1861 gcc_assert (index >= 0);
1862 return gimple_call_arg (call, index);
1864 gcc_unreachable ();
1867 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1869 This function returns a vector type which can be composed with NETLS pieces,
1870 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
1871 same vector size as the return vector. It checks target whether supports
1872 pieces-size vector mode for construction firstly, if target fails to, check
1873 pieces-size scalar mode for construction further. It returns NULL_TREE if
1874 fails to find the available composition.
1876 For example, for (vtype=V16QI, nelts=4), we can probably get:
1877 - V16QI with PTYPE V4QI.
1878 - V4SI with PTYPE SI.
1879 - NULL_TREE. */
1881 static tree
1882 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1884 gcc_assert (VECTOR_TYPE_P (vtype));
1885 gcc_assert (known_gt (nelts, 0U));
1887 machine_mode vmode = TYPE_MODE (vtype);
1888 if (!VECTOR_MODE_P (vmode))
1889 return NULL_TREE;
1891 /* When we are asked to compose the vector from its components let
1892 that happen directly. */
1893 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1895 *ptype = TREE_TYPE (vtype);
1896 return vtype;
1899 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
1900 unsigned int pbsize;
1901 if (constant_multiple_p (vbsize, nelts, &pbsize))
1903 /* First check if vec_init optab supports construction from
1904 vector pieces directly. */
1905 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1906 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
1907 machine_mode rmode;
1908 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
1909 && (convert_optab_handler (vec_init_optab, vmode, rmode)
1910 != CODE_FOR_nothing))
1912 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1913 return vtype;
1916 /* Otherwise check if exists an integer type of the same piece size and
1917 if vec_init optab supports construction from it directly. */
1918 if (int_mode_for_size (pbsize, 0).exists (&elmode)
1919 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
1920 && (convert_optab_handler (vec_init_optab, rmode, elmode)
1921 != CODE_FOR_nothing))
1923 *ptype = build_nonstandard_integer_type (pbsize, 1);
1924 return build_vector_type (*ptype, nelts);
1928 return NULL_TREE;
1931 /* A subroutine of get_load_store_type, with a subset of the same
1932 arguments. Handle the case where STMT_INFO is part of a grouped load
1933 or store.
1935 For stores, the statements in the group are all consecutive
1936 and there is no gap at the end. For loads, the statements in the
1937 group might not be consecutive; there can be gaps between statements
1938 as well as at the end. */
1940 static bool
1941 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1942 tree vectype, slp_tree slp_node,
1943 bool masked_p, vec_load_store_type vls_type,
1944 vect_memory_access_type *memory_access_type,
1945 poly_int64 *poffset,
1946 dr_alignment_support *alignment_support_scheme,
1947 int *misalignment,
1948 gather_scatter_info *gs_info,
1949 internal_fn *lanes_ifn)
1951 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1952 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1953 stmt_vec_info first_stmt_info;
1954 unsigned int group_size;
1955 unsigned HOST_WIDE_INT gap;
1956 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1958 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1959 group_size = DR_GROUP_SIZE (first_stmt_info);
1960 gap = DR_GROUP_GAP (first_stmt_info);
1962 else
1964 first_stmt_info = stmt_info;
1965 group_size = 1;
1966 gap = 0;
1968 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
1969 bool single_element_p = (stmt_info == first_stmt_info
1970 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
1971 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1973 /* True if the vectorized statements would access beyond the last
1974 statement in the group. */
1975 bool overrun_p = false;
1977 /* True if we can cope with such overrun by peeling for gaps, so that
1978 there is at least one final scalar iteration after the vector loop. */
1979 bool can_overrun_p = (!masked_p
1980 && vls_type == VLS_LOAD
1981 && loop_vinfo
1982 && !loop->inner);
1984 /* There can only be a gap at the end of the group if the stride is
1985 known at compile time. */
1986 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
1988 /* Stores can't yet have gaps. */
1989 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
1991 if (slp_node)
1993 /* For SLP vectorization we directly vectorize a subchain
1994 without permutation. */
1995 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1996 first_dr_info
1997 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
1998 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2000 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2001 separated by the stride, until we have a complete vector.
2002 Fall back to scalar accesses if that isn't possible. */
2003 if (multiple_p (nunits, group_size))
2004 *memory_access_type = VMAT_STRIDED_SLP;
2005 else
2006 *memory_access_type = VMAT_ELEMENTWISE;
2008 else
2010 overrun_p = loop_vinfo && gap != 0;
2011 if (overrun_p && vls_type != VLS_LOAD)
2013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2014 "Grouped store with gaps requires"
2015 " non-consecutive accesses\n");
2016 return false;
2018 /* An overrun is fine if the trailing elements are smaller
2019 than the alignment boundary B. Every vector access will
2020 be a multiple of B and so we are guaranteed to access a
2021 non-gap element in the same B-sized block. */
2022 if (overrun_p
2023 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2024 vectype)
2025 / vect_get_scalar_dr_size (first_dr_info)))
2026 overrun_p = false;
2028 /* If the gap splits the vector in half and the target
2029 can do half-vector operations avoid the epilogue peeling
2030 by simply loading half of the vector only. Usually
2031 the construction with an upper zero half will be elided. */
2032 dr_alignment_support alss;
2033 int misalign = dr_misalignment (first_dr_info, vectype);
2034 tree half_vtype;
2035 if (overrun_p
2036 && !masked_p
2037 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2038 vectype, misalign)))
2039 == dr_aligned
2040 || alss == dr_unaligned_supported)
2041 && known_eq (nunits, (group_size - gap) * 2)
2042 && known_eq (nunits, group_size)
2043 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2044 != NULL_TREE))
2045 overrun_p = false;
2047 if (overrun_p && !can_overrun_p)
2049 if (dump_enabled_p ())
2050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2051 "Peeling for outer loop is not supported\n");
2052 return false;
2054 int cmp = compare_step_with_zero (vinfo, stmt_info);
2055 if (cmp < 0)
2057 if (single_element_p)
2058 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2059 only correct for single element "interleaving" SLP. */
2060 *memory_access_type = get_negative_load_store_type
2061 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2062 else
2064 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2065 separated by the stride, until we have a complete vector.
2066 Fall back to scalar accesses if that isn't possible. */
2067 if (multiple_p (nunits, group_size))
2068 *memory_access_type = VMAT_STRIDED_SLP;
2069 else
2070 *memory_access_type = VMAT_ELEMENTWISE;
2073 else if (cmp == 0 && loop_vinfo)
2075 gcc_assert (vls_type == VLS_LOAD);
2076 *memory_access_type = VMAT_INVARIANT;
2077 /* Invariant accesses perform only component accesses, alignment
2078 is irrelevant for them. */
2079 *alignment_support_scheme = dr_unaligned_supported;
2081 else
2082 *memory_access_type = VMAT_CONTIGUOUS;
2084 /* When we have a contiguous access across loop iterations
2085 but the access in the loop doesn't cover the full vector
2086 we can end up with no gap recorded but still excess
2087 elements accessed, see PR103116. Make sure we peel for
2088 gaps if necessary and sufficient and give up if not.
2090 If there is a combination of the access not covering the full
2091 vector and a gap recorded then we may need to peel twice. */
2092 if (loop_vinfo
2093 && *memory_access_type == VMAT_CONTIGUOUS
2094 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2095 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2096 nunits))
2098 unsigned HOST_WIDE_INT cnunits, cvf;
2099 if (!can_overrun_p
2100 || !nunits.is_constant (&cnunits)
2101 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2102 /* Peeling for gaps assumes that a single scalar iteration
2103 is enough to make sure the last vector iteration doesn't
2104 access excess elements.
2105 ??? Enhancements include peeling multiple iterations
2106 or using masked loads with a static mask. */
2107 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2109 if (dump_enabled_p ())
2110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2111 "peeling for gaps insufficient for "
2112 "access\n");
2113 return false;
2115 overrun_p = true;
2119 else
2121 /* We can always handle this case using elementwise accesses,
2122 but see if something more efficient is available. */
2123 *memory_access_type = VMAT_ELEMENTWISE;
2125 /* If there is a gap at the end of the group then these optimizations
2126 would access excess elements in the last iteration. */
2127 bool would_overrun_p = (gap != 0);
2128 /* An overrun is fine if the trailing elements are smaller than the
2129 alignment boundary B. Every vector access will be a multiple of B
2130 and so we are guaranteed to access a non-gap element in the
2131 same B-sized block. */
2132 if (would_overrun_p
2133 && !masked_p
2134 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2135 / vect_get_scalar_dr_size (first_dr_info)))
2136 would_overrun_p = false;
2138 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2139 && (can_overrun_p || !would_overrun_p)
2140 && compare_step_with_zero (vinfo, stmt_info) > 0)
2142 /* First cope with the degenerate case of a single-element
2143 vector. */
2144 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2147 else
2149 /* Otherwise try using LOAD/STORE_LANES. */
2150 *lanes_ifn
2151 = vls_type == VLS_LOAD
2152 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2153 : vect_store_lanes_supported (vectype, group_size,
2154 masked_p);
2155 if (*lanes_ifn != IFN_LAST)
2157 *memory_access_type = VMAT_LOAD_STORE_LANES;
2158 overrun_p = would_overrun_p;
2161 /* If that fails, try using permuting loads. */
2162 else if (vls_type == VLS_LOAD
2163 ? vect_grouped_load_supported (vectype,
2164 single_element_p,
2165 group_size)
2166 : vect_grouped_store_supported (vectype, group_size))
2168 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2169 overrun_p = would_overrun_p;
2174 /* As a last resort, trying using a gather load or scatter store.
2176 ??? Although the code can handle all group sizes correctly,
2177 it probably isn't a win to use separate strided accesses based
2178 on nearby locations. Or, even if it's a win over scalar code,
2179 it might not be a win over vectorizing at a lower VF, if that
2180 allows us to use contiguous accesses. */
2181 if (*memory_access_type == VMAT_ELEMENTWISE
2182 && single_element_p
2183 && loop_vinfo
2184 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2185 masked_p, gs_info))
2186 *memory_access_type = VMAT_GATHER_SCATTER;
2189 if (*memory_access_type == VMAT_GATHER_SCATTER
2190 || *memory_access_type == VMAT_ELEMENTWISE)
2192 *alignment_support_scheme = dr_unaligned_supported;
2193 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2195 else
2197 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2198 *alignment_support_scheme
2199 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2200 *misalignment);
2203 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2205 /* STMT is the leader of the group. Check the operands of all the
2206 stmts of the group. */
2207 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2208 while (next_stmt_info)
2210 tree op = vect_get_store_rhs (next_stmt_info);
2211 enum vect_def_type dt;
2212 if (!vect_is_simple_use (op, vinfo, &dt))
2214 if (dump_enabled_p ())
2215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2216 "use not simple.\n");
2217 return false;
2219 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2223 if (overrun_p)
2225 gcc_assert (can_overrun_p);
2226 if (dump_enabled_p ())
2227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2228 "Data access with gaps requires scalar "
2229 "epilogue loop\n");
2230 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2233 return true;
2236 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2237 if there is a memory access type that the vectorized form can use,
2238 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2239 or scatters, fill in GS_INFO accordingly. In addition
2240 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2241 the target does not support the alignment scheme. *MISALIGNMENT
2242 is set according to the alignment of the access (including
2243 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2245 SLP says whether we're performing SLP rather than loop vectorization.
2246 MASKED_P is true if the statement is conditional on a vectorized mask.
2247 VECTYPE is the vector type that the vectorized statements will use.
2248 NCOPIES is the number of vector statements that will be needed. */
2250 static bool
2251 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2252 tree vectype, slp_tree slp_node,
2253 bool masked_p, vec_load_store_type vls_type,
2254 unsigned int ncopies,
2255 vect_memory_access_type *memory_access_type,
2256 poly_int64 *poffset,
2257 dr_alignment_support *alignment_support_scheme,
2258 int *misalignment,
2259 gather_scatter_info *gs_info,
2260 internal_fn *lanes_ifn)
2262 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2263 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2264 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2265 *poffset = 0;
2266 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2268 *memory_access_type = VMAT_GATHER_SCATTER;
2269 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2270 gcc_unreachable ();
2271 /* When using internal functions, we rely on pattern recognition
2272 to convert the type of the offset to the type that the target
2273 requires, with the result being a call to an internal function.
2274 If that failed for some reason (e.g. because another pattern
2275 took priority), just handle cases in which the offset already
2276 has the right type. */
2277 else if (gs_info->ifn != IFN_LAST
2278 && !is_gimple_call (stmt_info->stmt)
2279 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2280 TREE_TYPE (gs_info->offset_vectype)))
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2284 "%s offset requires a conversion\n",
2285 vls_type == VLS_LOAD ? "gather" : "scatter");
2286 return false;
2288 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2289 &gs_info->offset_dt,
2290 &gs_info->offset_vectype))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 "%s index use not simple.\n",
2295 vls_type == VLS_LOAD ? "gather" : "scatter");
2296 return false;
2298 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2300 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2301 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2302 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2303 (gs_info->offset_vectype),
2304 TYPE_VECTOR_SUBPARTS (vectype)))
2306 if (dump_enabled_p ())
2307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308 "unsupported vector types for emulated "
2309 "gather.\n");
2310 return false;
2313 /* Gather-scatter accesses perform only component accesses, alignment
2314 is irrelevant for them. */
2315 *alignment_support_scheme = dr_unaligned_supported;
2317 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2319 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2320 masked_p,
2321 vls_type, memory_access_type, poffset,
2322 alignment_support_scheme,
2323 misalignment, gs_info, lanes_ifn))
2324 return false;
2326 else if (STMT_VINFO_STRIDED_P (stmt_info))
2328 gcc_assert (!slp_node);
2329 if (loop_vinfo
2330 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2331 masked_p, gs_info))
2332 *memory_access_type = VMAT_GATHER_SCATTER;
2333 else
2334 *memory_access_type = VMAT_ELEMENTWISE;
2335 /* Alignment is irrelevant here. */
2336 *alignment_support_scheme = dr_unaligned_supported;
2338 else
2340 int cmp = compare_step_with_zero (vinfo, stmt_info);
2341 if (cmp == 0)
2343 gcc_assert (vls_type == VLS_LOAD);
2344 *memory_access_type = VMAT_INVARIANT;
2345 /* Invariant accesses perform only component accesses, alignment
2346 is irrelevant for them. */
2347 *alignment_support_scheme = dr_unaligned_supported;
2349 else
2351 if (cmp < 0)
2352 *memory_access_type = get_negative_load_store_type
2353 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2354 else
2355 *memory_access_type = VMAT_CONTIGUOUS;
2356 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2357 vectype, *poffset);
2358 *alignment_support_scheme
2359 = vect_supportable_dr_alignment (vinfo,
2360 STMT_VINFO_DR_INFO (stmt_info),
2361 vectype, *misalignment);
2365 if ((*memory_access_type == VMAT_ELEMENTWISE
2366 || *memory_access_type == VMAT_STRIDED_SLP)
2367 && !nunits.is_constant ())
2369 if (dump_enabled_p ())
2370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2371 "Not using elementwise accesses due to variable "
2372 "vectorization factor.\n");
2373 return false;
2376 if (*alignment_support_scheme == dr_unaligned_unsupported)
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2380 "unsupported unaligned access\n");
2381 return false;
2384 /* FIXME: At the moment the cost model seems to underestimate the
2385 cost of using elementwise accesses. This check preserves the
2386 traditional behavior until that can be fixed. */
2387 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2388 if (!first_stmt_info)
2389 first_stmt_info = stmt_info;
2390 if (*memory_access_type == VMAT_ELEMENTWISE
2391 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2392 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2393 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2394 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2396 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2398 "not falling back to elementwise accesses\n");
2399 return false;
2401 return true;
2404 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2405 conditional operation STMT_INFO. When returning true, store the mask
2406 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2407 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2408 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2410 static bool
2411 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2412 slp_tree slp_node, unsigned mask_index,
2413 tree *mask, slp_tree *mask_node,
2414 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2416 enum vect_def_type mask_dt;
2417 tree mask_vectype;
2418 slp_tree mask_node_1;
2419 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2420 mask, &mask_node_1, &mask_dt, &mask_vectype))
2422 if (dump_enabled_p ())
2423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2424 "mask use not simple.\n");
2425 return false;
2428 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2430 if (dump_enabled_p ())
2431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432 "mask argument is not a boolean.\n");
2433 return false;
2436 /* If the caller is not prepared for adjusting an external/constant
2437 SLP mask vector type fail. */
2438 if (slp_node
2439 && !mask_node
2440 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2442 if (dump_enabled_p ())
2443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2444 "SLP mask argument is not vectorized.\n");
2445 return false;
2448 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2449 if (!mask_vectype)
2450 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2452 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2454 if (dump_enabled_p ())
2455 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2456 "could not find an appropriate vector mask type.\n");
2457 return false;
2460 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2461 TYPE_VECTOR_SUBPARTS (vectype)))
2463 if (dump_enabled_p ())
2464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2465 "vector mask type %T"
2466 " does not match vector data type %T.\n",
2467 mask_vectype, vectype);
2469 return false;
2472 *mask_dt_out = mask_dt;
2473 *mask_vectype_out = mask_vectype;
2474 if (mask_node)
2475 *mask_node = mask_node_1;
2476 return true;
2479 /* Return true if stored value RHS is suitable for vectorizing store
2480 statement STMT_INFO. When returning true, store the type of the
2481 definition in *RHS_DT_OUT, the type of the vectorized store value in
2482 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2484 static bool
2485 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2486 slp_tree slp_node, tree rhs,
2487 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2488 vec_load_store_type *vls_type_out)
2490 /* In the case this is a store from a constant make sure
2491 native_encode_expr can handle it. */
2492 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2494 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2496 "cannot encode constant as a byte sequence.\n");
2497 return false;
2500 int op_no = 0;
2501 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2503 if (gimple_call_internal_p (call)
2504 && internal_store_fn_p (gimple_call_internal_fn (call)))
2505 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2506 if (slp_node)
2507 op_no = vect_slp_child_index_for_operand (call, op_no);
2510 enum vect_def_type rhs_dt;
2511 tree rhs_vectype;
2512 slp_tree slp_op;
2513 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2514 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2516 if (dump_enabled_p ())
2517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2518 "use not simple.\n");
2519 return false;
2522 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2523 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2525 if (dump_enabled_p ())
2526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2527 "incompatible vector types.\n");
2528 return false;
2531 *rhs_dt_out = rhs_dt;
2532 *rhs_vectype_out = rhs_vectype;
2533 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2534 *vls_type_out = VLS_STORE_INVARIANT;
2535 else
2536 *vls_type_out = VLS_STORE;
2537 return true;
2540 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2541 Note that we support masks with floating-point type, in which case the
2542 floats are interpreted as a bitmask. */
2544 static tree
2545 vect_build_all_ones_mask (vec_info *vinfo,
2546 stmt_vec_info stmt_info, tree masktype)
2548 if (TREE_CODE (masktype) == INTEGER_TYPE)
2549 return build_int_cst (masktype, -1);
2550 else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2551 || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2553 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2554 mask = build_vector_from_val (masktype, mask);
2555 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2557 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2559 REAL_VALUE_TYPE r;
2560 long tmp[6];
2561 for (int j = 0; j < 6; ++j)
2562 tmp[j] = -1;
2563 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2564 tree mask = build_real (TREE_TYPE (masktype), r);
2565 mask = build_vector_from_val (masktype, mask);
2566 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2568 gcc_unreachable ();
2571 /* Build an all-zero merge value of type VECTYPE while vectorizing
2572 STMT_INFO as a gather load. */
2574 static tree
2575 vect_build_zero_merge_argument (vec_info *vinfo,
2576 stmt_vec_info stmt_info, tree vectype)
2578 tree merge;
2579 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2580 merge = build_int_cst (TREE_TYPE (vectype), 0);
2581 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2583 REAL_VALUE_TYPE r;
2584 long tmp[6];
2585 for (int j = 0; j < 6; ++j)
2586 tmp[j] = 0;
2587 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2588 merge = build_real (TREE_TYPE (vectype), r);
2590 else
2591 gcc_unreachable ();
2592 merge = build_vector_from_val (vectype, merge);
2593 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2596 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2597 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2598 the gather load operation. If the load is conditional, MASK is the
2599 vectorized condition, otherwise MASK is null. PTR is the base
2600 pointer and OFFSET is the vectorized offset. */
2602 static gimple *
2603 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2604 gimple_stmt_iterator *gsi,
2605 gather_scatter_info *gs_info,
2606 tree ptr, tree offset, tree mask)
2608 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2609 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2610 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2611 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2612 /* ptrtype */ arglist = TREE_CHAIN (arglist);
2613 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2614 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2615 tree scaletype = TREE_VALUE (arglist);
2616 tree var;
2617 gcc_checking_assert (types_compatible_p (srctype, rettype)
2618 && (!mask
2619 || TREE_CODE (masktype) == INTEGER_TYPE
2620 || types_compatible_p (srctype, masktype)));
2622 tree op = offset;
2623 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2625 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2626 TYPE_VECTOR_SUBPARTS (idxtype)));
2627 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2628 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2629 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2630 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2631 op = var;
2634 tree src_op = NULL_TREE;
2635 tree mask_op = NULL_TREE;
2636 if (mask)
2638 if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2640 tree utype, optype = TREE_TYPE (mask);
2641 if (VECTOR_TYPE_P (masktype)
2642 || TYPE_MODE (masktype) == TYPE_MODE (optype))
2643 utype = masktype;
2644 else
2645 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2646 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2647 tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
2648 gassign *new_stmt
2649 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2650 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2651 mask_arg = var;
2652 if (!useless_type_conversion_p (masktype, utype))
2654 gcc_assert (TYPE_PRECISION (utype)
2655 <= TYPE_PRECISION (masktype));
2656 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2657 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2658 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2659 mask_arg = var;
2661 src_op = build_zero_cst (srctype);
2662 mask_op = mask_arg;
2664 else
2666 src_op = mask;
2667 mask_op = mask;
2670 else
2672 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2673 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2676 tree scale = build_int_cst (scaletype, gs_info->scale);
2677 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2678 mask_op, scale);
2680 if (!useless_type_conversion_p (vectype, rettype))
2682 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2683 TYPE_VECTOR_SUBPARTS (rettype)));
2684 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2685 gimple_call_set_lhs (new_stmt, op);
2686 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2687 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2688 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
2691 return new_stmt;
2694 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2695 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2696 the scatter store operation. If the store is conditional, MASK is the
2697 unvectorized condition, otherwise MASK is null. */
2699 static void
2700 vect_build_scatter_store_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2701 gimple_stmt_iterator *gsi, gimple **vec_stmt,
2702 gather_scatter_info *gs_info, tree mask,
2703 stmt_vector_for_cost *cost_vec)
2705 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2706 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2707 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2708 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2709 enum { NARROW, NONE, WIDEN } modifier;
2710 poly_uint64 scatter_off_nunits
2711 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2713 /* FIXME: Keep the previous costing way in vect_model_store_cost by
2714 costing N scalar stores, but it should be tweaked to use target
2715 specific costs on related scatter store calls. */
2716 if (cost_vec)
2718 tree op = vect_get_store_rhs (stmt_info);
2719 enum vect_def_type dt;
2720 gcc_assert (vect_is_simple_use (op, vinfo, &dt));
2721 unsigned int inside_cost, prologue_cost = 0;
2722 if (dt == vect_constant_def || dt == vect_external_def)
2723 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
2724 stmt_info, 0, vect_prologue);
2725 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
2726 inside_cost = record_stmt_cost (cost_vec, ncopies * assumed_nunits,
2727 scalar_store, stmt_info, 0, vect_body);
2729 if (dump_enabled_p ())
2730 dump_printf_loc (MSG_NOTE, vect_location,
2731 "vect_model_store_cost: inside_cost = %d, "
2732 "prologue_cost = %d .\n",
2733 inside_cost, prologue_cost);
2734 return;
2737 tree perm_mask = NULL_TREE, mask_halfvectype = NULL_TREE;
2738 if (known_eq (nunits, scatter_off_nunits))
2739 modifier = NONE;
2740 else if (known_eq (nunits * 2, scatter_off_nunits))
2742 modifier = WIDEN;
2744 /* Currently gathers and scatters are only supported for
2745 fixed-length vectors. */
2746 unsigned int count = scatter_off_nunits.to_constant ();
2747 vec_perm_builder sel (count, count, 1);
2748 for (unsigned i = 0; i < (unsigned int) count; ++i)
2749 sel.quick_push (i | (count / 2));
2751 vec_perm_indices indices (sel, 1, count);
2752 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype, indices);
2753 gcc_assert (perm_mask != NULL_TREE);
2755 else if (known_eq (nunits, scatter_off_nunits * 2))
2757 modifier = NARROW;
2759 /* Currently gathers and scatters are only supported for
2760 fixed-length vectors. */
2761 unsigned int count = nunits.to_constant ();
2762 vec_perm_builder sel (count, count, 1);
2763 for (unsigned i = 0; i < (unsigned int) count; ++i)
2764 sel.quick_push (i | (count / 2));
2766 vec_perm_indices indices (sel, 2, count);
2767 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2768 gcc_assert (perm_mask != NULL_TREE);
2769 ncopies *= 2;
2771 if (mask)
2772 mask_halfvectype = truth_type_for (gs_info->offset_vectype);
2774 else
2775 gcc_unreachable ();
2777 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2778 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2779 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2780 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2781 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2782 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2783 tree scaletype = TREE_VALUE (arglist);
2785 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2786 && TREE_CODE (rettype) == VOID_TYPE);
2788 tree ptr = fold_convert (ptrtype, gs_info->base);
2789 if (!is_gimple_min_invariant (ptr))
2791 gimple_seq seq;
2792 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2793 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2794 edge pe = loop_preheader_edge (loop);
2795 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2796 gcc_assert (!new_bb);
2799 tree mask_arg = NULL_TREE;
2800 if (mask == NULL_TREE)
2802 mask_arg = build_int_cst (masktype, -1);
2803 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
2806 tree scale = build_int_cst (scaletype, gs_info->scale);
2808 auto_vec<tree> vec_oprnds0;
2809 auto_vec<tree> vec_oprnds1;
2810 auto_vec<tree> vec_masks;
2811 if (mask)
2813 tree mask_vectype = truth_type_for (vectype);
2814 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2815 modifier == NARROW ? ncopies / 2 : ncopies,
2816 mask, &vec_masks, mask_vectype);
2818 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2819 modifier == WIDEN ? ncopies / 2 : ncopies,
2820 gs_info->offset, &vec_oprnds0);
2821 tree op = vect_get_store_rhs (stmt_info);
2822 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2823 modifier == NARROW ? ncopies / 2 : ncopies, op,
2824 &vec_oprnds1);
2826 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2827 tree mask_op = NULL_TREE;
2828 tree src, vec_mask;
2829 for (int j = 0; j < ncopies; ++j)
2831 if (modifier == WIDEN)
2833 if (j & 1)
2834 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0, perm_mask,
2835 stmt_info, gsi);
2836 else
2837 op = vec_oprnd0 = vec_oprnds0[j / 2];
2838 src = vec_oprnd1 = vec_oprnds1[j];
2839 if (mask)
2840 mask_op = vec_mask = vec_masks[j];
2842 else if (modifier == NARROW)
2844 if (j & 1)
2845 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
2846 perm_mask, stmt_info, gsi);
2847 else
2848 src = vec_oprnd1 = vec_oprnds1[j / 2];
2849 op = vec_oprnd0 = vec_oprnds0[j];
2850 if (mask)
2851 mask_op = vec_mask = vec_masks[j / 2];
2853 else
2855 op = vec_oprnd0 = vec_oprnds0[j];
2856 src = vec_oprnd1 = vec_oprnds1[j];
2857 if (mask)
2858 mask_op = vec_mask = vec_masks[j];
2861 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
2863 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
2864 TYPE_VECTOR_SUBPARTS (srctype)));
2865 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
2866 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
2867 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
2868 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2869 src = var;
2872 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2874 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2875 TYPE_VECTOR_SUBPARTS (idxtype)));
2876 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2877 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2878 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2879 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2880 op = var;
2883 if (mask)
2885 tree utype;
2886 mask_arg = mask_op;
2887 if (modifier == NARROW)
2889 tree var
2890 = vect_get_new_ssa_name (mask_halfvectype, vect_simple_var);
2891 gassign *new_stmt
2892 = gimple_build_assign (var,
2893 (j & 1) ? VEC_UNPACK_HI_EXPR
2894 : VEC_UNPACK_LO_EXPR,
2895 mask_op);
2896 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2897 mask_arg = var;
2899 tree optype = TREE_TYPE (mask_arg);
2900 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
2901 utype = masktype;
2902 else
2903 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2904 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
2905 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
2906 gassign *new_stmt
2907 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2908 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2909 mask_arg = var;
2910 if (!useless_type_conversion_p (masktype, utype))
2912 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
2913 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2914 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2915 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2916 mask_arg = var;
2920 gcall *new_stmt
2921 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
2922 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2924 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2926 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2929 /* Prepare the base and offset in GS_INFO for vectorization.
2930 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2931 to the vectorized offset argument for the first copy of STMT_INFO.
2932 STMT_INFO is the statement described by GS_INFO and LOOP is the
2933 containing loop. */
2935 static void
2936 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2937 class loop *loop, stmt_vec_info stmt_info,
2938 slp_tree slp_node, gather_scatter_info *gs_info,
2939 tree *dataref_ptr, vec<tree> *vec_offset)
2941 gimple_seq stmts = NULL;
2942 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2943 if (stmts != NULL)
2945 basic_block new_bb;
2946 edge pe = loop_preheader_edge (loop);
2947 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2948 gcc_assert (!new_bb);
2950 if (slp_node)
2951 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2952 else
2954 unsigned ncopies
2955 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2956 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2957 gs_info->offset, vec_offset,
2958 gs_info->offset_vectype);
2962 /* Prepare to implement a grouped or strided load or store using
2963 the gather load or scatter store operation described by GS_INFO.
2964 STMT_INFO is the load or store statement.
2966 Set *DATAREF_BUMP to the amount that should be added to the base
2967 address after each copy of the vectorized statement. Set *VEC_OFFSET
2968 to an invariant offset vector in which element I has the value
2969 I * DR_STEP / SCALE. */
2971 static void
2972 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2973 loop_vec_info loop_vinfo,
2974 gimple_stmt_iterator *gsi,
2975 gather_scatter_info *gs_info,
2976 tree *dataref_bump, tree *vec_offset,
2977 vec_loop_lens *loop_lens)
2979 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2980 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2982 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2984 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2985 ivtmp_8 = _31 * 16 (step in bytes);
2986 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2987 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2988 tree loop_len
2989 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
2990 tree tmp
2991 = fold_build2 (MULT_EXPR, sizetype,
2992 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2993 loop_len);
2994 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
2995 GSI_SAME_STMT);
2997 else
2999 tree bump
3000 = size_binop (MULT_EXPR,
3001 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3002 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3003 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3006 /* The offset given in GS_INFO can have pointer type, so use the element
3007 type of the vector instead. */
3008 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3010 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3011 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3012 ssize_int (gs_info->scale));
3013 step = fold_convert (offset_type, step);
3015 /* Create {0, X, X*2, X*3, ...}. */
3016 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3017 build_zero_cst (offset_type), step);
3018 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3021 /* Prepare the pointer IVs which needs to be updated by a variable amount.
3022 Such variable amount is the outcome of .SELECT_VL. In this case, we can
3023 allow each iteration process the flexible number of elements as long as
3024 the number <= vf elments.
3026 Return data reference according to SELECT_VL.
3027 If new statements are needed, insert them before GSI. */
3029 static tree
3030 vect_get_loop_variant_data_ptr_increment (
3031 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
3032 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
3033 vect_memory_access_type memory_access_type)
3035 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3036 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3038 /* gather/scatter never reach here. */
3039 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
3041 /* When we support SELECT_VL pattern, we dynamic adjust
3042 the memory address by .SELECT_VL result.
3044 The result of .SELECT_VL is the number of elements to
3045 be processed of each iteration. So the memory address
3046 adjustment operation should be:
3048 addr = addr + .SELECT_VL (ARG..) * step;
3050 tree loop_len
3051 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
3052 tree len_type = TREE_TYPE (loop_len);
3053 /* Since the outcome of .SELECT_VL is element size, we should adjust
3054 it into bytesize so that it can be used in address pointer variable
3055 amount IVs adjustment. */
3056 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
3057 wide_int_to_tree (len_type, wi::to_widest (step)));
3058 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
3059 gassign *assign = gimple_build_assign (bump, tmp);
3060 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
3061 return bump;
3064 /* Return the amount that should be added to a vector pointer to move
3065 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3066 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3067 vectorization. */
3069 static tree
3070 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
3071 dr_vec_info *dr_info, tree aggr_type,
3072 vect_memory_access_type memory_access_type,
3073 vec_loop_lens *loop_lens = nullptr)
3075 if (memory_access_type == VMAT_INVARIANT)
3076 return size_zero_node;
3078 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3079 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3080 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
3081 loop_lens, dr_info,
3082 memory_access_type);
3084 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3085 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3086 if (tree_int_cst_sgn (step) == -1)
3087 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3088 return iv_step;
3091 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3093 static bool
3094 vectorizable_bswap (vec_info *vinfo,
3095 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3096 gimple **vec_stmt, slp_tree slp_node,
3097 slp_tree *slp_op,
3098 tree vectype_in, stmt_vector_for_cost *cost_vec)
3100 tree op, vectype;
3101 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3102 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3103 unsigned ncopies;
3105 op = gimple_call_arg (stmt, 0);
3106 vectype = STMT_VINFO_VECTYPE (stmt_info);
3107 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3109 /* Multiple types in SLP are handled by creating the appropriate number of
3110 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3111 case of SLP. */
3112 if (slp_node)
3113 ncopies = 1;
3114 else
3115 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3117 gcc_assert (ncopies >= 1);
3119 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3120 if (! char_vectype)
3121 return false;
3123 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3124 unsigned word_bytes;
3125 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3126 return false;
3128 /* The encoding uses one stepped pattern for each byte in the word. */
3129 vec_perm_builder elts (num_bytes, word_bytes, 3);
3130 for (unsigned i = 0; i < 3; ++i)
3131 for (unsigned j = 0; j < word_bytes; ++j)
3132 elts.quick_push ((i + 1) * word_bytes - j - 1);
3134 vec_perm_indices indices (elts, 1, num_bytes);
3135 machine_mode vmode = TYPE_MODE (char_vectype);
3136 if (!can_vec_perm_const_p (vmode, vmode, indices))
3137 return false;
3139 if (! vec_stmt)
3141 if (slp_node
3142 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3144 if (dump_enabled_p ())
3145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3146 "incompatible vector types for invariants\n");
3147 return false;
3150 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3151 DUMP_VECT_SCOPE ("vectorizable_bswap");
3152 record_stmt_cost (cost_vec,
3153 1, vector_stmt, stmt_info, 0, vect_prologue);
3154 record_stmt_cost (cost_vec,
3155 slp_node
3156 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3157 vec_perm, stmt_info, 0, vect_body);
3158 return true;
3161 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3163 /* Transform. */
3164 vec<tree> vec_oprnds = vNULL;
3165 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3166 op, &vec_oprnds);
3167 /* Arguments are ready. create the new vector stmt. */
3168 unsigned i;
3169 tree vop;
3170 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3172 gimple *new_stmt;
3173 tree tem = make_ssa_name (char_vectype);
3174 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3175 char_vectype, vop));
3176 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3177 tree tem2 = make_ssa_name (char_vectype);
3178 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3179 tem, tem, bswap_vconst);
3180 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3181 tem = make_ssa_name (vectype);
3182 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3183 vectype, tem2));
3184 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3185 if (slp_node)
3186 slp_node->push_vec_def (new_stmt);
3187 else
3188 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3191 if (!slp_node)
3192 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3194 vec_oprnds.release ();
3195 return true;
3198 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3199 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3200 in a single step. On success, store the binary pack code in
3201 *CONVERT_CODE. */
3203 static bool
3204 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3205 code_helper *convert_code)
3207 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3208 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3209 return false;
3211 code_helper code;
3212 int multi_step_cvt = 0;
3213 auto_vec <tree, 8> interm_types;
3214 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3215 &code, &multi_step_cvt, &interm_types)
3216 || multi_step_cvt)
3217 return false;
3219 *convert_code = code;
3220 return true;
3223 /* Function vectorizable_call.
3225 Check if STMT_INFO performs a function call that can be vectorized.
3226 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3227 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3228 Return true if STMT_INFO is vectorizable in this way. */
3230 static bool
3231 vectorizable_call (vec_info *vinfo,
3232 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3233 gimple **vec_stmt, slp_tree slp_node,
3234 stmt_vector_for_cost *cost_vec)
3236 gcall *stmt;
3237 tree vec_dest;
3238 tree scalar_dest;
3239 tree op;
3240 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3241 tree vectype_out, vectype_in;
3242 poly_uint64 nunits_in;
3243 poly_uint64 nunits_out;
3244 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3245 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3246 tree fndecl, new_temp, rhs_type;
3247 enum vect_def_type dt[4]
3248 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3249 vect_unknown_def_type };
3250 tree vectypes[ARRAY_SIZE (dt)] = {};
3251 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3252 int ndts = ARRAY_SIZE (dt);
3253 int ncopies, j;
3254 auto_vec<tree, 8> vargs;
3255 enum { NARROW, NONE, WIDEN } modifier;
3256 size_t i, nargs;
3257 tree lhs;
3259 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3260 return false;
3262 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3263 && ! vec_stmt)
3264 return false;
3266 /* Is STMT_INFO a vectorizable call? */
3267 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3268 if (!stmt)
3269 return false;
3271 if (gimple_call_internal_p (stmt)
3272 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3273 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3274 /* Handled by vectorizable_load and vectorizable_store. */
3275 return false;
3277 if (gimple_call_lhs (stmt) == NULL_TREE
3278 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3279 return false;
3281 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3283 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3285 /* Process function arguments. */
3286 rhs_type = NULL_TREE;
3287 vectype_in = NULL_TREE;
3288 nargs = gimple_call_num_args (stmt);
3290 /* Bail out if the function has more than four arguments, we do not have
3291 interesting builtin functions to vectorize with more than two arguments
3292 except for fma. No arguments is also not good. */
3293 if (nargs == 0 || nargs > 4)
3294 return false;
3296 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3297 combined_fn cfn = gimple_call_combined_fn (stmt);
3298 if (cfn == CFN_GOMP_SIMD_LANE)
3300 nargs = 0;
3301 rhs_type = unsigned_type_node;
3304 int mask_opno = -1;
3305 if (internal_fn_p (cfn))
3306 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3308 for (i = 0; i < nargs; i++)
3310 if ((int) i == mask_opno)
3312 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3313 &op, &slp_op[i], &dt[i], &vectypes[i]))
3314 return false;
3315 continue;
3318 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3319 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3321 if (dump_enabled_p ())
3322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3323 "use not simple.\n");
3324 return false;
3327 /* We can only handle calls with arguments of the same type. */
3328 if (rhs_type
3329 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3331 if (dump_enabled_p ())
3332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3333 "argument types differ.\n");
3334 return false;
3336 if (!rhs_type)
3337 rhs_type = TREE_TYPE (op);
3339 if (!vectype_in)
3340 vectype_in = vectypes[i];
3341 else if (vectypes[i]
3342 && !types_compatible_p (vectypes[i], vectype_in))
3344 if (dump_enabled_p ())
3345 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3346 "argument vector types differ.\n");
3347 return false;
3350 /* If all arguments are external or constant defs, infer the vector type
3351 from the scalar type. */
3352 if (!vectype_in)
3353 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3354 if (vec_stmt)
3355 gcc_assert (vectype_in);
3356 if (!vectype_in)
3358 if (dump_enabled_p ())
3359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3360 "no vectype for scalar type %T\n", rhs_type);
3362 return false;
3364 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3365 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3366 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3367 by a pack of the two vectors into an SI vector. We would need
3368 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3369 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3371 if (dump_enabled_p ())
3372 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3373 "mismatched vector sizes %T and %T\n",
3374 vectype_in, vectype_out);
3375 return false;
3378 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3379 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3381 if (dump_enabled_p ())
3382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3383 "mixed mask and nonmask vector types\n");
3384 return false;
3387 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3389 if (dump_enabled_p ())
3390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3391 "use emulated vector type for call\n");
3392 return false;
3395 /* FORNOW */
3396 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3397 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3398 if (known_eq (nunits_in * 2, nunits_out))
3399 modifier = NARROW;
3400 else if (known_eq (nunits_out, nunits_in))
3401 modifier = NONE;
3402 else if (known_eq (nunits_out * 2, nunits_in))
3403 modifier = WIDEN;
3404 else
3405 return false;
3407 /* We only handle functions that do not read or clobber memory. */
3408 if (gimple_vuse (stmt))
3410 if (dump_enabled_p ())
3411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3412 "function reads from or writes to memory.\n");
3413 return false;
3416 /* For now, we only vectorize functions if a target specific builtin
3417 is available. TODO -- in some cases, it might be profitable to
3418 insert the calls for pieces of the vector, in order to be able
3419 to vectorize other operations in the loop. */
3420 fndecl = NULL_TREE;
3421 internal_fn ifn = IFN_LAST;
3422 tree callee = gimple_call_fndecl (stmt);
3424 /* First try using an internal function. */
3425 code_helper convert_code = MAX_TREE_CODES;
3426 if (cfn != CFN_LAST
3427 && (modifier == NONE
3428 || (modifier == NARROW
3429 && simple_integer_narrowing (vectype_out, vectype_in,
3430 &convert_code))))
3431 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3432 vectype_in);
3434 /* If that fails, try asking for a target-specific built-in function. */
3435 if (ifn == IFN_LAST)
3437 if (cfn != CFN_LAST)
3438 fndecl = targetm.vectorize.builtin_vectorized_function
3439 (cfn, vectype_out, vectype_in);
3440 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3441 fndecl = targetm.vectorize.builtin_md_vectorized_function
3442 (callee, vectype_out, vectype_in);
3445 if (ifn == IFN_LAST && !fndecl)
3447 if (cfn == CFN_GOMP_SIMD_LANE
3448 && !slp_node
3449 && loop_vinfo
3450 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3451 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3452 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3453 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3455 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3456 { 0, 1, 2, ... vf - 1 } vector. */
3457 gcc_assert (nargs == 0);
3459 else if (modifier == NONE
3460 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3461 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3462 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3463 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3464 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3465 slp_op, vectype_in, cost_vec);
3466 else
3468 if (dump_enabled_p ())
3469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3470 "function is not vectorizable.\n");
3471 return false;
3475 if (slp_node)
3476 ncopies = 1;
3477 else if (modifier == NARROW && ifn == IFN_LAST)
3478 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3479 else
3480 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3482 /* Sanity check: make sure that at least one copy of the vectorized stmt
3483 needs to be generated. */
3484 gcc_assert (ncopies >= 1);
3486 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3487 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3488 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3489 int len_opno = internal_fn_len_index (cond_len_fn);
3490 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3491 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3492 if (!vec_stmt) /* transformation not required. */
3494 if (slp_node)
3495 for (i = 0; i < nargs; ++i)
3496 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3497 vectypes[i]
3498 ? vectypes[i] : vectype_in))
3500 if (dump_enabled_p ())
3501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3502 "incompatible vector types for invariants\n");
3503 return false;
3505 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3506 DUMP_VECT_SCOPE ("vectorizable_call");
3507 vect_model_simple_cost (vinfo, stmt_info,
3508 ncopies, dt, ndts, slp_node, cost_vec);
3509 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3510 record_stmt_cost (cost_vec, ncopies / 2,
3511 vec_promote_demote, stmt_info, 0, vect_body);
3513 if (loop_vinfo
3514 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3515 && (reduc_idx >= 0 || mask_opno >= 0))
3517 if (reduc_idx >= 0
3518 && (cond_fn == IFN_LAST
3519 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3520 OPTIMIZE_FOR_SPEED))
3521 && (cond_len_fn == IFN_LAST
3522 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3523 OPTIMIZE_FOR_SPEED)))
3525 if (dump_enabled_p ())
3526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3527 "can't use a fully-masked loop because no"
3528 " conditional operation is available.\n");
3529 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3531 else
3533 unsigned int nvectors
3534 = (slp_node
3535 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3536 : ncopies);
3537 tree scalar_mask = NULL_TREE;
3538 if (mask_opno >= 0)
3539 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3540 if (cond_len_fn != IFN_LAST
3541 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3542 OPTIMIZE_FOR_SPEED))
3543 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3545 else
3546 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3547 scalar_mask);
3550 return true;
3553 /* Transform. */
3555 if (dump_enabled_p ())
3556 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3558 /* Handle def. */
3559 scalar_dest = gimple_call_lhs (stmt);
3560 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3562 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3563 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3564 unsigned int vect_nargs = nargs;
3565 if (len_loop_p)
3567 if (len_opno >= 0)
3569 ifn = cond_len_fn;
3570 /* COND_* -> COND_LEN_* takes 2 extra arguments:LEN,BIAS. */
3571 vect_nargs += 2;
3573 else if (reduc_idx >= 0)
3574 gcc_unreachable ();
3576 else if (masked_loop_p && reduc_idx >= 0)
3578 ifn = cond_fn;
3579 vect_nargs += 2;
3582 if (modifier == NONE || ifn != IFN_LAST)
3584 tree prev_res = NULL_TREE;
3585 vargs.safe_grow (vect_nargs, true);
3586 auto_vec<vec<tree> > vec_defs (nargs);
3587 for (j = 0; j < ncopies; ++j)
3589 /* Build argument list for the vectorized call. */
3590 if (slp_node)
3592 vec<tree> vec_oprnds0;
3594 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3595 vec_oprnds0 = vec_defs[0];
3597 /* Arguments are ready. Create the new vector stmt. */
3598 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3600 int varg = 0;
3601 if (masked_loop_p && reduc_idx >= 0)
3603 unsigned int vec_num = vec_oprnds0.length ();
3604 /* Always true for SLP. */
3605 gcc_assert (ncopies == 1);
3606 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3607 gsi, masks, vec_num,
3608 vectype_out, i);
3610 size_t k;
3611 for (k = 0; k < nargs; k++)
3613 vec<tree> vec_oprndsk = vec_defs[k];
3614 vargs[varg++] = vec_oprndsk[i];
3616 if (masked_loop_p && reduc_idx >= 0)
3617 vargs[varg++] = vargs[reduc_idx + 1];
3618 gimple *new_stmt;
3619 if (modifier == NARROW)
3621 /* We don't define any narrowing conditional functions
3622 at present. */
3623 gcc_assert (mask_opno < 0);
3624 tree half_res = make_ssa_name (vectype_in);
3625 gcall *call
3626 = gimple_build_call_internal_vec (ifn, vargs);
3627 gimple_call_set_lhs (call, half_res);
3628 gimple_call_set_nothrow (call, true);
3629 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3630 if ((i & 1) == 0)
3632 prev_res = half_res;
3633 continue;
3635 new_temp = make_ssa_name (vec_dest);
3636 new_stmt = vect_gimple_build (new_temp, convert_code,
3637 prev_res, half_res);
3638 vect_finish_stmt_generation (vinfo, stmt_info,
3639 new_stmt, gsi);
3641 else
3643 if (len_opno >= 0 && len_loop_p)
3645 unsigned int vec_num = vec_oprnds0.length ();
3646 /* Always true for SLP. */
3647 gcc_assert (ncopies == 1);
3648 tree len
3649 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3650 vectype_out, i, 1);
3651 signed char biasval
3652 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3653 tree bias = build_int_cst (intQI_type_node, biasval);
3654 vargs[len_opno] = len;
3655 vargs[len_opno + 1] = bias;
3657 else if (mask_opno >= 0 && masked_loop_p)
3659 unsigned int vec_num = vec_oprnds0.length ();
3660 /* Always true for SLP. */
3661 gcc_assert (ncopies == 1);
3662 tree mask = vect_get_loop_mask (loop_vinfo,
3663 gsi, masks, vec_num,
3664 vectype_out, i);
3665 vargs[mask_opno] = prepare_vec_mask
3666 (loop_vinfo, TREE_TYPE (mask), mask,
3667 vargs[mask_opno], gsi);
3670 gcall *call;
3671 if (ifn != IFN_LAST)
3672 call = gimple_build_call_internal_vec (ifn, vargs);
3673 else
3674 call = gimple_build_call_vec (fndecl, vargs);
3675 new_temp = make_ssa_name (vec_dest, call);
3676 gimple_call_set_lhs (call, new_temp);
3677 gimple_call_set_nothrow (call, true);
3678 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3679 new_stmt = call;
3681 slp_node->push_vec_def (new_stmt);
3683 continue;
3686 int varg = 0;
3687 if (masked_loop_p && reduc_idx >= 0)
3688 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3689 vectype_out, j);
3690 for (i = 0; i < nargs; i++)
3692 op = gimple_call_arg (stmt, i);
3693 if (j == 0)
3695 vec_defs.quick_push (vNULL);
3696 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3697 op, &vec_defs[i],
3698 vectypes[i]);
3700 vargs[varg++] = vec_defs[i][j];
3702 if (masked_loop_p && reduc_idx >= 0)
3703 vargs[varg++] = vargs[reduc_idx + 1];
3705 if (len_opno >= 0 && len_loop_p)
3707 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3708 vectype_out, j, 1);
3709 signed char biasval
3710 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3711 tree bias = build_int_cst (intQI_type_node, biasval);
3712 vargs[len_opno] = len;
3713 vargs[len_opno + 1] = bias;
3715 else if (mask_opno >= 0 && masked_loop_p)
3717 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3718 vectype_out, j);
3719 vargs[mask_opno]
3720 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3721 vargs[mask_opno], gsi);
3724 gimple *new_stmt;
3725 if (cfn == CFN_GOMP_SIMD_LANE)
3727 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3728 tree new_var
3729 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3730 gimple *init_stmt = gimple_build_assign (new_var, cst);
3731 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3732 new_temp = make_ssa_name (vec_dest);
3733 new_stmt = gimple_build_assign (new_temp, new_var);
3734 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3736 else if (modifier == NARROW)
3738 /* We don't define any narrowing conditional functions at
3739 present. */
3740 gcc_assert (mask_opno < 0);
3741 tree half_res = make_ssa_name (vectype_in);
3742 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3743 gimple_call_set_lhs (call, half_res);
3744 gimple_call_set_nothrow (call, true);
3745 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3746 if ((j & 1) == 0)
3748 prev_res = half_res;
3749 continue;
3751 new_temp = make_ssa_name (vec_dest);
3752 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3753 half_res);
3754 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3756 else
3758 gcall *call;
3759 if (ifn != IFN_LAST)
3760 call = gimple_build_call_internal_vec (ifn, vargs);
3761 else
3762 call = gimple_build_call_vec (fndecl, vargs);
3763 new_temp = make_ssa_name (vec_dest, call);
3764 gimple_call_set_lhs (call, new_temp);
3765 gimple_call_set_nothrow (call, true);
3766 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3767 new_stmt = call;
3770 if (j == (modifier == NARROW ? 1 : 0))
3771 *vec_stmt = new_stmt;
3772 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3774 for (i = 0; i < nargs; i++)
3776 vec<tree> vec_oprndsi = vec_defs[i];
3777 vec_oprndsi.release ();
3780 else if (modifier == NARROW)
3782 auto_vec<vec<tree> > vec_defs (nargs);
3783 /* We don't define any narrowing conditional functions at present. */
3784 gcc_assert (mask_opno < 0);
3785 for (j = 0; j < ncopies; ++j)
3787 /* Build argument list for the vectorized call. */
3788 if (j == 0)
3789 vargs.create (nargs * 2);
3790 else
3791 vargs.truncate (0);
3793 if (slp_node)
3795 vec<tree> vec_oprnds0;
3797 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3798 vec_oprnds0 = vec_defs[0];
3800 /* Arguments are ready. Create the new vector stmt. */
3801 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3803 size_t k;
3804 vargs.truncate (0);
3805 for (k = 0; k < nargs; k++)
3807 vec<tree> vec_oprndsk = vec_defs[k];
3808 vargs.quick_push (vec_oprndsk[i]);
3809 vargs.quick_push (vec_oprndsk[i + 1]);
3811 gcall *call;
3812 if (ifn != IFN_LAST)
3813 call = gimple_build_call_internal_vec (ifn, vargs);
3814 else
3815 call = gimple_build_call_vec (fndecl, vargs);
3816 new_temp = make_ssa_name (vec_dest, call);
3817 gimple_call_set_lhs (call, new_temp);
3818 gimple_call_set_nothrow (call, true);
3819 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3820 slp_node->push_vec_def (call);
3822 continue;
3825 for (i = 0; i < nargs; i++)
3827 op = gimple_call_arg (stmt, i);
3828 if (j == 0)
3830 vec_defs.quick_push (vNULL);
3831 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3832 op, &vec_defs[i], vectypes[i]);
3834 vec_oprnd0 = vec_defs[i][2*j];
3835 vec_oprnd1 = vec_defs[i][2*j+1];
3837 vargs.quick_push (vec_oprnd0);
3838 vargs.quick_push (vec_oprnd1);
3841 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3842 new_temp = make_ssa_name (vec_dest, new_stmt);
3843 gimple_call_set_lhs (new_stmt, new_temp);
3844 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3846 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3849 if (!slp_node)
3850 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3852 for (i = 0; i < nargs; i++)
3854 vec<tree> vec_oprndsi = vec_defs[i];
3855 vec_oprndsi.release ();
3858 else
3859 /* No current target implements this case. */
3860 return false;
3862 vargs.release ();
3864 /* The call in STMT might prevent it from being removed in dce.
3865 We however cannot remove it here, due to the way the ssa name
3866 it defines is mapped to the new definition. So just replace
3867 rhs of the statement with something harmless. */
3869 if (slp_node)
3870 return true;
3872 stmt_info = vect_orig_stmt (stmt_info);
3873 lhs = gimple_get_lhs (stmt_info->stmt);
3875 gassign *new_stmt
3876 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3877 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3879 return true;
3883 struct simd_call_arg_info
3885 tree vectype;
3886 tree op;
3887 HOST_WIDE_INT linear_step;
3888 enum vect_def_type dt;
3889 unsigned int align;
3890 bool simd_lane_linear;
3893 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3894 is linear within simd lane (but not within whole loop), note it in
3895 *ARGINFO. */
3897 static void
3898 vect_simd_lane_linear (tree op, class loop *loop,
3899 struct simd_call_arg_info *arginfo)
3901 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3903 if (!is_gimple_assign (def_stmt)
3904 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3905 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3906 return;
3908 tree base = gimple_assign_rhs1 (def_stmt);
3909 HOST_WIDE_INT linear_step = 0;
3910 tree v = gimple_assign_rhs2 (def_stmt);
3911 while (TREE_CODE (v) == SSA_NAME)
3913 tree t;
3914 def_stmt = SSA_NAME_DEF_STMT (v);
3915 if (is_gimple_assign (def_stmt))
3916 switch (gimple_assign_rhs_code (def_stmt))
3918 case PLUS_EXPR:
3919 t = gimple_assign_rhs2 (def_stmt);
3920 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3921 return;
3922 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3923 v = gimple_assign_rhs1 (def_stmt);
3924 continue;
3925 case MULT_EXPR:
3926 t = gimple_assign_rhs2 (def_stmt);
3927 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3928 return;
3929 linear_step = tree_to_shwi (t);
3930 v = gimple_assign_rhs1 (def_stmt);
3931 continue;
3932 CASE_CONVERT:
3933 t = gimple_assign_rhs1 (def_stmt);
3934 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3935 || (TYPE_PRECISION (TREE_TYPE (v))
3936 < TYPE_PRECISION (TREE_TYPE (t))))
3937 return;
3938 if (!linear_step)
3939 linear_step = 1;
3940 v = t;
3941 continue;
3942 default:
3943 return;
3945 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3946 && loop->simduid
3947 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3948 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3949 == loop->simduid))
3951 if (!linear_step)
3952 linear_step = 1;
3953 arginfo->linear_step = linear_step;
3954 arginfo->op = base;
3955 arginfo->simd_lane_linear = true;
3956 return;
3961 /* Function vectorizable_simd_clone_call.
3963 Check if STMT_INFO performs a function call that can be vectorized
3964 by calling a simd clone of the function.
3965 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3966 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3967 Return true if STMT_INFO is vectorizable in this way. */
3969 static bool
3970 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3971 gimple_stmt_iterator *gsi,
3972 gimple **vec_stmt, slp_tree slp_node,
3973 stmt_vector_for_cost *)
3975 tree vec_dest;
3976 tree scalar_dest;
3977 tree op, type;
3978 tree vec_oprnd0 = NULL_TREE;
3979 tree vectype;
3980 poly_uint64 nunits;
3981 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3982 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3983 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3984 tree fndecl, new_temp;
3985 int ncopies, j;
3986 auto_vec<simd_call_arg_info> arginfo;
3987 vec<tree> vargs = vNULL;
3988 size_t i, nargs;
3989 tree lhs, rtype, ratype;
3990 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3991 int masked_call_offset = 0;
3993 /* Is STMT a vectorizable call? */
3994 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3995 if (!stmt)
3996 return false;
3998 fndecl = gimple_call_fndecl (stmt);
3999 if (fndecl == NULL_TREE
4000 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4002 fndecl = gimple_call_arg (stmt, 0);
4003 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4004 fndecl = TREE_OPERAND (fndecl, 0);
4005 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4006 masked_call_offset = 1;
4008 if (fndecl == NULL_TREE)
4009 return false;
4011 struct cgraph_node *node = cgraph_node::get (fndecl);
4012 if (node == NULL || node->simd_clones == NULL)
4013 return false;
4015 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4016 return false;
4018 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4019 && ! vec_stmt)
4020 return false;
4022 if (gimple_call_lhs (stmt)
4023 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4024 return false;
4026 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4028 vectype = STMT_VINFO_VECTYPE (stmt_info);
4030 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4031 return false;
4033 /* Process function arguments. */
4034 nargs = gimple_call_num_args (stmt) - masked_call_offset;
4036 /* Bail out if the function has zero arguments. */
4037 if (nargs == 0)
4038 return false;
4040 vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
4041 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
4042 arginfo.reserve (nargs, true);
4043 auto_vec<slp_tree> slp_op;
4044 slp_op.safe_grow_cleared (nargs);
4046 for (i = 0; i < nargs; i++)
4048 simd_call_arg_info thisarginfo;
4049 affine_iv iv;
4051 thisarginfo.linear_step = 0;
4052 thisarginfo.align = 0;
4053 thisarginfo.op = NULL_TREE;
4054 thisarginfo.simd_lane_linear = false;
4056 int op_no = i + masked_call_offset;
4057 if (slp_node)
4058 op_no = vect_slp_child_index_for_operand (stmt, op_no);
4059 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4060 op_no, &op, &slp_op[i],
4061 &thisarginfo.dt, &thisarginfo.vectype)
4062 || thisarginfo.dt == vect_uninitialized_def)
4064 if (dump_enabled_p ())
4065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4066 "use not simple.\n");
4067 return false;
4070 if (thisarginfo.dt == vect_constant_def
4071 || thisarginfo.dt == vect_external_def)
4073 gcc_assert (vec_stmt || thisarginfo.vectype == NULL_TREE);
4074 if (!vec_stmt)
4075 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
4076 TREE_TYPE (op),
4077 slp_node);
4079 else
4080 gcc_assert (thisarginfo.vectype != NULL_TREE);
4082 /* For linear arguments, the analyze phase should have saved
4083 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
4084 if (i * 3 + 4 <= simd_clone_info.length ()
4085 && simd_clone_info[i * 3 + 2])
4087 gcc_assert (vec_stmt);
4088 thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
4089 thisarginfo.op = simd_clone_info[i * 3 + 1];
4090 thisarginfo.simd_lane_linear
4091 = (simd_clone_info[i * 3 + 3] == boolean_true_node);
4092 /* If loop has been peeled for alignment, we need to adjust it. */
4093 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4094 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4095 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4097 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4098 tree step = simd_clone_info[i * 3 + 2];
4099 tree opt = TREE_TYPE (thisarginfo.op);
4100 bias = fold_convert (TREE_TYPE (step), bias);
4101 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4102 thisarginfo.op
4103 = fold_build2 (POINTER_TYPE_P (opt)
4104 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4105 thisarginfo.op, bias);
4108 else if (!vec_stmt
4109 && thisarginfo.dt != vect_constant_def
4110 && thisarginfo.dt != vect_external_def
4111 && loop_vinfo
4112 && TREE_CODE (op) == SSA_NAME
4113 && simple_iv (loop, loop_containing_stmt (stmt), op,
4114 &iv, false)
4115 && tree_fits_shwi_p (iv.step))
4117 thisarginfo.linear_step = tree_to_shwi (iv.step);
4118 thisarginfo.op = iv.base;
4120 else if ((thisarginfo.dt == vect_constant_def
4121 || thisarginfo.dt == vect_external_def)
4122 && POINTER_TYPE_P (TREE_TYPE (op)))
4123 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4124 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4125 linear too. */
4126 if (POINTER_TYPE_P (TREE_TYPE (op))
4127 && !thisarginfo.linear_step
4128 && !vec_stmt
4129 && thisarginfo.dt != vect_constant_def
4130 && thisarginfo.dt != vect_external_def
4131 && loop_vinfo
4132 && TREE_CODE (op) == SSA_NAME)
4133 vect_simd_lane_linear (op, loop, &thisarginfo);
4135 arginfo.quick_push (thisarginfo);
4138 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4139 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
4140 unsigned int badness = 0;
4141 struct cgraph_node *bestn = NULL;
4142 if (simd_clone_info.exists ())
4143 bestn = cgraph_node::get (simd_clone_info[0]);
4144 else
4145 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4146 n = n->simdclone->next_clone)
4148 unsigned int this_badness = 0;
4149 unsigned int num_calls;
4150 if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
4151 &num_calls)
4152 || (!n->simdclone->inbranch && (masked_call_offset > 0))
4153 || nargs != n->simdclone->nargs)
4154 continue;
4155 if (num_calls != 1)
4156 this_badness += exact_log2 (num_calls) * 4096;
4157 if (n->simdclone->inbranch)
4158 this_badness += 8192;
4159 int target_badness = targetm.simd_clone.usable (n);
4160 if (target_badness < 0)
4161 continue;
4162 this_badness += target_badness * 512;
4163 for (i = 0; i < nargs; i++)
4165 switch (n->simdclone->args[i].arg_type)
4167 case SIMD_CLONE_ARG_TYPE_VECTOR:
4168 if (!useless_type_conversion_p
4169 (n->simdclone->args[i].orig_type,
4170 TREE_TYPE (gimple_call_arg (stmt,
4171 i + masked_call_offset))))
4172 i = -1;
4173 else if (arginfo[i].dt == vect_constant_def
4174 || arginfo[i].dt == vect_external_def
4175 || arginfo[i].linear_step)
4176 this_badness += 64;
4177 break;
4178 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4179 if (arginfo[i].dt != vect_constant_def
4180 && arginfo[i].dt != vect_external_def)
4181 i = -1;
4182 break;
4183 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4185 if (arginfo[i].dt == vect_constant_def
4186 || arginfo[i].dt == vect_external_def
4187 || (arginfo[i].linear_step
4188 != n->simdclone->args[i].linear_step))
4189 i = -1;
4190 break;
4191 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4194 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4195 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4196 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4197 /* FORNOW */
4198 i = -1;
4199 break;
4200 case SIMD_CLONE_ARG_TYPE_MASK:
4201 /* While we can create a traditional data vector from
4202 an incoming integer mode mask we have no good way to
4203 force generate an integer mode mask from a traditional
4204 boolean vector input. */
4205 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4206 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4207 i = -1;
4208 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4209 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4210 this_badness += 2048;
4211 break;
4213 if (i == (size_t) -1)
4214 break;
4215 if (n->simdclone->args[i].alignment > arginfo[i].align)
4217 i = -1;
4218 break;
4220 if (arginfo[i].align)
4221 this_badness += (exact_log2 (arginfo[i].align)
4222 - exact_log2 (n->simdclone->args[i].alignment));
4224 if (i == (size_t) -1)
4225 continue;
4226 if (masked_call_offset == 0
4227 && n->simdclone->inbranch
4228 && n->simdclone->nargs > nargs)
4230 gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4231 SIMD_CLONE_ARG_TYPE_MASK);
4232 /* Penalize using a masked SIMD clone in a non-masked loop, that is
4233 not in a branch, as we'd have to construct an all-true mask. */
4234 if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4235 this_badness += 64;
4237 if (bestn == NULL || this_badness < badness)
4239 bestn = n;
4240 badness = this_badness;
4244 if (bestn == NULL)
4245 return false;
4247 unsigned int num_mask_args = 0;
4248 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4249 for (i = 0; i < nargs; i++)
4250 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4251 num_mask_args++;
4253 for (i = 0; i < nargs; i++)
4255 if ((arginfo[i].dt == vect_constant_def
4256 || arginfo[i].dt == vect_external_def)
4257 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4259 tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
4260 i + masked_call_offset));
4261 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4262 slp_node);
4263 if (arginfo[i].vectype == NULL
4264 || !constant_multiple_p (bestn->simdclone->simdlen,
4265 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4266 return false;
4269 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4270 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4272 if (dump_enabled_p ())
4273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4274 "vector mask arguments are not supported.\n");
4275 return false;
4278 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4280 tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
4281 if (bestn->simdclone->mask_mode == VOIDmode)
4283 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype),
4284 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4286 /* FORNOW we only have partial support for vector-type masks
4287 that can't hold all of simdlen. */
4288 if (dump_enabled_p ())
4289 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4290 vect_location,
4291 "in-branch vector clones are not yet"
4292 " supported for mismatched vector sizes.\n");
4293 return false;
4296 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4298 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4299 || maybe_ne (exact_div (bestn->simdclone->simdlen,
4300 num_mask_args),
4301 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4303 /* FORNOW we only have partial support for integer-type masks
4304 that represent the same number of lanes as the
4305 vectorized mask inputs. */
4306 if (dump_enabled_p ())
4307 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4308 vect_location,
4309 "in-branch vector clones are not yet "
4310 "supported for mismatched vector sizes.\n");
4311 return false;
4314 else
4316 if (dump_enabled_p ())
4317 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4318 vect_location,
4319 "in-branch vector clones not supported"
4320 " on this target.\n");
4321 return false;
4326 fndecl = bestn->decl;
4327 nunits = bestn->simdclone->simdlen;
4328 if (slp_node)
4329 ncopies = vector_unroll_factor (vf * group_size, nunits);
4330 else
4331 ncopies = vector_unroll_factor (vf, nunits);
4333 /* If the function isn't const, only allow it in simd loops where user
4334 has asserted that at least nunits consecutive iterations can be
4335 performed using SIMD instructions. */
4336 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4337 && gimple_vuse (stmt))
4338 return false;
4340 /* Sanity check: make sure that at least one copy of the vectorized stmt
4341 needs to be generated. */
4342 gcc_assert (ncopies >= 1);
4344 if (!vec_stmt) /* transformation not required. */
4346 if (slp_node)
4347 for (unsigned i = 0; i < nargs; ++i)
4348 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4350 if (dump_enabled_p ())
4351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4352 "incompatible vector types for invariants\n");
4353 return false;
4355 /* When the original call is pure or const but the SIMD ABI dictates
4356 an aggregate return we will have to use a virtual definition and
4357 in a loop eventually even need to add a virtual PHI. That's
4358 not straight-forward so allow to fix this up via renaming. */
4359 if (gimple_call_lhs (stmt)
4360 && !gimple_vdef (stmt)
4361 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4362 vinfo->any_known_not_updated_vssa = true;
4363 /* ??? For SLP code-gen we end up inserting after the last
4364 vector argument def rather than at the original call position
4365 so automagic virtual operand updating doesn't work. */
4366 if (gimple_vuse (stmt) && slp_node)
4367 vinfo->any_known_not_updated_vssa = true;
4368 simd_clone_info.safe_push (bestn->decl);
4369 for (i = 0; i < bestn->simdclone->nargs; i++)
4371 switch (bestn->simdclone->args[i].arg_type)
4373 default:
4374 continue;
4375 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4376 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4378 auto &clone_info = STMT_VINFO_SIMD_CLONE_INFO (stmt_info);
4379 clone_info.safe_grow_cleared (i * 3 + 1, true);
4380 clone_info.safe_push (arginfo[i].op);
4381 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4382 ? size_type_node : TREE_TYPE (arginfo[i].op);
4383 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4384 clone_info.safe_push (ls);
4385 tree sll = arginfo[i].simd_lane_linear
4386 ? boolean_true_node : boolean_false_node;
4387 clone_info.safe_push (sll);
4389 break;
4390 case SIMD_CLONE_ARG_TYPE_MASK:
4391 if (loop_vinfo
4392 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4393 vect_record_loop_mask (loop_vinfo,
4394 &LOOP_VINFO_MASKS (loop_vinfo),
4395 ncopies, vectype, op);
4397 break;
4401 if (!bestn->simdclone->inbranch && loop_vinfo)
4403 if (dump_enabled_p ()
4404 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4405 dump_printf_loc (MSG_NOTE, vect_location,
4406 "can't use a fully-masked loop because a"
4407 " non-masked simd clone was selected.\n");
4408 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4411 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4412 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4413 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4414 dt, slp_node, cost_vec); */
4415 return true;
4418 /* Transform. */
4420 if (dump_enabled_p ())
4421 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4423 /* Handle def. */
4424 scalar_dest = gimple_call_lhs (stmt);
4425 vec_dest = NULL_TREE;
4426 rtype = NULL_TREE;
4427 ratype = NULL_TREE;
4428 if (scalar_dest)
4430 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4431 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4432 if (TREE_CODE (rtype) == ARRAY_TYPE)
4434 ratype = rtype;
4435 rtype = TREE_TYPE (ratype);
4439 auto_vec<vec<tree> > vec_oprnds;
4440 auto_vec<unsigned> vec_oprnds_i;
4441 vec_oprnds_i.safe_grow_cleared (nargs, true);
4442 if (slp_node)
4444 vec_oprnds.reserve_exact (nargs);
4445 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4447 else
4448 vec_oprnds.safe_grow_cleared (nargs, true);
4449 for (j = 0; j < ncopies; ++j)
4451 poly_uint64 callee_nelements;
4452 poly_uint64 caller_nelements;
4453 /* Build argument list for the vectorized call. */
4454 if (j == 0)
4455 vargs.create (nargs);
4456 else
4457 vargs.truncate (0);
4459 for (i = 0; i < nargs; i++)
4461 unsigned int k, l, m, o;
4462 tree atype;
4463 op = gimple_call_arg (stmt, i + masked_call_offset);
4464 switch (bestn->simdclone->args[i].arg_type)
4466 case SIMD_CLONE_ARG_TYPE_VECTOR:
4467 atype = bestn->simdclone->args[i].vector_type;
4468 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4469 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4470 o = vector_unroll_factor (nunits, callee_nelements);
4471 for (m = j * o; m < (j + 1) * o; m++)
4473 if (known_lt (callee_nelements, caller_nelements))
4475 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4476 if (!constant_multiple_p (caller_nelements,
4477 callee_nelements, &k))
4478 gcc_unreachable ();
4480 gcc_assert ((k & (k - 1)) == 0);
4481 if (m == 0)
4483 if (!slp_node)
4484 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4485 ncopies * o / k, op,
4486 &vec_oprnds[i]);
4487 vec_oprnds_i[i] = 0;
4488 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4490 else
4492 vec_oprnd0 = arginfo[i].op;
4493 if ((m & (k - 1)) == 0)
4494 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4496 arginfo[i].op = vec_oprnd0;
4497 vec_oprnd0
4498 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4499 bitsize_int (prec),
4500 bitsize_int ((m & (k - 1)) * prec));
4501 gassign *new_stmt
4502 = gimple_build_assign (make_ssa_name (atype),
4503 vec_oprnd0);
4504 vect_finish_stmt_generation (vinfo, stmt_info,
4505 new_stmt, gsi);
4506 vargs.safe_push (gimple_assign_lhs (new_stmt));
4508 else
4510 if (!constant_multiple_p (callee_nelements,
4511 caller_nelements, &k))
4512 gcc_unreachable ();
4513 gcc_assert ((k & (k - 1)) == 0);
4514 vec<constructor_elt, va_gc> *ctor_elts;
4515 if (k != 1)
4516 vec_alloc (ctor_elts, k);
4517 else
4518 ctor_elts = NULL;
4519 for (l = 0; l < k; l++)
4521 if (m == 0 && l == 0)
4523 if (!slp_node)
4524 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4525 k * o * ncopies,
4527 &vec_oprnds[i]);
4528 vec_oprnds_i[i] = 0;
4529 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4531 else
4532 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4533 arginfo[i].op = vec_oprnd0;
4534 if (k == 1)
4535 break;
4536 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4537 vec_oprnd0);
4539 if (k == 1)
4540 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4541 atype))
4543 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4544 vec_oprnd0);
4545 gassign *new_stmt
4546 = gimple_build_assign (make_ssa_name (atype),
4547 vec_oprnd0);
4548 vect_finish_stmt_generation (vinfo, stmt_info,
4549 new_stmt, gsi);
4550 vargs.safe_push (gimple_get_lhs (new_stmt));
4552 else
4553 vargs.safe_push (vec_oprnd0);
4554 else
4556 vec_oprnd0 = build_constructor (atype, ctor_elts);
4557 gassign *new_stmt
4558 = gimple_build_assign (make_ssa_name (atype),
4559 vec_oprnd0);
4560 vect_finish_stmt_generation (vinfo, stmt_info,
4561 new_stmt, gsi);
4562 vargs.safe_push (gimple_assign_lhs (new_stmt));
4566 break;
4567 case SIMD_CLONE_ARG_TYPE_MASK:
4568 if (bestn->simdclone->mask_mode == VOIDmode)
4570 atype = bestn->simdclone->args[i].vector_type;
4571 tree elt_type = TREE_TYPE (atype);
4572 tree one = fold_convert (elt_type, integer_one_node);
4573 tree zero = fold_convert (elt_type, integer_zero_node);
4574 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4575 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4576 o = vector_unroll_factor (nunits, callee_nelements);
4577 for (m = j * o; m < (j + 1) * o; m++)
4579 if (maybe_lt (callee_nelements, caller_nelements))
4581 /* The mask type has fewer elements than simdlen. */
4583 /* FORNOW */
4584 gcc_unreachable ();
4586 else if (known_eq (callee_nelements, caller_nelements))
4588 /* The SIMD clone function has the same number of
4589 elements as the current function. */
4590 if (m == 0)
4592 if (!slp_node)
4593 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4594 o * ncopies,
4596 &vec_oprnds[i]);
4597 vec_oprnds_i[i] = 0;
4599 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4600 if (loop_vinfo
4601 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4603 vec_loop_masks *loop_masks
4604 = &LOOP_VINFO_MASKS (loop_vinfo);
4605 tree loop_mask
4606 = vect_get_loop_mask (loop_vinfo, gsi,
4607 loop_masks, ncopies,
4608 vectype, j);
4609 vec_oprnd0
4610 = prepare_vec_mask (loop_vinfo,
4611 TREE_TYPE (loop_mask),
4612 loop_mask, vec_oprnd0,
4613 gsi);
4614 loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
4615 loop_mask });
4618 vec_oprnd0
4619 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4620 build_vector_from_val (atype, one),
4621 build_vector_from_val (atype, zero));
4622 gassign *new_stmt
4623 = gimple_build_assign (make_ssa_name (atype),
4624 vec_oprnd0);
4625 vect_finish_stmt_generation (vinfo, stmt_info,
4626 new_stmt, gsi);
4627 vargs.safe_push (gimple_assign_lhs (new_stmt));
4629 else
4631 /* The mask type has more elements than simdlen. */
4633 /* FORNOW */
4634 gcc_unreachable ();
4638 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4640 atype = bestn->simdclone->args[i].vector_type;
4641 /* Guess the number of lanes represented by atype. */
4642 poly_uint64 atype_subparts
4643 = exact_div (bestn->simdclone->simdlen,
4644 num_mask_args);
4645 o = vector_unroll_factor (nunits, atype_subparts);
4646 for (m = j * o; m < (j + 1) * o; m++)
4648 if (m == 0)
4650 if (!slp_node)
4651 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4652 o * ncopies,
4654 &vec_oprnds[i]);
4655 vec_oprnds_i[i] = 0;
4657 if (maybe_lt (atype_subparts,
4658 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4660 /* The mask argument has fewer elements than the
4661 input vector. */
4662 /* FORNOW */
4663 gcc_unreachable ();
4665 else if (known_eq (atype_subparts,
4666 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4668 /* The vector mask argument matches the input
4669 in the number of lanes, but not necessarily
4670 in the mode. */
4671 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4672 tree st = lang_hooks.types.type_for_mode
4673 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4674 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4675 vec_oprnd0);
4676 gassign *new_stmt
4677 = gimple_build_assign (make_ssa_name (st),
4678 vec_oprnd0);
4679 vect_finish_stmt_generation (vinfo, stmt_info,
4680 new_stmt, gsi);
4681 if (!types_compatible_p (atype, st))
4683 new_stmt
4684 = gimple_build_assign (make_ssa_name (atype),
4685 NOP_EXPR,
4686 gimple_assign_lhs
4687 (new_stmt));
4688 vect_finish_stmt_generation (vinfo, stmt_info,
4689 new_stmt, gsi);
4691 vargs.safe_push (gimple_assign_lhs (new_stmt));
4693 else
4695 /* The mask argument has more elements than the
4696 input vector. */
4697 /* FORNOW */
4698 gcc_unreachable ();
4702 else
4703 gcc_unreachable ();
4704 break;
4705 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4706 vargs.safe_push (op);
4707 break;
4708 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4709 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4710 if (j == 0)
4712 gimple_seq stmts;
4713 arginfo[i].op
4714 = force_gimple_operand (unshare_expr (arginfo[i].op),
4715 &stmts, true, NULL_TREE);
4716 if (stmts != NULL)
4718 basic_block new_bb;
4719 edge pe = loop_preheader_edge (loop);
4720 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4721 gcc_assert (!new_bb);
4723 if (arginfo[i].simd_lane_linear)
4725 vargs.safe_push (arginfo[i].op);
4726 break;
4728 tree phi_res = copy_ssa_name (op);
4729 gphi *new_phi = create_phi_node (phi_res, loop->header);
4730 add_phi_arg (new_phi, arginfo[i].op,
4731 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4732 enum tree_code code
4733 = POINTER_TYPE_P (TREE_TYPE (op))
4734 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4735 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4736 ? sizetype : TREE_TYPE (op);
4737 poly_widest_int cst
4738 = wi::mul (bestn->simdclone->args[i].linear_step,
4739 ncopies * nunits);
4740 tree tcst = wide_int_to_tree (type, cst);
4741 tree phi_arg = copy_ssa_name (op);
4742 gassign *new_stmt
4743 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4744 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4745 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4746 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4747 UNKNOWN_LOCATION);
4748 arginfo[i].op = phi_res;
4749 vargs.safe_push (phi_res);
4751 else
4753 enum tree_code code
4754 = POINTER_TYPE_P (TREE_TYPE (op))
4755 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4756 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4757 ? sizetype : TREE_TYPE (op);
4758 poly_widest_int cst
4759 = wi::mul (bestn->simdclone->args[i].linear_step,
4760 j * nunits);
4761 tree tcst = wide_int_to_tree (type, cst);
4762 new_temp = make_ssa_name (TREE_TYPE (op));
4763 gassign *new_stmt
4764 = gimple_build_assign (new_temp, code,
4765 arginfo[i].op, tcst);
4766 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4767 vargs.safe_push (new_temp);
4769 break;
4770 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4771 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4772 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4773 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4774 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4775 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4776 default:
4777 gcc_unreachable ();
4781 if (masked_call_offset == 0
4782 && bestn->simdclone->inbranch
4783 && bestn->simdclone->nargs > nargs)
4785 unsigned long m, o;
4786 size_t mask_i = bestn->simdclone->nargs - 1;
4787 tree mask;
4788 gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4789 SIMD_CLONE_ARG_TYPE_MASK);
4791 tree masktype = bestn->simdclone->args[mask_i].vector_type;
4792 callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4793 o = vector_unroll_factor (nunits, callee_nelements);
4794 for (m = j * o; m < (j + 1) * o; m++)
4796 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4798 vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4799 mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4800 ncopies, vectype, j);
4802 else
4803 mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
4805 if (!useless_type_conversion_p (TREE_TYPE (mask), masktype))
4807 gassign *new_stmt;
4808 if (bestn->simdclone->mask_mode != VOIDmode)
4810 /* This means we are dealing with integer mask modes.
4811 First convert to an integer type with the same size as
4812 the current vector type. */
4813 unsigned HOST_WIDE_INT intermediate_size
4814 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4815 tree mid_int_type =
4816 build_nonstandard_integer_type (intermediate_size, 1);
4817 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4818 new_stmt
4819 = gimple_build_assign (make_ssa_name (mid_int_type),
4820 mask);
4821 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4822 /* Then zero-extend to the mask mode. */
4823 mask = fold_build1 (NOP_EXPR, masktype,
4824 gimple_get_lhs (new_stmt));
4826 else
4827 mask = build1 (VIEW_CONVERT_EXPR, masktype, mask);
4829 new_stmt = gimple_build_assign (make_ssa_name (masktype),
4830 mask);
4831 vect_finish_stmt_generation (vinfo, stmt_info,
4832 new_stmt, gsi);
4833 mask = gimple_assign_lhs (new_stmt);
4835 vargs.safe_push (mask);
4839 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4840 if (vec_dest)
4842 gcc_assert (ratype
4843 || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4844 if (ratype)
4845 new_temp = create_tmp_var (ratype);
4846 else if (useless_type_conversion_p (vectype, rtype))
4847 new_temp = make_ssa_name (vec_dest, new_call);
4848 else
4849 new_temp = make_ssa_name (rtype, new_call);
4850 gimple_call_set_lhs (new_call, new_temp);
4852 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4853 gimple *new_stmt = new_call;
4855 if (vec_dest)
4857 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
4859 unsigned int k, l;
4860 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4861 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4862 k = vector_unroll_factor (nunits,
4863 TYPE_VECTOR_SUBPARTS (vectype));
4864 gcc_assert ((k & (k - 1)) == 0);
4865 for (l = 0; l < k; l++)
4867 tree t;
4868 if (ratype)
4870 t = build_fold_addr_expr (new_temp);
4871 t = build2 (MEM_REF, vectype, t,
4872 build_int_cst (TREE_TYPE (t), l * bytes));
4874 else
4875 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4876 bitsize_int (prec), bitsize_int (l * prec));
4877 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4878 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4880 if (j == 0 && l == 0)
4881 *vec_stmt = new_stmt;
4882 if (slp_node)
4883 SLP_TREE_VEC_DEFS (slp_node)
4884 .quick_push (gimple_assign_lhs (new_stmt));
4885 else
4886 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4889 if (ratype)
4890 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4891 continue;
4893 else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
4895 unsigned int k;
4896 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
4897 TYPE_VECTOR_SUBPARTS (rtype), &k))
4898 gcc_unreachable ();
4899 gcc_assert ((k & (k - 1)) == 0);
4900 if ((j & (k - 1)) == 0)
4901 vec_alloc (ret_ctor_elts, k);
4902 if (ratype)
4904 unsigned int m, o;
4905 o = vector_unroll_factor (nunits,
4906 TYPE_VECTOR_SUBPARTS (rtype));
4907 for (m = 0; m < o; m++)
4909 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4910 size_int (m), NULL_TREE, NULL_TREE);
4911 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4912 tem);
4913 vect_finish_stmt_generation (vinfo, stmt_info,
4914 new_stmt, gsi);
4915 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4916 gimple_assign_lhs (new_stmt));
4918 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4920 else
4921 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4922 if ((j & (k - 1)) != k - 1)
4923 continue;
4924 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4925 new_stmt
4926 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4927 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4929 if ((unsigned) j == k - 1)
4930 *vec_stmt = new_stmt;
4931 if (slp_node)
4932 SLP_TREE_VEC_DEFS (slp_node)
4933 .quick_push (gimple_assign_lhs (new_stmt));
4934 else
4935 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4936 continue;
4938 else if (ratype)
4940 tree t = build_fold_addr_expr (new_temp);
4941 t = build2 (MEM_REF, vectype, t,
4942 build_int_cst (TREE_TYPE (t), 0));
4943 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4944 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4945 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4947 else if (!useless_type_conversion_p (vectype, rtype))
4949 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4950 new_stmt
4951 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4952 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4956 if (j == 0)
4957 *vec_stmt = new_stmt;
4958 if (slp_node)
4959 SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
4960 else
4961 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4964 for (i = 0; i < nargs; ++i)
4966 vec<tree> oprndsi = vec_oprnds[i];
4967 oprndsi.release ();
4969 vargs.release ();
4971 /* Mark the clone as no longer being a candidate for GC. */
4972 bestn->gc_candidate = false;
4974 /* The call in STMT might prevent it from being removed in dce.
4975 We however cannot remove it here, due to the way the ssa name
4976 it defines is mapped to the new definition. So just replace
4977 rhs of the statement with something harmless. */
4979 if (slp_node)
4980 return true;
4982 gimple *new_stmt;
4983 if (scalar_dest)
4985 type = TREE_TYPE (scalar_dest);
4986 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4987 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4989 else
4990 new_stmt = gimple_build_nop ();
4991 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4992 unlink_stmt_vdef (stmt);
4994 return true;
4998 /* Function vect_gen_widened_results_half
5000 Create a vector stmt whose code, type, number of arguments, and result
5001 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
5002 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
5003 In the case that CODE is a CALL_EXPR, this means that a call to DECL
5004 needs to be created (DECL is a function-decl of a target-builtin).
5005 STMT_INFO is the original scalar stmt that we are vectorizing. */
5007 static gimple *
5008 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
5009 tree vec_oprnd0, tree vec_oprnd1, int op_type,
5010 tree vec_dest, gimple_stmt_iterator *gsi,
5011 stmt_vec_info stmt_info)
5013 gimple *new_stmt;
5014 tree new_temp;
5016 /* Generate half of the widened result: */
5017 if (op_type != binary_op)
5018 vec_oprnd1 = NULL;
5019 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
5020 new_temp = make_ssa_name (vec_dest, new_stmt);
5021 gimple_set_lhs (new_stmt, new_temp);
5022 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5024 return new_stmt;
5028 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
5029 For multi-step conversions store the resulting vectors and call the function
5030 recursively. When NARROW_SRC_P is true, there's still a conversion after
5031 narrowing, don't store the vectors in the SLP_NODE or in vector info of
5032 the scalar statement(or in STMT_VINFO_RELATED_STMT chain). */
5034 static void
5035 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
5036 int multi_step_cvt,
5037 stmt_vec_info stmt_info,
5038 vec<tree> &vec_dsts,
5039 gimple_stmt_iterator *gsi,
5040 slp_tree slp_node, code_helper code,
5041 bool narrow_src_p)
5043 unsigned int i;
5044 tree vop0, vop1, new_tmp, vec_dest;
5046 vec_dest = vec_dsts.pop ();
5048 for (i = 0; i < vec_oprnds->length (); i += 2)
5050 /* Create demotion operation. */
5051 vop0 = (*vec_oprnds)[i];
5052 vop1 = (*vec_oprnds)[i + 1];
5053 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
5054 new_tmp = make_ssa_name (vec_dest, new_stmt);
5055 gimple_set_lhs (new_stmt, new_tmp);
5056 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5057 if (multi_step_cvt || narrow_src_p)
5058 /* Store the resulting vector for next recursive call,
5059 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
5060 (*vec_oprnds)[i/2] = new_tmp;
5061 else
5063 /* This is the last step of the conversion sequence. Store the
5064 vectors in SLP_NODE or in vector info of the scalar statement
5065 (or in STMT_VINFO_RELATED_STMT chain). */
5066 if (slp_node)
5067 slp_node->push_vec_def (new_stmt);
5068 else
5069 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5073 /* For multi-step demotion operations we first generate demotion operations
5074 from the source type to the intermediate types, and then combine the
5075 results (stored in VEC_OPRNDS) in demotion operation to the destination
5076 type. */
5077 if (multi_step_cvt)
5079 /* At each level of recursion we have half of the operands we had at the
5080 previous level. */
5081 vec_oprnds->truncate ((i+1)/2);
5082 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5083 multi_step_cvt - 1,
5084 stmt_info, vec_dsts, gsi,
5085 slp_node, VEC_PACK_TRUNC_EXPR,
5086 narrow_src_p);
5089 vec_dsts.quick_push (vec_dest);
5093 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5094 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5095 STMT_INFO. For multi-step conversions store the resulting vectors and
5096 call the function recursively. */
5098 static void
5099 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5100 vec<tree> *vec_oprnds0,
5101 vec<tree> *vec_oprnds1,
5102 stmt_vec_info stmt_info, tree vec_dest,
5103 gimple_stmt_iterator *gsi,
5104 code_helper ch1,
5105 code_helper ch2, int op_type)
5107 int i;
5108 tree vop0, vop1, new_tmp1, new_tmp2;
5109 gimple *new_stmt1, *new_stmt2;
5110 vec<tree> vec_tmp = vNULL;
5112 vec_tmp.create (vec_oprnds0->length () * 2);
5113 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5115 if (op_type == binary_op)
5116 vop1 = (*vec_oprnds1)[i];
5117 else
5118 vop1 = NULL_TREE;
5120 /* Generate the two halves of promotion operation. */
5121 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5122 op_type, vec_dest, gsi,
5123 stmt_info);
5124 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5125 op_type, vec_dest, gsi,
5126 stmt_info);
5127 if (is_gimple_call (new_stmt1))
5129 new_tmp1 = gimple_call_lhs (new_stmt1);
5130 new_tmp2 = gimple_call_lhs (new_stmt2);
5132 else
5134 new_tmp1 = gimple_assign_lhs (new_stmt1);
5135 new_tmp2 = gimple_assign_lhs (new_stmt2);
5138 /* Store the results for the next step. */
5139 vec_tmp.quick_push (new_tmp1);
5140 vec_tmp.quick_push (new_tmp2);
5143 vec_oprnds0->release ();
5144 *vec_oprnds0 = vec_tmp;
5147 /* Create vectorized promotion stmts for widening stmts using only half the
5148 potential vector size for input. */
5149 static void
5150 vect_create_half_widening_stmts (vec_info *vinfo,
5151 vec<tree> *vec_oprnds0,
5152 vec<tree> *vec_oprnds1,
5153 stmt_vec_info stmt_info, tree vec_dest,
5154 gimple_stmt_iterator *gsi,
5155 code_helper code1,
5156 int op_type)
5158 int i;
5159 tree vop0, vop1;
5160 gimple *new_stmt1;
5161 gimple *new_stmt2;
5162 gimple *new_stmt3;
5163 vec<tree> vec_tmp = vNULL;
5165 vec_tmp.create (vec_oprnds0->length ());
5166 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5168 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5170 gcc_assert (op_type == binary_op);
5171 vop1 = (*vec_oprnds1)[i];
5173 /* Widen the first vector input. */
5174 out_type = TREE_TYPE (vec_dest);
5175 new_tmp1 = make_ssa_name (out_type);
5176 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5177 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5178 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5180 /* Widen the second vector input. */
5181 new_tmp2 = make_ssa_name (out_type);
5182 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5183 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5184 /* Perform the operation. With both vector inputs widened. */
5185 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5187 else
5189 /* Perform the operation. With the single vector input widened. */
5190 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5193 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5194 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5195 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5197 /* Store the results for the next step. */
5198 vec_tmp.quick_push (new_tmp3);
5201 vec_oprnds0->release ();
5202 *vec_oprnds0 = vec_tmp;
5206 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5207 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5208 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5209 Return true if STMT_INFO is vectorizable in this way. */
5211 static bool
5212 vectorizable_conversion (vec_info *vinfo,
5213 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5214 gimple **vec_stmt, slp_tree slp_node,
5215 stmt_vector_for_cost *cost_vec)
5217 tree vec_dest, cvt_op = NULL_TREE;
5218 tree scalar_dest;
5219 tree op0, op1 = NULL_TREE;
5220 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5221 tree_code tc1, tc2;
5222 code_helper code, code1, code2;
5223 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5224 tree new_temp;
5225 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5226 int ndts = 2;
5227 poly_uint64 nunits_in;
5228 poly_uint64 nunits_out;
5229 tree vectype_out, vectype_in;
5230 int ncopies, i;
5231 tree lhs_type, rhs_type;
5232 /* For conversions between floating point and integer, there're 2 NARROW
5233 cases. NARROW_SRC is for FLOAT_EXPR, means
5234 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5235 This is safe when the range of the source integer can fit into the lower
5236 precision. NARROW_DST is for FIX_TRUNC_EXPR, means
5237 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> INTEGER.
5238 For other conversions, when there's narrowing, NARROW_DST is used as
5239 default. */
5240 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5241 vec<tree> vec_oprnds0 = vNULL;
5242 vec<tree> vec_oprnds1 = vNULL;
5243 tree vop0;
5244 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5245 int multi_step_cvt = 0;
5246 vec<tree> interm_types = vNULL;
5247 tree intermediate_type, cvt_type = NULL_TREE;
5248 int op_type;
5249 unsigned short fltsz;
5251 /* Is STMT a vectorizable conversion? */
5253 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5254 return false;
5256 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5257 && ! vec_stmt)
5258 return false;
5260 gimple* stmt = stmt_info->stmt;
5261 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5262 return false;
5264 if (gimple_get_lhs (stmt) == NULL_TREE
5265 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5266 return false;
5268 if (TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5269 return false;
5271 if (is_gimple_assign (stmt))
5273 code = gimple_assign_rhs_code (stmt);
5274 op_type = TREE_CODE_LENGTH ((tree_code) code);
5276 else if (gimple_call_internal_p (stmt))
5278 code = gimple_call_internal_fn (stmt);
5279 op_type = gimple_call_num_args (stmt);
5281 else
5282 return false;
5284 bool widen_arith = (code == WIDEN_MULT_EXPR
5285 || code == WIDEN_LSHIFT_EXPR
5286 || widening_fn_p (code));
5288 if (!widen_arith
5289 && !CONVERT_EXPR_CODE_P (code)
5290 && code != FIX_TRUNC_EXPR
5291 && code != FLOAT_EXPR)
5292 return false;
5294 /* Check types of lhs and rhs. */
5295 scalar_dest = gimple_get_lhs (stmt);
5296 lhs_type = TREE_TYPE (scalar_dest);
5297 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5299 /* Check the operands of the operation. */
5300 slp_tree slp_op0, slp_op1 = NULL;
5301 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5302 0, &op0, &slp_op0, &dt[0], &vectype_in))
5304 if (dump_enabled_p ())
5305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5306 "use not simple.\n");
5307 return false;
5310 rhs_type = TREE_TYPE (op0);
5311 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5312 && !((INTEGRAL_TYPE_P (lhs_type)
5313 && INTEGRAL_TYPE_P (rhs_type))
5314 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5315 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5316 return false;
5318 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5319 && ((INTEGRAL_TYPE_P (lhs_type)
5320 && !type_has_mode_precision_p (lhs_type))
5321 || (INTEGRAL_TYPE_P (rhs_type)
5322 && !type_has_mode_precision_p (rhs_type))))
5324 if (dump_enabled_p ())
5325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5326 "type conversion to/from bit-precision unsupported."
5327 "\n");
5328 return false;
5331 if (op_type == binary_op)
5333 gcc_assert (code == WIDEN_MULT_EXPR
5334 || code == WIDEN_LSHIFT_EXPR
5335 || widening_fn_p (code));
5337 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5338 gimple_call_arg (stmt, 0);
5339 tree vectype1_in;
5340 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5341 &op1, &slp_op1, &dt[1], &vectype1_in))
5343 if (dump_enabled_p ())
5344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5345 "use not simple.\n");
5346 return false;
5348 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5349 OP1. */
5350 if (!vectype_in)
5351 vectype_in = vectype1_in;
5354 /* If op0 is an external or constant def, infer the vector type
5355 from the scalar type. */
5356 if (!vectype_in)
5357 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5358 if (vec_stmt)
5359 gcc_assert (vectype_in);
5360 if (!vectype_in)
5362 if (dump_enabled_p ())
5363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5364 "no vectype for scalar type %T\n", rhs_type);
5366 return false;
5369 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5370 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5372 if (dump_enabled_p ())
5373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5374 "can't convert between boolean and non "
5375 "boolean vectors %T\n", rhs_type);
5377 return false;
5380 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5381 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5382 if (known_eq (nunits_out, nunits_in))
5383 if (widen_arith)
5384 modifier = WIDEN;
5385 else
5386 modifier = NONE;
5387 else if (multiple_p (nunits_out, nunits_in))
5388 modifier = NARROW_DST;
5389 else
5391 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5392 modifier = WIDEN;
5395 /* Multiple types in SLP are handled by creating the appropriate number of
5396 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5397 case of SLP. */
5398 if (slp_node)
5399 ncopies = 1;
5400 else if (modifier == NARROW_DST)
5401 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5402 else
5403 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5405 /* Sanity check: make sure that at least one copy of the vectorized stmt
5406 needs to be generated. */
5407 gcc_assert (ncopies >= 1);
5409 bool found_mode = false;
5410 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5411 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5412 opt_scalar_mode rhs_mode_iter;
5414 /* Supportable by target? */
5415 switch (modifier)
5417 case NONE:
5418 if (code != FIX_TRUNC_EXPR
5419 && code != FLOAT_EXPR
5420 && !CONVERT_EXPR_CODE_P (code))
5421 return false;
5422 gcc_assert (code.is_tree_code ());
5423 if (supportable_convert_operation ((tree_code) code, vectype_out,
5424 vectype_in, &tc1))
5426 code1 = tc1;
5427 break;
5430 /* For conversions between float and integer types try whether
5431 we can use intermediate signed integer types to support the
5432 conversion. */
5433 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
5434 && (code == FLOAT_EXPR ||
5435 (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5437 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5438 bool float_expr_p = code == FLOAT_EXPR;
5439 unsigned short target_size;
5440 scalar_mode intermediate_mode;
5441 if (demotion)
5443 intermediate_mode = lhs_mode;
5444 target_size = GET_MODE_SIZE (rhs_mode);
5446 else
5448 target_size = GET_MODE_SIZE (lhs_mode);
5449 if (!int_mode_for_size
5450 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5451 goto unsupported;
5453 code1 = float_expr_p ? code : NOP_EXPR;
5454 codecvt1 = float_expr_p ? NOP_EXPR : code;
5455 opt_scalar_mode mode_iter;
5456 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5458 intermediate_mode = mode_iter.require ();
5460 if (GET_MODE_SIZE (intermediate_mode) > target_size)
5461 break;
5463 scalar_mode cvt_mode;
5464 if (!int_mode_for_size
5465 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
5466 break;
5468 cvt_type = build_nonstandard_integer_type
5469 (GET_MODE_BITSIZE (cvt_mode), 0);
5471 /* Check if the intermediate type can hold OP0's range.
5472 When converting from float to integer this is not necessary
5473 because values that do not fit the (smaller) target type are
5474 unspecified anyway. */
5475 if (demotion && float_expr_p)
5477 wide_int op_min_value, op_max_value;
5478 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5479 break;
5481 if (cvt_type == NULL_TREE
5482 || (wi::min_precision (op_max_value, SIGNED)
5483 > TYPE_PRECISION (cvt_type))
5484 || (wi::min_precision (op_min_value, SIGNED)
5485 > TYPE_PRECISION (cvt_type)))
5486 continue;
5489 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5490 /* This should only happened for SLP as long as loop vectorizer
5491 only supports same-sized vector. */
5492 if (cvt_type == NULL_TREE
5493 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5494 || !supportable_convert_operation ((tree_code) code1,
5495 vectype_out,
5496 cvt_type, &tc1)
5497 || !supportable_convert_operation ((tree_code) codecvt1,
5498 cvt_type,
5499 vectype_in, &tc2))
5500 continue;
5502 found_mode = true;
5503 break;
5506 if (found_mode)
5508 multi_step_cvt++;
5509 interm_types.safe_push (cvt_type);
5510 cvt_type = NULL_TREE;
5511 code1 = tc1;
5512 codecvt1 = tc2;
5513 break;
5516 /* FALLTHRU */
5517 unsupported:
5518 if (dump_enabled_p ())
5519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5520 "conversion not supported by target.\n");
5521 return false;
5523 case WIDEN:
5524 if (known_eq (nunits_in, nunits_out))
5526 if (!(code.is_tree_code ()
5527 && supportable_half_widening_operation ((tree_code) code,
5528 vectype_out, vectype_in,
5529 &tc1)))
5530 goto unsupported;
5531 code1 = tc1;
5532 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5533 break;
5535 if (supportable_widening_operation (vinfo, code, stmt_info,
5536 vectype_out, vectype_in, &code1,
5537 &code2, &multi_step_cvt,
5538 &interm_types))
5540 /* Binary widening operation can only be supported directly by the
5541 architecture. */
5542 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5543 break;
5546 if (code != FLOAT_EXPR
5547 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5548 goto unsupported;
5550 fltsz = GET_MODE_SIZE (lhs_mode);
5551 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5553 rhs_mode = rhs_mode_iter.require ();
5554 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5555 break;
5557 cvt_type
5558 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5559 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5560 if (cvt_type == NULL_TREE)
5561 goto unsupported;
5563 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5565 tc1 = ERROR_MARK;
5566 gcc_assert (code.is_tree_code ());
5567 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5568 cvt_type, &tc1))
5569 goto unsupported;
5570 codecvt1 = tc1;
5572 else if (!supportable_widening_operation (vinfo, code,
5573 stmt_info, vectype_out,
5574 cvt_type, &codecvt1,
5575 &codecvt2, &multi_step_cvt,
5576 &interm_types))
5577 continue;
5578 else
5579 gcc_assert (multi_step_cvt == 0);
5581 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5582 cvt_type,
5583 vectype_in, &code1,
5584 &code2, &multi_step_cvt,
5585 &interm_types))
5587 found_mode = true;
5588 break;
5592 if (!found_mode)
5593 goto unsupported;
5595 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5596 codecvt2 = ERROR_MARK;
5597 else
5599 multi_step_cvt++;
5600 interm_types.safe_push (cvt_type);
5601 cvt_type = NULL_TREE;
5603 break;
5605 case NARROW_DST:
5606 gcc_assert (op_type == unary_op);
5607 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5608 &code1, &multi_step_cvt,
5609 &interm_types))
5610 break;
5612 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5613 goto unsupported;
5615 if (code == FIX_TRUNC_EXPR)
5617 cvt_type
5618 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5619 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5620 if (cvt_type == NULL_TREE)
5621 goto unsupported;
5622 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5623 &tc1))
5624 codecvt1 = tc1;
5625 else
5626 goto unsupported;
5627 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5628 &code1, &multi_step_cvt,
5629 &interm_types))
5630 break;
5632 /* If op0 can be represented with low precision integer,
5633 truncate it to cvt_type and the do FLOAT_EXPR. */
5634 else if (code == FLOAT_EXPR)
5636 wide_int op_min_value, op_max_value;
5637 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5638 goto unsupported;
5640 cvt_type
5641 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5642 if (cvt_type == NULL_TREE
5643 || (wi::min_precision (op_max_value, SIGNED)
5644 > TYPE_PRECISION (cvt_type))
5645 || (wi::min_precision (op_min_value, SIGNED)
5646 > TYPE_PRECISION (cvt_type)))
5647 goto unsupported;
5649 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5650 if (cvt_type == NULL_TREE)
5651 goto unsupported;
5652 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5653 &code1, &multi_step_cvt,
5654 &interm_types))
5655 goto unsupported;
5656 if (supportable_convert_operation ((tree_code) code, vectype_out,
5657 cvt_type, &tc1))
5659 codecvt1 = tc1;
5660 modifier = NARROW_SRC;
5661 break;
5665 goto unsupported;
5667 default:
5668 gcc_unreachable ();
5671 if (!vec_stmt) /* transformation not required. */
5673 if (slp_node
5674 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5675 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5677 if (dump_enabled_p ())
5678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5679 "incompatible vector types for invariants\n");
5680 return false;
5682 DUMP_VECT_SCOPE ("vectorizable_conversion");
5683 if (modifier == NONE)
5685 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5686 vect_model_simple_cost (vinfo, stmt_info,
5687 ncopies * (1 + multi_step_cvt),
5688 dt, ndts, slp_node, cost_vec);
5690 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5692 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5693 /* The final packing step produces one vector result per copy. */
5694 unsigned int nvectors
5695 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5696 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5697 multi_step_cvt, cost_vec,
5698 widen_arith);
5700 else
5702 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5703 /* The initial unpacking step produces two vector results
5704 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5705 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5706 unsigned int nvectors
5707 = (slp_node
5708 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5709 : ncopies * 2);
5710 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5711 multi_step_cvt, cost_vec,
5712 widen_arith);
5714 interm_types.release ();
5715 return true;
5718 /* Transform. */
5719 if (dump_enabled_p ())
5720 dump_printf_loc (MSG_NOTE, vect_location,
5721 "transform conversion. ncopies = %d.\n", ncopies);
5723 if (op_type == binary_op)
5725 if (CONSTANT_CLASS_P (op0))
5726 op0 = fold_convert (TREE_TYPE (op1), op0);
5727 else if (CONSTANT_CLASS_P (op1))
5728 op1 = fold_convert (TREE_TYPE (op0), op1);
5731 /* In case of multi-step conversion, we first generate conversion operations
5732 to the intermediate types, and then from that types to the final one.
5733 We create vector destinations for the intermediate type (TYPES) received
5734 from supportable_*_operation, and store them in the correct order
5735 for future use in vect_create_vectorized_*_stmts (). */
5736 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5737 bool widen_or_narrow_float_p
5738 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5739 vec_dest = vect_create_destination_var (scalar_dest,
5740 widen_or_narrow_float_p
5741 ? cvt_type : vectype_out);
5742 vec_dsts.quick_push (vec_dest);
5744 if (multi_step_cvt)
5746 for (i = interm_types.length () - 1;
5747 interm_types.iterate (i, &intermediate_type); i--)
5749 vec_dest = vect_create_destination_var (scalar_dest,
5750 intermediate_type);
5751 vec_dsts.quick_push (vec_dest);
5755 if (cvt_type)
5756 vec_dest = vect_create_destination_var (scalar_dest,
5757 widen_or_narrow_float_p
5758 ? vectype_out : cvt_type);
5760 int ninputs = 1;
5761 if (!slp_node)
5763 if (modifier == WIDEN)
5765 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5767 if (multi_step_cvt)
5768 ninputs = vect_pow2 (multi_step_cvt);
5769 ninputs *= 2;
5773 switch (modifier)
5775 case NONE:
5776 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5777 op0, &vec_oprnds0);
5778 /* vec_dest is intermediate type operand when multi_step_cvt. */
5779 if (multi_step_cvt)
5781 cvt_op = vec_dest;
5782 vec_dest = vec_dsts[0];
5785 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5787 /* Arguments are ready, create the new vector stmt. */
5788 gimple* new_stmt;
5789 if (multi_step_cvt)
5791 gcc_assert (multi_step_cvt == 1);
5792 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5793 new_temp = make_ssa_name (cvt_op, new_stmt);
5794 gimple_assign_set_lhs (new_stmt, new_temp);
5795 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5796 vop0 = new_temp;
5798 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5799 new_temp = make_ssa_name (vec_dest, new_stmt);
5800 gimple_set_lhs (new_stmt, new_temp);
5801 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5803 if (slp_node)
5804 slp_node->push_vec_def (new_stmt);
5805 else
5806 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5808 break;
5810 case WIDEN:
5811 /* In case the vectorization factor (VF) is bigger than the number
5812 of elements that we can fit in a vectype (nunits), we have to
5813 generate more than one vector stmt - i.e - we need to "unroll"
5814 the vector stmt by a factor VF/nunits. */
5815 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5816 op0, &vec_oprnds0,
5817 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5818 &vec_oprnds1);
5819 if (code == WIDEN_LSHIFT_EXPR)
5821 int oprnds_size = vec_oprnds0.length ();
5822 vec_oprnds1.create (oprnds_size);
5823 for (i = 0; i < oprnds_size; ++i)
5824 vec_oprnds1.quick_push (op1);
5826 /* Arguments are ready. Create the new vector stmts. */
5827 for (i = multi_step_cvt; i >= 0; i--)
5829 tree this_dest = vec_dsts[i];
5830 code_helper c1 = code1, c2 = code2;
5831 if (i == 0 && codecvt2 != ERROR_MARK)
5833 c1 = codecvt1;
5834 c2 = codecvt2;
5836 if (known_eq (nunits_out, nunits_in))
5837 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5838 stmt_info, this_dest, gsi, c1,
5839 op_type);
5840 else
5841 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5842 &vec_oprnds1, stmt_info,
5843 this_dest, gsi,
5844 c1, c2, op_type);
5847 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5849 gimple *new_stmt;
5850 if (cvt_type)
5852 new_temp = make_ssa_name (vec_dest);
5853 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5854 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5856 else
5857 new_stmt = SSA_NAME_DEF_STMT (vop0);
5859 if (slp_node)
5860 slp_node->push_vec_def (new_stmt);
5861 else
5862 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5864 break;
5866 case NARROW_SRC:
5867 case NARROW_DST:
5868 /* In case the vectorization factor (VF) is bigger than the number
5869 of elements that we can fit in a vectype (nunits), we have to
5870 generate more than one vector stmt - i.e - we need to "unroll"
5871 the vector stmt by a factor VF/nunits. */
5872 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5873 op0, &vec_oprnds0);
5874 /* Arguments are ready. Create the new vector stmts. */
5875 if (cvt_type && modifier == NARROW_DST)
5876 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5878 new_temp = make_ssa_name (vec_dest);
5879 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5880 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5881 vec_oprnds0[i] = new_temp;
5884 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5885 multi_step_cvt,
5886 stmt_info, vec_dsts, gsi,
5887 slp_node, code1,
5888 modifier == NARROW_SRC);
5889 /* After demoting op0 to cvt_type, convert it to dest. */
5890 if (cvt_type && code == FLOAT_EXPR)
5892 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5894 /* Arguments are ready, create the new vector stmt. */
5895 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5896 gimple *new_stmt
5897 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5898 new_temp = make_ssa_name (vec_dest, new_stmt);
5899 gimple_set_lhs (new_stmt, new_temp);
5900 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5902 /* This is the last step of the conversion sequence. Store the
5903 vectors in SLP_NODE or in vector info of the scalar statement
5904 (or in STMT_VINFO_RELATED_STMT chain). */
5905 if (slp_node)
5906 slp_node->push_vec_def (new_stmt);
5907 else
5908 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5911 break;
5913 if (!slp_node)
5914 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5916 vec_oprnds0.release ();
5917 vec_oprnds1.release ();
5918 interm_types.release ();
5920 return true;
5923 /* Return true if we can assume from the scalar form of STMT_INFO that
5924 neither the scalar nor the vector forms will generate code. STMT_INFO
5925 is known not to involve a data reference. */
5927 bool
5928 vect_nop_conversion_p (stmt_vec_info stmt_info)
5930 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5931 if (!stmt)
5932 return false;
5934 tree lhs = gimple_assign_lhs (stmt);
5935 tree_code code = gimple_assign_rhs_code (stmt);
5936 tree rhs = gimple_assign_rhs1 (stmt);
5938 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5939 return true;
5941 if (CONVERT_EXPR_CODE_P (code))
5942 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5944 return false;
5947 /* Function vectorizable_assignment.
5949 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5950 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5951 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5952 Return true if STMT_INFO is vectorizable in this way. */
5954 static bool
5955 vectorizable_assignment (vec_info *vinfo,
5956 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5957 gimple **vec_stmt, slp_tree slp_node,
5958 stmt_vector_for_cost *cost_vec)
5960 tree vec_dest;
5961 tree scalar_dest;
5962 tree op;
5963 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5964 tree new_temp;
5965 enum vect_def_type dt[1] = {vect_unknown_def_type};
5966 int ndts = 1;
5967 int ncopies;
5968 int i;
5969 vec<tree> vec_oprnds = vNULL;
5970 tree vop;
5971 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5972 enum tree_code code;
5973 tree vectype_in;
5975 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5976 return false;
5978 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5979 && ! vec_stmt)
5980 return false;
5982 /* Is vectorizable assignment? */
5983 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5984 if (!stmt)
5985 return false;
5987 scalar_dest = gimple_assign_lhs (stmt);
5988 if (TREE_CODE (scalar_dest) != SSA_NAME)
5989 return false;
5991 if (STMT_VINFO_DATA_REF (stmt_info))
5992 return false;
5994 code = gimple_assign_rhs_code (stmt);
5995 if (!(gimple_assign_single_p (stmt)
5996 || code == PAREN_EXPR
5997 || CONVERT_EXPR_CODE_P (code)))
5998 return false;
6000 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6001 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6003 /* Multiple types in SLP are handled by creating the appropriate number of
6004 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6005 case of SLP. */
6006 if (slp_node)
6007 ncopies = 1;
6008 else
6009 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6011 gcc_assert (ncopies >= 1);
6013 slp_tree slp_op;
6014 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
6015 &dt[0], &vectype_in))
6017 if (dump_enabled_p ())
6018 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6019 "use not simple.\n");
6020 return false;
6022 if (!vectype_in)
6023 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
6025 /* We can handle NOP_EXPR conversions that do not change the number
6026 of elements or the vector size. */
6027 if ((CONVERT_EXPR_CODE_P (code)
6028 || code == VIEW_CONVERT_EXPR)
6029 && (!vectype_in
6030 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
6031 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
6032 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
6033 return false;
6035 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
6037 if (dump_enabled_p ())
6038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6039 "can't convert between boolean and non "
6040 "boolean vectors %T\n", TREE_TYPE (op));
6042 return false;
6045 /* We do not handle bit-precision changes. */
6046 if ((CONVERT_EXPR_CODE_P (code)
6047 || code == VIEW_CONVERT_EXPR)
6048 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6049 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6050 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
6051 && !type_has_mode_precision_p (TREE_TYPE (op))))
6052 /* But a conversion that does not change the bit-pattern is ok. */
6053 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6054 && INTEGRAL_TYPE_P (TREE_TYPE (op))
6055 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
6056 > TYPE_PRECISION (TREE_TYPE (op)))
6057 && TYPE_UNSIGNED (TREE_TYPE (op))))
6059 if (dump_enabled_p ())
6060 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6061 "type conversion to/from bit-precision "
6062 "unsupported.\n");
6063 return false;
6066 if (!vec_stmt) /* transformation not required. */
6068 if (slp_node
6069 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
6071 if (dump_enabled_p ())
6072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6073 "incompatible vector types for invariants\n");
6074 return false;
6076 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
6077 DUMP_VECT_SCOPE ("vectorizable_assignment");
6078 if (!vect_nop_conversion_p (stmt_info))
6079 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
6080 cost_vec);
6081 return true;
6084 /* Transform. */
6085 if (dump_enabled_p ())
6086 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6088 /* Handle def. */
6089 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6091 /* Handle use. */
6092 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
6094 /* Arguments are ready. create the new vector stmt. */
6095 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
6097 if (CONVERT_EXPR_CODE_P (code)
6098 || code == VIEW_CONVERT_EXPR)
6099 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6100 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6101 new_temp = make_ssa_name (vec_dest, new_stmt);
6102 gimple_assign_set_lhs (new_stmt, new_temp);
6103 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6104 if (slp_node)
6105 slp_node->push_vec_def (new_stmt);
6106 else
6107 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6109 if (!slp_node)
6110 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6112 vec_oprnds.release ();
6113 return true;
6117 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6118 either as shift by a scalar or by a vector. */
6120 bool
6121 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6124 machine_mode vec_mode;
6125 optab optab;
6126 int icode;
6127 tree vectype;
6129 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6130 if (!vectype)
6131 return false;
6133 optab = optab_for_tree_code (code, vectype, optab_scalar);
6134 if (!optab
6135 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6137 optab = optab_for_tree_code (code, vectype, optab_vector);
6138 if (!optab
6139 || (optab_handler (optab, TYPE_MODE (vectype))
6140 == CODE_FOR_nothing))
6141 return false;
6144 vec_mode = TYPE_MODE (vectype);
6145 icode = (int) optab_handler (optab, vec_mode);
6146 if (icode == CODE_FOR_nothing)
6147 return false;
6149 return true;
6153 /* Function vectorizable_shift.
6155 Check if STMT_INFO performs a shift operation that can be vectorized.
6156 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6157 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6158 Return true if STMT_INFO is vectorizable in this way. */
6160 static bool
6161 vectorizable_shift (vec_info *vinfo,
6162 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6163 gimple **vec_stmt, slp_tree slp_node,
6164 stmt_vector_for_cost *cost_vec)
6166 tree vec_dest;
6167 tree scalar_dest;
6168 tree op0, op1 = NULL;
6169 tree vec_oprnd1 = NULL_TREE;
6170 tree vectype;
6171 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6172 enum tree_code code;
6173 machine_mode vec_mode;
6174 tree new_temp;
6175 optab optab;
6176 int icode;
6177 machine_mode optab_op2_mode;
6178 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6179 int ndts = 2;
6180 poly_uint64 nunits_in;
6181 poly_uint64 nunits_out;
6182 tree vectype_out;
6183 tree op1_vectype;
6184 int ncopies;
6185 int i;
6186 vec<tree> vec_oprnds0 = vNULL;
6187 vec<tree> vec_oprnds1 = vNULL;
6188 tree vop0, vop1;
6189 unsigned int k;
6190 bool scalar_shift_arg = true;
6191 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6192 bool incompatible_op1_vectype_p = false;
6194 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6195 return false;
6197 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6198 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6199 && ! vec_stmt)
6200 return false;
6202 /* Is STMT a vectorizable binary/unary operation? */
6203 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6204 if (!stmt)
6205 return false;
6207 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6208 return false;
6210 code = gimple_assign_rhs_code (stmt);
6212 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6213 || code == RROTATE_EXPR))
6214 return false;
6216 scalar_dest = gimple_assign_lhs (stmt);
6217 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6218 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6220 if (dump_enabled_p ())
6221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6222 "bit-precision shifts not supported.\n");
6223 return false;
6226 slp_tree slp_op0;
6227 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6228 0, &op0, &slp_op0, &dt[0], &vectype))
6230 if (dump_enabled_p ())
6231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6232 "use not simple.\n");
6233 return false;
6235 /* If op0 is an external or constant def, infer the vector type
6236 from the scalar type. */
6237 if (!vectype)
6238 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6239 if (vec_stmt)
6240 gcc_assert (vectype);
6241 if (!vectype)
6243 if (dump_enabled_p ())
6244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6245 "no vectype for scalar type\n");
6246 return false;
6249 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6250 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6251 if (maybe_ne (nunits_out, nunits_in))
6252 return false;
6254 stmt_vec_info op1_def_stmt_info;
6255 slp_tree slp_op1;
6256 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6257 &dt[1], &op1_vectype, &op1_def_stmt_info))
6259 if (dump_enabled_p ())
6260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6261 "use not simple.\n");
6262 return false;
6265 /* Multiple types in SLP are handled by creating the appropriate number of
6266 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6267 case of SLP. */
6268 if (slp_node)
6269 ncopies = 1;
6270 else
6271 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6273 gcc_assert (ncopies >= 1);
6275 /* Determine whether the shift amount is a vector, or scalar. If the
6276 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6278 if ((dt[1] == vect_internal_def
6279 || dt[1] == vect_induction_def
6280 || dt[1] == vect_nested_cycle)
6281 && !slp_node)
6282 scalar_shift_arg = false;
6283 else if (dt[1] == vect_constant_def
6284 || dt[1] == vect_external_def
6285 || dt[1] == vect_internal_def)
6287 /* In SLP, need to check whether the shift count is the same,
6288 in loops if it is a constant or invariant, it is always
6289 a scalar shift. */
6290 if (slp_node)
6292 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6293 stmt_vec_info slpstmt_info;
6295 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6297 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6298 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6299 scalar_shift_arg = false;
6302 /* For internal SLP defs we have to make sure we see scalar stmts
6303 for all vector elements.
6304 ??? For different vectors we could resort to a different
6305 scalar shift operand but code-generation below simply always
6306 takes the first. */
6307 if (dt[1] == vect_internal_def
6308 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6309 stmts.length ()))
6310 scalar_shift_arg = false;
6313 /* If the shift amount is computed by a pattern stmt we cannot
6314 use the scalar amount directly thus give up and use a vector
6315 shift. */
6316 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6317 scalar_shift_arg = false;
6319 else
6321 if (dump_enabled_p ())
6322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6323 "operand mode requires invariant argument.\n");
6324 return false;
6327 /* Vector shifted by vector. */
6328 bool was_scalar_shift_arg = scalar_shift_arg;
6329 if (!scalar_shift_arg)
6331 optab = optab_for_tree_code (code, vectype, optab_vector);
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_NOTE, vect_location,
6334 "vector/vector shift/rotate found.\n");
6336 if (!op1_vectype)
6337 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6338 slp_op1);
6339 incompatible_op1_vectype_p
6340 = (op1_vectype == NULL_TREE
6341 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6342 TYPE_VECTOR_SUBPARTS (vectype))
6343 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6344 if (incompatible_op1_vectype_p
6345 && (!slp_node
6346 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6347 || slp_op1->refcnt != 1))
6349 if (dump_enabled_p ())
6350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6351 "unusable type for last operand in"
6352 " vector/vector shift/rotate.\n");
6353 return false;
6356 /* See if the machine has a vector shifted by scalar insn and if not
6357 then see if it has a vector shifted by vector insn. */
6358 else
6360 optab = optab_for_tree_code (code, vectype, optab_scalar);
6361 if (optab
6362 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6364 if (dump_enabled_p ())
6365 dump_printf_loc (MSG_NOTE, vect_location,
6366 "vector/scalar shift/rotate found.\n");
6368 else
6370 optab = optab_for_tree_code (code, vectype, optab_vector);
6371 if (optab
6372 && (optab_handler (optab, TYPE_MODE (vectype))
6373 != CODE_FOR_nothing))
6375 scalar_shift_arg = false;
6377 if (dump_enabled_p ())
6378 dump_printf_loc (MSG_NOTE, vect_location,
6379 "vector/vector shift/rotate found.\n");
6381 if (!op1_vectype)
6382 op1_vectype = get_vectype_for_scalar_type (vinfo,
6383 TREE_TYPE (op1),
6384 slp_op1);
6386 /* Unlike the other binary operators, shifts/rotates have
6387 the rhs being int, instead of the same type as the lhs,
6388 so make sure the scalar is the right type if we are
6389 dealing with vectors of long long/long/short/char. */
6390 incompatible_op1_vectype_p
6391 = (!op1_vectype
6392 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6393 TREE_TYPE (op1)));
6394 if (incompatible_op1_vectype_p
6395 && dt[1] == vect_internal_def)
6397 if (dump_enabled_p ())
6398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6399 "unusable type for last operand in"
6400 " vector/vector shift/rotate.\n");
6401 return false;
6407 /* Supportable by target? */
6408 if (!optab)
6410 if (dump_enabled_p ())
6411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6412 "no optab.\n");
6413 return false;
6415 vec_mode = TYPE_MODE (vectype);
6416 icode = (int) optab_handler (optab, vec_mode);
6417 if (icode == CODE_FOR_nothing)
6419 if (dump_enabled_p ())
6420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6421 "op not supported by target.\n");
6422 return false;
6424 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6425 if (vect_emulated_vector_p (vectype))
6426 return false;
6428 if (!vec_stmt) /* transformation not required. */
6430 if (slp_node
6431 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6432 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6433 && (!incompatible_op1_vectype_p
6434 || dt[1] == vect_constant_def)
6435 && !vect_maybe_update_slp_op_vectype
6436 (slp_op1,
6437 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6439 if (dump_enabled_p ())
6440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6441 "incompatible vector types for invariants\n");
6442 return false;
6444 /* Now adjust the constant shift amount in place. */
6445 if (slp_node
6446 && incompatible_op1_vectype_p
6447 && dt[1] == vect_constant_def)
6449 for (unsigned i = 0;
6450 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6452 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6453 = fold_convert (TREE_TYPE (vectype),
6454 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6455 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6456 == INTEGER_CST));
6459 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6460 DUMP_VECT_SCOPE ("vectorizable_shift");
6461 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6462 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6463 return true;
6466 /* Transform. */
6468 if (dump_enabled_p ())
6469 dump_printf_loc (MSG_NOTE, vect_location,
6470 "transform binary/unary operation.\n");
6472 if (incompatible_op1_vectype_p && !slp_node)
6474 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6475 op1 = fold_convert (TREE_TYPE (vectype), op1);
6476 if (dt[1] != vect_constant_def)
6477 op1 = vect_init_vector (vinfo, stmt_info, op1,
6478 TREE_TYPE (vectype), NULL);
6481 /* Handle def. */
6482 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6484 if (scalar_shift_arg && dt[1] != vect_internal_def)
6486 /* Vector shl and shr insn patterns can be defined with scalar
6487 operand 2 (shift operand). In this case, use constant or loop
6488 invariant op1 directly, without extending it to vector mode
6489 first. */
6490 optab_op2_mode = insn_data[icode].operand[2].mode;
6491 if (!VECTOR_MODE_P (optab_op2_mode))
6493 if (dump_enabled_p ())
6494 dump_printf_loc (MSG_NOTE, vect_location,
6495 "operand 1 using scalar mode.\n");
6496 vec_oprnd1 = op1;
6497 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6498 vec_oprnds1.quick_push (vec_oprnd1);
6499 /* Store vec_oprnd1 for every vector stmt to be created.
6500 We check during the analysis that all the shift arguments
6501 are the same.
6502 TODO: Allow different constants for different vector
6503 stmts generated for an SLP instance. */
6504 for (k = 0;
6505 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6506 vec_oprnds1.quick_push (vec_oprnd1);
6509 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6511 if (was_scalar_shift_arg)
6513 /* If the argument was the same in all lanes create
6514 the correctly typed vector shift amount directly. */
6515 op1 = fold_convert (TREE_TYPE (vectype), op1);
6516 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6517 !loop_vinfo ? gsi : NULL);
6518 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6519 !loop_vinfo ? gsi : NULL);
6520 vec_oprnds1.create (slp_node->vec_stmts_size);
6521 for (k = 0; k < slp_node->vec_stmts_size; k++)
6522 vec_oprnds1.quick_push (vec_oprnd1);
6524 else if (dt[1] == vect_constant_def)
6525 /* The constant shift amount has been adjusted in place. */
6527 else
6528 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6531 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6532 (a special case for certain kind of vector shifts); otherwise,
6533 operand 1 should be of a vector type (the usual case). */
6534 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6535 op0, &vec_oprnds0,
6536 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6538 /* Arguments are ready. Create the new vector stmt. */
6539 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6541 /* For internal defs where we need to use a scalar shift arg
6542 extract the first lane. */
6543 if (scalar_shift_arg && dt[1] == vect_internal_def)
6545 vop1 = vec_oprnds1[0];
6546 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6547 gassign *new_stmt
6548 = gimple_build_assign (new_temp,
6549 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6550 vop1,
6551 TYPE_SIZE (TREE_TYPE (new_temp)),
6552 bitsize_zero_node));
6553 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6554 vop1 = new_temp;
6556 else
6557 vop1 = vec_oprnds1[i];
6558 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6559 new_temp = make_ssa_name (vec_dest, new_stmt);
6560 gimple_assign_set_lhs (new_stmt, new_temp);
6561 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6562 if (slp_node)
6563 slp_node->push_vec_def (new_stmt);
6564 else
6565 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6568 if (!slp_node)
6569 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6571 vec_oprnds0.release ();
6572 vec_oprnds1.release ();
6574 return true;
6577 /* Function vectorizable_operation.
6579 Check if STMT_INFO performs a binary, unary or ternary operation that can
6580 be vectorized.
6581 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6582 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6583 Return true if STMT_INFO is vectorizable in this way. */
6585 static bool
6586 vectorizable_operation (vec_info *vinfo,
6587 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6588 gimple **vec_stmt, slp_tree slp_node,
6589 stmt_vector_for_cost *cost_vec)
6591 tree vec_dest;
6592 tree scalar_dest;
6593 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6594 tree vectype;
6595 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6596 enum tree_code code, orig_code;
6597 machine_mode vec_mode;
6598 tree new_temp;
6599 int op_type;
6600 optab optab;
6601 bool target_support_p;
6602 enum vect_def_type dt[3]
6603 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6604 int ndts = 3;
6605 poly_uint64 nunits_in;
6606 poly_uint64 nunits_out;
6607 tree vectype_out;
6608 int ncopies, vec_num;
6609 int i;
6610 vec<tree> vec_oprnds0 = vNULL;
6611 vec<tree> vec_oprnds1 = vNULL;
6612 vec<tree> vec_oprnds2 = vNULL;
6613 tree vop0, vop1, vop2;
6614 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6616 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6617 return false;
6619 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6620 && ! vec_stmt)
6621 return false;
6623 /* Is STMT a vectorizable binary/unary operation? */
6624 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6625 if (!stmt)
6626 return false;
6628 /* Loads and stores are handled in vectorizable_{load,store}. */
6629 if (STMT_VINFO_DATA_REF (stmt_info))
6630 return false;
6632 orig_code = code = gimple_assign_rhs_code (stmt);
6634 /* Shifts are handled in vectorizable_shift. */
6635 if (code == LSHIFT_EXPR
6636 || code == RSHIFT_EXPR
6637 || code == LROTATE_EXPR
6638 || code == RROTATE_EXPR)
6639 return false;
6641 /* Comparisons are handled in vectorizable_comparison. */
6642 if (TREE_CODE_CLASS (code) == tcc_comparison)
6643 return false;
6645 /* Conditions are handled in vectorizable_condition. */
6646 if (code == COND_EXPR)
6647 return false;
6649 /* For pointer addition and subtraction, we should use the normal
6650 plus and minus for the vector operation. */
6651 if (code == POINTER_PLUS_EXPR)
6652 code = PLUS_EXPR;
6653 if (code == POINTER_DIFF_EXPR)
6654 code = MINUS_EXPR;
6656 /* Support only unary or binary operations. */
6657 op_type = TREE_CODE_LENGTH (code);
6658 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6660 if (dump_enabled_p ())
6661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6662 "num. args = %d (not unary/binary/ternary op).\n",
6663 op_type);
6664 return false;
6667 scalar_dest = gimple_assign_lhs (stmt);
6668 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6670 /* Most operations cannot handle bit-precision types without extra
6671 truncations. */
6672 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6673 if (!mask_op_p
6674 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6675 /* Exception are bitwise binary operations. */
6676 && code != BIT_IOR_EXPR
6677 && code != BIT_XOR_EXPR
6678 && code != BIT_AND_EXPR)
6680 if (dump_enabled_p ())
6681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6682 "bit-precision arithmetic not supported.\n");
6683 return false;
6686 slp_tree slp_op0;
6687 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6688 0, &op0, &slp_op0, &dt[0], &vectype))
6690 if (dump_enabled_p ())
6691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6692 "use not simple.\n");
6693 return false;
6695 bool is_invariant = (dt[0] == vect_external_def
6696 || dt[0] == vect_constant_def);
6697 /* If op0 is an external or constant def, infer the vector type
6698 from the scalar type. */
6699 if (!vectype)
6701 /* For boolean type we cannot determine vectype by
6702 invariant value (don't know whether it is a vector
6703 of booleans or vector of integers). We use output
6704 vectype because operations on boolean don't change
6705 type. */
6706 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6708 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6710 if (dump_enabled_p ())
6711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6712 "not supported operation on bool value.\n");
6713 return false;
6715 vectype = vectype_out;
6717 else
6718 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6719 slp_node);
6721 if (vec_stmt)
6722 gcc_assert (vectype);
6723 if (!vectype)
6725 if (dump_enabled_p ())
6726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6727 "no vectype for scalar type %T\n",
6728 TREE_TYPE (op0));
6730 return false;
6733 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6734 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6735 if (maybe_ne (nunits_out, nunits_in))
6736 return false;
6738 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6739 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6740 if (op_type == binary_op || op_type == ternary_op)
6742 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6743 1, &op1, &slp_op1, &dt[1], &vectype2))
6745 if (dump_enabled_p ())
6746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6747 "use not simple.\n");
6748 return false;
6750 is_invariant &= (dt[1] == vect_external_def
6751 || dt[1] == vect_constant_def);
6752 if (vectype2
6753 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6754 return false;
6756 if (op_type == ternary_op)
6758 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6759 2, &op2, &slp_op2, &dt[2], &vectype3))
6761 if (dump_enabled_p ())
6762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6763 "use not simple.\n");
6764 return false;
6766 is_invariant &= (dt[2] == vect_external_def
6767 || dt[2] == vect_constant_def);
6768 if (vectype3
6769 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6770 return false;
6773 /* Multiple types in SLP are handled by creating the appropriate number of
6774 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6775 case of SLP. */
6776 if (slp_node)
6778 ncopies = 1;
6779 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6781 else
6783 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6784 vec_num = 1;
6787 gcc_assert (ncopies >= 1);
6789 /* Reject attempts to combine mask types with nonmask types, e.g. if
6790 we have an AND between a (nonmask) boolean loaded from memory and
6791 a (mask) boolean result of a comparison.
6793 TODO: We could easily fix these cases up using pattern statements. */
6794 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6795 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6796 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6798 if (dump_enabled_p ())
6799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6800 "mixed mask and nonmask vector types\n");
6801 return false;
6804 /* Supportable by target? */
6806 vec_mode = TYPE_MODE (vectype);
6807 if (code == MULT_HIGHPART_EXPR)
6808 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6809 else
6811 optab = optab_for_tree_code (code, vectype, optab_default);
6812 if (!optab)
6814 if (dump_enabled_p ())
6815 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6816 "no optab.\n");
6817 return false;
6819 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6820 || optab_libfunc (optab, vec_mode));
6823 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6824 if (!target_support_p || using_emulated_vectors_p)
6826 if (dump_enabled_p ())
6827 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6828 "op not supported by target.\n");
6829 /* When vec_mode is not a vector mode and we verified ops we
6830 do not have to lower like AND are natively supported let
6831 those through even when the mode isn't word_mode. For
6832 ops we have to lower the lowering code assumes we are
6833 dealing with word_mode. */
6834 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6835 || !target_support_p)
6836 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6837 /* Check only during analysis. */
6838 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6840 if (dump_enabled_p ())
6841 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6842 return false;
6844 if (dump_enabled_p ())
6845 dump_printf_loc (MSG_NOTE, vect_location,
6846 "proceeding using word mode.\n");
6847 using_emulated_vectors_p = true;
6850 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6851 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6852 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6853 internal_fn cond_fn = get_conditional_internal_fn (code);
6854 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6856 /* If operating on inactive elements could generate spurious traps,
6857 we need to restrict the operation to active lanes. Note that this
6858 specifically doesn't apply to unhoisted invariants, since they
6859 operate on the same value for every lane.
6861 Similarly, if this operation is part of a reduction, a fully-masked
6862 loop should only change the active lanes of the reduction chain,
6863 keeping the inactive lanes as-is. */
6864 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6865 || reduc_idx >= 0);
6867 if (!vec_stmt) /* transformation not required. */
6869 if (loop_vinfo
6870 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6871 && mask_out_inactive)
6873 if (cond_len_fn != IFN_LAST
6874 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6875 OPTIMIZE_FOR_SPEED))
6876 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6878 else if (cond_fn != IFN_LAST
6879 && direct_internal_fn_supported_p (cond_fn, vectype,
6880 OPTIMIZE_FOR_SPEED))
6881 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6882 vectype, NULL);
6883 else
6885 if (dump_enabled_p ())
6886 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6887 "can't use a fully-masked loop because no"
6888 " conditional operation is available.\n");
6889 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6893 /* Put types on constant and invariant SLP children. */
6894 if (slp_node
6895 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6896 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6897 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6899 if (dump_enabled_p ())
6900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6901 "incompatible vector types for invariants\n");
6902 return false;
6905 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6906 DUMP_VECT_SCOPE ("vectorizable_operation");
6907 vect_model_simple_cost (vinfo, stmt_info,
6908 ncopies, dt, ndts, slp_node, cost_vec);
6909 if (using_emulated_vectors_p)
6911 /* The above vect_model_simple_cost call handles constants
6912 in the prologue and (mis-)costs one of the stmts as
6913 vector stmt. See below for the actual lowering that will
6914 be applied. */
6915 unsigned n
6916 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6917 switch (code)
6919 case PLUS_EXPR:
6920 n *= 5;
6921 break;
6922 case MINUS_EXPR:
6923 n *= 6;
6924 break;
6925 case NEGATE_EXPR:
6926 n *= 4;
6927 break;
6928 default:
6929 /* Bit operations do not have extra cost and are accounted
6930 as vector stmt by vect_model_simple_cost. */
6931 n = 0;
6932 break;
6934 if (n != 0)
6936 /* We also need to materialize two large constants. */
6937 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6938 0, vect_prologue);
6939 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6940 0, vect_body);
6943 return true;
6946 /* Transform. */
6948 if (dump_enabled_p ())
6949 dump_printf_loc (MSG_NOTE, vect_location,
6950 "transform binary/unary operation.\n");
6952 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6953 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6955 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6956 vectors with unsigned elements, but the result is signed. So, we
6957 need to compute the MINUS_EXPR into vectype temporary and
6958 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6959 tree vec_cvt_dest = NULL_TREE;
6960 if (orig_code == POINTER_DIFF_EXPR)
6962 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6963 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6965 /* Handle def. */
6966 else
6967 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6969 /* In case the vectorization factor (VF) is bigger than the number
6970 of elements that we can fit in a vectype (nunits), we have to generate
6971 more than one vector stmt - i.e - we need to "unroll" the
6972 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6973 from one copy of the vector stmt to the next, in the field
6974 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6975 stages to find the correct vector defs to be used when vectorizing
6976 stmts that use the defs of the current stmt. The example below
6977 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6978 we need to create 4 vectorized stmts):
6980 before vectorization:
6981 RELATED_STMT VEC_STMT
6982 S1: x = memref - -
6983 S2: z = x + 1 - -
6985 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6986 there):
6987 RELATED_STMT VEC_STMT
6988 VS1_0: vx0 = memref0 VS1_1 -
6989 VS1_1: vx1 = memref1 VS1_2 -
6990 VS1_2: vx2 = memref2 VS1_3 -
6991 VS1_3: vx3 = memref3 - -
6992 S1: x = load - VS1_0
6993 S2: z = x + 1 - -
6995 step2: vectorize stmt S2 (done here):
6996 To vectorize stmt S2 we first need to find the relevant vector
6997 def for the first operand 'x'. This is, as usual, obtained from
6998 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6999 that defines 'x' (S1). This way we find the stmt VS1_0, and the
7000 relevant vector def 'vx0'. Having found 'vx0' we can generate
7001 the vector stmt VS2_0, and as usual, record it in the
7002 STMT_VINFO_VEC_STMT of stmt S2.
7003 When creating the second copy (VS2_1), we obtain the relevant vector
7004 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
7005 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
7006 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
7007 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
7008 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
7009 chain of stmts and pointers:
7010 RELATED_STMT VEC_STMT
7011 VS1_0: vx0 = memref0 VS1_1 -
7012 VS1_1: vx1 = memref1 VS1_2 -
7013 VS1_2: vx2 = memref2 VS1_3 -
7014 VS1_3: vx3 = memref3 - -
7015 S1: x = load - VS1_0
7016 VS2_0: vz0 = vx0 + v1 VS2_1 -
7017 VS2_1: vz1 = vx1 + v1 VS2_2 -
7018 VS2_2: vz2 = vx2 + v1 VS2_3 -
7019 VS2_3: vz3 = vx3 + v1 - -
7020 S2: z = x + 1 - VS2_0 */
7022 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
7023 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
7024 /* Arguments are ready. Create the new vector stmt. */
7025 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
7027 gimple *new_stmt = NULL;
7028 vop1 = ((op_type == binary_op || op_type == ternary_op)
7029 ? vec_oprnds1[i] : NULL_TREE);
7030 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
7031 if (using_emulated_vectors_p
7032 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
7034 /* Lower the operation. This follows vector lowering. */
7035 unsigned int width = vector_element_bits (vectype);
7036 tree inner_type = TREE_TYPE (vectype);
7037 tree word_type
7038 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
7039 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
7040 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
7041 tree high_bits
7042 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
7043 tree wvop0 = make_ssa_name (word_type);
7044 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
7045 build1 (VIEW_CONVERT_EXPR,
7046 word_type, vop0));
7047 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7048 tree result_low, signs;
7049 if (code == PLUS_EXPR || code == MINUS_EXPR)
7051 tree wvop1 = make_ssa_name (word_type);
7052 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
7053 build1 (VIEW_CONVERT_EXPR,
7054 word_type, vop1));
7055 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7056 signs = make_ssa_name (word_type);
7057 new_stmt = gimple_build_assign (signs,
7058 BIT_XOR_EXPR, wvop0, wvop1);
7059 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7060 tree b_low = make_ssa_name (word_type);
7061 new_stmt = gimple_build_assign (b_low,
7062 BIT_AND_EXPR, wvop1, low_bits);
7063 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7064 tree a_low = make_ssa_name (word_type);
7065 if (code == PLUS_EXPR)
7066 new_stmt = gimple_build_assign (a_low,
7067 BIT_AND_EXPR, wvop0, low_bits);
7068 else
7069 new_stmt = gimple_build_assign (a_low,
7070 BIT_IOR_EXPR, wvop0, high_bits);
7071 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7072 if (code == MINUS_EXPR)
7074 new_stmt = gimple_build_assign (NULL_TREE,
7075 BIT_NOT_EXPR, signs);
7076 signs = make_ssa_name (word_type);
7077 gimple_assign_set_lhs (new_stmt, signs);
7078 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7080 new_stmt = gimple_build_assign (NULL_TREE,
7081 BIT_AND_EXPR, signs, high_bits);
7082 signs = make_ssa_name (word_type);
7083 gimple_assign_set_lhs (new_stmt, signs);
7084 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7085 result_low = make_ssa_name (word_type);
7086 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
7087 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7089 else
7091 tree a_low = make_ssa_name (word_type);
7092 new_stmt = gimple_build_assign (a_low,
7093 BIT_AND_EXPR, wvop0, low_bits);
7094 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7095 signs = make_ssa_name (word_type);
7096 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7097 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7098 new_stmt = gimple_build_assign (NULL_TREE,
7099 BIT_AND_EXPR, signs, high_bits);
7100 signs = make_ssa_name (word_type);
7101 gimple_assign_set_lhs (new_stmt, signs);
7102 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7103 result_low = make_ssa_name (word_type);
7104 new_stmt = gimple_build_assign (result_low,
7105 MINUS_EXPR, high_bits, a_low);
7106 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7108 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
7109 signs);
7110 result_low = make_ssa_name (word_type);
7111 gimple_assign_set_lhs (new_stmt, result_low);
7112 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7113 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7114 build1 (VIEW_CONVERT_EXPR,
7115 vectype, result_low));
7116 new_temp = make_ssa_name (vectype);
7117 gimple_assign_set_lhs (new_stmt, new_temp);
7118 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7120 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7122 tree mask;
7123 if (masked_loop_p)
7124 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7125 vec_num * ncopies, vectype, i);
7126 else
7127 /* Dummy mask. */
7128 mask = build_minus_one_cst (truth_type_for (vectype));
7129 auto_vec<tree> vops (6);
7130 vops.quick_push (mask);
7131 vops.quick_push (vop0);
7132 if (vop1)
7133 vops.quick_push (vop1);
7134 if (vop2)
7135 vops.quick_push (vop2);
7136 if (reduc_idx >= 0)
7138 /* Perform the operation on active elements only and take
7139 inactive elements from the reduction chain input. */
7140 gcc_assert (!vop2);
7141 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7143 else
7145 auto else_value = targetm.preferred_else_value
7146 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7147 vops.quick_push (else_value);
7149 if (len_loop_p)
7151 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7152 vec_num * ncopies, vectype, i, 1);
7153 signed char biasval
7154 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7155 tree bias = build_int_cst (intQI_type_node, biasval);
7156 vops.quick_push (len);
7157 vops.quick_push (bias);
7159 gcall *call
7160 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7161 : cond_len_fn,
7162 vops);
7163 new_temp = make_ssa_name (vec_dest, call);
7164 gimple_call_set_lhs (call, new_temp);
7165 gimple_call_set_nothrow (call, true);
7166 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7167 new_stmt = call;
7169 else
7171 tree mask = NULL_TREE;
7172 /* When combining two masks check if either of them is elsewhere
7173 combined with a loop mask, if that's the case we can mark that the
7174 new combined mask doesn't need to be combined with a loop mask. */
7175 if (masked_loop_p
7176 && code == BIT_AND_EXPR
7177 && VECTOR_BOOLEAN_TYPE_P (vectype))
7179 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7180 ncopies}))
7182 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7183 vec_num * ncopies, vectype, i);
7185 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7186 vop0, gsi);
7189 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7190 ncopies }))
7192 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7193 vec_num * ncopies, vectype, i);
7195 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7196 vop1, gsi);
7200 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7201 new_temp = make_ssa_name (vec_dest, new_stmt);
7202 gimple_assign_set_lhs (new_stmt, new_temp);
7203 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7204 if (using_emulated_vectors_p)
7205 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7207 /* Enter the combined value into the vector cond hash so we don't
7208 AND it with a loop mask again. */
7209 if (mask)
7210 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7213 if (vec_cvt_dest)
7215 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7216 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7217 new_temp);
7218 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7219 gimple_assign_set_lhs (new_stmt, new_temp);
7220 vect_finish_stmt_generation (vinfo, stmt_info,
7221 new_stmt, gsi);
7224 if (slp_node)
7225 slp_node->push_vec_def (new_stmt);
7226 else
7227 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7230 if (!slp_node)
7231 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7233 vec_oprnds0.release ();
7234 vec_oprnds1.release ();
7235 vec_oprnds2.release ();
7237 return true;
7240 /* A helper function to ensure data reference DR_INFO's base alignment. */
7242 static void
7243 ensure_base_align (dr_vec_info *dr_info)
7245 /* Alignment is only analyzed for the first element of a DR group,
7246 use that to look at base alignment we need to enforce. */
7247 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7248 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7250 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7252 if (dr_info->base_misaligned)
7254 tree base_decl = dr_info->base_decl;
7256 // We should only be able to increase the alignment of a base object if
7257 // we know what its new alignment should be at compile time.
7258 unsigned HOST_WIDE_INT align_base_to =
7259 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7261 if (decl_in_symtab_p (base_decl))
7262 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7263 else if (DECL_ALIGN (base_decl) < align_base_to)
7265 SET_DECL_ALIGN (base_decl, align_base_to);
7266 DECL_USER_ALIGN (base_decl) = 1;
7268 dr_info->base_misaligned = false;
7273 /* Function get_group_alias_ptr_type.
7275 Return the alias type for the group starting at FIRST_STMT_INFO. */
7277 static tree
7278 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7280 struct data_reference *first_dr, *next_dr;
7282 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7283 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7284 while (next_stmt_info)
7286 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7287 if (get_alias_set (DR_REF (first_dr))
7288 != get_alias_set (DR_REF (next_dr)))
7290 if (dump_enabled_p ())
7291 dump_printf_loc (MSG_NOTE, vect_location,
7292 "conflicting alias set types.\n");
7293 return ptr_type_node;
7295 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7297 return reference_alias_ptr_type (DR_REF (first_dr));
7301 /* Function scan_operand_equal_p.
7303 Helper function for check_scan_store. Compare two references
7304 with .GOMP_SIMD_LANE bases. */
7306 static bool
7307 scan_operand_equal_p (tree ref1, tree ref2)
7309 tree ref[2] = { ref1, ref2 };
7310 poly_int64 bitsize[2], bitpos[2];
7311 tree offset[2], base[2];
7312 for (int i = 0; i < 2; ++i)
7314 machine_mode mode;
7315 int unsignedp, reversep, volatilep = 0;
7316 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7317 &offset[i], &mode, &unsignedp,
7318 &reversep, &volatilep);
7319 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7320 return false;
7321 if (TREE_CODE (base[i]) == MEM_REF
7322 && offset[i] == NULL_TREE
7323 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7325 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7326 if (is_gimple_assign (def_stmt)
7327 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7328 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7329 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7331 if (maybe_ne (mem_ref_offset (base[i]), 0))
7332 return false;
7333 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7334 offset[i] = gimple_assign_rhs2 (def_stmt);
7339 if (!operand_equal_p (base[0], base[1], 0))
7340 return false;
7341 if (maybe_ne (bitsize[0], bitsize[1]))
7342 return false;
7343 if (offset[0] != offset[1])
7345 if (!offset[0] || !offset[1])
7346 return false;
7347 if (!operand_equal_p (offset[0], offset[1], 0))
7349 tree step[2];
7350 for (int i = 0; i < 2; ++i)
7352 step[i] = integer_one_node;
7353 if (TREE_CODE (offset[i]) == SSA_NAME)
7355 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7356 if (is_gimple_assign (def_stmt)
7357 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7358 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7359 == INTEGER_CST))
7361 step[i] = gimple_assign_rhs2 (def_stmt);
7362 offset[i] = gimple_assign_rhs1 (def_stmt);
7365 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7367 step[i] = TREE_OPERAND (offset[i], 1);
7368 offset[i] = TREE_OPERAND (offset[i], 0);
7370 tree rhs1 = NULL_TREE;
7371 if (TREE_CODE (offset[i]) == SSA_NAME)
7373 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7374 if (gimple_assign_cast_p (def_stmt))
7375 rhs1 = gimple_assign_rhs1 (def_stmt);
7377 else if (CONVERT_EXPR_P (offset[i]))
7378 rhs1 = TREE_OPERAND (offset[i], 0);
7379 if (rhs1
7380 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7381 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7382 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7383 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7384 offset[i] = rhs1;
7386 if (!operand_equal_p (offset[0], offset[1], 0)
7387 || !operand_equal_p (step[0], step[1], 0))
7388 return false;
7391 return true;
7395 enum scan_store_kind {
7396 /* Normal permutation. */
7397 scan_store_kind_perm,
7399 /* Whole vector left shift permutation with zero init. */
7400 scan_store_kind_lshift_zero,
7402 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7403 scan_store_kind_lshift_cond
7406 /* Function check_scan_store.
7408 Verify if we can perform the needed permutations or whole vector shifts.
7409 Return -1 on failure, otherwise exact log2 of vectype's nunits.
7410 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
7411 to do at each step. */
7413 static int
7414 scan_store_can_perm_p (tree vectype, tree init,
7415 vec<enum scan_store_kind> *use_whole_vector = NULL)
7417 enum machine_mode vec_mode = TYPE_MODE (vectype);
7418 unsigned HOST_WIDE_INT nunits;
7419 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7420 return -1;
7421 int units_log2 = exact_log2 (nunits);
7422 if (units_log2 <= 0)
7423 return -1;
7425 int i;
7426 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7427 for (i = 0; i <= units_log2; ++i)
7429 unsigned HOST_WIDE_INT j, k;
7430 enum scan_store_kind kind = scan_store_kind_perm;
7431 vec_perm_builder sel (nunits, nunits, 1);
7432 sel.quick_grow (nunits);
7433 if (i == units_log2)
7435 for (j = 0; j < nunits; ++j)
7436 sel[j] = nunits - 1;
7438 else
7440 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7441 sel[j] = j;
7442 for (k = 0; j < nunits; ++j, ++k)
7443 sel[j] = nunits + k;
7445 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7446 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7448 if (i == units_log2)
7449 return -1;
7451 if (whole_vector_shift_kind == scan_store_kind_perm)
7453 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7454 return -1;
7455 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7456 /* Whole vector shifts shift in zeros, so if init is all zero
7457 constant, there is no need to do anything further. */
7458 if ((TREE_CODE (init) != INTEGER_CST
7459 && TREE_CODE (init) != REAL_CST)
7460 || !initializer_zerop (init))
7462 tree masktype = truth_type_for (vectype);
7463 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7464 return -1;
7465 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7468 kind = whole_vector_shift_kind;
7470 if (use_whole_vector)
7472 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7473 use_whole_vector->safe_grow_cleared (i, true);
7474 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7475 use_whole_vector->safe_push (kind);
7479 return units_log2;
7483 /* Function check_scan_store.
7485 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7487 static bool
7488 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7489 enum vect_def_type rhs_dt, bool slp, tree mask,
7490 vect_memory_access_type memory_access_type)
7492 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7493 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7494 tree ref_type;
7496 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7497 if (slp
7498 || mask
7499 || memory_access_type != VMAT_CONTIGUOUS
7500 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7501 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7502 || loop_vinfo == NULL
7503 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7504 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7505 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7506 || !integer_zerop (DR_INIT (dr_info->dr))
7507 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7508 || !alias_sets_conflict_p (get_alias_set (vectype),
7509 get_alias_set (TREE_TYPE (ref_type))))
7511 if (dump_enabled_p ())
7512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7513 "unsupported OpenMP scan store.\n");
7514 return false;
7517 /* We need to pattern match code built by OpenMP lowering and simplified
7518 by following optimizations into something we can handle.
7519 #pragma omp simd reduction(inscan,+:r)
7520 for (...)
7522 r += something ();
7523 #pragma omp scan inclusive (r)
7524 use (r);
7526 shall have body with:
7527 // Initialization for input phase, store the reduction initializer:
7528 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7529 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7530 D.2042[_21] = 0;
7531 // Actual input phase:
7533 r.0_5 = D.2042[_20];
7534 _6 = _4 + r.0_5;
7535 D.2042[_20] = _6;
7536 // Initialization for scan phase:
7537 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7538 _26 = D.2043[_25];
7539 _27 = D.2042[_25];
7540 _28 = _26 + _27;
7541 D.2043[_25] = _28;
7542 D.2042[_25] = _28;
7543 // Actual scan phase:
7545 r.1_8 = D.2042[_20];
7547 The "omp simd array" variable D.2042 holds the privatized copy used
7548 inside of the loop and D.2043 is another one that holds copies of
7549 the current original list item. The separate GOMP_SIMD_LANE ifn
7550 kinds are there in order to allow optimizing the initializer store
7551 and combiner sequence, e.g. if it is originally some C++ish user
7552 defined reduction, but allow the vectorizer to pattern recognize it
7553 and turn into the appropriate vectorized scan.
7555 For exclusive scan, this is slightly different:
7556 #pragma omp simd reduction(inscan,+:r)
7557 for (...)
7559 use (r);
7560 #pragma omp scan exclusive (r)
7561 r += something ();
7563 shall have body with:
7564 // Initialization for input phase, store the reduction initializer:
7565 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7566 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7567 D.2042[_21] = 0;
7568 // Actual input phase:
7570 r.0_5 = D.2042[_20];
7571 _6 = _4 + r.0_5;
7572 D.2042[_20] = _6;
7573 // Initialization for scan phase:
7574 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7575 _26 = D.2043[_25];
7576 D.2044[_25] = _26;
7577 _27 = D.2042[_25];
7578 _28 = _26 + _27;
7579 D.2043[_25] = _28;
7580 // Actual scan phase:
7582 r.1_8 = D.2044[_20];
7583 ... */
7585 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7587 /* Match the D.2042[_21] = 0; store above. Just require that
7588 it is a constant or external definition store. */
7589 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7591 fail_init:
7592 if (dump_enabled_p ())
7593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7594 "unsupported OpenMP scan initializer store.\n");
7595 return false;
7598 if (! loop_vinfo->scan_map)
7599 loop_vinfo->scan_map = new hash_map<tree, tree>;
7600 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7601 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7602 if (cached)
7603 goto fail_init;
7604 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7606 /* These stores can be vectorized normally. */
7607 return true;
7610 if (rhs_dt != vect_internal_def)
7612 fail:
7613 if (dump_enabled_p ())
7614 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7615 "unsupported OpenMP scan combiner pattern.\n");
7616 return false;
7619 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7620 tree rhs = gimple_assign_rhs1 (stmt);
7621 if (TREE_CODE (rhs) != SSA_NAME)
7622 goto fail;
7624 gimple *other_store_stmt = NULL;
7625 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7626 bool inscan_var_store
7627 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7629 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7631 if (!inscan_var_store)
7633 use_operand_p use_p;
7634 imm_use_iterator iter;
7635 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7637 gimple *use_stmt = USE_STMT (use_p);
7638 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7639 continue;
7640 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7641 || !is_gimple_assign (use_stmt)
7642 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7643 || other_store_stmt
7644 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7645 goto fail;
7646 other_store_stmt = use_stmt;
7648 if (other_store_stmt == NULL)
7649 goto fail;
7650 rhs = gimple_assign_lhs (other_store_stmt);
7651 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7652 goto fail;
7655 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7657 use_operand_p use_p;
7658 imm_use_iterator iter;
7659 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7661 gimple *use_stmt = USE_STMT (use_p);
7662 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7663 continue;
7664 if (other_store_stmt)
7665 goto fail;
7666 other_store_stmt = use_stmt;
7669 else
7670 goto fail;
7672 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7673 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7674 || !is_gimple_assign (def_stmt)
7675 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7676 goto fail;
7678 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7679 /* For pointer addition, we should use the normal plus for the vector
7680 operation. */
7681 switch (code)
7683 case POINTER_PLUS_EXPR:
7684 code = PLUS_EXPR;
7685 break;
7686 case MULT_HIGHPART_EXPR:
7687 goto fail;
7688 default:
7689 break;
7691 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7692 goto fail;
7694 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7695 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7696 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7697 goto fail;
7699 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7700 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7701 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7702 || !gimple_assign_load_p (load1_stmt)
7703 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7704 || !gimple_assign_load_p (load2_stmt))
7705 goto fail;
7707 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7708 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7709 if (load1_stmt_info == NULL
7710 || load2_stmt_info == NULL
7711 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7712 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7713 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7714 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7715 goto fail;
7717 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7719 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7720 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7721 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7722 goto fail;
7723 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7724 tree lrhs;
7725 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7726 lrhs = rhs1;
7727 else
7728 lrhs = rhs2;
7729 use_operand_p use_p;
7730 imm_use_iterator iter;
7731 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7733 gimple *use_stmt = USE_STMT (use_p);
7734 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7735 continue;
7736 if (other_store_stmt)
7737 goto fail;
7738 other_store_stmt = use_stmt;
7742 if (other_store_stmt == NULL)
7743 goto fail;
7744 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7745 || !gimple_store_p (other_store_stmt))
7746 goto fail;
7748 stmt_vec_info other_store_stmt_info
7749 = loop_vinfo->lookup_stmt (other_store_stmt);
7750 if (other_store_stmt_info == NULL
7751 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7752 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7753 goto fail;
7755 gimple *stmt1 = stmt;
7756 gimple *stmt2 = other_store_stmt;
7757 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7758 std::swap (stmt1, stmt2);
7759 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7760 gimple_assign_rhs1 (load2_stmt)))
7762 std::swap (rhs1, rhs2);
7763 std::swap (load1_stmt, load2_stmt);
7764 std::swap (load1_stmt_info, load2_stmt_info);
7766 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7767 gimple_assign_rhs1 (load1_stmt)))
7768 goto fail;
7770 tree var3 = NULL_TREE;
7771 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7772 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7773 gimple_assign_rhs1 (load2_stmt)))
7774 goto fail;
7775 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7777 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7778 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7779 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7780 goto fail;
7781 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7782 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7783 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7784 || lookup_attribute ("omp simd inscan exclusive",
7785 DECL_ATTRIBUTES (var3)))
7786 goto fail;
7789 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7790 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7791 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7792 goto fail;
7794 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7795 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7796 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7797 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7798 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7799 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7800 goto fail;
7802 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7803 std::swap (var1, var2);
7805 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7807 if (!lookup_attribute ("omp simd inscan exclusive",
7808 DECL_ATTRIBUTES (var1)))
7809 goto fail;
7810 var1 = var3;
7813 if (loop_vinfo->scan_map == NULL)
7814 goto fail;
7815 tree *init = loop_vinfo->scan_map->get (var1);
7816 if (init == NULL)
7817 goto fail;
7819 /* The IL is as expected, now check if we can actually vectorize it.
7820 Inclusive scan:
7821 _26 = D.2043[_25];
7822 _27 = D.2042[_25];
7823 _28 = _26 + _27;
7824 D.2043[_25] = _28;
7825 D.2042[_25] = _28;
7826 should be vectorized as (where _40 is the vectorized rhs
7827 from the D.2042[_21] = 0; store):
7828 _30 = MEM <vector(8) int> [(int *)&D.2043];
7829 _31 = MEM <vector(8) int> [(int *)&D.2042];
7830 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7831 _33 = _31 + _32;
7832 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7833 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7834 _35 = _33 + _34;
7835 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7836 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7837 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7838 _37 = _35 + _36;
7839 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7840 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7841 _38 = _30 + _37;
7842 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7843 MEM <vector(8) int> [(int *)&D.2043] = _39;
7844 MEM <vector(8) int> [(int *)&D.2042] = _38;
7845 Exclusive scan:
7846 _26 = D.2043[_25];
7847 D.2044[_25] = _26;
7848 _27 = D.2042[_25];
7849 _28 = _26 + _27;
7850 D.2043[_25] = _28;
7851 should be vectorized as (where _40 is the vectorized rhs
7852 from the D.2042[_21] = 0; store):
7853 _30 = MEM <vector(8) int> [(int *)&D.2043];
7854 _31 = MEM <vector(8) int> [(int *)&D.2042];
7855 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7856 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7857 _34 = _32 + _33;
7858 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7859 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7860 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7861 _36 = _34 + _35;
7862 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7863 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7864 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7865 _38 = _36 + _37;
7866 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7867 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7868 _39 = _30 + _38;
7869 _50 = _31 + _39;
7870 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7871 MEM <vector(8) int> [(int *)&D.2044] = _39;
7872 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7873 enum machine_mode vec_mode = TYPE_MODE (vectype);
7874 optab optab = optab_for_tree_code (code, vectype, optab_default);
7875 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7876 goto fail;
7878 int units_log2 = scan_store_can_perm_p (vectype, *init);
7879 if (units_log2 == -1)
7880 goto fail;
7882 return true;
7886 /* Function vectorizable_scan_store.
7888 Helper of vectorizable_score, arguments like on vectorizable_store.
7889 Handle only the transformation, checking is done in check_scan_store. */
7891 static bool
7892 vectorizable_scan_store (vec_info *vinfo,
7893 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7894 gimple **vec_stmt, int ncopies)
7896 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7897 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7898 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7899 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7901 if (dump_enabled_p ())
7902 dump_printf_loc (MSG_NOTE, vect_location,
7903 "transform scan store. ncopies = %d\n", ncopies);
7905 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7906 tree rhs = gimple_assign_rhs1 (stmt);
7907 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7909 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7910 bool inscan_var_store
7911 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7913 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7915 use_operand_p use_p;
7916 imm_use_iterator iter;
7917 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7919 gimple *use_stmt = USE_STMT (use_p);
7920 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7921 continue;
7922 rhs = gimple_assign_lhs (use_stmt);
7923 break;
7927 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7928 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7929 if (code == POINTER_PLUS_EXPR)
7930 code = PLUS_EXPR;
7931 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7932 && commutative_tree_code (code));
7933 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7934 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7935 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7936 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7937 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7938 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7939 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7940 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7941 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7942 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7943 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7945 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7947 std::swap (rhs1, rhs2);
7948 std::swap (var1, var2);
7949 std::swap (load1_dr_info, load2_dr_info);
7952 tree *init = loop_vinfo->scan_map->get (var1);
7953 gcc_assert (init);
7955 unsigned HOST_WIDE_INT nunits;
7956 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7957 gcc_unreachable ();
7958 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7959 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7960 gcc_assert (units_log2 > 0);
7961 auto_vec<tree, 16> perms;
7962 perms.quick_grow (units_log2 + 1);
7963 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7964 for (int i = 0; i <= units_log2; ++i)
7966 unsigned HOST_WIDE_INT j, k;
7967 vec_perm_builder sel (nunits, nunits, 1);
7968 sel.quick_grow (nunits);
7969 if (i == units_log2)
7970 for (j = 0; j < nunits; ++j)
7971 sel[j] = nunits - 1;
7972 else
7974 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7975 sel[j] = j;
7976 for (k = 0; j < nunits; ++j, ++k)
7977 sel[j] = nunits + k;
7979 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7980 if (!use_whole_vector.is_empty ()
7981 && use_whole_vector[i] != scan_store_kind_perm)
7983 if (zero_vec == NULL_TREE)
7984 zero_vec = build_zero_cst (vectype);
7985 if (masktype == NULL_TREE
7986 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7987 masktype = truth_type_for (vectype);
7988 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7990 else
7991 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7994 tree vec_oprnd1 = NULL_TREE;
7995 tree vec_oprnd2 = NULL_TREE;
7996 tree vec_oprnd3 = NULL_TREE;
7997 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7998 tree dataref_offset = build_int_cst (ref_type, 0);
7999 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
8000 vectype, VMAT_CONTIGUOUS);
8001 tree ldataref_ptr = NULL_TREE;
8002 tree orig = NULL_TREE;
8003 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
8004 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
8005 auto_vec<tree> vec_oprnds1;
8006 auto_vec<tree> vec_oprnds2;
8007 auto_vec<tree> vec_oprnds3;
8008 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
8009 *init, &vec_oprnds1,
8010 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
8011 rhs2, &vec_oprnds3);
8012 for (int j = 0; j < ncopies; j++)
8014 vec_oprnd1 = vec_oprnds1[j];
8015 if (ldataref_ptr == NULL)
8016 vec_oprnd2 = vec_oprnds2[j];
8017 vec_oprnd3 = vec_oprnds3[j];
8018 if (j == 0)
8019 orig = vec_oprnd3;
8020 else if (!inscan_var_store)
8021 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8023 if (ldataref_ptr)
8025 vec_oprnd2 = make_ssa_name (vectype);
8026 tree data_ref = fold_build2 (MEM_REF, vectype,
8027 unshare_expr (ldataref_ptr),
8028 dataref_offset);
8029 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
8030 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
8031 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8032 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8033 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8036 tree v = vec_oprnd2;
8037 for (int i = 0; i < units_log2; ++i)
8039 tree new_temp = make_ssa_name (vectype);
8040 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
8041 (zero_vec
8042 && (use_whole_vector[i]
8043 != scan_store_kind_perm))
8044 ? zero_vec : vec_oprnd1, v,
8045 perms[i]);
8046 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8047 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8048 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8050 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
8052 /* Whole vector shift shifted in zero bits, but if *init
8053 is not initializer_zerop, we need to replace those elements
8054 with elements from vec_oprnd1. */
8055 tree_vector_builder vb (masktype, nunits, 1);
8056 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
8057 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
8058 ? boolean_false_node : boolean_true_node);
8060 tree new_temp2 = make_ssa_name (vectype);
8061 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
8062 new_temp, vec_oprnd1);
8063 vect_finish_stmt_generation (vinfo, stmt_info,
8064 g, gsi);
8065 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8066 new_temp = new_temp2;
8069 /* For exclusive scan, perform the perms[i] permutation once
8070 more. */
8071 if (i == 0
8072 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
8073 && v == vec_oprnd2)
8075 v = new_temp;
8076 --i;
8077 continue;
8080 tree new_temp2 = make_ssa_name (vectype);
8081 g = gimple_build_assign (new_temp2, code, v, new_temp);
8082 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8083 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8085 v = new_temp2;
8088 tree new_temp = make_ssa_name (vectype);
8089 gimple *g = gimple_build_assign (new_temp, code, orig, v);
8090 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8091 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8093 tree last_perm_arg = new_temp;
8094 /* For exclusive scan, new_temp computed above is the exclusive scan
8095 prefix sum. Turn it into inclusive prefix sum for the broadcast
8096 of the last element into orig. */
8097 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8099 last_perm_arg = make_ssa_name (vectype);
8100 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8101 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8102 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8105 orig = make_ssa_name (vectype);
8106 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8107 last_perm_arg, perms[units_log2]);
8108 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8109 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8111 if (!inscan_var_store)
8113 tree data_ref = fold_build2 (MEM_REF, vectype,
8114 unshare_expr (dataref_ptr),
8115 dataref_offset);
8116 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8117 g = gimple_build_assign (data_ref, new_temp);
8118 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8119 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8123 if (inscan_var_store)
8124 for (int j = 0; j < ncopies; j++)
8126 if (j != 0)
8127 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8129 tree data_ref = fold_build2 (MEM_REF, vectype,
8130 unshare_expr (dataref_ptr),
8131 dataref_offset);
8132 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8133 gimple *g = gimple_build_assign (data_ref, orig);
8134 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8135 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8137 return true;
8141 /* Function vectorizable_store.
8143 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
8144 that can be vectorized.
8145 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8146 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8147 Return true if STMT_INFO is vectorizable in this way. */
8149 static bool
8150 vectorizable_store (vec_info *vinfo,
8151 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8152 gimple **vec_stmt, slp_tree slp_node,
8153 stmt_vector_for_cost *cost_vec)
8155 tree data_ref;
8156 tree op;
8157 tree vec_oprnd = NULL_TREE;
8158 tree elem_type;
8159 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8160 class loop *loop = NULL;
8161 machine_mode vec_mode;
8162 tree dummy;
8163 enum vect_def_type rhs_dt = vect_unknown_def_type;
8164 enum vect_def_type mask_dt = vect_unknown_def_type;
8165 tree dataref_ptr = NULL_TREE;
8166 tree dataref_offset = NULL_TREE;
8167 gimple *ptr_incr = NULL;
8168 int ncopies;
8169 int j;
8170 stmt_vec_info first_stmt_info;
8171 bool grouped_store;
8172 unsigned int group_size, i;
8173 bool slp = (slp_node != NULL);
8174 unsigned int vec_num;
8175 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8176 tree aggr_type;
8177 gather_scatter_info gs_info;
8178 poly_uint64 vf;
8179 vec_load_store_type vls_type;
8180 tree ref_type;
8182 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8183 return false;
8185 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8186 && ! vec_stmt)
8187 return false;
8189 /* Is vectorizable store? */
8191 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8192 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8194 tree scalar_dest = gimple_assign_lhs (assign);
8195 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8196 && is_pattern_stmt_p (stmt_info))
8197 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8198 if (TREE_CODE (scalar_dest) != ARRAY_REF
8199 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8200 && TREE_CODE (scalar_dest) != INDIRECT_REF
8201 && TREE_CODE (scalar_dest) != COMPONENT_REF
8202 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8203 && TREE_CODE (scalar_dest) != REALPART_EXPR
8204 && TREE_CODE (scalar_dest) != MEM_REF)
8205 return false;
8207 else
8209 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8210 if (!call || !gimple_call_internal_p (call))
8211 return false;
8213 internal_fn ifn = gimple_call_internal_fn (call);
8214 if (!internal_store_fn_p (ifn))
8215 return false;
8217 int mask_index = internal_fn_mask_index (ifn);
8218 if (mask_index >= 0 && slp_node)
8219 mask_index = vect_slp_child_index_for_operand (call, mask_index);
8220 if (mask_index >= 0
8221 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8222 &mask, NULL, &mask_dt, &mask_vectype))
8223 return false;
8226 op = vect_get_store_rhs (stmt_info);
8228 /* Cannot have hybrid store SLP -- that would mean storing to the
8229 same location twice. */
8230 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8232 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8233 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8235 if (loop_vinfo)
8237 loop = LOOP_VINFO_LOOP (loop_vinfo);
8238 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8240 else
8241 vf = 1;
8243 /* Multiple types in SLP are handled by creating the appropriate number of
8244 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8245 case of SLP. */
8246 if (slp)
8247 ncopies = 1;
8248 else
8249 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8251 gcc_assert (ncopies >= 1);
8253 /* FORNOW. This restriction should be relaxed. */
8254 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8256 if (dump_enabled_p ())
8257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8258 "multiple types in nested loop.\n");
8259 return false;
8262 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8263 op, &rhs_dt, &rhs_vectype, &vls_type))
8264 return false;
8266 elem_type = TREE_TYPE (vectype);
8267 vec_mode = TYPE_MODE (vectype);
8269 if (!STMT_VINFO_DATA_REF (stmt_info))
8270 return false;
8272 vect_memory_access_type memory_access_type;
8273 enum dr_alignment_support alignment_support_scheme;
8274 int misalignment;
8275 poly_int64 poffset;
8276 internal_fn lanes_ifn;
8277 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8278 ncopies, &memory_access_type, &poffset,
8279 &alignment_support_scheme, &misalignment, &gs_info,
8280 &lanes_ifn))
8281 return false;
8283 if (mask)
8285 if (memory_access_type == VMAT_CONTIGUOUS)
8287 if (!VECTOR_MODE_P (vec_mode)
8288 || !can_vec_mask_load_store_p (vec_mode,
8289 TYPE_MODE (mask_vectype), false))
8290 return false;
8292 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8293 && (memory_access_type != VMAT_GATHER_SCATTER
8294 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8296 if (dump_enabled_p ())
8297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8298 "unsupported access type for masked store.\n");
8299 return false;
8301 else if (memory_access_type == VMAT_GATHER_SCATTER
8302 && gs_info.ifn == IFN_LAST
8303 && !gs_info.decl)
8305 if (dump_enabled_p ())
8306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8307 "unsupported masked emulated scatter.\n");
8308 return false;
8311 else
8313 /* FORNOW. In some cases can vectorize even if data-type not supported
8314 (e.g. - array initialization with 0). */
8315 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8316 return false;
8319 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8320 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8321 && memory_access_type != VMAT_GATHER_SCATTER
8322 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8323 if (grouped_store)
8325 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8326 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8327 group_size = DR_GROUP_SIZE (first_stmt_info);
8329 else
8331 first_stmt_info = stmt_info;
8332 first_dr_info = dr_info;
8333 group_size = vec_num = 1;
8336 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8338 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8339 memory_access_type))
8340 return false;
8343 bool costing_p = !vec_stmt;
8344 if (costing_p) /* transformation not required. */
8346 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8348 if (loop_vinfo
8349 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8350 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8351 vls_type, group_size,
8352 memory_access_type, &gs_info,
8353 mask);
8355 if (slp_node
8356 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8357 vectype))
8359 if (dump_enabled_p ())
8360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8361 "incompatible vector types for invariants\n");
8362 return false;
8365 if (dump_enabled_p ()
8366 && memory_access_type != VMAT_ELEMENTWISE
8367 && memory_access_type != VMAT_GATHER_SCATTER
8368 && alignment_support_scheme != dr_aligned)
8369 dump_printf_loc (MSG_NOTE, vect_location,
8370 "Vectorizing an unaligned access.\n");
8372 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8374 /* As function vect_transform_stmt shows, for interleaving stores
8375 the whole chain is vectorized when the last store in the chain
8376 is reached, the other stores in the group are skipped. So we
8377 want to only cost the last one here, but it's not trivial to
8378 get the last, as it's equivalent to use the first one for
8379 costing, use the first one instead. */
8380 if (grouped_store
8381 && !slp
8382 && first_stmt_info != stmt_info)
8383 return true;
8385 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8387 /* Transform. */
8389 ensure_base_align (dr_info);
8391 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8393 vect_build_scatter_store_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info,
8394 mask, cost_vec);
8395 return true;
8397 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8399 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8400 gcc_assert (!slp);
8401 if (costing_p)
8403 unsigned int inside_cost = 0, prologue_cost = 0;
8404 if (vls_type == VLS_STORE_INVARIANT)
8405 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8406 stmt_info, 0, vect_prologue);
8407 vect_get_store_cost (vinfo, stmt_info, ncopies,
8408 alignment_support_scheme, misalignment,
8409 &inside_cost, cost_vec);
8411 if (dump_enabled_p ())
8412 dump_printf_loc (MSG_NOTE, vect_location,
8413 "vect_model_store_cost: inside_cost = %d, "
8414 "prologue_cost = %d .\n",
8415 inside_cost, prologue_cost);
8417 return true;
8419 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8422 if (grouped_store)
8424 /* FORNOW */
8425 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8427 if (slp)
8429 grouped_store = false;
8430 /* VEC_NUM is the number of vect stmts to be created for this
8431 group. */
8432 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8433 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8434 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8435 == first_stmt_info);
8436 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8437 op = vect_get_store_rhs (first_stmt_info);
8439 else
8440 /* VEC_NUM is the number of vect stmts to be created for this
8441 group. */
8442 vec_num = group_size;
8444 ref_type = get_group_alias_ptr_type (first_stmt_info);
8446 else
8447 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8449 if (!costing_p && dump_enabled_p ())
8450 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8451 ncopies);
8453 /* Check if we need to update prologue cost for invariant,
8454 and update it accordingly if so. If it's not for
8455 interleaving store, we can just check vls_type; but if
8456 it's for interleaving store, need to check the def_type
8457 of the stored value since the current vls_type is just
8458 for first_stmt_info. */
8459 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8461 gcc_assert (costing_p);
8462 if (slp)
8463 return;
8464 if (grouped_store)
8466 gcc_assert (store_rhs);
8467 enum vect_def_type cdt;
8468 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8469 if (cdt != vect_constant_def && cdt != vect_external_def)
8470 return;
8472 else if (vls_type != VLS_STORE_INVARIANT)
8473 return;
8474 *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
8475 0, vect_prologue);
8478 if (memory_access_type == VMAT_ELEMENTWISE
8479 || memory_access_type == VMAT_STRIDED_SLP)
8481 unsigned inside_cost = 0, prologue_cost = 0;
8482 gimple_stmt_iterator incr_gsi;
8483 bool insert_after;
8484 gimple *incr;
8485 tree offvar;
8486 tree ivstep;
8487 tree running_off;
8488 tree stride_base, stride_step, alias_off;
8489 tree vec_oprnd = NULL_TREE;
8490 tree dr_offset;
8491 unsigned int g;
8492 /* Checked by get_load_store_type. */
8493 unsigned int const_nunits = nunits.to_constant ();
8495 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8496 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8498 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8499 stride_base
8500 = fold_build_pointer_plus
8501 (DR_BASE_ADDRESS (first_dr_info->dr),
8502 size_binop (PLUS_EXPR,
8503 convert_to_ptrofftype (dr_offset),
8504 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8505 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8507 /* For a store with loop-invariant (but other than power-of-2)
8508 stride (i.e. not a grouped access) like so:
8510 for (i = 0; i < n; i += stride)
8511 array[i] = ...;
8513 we generate a new induction variable and new stores from
8514 the components of the (vectorized) rhs:
8516 for (j = 0; ; j += VF*stride)
8517 vectemp = ...;
8518 tmp1 = vectemp[0];
8519 array[j] = tmp1;
8520 tmp2 = vectemp[1];
8521 array[j + stride] = tmp2;
8525 unsigned nstores = const_nunits;
8526 unsigned lnel = 1;
8527 tree ltype = elem_type;
8528 tree lvectype = vectype;
8529 if (slp)
8531 if (group_size < const_nunits
8532 && const_nunits % group_size == 0)
8534 nstores = const_nunits / group_size;
8535 lnel = group_size;
8536 ltype = build_vector_type (elem_type, group_size);
8537 lvectype = vectype;
8539 /* First check if vec_extract optab doesn't support extraction
8540 of vector elts directly. */
8541 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8542 machine_mode vmode;
8543 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8544 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8545 group_size).exists (&vmode)
8546 || (convert_optab_handler (vec_extract_optab,
8547 TYPE_MODE (vectype), vmode)
8548 == CODE_FOR_nothing))
8550 /* Try to avoid emitting an extract of vector elements
8551 by performing the extracts using an integer type of the
8552 same size, extracting from a vector of those and then
8553 re-interpreting it as the original vector type if
8554 supported. */
8555 unsigned lsize
8556 = group_size * GET_MODE_BITSIZE (elmode);
8557 unsigned int lnunits = const_nunits / group_size;
8558 /* If we can't construct such a vector fall back to
8559 element extracts from the original vector type and
8560 element size stores. */
8561 if (int_mode_for_size (lsize, 0).exists (&elmode)
8562 && VECTOR_MODE_P (TYPE_MODE (vectype))
8563 && related_vector_mode (TYPE_MODE (vectype), elmode,
8564 lnunits).exists (&vmode)
8565 && (convert_optab_handler (vec_extract_optab,
8566 vmode, elmode)
8567 != CODE_FOR_nothing))
8569 nstores = lnunits;
8570 lnel = group_size;
8571 ltype = build_nonstandard_integer_type (lsize, 1);
8572 lvectype = build_vector_type (ltype, nstores);
8574 /* Else fall back to vector extraction anyway.
8575 Fewer stores are more important than avoiding spilling
8576 of the vector we extract from. Compared to the
8577 construction case in vectorizable_load no store-forwarding
8578 issue exists here for reasonable archs. */
8581 else if (group_size >= const_nunits
8582 && group_size % const_nunits == 0)
8584 int mis_align = dr_misalignment (first_dr_info, vectype);
8585 dr_alignment_support dr_align
8586 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8587 mis_align);
8588 if (dr_align == dr_aligned
8589 || dr_align == dr_unaligned_supported)
8591 nstores = 1;
8592 lnel = const_nunits;
8593 ltype = vectype;
8594 lvectype = vectype;
8595 alignment_support_scheme = dr_align;
8596 misalignment = mis_align;
8599 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8600 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8603 if (!costing_p)
8605 ivstep = stride_step;
8606 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8607 build_int_cst (TREE_TYPE (ivstep), vf));
8609 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8611 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8612 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8613 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8614 insert_after, &offvar, NULL);
8615 incr = gsi_stmt (incr_gsi);
8617 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8620 alias_off = build_int_cst (ref_type, 0);
8621 stmt_vec_info next_stmt_info = first_stmt_info;
8622 auto_vec<tree> vec_oprnds (ncopies);
8623 for (g = 0; g < group_size; g++)
8625 running_off = offvar;
8626 if (!costing_p)
8628 if (g)
8630 tree size = TYPE_SIZE_UNIT (ltype);
8631 tree pos
8632 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8633 tree newoff = copy_ssa_name (running_off, NULL);
8634 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8635 running_off, pos);
8636 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8637 running_off = newoff;
8640 if (!slp)
8641 op = vect_get_store_rhs (next_stmt_info);
8642 if (!costing_p)
8643 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
8644 &vec_oprnds);
8645 else
8646 update_prologue_cost (&prologue_cost, op);
8647 unsigned int group_el = 0;
8648 unsigned HOST_WIDE_INT
8649 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8650 for (j = 0; j < ncopies; j++)
8652 if (!costing_p)
8654 vec_oprnd = vec_oprnds[j];
8655 /* Pun the vector to extract from if necessary. */
8656 if (lvectype != vectype)
8658 tree tem = make_ssa_name (lvectype);
8659 tree cvt
8660 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8661 gimple *pun = gimple_build_assign (tem, cvt);
8662 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8663 vec_oprnd = tem;
8666 for (i = 0; i < nstores; i++)
8668 if (costing_p)
8670 /* Only need vector extracting when there are more
8671 than one stores. */
8672 if (nstores > 1)
8673 inside_cost
8674 += record_stmt_cost (cost_vec, 1, vec_to_scalar,
8675 stmt_info, 0, vect_body);
8676 /* Take a single lane vector type store as scalar
8677 store to avoid ICE like 110776. */
8678 if (VECTOR_TYPE_P (ltype)
8679 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8680 vect_get_store_cost (vinfo, stmt_info, 1,
8681 alignment_support_scheme,
8682 misalignment, &inside_cost,
8683 cost_vec);
8684 else
8685 inside_cost
8686 += record_stmt_cost (cost_vec, 1, scalar_store,
8687 stmt_info, 0, vect_body);
8688 continue;
8690 tree newref, newoff;
8691 gimple *incr, *assign;
8692 tree size = TYPE_SIZE (ltype);
8693 /* Extract the i'th component. */
8694 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8695 bitsize_int (i), size);
8696 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8697 size, pos);
8699 elem = force_gimple_operand_gsi (gsi, elem, true,
8700 NULL_TREE, true,
8701 GSI_SAME_STMT);
8703 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8704 group_el * elsz);
8705 newref = build2 (MEM_REF, ltype,
8706 running_off, this_off);
8707 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8709 /* And store it to *running_off. */
8710 assign = gimple_build_assign (newref, elem);
8711 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8713 group_el += lnel;
8714 if (! slp
8715 || group_el == group_size)
8717 newoff = copy_ssa_name (running_off, NULL);
8718 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8719 running_off, stride_step);
8720 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8722 running_off = newoff;
8723 group_el = 0;
8725 if (g == group_size - 1
8726 && !slp)
8728 if (j == 0 && i == 0)
8729 *vec_stmt = assign;
8730 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8734 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8735 vec_oprnds.truncate(0);
8736 if (slp)
8737 break;
8740 if (costing_p && dump_enabled_p ())
8741 dump_printf_loc (MSG_NOTE, vect_location,
8742 "vect_model_store_cost: inside_cost = %d, "
8743 "prologue_cost = %d .\n",
8744 inside_cost, prologue_cost);
8746 return true;
8749 gcc_assert (alignment_support_scheme);
8750 vec_loop_masks *loop_masks
8751 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8752 ? &LOOP_VINFO_MASKS (loop_vinfo)
8753 : NULL);
8754 vec_loop_lens *loop_lens
8755 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8756 ? &LOOP_VINFO_LENS (loop_vinfo)
8757 : NULL);
8759 /* Shouldn't go with length-based approach if fully masked. */
8760 gcc_assert (!loop_lens || !loop_masks);
8762 /* Targets with store-lane instructions must not require explicit
8763 realignment. vect_supportable_dr_alignment always returns either
8764 dr_aligned or dr_unaligned_supported for masked operations. */
8765 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8766 && !mask
8767 && !loop_masks)
8768 || alignment_support_scheme == dr_aligned
8769 || alignment_support_scheme == dr_unaligned_supported);
8771 tree offset = NULL_TREE;
8772 if (!known_eq (poffset, 0))
8773 offset = size_int (poffset);
8775 tree bump;
8776 tree vec_offset = NULL_TREE;
8777 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8779 aggr_type = NULL_TREE;
8780 bump = NULL_TREE;
8782 else if (memory_access_type == VMAT_GATHER_SCATTER)
8784 aggr_type = elem_type;
8785 if (!costing_p)
8786 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8787 &bump, &vec_offset, loop_lens);
8789 else
8791 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8792 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8793 else
8794 aggr_type = vectype;
8795 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8796 memory_access_type, loop_lens);
8799 if (mask && !costing_p)
8800 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8802 /* In case the vectorization factor (VF) is bigger than the number
8803 of elements that we can fit in a vectype (nunits), we have to generate
8804 more than one vector stmt - i.e - we need to "unroll" the
8805 vector stmt by a factor VF/nunits. */
8807 /* In case of interleaving (non-unit grouped access):
8809 S1: &base + 2 = x2
8810 S2: &base = x0
8811 S3: &base + 1 = x1
8812 S4: &base + 3 = x3
8814 We create vectorized stores starting from base address (the access of the
8815 first stmt in the chain (S2 in the above example), when the last store stmt
8816 of the chain (S4) is reached:
8818 VS1: &base = vx2
8819 VS2: &base + vec_size*1 = vx0
8820 VS3: &base + vec_size*2 = vx1
8821 VS4: &base + vec_size*3 = vx3
8823 Then permutation statements are generated:
8825 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8826 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8829 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8830 (the order of the data-refs in the output of vect_permute_store_chain
8831 corresponds to the order of scalar stmts in the interleaving chain - see
8832 the documentation of vect_permute_store_chain()).
8834 In case of both multiple types and interleaving, above vector stores and
8835 permutation stmts are created for every copy. The result vector stmts are
8836 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8837 STMT_VINFO_RELATED_STMT for the next copies.
8840 auto_vec<tree> dr_chain (group_size);
8841 auto_vec<tree> vec_masks;
8842 tree vec_mask = NULL;
8843 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8844 for (i = 0; i < group_size; i++)
8845 gvec_oprnds.quick_push (new auto_vec<tree> (ncopies));
8847 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8849 gcc_assert (!slp && grouped_store);
8850 unsigned inside_cost = 0, prologue_cost = 0;
8851 for (j = 0; j < ncopies; j++)
8853 gimple *new_stmt;
8854 if (j == 0)
8856 /* For interleaved stores we collect vectorized defs for all
8857 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8858 as an input to vect_permute_store_chain(). */
8859 stmt_vec_info next_stmt_info = first_stmt_info;
8860 for (i = 0; i < group_size; i++)
8862 /* Since gaps are not supported for interleaved stores,
8863 DR_GROUP_SIZE is the exact number of stmts in the
8864 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8865 op = vect_get_store_rhs (next_stmt_info);
8866 if (costing_p)
8867 update_prologue_cost (&prologue_cost, op);
8868 else
8870 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8871 ncopies, op,
8872 gvec_oprnds[i]);
8873 vec_oprnd = (*gvec_oprnds[i])[0];
8874 dr_chain.quick_push (vec_oprnd);
8876 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8879 if (!costing_p)
8881 if (mask)
8883 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8884 mask, &vec_masks,
8885 mask_vectype);
8886 vec_mask = vec_masks[0];
8889 /* We should have catched mismatched types earlier. */
8890 gcc_assert (
8891 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8892 dataref_ptr
8893 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8894 aggr_type, NULL, offset, &dummy,
8895 gsi, &ptr_incr, false, bump);
8898 else if (!costing_p)
8900 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8901 /* DR_CHAIN is then used as an input to
8902 vect_permute_store_chain(). */
8903 for (i = 0; i < group_size; i++)
8905 vec_oprnd = (*gvec_oprnds[i])[j];
8906 dr_chain[i] = vec_oprnd;
8908 if (mask)
8909 vec_mask = vec_masks[j];
8910 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8911 stmt_info, bump);
8914 if (costing_p)
8916 for (i = 0; i < vec_num; i++)
8917 vect_get_store_cost (vinfo, stmt_info, 1,
8918 alignment_support_scheme, misalignment,
8919 &inside_cost, cost_vec);
8920 continue;
8923 /* Get an array into which we can store the individual vectors. */
8924 tree vec_array = create_vector_array (vectype, vec_num);
8926 /* Invalidate the current contents of VEC_ARRAY. This should
8927 become an RTL clobber too, which prevents the vector registers
8928 from being upward-exposed. */
8929 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8931 /* Store the individual vectors into the array. */
8932 for (i = 0; i < vec_num; i++)
8934 vec_oprnd = dr_chain[i];
8935 write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
8939 tree final_mask = NULL;
8940 tree final_len = NULL;
8941 tree bias = NULL;
8942 if (loop_masks)
8943 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8944 ncopies, vectype, j);
8945 if (vec_mask)
8946 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8947 vec_mask, gsi);
8949 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8951 if (loop_lens)
8952 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8953 ncopies, vectype, j, 1);
8954 else
8955 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8956 signed char biasval
8957 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8958 bias = build_int_cst (intQI_type_node, biasval);
8959 if (!final_mask)
8961 mask_vectype = truth_type_for (vectype);
8962 final_mask = build_minus_one_cst (mask_vectype);
8966 gcall *call;
8967 if (final_len && final_mask)
8969 /* Emit:
8970 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8971 LEN, BIAS, VEC_ARRAY). */
8972 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8973 tree alias_ptr = build_int_cst (ref_type, align);
8974 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8975 dataref_ptr, alias_ptr,
8976 final_mask, final_len, bias,
8977 vec_array);
8979 else if (final_mask)
8981 /* Emit:
8982 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8983 VEC_ARRAY). */
8984 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8985 tree alias_ptr = build_int_cst (ref_type, align);
8986 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8987 dataref_ptr, alias_ptr,
8988 final_mask, vec_array);
8990 else
8992 /* Emit:
8993 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8994 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8995 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
8996 gimple_call_set_lhs (call, data_ref);
8998 gimple_call_set_nothrow (call, true);
8999 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9000 new_stmt = call;
9002 /* Record that VEC_ARRAY is now dead. */
9003 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9004 if (j == 0)
9005 *vec_stmt = new_stmt;
9006 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9009 if (costing_p && dump_enabled_p ())
9010 dump_printf_loc (MSG_NOTE, vect_location,
9011 "vect_model_store_cost: inside_cost = %d, "
9012 "prologue_cost = %d .\n",
9013 inside_cost, prologue_cost);
9015 return true;
9018 if (memory_access_type == VMAT_GATHER_SCATTER)
9020 gcc_assert (!slp && !grouped_store);
9021 auto_vec<tree> vec_offsets;
9022 unsigned int inside_cost = 0, prologue_cost = 0;
9023 for (j = 0; j < ncopies; j++)
9025 gimple *new_stmt;
9026 if (j == 0)
9028 if (costing_p && vls_type == VLS_STORE_INVARIANT)
9029 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
9030 stmt_info, 0, vect_prologue);
9031 else if (!costing_p)
9033 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
9034 DR_CHAIN is of size 1. */
9035 gcc_assert (group_size == 1);
9036 op = vect_get_store_rhs (first_stmt_info);
9037 vect_get_vec_defs_for_operand (vinfo, first_stmt_info,
9038 ncopies, op, gvec_oprnds[0]);
9039 vec_oprnd = (*gvec_oprnds[0])[0];
9040 dr_chain.quick_push (vec_oprnd);
9041 if (mask)
9043 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9044 mask, &vec_masks,
9045 mask_vectype);
9046 vec_mask = vec_masks[0];
9049 /* We should have catched mismatched types earlier. */
9050 gcc_assert (
9051 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
9052 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9053 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9054 slp_node, &gs_info,
9055 &dataref_ptr, &vec_offsets);
9056 else
9057 dataref_ptr
9058 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
9059 aggr_type, NULL, offset,
9060 &dummy, gsi, &ptr_incr, false,
9061 bump);
9064 else if (!costing_p)
9066 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9067 vec_oprnd = (*gvec_oprnds[0])[j];
9068 dr_chain[0] = vec_oprnd;
9069 if (mask)
9070 vec_mask = vec_masks[j];
9071 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9072 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9073 gsi, stmt_info, bump);
9076 new_stmt = NULL;
9077 unsigned HOST_WIDE_INT align;
9078 tree final_mask = NULL_TREE;
9079 tree final_len = NULL_TREE;
9080 tree bias = NULL_TREE;
9081 if (!costing_p)
9083 if (loop_masks)
9084 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9085 ncopies, vectype, j);
9086 if (vec_mask)
9087 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9088 final_mask, vec_mask, gsi);
9091 if (gs_info.ifn != IFN_LAST)
9093 if (costing_p)
9095 unsigned int cnunits = vect_nunits_for_cost (vectype);
9096 inside_cost
9097 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9098 stmt_info, 0, vect_body);
9099 continue;
9102 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9103 vec_offset = vec_offsets[j];
9104 tree scale = size_int (gs_info.scale);
9106 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9108 if (loop_lens)
9109 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9110 ncopies, vectype, j, 1);
9111 else
9112 final_len = build_int_cst (sizetype,
9113 TYPE_VECTOR_SUBPARTS (vectype));
9114 signed char biasval
9115 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9116 bias = build_int_cst (intQI_type_node, biasval);
9117 if (!final_mask)
9119 mask_vectype = truth_type_for (vectype);
9120 final_mask = build_minus_one_cst (mask_vectype);
9124 gcall *call;
9125 if (final_len && final_mask)
9126 call = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
9127 7, dataref_ptr, vec_offset,
9128 scale, vec_oprnd, final_mask,
9129 final_len, bias);
9130 else if (final_mask)
9131 call
9132 = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
9133 dataref_ptr, vec_offset, scale,
9134 vec_oprnd, final_mask);
9135 else
9136 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9137 dataref_ptr, vec_offset,
9138 scale, vec_oprnd);
9139 gimple_call_set_nothrow (call, true);
9140 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9141 new_stmt = call;
9143 else
9145 /* Emulated scatter. */
9146 gcc_assert (!final_mask);
9147 if (costing_p)
9149 unsigned int cnunits = vect_nunits_for_cost (vectype);
9150 /* For emulated scatter N offset vector element extracts
9151 (we assume the scalar scaling and ptr + offset add is
9152 consumed by the load). */
9153 inside_cost
9154 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9155 stmt_info, 0, vect_body);
9156 /* N scalar stores plus extracting the elements. */
9157 inside_cost
9158 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9159 stmt_info, 0, vect_body);
9160 inside_cost
9161 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9162 stmt_info, 0, vect_body);
9163 continue;
9166 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9167 unsigned HOST_WIDE_INT const_offset_nunits
9168 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
9169 vec<constructor_elt, va_gc> *ctor_elts;
9170 vec_alloc (ctor_elts, const_nunits);
9171 gimple_seq stmts = NULL;
9172 tree elt_type = TREE_TYPE (vectype);
9173 unsigned HOST_WIDE_INT elt_size
9174 = tree_to_uhwi (TYPE_SIZE (elt_type));
9175 /* We support offset vectors with more elements
9176 than the data vector for now. */
9177 unsigned HOST_WIDE_INT factor
9178 = const_offset_nunits / const_nunits;
9179 vec_offset = vec_offsets[j / factor];
9180 unsigned elt_offset = (j % factor) * const_nunits;
9181 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9182 tree scale = size_int (gs_info.scale);
9183 align = get_object_alignment (DR_REF (first_dr_info->dr));
9184 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
9185 for (unsigned k = 0; k < const_nunits; ++k)
9187 /* Compute the offsetted pointer. */
9188 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9189 bitsize_int (k + elt_offset));
9190 tree idx
9191 = gimple_build (&stmts, BIT_FIELD_REF, idx_type, vec_offset,
9192 TYPE_SIZE (idx_type), boff);
9193 idx = gimple_convert (&stmts, sizetype, idx);
9194 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx, scale);
9195 tree ptr
9196 = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (dataref_ptr),
9197 dataref_ptr, idx);
9198 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9199 /* Extract the element to be stored. */
9200 tree elt
9201 = gimple_build (&stmts, BIT_FIELD_REF, TREE_TYPE (vectype),
9202 vec_oprnd, TYPE_SIZE (elt_type),
9203 bitsize_int (k * elt_size));
9204 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9205 stmts = NULL;
9206 tree ref
9207 = build2 (MEM_REF, ltype, ptr, build_int_cst (ref_type, 0));
9208 new_stmt = gimple_build_assign (ref, elt);
9209 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9212 if (j == 0)
9213 *vec_stmt = new_stmt;
9214 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9217 if (costing_p && dump_enabled_p ())
9218 dump_printf_loc (MSG_NOTE, vect_location,
9219 "vect_model_store_cost: inside_cost = %d, "
9220 "prologue_cost = %d .\n",
9221 inside_cost, prologue_cost);
9223 return true;
9226 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9227 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9228 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9229 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9231 unsigned inside_cost = 0, prologue_cost = 0;
9232 auto_vec<tree> result_chain (group_size);
9233 auto_vec<tree, 1> vec_oprnds;
9234 for (j = 0; j < ncopies; j++)
9236 gimple *new_stmt;
9237 if (j == 0)
9239 if (slp && !costing_p)
9241 /* Get vectorized arguments for SLP_NODE. */
9242 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
9243 &vec_oprnds, mask, &vec_masks);
9244 vec_oprnd = vec_oprnds[0];
9245 if (mask)
9246 vec_mask = vec_masks[0];
9248 else
9250 /* For interleaved stores we collect vectorized defs for all the
9251 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9252 input to vect_permute_store_chain().
9254 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9255 is of size 1. */
9256 stmt_vec_info next_stmt_info = first_stmt_info;
9257 for (i = 0; i < group_size; i++)
9259 /* Since gaps are not supported for interleaved stores,
9260 DR_GROUP_SIZE is the exact number of stmts in the chain.
9261 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9262 that there is no interleaving, DR_GROUP_SIZE is 1,
9263 and only one iteration of the loop will be executed. */
9264 op = vect_get_store_rhs (next_stmt_info);
9265 if (costing_p)
9266 update_prologue_cost (&prologue_cost, op);
9267 else
9269 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
9270 ncopies, op,
9271 gvec_oprnds[i]);
9272 vec_oprnd = (*gvec_oprnds[i])[0];
9273 dr_chain.quick_push (vec_oprnd);
9275 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9277 if (mask && !costing_p)
9279 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9280 mask, &vec_masks,
9281 mask_vectype);
9282 vec_mask = vec_masks[0];
9286 /* We should have catched mismatched types earlier. */
9287 gcc_assert (costing_p
9288 || useless_type_conversion_p (vectype,
9289 TREE_TYPE (vec_oprnd)));
9290 bool simd_lane_access_p
9291 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9292 if (!costing_p
9293 && simd_lane_access_p
9294 && !loop_masks
9295 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9296 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9297 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9298 && integer_zerop (DR_INIT (first_dr_info->dr))
9299 && alias_sets_conflict_p (get_alias_set (aggr_type),
9300 get_alias_set (TREE_TYPE (ref_type))))
9302 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9303 dataref_offset = build_int_cst (ref_type, 0);
9305 else if (!costing_p)
9306 dataref_ptr
9307 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9308 simd_lane_access_p ? loop : NULL,
9309 offset, &dummy, gsi, &ptr_incr,
9310 simd_lane_access_p, bump);
9312 else if (!costing_p)
9314 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9315 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9316 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9317 of size 1. */
9318 for (i = 0; i < group_size; i++)
9320 vec_oprnd = (*gvec_oprnds[i])[j];
9321 dr_chain[i] = vec_oprnd;
9323 if (mask)
9324 vec_mask = vec_masks[j];
9325 if (dataref_offset)
9326 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9327 else
9328 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9329 stmt_info, bump);
9332 new_stmt = NULL;
9333 if (grouped_store)
9335 /* Permute. */
9336 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9337 if (costing_p)
9339 int group_size = DR_GROUP_SIZE (first_stmt_info);
9340 int nstmts = ceil_log2 (group_size) * group_size;
9341 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
9342 stmt_info, 0, vect_body);
9343 if (dump_enabled_p ())
9344 dump_printf_loc (MSG_NOTE, vect_location,
9345 "vect_model_store_cost: "
9346 "strided group_size = %d .\n",
9347 group_size);
9349 else
9350 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9351 gsi, &result_chain);
9354 stmt_vec_info next_stmt_info = first_stmt_info;
9355 for (i = 0; i < vec_num; i++)
9357 if (!costing_p)
9359 if (slp)
9360 vec_oprnd = vec_oprnds[i];
9361 else if (grouped_store)
9362 /* For grouped stores vectorized defs are interleaved in
9363 vect_permute_store_chain(). */
9364 vec_oprnd = result_chain[i];
9367 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9369 if (costing_p)
9370 inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9371 stmt_info, 0, vect_body);
9372 else
9374 tree perm_mask = perm_mask_for_reverse (vectype);
9375 tree perm_dest = vect_create_destination_var (
9376 vect_get_store_rhs (stmt_info), vectype);
9377 tree new_temp = make_ssa_name (perm_dest);
9379 /* Generate the permute statement. */
9380 gimple *perm_stmt
9381 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9382 vec_oprnd, perm_mask);
9383 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
9384 gsi);
9386 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9387 vec_oprnd = new_temp;
9391 if (costing_p)
9393 vect_get_store_cost (vinfo, stmt_info, 1,
9394 alignment_support_scheme, misalignment,
9395 &inside_cost, cost_vec);
9397 if (!slp)
9399 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9400 if (!next_stmt_info)
9401 break;
9404 continue;
9407 tree final_mask = NULL_TREE;
9408 tree final_len = NULL_TREE;
9409 tree bias = NULL_TREE;
9410 if (loop_masks)
9411 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9412 vec_num * ncopies, vectype,
9413 vec_num * j + i);
9414 if (slp && vec_mask)
9415 vec_mask = vec_masks[i];
9416 if (vec_mask)
9417 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9418 vec_mask, gsi);
9420 if (i > 0)
9421 /* Bump the vector pointer. */
9422 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9423 stmt_info, bump);
9425 unsigned misalign;
9426 unsigned HOST_WIDE_INT align;
9427 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9428 if (alignment_support_scheme == dr_aligned)
9429 misalign = 0;
9430 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9432 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9433 misalign = 0;
9435 else
9436 misalign = misalignment;
9437 if (dataref_offset == NULL_TREE
9438 && TREE_CODE (dataref_ptr) == SSA_NAME)
9439 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9440 misalign);
9441 align = least_bit_hwi (misalign | align);
9443 /* Compute IFN when LOOP_LENS or final_mask valid. */
9444 machine_mode vmode = TYPE_MODE (vectype);
9445 machine_mode new_vmode = vmode;
9446 internal_fn partial_ifn = IFN_LAST;
9447 if (loop_lens)
9449 opt_machine_mode new_ovmode
9450 = get_len_load_store_mode (vmode, false, &partial_ifn);
9451 new_vmode = new_ovmode.require ();
9452 unsigned factor
9453 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9454 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9455 vec_num * ncopies, vectype,
9456 vec_num * j + i, factor);
9458 else if (final_mask)
9460 if (!can_vec_mask_load_store_p (
9461 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9462 &partial_ifn))
9463 gcc_unreachable ();
9466 if (partial_ifn == IFN_MASK_LEN_STORE)
9468 if (!final_len)
9470 /* Pass VF value to 'len' argument of
9471 MASK_LEN_STORE if LOOP_LENS is invalid. */
9472 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9474 if (!final_mask)
9476 /* Pass all ones value to 'mask' argument of
9477 MASK_LEN_STORE if final_mask is invalid. */
9478 mask_vectype = truth_type_for (vectype);
9479 final_mask = build_minus_one_cst (mask_vectype);
9482 if (final_len)
9484 signed char biasval
9485 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9487 bias = build_int_cst (intQI_type_node, biasval);
9490 /* Arguments are ready. Create the new vector stmt. */
9491 if (final_len)
9493 gcall *call;
9494 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9495 /* Need conversion if it's wrapped with VnQI. */
9496 if (vmode != new_vmode)
9498 tree new_vtype
9499 = build_vector_type_for_mode (unsigned_intQI_type_node,
9500 new_vmode);
9501 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9502 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9503 gassign *new_stmt
9504 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9505 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9506 vec_oprnd = var;
9509 if (partial_ifn == IFN_MASK_LEN_STORE)
9510 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9511 dataref_ptr, ptr, final_mask,
9512 final_len, bias, vec_oprnd);
9513 else
9514 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9515 dataref_ptr, ptr, final_len,
9516 bias, vec_oprnd);
9517 gimple_call_set_nothrow (call, true);
9518 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9519 new_stmt = call;
9521 else if (final_mask)
9523 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9524 gcall *call
9525 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9526 ptr, final_mask, vec_oprnd);
9527 gimple_call_set_nothrow (call, true);
9528 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9529 new_stmt = call;
9531 else
9533 data_ref
9534 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9535 dataref_offset ? dataref_offset
9536 : build_int_cst (ref_type, 0));
9537 if (alignment_support_scheme == dr_aligned)
9539 else
9540 TREE_TYPE (data_ref)
9541 = build_aligned_type (TREE_TYPE (data_ref),
9542 align * BITS_PER_UNIT);
9543 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9544 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9545 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9548 if (slp)
9549 continue;
9551 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9552 if (!next_stmt_info)
9553 break;
9555 if (!slp && !costing_p)
9557 if (j == 0)
9558 *vec_stmt = new_stmt;
9559 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9563 if (costing_p)
9565 /* When vectorizing a store into the function result assign
9566 a penalty if the function returns in a multi-register location.
9567 In this case we assume we'll end up with having to spill the
9568 vector result and do piecewise loads as a conservative estimate. */
9569 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9570 if (base
9571 && (TREE_CODE (base) == RESULT_DECL
9572 || (DECL_P (base) && cfun_returns (base)))
9573 && !aggregate_value_p (base, cfun->decl))
9575 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9576 /* ??? Handle PARALLEL in some way. */
9577 if (REG_P (reg))
9579 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9580 /* Assume that a single reg-reg move is possible and cheap,
9581 do not account for vector to gp register move cost. */
9582 if (nregs > 1)
9584 /* Spill. */
9585 prologue_cost
9586 += record_stmt_cost (cost_vec, ncopies, vector_store,
9587 stmt_info, 0, vect_epilogue);
9588 /* Loads. */
9589 prologue_cost
9590 += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
9591 stmt_info, 0, vect_epilogue);
9595 if (dump_enabled_p ())
9596 dump_printf_loc (MSG_NOTE, vect_location,
9597 "vect_model_store_cost: inside_cost = %d, "
9598 "prologue_cost = %d .\n",
9599 inside_cost, prologue_cost);
9602 return true;
9605 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9606 VECTOR_CST mask. No checks are made that the target platform supports the
9607 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9608 vect_gen_perm_mask_checked. */
9610 tree
9611 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9613 tree mask_type;
9615 poly_uint64 nunits = sel.length ();
9616 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9618 mask_type = build_vector_type (ssizetype, nunits);
9619 return vec_perm_indices_to_tree (mask_type, sel);
9622 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9623 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9625 tree
9626 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9628 machine_mode vmode = TYPE_MODE (vectype);
9629 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9630 return vect_gen_perm_mask_any (vectype, sel);
9633 /* Given a vector variable X and Y, that was generated for the scalar
9634 STMT_INFO, generate instructions to permute the vector elements of X and Y
9635 using permutation mask MASK_VEC, insert them at *GSI and return the
9636 permuted vector variable. */
9638 static tree
9639 permute_vec_elements (vec_info *vinfo,
9640 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9641 gimple_stmt_iterator *gsi)
9643 tree vectype = TREE_TYPE (x);
9644 tree perm_dest, data_ref;
9645 gimple *perm_stmt;
9647 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9648 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9649 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9650 else
9651 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9652 data_ref = make_ssa_name (perm_dest);
9654 /* Generate the permute statement. */
9655 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9656 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9658 return data_ref;
9661 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9662 inserting them on the loops preheader edge. Returns true if we
9663 were successful in doing so (and thus STMT_INFO can be moved then),
9664 otherwise returns false. HOIST_P indicates if we want to hoist the
9665 definitions of all SSA uses, it would be false when we are costing. */
9667 static bool
9668 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9670 ssa_op_iter i;
9671 tree op;
9672 bool any = false;
9674 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9676 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9677 if (!gimple_nop_p (def_stmt)
9678 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9680 /* Make sure we don't need to recurse. While we could do
9681 so in simple cases when there are more complex use webs
9682 we don't have an easy way to preserve stmt order to fulfil
9683 dependencies within them. */
9684 tree op2;
9685 ssa_op_iter i2;
9686 if (gimple_code (def_stmt) == GIMPLE_PHI)
9687 return false;
9688 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9690 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9691 if (!gimple_nop_p (def_stmt2)
9692 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9693 return false;
9695 any = true;
9699 if (!any)
9700 return true;
9702 if (!hoist_p)
9703 return true;
9705 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9707 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9708 if (!gimple_nop_p (def_stmt)
9709 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9711 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9712 gsi_remove (&gsi, false);
9713 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9717 return true;
9720 /* vectorizable_load.
9722 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
9723 that can be vectorized.
9724 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9725 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9726 Return true if STMT_INFO is vectorizable in this way. */
9728 static bool
9729 vectorizable_load (vec_info *vinfo,
9730 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9731 gimple **vec_stmt, slp_tree slp_node,
9732 stmt_vector_for_cost *cost_vec)
9734 tree scalar_dest;
9735 tree vec_dest = NULL;
9736 tree data_ref = NULL;
9737 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9738 class loop *loop = NULL;
9739 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9740 bool nested_in_vect_loop = false;
9741 tree elem_type;
9742 /* Avoid false positive uninitialized warning, see PR110652. */
9743 tree new_temp = NULL_TREE;
9744 machine_mode mode;
9745 tree dummy;
9746 tree dataref_ptr = NULL_TREE;
9747 tree dataref_offset = NULL_TREE;
9748 gimple *ptr_incr = NULL;
9749 int ncopies;
9750 int i, j;
9751 unsigned int group_size;
9752 poly_uint64 group_gap_adj;
9753 tree msq = NULL_TREE, lsq;
9754 tree realignment_token = NULL_TREE;
9755 gphi *phi = NULL;
9756 vec<tree> dr_chain = vNULL;
9757 bool grouped_load = false;
9758 stmt_vec_info first_stmt_info;
9759 stmt_vec_info first_stmt_info_for_drptr = NULL;
9760 bool compute_in_loop = false;
9761 class loop *at_loop;
9762 int vec_num;
9763 bool slp = (slp_node != NULL);
9764 bool slp_perm = false;
9765 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9766 poly_uint64 vf;
9767 tree aggr_type;
9768 gather_scatter_info gs_info;
9769 tree ref_type;
9770 enum vect_def_type mask_dt = vect_unknown_def_type;
9772 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9773 return false;
9775 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9776 && ! vec_stmt)
9777 return false;
9779 if (!STMT_VINFO_DATA_REF (stmt_info))
9780 return false;
9782 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9783 int mask_index = -1;
9784 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9786 scalar_dest = gimple_assign_lhs (assign);
9787 if (TREE_CODE (scalar_dest) != SSA_NAME)
9788 return false;
9790 tree_code code = gimple_assign_rhs_code (assign);
9791 if (code != ARRAY_REF
9792 && code != BIT_FIELD_REF
9793 && code != INDIRECT_REF
9794 && code != COMPONENT_REF
9795 && code != IMAGPART_EXPR
9796 && code != REALPART_EXPR
9797 && code != MEM_REF
9798 && TREE_CODE_CLASS (code) != tcc_declaration)
9799 return false;
9801 else
9803 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9804 if (!call || !gimple_call_internal_p (call))
9805 return false;
9807 internal_fn ifn = gimple_call_internal_fn (call);
9808 if (!internal_load_fn_p (ifn))
9809 return false;
9811 scalar_dest = gimple_call_lhs (call);
9812 if (!scalar_dest)
9813 return false;
9815 mask_index = internal_fn_mask_index (ifn);
9816 if (mask_index >= 0 && slp_node)
9817 mask_index = vect_slp_child_index_for_operand (call, mask_index);
9818 if (mask_index >= 0
9819 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9820 &mask, NULL, &mask_dt, &mask_vectype))
9821 return false;
9824 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9825 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9827 if (loop_vinfo)
9829 loop = LOOP_VINFO_LOOP (loop_vinfo);
9830 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9831 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9833 else
9834 vf = 1;
9836 /* Multiple types in SLP are handled by creating the appropriate number of
9837 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9838 case of SLP. */
9839 if (slp)
9840 ncopies = 1;
9841 else
9842 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9844 gcc_assert (ncopies >= 1);
9846 /* FORNOW. This restriction should be relaxed. */
9847 if (nested_in_vect_loop && ncopies > 1)
9849 if (dump_enabled_p ())
9850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9851 "multiple types in nested loop.\n");
9852 return false;
9855 /* Invalidate assumptions made by dependence analysis when vectorization
9856 on the unrolled body effectively re-orders stmts. */
9857 if (ncopies > 1
9858 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9859 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9860 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9862 if (dump_enabled_p ())
9863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9864 "cannot perform implicit CSE when unrolling "
9865 "with negative dependence distance\n");
9866 return false;
9869 elem_type = TREE_TYPE (vectype);
9870 mode = TYPE_MODE (vectype);
9872 /* FORNOW. In some cases can vectorize even if data-type not supported
9873 (e.g. - data copies). */
9874 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9876 if (dump_enabled_p ())
9877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9878 "Aligned load, but unsupported type.\n");
9879 return false;
9882 /* Check if the load is a part of an interleaving chain. */
9883 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9885 grouped_load = true;
9886 /* FORNOW */
9887 gcc_assert (!nested_in_vect_loop);
9888 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9890 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9891 group_size = DR_GROUP_SIZE (first_stmt_info);
9893 /* Refuse non-SLP vectorization of SLP-only groups. */
9894 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9896 if (dump_enabled_p ())
9897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9898 "cannot vectorize load in non-SLP mode.\n");
9899 return false;
9902 /* Invalidate assumptions made by dependence analysis when vectorization
9903 on the unrolled body effectively re-orders stmts. */
9904 if (!PURE_SLP_STMT (stmt_info)
9905 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9906 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9907 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9909 if (dump_enabled_p ())
9910 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9911 "cannot perform implicit CSE when performing "
9912 "group loads with negative dependence distance\n");
9913 return false;
9916 else
9917 group_size = 1;
9919 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9921 slp_perm = true;
9923 if (!loop_vinfo)
9925 /* In BB vectorization we may not actually use a loaded vector
9926 accessing elements in excess of DR_GROUP_SIZE. */
9927 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9928 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9929 unsigned HOST_WIDE_INT nunits;
9930 unsigned j, k, maxk = 0;
9931 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9932 if (k > maxk)
9933 maxk = k;
9934 tree vectype = SLP_TREE_VECTYPE (slp_node);
9935 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9936 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9938 if (dump_enabled_p ())
9939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9940 "BB vectorization with gaps at the end of "
9941 "a load is not supported\n");
9942 return false;
9946 auto_vec<tree> tem;
9947 unsigned n_perms;
9948 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9949 true, &n_perms))
9951 if (dump_enabled_p ())
9952 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9953 vect_location,
9954 "unsupported load permutation\n");
9955 return false;
9959 vect_memory_access_type memory_access_type;
9960 enum dr_alignment_support alignment_support_scheme;
9961 int misalignment;
9962 poly_int64 poffset;
9963 internal_fn lanes_ifn;
9964 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
9965 ncopies, &memory_access_type, &poffset,
9966 &alignment_support_scheme, &misalignment, &gs_info,
9967 &lanes_ifn))
9968 return false;
9970 if (mask)
9972 if (memory_access_type == VMAT_CONTIGUOUS)
9974 machine_mode vec_mode = TYPE_MODE (vectype);
9975 if (!VECTOR_MODE_P (vec_mode)
9976 || !can_vec_mask_load_store_p (vec_mode,
9977 TYPE_MODE (mask_vectype), true))
9978 return false;
9980 else if (memory_access_type != VMAT_LOAD_STORE_LANES
9981 && memory_access_type != VMAT_GATHER_SCATTER)
9983 if (dump_enabled_p ())
9984 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9985 "unsupported access type for masked load.\n");
9986 return false;
9988 else if (memory_access_type == VMAT_GATHER_SCATTER
9989 && gs_info.ifn == IFN_LAST
9990 && !gs_info.decl)
9992 if (dump_enabled_p ())
9993 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9994 "unsupported masked emulated gather.\n");
9995 return false;
9999 bool costing_p = !vec_stmt;
10001 if (costing_p) /* transformation not required. */
10003 if (slp_node
10004 && mask
10005 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
10006 mask_vectype))
10008 if (dump_enabled_p ())
10009 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10010 "incompatible vector types for invariants\n");
10011 return false;
10014 if (!slp)
10015 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10017 if (loop_vinfo
10018 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10019 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10020 VLS_LOAD, group_size,
10021 memory_access_type, &gs_info,
10022 mask);
10024 if (dump_enabled_p ()
10025 && memory_access_type != VMAT_ELEMENTWISE
10026 && memory_access_type != VMAT_GATHER_SCATTER
10027 && alignment_support_scheme != dr_aligned)
10028 dump_printf_loc (MSG_NOTE, vect_location,
10029 "Vectorizing an unaligned access.\n");
10031 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10032 vinfo->any_known_not_updated_vssa = true;
10034 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10037 if (!slp)
10038 gcc_assert (memory_access_type
10039 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10041 if (dump_enabled_p () && !costing_p)
10042 dump_printf_loc (MSG_NOTE, vect_location,
10043 "transform load. ncopies = %d\n", ncopies);
10045 /* Transform. */
10047 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10048 ensure_base_align (dr_info);
10050 if (memory_access_type == VMAT_INVARIANT)
10052 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10053 /* If we have versioned for aliasing or the loop doesn't
10054 have any data dependencies that would preclude this,
10055 then we are sure this is a loop invariant load and
10056 thus we can insert it on the preheader edge.
10057 TODO: hoist_defs_of_uses should ideally be computed
10058 once at analysis time, remembered and used in the
10059 transform time. */
10060 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10061 && !nested_in_vect_loop
10062 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
10063 if (costing_p)
10065 enum vect_cost_model_location cost_loc
10066 = hoist_p ? vect_prologue : vect_body;
10067 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10068 stmt_info, 0, cost_loc);
10069 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
10070 cost_loc);
10071 unsigned int prologue_cost = hoist_p ? cost : 0;
10072 unsigned int inside_cost = hoist_p ? 0 : cost;
10073 if (dump_enabled_p ())
10074 dump_printf_loc (MSG_NOTE, vect_location,
10075 "vect_model_load_cost: inside_cost = %d, "
10076 "prologue_cost = %d .\n",
10077 inside_cost, prologue_cost);
10078 return true;
10080 if (hoist_p)
10082 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
10083 if (dump_enabled_p ())
10084 dump_printf_loc (MSG_NOTE, vect_location,
10085 "hoisting out of the vectorized loop: %G",
10086 (gimple *) stmt);
10087 scalar_dest = copy_ssa_name (scalar_dest);
10088 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10089 edge pe = loop_preheader_edge (loop);
10090 gphi *vphi = get_virtual_phi (loop->header);
10091 tree vuse;
10092 if (vphi)
10093 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10094 else
10095 vuse = gimple_vuse (gsi_stmt (*gsi));
10096 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10097 gimple_set_vuse (new_stmt, vuse);
10098 gsi_insert_on_edge_immediate (pe, new_stmt);
10100 /* These copies are all equivalent. */
10101 if (hoist_p)
10102 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10103 vectype, NULL);
10104 else
10106 gimple_stmt_iterator gsi2 = *gsi;
10107 gsi_next (&gsi2);
10108 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10109 vectype, &gsi2);
10111 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10112 if (slp)
10113 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10114 slp_node->push_vec_def (new_stmt);
10115 else
10117 for (j = 0; j < ncopies; ++j)
10118 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10119 *vec_stmt = new_stmt;
10121 return true;
10124 if (memory_access_type == VMAT_ELEMENTWISE
10125 || memory_access_type == VMAT_STRIDED_SLP)
10127 gimple_stmt_iterator incr_gsi;
10128 bool insert_after;
10129 tree offvar;
10130 tree ivstep;
10131 tree running_off;
10132 vec<constructor_elt, va_gc> *v = NULL;
10133 tree stride_base, stride_step, alias_off;
10134 /* Checked by get_load_store_type. */
10135 unsigned int const_nunits = nunits.to_constant ();
10136 unsigned HOST_WIDE_INT cst_offset = 0;
10137 tree dr_offset;
10138 unsigned int inside_cost = 0;
10140 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10141 gcc_assert (!nested_in_vect_loop);
10143 if (grouped_load)
10145 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10146 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10148 else
10150 first_stmt_info = stmt_info;
10151 first_dr_info = dr_info;
10154 if (slp && grouped_load)
10156 group_size = DR_GROUP_SIZE (first_stmt_info);
10157 ref_type = get_group_alias_ptr_type (first_stmt_info);
10159 else
10161 if (grouped_load)
10162 cst_offset
10163 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10164 * vect_get_place_in_interleaving_chain (stmt_info,
10165 first_stmt_info));
10166 group_size = 1;
10167 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10170 if (!costing_p)
10172 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10173 stride_base = fold_build_pointer_plus (
10174 DR_BASE_ADDRESS (first_dr_info->dr),
10175 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10176 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10177 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10179 /* For a load with loop-invariant (but other than power-of-2)
10180 stride (i.e. not a grouped access) like so:
10182 for (i = 0; i < n; i += stride)
10183 ... = array[i];
10185 we generate a new induction variable and new accesses to
10186 form a new vector (or vectors, depending on ncopies):
10188 for (j = 0; ; j += VF*stride)
10189 tmp1 = array[j];
10190 tmp2 = array[j + stride];
10192 vectemp = {tmp1, tmp2, ...}
10195 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10196 build_int_cst (TREE_TYPE (stride_step), vf));
10198 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10200 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10201 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10202 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10203 loop, &incr_gsi, insert_after,
10204 &offvar, NULL);
10206 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10209 running_off = offvar;
10210 alias_off = build_int_cst (ref_type, 0);
10211 int nloads = const_nunits;
10212 int lnel = 1;
10213 tree ltype = TREE_TYPE (vectype);
10214 tree lvectype = vectype;
10215 auto_vec<tree> dr_chain;
10216 if (memory_access_type == VMAT_STRIDED_SLP)
10218 if (group_size < const_nunits)
10220 /* First check if vec_init optab supports construction from vector
10221 elts directly. Otherwise avoid emitting a constructor of
10222 vector elements by performing the loads using an integer type
10223 of the same size, constructing a vector of those and then
10224 re-interpreting it as the original vector type. This avoids a
10225 huge runtime penalty due to the general inability to perform
10226 store forwarding from smaller stores to a larger load. */
10227 tree ptype;
10228 tree vtype
10229 = vector_vector_composition_type (vectype,
10230 const_nunits / group_size,
10231 &ptype);
10232 if (vtype != NULL_TREE)
10234 nloads = const_nunits / group_size;
10235 lnel = group_size;
10236 lvectype = vtype;
10237 ltype = ptype;
10240 else
10242 nloads = 1;
10243 lnel = const_nunits;
10244 ltype = vectype;
10246 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10248 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
10249 else if (nloads == 1)
10250 ltype = vectype;
10252 if (slp)
10254 /* For SLP permutation support we need to load the whole group,
10255 not only the number of vector stmts the permutation result
10256 fits in. */
10257 if (slp_perm)
10259 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10260 variable VF. */
10261 unsigned int const_vf = vf.to_constant ();
10262 ncopies = CEIL (group_size * const_vf, const_nunits);
10263 dr_chain.create (ncopies);
10265 else
10266 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10268 unsigned int group_el = 0;
10269 unsigned HOST_WIDE_INT
10270 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10271 unsigned int n_groups = 0;
10272 for (j = 0; j < ncopies; j++)
10274 if (nloads > 1 && !costing_p)
10275 vec_alloc (v, nloads);
10276 gimple *new_stmt = NULL;
10277 for (i = 0; i < nloads; i++)
10279 if (costing_p)
10281 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10282 avoid ICE, see PR110776. */
10283 if (VECTOR_TYPE_P (ltype)
10284 && memory_access_type != VMAT_ELEMENTWISE)
10285 vect_get_load_cost (vinfo, stmt_info, 1,
10286 alignment_support_scheme, misalignment,
10287 false, &inside_cost, nullptr, cost_vec,
10288 cost_vec, true);
10289 else
10290 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10291 stmt_info, 0, vect_body);
10292 continue;
10294 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10295 group_el * elsz + cst_offset);
10296 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10297 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10298 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
10299 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10300 if (nloads > 1)
10301 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10302 gimple_assign_lhs (new_stmt));
10304 group_el += lnel;
10305 if (! slp
10306 || group_el == group_size)
10308 n_groups++;
10309 /* When doing SLP make sure to not load elements from
10310 the next vector iteration, those will not be accessed
10311 so just use the last element again. See PR107451. */
10312 if (!slp || known_lt (n_groups, vf))
10314 tree newoff = copy_ssa_name (running_off);
10315 gimple *incr
10316 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10317 running_off, stride_step);
10318 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10319 running_off = newoff;
10321 group_el = 0;
10325 if (nloads > 1)
10327 if (costing_p)
10328 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10329 stmt_info, 0, vect_body);
10330 else
10332 tree vec_inv = build_constructor (lvectype, v);
10333 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10334 lvectype, gsi);
10335 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10336 if (lvectype != vectype)
10338 new_stmt
10339 = gimple_build_assign (make_ssa_name (vectype),
10340 VIEW_CONVERT_EXPR,
10341 build1 (VIEW_CONVERT_EXPR,
10342 vectype, new_temp));
10343 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10344 gsi);
10349 if (!costing_p)
10351 if (slp)
10353 if (slp_perm)
10354 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10355 else
10356 slp_node->push_vec_def (new_stmt);
10358 else
10360 if (j == 0)
10361 *vec_stmt = new_stmt;
10362 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10366 if (slp_perm)
10368 unsigned n_perms;
10369 if (costing_p)
10371 unsigned n_loads;
10372 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10373 true, &n_perms, &n_loads);
10374 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10375 first_stmt_info, 0, vect_body);
10377 else
10378 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10379 false, &n_perms);
10382 if (costing_p && dump_enabled_p ())
10383 dump_printf_loc (MSG_NOTE, vect_location,
10384 "vect_model_load_cost: inside_cost = %u, "
10385 "prologue_cost = 0 .\n",
10386 inside_cost);
10388 return true;
10391 if (memory_access_type == VMAT_GATHER_SCATTER
10392 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10393 grouped_load = false;
10395 if (grouped_load
10396 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10398 if (grouped_load)
10400 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10401 group_size = DR_GROUP_SIZE (first_stmt_info);
10403 else
10405 first_stmt_info = stmt_info;
10406 group_size = 1;
10408 /* For SLP vectorization we directly vectorize a subchain
10409 without permutation. */
10410 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10411 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10412 /* For BB vectorization always use the first stmt to base
10413 the data ref pointer on. */
10414 if (bb_vinfo)
10415 first_stmt_info_for_drptr
10416 = vect_find_first_scalar_stmt_in_slp (slp_node);
10418 /* Check if the chain of loads is already vectorized. */
10419 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10420 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10421 ??? But we can only do so if there is exactly one
10422 as we have no way to get at the rest. Leave the CSE
10423 opportunity alone.
10424 ??? With the group load eventually participating
10425 in multiple different permutations (having multiple
10426 slp nodes which refer to the same group) the CSE
10427 is even wrong code. See PR56270. */
10428 && !slp)
10430 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10431 return true;
10433 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10434 group_gap_adj = 0;
10436 /* VEC_NUM is the number of vect stmts to be created for this group. */
10437 if (slp)
10439 grouped_load = false;
10440 /* If an SLP permutation is from N elements to N elements,
10441 and if one vector holds a whole number of N, we can load
10442 the inputs to the permutation in the same way as an
10443 unpermuted sequence. In other cases we need to load the
10444 whole group, not only the number of vector stmts the
10445 permutation result fits in. */
10446 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10447 if (slp_perm
10448 && (group_size != scalar_lanes
10449 || !multiple_p (nunits, group_size)))
10451 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10452 variable VF; see vect_transform_slp_perm_load. */
10453 unsigned int const_vf = vf.to_constant ();
10454 unsigned int const_nunits = nunits.to_constant ();
10455 vec_num = CEIL (group_size * const_vf, const_nunits);
10456 group_gap_adj = vf * group_size - nunits * vec_num;
10458 else
10460 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10461 group_gap_adj
10462 = group_size - scalar_lanes;
10465 else
10466 vec_num = group_size;
10468 ref_type = get_group_alias_ptr_type (first_stmt_info);
10470 else
10472 first_stmt_info = stmt_info;
10473 first_dr_info = dr_info;
10474 group_size = vec_num = 1;
10475 group_gap_adj = 0;
10476 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10477 if (slp)
10478 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10481 gcc_assert (alignment_support_scheme);
10482 vec_loop_masks *loop_masks
10483 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10484 ? &LOOP_VINFO_MASKS (loop_vinfo)
10485 : NULL);
10486 vec_loop_lens *loop_lens
10487 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10488 ? &LOOP_VINFO_LENS (loop_vinfo)
10489 : NULL);
10491 /* Shouldn't go with length-based approach if fully masked. */
10492 gcc_assert (!loop_lens || !loop_masks);
10494 /* Targets with store-lane instructions must not require explicit
10495 realignment. vect_supportable_dr_alignment always returns either
10496 dr_aligned or dr_unaligned_supported for masked operations. */
10497 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10498 && !mask
10499 && !loop_masks)
10500 || alignment_support_scheme == dr_aligned
10501 || alignment_support_scheme == dr_unaligned_supported);
10503 /* In case the vectorization factor (VF) is bigger than the number
10504 of elements that we can fit in a vectype (nunits), we have to generate
10505 more than one vector stmt - i.e - we need to "unroll" the
10506 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10507 from one copy of the vector stmt to the next, in the field
10508 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10509 stages to find the correct vector defs to be used when vectorizing
10510 stmts that use the defs of the current stmt. The example below
10511 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10512 need to create 4 vectorized stmts):
10514 before vectorization:
10515 RELATED_STMT VEC_STMT
10516 S1: x = memref - -
10517 S2: z = x + 1 - -
10519 step 1: vectorize stmt S1:
10520 We first create the vector stmt VS1_0, and, as usual, record a
10521 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10522 Next, we create the vector stmt VS1_1, and record a pointer to
10523 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10524 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10525 stmts and pointers:
10526 RELATED_STMT VEC_STMT
10527 VS1_0: vx0 = memref0 VS1_1 -
10528 VS1_1: vx1 = memref1 VS1_2 -
10529 VS1_2: vx2 = memref2 VS1_3 -
10530 VS1_3: vx3 = memref3 - -
10531 S1: x = load - VS1_0
10532 S2: z = x + 1 - -
10535 /* In case of interleaving (non-unit grouped access):
10537 S1: x2 = &base + 2
10538 S2: x0 = &base
10539 S3: x1 = &base + 1
10540 S4: x3 = &base + 3
10542 Vectorized loads are created in the order of memory accesses
10543 starting from the access of the first stmt of the chain:
10545 VS1: vx0 = &base
10546 VS2: vx1 = &base + vec_size*1
10547 VS3: vx3 = &base + vec_size*2
10548 VS4: vx4 = &base + vec_size*3
10550 Then permutation statements are generated:
10552 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10553 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10556 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10557 (the order of the data-refs in the output of vect_permute_load_chain
10558 corresponds to the order of scalar stmts in the interleaving chain - see
10559 the documentation of vect_permute_load_chain()).
10560 The generation of permutation stmts and recording them in
10561 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10563 In case of both multiple types and interleaving, the vector loads and
10564 permutation stmts above are created for every copy. The result vector
10565 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10566 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10568 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10569 on a target that supports unaligned accesses (dr_unaligned_supported)
10570 we generate the following code:
10571 p = initial_addr;
10572 indx = 0;
10573 loop {
10574 p = p + indx * vectype_size;
10575 vec_dest = *(p);
10576 indx = indx + 1;
10579 Otherwise, the data reference is potentially unaligned on a target that
10580 does not support unaligned accesses (dr_explicit_realign_optimized) -
10581 then generate the following code, in which the data in each iteration is
10582 obtained by two vector loads, one from the previous iteration, and one
10583 from the current iteration:
10584 p1 = initial_addr;
10585 msq_init = *(floor(p1))
10586 p2 = initial_addr + VS - 1;
10587 realignment_token = call target_builtin;
10588 indx = 0;
10589 loop {
10590 p2 = p2 + indx * vectype_size
10591 lsq = *(floor(p2))
10592 vec_dest = realign_load (msq, lsq, realignment_token)
10593 indx = indx + 1;
10594 msq = lsq;
10595 } */
10597 /* If the misalignment remains the same throughout the execution of the
10598 loop, we can create the init_addr and permutation mask at the loop
10599 preheader. Otherwise, it needs to be created inside the loop.
10600 This can only occur when vectorizing memory accesses in the inner-loop
10601 nested within an outer-loop that is being vectorized. */
10603 if (nested_in_vect_loop
10604 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10605 GET_MODE_SIZE (TYPE_MODE (vectype))))
10607 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10608 compute_in_loop = true;
10611 bool diff_first_stmt_info
10612 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10614 tree offset = NULL_TREE;
10615 if ((alignment_support_scheme == dr_explicit_realign_optimized
10616 || alignment_support_scheme == dr_explicit_realign)
10617 && !compute_in_loop)
10619 /* If we have different first_stmt_info, we can't set up realignment
10620 here, since we can't guarantee first_stmt_info DR has been
10621 initialized yet, use first_stmt_info_for_drptr DR by bumping the
10622 distance from first_stmt_info DR instead as below. */
10623 if (!costing_p)
10625 if (!diff_first_stmt_info)
10626 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10627 &realignment_token,
10628 alignment_support_scheme, NULL_TREE,
10629 &at_loop);
10630 if (alignment_support_scheme == dr_explicit_realign_optimized)
10632 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10633 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10634 size_one_node);
10635 gcc_assert (!first_stmt_info_for_drptr);
10639 else
10640 at_loop = loop;
10642 if (!known_eq (poffset, 0))
10643 offset = (offset
10644 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10645 : size_int (poffset));
10647 tree bump;
10648 tree vec_offset = NULL_TREE;
10649 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10651 aggr_type = NULL_TREE;
10652 bump = NULL_TREE;
10654 else if (memory_access_type == VMAT_GATHER_SCATTER)
10656 aggr_type = elem_type;
10657 if (!costing_p)
10658 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10659 &bump, &vec_offset, loop_lens);
10661 else
10663 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10664 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10665 else
10666 aggr_type = vectype;
10667 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10668 memory_access_type, loop_lens);
10671 auto_vec<tree> vec_offsets;
10672 auto_vec<tree> vec_masks;
10673 if (mask && !costing_p)
10675 if (slp_node)
10676 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10677 &vec_masks);
10678 else
10679 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10680 &vec_masks, mask_vectype);
10683 tree vec_mask = NULL_TREE;
10684 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10686 gcc_assert (alignment_support_scheme == dr_aligned
10687 || alignment_support_scheme == dr_unaligned_supported);
10688 gcc_assert (grouped_load && !slp);
10690 unsigned int inside_cost = 0, prologue_cost = 0;
10691 for (j = 0; j < ncopies; j++)
10693 if (costing_p)
10695 /* An IFN_LOAD_LANES will load all its vector results,
10696 regardless of which ones we actually need. Account
10697 for the cost of unused results. */
10698 if (first_stmt_info == stmt_info)
10700 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10701 stmt_vec_info next_stmt_info = first_stmt_info;
10704 gaps -= 1;
10705 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10707 while (next_stmt_info);
10708 if (gaps)
10710 if (dump_enabled_p ())
10711 dump_printf_loc (MSG_NOTE, vect_location,
10712 "vect_model_load_cost: %d "
10713 "unused vectors.\n",
10714 gaps);
10715 vect_get_load_cost (vinfo, stmt_info, gaps,
10716 alignment_support_scheme,
10717 misalignment, false, &inside_cost,
10718 &prologue_cost, cost_vec, cost_vec,
10719 true);
10722 vect_get_load_cost (vinfo, stmt_info, 1, alignment_support_scheme,
10723 misalignment, false, &inside_cost,
10724 &prologue_cost, cost_vec, cost_vec, true);
10725 continue;
10728 /* 1. Create the vector or array pointer update chain. */
10729 if (j == 0)
10730 dataref_ptr
10731 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10732 at_loop, offset, &dummy, gsi,
10733 &ptr_incr, false, bump);
10734 else
10736 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10737 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10738 stmt_info, bump);
10740 if (mask)
10741 vec_mask = vec_masks[j];
10743 tree vec_array = create_vector_array (vectype, vec_num);
10745 tree final_mask = NULL_TREE;
10746 tree final_len = NULL_TREE;
10747 tree bias = NULL_TREE;
10748 if (loop_masks)
10749 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10750 ncopies, vectype, j);
10751 if (vec_mask)
10752 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10753 vec_mask, gsi);
10755 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10757 if (loop_lens)
10758 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10759 ncopies, vectype, j, 1);
10760 else
10761 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10762 signed char biasval
10763 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10764 bias = build_int_cst (intQI_type_node, biasval);
10765 if (!final_mask)
10767 mask_vectype = truth_type_for (vectype);
10768 final_mask = build_minus_one_cst (mask_vectype);
10772 gcall *call;
10773 if (final_len && final_mask)
10775 /* Emit:
10776 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10777 VEC_MASK, LEN, BIAS). */
10778 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10779 tree alias_ptr = build_int_cst (ref_type, align);
10780 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10781 dataref_ptr, alias_ptr,
10782 final_mask, final_len, bias);
10784 else if (final_mask)
10786 /* Emit:
10787 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10788 VEC_MASK). */
10789 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10790 tree alias_ptr = build_int_cst (ref_type, align);
10791 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10792 dataref_ptr, alias_ptr,
10793 final_mask);
10795 else
10797 /* Emit:
10798 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10799 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10800 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10802 gimple_call_set_lhs (call, vec_array);
10803 gimple_call_set_nothrow (call, true);
10804 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10806 dr_chain.create (vec_num);
10807 /* Extract each vector into an SSA_NAME. */
10808 for (i = 0; i < vec_num; i++)
10810 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10811 vec_array, i);
10812 dr_chain.quick_push (new_temp);
10815 /* Record the mapping between SSA_NAMEs and statements. */
10816 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10818 /* Record that VEC_ARRAY is now dead. */
10819 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10821 dr_chain.release ();
10823 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10826 if (costing_p && dump_enabled_p ())
10827 dump_printf_loc (MSG_NOTE, vect_location,
10828 "vect_model_load_cost: inside_cost = %u, "
10829 "prologue_cost = %u .\n",
10830 inside_cost, prologue_cost);
10832 return true;
10835 if (memory_access_type == VMAT_GATHER_SCATTER)
10837 gcc_assert (alignment_support_scheme == dr_aligned
10838 || alignment_support_scheme == dr_unaligned_supported);
10839 gcc_assert (!grouped_load && !slp_perm);
10841 unsigned int inside_cost = 0, prologue_cost = 0;
10842 for (j = 0; j < ncopies; j++)
10844 /* 1. Create the vector or array pointer update chain. */
10845 if (j == 0 && !costing_p)
10847 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10848 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10849 slp_node, &gs_info, &dataref_ptr,
10850 &vec_offsets);
10851 else
10852 dataref_ptr
10853 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10854 at_loop, offset, &dummy, gsi,
10855 &ptr_incr, false, bump);
10857 else if (!costing_p)
10859 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10860 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10861 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10862 gsi, stmt_info, bump);
10865 if (mask && !costing_p)
10866 vec_mask = vec_masks[j];
10868 gimple *new_stmt = NULL;
10869 for (i = 0; i < vec_num; i++)
10871 tree final_mask = NULL_TREE;
10872 tree final_len = NULL_TREE;
10873 tree bias = NULL_TREE;
10874 if (!costing_p)
10876 if (loop_masks)
10877 final_mask
10878 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10879 vec_num * ncopies, vectype,
10880 vec_num * j + i);
10881 if (vec_mask)
10882 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10883 final_mask, vec_mask, gsi);
10885 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10886 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10887 gsi, stmt_info, bump);
10890 /* 2. Create the vector-load in the loop. */
10891 unsigned HOST_WIDE_INT align;
10892 if (gs_info.ifn != IFN_LAST)
10894 if (costing_p)
10896 unsigned int cnunits = vect_nunits_for_cost (vectype);
10897 inside_cost
10898 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10899 stmt_info, 0, vect_body);
10900 continue;
10902 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10903 vec_offset = vec_offsets[vec_num * j + i];
10904 tree zero = build_zero_cst (vectype);
10905 tree scale = size_int (gs_info.scale);
10907 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10909 if (loop_lens)
10910 final_len
10911 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10912 vec_num * ncopies, vectype,
10913 vec_num * j + i, 1);
10914 else
10915 final_len
10916 = build_int_cst (sizetype,
10917 TYPE_VECTOR_SUBPARTS (vectype));
10918 signed char biasval
10919 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10920 bias = build_int_cst (intQI_type_node, biasval);
10921 if (!final_mask)
10923 mask_vectype = truth_type_for (vectype);
10924 final_mask = build_minus_one_cst (mask_vectype);
10928 gcall *call;
10929 if (final_len && final_mask)
10930 call
10931 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
10932 dataref_ptr, vec_offset,
10933 scale, zero, final_mask,
10934 final_len, bias);
10935 else if (final_mask)
10936 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
10937 dataref_ptr, vec_offset,
10938 scale, zero, final_mask);
10939 else
10940 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
10941 dataref_ptr, vec_offset,
10942 scale, zero);
10943 gimple_call_set_nothrow (call, true);
10944 new_stmt = call;
10945 data_ref = NULL_TREE;
10947 else if (gs_info.decl)
10949 /* The builtin decls path for gather is legacy, x86 only. */
10950 gcc_assert (!final_len && nunits.is_constant ());
10951 if (costing_p)
10953 unsigned int cnunits = vect_nunits_for_cost (vectype);
10954 inside_cost
10955 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10956 stmt_info, 0, vect_body);
10957 continue;
10959 poly_uint64 offset_nunits
10960 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
10961 if (known_eq (nunits, offset_nunits))
10963 new_stmt = vect_build_one_gather_load_call
10964 (vinfo, stmt_info, gsi, &gs_info,
10965 dataref_ptr, vec_offsets[vec_num * j + i],
10966 final_mask);
10967 data_ref = NULL_TREE;
10969 else if (known_eq (nunits, offset_nunits * 2))
10971 /* We have a offset vector with half the number of
10972 lanes but the builtins will produce full vectype
10973 data with just the lower lanes filled. */
10974 new_stmt = vect_build_one_gather_load_call
10975 (vinfo, stmt_info, gsi, &gs_info,
10976 dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
10977 final_mask);
10978 tree low = make_ssa_name (vectype);
10979 gimple_set_lhs (new_stmt, low);
10980 vect_finish_stmt_generation (vinfo, stmt_info,
10981 new_stmt, gsi);
10983 /* now put upper half of final_mask in final_mask low. */
10984 if (final_mask
10985 && !SCALAR_INT_MODE_P
10986 (TYPE_MODE (TREE_TYPE (final_mask))))
10988 int count = nunits.to_constant ();
10989 vec_perm_builder sel (count, count, 1);
10990 sel.quick_grow (count);
10991 for (int i = 0; i < count; ++i)
10992 sel[i] = i | (count / 2);
10993 vec_perm_indices indices (sel, 2, count);
10994 tree perm_mask = vect_gen_perm_mask_checked
10995 (TREE_TYPE (final_mask), indices);
10996 new_stmt = gimple_build_assign (NULL_TREE,
10997 VEC_PERM_EXPR,
10998 final_mask,
10999 final_mask,
11000 perm_mask);
11001 final_mask = make_ssa_name (TREE_TYPE (final_mask));
11002 gimple_set_lhs (new_stmt, final_mask);
11003 vect_finish_stmt_generation (vinfo, stmt_info,
11004 new_stmt, gsi);
11006 else if (final_mask)
11008 new_stmt = gimple_build_assign (NULL_TREE,
11009 VEC_UNPACK_HI_EXPR,
11010 final_mask);
11011 final_mask = make_ssa_name
11012 (truth_type_for (gs_info.offset_vectype));
11013 gimple_set_lhs (new_stmt, final_mask);
11014 vect_finish_stmt_generation (vinfo, stmt_info,
11015 new_stmt, gsi);
11018 new_stmt = vect_build_one_gather_load_call
11019 (vinfo, stmt_info, gsi, &gs_info,
11020 dataref_ptr,
11021 vec_offsets[2 * vec_num * j + 2 * i + 1],
11022 final_mask);
11023 tree high = make_ssa_name (vectype);
11024 gimple_set_lhs (new_stmt, high);
11025 vect_finish_stmt_generation (vinfo, stmt_info,
11026 new_stmt, gsi);
11028 /* compose low + high. */
11029 int count = nunits.to_constant ();
11030 vec_perm_builder sel (count, count, 1);
11031 sel.quick_grow (count);
11032 for (int i = 0; i < count; ++i)
11033 sel[i] = i < count / 2 ? i : i + count / 2;
11034 vec_perm_indices indices (sel, 2, count);
11035 tree perm_mask
11036 = vect_gen_perm_mask_checked (vectype, indices);
11037 new_stmt = gimple_build_assign (NULL_TREE,
11038 VEC_PERM_EXPR,
11039 low, high, perm_mask);
11040 data_ref = NULL_TREE;
11042 else if (known_eq (nunits * 2, offset_nunits))
11044 /* We have a offset vector with double the number of
11045 lanes. Select the low/high part accordingly. */
11046 vec_offset = vec_offsets[(vec_num * j + i) / 2];
11047 if ((vec_num * j + i) & 1)
11049 int count = offset_nunits.to_constant ();
11050 vec_perm_builder sel (count, count, 1);
11051 sel.quick_grow (count);
11052 for (int i = 0; i < count; ++i)
11053 sel[i] = i | (count / 2);
11054 vec_perm_indices indices (sel, 2, count);
11055 tree perm_mask = vect_gen_perm_mask_checked
11056 (TREE_TYPE (vec_offset), indices);
11057 new_stmt = gimple_build_assign (NULL_TREE,
11058 VEC_PERM_EXPR,
11059 vec_offset,
11060 vec_offset,
11061 perm_mask);
11062 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11063 gimple_set_lhs (new_stmt, vec_offset);
11064 vect_finish_stmt_generation (vinfo, stmt_info,
11065 new_stmt, gsi);
11067 new_stmt = vect_build_one_gather_load_call
11068 (vinfo, stmt_info, gsi, &gs_info,
11069 dataref_ptr, vec_offset, final_mask);
11070 data_ref = NULL_TREE;
11072 else
11073 gcc_unreachable ();
11075 else
11077 /* Emulated gather-scatter. */
11078 gcc_assert (!final_mask);
11079 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11080 if (costing_p)
11082 /* For emulated gathers N offset vector element
11083 offset add is consumed by the load). */
11084 inside_cost = record_stmt_cost (cost_vec, const_nunits,
11085 vec_to_scalar, stmt_info,
11086 0, vect_body);
11087 /* N scalar loads plus gathering them into a
11088 vector. */
11089 inside_cost
11090 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11091 stmt_info, 0, vect_body);
11092 inside_cost
11093 = record_stmt_cost (cost_vec, 1, vec_construct,
11094 stmt_info, 0, vect_body);
11095 continue;
11097 unsigned HOST_WIDE_INT const_offset_nunits
11098 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11099 .to_constant ();
11100 vec<constructor_elt, va_gc> *ctor_elts;
11101 vec_alloc (ctor_elts, const_nunits);
11102 gimple_seq stmts = NULL;
11103 /* We support offset vectors with more elements
11104 than the data vector for now. */
11105 unsigned HOST_WIDE_INT factor
11106 = const_offset_nunits / const_nunits;
11107 vec_offset = vec_offsets[j / factor];
11108 unsigned elt_offset = (j % factor) * const_nunits;
11109 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11110 tree scale = size_int (gs_info.scale);
11111 align = get_object_alignment (DR_REF (first_dr_info->dr));
11112 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
11113 for (unsigned k = 0; k < const_nunits; ++k)
11115 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11116 bitsize_int (k + elt_offset));
11117 tree idx
11118 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11119 vec_offset, TYPE_SIZE (idx_type), boff);
11120 idx = gimple_convert (&stmts, sizetype, idx);
11121 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
11122 scale);
11123 tree ptr = gimple_build (&stmts, PLUS_EXPR,
11124 TREE_TYPE (dataref_ptr),
11125 dataref_ptr, idx);
11126 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11127 tree elt = make_ssa_name (TREE_TYPE (vectype));
11128 tree ref = build2 (MEM_REF, ltype, ptr,
11129 build_int_cst (ref_type, 0));
11130 new_stmt = gimple_build_assign (elt, ref);
11131 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11132 gimple_seq_add_stmt (&stmts, new_stmt);
11133 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11135 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11136 new_stmt = gimple_build_assign (
11137 NULL_TREE, build_constructor (vectype, ctor_elts));
11138 data_ref = NULL_TREE;
11141 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11142 /* DATA_REF is null if we've already built the statement. */
11143 if (data_ref)
11145 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11146 new_stmt = gimple_build_assign (vec_dest, data_ref);
11148 new_temp = make_ssa_name (vec_dest, new_stmt);
11149 gimple_set_lhs (new_stmt, new_temp);
11150 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11152 /* Store vector loads in the corresponding SLP_NODE. */
11153 if (slp)
11154 slp_node->push_vec_def (new_stmt);
11157 if (!slp && !costing_p)
11158 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11161 if (!slp && !costing_p)
11162 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11164 if (costing_p && dump_enabled_p ())
11165 dump_printf_loc (MSG_NOTE, vect_location,
11166 "vect_model_load_cost: inside_cost = %u, "
11167 "prologue_cost = %u .\n",
11168 inside_cost, prologue_cost);
11169 return true;
11172 poly_uint64 group_elt = 0;
11173 unsigned int inside_cost = 0, prologue_cost = 0;
11174 for (j = 0; j < ncopies; j++)
11176 /* 1. Create the vector or array pointer update chain. */
11177 if (j == 0 && !costing_p)
11179 bool simd_lane_access_p
11180 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11181 if (simd_lane_access_p
11182 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11183 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11184 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11185 && integer_zerop (DR_INIT (first_dr_info->dr))
11186 && alias_sets_conflict_p (get_alias_set (aggr_type),
11187 get_alias_set (TREE_TYPE (ref_type)))
11188 && (alignment_support_scheme == dr_aligned
11189 || alignment_support_scheme == dr_unaligned_supported))
11191 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11192 dataref_offset = build_int_cst (ref_type, 0);
11194 else if (diff_first_stmt_info)
11196 dataref_ptr
11197 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11198 aggr_type, at_loop, offset, &dummy,
11199 gsi, &ptr_incr, simd_lane_access_p,
11200 bump);
11201 /* Adjust the pointer by the difference to first_stmt. */
11202 data_reference_p ptrdr
11203 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11204 tree diff
11205 = fold_convert (sizetype,
11206 size_binop (MINUS_EXPR,
11207 DR_INIT (first_dr_info->dr),
11208 DR_INIT (ptrdr)));
11209 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11210 stmt_info, diff);
11211 if (alignment_support_scheme == dr_explicit_realign)
11213 msq = vect_setup_realignment (vinfo,
11214 first_stmt_info_for_drptr, gsi,
11215 &realignment_token,
11216 alignment_support_scheme,
11217 dataref_ptr, &at_loop);
11218 gcc_assert (!compute_in_loop);
11221 else
11222 dataref_ptr
11223 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11224 at_loop,
11225 offset, &dummy, gsi, &ptr_incr,
11226 simd_lane_access_p, bump);
11227 if (mask)
11228 vec_mask = vec_masks[0];
11230 else if (!costing_p)
11232 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11233 if (dataref_offset)
11234 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11235 bump);
11236 else
11237 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11238 stmt_info, bump);
11239 if (mask)
11240 vec_mask = vec_masks[j];
11243 if (grouped_load || slp_perm)
11244 dr_chain.create (vec_num);
11246 gimple *new_stmt = NULL;
11247 for (i = 0; i < vec_num; i++)
11249 tree final_mask = NULL_TREE;
11250 tree final_len = NULL_TREE;
11251 tree bias = NULL_TREE;
11252 if (!costing_p)
11254 if (loop_masks)
11255 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11256 vec_num * ncopies, vectype,
11257 vec_num * j + i);
11258 if (vec_mask)
11259 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11260 final_mask, vec_mask, gsi);
11262 if (i > 0)
11263 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11264 gsi, stmt_info, bump);
11267 /* 2. Create the vector-load in the loop. */
11268 switch (alignment_support_scheme)
11270 case dr_aligned:
11271 case dr_unaligned_supported:
11273 if (costing_p)
11274 break;
11276 unsigned int misalign;
11277 unsigned HOST_WIDE_INT align;
11278 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11279 if (alignment_support_scheme == dr_aligned)
11280 misalign = 0;
11281 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11283 align
11284 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11285 misalign = 0;
11287 else
11288 misalign = misalignment;
11289 if (dataref_offset == NULL_TREE
11290 && TREE_CODE (dataref_ptr) == SSA_NAME)
11291 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11292 misalign);
11293 align = least_bit_hwi (misalign | align);
11295 /* Compute IFN when LOOP_LENS or final_mask valid. */
11296 machine_mode vmode = TYPE_MODE (vectype);
11297 machine_mode new_vmode = vmode;
11298 internal_fn partial_ifn = IFN_LAST;
11299 if (loop_lens)
11301 opt_machine_mode new_ovmode
11302 = get_len_load_store_mode (vmode, true, &partial_ifn);
11303 new_vmode = new_ovmode.require ();
11304 unsigned factor
11305 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
11306 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11307 vec_num * ncopies, vectype,
11308 vec_num * j + i, factor);
11310 else if (final_mask)
11312 if (!can_vec_mask_load_store_p (
11313 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11314 &partial_ifn))
11315 gcc_unreachable ();
11318 if (partial_ifn == IFN_MASK_LEN_LOAD)
11320 if (!final_len)
11322 /* Pass VF value to 'len' argument of
11323 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11324 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11326 if (!final_mask)
11328 /* Pass all ones value to 'mask' argument of
11329 MASK_LEN_LOAD if final_mask is invalid. */
11330 mask_vectype = truth_type_for (vectype);
11331 final_mask = build_minus_one_cst (mask_vectype);
11334 if (final_len)
11336 signed char biasval
11337 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11339 bias = build_int_cst (intQI_type_node, biasval);
11342 if (final_len)
11344 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11345 gcall *call;
11346 if (partial_ifn == IFN_MASK_LEN_LOAD)
11347 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11348 dataref_ptr, ptr,
11349 final_mask, final_len,
11350 bias);
11351 else
11352 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11353 dataref_ptr, ptr,
11354 final_len, bias);
11355 gimple_call_set_nothrow (call, true);
11356 new_stmt = call;
11357 data_ref = NULL_TREE;
11359 /* Need conversion if it's wrapped with VnQI. */
11360 if (vmode != new_vmode)
11362 tree new_vtype = build_vector_type_for_mode (
11363 unsigned_intQI_type_node, new_vmode);
11364 tree var
11365 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11366 gimple_set_lhs (call, var);
11367 vect_finish_stmt_generation (vinfo, stmt_info, call,
11368 gsi);
11369 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11370 new_stmt = gimple_build_assign (vec_dest,
11371 VIEW_CONVERT_EXPR, op);
11374 else if (final_mask)
11376 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11377 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11378 dataref_ptr, ptr,
11379 final_mask);
11380 gimple_call_set_nothrow (call, true);
11381 new_stmt = call;
11382 data_ref = NULL_TREE;
11384 else
11386 tree ltype = vectype;
11387 tree new_vtype = NULL_TREE;
11388 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11389 unsigned int vect_align
11390 = vect_known_alignment_in_bytes (first_dr_info, vectype);
11391 unsigned int scalar_dr_size
11392 = vect_get_scalar_dr_size (first_dr_info);
11393 /* If there's no peeling for gaps but we have a gap
11394 with slp loads then load the lower half of the
11395 vector only. See get_group_load_store_type for
11396 when we apply this optimization. */
11397 if (slp
11398 && loop_vinfo
11399 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
11400 && known_eq (nunits, (group_size - gap) * 2)
11401 && known_eq (nunits, group_size)
11402 && gap >= (vect_align / scalar_dr_size))
11404 tree half_vtype;
11405 new_vtype
11406 = vector_vector_composition_type (vectype, 2,
11407 &half_vtype);
11408 if (new_vtype != NULL_TREE)
11409 ltype = half_vtype;
11411 tree offset
11412 = (dataref_offset ? dataref_offset
11413 : build_int_cst (ref_type, 0));
11414 if (ltype != vectype
11415 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11417 unsigned HOST_WIDE_INT gap_offset
11418 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
11419 tree gapcst = build_int_cst (ref_type, gap_offset);
11420 offset = size_binop (PLUS_EXPR, offset, gapcst);
11422 data_ref
11423 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11424 if (alignment_support_scheme == dr_aligned)
11426 else
11427 TREE_TYPE (data_ref)
11428 = build_aligned_type (TREE_TYPE (data_ref),
11429 align * BITS_PER_UNIT);
11430 if (ltype != vectype)
11432 vect_copy_ref_info (data_ref,
11433 DR_REF (first_dr_info->dr));
11434 tree tem = make_ssa_name (ltype);
11435 new_stmt = gimple_build_assign (tem, data_ref);
11436 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11437 gsi);
11438 data_ref = NULL;
11439 vec<constructor_elt, va_gc> *v;
11440 vec_alloc (v, 2);
11441 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11443 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11444 build_zero_cst (ltype));
11445 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11447 else
11449 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11450 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11451 build_zero_cst (ltype));
11453 gcc_assert (new_vtype != NULL_TREE);
11454 if (new_vtype == vectype)
11455 new_stmt = gimple_build_assign (
11456 vec_dest, build_constructor (vectype, v));
11457 else
11459 tree new_vname = make_ssa_name (new_vtype);
11460 new_stmt = gimple_build_assign (
11461 new_vname, build_constructor (new_vtype, v));
11462 vect_finish_stmt_generation (vinfo, stmt_info,
11463 new_stmt, gsi);
11464 new_stmt = gimple_build_assign (
11465 vec_dest,
11466 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11470 break;
11472 case dr_explicit_realign:
11474 if (costing_p)
11475 break;
11476 tree ptr, bump;
11478 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11480 if (compute_in_loop)
11481 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11482 &realignment_token,
11483 dr_explicit_realign,
11484 dataref_ptr, NULL);
11486 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11487 ptr = copy_ssa_name (dataref_ptr);
11488 else
11489 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11490 // For explicit realign the target alignment should be
11491 // known at compile time.
11492 unsigned HOST_WIDE_INT align
11493 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11494 new_stmt = gimple_build_assign (
11495 ptr, BIT_AND_EXPR, dataref_ptr,
11496 build_int_cst (TREE_TYPE (dataref_ptr),
11497 -(HOST_WIDE_INT) align));
11498 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11499 data_ref
11500 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11501 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11502 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11503 new_stmt = gimple_build_assign (vec_dest, data_ref);
11504 new_temp = make_ssa_name (vec_dest, new_stmt);
11505 gimple_assign_set_lhs (new_stmt, new_temp);
11506 gimple_move_vops (new_stmt, stmt_info->stmt);
11507 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11508 msq = new_temp;
11510 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11511 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11512 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11513 bump);
11514 new_stmt = gimple_build_assign (
11515 NULL_TREE, BIT_AND_EXPR, ptr,
11516 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11517 if (TREE_CODE (ptr) == SSA_NAME)
11518 ptr = copy_ssa_name (ptr, new_stmt);
11519 else
11520 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11521 gimple_assign_set_lhs (new_stmt, ptr);
11522 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11523 data_ref
11524 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11525 break;
11527 case dr_explicit_realign_optimized:
11529 if (costing_p)
11530 break;
11531 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11532 new_temp = copy_ssa_name (dataref_ptr);
11533 else
11534 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11535 // We should only be doing this if we know the target
11536 // alignment at compile time.
11537 unsigned HOST_WIDE_INT align
11538 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11539 new_stmt = gimple_build_assign (
11540 new_temp, BIT_AND_EXPR, dataref_ptr,
11541 build_int_cst (TREE_TYPE (dataref_ptr),
11542 -(HOST_WIDE_INT) align));
11543 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11544 data_ref = build2 (MEM_REF, vectype, new_temp,
11545 build_int_cst (ref_type, 0));
11546 break;
11548 default:
11549 gcc_unreachable ();
11552 /* One common place to cost the above vect load for different
11553 alignment support schemes. */
11554 if (costing_p)
11556 /* For VMAT_CONTIGUOUS_PERMUTE if it's grouped load, we
11557 only need to take care of the first stmt, whose
11558 stmt_info is first_stmt_info, vec_num iterating on it
11559 will cover the cost for the remaining, it's consistent
11560 with transforming. For the prologue cost for realign,
11561 we only need to count it once for the whole group. */
11562 bool first_stmt_info_p = first_stmt_info == stmt_info;
11563 bool add_realign_cost = first_stmt_info_p && i == 0;
11564 if (memory_access_type == VMAT_CONTIGUOUS
11565 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11566 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11567 && (!grouped_load || first_stmt_info_p)))
11568 vect_get_load_cost (vinfo, stmt_info, 1,
11569 alignment_support_scheme, misalignment,
11570 add_realign_cost, &inside_cost,
11571 &prologue_cost, cost_vec, cost_vec, true);
11573 else
11575 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11576 /* DATA_REF is null if we've already built the statement. */
11577 if (data_ref)
11579 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11580 new_stmt = gimple_build_assign (vec_dest, data_ref);
11582 new_temp = make_ssa_name (vec_dest, new_stmt);
11583 gimple_set_lhs (new_stmt, new_temp);
11584 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11587 /* 3. Handle explicit realignment if necessary/supported.
11588 Create in loop:
11589 vec_dest = realign_load (msq, lsq, realignment_token) */
11590 if (!costing_p
11591 && (alignment_support_scheme == dr_explicit_realign_optimized
11592 || alignment_support_scheme == dr_explicit_realign))
11594 lsq = gimple_assign_lhs (new_stmt);
11595 if (!realignment_token)
11596 realignment_token = dataref_ptr;
11597 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11598 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11599 lsq, realignment_token);
11600 new_temp = make_ssa_name (vec_dest, new_stmt);
11601 gimple_assign_set_lhs (new_stmt, new_temp);
11602 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11604 if (alignment_support_scheme == dr_explicit_realign_optimized)
11606 gcc_assert (phi);
11607 if (i == vec_num - 1 && j == ncopies - 1)
11608 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11609 UNKNOWN_LOCATION);
11610 msq = lsq;
11614 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11616 if (costing_p)
11617 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11618 stmt_info, 0, vect_body);
11619 else
11621 tree perm_mask = perm_mask_for_reverse (vectype);
11622 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11623 perm_mask, stmt_info, gsi);
11624 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11628 /* Collect vector loads and later create their permutation in
11629 vect_transform_grouped_load (). */
11630 if (!costing_p && (grouped_load || slp_perm))
11631 dr_chain.quick_push (new_temp);
11633 /* Store vector loads in the corresponding SLP_NODE. */
11634 if (!costing_p && slp && !slp_perm)
11635 slp_node->push_vec_def (new_stmt);
11637 /* With SLP permutation we load the gaps as well, without
11638 we need to skip the gaps after we manage to fully load
11639 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11640 group_elt += nunits;
11641 if (!costing_p
11642 && maybe_ne (group_gap_adj, 0U)
11643 && !slp_perm
11644 && known_eq (group_elt, group_size - group_gap_adj))
11646 poly_wide_int bump_val
11647 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11648 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11649 == -1)
11650 bump_val = -bump_val;
11651 tree bump = wide_int_to_tree (sizetype, bump_val);
11652 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11653 stmt_info, bump);
11654 group_elt = 0;
11657 /* Bump the vector pointer to account for a gap or for excess
11658 elements loaded for a permuted SLP load. */
11659 if (!costing_p
11660 && maybe_ne (group_gap_adj, 0U)
11661 && slp_perm)
11663 poly_wide_int bump_val
11664 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11665 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11666 bump_val = -bump_val;
11667 tree bump = wide_int_to_tree (sizetype, bump_val);
11668 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11669 stmt_info, bump);
11672 if (slp && !slp_perm)
11673 continue;
11675 if (slp_perm)
11677 unsigned n_perms;
11678 /* For SLP we know we've seen all possible uses of dr_chain so
11679 direct vect_transform_slp_perm_load to DCE the unused parts.
11680 ??? This is a hack to prevent compile-time issues as seen
11681 in PR101120 and friends. */
11682 if (costing_p)
11684 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11685 true, &n_perms, nullptr);
11686 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11687 stmt_info, 0, vect_body);
11689 else
11691 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11692 gsi, vf, false, &n_perms,
11693 nullptr, true);
11694 gcc_assert (ok);
11697 else
11699 if (grouped_load)
11701 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11702 /* We assume that the cost of a single load-lanes instruction
11703 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11704 If a grouped access is instead being provided by a
11705 load-and-permute operation, include the cost of the
11706 permutes. */
11707 if (costing_p && first_stmt_info == stmt_info)
11709 /* Uses an even and odd extract operations or shuffle
11710 operations for each needed permute. */
11711 int group_size = DR_GROUP_SIZE (first_stmt_info);
11712 int nstmts = ceil_log2 (group_size) * group_size;
11713 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11714 stmt_info, 0, vect_body);
11716 if (dump_enabled_p ())
11717 dump_printf_loc (MSG_NOTE, vect_location,
11718 "vect_model_load_cost:"
11719 "strided group_size = %d .\n",
11720 group_size);
11722 else if (!costing_p)
11724 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11725 group_size, gsi);
11726 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11729 else if (!costing_p)
11730 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11732 dr_chain.release ();
11734 if (!slp && !costing_p)
11735 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11737 if (costing_p)
11739 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11740 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11741 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11742 if (dump_enabled_p ())
11743 dump_printf_loc (MSG_NOTE, vect_location,
11744 "vect_model_load_cost: inside_cost = %u, "
11745 "prologue_cost = %u .\n",
11746 inside_cost, prologue_cost);
11749 return true;
11752 /* Function vect_is_simple_cond.
11754 Input:
11755 LOOP - the loop that is being vectorized.
11756 COND - Condition that is checked for simple use.
11758 Output:
11759 *COMP_VECTYPE - the vector type for the comparison.
11760 *DTS - The def types for the arguments of the comparison
11762 Returns whether a COND can be vectorized. Checks whether
11763 condition operands are supportable using vec_is_simple_use. */
11765 static bool
11766 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11767 slp_tree slp_node, tree *comp_vectype,
11768 enum vect_def_type *dts, tree vectype)
11770 tree lhs, rhs;
11771 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11772 slp_tree slp_op;
11774 /* Mask case. */
11775 if (TREE_CODE (cond) == SSA_NAME
11776 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11778 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11779 &slp_op, &dts[0], comp_vectype)
11780 || !*comp_vectype
11781 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11782 return false;
11783 return true;
11786 if (!COMPARISON_CLASS_P (cond))
11787 return false;
11789 lhs = TREE_OPERAND (cond, 0);
11790 rhs = TREE_OPERAND (cond, 1);
11792 if (TREE_CODE (lhs) == SSA_NAME)
11794 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11795 &lhs, &slp_op, &dts[0], &vectype1))
11796 return false;
11798 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11799 || TREE_CODE (lhs) == FIXED_CST)
11800 dts[0] = vect_constant_def;
11801 else
11802 return false;
11804 if (TREE_CODE (rhs) == SSA_NAME)
11806 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11807 &rhs, &slp_op, &dts[1], &vectype2))
11808 return false;
11810 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11811 || TREE_CODE (rhs) == FIXED_CST)
11812 dts[1] = vect_constant_def;
11813 else
11814 return false;
11816 if (vectype1 && vectype2
11817 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11818 TYPE_VECTOR_SUBPARTS (vectype2)))
11819 return false;
11821 *comp_vectype = vectype1 ? vectype1 : vectype2;
11822 /* Invariant comparison. */
11823 if (! *comp_vectype)
11825 tree scalar_type = TREE_TYPE (lhs);
11826 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11827 *comp_vectype = truth_type_for (vectype);
11828 else
11830 /* If we can widen the comparison to match vectype do so. */
11831 if (INTEGRAL_TYPE_P (scalar_type)
11832 && !slp_node
11833 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11834 TYPE_SIZE (TREE_TYPE (vectype))))
11835 scalar_type = build_nonstandard_integer_type
11836 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
11837 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11838 slp_node);
11842 return true;
11845 /* vectorizable_condition.
11847 Check if STMT_INFO is conditional modify expression that can be vectorized.
11848 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11849 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11850 at GSI.
11852 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11854 Return true if STMT_INFO is vectorizable in this way. */
11856 static bool
11857 vectorizable_condition (vec_info *vinfo,
11858 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11859 gimple **vec_stmt,
11860 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11862 tree scalar_dest = NULL_TREE;
11863 tree vec_dest = NULL_TREE;
11864 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
11865 tree then_clause, else_clause;
11866 tree comp_vectype = NULL_TREE;
11867 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
11868 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
11869 tree vec_compare;
11870 tree new_temp;
11871 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11872 enum vect_def_type dts[4]
11873 = {vect_unknown_def_type, vect_unknown_def_type,
11874 vect_unknown_def_type, vect_unknown_def_type};
11875 int ndts = 4;
11876 int ncopies;
11877 int vec_num;
11878 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11879 int i;
11880 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11881 vec<tree> vec_oprnds0 = vNULL;
11882 vec<tree> vec_oprnds1 = vNULL;
11883 vec<tree> vec_oprnds2 = vNULL;
11884 vec<tree> vec_oprnds3 = vNULL;
11885 tree vec_cmp_type;
11886 bool masked = false;
11888 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11889 return false;
11891 /* Is vectorizable conditional operation? */
11892 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11893 if (!stmt)
11894 return false;
11896 code = gimple_assign_rhs_code (stmt);
11897 if (code != COND_EXPR)
11898 return false;
11900 stmt_vec_info reduc_info = NULL;
11901 int reduc_index = -1;
11902 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
11903 bool for_reduction
11904 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
11905 if (for_reduction)
11907 if (slp_node)
11908 return false;
11909 reduc_info = info_for_reduction (vinfo, stmt_info);
11910 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
11911 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
11912 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
11913 || reduc_index != -1);
11915 else
11917 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11918 return false;
11921 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11922 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11924 if (slp_node)
11926 ncopies = 1;
11927 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
11929 else
11931 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11932 vec_num = 1;
11935 gcc_assert (ncopies >= 1);
11936 if (for_reduction && ncopies > 1)
11937 return false; /* FORNOW */
11939 cond_expr = gimple_assign_rhs1 (stmt);
11941 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
11942 &comp_vectype, &dts[0], vectype)
11943 || !comp_vectype)
11944 return false;
11946 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
11947 slp_tree then_slp_node, else_slp_node;
11948 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
11949 &then_clause, &then_slp_node, &dts[2], &vectype1))
11950 return false;
11951 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
11952 &else_clause, &else_slp_node, &dts[3], &vectype2))
11953 return false;
11955 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
11956 return false;
11958 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
11959 return false;
11961 masked = !COMPARISON_CLASS_P (cond_expr);
11962 vec_cmp_type = truth_type_for (comp_vectype);
11964 if (vec_cmp_type == NULL_TREE)
11965 return false;
11967 cond_code = TREE_CODE (cond_expr);
11968 if (!masked)
11970 cond_expr0 = TREE_OPERAND (cond_expr, 0);
11971 cond_expr1 = TREE_OPERAND (cond_expr, 1);
11974 /* For conditional reductions, the "then" value needs to be the candidate
11975 value calculated by this iteration while the "else" value needs to be
11976 the result carried over from previous iterations. If the COND_EXPR
11977 is the other way around, we need to swap it. */
11978 bool must_invert_cmp_result = false;
11979 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
11981 if (masked)
11982 must_invert_cmp_result = true;
11983 else
11985 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
11986 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
11987 if (new_code == ERROR_MARK)
11988 must_invert_cmp_result = true;
11989 else
11991 cond_code = new_code;
11992 /* Make sure we don't accidentally use the old condition. */
11993 cond_expr = NULL_TREE;
11996 std::swap (then_clause, else_clause);
11999 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12001 /* Boolean values may have another representation in vectors
12002 and therefore we prefer bit operations over comparison for
12003 them (which also works for scalar masks). We store opcodes
12004 to use in bitop1 and bitop2. Statement is vectorized as
12005 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12006 depending on bitop1 and bitop2 arity. */
12007 switch (cond_code)
12009 case GT_EXPR:
12010 bitop1 = BIT_NOT_EXPR;
12011 bitop2 = BIT_AND_EXPR;
12012 break;
12013 case GE_EXPR:
12014 bitop1 = BIT_NOT_EXPR;
12015 bitop2 = BIT_IOR_EXPR;
12016 break;
12017 case LT_EXPR:
12018 bitop1 = BIT_NOT_EXPR;
12019 bitop2 = BIT_AND_EXPR;
12020 std::swap (cond_expr0, cond_expr1);
12021 break;
12022 case LE_EXPR:
12023 bitop1 = BIT_NOT_EXPR;
12024 bitop2 = BIT_IOR_EXPR;
12025 std::swap (cond_expr0, cond_expr1);
12026 break;
12027 case NE_EXPR:
12028 bitop1 = BIT_XOR_EXPR;
12029 break;
12030 case EQ_EXPR:
12031 bitop1 = BIT_XOR_EXPR;
12032 bitop2 = BIT_NOT_EXPR;
12033 break;
12034 default:
12035 return false;
12037 cond_code = SSA_NAME;
12040 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12041 && reduction_type == EXTRACT_LAST_REDUCTION
12042 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12044 if (dump_enabled_p ())
12045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12046 "reduction comparison operation not supported.\n");
12047 return false;
12050 if (!vec_stmt)
12052 if (bitop1 != NOP_EXPR)
12054 machine_mode mode = TYPE_MODE (comp_vectype);
12055 optab optab;
12057 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12058 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12059 return false;
12061 if (bitop2 != NOP_EXPR)
12063 optab = optab_for_tree_code (bitop2, comp_vectype,
12064 optab_default);
12065 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12066 return false;
12070 vect_cost_for_stmt kind = vector_stmt;
12071 if (reduction_type == EXTRACT_LAST_REDUCTION)
12072 /* Count one reduction-like operation per vector. */
12073 kind = vec_to_scalar;
12074 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12075 && (masked
12076 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12077 cond_code)
12078 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12079 ERROR_MARK))))
12080 return false;
12082 if (slp_node
12083 && (!vect_maybe_update_slp_op_vectype
12084 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12085 || (op_adjust == 1
12086 && !vect_maybe_update_slp_op_vectype
12087 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12088 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12089 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12091 if (dump_enabled_p ())
12092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12093 "incompatible vector types for invariants\n");
12094 return false;
12097 if (loop_vinfo && for_reduction
12098 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12100 if (reduction_type == EXTRACT_LAST_REDUCTION)
12102 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12103 vectype, OPTIMIZE_FOR_SPEED))
12104 vect_record_loop_len (loop_vinfo,
12105 &LOOP_VINFO_LENS (loop_vinfo),
12106 ncopies * vec_num, vectype, 1);
12107 else
12108 vect_record_loop_mask (loop_vinfo,
12109 &LOOP_VINFO_MASKS (loop_vinfo),
12110 ncopies * vec_num, vectype, NULL);
12112 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12113 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12115 if (dump_enabled_p ())
12116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12117 "conditional reduction prevents the use"
12118 " of partial vectors.\n");
12119 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12123 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12124 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
12125 cost_vec, kind);
12126 return true;
12129 /* Transform. */
12131 /* Handle def. */
12132 scalar_dest = gimple_assign_lhs (stmt);
12133 if (reduction_type != EXTRACT_LAST_REDUCTION)
12134 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12136 bool swap_cond_operands = false;
12138 /* See whether another part of the vectorized code applies a loop
12139 mask to the condition, or to its inverse. */
12141 vec_loop_masks *masks = NULL;
12142 vec_loop_lens *lens = NULL;
12143 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12145 if (reduction_type == EXTRACT_LAST_REDUCTION)
12146 lens = &LOOP_VINFO_LENS (loop_vinfo);
12148 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12150 if (reduction_type == EXTRACT_LAST_REDUCTION)
12151 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12152 else
12154 scalar_cond_masked_key cond (cond_expr, ncopies);
12155 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12156 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12157 else
12159 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12160 tree_code orig_code = cond.code;
12161 cond.code = invert_tree_comparison (cond.code, honor_nans);
12162 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12164 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12165 cond_code = cond.code;
12166 swap_cond_operands = true;
12168 else
12170 /* Try the inverse of the current mask. We check if the
12171 inverse mask is live and if so we generate a negate of
12172 the current mask such that we still honor NaNs. */
12173 cond.inverted_p = true;
12174 cond.code = orig_code;
12175 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12177 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12178 cond_code = cond.code;
12179 swap_cond_operands = true;
12180 must_invert_cmp_result = true;
12187 /* Handle cond expr. */
12188 if (masked)
12189 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12190 cond_expr, &vec_oprnds0, comp_vectype,
12191 then_clause, &vec_oprnds2, vectype,
12192 reduction_type != EXTRACT_LAST_REDUCTION
12193 ? else_clause : NULL, &vec_oprnds3, vectype);
12194 else
12195 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12196 cond_expr0, &vec_oprnds0, comp_vectype,
12197 cond_expr1, &vec_oprnds1, comp_vectype,
12198 then_clause, &vec_oprnds2, vectype,
12199 reduction_type != EXTRACT_LAST_REDUCTION
12200 ? else_clause : NULL, &vec_oprnds3, vectype);
12202 /* Arguments are ready. Create the new vector stmt. */
12203 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12205 vec_then_clause = vec_oprnds2[i];
12206 if (reduction_type != EXTRACT_LAST_REDUCTION)
12207 vec_else_clause = vec_oprnds3[i];
12209 if (swap_cond_operands)
12210 std::swap (vec_then_clause, vec_else_clause);
12212 if (masked)
12213 vec_compare = vec_cond_lhs;
12214 else
12216 vec_cond_rhs = vec_oprnds1[i];
12217 if (bitop1 == NOP_EXPR)
12219 gimple_seq stmts = NULL;
12220 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12221 vec_cond_lhs, vec_cond_rhs);
12222 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12224 else
12226 new_temp = make_ssa_name (vec_cmp_type);
12227 gassign *new_stmt;
12228 if (bitop1 == BIT_NOT_EXPR)
12229 new_stmt = gimple_build_assign (new_temp, bitop1,
12230 vec_cond_rhs);
12231 else
12232 new_stmt
12233 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12234 vec_cond_rhs);
12235 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12236 if (bitop2 == NOP_EXPR)
12237 vec_compare = new_temp;
12238 else if (bitop2 == BIT_NOT_EXPR
12239 && reduction_type != EXTRACT_LAST_REDUCTION)
12241 /* Instead of doing ~x ? y : z do x ? z : y. */
12242 vec_compare = new_temp;
12243 std::swap (vec_then_clause, vec_else_clause);
12245 else
12247 vec_compare = make_ssa_name (vec_cmp_type);
12248 if (bitop2 == BIT_NOT_EXPR)
12249 new_stmt
12250 = gimple_build_assign (vec_compare, bitop2, new_temp);
12251 else
12252 new_stmt
12253 = gimple_build_assign (vec_compare, bitop2,
12254 vec_cond_lhs, new_temp);
12255 vect_finish_stmt_generation (vinfo, stmt_info,
12256 new_stmt, gsi);
12261 /* If we decided to apply a loop mask to the result of the vector
12262 comparison, AND the comparison with the mask now. Later passes
12263 should then be able to reuse the AND results between mulitple
12264 vector statements.
12266 For example:
12267 for (int i = 0; i < 100; ++i)
12268 x[i] = y[i] ? z[i] : 10;
12270 results in following optimized GIMPLE:
12272 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12273 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12274 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12275 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12276 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12277 vect_iftmp.11_47, { 10, ... }>;
12279 instead of using a masked and unmasked forms of
12280 vec != { 0, ... } (masked in the MASK_LOAD,
12281 unmasked in the VEC_COND_EXPR). */
12283 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12284 in cases where that's necessary. */
12286 tree len = NULL_TREE, bias = NULL_TREE;
12287 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12289 if (!is_gimple_val (vec_compare))
12291 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12292 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12293 vec_compare);
12294 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12295 vec_compare = vec_compare_name;
12298 if (must_invert_cmp_result)
12300 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12301 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12302 BIT_NOT_EXPR,
12303 vec_compare);
12304 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12305 vec_compare = vec_compare_name;
12308 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12309 vectype, OPTIMIZE_FOR_SPEED))
12311 if (lens)
12313 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12314 vec_num * ncopies, vectype, i, 1);
12315 signed char biasval
12316 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12317 bias = build_int_cst (intQI_type_node, biasval);
12319 else
12321 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12322 bias = build_int_cst (intQI_type_node, 0);
12325 if (masks)
12327 tree loop_mask
12328 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12329 vectype, i);
12330 tree tmp2 = make_ssa_name (vec_cmp_type);
12331 gassign *g
12332 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12333 loop_mask);
12334 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12335 vec_compare = tmp2;
12339 gimple *new_stmt;
12340 if (reduction_type == EXTRACT_LAST_REDUCTION)
12342 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12343 tree lhs = gimple_get_lhs (old_stmt);
12344 if (len)
12345 new_stmt = gimple_build_call_internal
12346 (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
12347 vec_then_clause, len, bias);
12348 else
12349 new_stmt = gimple_build_call_internal
12350 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
12351 vec_then_clause);
12352 gimple_call_set_lhs (new_stmt, lhs);
12353 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12354 if (old_stmt == gsi_stmt (*gsi))
12355 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12356 else
12358 /* In this case we're moving the definition to later in the
12359 block. That doesn't matter because the only uses of the
12360 lhs are in phi statements. */
12361 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12362 gsi_remove (&old_gsi, true);
12363 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12366 else
12368 new_temp = make_ssa_name (vec_dest);
12369 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12370 vec_then_clause, vec_else_clause);
12371 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12373 if (slp_node)
12374 slp_node->push_vec_def (new_stmt);
12375 else
12376 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12379 if (!slp_node)
12380 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12382 vec_oprnds0.release ();
12383 vec_oprnds1.release ();
12384 vec_oprnds2.release ();
12385 vec_oprnds3.release ();
12387 return true;
12390 /* Helper of vectorizable_comparison.
12392 Check if STMT_INFO is comparison expression CODE that can be vectorized.
12393 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12394 comparison, put it in VEC_STMT, and insert it at GSI.
12396 Return true if STMT_INFO is vectorizable in this way. */
12398 static bool
12399 vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12400 stmt_vec_info stmt_info, tree_code code,
12401 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12402 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12404 tree lhs, rhs1, rhs2;
12405 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12406 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12407 tree new_temp;
12408 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12409 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12410 int ndts = 2;
12411 poly_uint64 nunits;
12412 int ncopies;
12413 enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12414 int i;
12415 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12416 vec<tree> vec_oprnds0 = vNULL;
12417 vec<tree> vec_oprnds1 = vNULL;
12418 tree mask_type;
12419 tree mask;
12421 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12422 return false;
12424 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12425 return false;
12427 mask_type = vectype;
12428 nunits = TYPE_VECTOR_SUBPARTS (vectype);
12430 if (slp_node)
12431 ncopies = 1;
12432 else
12433 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12435 gcc_assert (ncopies >= 1);
12437 if (TREE_CODE_CLASS (code) != tcc_comparison)
12438 return false;
12440 slp_tree slp_rhs1, slp_rhs2;
12441 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12442 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12443 return false;
12445 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12446 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12447 return false;
12449 if (vectype1 && vectype2
12450 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12451 TYPE_VECTOR_SUBPARTS (vectype2)))
12452 return false;
12454 vectype = vectype1 ? vectype1 : vectype2;
12456 /* Invariant comparison. */
12457 if (!vectype)
12459 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
12460 vectype = mask_type;
12461 else
12462 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
12463 slp_node);
12464 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12465 return false;
12467 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12468 return false;
12470 /* Can't compare mask and non-mask types. */
12471 if (vectype1 && vectype2
12472 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12473 return false;
12475 /* Boolean values may have another representation in vectors
12476 and therefore we prefer bit operations over comparison for
12477 them (which also works for scalar masks). We store opcodes
12478 to use in bitop1 and bitop2. Statement is vectorized as
12479 BITOP2 (rhs1 BITOP1 rhs2) or
12480 rhs1 BITOP2 (BITOP1 rhs2)
12481 depending on bitop1 and bitop2 arity. */
12482 bool swap_p = false;
12483 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12485 if (code == GT_EXPR)
12487 bitop1 = BIT_NOT_EXPR;
12488 bitop2 = BIT_AND_EXPR;
12490 else if (code == GE_EXPR)
12492 bitop1 = BIT_NOT_EXPR;
12493 bitop2 = BIT_IOR_EXPR;
12495 else if (code == LT_EXPR)
12497 bitop1 = BIT_NOT_EXPR;
12498 bitop2 = BIT_AND_EXPR;
12499 swap_p = true;
12501 else if (code == LE_EXPR)
12503 bitop1 = BIT_NOT_EXPR;
12504 bitop2 = BIT_IOR_EXPR;
12505 swap_p = true;
12507 else
12509 bitop1 = BIT_XOR_EXPR;
12510 if (code == EQ_EXPR)
12511 bitop2 = BIT_NOT_EXPR;
12515 if (!vec_stmt)
12517 if (bitop1 == NOP_EXPR)
12519 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12520 return false;
12522 else
12524 machine_mode mode = TYPE_MODE (vectype);
12525 optab optab;
12527 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12528 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12529 return false;
12531 if (bitop2 != NOP_EXPR)
12533 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12534 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12535 return false;
12539 /* Put types on constant and invariant SLP children. */
12540 if (slp_node
12541 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12542 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12544 if (dump_enabled_p ())
12545 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12546 "incompatible vector types for invariants\n");
12547 return false;
12550 vect_model_simple_cost (vinfo, stmt_info,
12551 ncopies * (1 + (bitop2 != NOP_EXPR)),
12552 dts, ndts, slp_node, cost_vec);
12553 return true;
12556 /* Transform. */
12558 /* Handle def. */
12559 lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info));
12560 mask = vect_create_destination_var (lhs, mask_type);
12562 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12563 rhs1, &vec_oprnds0, vectype,
12564 rhs2, &vec_oprnds1, vectype);
12565 if (swap_p)
12566 std::swap (vec_oprnds0, vec_oprnds1);
12568 /* Arguments are ready. Create the new vector stmt. */
12569 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12571 gimple *new_stmt;
12572 vec_rhs2 = vec_oprnds1[i];
12574 new_temp = make_ssa_name (mask);
12575 if (bitop1 == NOP_EXPR)
12577 new_stmt = gimple_build_assign (new_temp, code,
12578 vec_rhs1, vec_rhs2);
12579 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12581 else
12583 if (bitop1 == BIT_NOT_EXPR)
12584 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12585 else
12586 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12587 vec_rhs2);
12588 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12589 if (bitop2 != NOP_EXPR)
12591 tree res = make_ssa_name (mask);
12592 if (bitop2 == BIT_NOT_EXPR)
12593 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12594 else
12595 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12596 new_temp);
12597 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12600 if (slp_node)
12601 slp_node->push_vec_def (new_stmt);
12602 else
12603 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12606 if (!slp_node)
12607 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12609 vec_oprnds0.release ();
12610 vec_oprnds1.release ();
12612 return true;
12615 /* vectorizable_comparison.
12617 Check if STMT_INFO is comparison expression that can be vectorized.
12618 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12619 comparison, put it in VEC_STMT, and insert it at GSI.
12621 Return true if STMT_INFO is vectorizable in this way. */
12623 static bool
12624 vectorizable_comparison (vec_info *vinfo,
12625 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12626 gimple **vec_stmt,
12627 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12629 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12631 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12632 return false;
12634 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12635 return false;
12637 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12638 if (!stmt)
12639 return false;
12641 enum tree_code code = gimple_assign_rhs_code (stmt);
12642 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12643 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12644 vec_stmt, slp_node, cost_vec))
12645 return false;
12647 if (!vec_stmt)
12648 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12650 return true;
12653 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12654 can handle all live statements in the node. Otherwise return true
12655 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12656 VEC_STMT_P is as for vectorizable_live_operation. */
12658 static bool
12659 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
12660 slp_tree slp_node, slp_instance slp_node_instance,
12661 bool vec_stmt_p,
12662 stmt_vector_for_cost *cost_vec)
12664 if (slp_node)
12666 stmt_vec_info slp_stmt_info;
12667 unsigned int i;
12668 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
12670 if (STMT_VINFO_LIVE_P (slp_stmt_info)
12671 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
12672 slp_node_instance, i,
12673 vec_stmt_p, cost_vec))
12674 return false;
12677 else if (STMT_VINFO_LIVE_P (stmt_info)
12678 && !vectorizable_live_operation (vinfo, stmt_info,
12679 slp_node, slp_node_instance, -1,
12680 vec_stmt_p, cost_vec))
12681 return false;
12683 return true;
12686 /* Make sure the statement is vectorizable. */
12688 opt_result
12689 vect_analyze_stmt (vec_info *vinfo,
12690 stmt_vec_info stmt_info, bool *need_to_vectorize,
12691 slp_tree node, slp_instance node_instance,
12692 stmt_vector_for_cost *cost_vec)
12694 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12695 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
12696 bool ok;
12697 gimple_seq pattern_def_seq;
12699 if (dump_enabled_p ())
12700 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
12701 stmt_info->stmt);
12703 if (gimple_has_volatile_ops (stmt_info->stmt))
12704 return opt_result::failure_at (stmt_info->stmt,
12705 "not vectorized:"
12706 " stmt has volatile operands: %G\n",
12707 stmt_info->stmt);
12709 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12710 && node == NULL
12711 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
12713 gimple_stmt_iterator si;
12715 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
12717 stmt_vec_info pattern_def_stmt_info
12718 = vinfo->lookup_stmt (gsi_stmt (si));
12719 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
12720 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
12722 /* Analyze def stmt of STMT if it's a pattern stmt. */
12723 if (dump_enabled_p ())
12724 dump_printf_loc (MSG_NOTE, vect_location,
12725 "==> examining pattern def statement: %G",
12726 pattern_def_stmt_info->stmt);
12728 opt_result res
12729 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
12730 need_to_vectorize, node, node_instance,
12731 cost_vec);
12732 if (!res)
12733 return res;
12738 /* Skip stmts that do not need to be vectorized. In loops this is expected
12739 to include:
12740 - the COND_EXPR which is the loop exit condition
12741 - any LABEL_EXPRs in the loop
12742 - computations that are used only for array indexing or loop control.
12743 In basic blocks we only analyze statements that are a part of some SLP
12744 instance, therefore, all the statements are relevant.
12746 Pattern statement needs to be analyzed instead of the original statement
12747 if the original statement is not relevant. Otherwise, we analyze both
12748 statements. In basic blocks we are called from some SLP instance
12749 traversal, don't analyze pattern stmts instead, the pattern stmts
12750 already will be part of SLP instance. */
12752 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
12753 if (!STMT_VINFO_RELEVANT_P (stmt_info)
12754 && !STMT_VINFO_LIVE_P (stmt_info))
12756 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12757 && pattern_stmt_info
12758 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12759 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12761 /* Analyze PATTERN_STMT instead of the original stmt. */
12762 stmt_info = pattern_stmt_info;
12763 if (dump_enabled_p ())
12764 dump_printf_loc (MSG_NOTE, vect_location,
12765 "==> examining pattern statement: %G",
12766 stmt_info->stmt);
12768 else
12770 if (dump_enabled_p ())
12771 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
12773 return opt_result::success ();
12776 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12777 && node == NULL
12778 && pattern_stmt_info
12779 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12780 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12782 /* Analyze PATTERN_STMT too. */
12783 if (dump_enabled_p ())
12784 dump_printf_loc (MSG_NOTE, vect_location,
12785 "==> examining pattern statement: %G",
12786 pattern_stmt_info->stmt);
12788 opt_result res
12789 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
12790 node_instance, cost_vec);
12791 if (!res)
12792 return res;
12795 switch (STMT_VINFO_DEF_TYPE (stmt_info))
12797 case vect_internal_def:
12798 break;
12800 case vect_reduction_def:
12801 case vect_nested_cycle:
12802 gcc_assert (!bb_vinfo
12803 && (relevance == vect_used_in_outer
12804 || relevance == vect_used_in_outer_by_reduction
12805 || relevance == vect_used_by_reduction
12806 || relevance == vect_unused_in_scope
12807 || relevance == vect_used_only_live));
12808 break;
12810 case vect_induction_def:
12811 case vect_first_order_recurrence:
12812 gcc_assert (!bb_vinfo);
12813 break;
12815 case vect_constant_def:
12816 case vect_external_def:
12817 case vect_unknown_def_type:
12818 default:
12819 gcc_unreachable ();
12822 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12823 if (node)
12824 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
12826 if (STMT_VINFO_RELEVANT_P (stmt_info))
12828 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
12829 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12830 || (call && gimple_call_lhs (call) == NULL_TREE));
12831 *need_to_vectorize = true;
12834 if (PURE_SLP_STMT (stmt_info) && !node)
12836 if (dump_enabled_p ())
12837 dump_printf_loc (MSG_NOTE, vect_location,
12838 "handled only by SLP analysis\n");
12839 return opt_result::success ();
12842 ok = true;
12843 if (!bb_vinfo
12844 && (STMT_VINFO_RELEVANT_P (stmt_info)
12845 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
12846 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12847 -mveclibabi= takes preference over library functions with
12848 the simd attribute. */
12849 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12850 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
12851 cost_vec)
12852 || vectorizable_conversion (vinfo, stmt_info,
12853 NULL, NULL, node, cost_vec)
12854 || vectorizable_operation (vinfo, stmt_info,
12855 NULL, NULL, node, cost_vec)
12856 || vectorizable_assignment (vinfo, stmt_info,
12857 NULL, NULL, node, cost_vec)
12858 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12859 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12860 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12861 node, node_instance, cost_vec)
12862 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
12863 NULL, node, cost_vec)
12864 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12865 || vectorizable_condition (vinfo, stmt_info,
12866 NULL, NULL, node, cost_vec)
12867 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12868 cost_vec)
12869 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12870 stmt_info, NULL, node)
12871 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12872 stmt_info, NULL, node, cost_vec));
12873 else
12875 if (bb_vinfo)
12876 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12877 || vectorizable_simd_clone_call (vinfo, stmt_info,
12878 NULL, NULL, node, cost_vec)
12879 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
12880 cost_vec)
12881 || vectorizable_shift (vinfo, stmt_info,
12882 NULL, NULL, node, cost_vec)
12883 || vectorizable_operation (vinfo, stmt_info,
12884 NULL, NULL, node, cost_vec)
12885 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
12886 cost_vec)
12887 || vectorizable_load (vinfo, stmt_info,
12888 NULL, NULL, node, cost_vec)
12889 || vectorizable_store (vinfo, stmt_info,
12890 NULL, NULL, node, cost_vec)
12891 || vectorizable_condition (vinfo, stmt_info,
12892 NULL, NULL, node, cost_vec)
12893 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12894 cost_vec)
12895 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12898 if (node)
12899 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12901 if (!ok)
12902 return opt_result::failure_at (stmt_info->stmt,
12903 "not vectorized:"
12904 " relevant stmt not supported: %G",
12905 stmt_info->stmt);
12907 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
12908 need extra handling, except for vectorizable reductions. */
12909 if (!bb_vinfo
12910 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
12911 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
12912 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
12913 stmt_info, node, node_instance,
12914 false, cost_vec))
12915 return opt_result::failure_at (stmt_info->stmt,
12916 "not vectorized:"
12917 " live stmt not supported: %G",
12918 stmt_info->stmt);
12920 return opt_result::success ();
12924 /* Function vect_transform_stmt.
12926 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
12928 bool
12929 vect_transform_stmt (vec_info *vinfo,
12930 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12931 slp_tree slp_node, slp_instance slp_node_instance)
12933 bool is_store = false;
12934 gimple *vec_stmt = NULL;
12935 bool done;
12937 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
12939 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12940 if (slp_node)
12941 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
12943 switch (STMT_VINFO_TYPE (stmt_info))
12945 case type_demotion_vec_info_type:
12946 case type_promotion_vec_info_type:
12947 case type_conversion_vec_info_type:
12948 done = vectorizable_conversion (vinfo, stmt_info,
12949 gsi, &vec_stmt, slp_node, NULL);
12950 gcc_assert (done);
12951 break;
12953 case induc_vec_info_type:
12954 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
12955 stmt_info, &vec_stmt, slp_node,
12956 NULL);
12957 gcc_assert (done);
12958 break;
12960 case shift_vec_info_type:
12961 done = vectorizable_shift (vinfo, stmt_info,
12962 gsi, &vec_stmt, slp_node, NULL);
12963 gcc_assert (done);
12964 break;
12966 case op_vec_info_type:
12967 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12968 NULL);
12969 gcc_assert (done);
12970 break;
12972 case assignment_vec_info_type:
12973 done = vectorizable_assignment (vinfo, stmt_info,
12974 gsi, &vec_stmt, slp_node, NULL);
12975 gcc_assert (done);
12976 break;
12978 case load_vec_info_type:
12979 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12980 NULL);
12981 gcc_assert (done);
12982 break;
12984 case store_vec_info_type:
12985 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
12986 && !slp_node
12987 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
12988 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
12989 /* In case of interleaving, the whole chain is vectorized when the
12990 last store in the chain is reached. Store stmts before the last
12991 one are skipped, and there vec_stmt_info shouldn't be freed
12992 meanwhile. */
12994 else
12996 done = vectorizable_store (vinfo, stmt_info,
12997 gsi, &vec_stmt, slp_node, NULL);
12998 gcc_assert (done);
12999 is_store = true;
13001 break;
13003 case condition_vec_info_type:
13004 done = vectorizable_condition (vinfo, stmt_info,
13005 gsi, &vec_stmt, slp_node, NULL);
13006 gcc_assert (done);
13007 break;
13009 case comparison_vec_info_type:
13010 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
13011 slp_node, NULL);
13012 gcc_assert (done);
13013 break;
13015 case call_vec_info_type:
13016 done = vectorizable_call (vinfo, stmt_info,
13017 gsi, &vec_stmt, slp_node, NULL);
13018 break;
13020 case call_simd_clone_vec_info_type:
13021 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
13022 slp_node, NULL);
13023 break;
13025 case reduc_vec_info_type:
13026 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13027 gsi, &vec_stmt, slp_node);
13028 gcc_assert (done);
13029 break;
13031 case cycle_phi_info_type:
13032 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
13033 &vec_stmt, slp_node, slp_node_instance);
13034 gcc_assert (done);
13035 break;
13037 case lc_phi_info_type:
13038 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13039 stmt_info, &vec_stmt, slp_node);
13040 gcc_assert (done);
13041 break;
13043 case recurr_info_type:
13044 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13045 stmt_info, &vec_stmt, slp_node, NULL);
13046 gcc_assert (done);
13047 break;
13049 case phi_info_type:
13050 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
13051 gcc_assert (done);
13052 break;
13054 default:
13055 if (!STMT_VINFO_LIVE_P (stmt_info))
13057 if (dump_enabled_p ())
13058 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13059 "stmt not supported.\n");
13060 gcc_unreachable ();
13062 done = true;
13065 if (!slp_node && vec_stmt)
13066 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
13068 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
13070 /* Handle stmts whose DEF is used outside the loop-nest that is
13071 being vectorized. */
13072 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13073 slp_node_instance, true, NULL);
13074 gcc_assert (done);
13077 if (slp_node)
13078 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13080 return is_store;
13084 /* Remove a group of stores (for SLP or interleaving), free their
13085 stmt_vec_info. */
13087 void
13088 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13090 stmt_vec_info next_stmt_info = first_stmt_info;
13092 while (next_stmt_info)
13094 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13095 next_stmt_info = vect_orig_stmt (next_stmt_info);
13096 /* Free the attached stmt_vec_info and remove the stmt. */
13097 vinfo->remove_stmt (next_stmt_info);
13098 next_stmt_info = tmp;
13102 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13103 elements of type SCALAR_TYPE, or null if the target doesn't support
13104 such a type.
13106 If NUNITS is zero, return a vector type that contains elements of
13107 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13109 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13110 for this vectorization region and want to "autodetect" the best choice.
13111 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13112 and we want the new type to be interoperable with it. PREVAILING_MODE
13113 in this case can be a scalar integer mode or a vector mode; when it
13114 is a vector mode, the function acts like a tree-level version of
13115 related_vector_mode. */
13117 tree
13118 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13119 tree scalar_type, poly_uint64 nunits)
13121 tree orig_scalar_type = scalar_type;
13122 scalar_mode inner_mode;
13123 machine_mode simd_mode;
13124 tree vectype;
13126 if ((!INTEGRAL_TYPE_P (scalar_type)
13127 && !POINTER_TYPE_P (scalar_type)
13128 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13129 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13130 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13131 return NULL_TREE;
13133 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13135 /* Interoperability between modes requires one to be a constant multiple
13136 of the other, so that the number of vectors required for each operation
13137 is a compile-time constant. */
13138 if (prevailing_mode != VOIDmode
13139 && !constant_multiple_p (nunits * nbytes,
13140 GET_MODE_SIZE (prevailing_mode))
13141 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13142 nunits * nbytes))
13143 return NULL_TREE;
13145 /* For vector types of elements whose mode precision doesn't
13146 match their types precision we use a element type of mode
13147 precision. The vectorization routines will have to make sure
13148 they support the proper result truncation/extension.
13149 We also make sure to build vector types with INTEGER_TYPE
13150 component type only. */
13151 if (INTEGRAL_TYPE_P (scalar_type)
13152 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13153 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13154 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13155 TYPE_UNSIGNED (scalar_type));
13157 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13158 When the component mode passes the above test simply use a type
13159 corresponding to that mode. The theory is that any use that
13160 would cause problems with this will disable vectorization anyway. */
13161 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13162 && !INTEGRAL_TYPE_P (scalar_type))
13163 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13165 /* We can't build a vector type of elements with alignment bigger than
13166 their size. */
13167 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13168 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13169 TYPE_UNSIGNED (scalar_type));
13171 /* If we felt back to using the mode fail if there was
13172 no scalar type for it. */
13173 if (scalar_type == NULL_TREE)
13174 return NULL_TREE;
13176 /* If no prevailing mode was supplied, use the mode the target prefers.
13177 Otherwise lookup a vector mode based on the prevailing mode. */
13178 if (prevailing_mode == VOIDmode)
13180 gcc_assert (known_eq (nunits, 0U));
13181 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13182 if (SCALAR_INT_MODE_P (simd_mode))
13184 /* Traditional behavior is not to take the integer mode
13185 literally, but simply to use it as a way of determining
13186 the vector size. It is up to mode_for_vector to decide
13187 what the TYPE_MODE should be.
13189 Note that nunits == 1 is allowed in order to support single
13190 element vector types. */
13191 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
13192 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13193 return NULL_TREE;
13196 else if (SCALAR_INT_MODE_P (prevailing_mode)
13197 || !related_vector_mode (prevailing_mode,
13198 inner_mode, nunits).exists (&simd_mode))
13200 /* Fall back to using mode_for_vector, mostly in the hope of being
13201 able to use an integer mode. */
13202 if (known_eq (nunits, 0U)
13203 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13204 return NULL_TREE;
13206 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13207 return NULL_TREE;
13210 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13212 /* In cases where the mode was chosen by mode_for_vector, check that
13213 the target actually supports the chosen mode, or that it at least
13214 allows the vector mode to be replaced by a like-sized integer. */
13215 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13216 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13217 return NULL_TREE;
13219 /* Re-attach the address-space qualifier if we canonicalized the scalar
13220 type. */
13221 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13222 return build_qualified_type
13223 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13225 return vectype;
13228 /* Function get_vectype_for_scalar_type.
13230 Returns the vector type corresponding to SCALAR_TYPE as supported
13231 by the target. If GROUP_SIZE is nonzero and we're performing BB
13232 vectorization, make sure that the number of elements in the vector
13233 is no bigger than GROUP_SIZE. */
13235 tree
13236 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13237 unsigned int group_size)
13239 /* For BB vectorization, we should always have a group size once we've
13240 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13241 are tentative requests during things like early data reference
13242 analysis and pattern recognition. */
13243 if (is_a <bb_vec_info> (vinfo))
13244 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13245 else
13246 group_size = 0;
13248 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13249 scalar_type);
13250 if (vectype && vinfo->vector_mode == VOIDmode)
13251 vinfo->vector_mode = TYPE_MODE (vectype);
13253 /* Register the natural choice of vector type, before the group size
13254 has been applied. */
13255 if (vectype)
13256 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13258 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13259 try again with an explicit number of elements. */
13260 if (vectype
13261 && group_size
13262 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13264 /* Start with the biggest number of units that fits within
13265 GROUP_SIZE and halve it until we find a valid vector type.
13266 Usually either the first attempt will succeed or all will
13267 fail (in the latter case because GROUP_SIZE is too small
13268 for the target), but it's possible that a target could have
13269 a hole between supported vector types.
13271 If GROUP_SIZE is not a power of 2, this has the effect of
13272 trying the largest power of 2 that fits within the group,
13273 even though the group is not a multiple of that vector size.
13274 The BB vectorizer will then try to carve up the group into
13275 smaller pieces. */
13276 unsigned int nunits = 1 << floor_log2 (group_size);
13279 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13280 scalar_type, nunits);
13281 nunits /= 2;
13283 while (nunits > 1 && !vectype);
13286 return vectype;
13289 /* Return the vector type corresponding to SCALAR_TYPE as supported
13290 by the target. NODE, if nonnull, is the SLP tree node that will
13291 use the returned vector type. */
13293 tree
13294 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13296 unsigned int group_size = 0;
13297 if (node)
13298 group_size = SLP_TREE_LANES (node);
13299 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13302 /* Function get_mask_type_for_scalar_type.
13304 Returns the mask type corresponding to a result of comparison
13305 of vectors of specified SCALAR_TYPE as supported by target.
13306 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13307 make sure that the number of elements in the vector is no bigger
13308 than GROUP_SIZE. */
13310 tree
13311 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13312 unsigned int group_size)
13314 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13316 if (!vectype)
13317 return NULL;
13319 return truth_type_for (vectype);
13322 /* Function get_same_sized_vectype
13324 Returns a vector type corresponding to SCALAR_TYPE of size
13325 VECTOR_TYPE if supported by the target. */
13327 tree
13328 get_same_sized_vectype (tree scalar_type, tree vector_type)
13330 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13331 return truth_type_for (vector_type);
13333 poly_uint64 nunits;
13334 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
13335 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13336 return NULL_TREE;
13338 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13339 scalar_type, nunits);
13342 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13343 would not change the chosen vector modes. */
13345 bool
13346 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13348 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13349 i != vinfo->used_vector_modes.end (); ++i)
13350 if (!VECTOR_MODE_P (*i)
13351 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13352 return false;
13353 return true;
13356 /* Function vect_is_simple_use.
13358 Input:
13359 VINFO - the vect info of the loop or basic block that is being vectorized.
13360 OPERAND - operand in the loop or bb.
13361 Output:
13362 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13363 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13364 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13365 the definition could be anywhere in the function
13366 DT - the type of definition
13368 Returns whether a stmt with OPERAND can be vectorized.
13369 For loops, supportable operands are constants, loop invariants, and operands
13370 that are defined by the current iteration of the loop. Unsupportable
13371 operands are those that are defined by a previous iteration of the loop (as
13372 is the case in reduction/induction computations).
13373 For basic blocks, supportable operands are constants and bb invariants.
13374 For now, operands defined outside the basic block are not supported. */
13376 bool
13377 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13378 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
13380 if (def_stmt_info_out)
13381 *def_stmt_info_out = NULL;
13382 if (def_stmt_out)
13383 *def_stmt_out = NULL;
13384 *dt = vect_unknown_def_type;
13386 if (dump_enabled_p ())
13388 dump_printf_loc (MSG_NOTE, vect_location,
13389 "vect_is_simple_use: operand ");
13390 if (TREE_CODE (operand) == SSA_NAME
13391 && !SSA_NAME_IS_DEFAULT_DEF (operand))
13392 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13393 else
13394 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13397 if (CONSTANT_CLASS_P (operand))
13398 *dt = vect_constant_def;
13399 else if (is_gimple_min_invariant (operand))
13400 *dt = vect_external_def;
13401 else if (TREE_CODE (operand) != SSA_NAME)
13402 *dt = vect_unknown_def_type;
13403 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13404 *dt = vect_external_def;
13405 else
13407 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13408 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13409 if (!stmt_vinfo)
13410 *dt = vect_external_def;
13411 else
13413 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
13414 def_stmt = stmt_vinfo->stmt;
13415 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13416 if (def_stmt_info_out)
13417 *def_stmt_info_out = stmt_vinfo;
13419 if (def_stmt_out)
13420 *def_stmt_out = def_stmt;
13423 if (dump_enabled_p ())
13425 dump_printf (MSG_NOTE, ", type of def: ");
13426 switch (*dt)
13428 case vect_uninitialized_def:
13429 dump_printf (MSG_NOTE, "uninitialized\n");
13430 break;
13431 case vect_constant_def:
13432 dump_printf (MSG_NOTE, "constant\n");
13433 break;
13434 case vect_external_def:
13435 dump_printf (MSG_NOTE, "external\n");
13436 break;
13437 case vect_internal_def:
13438 dump_printf (MSG_NOTE, "internal\n");
13439 break;
13440 case vect_induction_def:
13441 dump_printf (MSG_NOTE, "induction\n");
13442 break;
13443 case vect_reduction_def:
13444 dump_printf (MSG_NOTE, "reduction\n");
13445 break;
13446 case vect_double_reduction_def:
13447 dump_printf (MSG_NOTE, "double reduction\n");
13448 break;
13449 case vect_nested_cycle:
13450 dump_printf (MSG_NOTE, "nested cycle\n");
13451 break;
13452 case vect_first_order_recurrence:
13453 dump_printf (MSG_NOTE, "first order recurrence\n");
13454 break;
13455 case vect_unknown_def_type:
13456 dump_printf (MSG_NOTE, "unknown\n");
13457 break;
13461 if (*dt == vect_unknown_def_type)
13463 if (dump_enabled_p ())
13464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13465 "Unsupported pattern.\n");
13466 return false;
13469 return true;
13472 /* Function vect_is_simple_use.
13474 Same as vect_is_simple_use but also determines the vector operand
13475 type of OPERAND and stores it to *VECTYPE. If the definition of
13476 OPERAND is vect_uninitialized_def, vect_constant_def or
13477 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
13478 is responsible to compute the best suited vector type for the
13479 scalar operand. */
13481 bool
13482 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13483 tree *vectype, stmt_vec_info *def_stmt_info_out,
13484 gimple **def_stmt_out)
13486 stmt_vec_info def_stmt_info;
13487 gimple *def_stmt;
13488 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
13489 return false;
13491 if (def_stmt_out)
13492 *def_stmt_out = def_stmt;
13493 if (def_stmt_info_out)
13494 *def_stmt_info_out = def_stmt_info;
13496 /* Now get a vector type if the def is internal, otherwise supply
13497 NULL_TREE and leave it up to the caller to figure out a proper
13498 type for the use stmt. */
13499 if (*dt == vect_internal_def
13500 || *dt == vect_induction_def
13501 || *dt == vect_reduction_def
13502 || *dt == vect_double_reduction_def
13503 || *dt == vect_nested_cycle
13504 || *dt == vect_first_order_recurrence)
13506 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
13507 gcc_assert (*vectype != NULL_TREE);
13508 if (dump_enabled_p ())
13509 dump_printf_loc (MSG_NOTE, vect_location,
13510 "vect_is_simple_use: vectype %T\n", *vectype);
13512 else if (*dt == vect_uninitialized_def
13513 || *dt == vect_constant_def
13514 || *dt == vect_external_def)
13515 *vectype = NULL_TREE;
13516 else
13517 gcc_unreachable ();
13519 return true;
13522 /* Function vect_is_simple_use.
13524 Same as vect_is_simple_use but determines the operand by operand
13525 position OPERAND from either STMT or SLP_NODE, filling in *OP
13526 and *SLP_DEF (when SLP_NODE is not NULL). */
13528 bool
13529 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13530 unsigned operand, tree *op, slp_tree *slp_def,
13531 enum vect_def_type *dt,
13532 tree *vectype, stmt_vec_info *def_stmt_info_out)
13534 if (slp_node)
13536 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13537 *slp_def = child;
13538 *vectype = SLP_TREE_VECTYPE (child);
13539 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13541 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13542 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
13544 else
13546 if (def_stmt_info_out)
13547 *def_stmt_info_out = NULL;
13548 *op = SLP_TREE_SCALAR_OPS (child)[0];
13549 *dt = SLP_TREE_DEF_TYPE (child);
13550 return true;
13553 else
13555 *slp_def = NULL;
13556 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
13558 if (gimple_assign_rhs_code (ass) == COND_EXPR
13559 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
13561 if (operand < 2)
13562 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
13563 else
13564 *op = gimple_op (ass, operand);
13566 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
13567 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
13568 else
13569 *op = gimple_op (ass, operand + 1);
13571 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
13572 *op = gimple_call_arg (call, operand);
13573 else
13574 gcc_unreachable ();
13575 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
13579 /* If OP is not NULL and is external or constant update its vector
13580 type with VECTYPE. Returns true if successful or false if not,
13581 for example when conflicting vector types are present. */
13583 bool
13584 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13586 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13587 return true;
13588 if (SLP_TREE_VECTYPE (op))
13589 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
13590 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13591 should be handled by patters. Allow vect_constant_def for now. */
13592 if (VECTOR_BOOLEAN_TYPE_P (vectype)
13593 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
13594 return false;
13595 SLP_TREE_VECTYPE (op) = vectype;
13596 return true;
13599 /* Function supportable_widening_operation
13601 Check whether an operation represented by the code CODE is a
13602 widening operation that is supported by the target platform in
13603 vector form (i.e., when operating on arguments of type VECTYPE_IN
13604 producing a result of type VECTYPE_OUT).
13606 Widening operations we currently support are NOP (CONVERT), FLOAT,
13607 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13608 are supported by the target platform either directly (via vector
13609 tree-codes), or via target builtins.
13611 Output:
13612 - CODE1 and CODE2 are codes of vector operations to be used when
13613 vectorizing the operation, if available.
13614 - MULTI_STEP_CVT determines the number of required intermediate steps in
13615 case of multi-step conversion (like char->short->int - in that case
13616 MULTI_STEP_CVT will be 1).
13617 - INTERM_TYPES contains the intermediate type required to perform the
13618 widening operation (short in the above example). */
13620 bool
13621 supportable_widening_operation (vec_info *vinfo,
13622 code_helper code,
13623 stmt_vec_info stmt_info,
13624 tree vectype_out, tree vectype_in,
13625 code_helper *code1,
13626 code_helper *code2,
13627 int *multi_step_cvt,
13628 vec<tree> *interm_types)
13630 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
13631 class loop *vect_loop = NULL;
13632 machine_mode vec_mode;
13633 enum insn_code icode1, icode2;
13634 optab optab1 = unknown_optab, optab2 = unknown_optab;
13635 tree vectype = vectype_in;
13636 tree wide_vectype = vectype_out;
13637 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
13638 int i;
13639 tree prev_type, intermediate_type;
13640 machine_mode intermediate_mode, prev_mode;
13641 optab optab3, optab4;
13643 *multi_step_cvt = 0;
13644 if (loop_info)
13645 vect_loop = LOOP_VINFO_LOOP (loop_info);
13647 switch (code.safe_as_tree_code ())
13649 case MAX_TREE_CODES:
13650 /* Don't set c1 and c2 if code is not a tree_code. */
13651 break;
13653 case WIDEN_MULT_EXPR:
13654 /* The result of a vectorized widening operation usually requires
13655 two vectors (because the widened results do not fit into one vector).
13656 The generated vector results would normally be expected to be
13657 generated in the same order as in the original scalar computation,
13658 i.e. if 8 results are generated in each vector iteration, they are
13659 to be organized as follows:
13660 vect1: [res1,res2,res3,res4],
13661 vect2: [res5,res6,res7,res8].
13663 However, in the special case that the result of the widening
13664 operation is used in a reduction computation only, the order doesn't
13665 matter (because when vectorizing a reduction we change the order of
13666 the computation). Some targets can take advantage of this and
13667 generate more efficient code. For example, targets like Altivec,
13668 that support widen_mult using a sequence of {mult_even,mult_odd}
13669 generate the following vectors:
13670 vect1: [res1,res3,res5,res7],
13671 vect2: [res2,res4,res6,res8].
13673 When vectorizing outer-loops, we execute the inner-loop sequentially
13674 (each vectorized inner-loop iteration contributes to VF outer-loop
13675 iterations in parallel). We therefore don't allow to change the
13676 order of the computation in the inner-loop during outer-loop
13677 vectorization. */
13678 /* TODO: Another case in which order doesn't *really* matter is when we
13679 widen and then contract again, e.g. (short)((int)x * y >> 8).
13680 Normally, pack_trunc performs an even/odd permute, whereas the
13681 repack from an even/odd expansion would be an interleave, which
13682 would be significantly simpler for e.g. AVX2. */
13683 /* In any case, in order to avoid duplicating the code below, recurse
13684 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13685 are properly set up for the caller. If we fail, we'll continue with
13686 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13687 if (vect_loop
13688 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
13689 && !nested_in_vect_loop_p (vect_loop, stmt_info)
13690 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
13691 stmt_info, vectype_out,
13692 vectype_in, code1,
13693 code2, multi_step_cvt,
13694 interm_types))
13696 /* Elements in a vector with vect_used_by_reduction property cannot
13697 be reordered if the use chain with this property does not have the
13698 same operation. One such an example is s += a * b, where elements
13699 in a and b cannot be reordered. Here we check if the vector defined
13700 by STMT is only directly used in the reduction statement. */
13701 tree lhs = gimple_assign_lhs (stmt_info->stmt);
13702 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
13703 if (use_stmt_info
13704 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
13705 return true;
13707 c1 = VEC_WIDEN_MULT_LO_EXPR;
13708 c2 = VEC_WIDEN_MULT_HI_EXPR;
13709 break;
13711 case DOT_PROD_EXPR:
13712 c1 = DOT_PROD_EXPR;
13713 c2 = DOT_PROD_EXPR;
13714 break;
13716 case SAD_EXPR:
13717 c1 = SAD_EXPR;
13718 c2 = SAD_EXPR;
13719 break;
13721 case VEC_WIDEN_MULT_EVEN_EXPR:
13722 /* Support the recursion induced just above. */
13723 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
13724 c2 = VEC_WIDEN_MULT_ODD_EXPR;
13725 break;
13727 case WIDEN_LSHIFT_EXPR:
13728 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
13729 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
13730 break;
13732 CASE_CONVERT:
13733 c1 = VEC_UNPACK_LO_EXPR;
13734 c2 = VEC_UNPACK_HI_EXPR;
13735 break;
13737 case FLOAT_EXPR:
13738 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
13739 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
13740 break;
13742 case FIX_TRUNC_EXPR:
13743 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
13744 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
13745 break;
13747 default:
13748 gcc_unreachable ();
13751 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
13752 std::swap (c1, c2);
13754 if (code == FIX_TRUNC_EXPR)
13756 /* The signedness is determined from output operand. */
13757 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13758 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13760 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
13761 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13762 && VECTOR_BOOLEAN_TYPE_P (vectype)
13763 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13764 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13766 /* If the input and result modes are the same, a different optab
13767 is needed where we pass in the number of units in vectype. */
13768 optab1 = vec_unpacks_sbool_lo_optab;
13769 optab2 = vec_unpacks_sbool_hi_optab;
13772 vec_mode = TYPE_MODE (vectype);
13773 if (widening_fn_p (code))
13775 /* If this is an internal fn then we must check whether the target
13776 supports either a low-high split or an even-odd split. */
13777 internal_fn ifn = as_internal_fn ((combined_fn) code);
13779 internal_fn lo, hi, even, odd;
13780 lookup_hilo_internal_fn (ifn, &lo, &hi);
13781 *code1 = as_combined_fn (lo);
13782 *code2 = as_combined_fn (hi);
13783 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
13784 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
13786 /* If we don't support low-high, then check for even-odd. */
13787 if (!optab1
13788 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13789 || !optab2
13790 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13792 lookup_evenodd_internal_fn (ifn, &even, &odd);
13793 *code1 = as_combined_fn (even);
13794 *code2 = as_combined_fn (odd);
13795 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
13796 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
13799 else if (code.is_tree_code ())
13801 if (code == FIX_TRUNC_EXPR)
13803 /* The signedness is determined from output operand. */
13804 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13805 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13807 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
13808 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13809 && VECTOR_BOOLEAN_TYPE_P (vectype)
13810 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13811 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13813 /* If the input and result modes are the same, a different optab
13814 is needed where we pass in the number of units in vectype. */
13815 optab1 = vec_unpacks_sbool_lo_optab;
13816 optab2 = vec_unpacks_sbool_hi_optab;
13818 else
13820 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13821 optab2 = optab_for_tree_code (c2, vectype, optab_default);
13823 *code1 = c1;
13824 *code2 = c2;
13827 if (!optab1 || !optab2)
13828 return false;
13830 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13831 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13832 return false;
13835 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13836 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13838 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13839 return true;
13840 /* For scalar masks we may have different boolean
13841 vector types having the same QImode. Thus we
13842 add additional check for elements number. */
13843 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
13844 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13845 return true;
13848 /* Check if it's a multi-step conversion that can be done using intermediate
13849 types. */
13851 prev_type = vectype;
13852 prev_mode = vec_mode;
13854 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
13855 return false;
13857 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13858 intermediate steps in promotion sequence. We try
13859 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
13860 not. */
13861 interm_types->create (MAX_INTERM_CVT_STEPS);
13862 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13864 intermediate_mode = insn_data[icode1].operand[0].mode;
13865 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13866 intermediate_type
13867 = vect_halve_mask_nunits (prev_type, intermediate_mode);
13868 else if (VECTOR_MODE_P (intermediate_mode))
13870 tree intermediate_element_type
13871 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
13872 TYPE_UNSIGNED (prev_type));
13873 intermediate_type
13874 = build_vector_type_for_mode (intermediate_element_type,
13875 intermediate_mode);
13877 else
13878 intermediate_type
13879 = lang_hooks.types.type_for_mode (intermediate_mode,
13880 TYPE_UNSIGNED (prev_type));
13882 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13883 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13884 && intermediate_mode == prev_mode
13885 && SCALAR_INT_MODE_P (prev_mode))
13887 /* If the input and result modes are the same, a different optab
13888 is needed where we pass in the number of units in vectype. */
13889 optab3 = vec_unpacks_sbool_lo_optab;
13890 optab4 = vec_unpacks_sbool_hi_optab;
13892 else
13894 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
13895 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
13898 if (!optab3 || !optab4
13899 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
13900 || insn_data[icode1].operand[0].mode != intermediate_mode
13901 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
13902 || insn_data[icode2].operand[0].mode != intermediate_mode
13903 || ((icode1 = optab_handler (optab3, intermediate_mode))
13904 == CODE_FOR_nothing)
13905 || ((icode2 = optab_handler (optab4, intermediate_mode))
13906 == CODE_FOR_nothing))
13907 break;
13909 interm_types->quick_push (intermediate_type);
13910 (*multi_step_cvt)++;
13912 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13913 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13915 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13916 return true;
13917 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
13918 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13919 return true;
13922 prev_type = intermediate_type;
13923 prev_mode = intermediate_mode;
13926 interm_types->release ();
13927 return false;
13931 /* Function supportable_narrowing_operation
13933 Check whether an operation represented by the code CODE is a
13934 narrowing operation that is supported by the target platform in
13935 vector form (i.e., when operating on arguments of type VECTYPE_IN
13936 and producing a result of type VECTYPE_OUT).
13938 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
13939 and FLOAT. This function checks if these operations are supported by
13940 the target platform directly via vector tree-codes.
13942 Output:
13943 - CODE1 is the code of a vector operation to be used when
13944 vectorizing the operation, if available.
13945 - MULTI_STEP_CVT determines the number of required intermediate steps in
13946 case of multi-step conversion (like int->short->char - in that case
13947 MULTI_STEP_CVT will be 1).
13948 - INTERM_TYPES contains the intermediate type required to perform the
13949 narrowing operation (short in the above example). */
13951 bool
13952 supportable_narrowing_operation (code_helper code,
13953 tree vectype_out, tree vectype_in,
13954 code_helper *code1, int *multi_step_cvt,
13955 vec<tree> *interm_types)
13957 machine_mode vec_mode;
13958 enum insn_code icode1;
13959 optab optab1, interm_optab;
13960 tree vectype = vectype_in;
13961 tree narrow_vectype = vectype_out;
13962 enum tree_code c1;
13963 tree intermediate_type, prev_type;
13964 machine_mode intermediate_mode, prev_mode;
13965 int i;
13966 unsigned HOST_WIDE_INT n_elts;
13967 bool uns;
13969 if (!code.is_tree_code ())
13970 return false;
13972 *multi_step_cvt = 0;
13973 switch ((tree_code) code)
13975 CASE_CONVERT:
13976 c1 = VEC_PACK_TRUNC_EXPR;
13977 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
13978 && VECTOR_BOOLEAN_TYPE_P (vectype)
13979 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
13980 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
13981 && n_elts < BITS_PER_UNIT)
13982 optab1 = vec_pack_sbool_trunc_optab;
13983 else
13984 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13985 break;
13987 case FIX_TRUNC_EXPR:
13988 c1 = VEC_PACK_FIX_TRUNC_EXPR;
13989 /* The signedness is determined from output operand. */
13990 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13991 break;
13993 case FLOAT_EXPR:
13994 c1 = VEC_PACK_FLOAT_EXPR;
13995 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13996 break;
13998 default:
13999 gcc_unreachable ();
14002 if (!optab1)
14003 return false;
14005 vec_mode = TYPE_MODE (vectype);
14006 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
14007 return false;
14009 *code1 = c1;
14011 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14013 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14014 return true;
14015 /* For scalar masks we may have different boolean
14016 vector types having the same QImode. Thus we
14017 add additional check for elements number. */
14018 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14019 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14020 return true;
14023 if (code == FLOAT_EXPR)
14024 return false;
14026 /* Check if it's a multi-step conversion that can be done using intermediate
14027 types. */
14028 prev_mode = vec_mode;
14029 prev_type = vectype;
14030 if (code == FIX_TRUNC_EXPR)
14031 uns = TYPE_UNSIGNED (vectype_out);
14032 else
14033 uns = TYPE_UNSIGNED (vectype);
14035 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14036 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14037 costly than signed. */
14038 if (code == FIX_TRUNC_EXPR && uns)
14040 enum insn_code icode2;
14042 intermediate_type
14043 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14044 interm_optab
14045 = optab_for_tree_code (c1, intermediate_type, optab_default);
14046 if (interm_optab != unknown_optab
14047 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
14048 && insn_data[icode1].operand[0].mode
14049 == insn_data[icode2].operand[0].mode)
14051 uns = false;
14052 optab1 = interm_optab;
14053 icode1 = icode2;
14057 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14058 intermediate steps in promotion sequence. We try
14059 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14060 interm_types->create (MAX_INTERM_CVT_STEPS);
14061 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14063 intermediate_mode = insn_data[icode1].operand[0].mode;
14064 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14065 intermediate_type
14066 = vect_double_mask_nunits (prev_type, intermediate_mode);
14067 else
14068 intermediate_type
14069 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14070 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14071 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14072 && SCALAR_INT_MODE_P (prev_mode)
14073 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
14074 && n_elts < BITS_PER_UNIT)
14075 interm_optab = vec_pack_sbool_trunc_optab;
14076 else
14077 interm_optab
14078 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14079 optab_default);
14080 if (!interm_optab
14081 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
14082 || insn_data[icode1].operand[0].mode != intermediate_mode
14083 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
14084 == CODE_FOR_nothing))
14085 break;
14087 interm_types->quick_push (intermediate_type);
14088 (*multi_step_cvt)++;
14090 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14092 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14093 return true;
14094 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14095 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14096 return true;
14099 prev_mode = intermediate_mode;
14100 prev_type = intermediate_type;
14101 optab1 = interm_optab;
14104 interm_types->release ();
14105 return false;
14108 /* Generate and return a vector mask of MASK_TYPE such that
14109 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14110 Add the statements to SEQ. */
14112 tree
14113 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14114 tree end_index, const char *name)
14116 tree cmp_type = TREE_TYPE (start_index);
14117 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14118 cmp_type, mask_type,
14119 OPTIMIZE_FOR_SPEED));
14120 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14121 start_index, end_index,
14122 build_zero_cst (mask_type));
14123 tree tmp;
14124 if (name)
14125 tmp = make_temp_ssa_name (mask_type, NULL, name);
14126 else
14127 tmp = make_ssa_name (mask_type);
14128 gimple_call_set_lhs (call, tmp);
14129 gimple_seq_add_stmt (seq, call);
14130 return tmp;
14133 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14134 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
14136 tree
14137 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14138 tree end_index)
14140 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14141 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
14144 /* Try to compute the vector types required to vectorize STMT_INFO,
14145 returning true on success and false if vectorization isn't possible.
14146 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14147 take sure that the number of elements in the vectors is no bigger
14148 than GROUP_SIZE.
14150 On success:
14152 - Set *STMT_VECTYPE_OUT to:
14153 - NULL_TREE if the statement doesn't need to be vectorized;
14154 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14156 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14157 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14158 statement does not help to determine the overall number of units. */
14160 opt_result
14161 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14162 tree *stmt_vectype_out,
14163 tree *nunits_vectype_out,
14164 unsigned int group_size)
14166 gimple *stmt = stmt_info->stmt;
14168 /* For BB vectorization, we should always have a group size once we've
14169 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14170 are tentative requests during things like early data reference
14171 analysis and pattern recognition. */
14172 if (is_a <bb_vec_info> (vinfo))
14173 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
14174 else
14175 group_size = 0;
14177 *stmt_vectype_out = NULL_TREE;
14178 *nunits_vectype_out = NULL_TREE;
14180 if (gimple_get_lhs (stmt) == NULL_TREE
14181 /* MASK_STORE has no lhs, but is ok. */
14182 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
14184 if (is_a <gcall *> (stmt))
14186 /* Ignore calls with no lhs. These must be calls to
14187 #pragma omp simd functions, and what vectorization factor
14188 it really needs can't be determined until
14189 vectorizable_simd_clone_call. */
14190 if (dump_enabled_p ())
14191 dump_printf_loc (MSG_NOTE, vect_location,
14192 "defer to SIMD clone analysis.\n");
14193 return opt_result::success ();
14196 return opt_result::failure_at (stmt,
14197 "not vectorized: irregular stmt.%G", stmt);
14200 tree vectype;
14201 tree scalar_type = NULL_TREE;
14202 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
14204 vectype = STMT_VINFO_VECTYPE (stmt_info);
14205 if (dump_enabled_p ())
14206 dump_printf_loc (MSG_NOTE, vect_location,
14207 "precomputed vectype: %T\n", vectype);
14209 else if (vect_use_mask_type_p (stmt_info))
14211 unsigned int precision = stmt_info->mask_precision;
14212 scalar_type = build_nonstandard_integer_type (precision, 1);
14213 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
14214 if (!vectype)
14215 return opt_result::failure_at (stmt, "not vectorized: unsupported"
14216 " data-type %T\n", scalar_type);
14217 if (dump_enabled_p ())
14218 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14220 else
14222 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
14223 scalar_type = TREE_TYPE (DR_REF (dr));
14224 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
14225 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
14226 else
14227 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
14229 if (dump_enabled_p ())
14231 if (group_size)
14232 dump_printf_loc (MSG_NOTE, vect_location,
14233 "get vectype for scalar type (group size %d):"
14234 " %T\n", group_size, scalar_type);
14235 else
14236 dump_printf_loc (MSG_NOTE, vect_location,
14237 "get vectype for scalar type: %T\n", scalar_type);
14239 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
14240 if (!vectype)
14241 return opt_result::failure_at (stmt,
14242 "not vectorized:"
14243 " unsupported data-type %T\n",
14244 scalar_type);
14246 if (dump_enabled_p ())
14247 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14250 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
14251 return opt_result::failure_at (stmt,
14252 "not vectorized: vector stmt in loop:%G",
14253 stmt);
14255 *stmt_vectype_out = vectype;
14257 /* Don't try to compute scalar types if the stmt produces a boolean
14258 vector; use the existing vector type instead. */
14259 tree nunits_vectype = vectype;
14260 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14262 /* The number of units is set according to the smallest scalar
14263 type (or the largest vector size, but we only support one
14264 vector size per vectorization). */
14265 scalar_type = vect_get_smallest_scalar_type (stmt_info,
14266 TREE_TYPE (vectype));
14267 if (scalar_type != TREE_TYPE (vectype))
14269 if (dump_enabled_p ())
14270 dump_printf_loc (MSG_NOTE, vect_location,
14271 "get vectype for smallest scalar type: %T\n",
14272 scalar_type);
14273 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
14274 group_size);
14275 if (!nunits_vectype)
14276 return opt_result::failure_at
14277 (stmt, "not vectorized: unsupported data-type %T\n",
14278 scalar_type);
14279 if (dump_enabled_p ())
14280 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
14281 nunits_vectype);
14285 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
14286 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
14287 return opt_result::failure_at (stmt,
14288 "Not vectorized: Incompatible number "
14289 "of vector subparts between %T and %T\n",
14290 nunits_vectype, *stmt_vectype_out);
14292 if (dump_enabled_p ())
14294 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
14295 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
14296 dump_printf (MSG_NOTE, "\n");
14299 *nunits_vectype_out = nunits_vectype;
14300 return opt_result::success ();
14303 /* Generate and return statement sequence that sets vector length LEN that is:
14305 min_of_start_and_end = min (START_INDEX, END_INDEX);
14306 left_len = END_INDEX - min_of_start_and_end;
14307 rhs = min (left_len, LEN_LIMIT);
14308 LEN = rhs;
14310 Note: the cost of the code generated by this function is modeled
14311 by vect_estimate_min_profitable_iters, so changes here may need
14312 corresponding changes there. */
14314 gimple_seq
14315 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
14317 gimple_seq stmts = NULL;
14318 tree len_type = TREE_TYPE (len);
14319 gcc_assert (TREE_TYPE (start_index) == len_type);
14321 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
14322 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
14323 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
14324 gimple* stmt = gimple_build_assign (len, rhs);
14325 gimple_seq_add_stmt (&stmts, stmt);
14327 return stmts;