gcc/tree-vect-stmts.cc (official-gcc.git)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
57 #include "regs.h"
58 #include "attribs.h"
59 #include "optabs-libfuncs.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
64 /* Return the vectorized type for the given statement. */
66 tree
67 stmt_vectype (class _stmt_vec_info *stmt_info)
69 return STMT_VINFO_VECTYPE (stmt_info);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74 bool
75 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
80 class loop* loop;
82 if (!loop_vinfo)
83 return false;
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
87 return (bb->loop_father == loop->inner);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
94 static unsigned
95 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
108 stmt_info_for_cost si
109 = { count, kind, where, stmt_info, node, vectype, misalign };
110 body_cost_vec->safe_push (si);
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
116 unsigned
117 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
126 unsigned
127 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
136 unsigned
137 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, 0, where);
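/* For illustration only (a sketch; COST_VEC, NCOPIES and STMT_INFO are
   assumed to be in scope at the call site), a caller typically records
   the body cost of NCOPIES copies of a plain vector statement as

     unsigned inside_cost
       = record_stmt_cost (cost_vec, ncopies, vector_stmt,
                           stmt_info, NULL_TREE, 0, vect_body);

   Note that the gather/scatter adjustment above silently upgrades a
   vector_load/vector_store recorded for a gather or scatter statement
   to vector_gather_load/vector_scatter_store.  */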
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
149 static tree
150 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
161 static tree
162 read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (vect, new_stmt);
178 gimple_assign_set_lhs (new_stmt, vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
181 return vect_name;
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
188 static void
189 write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
193 tree array_ref;
194 gimple *new_stmt;
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
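/* For illustration only (a sketch; VINFO, STMT_INFO, GSI, VECTYPE,
   NVECTORS, SCALAR_DEST and a vector def VEC are assumed to be in
   scope), the array helpers above are used together roughly as

     tree array = create_vector_array (vectype, nvectors);
     write_vector_array (vinfo, stmt_info, gsi, vec, array, 0);
     tree lane0 = read_vector_array (vinfo, stmt_info, gsi,
                                     scalar_dest, array, 0);

   with the array later passed to a load/store-lanes style built-in.  */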
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
208 static tree
209 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
211 tree mem_ref;
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
222 static void
223 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
233 /* Function vect_mark_relevant.
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
237 static void
238 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern, in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
271 if (live_p && relevant == vect_unused_in_scope)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
299 worklist->safe_push (stmt_info);
303 /* Function is_simple_and_all_uses_invariant
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
307 bool
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
311 tree op;
312 ssa_op_iter iter;
314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
315 if (!stmt)
316 return false;
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
320 enum vect_def_type dt = vect_uninitialized_def;
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
333 return true;
336 /* Function vect_stmt_relevant_p.
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - control stmts in the loop (except for the exit condition).
346 CHECKME: what other side effects would the vectorizer allow? */
348 static bool
349 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
350 enum vect_relevant *relevant, bool *live_p)
352 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
353 ssa_op_iter op_iter;
354 imm_use_iterator imm_iter;
355 use_operand_p use_p;
356 def_operand_p def_p;
358 *relevant = vect_unused_in_scope;
359 *live_p = false;
361 /* cond stmt other than loop exit cond. */
362 if (is_ctrl_stmt (stmt_info->stmt)
363 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
364 *relevant = vect_used_in_scope;
366 /* changing memory. */
367 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
368 if (gimple_vdef (stmt_info->stmt)
369 && !gimple_clobber_p (stmt_info->stmt))
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: stmt has vdefs.\n");
374 *relevant = vect_used_in_scope;
377 /* uses outside the loop. */
378 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
380 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
382 basic_block bb = gimple_bb (USE_STMT (use_p));
383 if (!flow_bb_inside_loop_p (loop, bb))
385 if (is_gimple_debug (USE_STMT (use_p)))
386 continue;
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: used out of loop.\n");
392 /* We expect all such uses to be in the loop exit phis
393 (because of loop closed form) */
394 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
395 gcc_assert (bb == single_exit (loop)->dest);
397 *live_p = true;
402 if (*live_p && *relevant == vect_unused_in_scope
403 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_NOTE, vect_location,
407 "vec_stmt_relevant_p: stmt live but not relevant.\n");
408 *relevant = vect_used_only_live;
411 return (*live_p || *relevant);
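/* For illustration only (a sketch): in

     for (i = 0; i < n; i++)
       {
         tmp = a[i] + b[i];    # no vdef, no use outside the loop
         c[i] = tmp;           # has a vdef -> vect_used_in_scope
         sum = sum + a[i];     # sum used after the loop -> live
       }

   only the last two statements are flagged by vect_stmt_relevant_p;
   'tmp' only becomes relevant later, when the worklist in
   vect_mark_stmts_to_be_vectorized follows the uses of the store.  */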
415 /* Function exist_non_indexing_operands_for_use_p
417 USE is one of the uses attached to STMT_INFO. Check if USE is
418 used in STMT_INFO for anything other than indexing an array. */
420 static bool
421 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
423 tree operand;
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info))
429 return true;
431 /* STMT has a data_ref. FORNOW this means that it's of one of
432 the following forms:
433 -1- ARRAY_REF = var
434 -2- var = ARRAY_REF
435 (This should have been verified in analyze_data_refs).
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
439 for array indexing.
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
444 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
445 if (!assign || !gimple_assign_copy_p (assign))
447 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
448 if (call && gimple_call_internal_p (call))
450 internal_fn ifn = gimple_call_internal_fn (call);
451 int mask_index = internal_fn_mask_index (ifn);
452 if (mask_index >= 0
453 && use == gimple_call_arg (call, mask_index))
454 return true;
455 int stored_value_index = internal_fn_stored_value_index (ifn);
456 if (stored_value_index >= 0
457 && use == gimple_call_arg (call, stored_value_index))
458 return true;
459 if (internal_gather_scatter_fn_p (ifn)
460 && use == gimple_call_arg (call, 1))
461 return true;
463 return false;
466 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
467 return false;
468 operand = gimple_assign_rhs1 (assign);
469 if (TREE_CODE (operand) != SSA_NAME)
470 return false;
472 if (operand == use)
473 return true;
475 return false;
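/* For illustration only (a sketch): given the store 'MEM[base_1] = x_2',
   the use 'x_2' is the stored value, so the function above returns true
   for it; for the load 'x_3 = MEM[base_1]' the only use is 'base_1',
   which merely indexes the access, so the function returns false.  */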
480 Function process_use.
482 Inputs:
483 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
484 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
485 that defined USE. This is done by calling mark_relevant and passing it
486 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
487 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
488 be performed.
490 Outputs:
491 Generally, LIVE_P and RELEVANT are used to define the liveness and
492 relevance info of the DEF_STMT of this USE:
493 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
494 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
495 Exceptions:
496 - case 1: If USE is used only for address computations (e.g. array indexing),
497 which does not need to be directly vectorized, then the liveness/relevance
498 of the respective DEF_STMT is left unchanged.
499 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
500 we skip DEF_STMT because it has already been processed.
501 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
502 "relevant" will be modified accordingly.
504 Return true if everything is as expected. Return false otherwise. */
506 static opt_result
507 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
508 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
509 bool force)
511 stmt_vec_info dstmt_vinfo;
512 enum vect_def_type dt;
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
517 return opt_result::success ();
519 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
520 return opt_result::failure_at (stmt_vinfo->stmt,
521 "not vectorized:"
522 " unsupported use in stmt.\n");
524 if (!dstmt_vinfo)
525 return opt_result::success ();
527 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
528 basic_block bb = gimple_bb (stmt_vinfo->stmt);
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
537 && bb->loop_father == def_bb->loop_father)
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
542 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
543 return opt_result::success ();
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
548 d = dstmt_vinfo
549 inner-loop:
550 stmt # use (d)
551 outer-loop-tail-bb:
552 ... */
553 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
559 switch (relevant)
561 case vect_unused_in_scope:
562 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
563 vect_used_in_scope : vect_unused_in_scope;
564 break;
566 case vect_used_in_outer_by_reduction:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_by_reduction;
569 break;
571 case vect_used_in_outer:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
573 relevant = vect_used_in_scope;
574 break;
576 case vect_used_in_scope:
577 break;
579 default:
580 gcc_unreachable ();
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
587 inner-loop:
588 d = dstmt_vinfo
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
590 stmt # use (d) */
591 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE, vect_location,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
597 switch (relevant)
599 case vect_unused_in_scope:
600 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
602 vect_used_in_outer_by_reduction : vect_unused_in_scope;
603 break;
605 case vect_used_by_reduction:
606 case vect_used_only_live:
607 relevant = vect_used_in_outer_by_reduction;
608 break;
610 case vect_used_in_scope:
611 relevant = vect_used_in_outer;
612 break;
614 default:
615 gcc_unreachable ();
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
621 of course. */
622 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
626 loop_latch_edge (bb->loop_father))
627 == use))
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location,
631 "induction value on backedge.\n");
632 return opt_result::success ();
636 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
637 return opt_result::success ();
641 /* Function vect_mark_stmts_to_be_vectorized.
643 Not all stmts in the loop need to be vectorized. For example:
645 for i...
646 for j...
647 1. T0 = i + j
648 2. T1 = a[T0]
650 3. j = j + 1
652 Stmts 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
655 This pass detects such stmts. */
657 opt_result
658 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
660 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
661 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
662 unsigned int nbbs = loop->num_nodes;
663 gimple_stmt_iterator si;
664 unsigned int i;
665 basic_block bb;
666 bool live_p;
667 enum vect_relevant relevant;
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
671 auto_vec<stmt_vec_info, 64> worklist;
673 /* 1. Init worklist. */
674 for (i = 0; i < nbbs; i++)
676 bb = bbs[i];
677 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
679 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
682 phi_info->stmt);
684 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
685 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
687 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
689 if (is_gimple_debug (gsi_stmt (si)))
690 continue;
691 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "init: stmt relevant? %G", stmt_info->stmt);
696 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
697 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
701 /* 2. Process_worklist */
702 while (worklist.length () > 0)
704 use_operand_p use_p;
705 ssa_op_iter iter;
707 stmt_vec_info stmt_vinfo = worklist.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "worklist: examine stmt: %G", stmt_vinfo->stmt);
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
714 of STMT. */
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
730 case vect_reduction_def:
731 gcc_assert (relevant != vect_unused_in_scope);
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
736 return opt_result::failure_at
737 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
738 break;
740 case vect_nested_cycle:
741 if (relevant != vect_unused_in_scope
742 && relevant != vect_used_in_outer_by_reduction
743 && relevant != vect_used_in_outer)
744 return opt_result::failure_at
745 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
746 break;
748 case vect_double_reduction_def:
749 if (relevant != vect_unused_in_scope
750 && relevant != vect_used_by_reduction
751 && relevant != vect_used_only_live)
752 return opt_result::failure_at
753 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
754 break;
756 default:
757 break;
760 if (is_pattern_stmt_p (stmt_vinfo))
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
767 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
768 tree op = gimple_assign_rhs1 (assign);
770 i = 1;
771 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
773 opt_result res
774 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
775 loop_vinfo, relevant, &worklist, false);
776 if (!res)
777 return res;
778 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
779 loop_vinfo, relevant, &worklist, false);
780 if (!res)
781 return res;
782 i = 2;
784 for (; i < gimple_num_ops (assign); i++)
786 op = gimple_op (assign, i);
787 if (TREE_CODE (op) == SSA_NAME)
789 opt_result res
790 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
791 &worklist, false);
792 if (!res)
793 return res;
797 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
799 for (i = 0; i < gimple_call_num_args (call); i++)
801 tree arg = gimple_call_arg (call, i);
802 opt_result res
803 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
804 &worklist, false);
805 if (!res)
806 return res;
810 else
811 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
813 tree op = USE_FROM_PTR (use_p);
814 opt_result res
815 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
816 &worklist, false);
817 if (!res)
818 return res;
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
823 gather_scatter_info gs_info;
824 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
825 gcc_unreachable ();
826 opt_result res
827 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
828 &worklist, true);
829 if (!res)
831 if (fatal)
832 *fatal = false;
833 return res;
836 } /* while worklist */
838 return opt_result::success ();
841 /* Function vect_model_simple_cost.
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
847 static void
848 vect_model_simple_cost (vec_info *,
849 stmt_vec_info stmt_info, int ncopies,
850 enum vect_def_type *dt,
851 int ndts,
852 slp_tree node,
853 stmt_vector_for_cost *cost_vec,
854 vect_cost_for_stmt kind = vector_stmt)
856 int inside_cost = 0, prologue_cost = 0;
858 gcc_assert (cost_vec != NULL);
860 /* ??? Somehow we need to fix this at the callers. */
861 if (node)
862 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
864 if (!node)
865 /* Cost the "broadcast" of a scalar operand in to a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
867 cost model. */
868 for (int i = 0; i < ndts; i++)
869 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
870 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
871 stmt_info, 0, vect_prologue);
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
875 stmt_info, 0, vect_body);
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost, prologue_cost);
884 /* Model cost for type demotion and promotion operations. PWR is
885 normally zero for single-step promotions and demotions. It will be
886 one if two-step promotion/demotion is required, and so on. NCOPIES
887 is the number of vector results (and thus number of instructions)
888 for the narrowest end of the operation chain. Each additional
889 step doubles the number of instructions required. If WIDEN_ARITH
890 is true the stmt is doing widening arithmetic. */
892 static void
893 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
894 enum vect_def_type *dt,
895 unsigned int ncopies, int pwr,
896 stmt_vector_for_cost *cost_vec,
897 bool widen_arith)
899 int i;
900 int inside_cost = 0, prologue_cost = 0;
902 for (i = 0; i < pwr + 1; i++)
904 inside_cost += record_stmt_cost (cost_vec, ncopies,
905 widen_arith
906 ? vector_stmt : vec_promote_demote,
907 stmt_info, 0, vect_body);
908 ncopies *= 2;
911 /* FORNOW: Assuming maximum 2 args per stmt. */
912 for (i = 0; i < 2; i++)
913 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
914 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
915 stmt_info, 0, vect_prologue);
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE, vect_location,
919 "vect_model_promotion_demotion_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost, prologue_cost);
923 /* Returns true if the current function returns DECL. */
925 static bool
926 cfun_returns (tree decl)
928 edge_iterator ei;
929 edge e;
930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
932 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
933 if (!ret)
934 continue;
935 if (gimple_return_retval (ret) == decl)
936 return true;
937 /* We often end up with an aggregate copy to the result decl,
938 handle that case as well. First skip intermediate clobbers
939 though. */
940 gimple *def = ret;
943 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
945 while (gimple_clobber_p (def));
946 if (is_a <gassign *> (def)
947 && gimple_assign_lhs (def) == gimple_return_retval (ret)
948 && gimple_assign_rhs1 (def) == decl)
949 return true;
951 return false;
954 /* Calculate cost of DR's memory access. */
955 void
956 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
957 dr_alignment_support alignment_support_scheme,
958 int misalignment,
959 unsigned int *inside_cost,
960 stmt_vector_for_cost *body_cost_vec)
962 switch (alignment_support_scheme)
964 case dr_aligned:
966 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
967 vector_store, stmt_info, 0,
968 vect_body);
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE, vect_location,
972 "vect_model_store_cost: aligned.\n");
973 break;
976 case dr_unaligned_supported:
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
980 unaligned_store, stmt_info,
981 misalignment, vect_body);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE, vect_location,
984 "vect_model_store_cost: unaligned supported by "
985 "hardware.\n");
986 break;
989 case dr_unaligned_unsupported:
991 *inside_cost = VECT_MAX_COST;
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
995 "vect_model_store_cost: unsupported access.\n");
996 break;
999 default:
1000 gcc_unreachable ();
1004 /* Calculate cost of DR's memory access. */
1005 void
1006 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1007 dr_alignment_support alignment_support_scheme,
1008 int misalignment,
1009 bool add_realign_cost, unsigned int *inside_cost,
1010 unsigned int *prologue_cost,
1011 stmt_vector_for_cost *prologue_cost_vec,
1012 stmt_vector_for_cost *body_cost_vec,
1013 bool record_prologue_costs)
1015 switch (alignment_support_scheme)
1017 case dr_aligned:
1019 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1020 stmt_info, 0, vect_body);
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_NOTE, vect_location,
1024 "vect_model_load_cost: aligned.\n");
1026 break;
1028 case dr_unaligned_supported:
1030 /* Here, we assign an additional cost for the unaligned load. */
1031 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1032 unaligned_load, stmt_info,
1033 misalignment, vect_body);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_load_cost: unaligned supported by "
1038 "hardware.\n");
1040 break;
1042 case dr_explicit_realign:
1044 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1045 vector_load, stmt_info, 0, vect_body);
1046 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1047 vec_perm, stmt_info, 0, vect_body);
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1051 prologue costs. */
1052 if (targetm.vectorize.builtin_mask_for_load)
1053 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1054 stmt_info, 0, vect_body);
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_NOTE, vect_location,
1058 "vect_model_load_cost: explicit realign\n");
1060 break;
1062 case dr_explicit_realign_optimized:
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_NOTE, vect_location,
1066 "vect_model_load_cost: unaligned software "
1067 "pipelined.\n");
1069 /* Unaligned software pipeline has a load of an address, an initial
1070 load, and possibly a mask operation to "prime" the loop. However,
1071 if this is an access in a group of loads, which provide grouped
1072 access, then the above cost should only be considered for one
1073 access in the group. Inside the loop, there is a load op
1074 and a realignment op. */
1076 if (add_realign_cost && record_prologue_costs)
1078 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1079 vector_stmt, stmt_info,
1080 0, vect_prologue);
1081 if (targetm.vectorize.builtin_mask_for_load)
1082 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1083 vector_stmt, stmt_info,
1084 0, vect_prologue);
1087 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1088 stmt_info, 0, vect_body);
1089 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1090 stmt_info, 0, vect_body);
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE, vect_location,
1094 "vect_model_load_cost: explicit realign optimized"
1095 "\n");
1097 break;
1100 case dr_unaligned_unsupported:
1102 *inside_cost = VECT_MAX_COST;
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1106 "vect_model_load_cost: unsupported access.\n");
1107 break;
1110 default:
1111 gcc_unreachable ();
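/* For illustration only (a sketch): with NCOPIES == 2 and
   dr_unaligned_supported, the two routines above add two
   unaligned_store or unaligned_load body costs, letting the target
   charge extra for the given MISALIGNMENT; dr_unaligned_unsupported
   instead pins *INSIDE_COST to VECT_MAX_COST so that this access
   scheme is rejected.  */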
1115 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1116 the loop preheader for the vectorized stmt STMT_VINFO. */
1118 static void
1119 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1120 gimple_stmt_iterator *gsi)
1122 if (gsi)
1123 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1124 else
1125 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE, vect_location,
1129 "created new init_stmt: %G", new_stmt);
1132 /* Function vect_init_vector.
1134 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1135 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1136 vector type a vector with all elements equal to VAL is created first.
1137 Place the initialization at GSI if it is not NULL. Otherwise, place the
1138 initialization at the loop preheader.
1139 Return the DEF of INIT_STMT.
1140 It will be used in the vectorization of STMT_INFO. */
1142 tree
1143 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1144 gimple_stmt_iterator *gsi)
1146 gimple *init_stmt;
1147 tree new_temp;
1149 /* We abuse this function to push something to an SSA name with initial 'val'. */
1150 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1152 gcc_assert (VECTOR_TYPE_P (type));
1153 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1155 /* Scalar boolean value should be transformed into
1156 all zeros or all ones value before building a vector. */
1157 if (VECTOR_BOOLEAN_TYPE_P (type))
1159 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1160 tree false_val = build_zero_cst (TREE_TYPE (type));
1162 if (CONSTANT_CLASS_P (val))
1163 val = integer_zerop (val) ? false_val : true_val;
1164 else
1166 new_temp = make_ssa_name (TREE_TYPE (type));
1167 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1168 val, true_val, false_val);
1169 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1170 val = new_temp;
1173 else
1175 gimple_seq stmts = NULL;
1176 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1177 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1178 TREE_TYPE (type), val);
1179 else
1180 /* ??? Condition vectorization expects us to do
1181 promotion of invariant/external defs. */
1182 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1183 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1184 !gsi_end_p (gsi2); )
1186 init_stmt = gsi_stmt (gsi2);
1187 gsi_remove (&gsi2, false);
1188 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1192 val = build_vector_from_val (type, val);
1195 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1196 init_stmt = gimple_build_assign (new_temp, val);
1197 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1198 return new_temp;
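/* For illustration only (a sketch; VINFO, STMT_INFO, OP and VECTYPE are
   assumed to be in scope), an invariant operand is broadcast once on
   loop entry with

     tree vop = vect_init_vector (vinfo, stmt_info, op, vectype, NULL);

   where the NULL GSI places the init statement in the loop preheader
   rather than before the statement currently being vectorized.  */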
1202 /* Function vect_get_vec_defs_for_operand.
1204 OP is an operand in STMT_VINFO. This function returns a vector of
1205 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1207 In the case that OP is an SSA_NAME which is defined in the loop, then
1208 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1210 In case OP is an invariant or constant, a new stmt that creates a vector def
1211 needs to be introduced. VECTYPE may be used to specify a required type for
1212 vector invariant. */
1214 void
1215 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1216 unsigned ncopies,
1217 tree op, vec<tree> *vec_oprnds, tree vectype)
1219 gimple *def_stmt;
1220 enum vect_def_type dt;
1221 bool is_simple_use;
1222 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_get_vec_defs_for_operand: %T\n", op);
1228 stmt_vec_info def_stmt_info;
1229 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1230 &def_stmt_info, &def_stmt);
1231 gcc_assert (is_simple_use);
1232 if (def_stmt && dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1235 vec_oprnds->create (ncopies);
1236 if (dt == vect_constant_def || dt == vect_external_def)
1238 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1239 tree vector_type;
1241 if (vectype)
1242 vector_type = vectype;
1243 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1244 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1245 vector_type = truth_type_for (stmt_vectype);
1246 else
1247 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1249 gcc_assert (vector_type);
1250 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1251 while (ncopies--)
1252 vec_oprnds->quick_push (vop);
1254 else
1256 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1257 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1258 for (unsigned i = 0; i < ncopies; ++i)
1259 vec_oprnds->quick_push (gimple_get_lhs
1260 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1265 /* Get vectorized definitions for OP0 and OP1. */
1267 void
1268 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1269 unsigned ncopies,
1270 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1271 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1272 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1273 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1275 if (slp_node)
1277 if (op0)
1278 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1279 if (op1)
1280 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1281 if (op2)
1282 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1283 if (op3)
1284 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1286 else
1288 if (op0)
1289 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1290 op0, vec_oprnds0, vectype0);
1291 if (op1)
1292 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1293 op1, vec_oprnds1, vectype1);
1294 if (op2)
1295 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1296 op2, vec_oprnds2, vectype2);
1297 if (op3)
1298 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1299 op3, vec_oprnds3, vectype3);
1303 void
1304 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1305 unsigned ncopies,
1306 tree op0, vec<tree> *vec_oprnds0,
1307 tree op1, vec<tree> *vec_oprnds1,
1308 tree op2, vec<tree> *vec_oprnds2,
1309 tree op3, vec<tree> *vec_oprnds3)
1311 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1312 op0, vec_oprnds0, NULL_TREE,
1313 op1, vec_oprnds1, NULL_TREE,
1314 op2, vec_oprnds2, NULL_TREE,
1315 op3, vec_oprnds3, NULL_TREE);
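/* For illustration only (a sketch; the surrounding vectorizable_*
   context is assumed), a binary operation typically fetches its
   vectorized operands with

     auto_vec<tree> vec_oprnds0, vec_oprnds1;
     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                        op0, &vec_oprnds0, op1, &vec_oprnds1,
                        NULL_TREE, NULL, NULL_TREE, NULL);

   which uses the SLP children when SLP_NODE is non-null and
   vect_get_vec_defs_for_operand otherwise.  */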
1318 /* Helper function called by vect_finish_replace_stmt and
1319 vect_finish_stmt_generation. Set the location of the new
1320 statement and create and return a stmt_vec_info for it. */
1322 static void
1323 vect_finish_stmt_generation_1 (vec_info *,
1324 stmt_vec_info stmt_info, gimple *vec_stmt)
1326 if (dump_enabled_p ())
1327 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1329 if (stmt_info)
1331 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1333 /* While EH edges will generally prevent vectorization, stmt might
1334 e.g. be in a must-not-throw region. Ensure newly created stmts
1335 that could throw are part of the same region. */
1336 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1337 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1338 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1340 else
1341 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1344 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1345 which sets the same scalar result as STMT_INFO did. Create and return a
1346 stmt_vec_info for VEC_STMT. */
1348 void
1349 vect_finish_replace_stmt (vec_info *vinfo,
1350 stmt_vec_info stmt_info, gimple *vec_stmt)
1352 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1353 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1355 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1356 gsi_replace (&gsi, vec_stmt, true);
1358 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1361 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1362 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1364 void
1365 vect_finish_stmt_generation (vec_info *vinfo,
1366 stmt_vec_info stmt_info, gimple *vec_stmt,
1367 gimple_stmt_iterator *gsi)
1369 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1371 if (!gsi_end_p (*gsi)
1372 && gimple_has_mem_ops (vec_stmt))
1374 gimple *at_stmt = gsi_stmt (*gsi);
1375 tree vuse = gimple_vuse (at_stmt);
1376 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1378 tree vdef = gimple_vdef (at_stmt);
1379 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1380 gimple_set_modified (vec_stmt, true);
1381 /* If we have an SSA vuse and insert a store, update virtual
1382 SSA form to avoid triggering the renamer. Do so only
1383 if we can easily see all uses - which is what almost always
1384 happens with the way vectorized stmts are inserted. */
1385 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1386 && ((is_gimple_assign (vec_stmt)
1387 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1388 || (is_gimple_call (vec_stmt)
1389 && (!(gimple_call_flags (vec_stmt)
1390 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1391 || (gimple_call_lhs (vec_stmt)
1392 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1394 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1395 gimple_set_vdef (vec_stmt, new_vdef);
1396 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1400 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1401 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
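/* For illustration only (a sketch; VINFO, STMT_INFO, GSI, VEC_DEST and
   RHS are assumed to be in scope), newly created vector statements are
   emitted as

     gimple *new_stmt = gimple_build_assign (vec_dest, rhs);
     vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

   which inserts before *GSI, copies the scalar statement's location and
   EH region, and keeps virtual SSA operands up to date for stores.  */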
1404 /* We want to vectorize a call to combined function CFN with function
1405 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1406 as the types of all inputs. Check whether this is possible using
1407 an internal function, returning its code if so or IFN_LAST if not. */
1409 static internal_fn
1410 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1411 tree vectype_out, tree vectype_in)
1413 internal_fn ifn;
1414 if (internal_fn_p (cfn))
1415 ifn = as_internal_fn (cfn);
1416 else
1417 ifn = associated_internal_fn (fndecl);
1418 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1420 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1421 if (info.vectorizable)
1423 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1424 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1425 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1426 OPTIMIZE_FOR_SPEED))
1427 return ifn;
1430 return IFN_LAST;
1434 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1435 gimple_stmt_iterator *);
1437 /* Check whether a load or store statement in the loop described by
1438 LOOP_VINFO is possible in a loop using partial vectors. This is
1439 testing whether the vectorizer pass has the appropriate support,
1440 as well as whether the target does.
1442 VLS_TYPE says whether the statement is a load or store and VECTYPE
1443 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1444 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1445 says how the load or store is going to be implemented and GROUP_SIZE
1446 is the number of load or store statements in the containing group.
1447 If the access is a gather load or scatter store, GS_INFO describes
1448 its arguments. If the load or store is conditional, SCALAR_MASK is the
1449 condition under which it occurs.
1451 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1452 vectors is not supported, otherwise record the required rgroup control
1453 types. */
1455 static void
1456 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1457 slp_tree slp_node,
1458 vec_load_store_type vls_type,
1459 int group_size,
1460 vect_memory_access_type
1461 memory_access_type,
1462 gather_scatter_info *gs_info,
1463 tree scalar_mask)
1465 /* Invariant loads need no special support. */
1466 if (memory_access_type == VMAT_INVARIANT)
1467 return;
1469 unsigned int nvectors;
1470 if (slp_node)
1471 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1472 else
1473 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1475 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1476 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1477 machine_mode vecmode = TYPE_MODE (vectype);
1478 bool is_load = (vls_type == VLS_LOAD);
1479 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1481 internal_fn ifn
1482 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1483 : vect_store_lanes_supported (vectype, group_size, true));
1484 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1485 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1486 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1487 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1488 scalar_mask);
1489 else
1491 if (dump_enabled_p ())
1492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1493 "can't operate on partial vectors because"
1494 " the target doesn't have an appropriate"
1495 " load/store-lanes instruction.\n");
1496 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1498 return;
1501 if (memory_access_type == VMAT_GATHER_SCATTER)
1503 internal_fn ifn = (is_load
1504 ? IFN_MASK_GATHER_LOAD
1505 : IFN_MASK_SCATTER_STORE);
1506 internal_fn len_ifn = (is_load
1507 ? IFN_MASK_LEN_GATHER_LOAD
1508 : IFN_MASK_LEN_SCATTER_STORE);
1509 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1510 gs_info->memory_type,
1511 gs_info->offset_vectype,
1512 gs_info->scale))
1513 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1514 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1515 gs_info->memory_type,
1516 gs_info->offset_vectype,
1517 gs_info->scale))
1518 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1519 scalar_mask);
1520 else
1522 if (dump_enabled_p ())
1523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1524 "can't operate on partial vectors because"
1525 " the target doesn't have an appropriate"
1526 " gather load or scatter store instruction.\n");
1527 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1529 return;
1532 if (memory_access_type != VMAT_CONTIGUOUS
1533 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1535 /* Element X of the data must come from iteration i * VF + X of the
1536 scalar loop. We need more work to support other mappings. */
1537 if (dump_enabled_p ())
1538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1539 "can't operate on partial vectors because an"
1540 " access isn't contiguous.\n");
1541 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1542 return;
1545 if (!VECTOR_MODE_P (vecmode))
1547 if (dump_enabled_p ())
1548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1549 "can't operate on partial vectors when emulating"
1550 " vector operations.\n");
1551 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1552 return;
1555 /* We might load more scalars than we need for permuting SLP loads.
1556 We checked in get_group_load_store_type that the extra elements
1557 don't leak into a new vector. */
1558 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1560 unsigned int nvectors;
1561 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1562 return nvectors;
1563 gcc_unreachable ();
1566 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1567 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1568 machine_mode mask_mode;
1569 machine_mode vmode;
1570 bool using_partial_vectors_p = false;
1571 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1573 nvectors = group_memory_nvectors (group_size * vf, nunits);
1574 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1575 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1576 using_partial_vectors_p = true;
1578 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1579 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1581 nvectors = group_memory_nvectors (group_size * vf, nunits);
1582 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1583 using_partial_vectors_p = true;
1586 if (!using_partial_vectors_p)
1588 if (dump_enabled_p ())
1589 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1590 "can't operate on partial vectors because the"
1591 " target doesn't have the appropriate partial"
1592 " vectorization load or store.\n");
1593 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1597 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1598 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1599 that needs to be applied to all loads and stores in a vectorized loop.
1600 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1601 otherwise return VEC_MASK & LOOP_MASK.
1603 MASK_TYPE is the type of both masks. If new statements are needed,
1604 insert them before GSI. */
1606 static tree
1607 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1608 tree vec_mask, gimple_stmt_iterator *gsi)
1610 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1611 if (!loop_mask)
1612 return vec_mask;
1614 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1616 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1617 return vec_mask;
1619 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1620 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1621 vec_mask, loop_mask);
1623 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1624 return and_res;
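/* For illustration only (a sketch; LOOP_VINFO, MASK_TYPE, LOOP_MASK,
   VEC_MASK and GSI are assumed to be in scope), masked accesses combine
   the scalar condition with the loop mask via

     tree final_mask = prepare_vec_mask (loop_vinfo, mask_type,
                                         loop_mask, vec_mask, gsi);

   which emits at most one BIT_AND_EXPR and returns VEC_MASK unchanged
   when it is already known to be covered by LOOP_MASK.  */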
1627 /* Determine whether we can use a gather load or scatter store to vectorize
1628 strided load or store STMT_INFO by truncating the current offset to a
1629 smaller width. We need to be able to construct an offset vector:
1631 { 0, X, X*2, X*3, ... }
1633 without loss of precision, where X is STMT_INFO's DR_STEP.
1635 Return true if this is possible, describing the gather load or scatter
1636 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1638 static bool
1639 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1640 loop_vec_info loop_vinfo, bool masked_p,
1641 gather_scatter_info *gs_info)
1643 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1644 data_reference *dr = dr_info->dr;
1645 tree step = DR_STEP (dr);
1646 if (TREE_CODE (step) != INTEGER_CST)
1648 /* ??? Perhaps we could use range information here? */
1649 if (dump_enabled_p ())
1650 dump_printf_loc (MSG_NOTE, vect_location,
1651 "cannot truncate variable step.\n");
1652 return false;
1655 /* Get the number of bits in an element. */
1656 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1657 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1658 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1660 /* Set COUNT to the upper limit on the number of elements - 1.
1661 Start with the maximum vectorization factor. */
1662 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1664 /* Try lowering COUNT to the number of scalar latch iterations. */
1665 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1666 widest_int max_iters;
1667 if (max_loop_iterations (loop, &max_iters)
1668 && max_iters < count)
1669 count = max_iters.to_shwi ();
1671 /* Try scales of 1 and the element size. */
1672 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1673 wi::overflow_type overflow = wi::OVF_NONE;
1674 for (int i = 0; i < 2; ++i)
1676 int scale = scales[i];
1677 widest_int factor;
1678 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1679 continue;
1681 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1682 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1683 if (overflow)
1684 continue;
1685 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1686 unsigned int min_offset_bits = wi::min_precision (range, sign);
1688 /* Find the narrowest viable offset type. */
1689 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1690 tree offset_type = build_nonstandard_integer_type (offset_bits,
1691 sign == UNSIGNED);
1693 /* See whether the target supports the operation with an offset
1694 no narrower than OFFSET_TYPE. */
1695 tree memory_type = TREE_TYPE (DR_REF (dr));
1696 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1697 vectype, memory_type, offset_type, scale,
1698 &gs_info->ifn, &gs_info->offset_vectype)
1699 || gs_info->ifn == IFN_LAST)
1700 continue;
1702 gs_info->decl = NULL_TREE;
1703 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1704 but we don't need to store that here. */
1705 gs_info->base = NULL_TREE;
1706 gs_info->element_type = TREE_TYPE (vectype);
1707 gs_info->offset = fold_convert (offset_type, step);
1708 gs_info->offset_dt = vect_constant_def;
1709 gs_info->scale = scale;
1710 gs_info->memory_type = memory_type;
1711 return true;
1714 if (overflow && dump_enabled_p ())
1715 dump_printf_loc (MSG_NOTE, vect_location,
1716 "truncating gather/scatter offset to %d bits"
1717 " might change its value.\n", element_bits);
1719 return false;
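/* For illustration only (a sketch of the arithmetic above): with
   DR_STEP == 4, a 4-byte element and at most 255 latch iterations,
   scale 1 gives FACTOR == 4 and a range of 1020, so a 16-bit unsigned
   offset type is tried first; if the target cannot handle that,
   scale 4 gives FACTOR == 1, a range of 255 and an 8-bit unsigned
   offset type as the second attempt.  */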
1722 /* Return true if we can use gather/scatter internal functions to
1723 vectorize STMT_INFO, which is a grouped or strided load or store.
1724 MASKED_P is true if load or store is conditional. When returning
1725 true, fill in GS_INFO with the information required to perform the
1726 operation. */
1728 static bool
1729 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1730 loop_vec_info loop_vinfo, bool masked_p,
1731 gather_scatter_info *gs_info)
1733 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1734 || gs_info->ifn == IFN_LAST)
1735 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1736 masked_p, gs_info);
1738 tree old_offset_type = TREE_TYPE (gs_info->offset);
1739 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1741 gcc_assert (TYPE_PRECISION (new_offset_type)
1742 >= TYPE_PRECISION (old_offset_type));
1743 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1745 if (dump_enabled_p ())
1746 dump_printf_loc (MSG_NOTE, vect_location,
1747 "using gather/scatter for strided/grouped access,"
1748 " scale = %d\n", gs_info->scale);
1750 return true;
1753 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1754 elements with a known constant step. Return -1 if that step
1755 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1757 static int
1758 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1760 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1761 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1762 size_zero_node);
1765 /* If the target supports a permute mask that reverses the elements in
1766 a vector of type VECTYPE, return that mask, otherwise return null. */
1768 static tree
1769 perm_mask_for_reverse (tree vectype)
1771 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1773 /* The encoding has a single stepped pattern. */
1774 vec_perm_builder sel (nunits, 1, 3);
1775 for (int i = 0; i < 3; ++i)
1776 sel.quick_push (nunits - 1 - i);
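/* For example, for V8HI the leading pattern { 7, 6, 5 } extends to the full reversal { 7, 6, 5, 4, 3, 2, 1, 0 }. */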
1778 vec_perm_indices indices (sel, 1, nunits);
1779 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1780 indices))
1781 return NULL_TREE;
1782 return vect_gen_perm_mask_checked (vectype, indices);
1785 /* A subroutine of get_load_store_type, with a subset of the same
1786 arguments. Handle the case where STMT_INFO is a load or store that
1787 accesses consecutive elements with a negative step. Sets *POFFSET
1788 to the offset to be applied to the DR for the first access. */
1790 static vect_memory_access_type
1791 get_negative_load_store_type (vec_info *vinfo,
1792 stmt_vec_info stmt_info, tree vectype,
1793 vec_load_store_type vls_type,
1794 unsigned int ncopies, poly_int64 *poffset)
1796 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1797 dr_alignment_support alignment_support_scheme;
1799 if (ncopies > 1)
1801 if (dump_enabled_p ())
1802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1803 "multiple types with negative step.\n");
1804 return VMAT_ELEMENTWISE;
1807 /* For backward running DRs the first access in vectype actually is
1808 N-1 elements before the address of the DR. */
1809 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1810 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
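/* For V4SI, for example, this is an offset of -3 elements, i.e. -12 bytes. */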
1812 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1813 alignment_support_scheme
1814 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1815 if (alignment_support_scheme != dr_aligned
1816 && alignment_support_scheme != dr_unaligned_supported)
1818 if (dump_enabled_p ())
1819 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1820 "negative step but alignment required.\n");
1821 *poffset = 0;
1822 return VMAT_ELEMENTWISE;
1825 if (vls_type == VLS_STORE_INVARIANT)
1827 if (dump_enabled_p ())
1828 dump_printf_loc (MSG_NOTE, vect_location,
1829 "negative step with invariant source;"
1830 " no permute needed.\n");
1831 return VMAT_CONTIGUOUS_DOWN;
1834 if (!perm_mask_for_reverse (vectype))
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1838 "negative step and reversing not supported.\n");
1839 *poffset = 0;
1840 return VMAT_ELEMENTWISE;
1843 return VMAT_CONTIGUOUS_REVERSE;
1846 /* STMT_INFO is either a masked or unconditional store. Return the value
1847 being stored. */
1849 tree
1850 vect_get_store_rhs (stmt_vec_info stmt_info)
1852 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1854 gcc_assert (gimple_assign_single_p (assign));
1855 return gimple_assign_rhs1 (assign);
1857 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1859 internal_fn ifn = gimple_call_internal_fn (call);
1860 int index = internal_fn_stored_value_index (ifn);
1861 gcc_assert (index >= 0);
1862 return gimple_call_arg (call, index);
1864 gcc_unreachable ();
1867 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1869 This function returns a vector type which can be composed from NELTS pieces,
1870 whose type is recorded in PTYPE. VTYPE should be a vector type and has the
1871 same vector size as the return vector. It first checks whether the target
1872 supports constructing such a vector from pieces of a vector mode of the
1873 piece size; if not, it then checks construction from a same-sized scalar
1874 mode. It returns NULL_TREE if no available composition is found.
1876 For example, for (vtype=V16QI, nelts=4), we can probably get:
1877 - V16QI with PTYPE V4QI.
1878 - V4SI with PTYPE SI.
1879 - NULL_TREE. */
1881 static tree
1882 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1884 gcc_assert (VECTOR_TYPE_P (vtype));
1885 gcc_assert (known_gt (nelts, 0U));
1887 machine_mode vmode = TYPE_MODE (vtype);
1888 if (!VECTOR_MODE_P (vmode))
1889 return NULL_TREE;
1891 /* When we are asked to compose the vector from its components let
1892 that happen directly. */
1893 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1895 *ptype = TREE_TYPE (vtype);
1896 return vtype;
1899 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
1900 unsigned int pbsize;
1901 if (constant_multiple_p (vbsize, nelts, &pbsize))
1903 /* First check if vec_init optab supports construction from
1904 vector pieces directly. */
1905 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1906 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
1907 machine_mode rmode;
1908 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
1909 && (convert_optab_handler (vec_init_optab, vmode, rmode)
1910 != CODE_FOR_nothing))
1912 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1913 return vtype;
1916 /* Otherwise check whether an integer type of the same piece size exists
1917 and whether the vec_init optab supports construction from it directly. */
1918 if (int_mode_for_size (pbsize, 0).exists (&elmode)
1919 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
1920 && (convert_optab_handler (vec_init_optab, rmode, elmode)
1921 != CODE_FOR_nothing))
1923 *ptype = build_nonstandard_integer_type (pbsize, 1);
1924 return build_vector_type (*ptype, nelts);
1928 return NULL_TREE;
1931 /* A subroutine of get_load_store_type, with a subset of the same
1932 arguments. Handle the case where STMT_INFO is part of a grouped load
1933 or store.
1935 For stores, the statements in the group are all consecutive
1936 and there is no gap at the end. For loads, the statements in the
1937 group might not be consecutive; there can be gaps between statements
1938 as well as at the end. */
1940 static bool
1941 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1942 tree vectype, slp_tree slp_node,
1943 bool masked_p, vec_load_store_type vls_type,
1944 vect_memory_access_type *memory_access_type,
1945 poly_int64 *poffset,
1946 dr_alignment_support *alignment_support_scheme,
1947 int *misalignment,
1948 gather_scatter_info *gs_info,
1949 internal_fn *lanes_ifn)
1951 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1952 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1953 stmt_vec_info first_stmt_info;
1954 unsigned int group_size;
1955 unsigned HOST_WIDE_INT gap;
1956 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1958 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1959 group_size = DR_GROUP_SIZE (first_stmt_info);
1960 gap = DR_GROUP_GAP (first_stmt_info);
1962 else
1964 first_stmt_info = stmt_info;
1965 group_size = 1;
1966 gap = 0;
1968 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
1969 bool single_element_p = (stmt_info == first_stmt_info
1970 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
1971 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1973 /* True if the vectorized statements would access beyond the last
1974 statement in the group. */
1975 bool overrun_p = false;
1977 /* True if we can cope with such overrun by peeling for gaps, so that
1978 there is at least one final scalar iteration after the vector loop. */
1979 bool can_overrun_p = (!masked_p
1980 && vls_type == VLS_LOAD
1981 && loop_vinfo
1982 && !loop->inner);
1984 /* There can only be a gap at the end of the group if the stride is
1985 known at compile time. */
1986 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
1988 /* Stores can't yet have gaps. */
1989 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
1991 if (slp_node)
1993 /* For SLP vectorization we directly vectorize a subchain
1994 without permutation. */
1995 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1996 first_dr_info
1997 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
1998 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2000 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2001 separated by the stride, until we have a complete vector.
2002 Fall back to scalar accesses if that isn't possible. */
2003 if (multiple_p (nunits, group_size))
2004 *memory_access_type = VMAT_STRIDED_SLP;
2005 else
2006 *memory_access_type = VMAT_ELEMENTWISE;
2008 else
2010 overrun_p = loop_vinfo && gap != 0;
2011 if (overrun_p && vls_type != VLS_LOAD)
2013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2014 "Grouped store with gaps requires"
2015 " non-consecutive accesses\n");
2016 return false;
2018 /* An overrun is fine if the trailing elements are smaller
2019 than the alignment boundary B. Every vector access will
2020 be a multiple of B and so we are guaranteed to access a
2021 non-gap element in the same B-sized block. */
2022 if (overrun_p
2023 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2024 vectype)
2025 / vect_get_scalar_dr_size (first_dr_info)))
2026 overrun_p = false;
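/* For example, with 16-byte known alignment and 4-byte elements a trailing gap of up to 3 elements is fine. */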
2028 /* If the gap splits the vector in half and the target
2029 can do half-vector operations avoid the epilogue peeling
2030 by simply loading half of the vector only. Usually
2031 the construction with an upper zero half will be elided. */
2032 dr_alignment_support alss;
2033 int misalign = dr_misalignment (first_dr_info, vectype);
2034 tree half_vtype;
2035 if (overrun_p
2036 && !masked_p
2037 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2038 vectype, misalign)))
2039 == dr_aligned
2040 || alss == dr_unaligned_supported)
2041 && known_eq (nunits, (group_size - gap) * 2)
2042 && known_eq (nunits, group_size)
2043 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2044 != NULL_TREE))
2045 overrun_p = false;
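/* For example, on suitable targets a group of 4 ints with a gap of 2 can be loaded as a single V2SI (or DImode) piece, with the upper half of the vector zeroed. */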
2047 if (overrun_p && !can_overrun_p)
2049 if (dump_enabled_p ())
2050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2051 "Peeling for outer loop is not supported\n");
2052 return false;
2054 int cmp = compare_step_with_zero (vinfo, stmt_info);
2055 if (cmp < 0)
2057 if (single_element_p)
2058 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2059 only correct for single element "interleaving" SLP. */
2060 *memory_access_type = get_negative_load_store_type
2061 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2062 else
2064 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2065 separated by the stride, until we have a complete vector.
2066 Fall back to scalar accesses if that isn't possible. */
2067 if (multiple_p (nunits, group_size))
2068 *memory_access_type = VMAT_STRIDED_SLP;
2069 else
2070 *memory_access_type = VMAT_ELEMENTWISE;
2073 else if (cmp == 0 && loop_vinfo)
2075 gcc_assert (vls_type == VLS_LOAD);
2076 *memory_access_type = VMAT_INVARIANT;
2077 /* Invariant accesses perform only component accesses, alignment
2078 is irrelevant for them. */
2079 *alignment_support_scheme = dr_unaligned_supported;
2081 else
2082 *memory_access_type = VMAT_CONTIGUOUS;
2084 /* When we have a contiguous access across loop iterations
2085 but the access in the loop doesn't cover the full vector
2086 we can end up with no gap recorded but still excess
2087 elements accessed, see PR103116. Make sure we peel for
2088 gaps if necessary and sufficient and give up if not.
2090 If there is a combination of the access not covering the full
2091 vector and a gap recorded then we may need to peel twice. */
2092 if (loop_vinfo
2093 && *memory_access_type == VMAT_CONTIGUOUS
2094 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2095 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2096 nunits))
2098 unsigned HOST_WIDE_INT cnunits, cvf;
2099 if (!can_overrun_p
2100 || !nunits.is_constant (&cnunits)
2101 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2102 /* Peeling for gaps assumes that a single scalar iteration
2103 is enough to make sure the last vector iteration doesn't
2104 access excess elements.
2105 ??? Enhancements include peeling multiple iterations
2106 or using masked loads with a static mask. */
2107 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2109 if (dump_enabled_p ())
2110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2111 "peeling for gaps insufficient for "
2112 "access\n");
2113 return false;
2115 overrun_p = true;
2119 else
2121 /* We can always handle this case using elementwise accesses,
2122 but see if something more efficient is available. */
2123 *memory_access_type = VMAT_ELEMENTWISE;
2125 /* If there is a gap at the end of the group then these optimizations
2126 would access excess elements in the last iteration. */
2127 bool would_overrun_p = (gap != 0);
2128 /* An overrun is fine if the trailing elements are smaller than the
2129 alignment boundary B. Every vector access will be a multiple of B
2130 and so we are guaranteed to access a non-gap element in the
2131 same B-sized block. */
2132 if (would_overrun_p
2133 && !masked_p
2134 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2135 / vect_get_scalar_dr_size (first_dr_info)))
2136 would_overrun_p = false;
2138 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2139 && (can_overrun_p || !would_overrun_p)
2140 && compare_step_with_zero (vinfo, stmt_info) > 0)
2142 /* First cope with the degenerate case of a single-element
2143 vector. */
2144 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2147 else
2149 /* Otherwise try using LOAD/STORE_LANES. */
2150 *lanes_ifn
2151 = vls_type == VLS_LOAD
2152 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2153 : vect_store_lanes_supported (vectype, group_size,
2154 masked_p);
2155 if (*lanes_ifn != IFN_LAST)
2157 *memory_access_type = VMAT_LOAD_STORE_LANES;
2158 overrun_p = would_overrun_p;
2161 /* If that fails, try using permuting loads. */
2162 else if (vls_type == VLS_LOAD
2163 ? vect_grouped_load_supported (vectype,
2164 single_element_p,
2165 group_size)
2166 : vect_grouped_store_supported (vectype, group_size))
2168 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2169 overrun_p = would_overrun_p;
2174 /* As a last resort, try using a gather load or scatter store.
2176 ??? Although the code can handle all group sizes correctly,
2177 it probably isn't a win to use separate strided accesses based
2178 on nearby locations. Or, even if it's a win over scalar code,
2179 it might not be a win over vectorizing at a lower VF, if that
2180 allows us to use contiguous accesses. */
2181 if (*memory_access_type == VMAT_ELEMENTWISE
2182 && single_element_p
2183 && loop_vinfo
2184 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2185 masked_p, gs_info))
2186 *memory_access_type = VMAT_GATHER_SCATTER;
2189 if (*memory_access_type == VMAT_GATHER_SCATTER
2190 || *memory_access_type == VMAT_ELEMENTWISE)
2192 *alignment_support_scheme = dr_unaligned_supported;
2193 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2195 else
2197 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2198 *alignment_support_scheme
2199 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2200 *misalignment);
2203 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2205 /* STMT is the leader of the group. Check the operands of all the
2206 stmts of the group. */
2207 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2208 while (next_stmt_info)
2210 tree op = vect_get_store_rhs (next_stmt_info);
2211 enum vect_def_type dt;
2212 if (!vect_is_simple_use (op, vinfo, &dt))
2214 if (dump_enabled_p ())
2215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2216 "use not simple.\n");
2217 return false;
2219 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2223 if (overrun_p)
2225 gcc_assert (can_overrun_p);
2226 if (dump_enabled_p ())
2227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2228 "Data access with gaps requires scalar "
2229 "epilogue loop\n");
2230 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2233 return true;
2236 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2237 if there is a memory access type that the vectorized form can use,
2238 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2239 or scatters, fill in GS_INFO accordingly. In addition
2240 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2241 the target does not support the alignment scheme. *MISALIGNMENT
2242 is set according to the alignment of the access (including
2243 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2245 SLP says whether we're performing SLP rather than loop vectorization.
2246 MASKED_P is true if the statement is conditional on a vectorized mask.
2247 VECTYPE is the vector type that the vectorized statements will use.
2248 NCOPIES is the number of vector statements that will be needed. */
2250 static bool
2251 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2252 tree vectype, slp_tree slp_node,
2253 bool masked_p, vec_load_store_type vls_type,
2254 unsigned int ncopies,
2255 vect_memory_access_type *memory_access_type,
2256 poly_int64 *poffset,
2257 dr_alignment_support *alignment_support_scheme,
2258 int *misalignment,
2259 gather_scatter_info *gs_info,
2260 internal_fn *lanes_ifn)
2262 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2263 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2264 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2265 *poffset = 0;
2266 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2268 *memory_access_type = VMAT_GATHER_SCATTER;
2269 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2270 gcc_unreachable ();
2271 /* When using internal functions, we rely on pattern recognition
2272 to convert the type of the offset to the type that the target
2273 requires, with the result being a call to an internal function.
2274 If that failed for some reason (e.g. because another pattern
2275 took priority), just handle cases in which the offset already
2276 has the right type. */
2277 else if (gs_info->ifn != IFN_LAST
2278 && !is_gimple_call (stmt_info->stmt)
2279 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2280 TREE_TYPE (gs_info->offset_vectype)))
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2284 "%s offset requires a conversion\n",
2285 vls_type == VLS_LOAD ? "gather" : "scatter");
2286 return false;
2288 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2289 &gs_info->offset_dt,
2290 &gs_info->offset_vectype))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 "%s index use not simple.\n",
2295 vls_type == VLS_LOAD ? "gather" : "scatter");
2296 return false;
2298 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2300 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2301 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2302 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2303 (gs_info->offset_vectype),
2304 TYPE_VECTOR_SUBPARTS (vectype)))
2306 if (dump_enabled_p ())
2307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308 "unsupported vector types for emulated "
2309 "gather.\n");
2310 return false;
2313 /* Gather-scatter accesses perform only component accesses, alignment
2314 is irrelevant for them. */
2315 *alignment_support_scheme = dr_unaligned_supported;
2317 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2319 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2320 masked_p,
2321 vls_type, memory_access_type, poffset,
2322 alignment_support_scheme,
2323 misalignment, gs_info, lanes_ifn))
2324 return false;
2326 else if (STMT_VINFO_STRIDED_P (stmt_info))
2328 gcc_assert (!slp_node);
2329 if (loop_vinfo
2330 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2331 masked_p, gs_info))
2332 *memory_access_type = VMAT_GATHER_SCATTER;
2333 else
2334 *memory_access_type = VMAT_ELEMENTWISE;
2335 /* Alignment is irrelevant here. */
2336 *alignment_support_scheme = dr_unaligned_supported;
2338 else
2340 int cmp = compare_step_with_zero (vinfo, stmt_info);
2341 if (cmp == 0)
2343 gcc_assert (vls_type == VLS_LOAD);
2344 *memory_access_type = VMAT_INVARIANT;
2345 /* Invariant accesses perform only component accesses, alignment
2346 is irrelevant for them. */
2347 *alignment_support_scheme = dr_unaligned_supported;
2349 else
2351 if (cmp < 0)
2352 *memory_access_type = get_negative_load_store_type
2353 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2354 else
2355 *memory_access_type = VMAT_CONTIGUOUS;
2356 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2357 vectype, *poffset);
2358 *alignment_support_scheme
2359 = vect_supportable_dr_alignment (vinfo,
2360 STMT_VINFO_DR_INFO (stmt_info),
2361 vectype, *misalignment);
2365 if ((*memory_access_type == VMAT_ELEMENTWISE
2366 || *memory_access_type == VMAT_STRIDED_SLP)
2367 && !nunits.is_constant ())
2369 if (dump_enabled_p ())
2370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2371 "Not using elementwise accesses due to variable "
2372 "vectorization factor.\n");
2373 return false;
2376 if (*alignment_support_scheme == dr_unaligned_unsupported)
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2380 "unsupported unaligned access\n");
2381 return false;
2384 /* FIXME: At the moment the cost model seems to underestimate the
2385 cost of using elementwise accesses. This check preserves the
2386 traditional behavior until that can be fixed. */
2387 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2388 if (!first_stmt_info)
2389 first_stmt_info = stmt_info;
2390 if (*memory_access_type == VMAT_ELEMENTWISE
2391 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2392 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2393 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2394 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2396 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2398 "not falling back to elementwise accesses\n");
2399 return false;
2401 return true;
2404 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2405 conditional operation STMT_INFO. When returning true, store the mask
2406 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2407 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2408 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2410 static bool
2411 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2412 slp_tree slp_node, unsigned mask_index,
2413 tree *mask, slp_tree *mask_node,
2414 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2416 enum vect_def_type mask_dt;
2417 tree mask_vectype;
2418 slp_tree mask_node_1;
2419 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2420 mask, &mask_node_1, &mask_dt, &mask_vectype))
2422 if (dump_enabled_p ())
2423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2424 "mask use not simple.\n");
2425 return false;
2428 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2430 if (dump_enabled_p ())
2431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432 "mask argument is not a boolean.\n");
2433 return false;
2436 /* If the caller is not prepared for adjusting an external/constant
2437 SLP mask vector type fail. */
2438 if (slp_node
2439 && !mask_node
2440 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2442 if (dump_enabled_p ())
2443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2444 "SLP mask argument is not vectorized.\n");
2445 return false;
2448 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2449 if (!mask_vectype)
2450 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2452 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2454 if (dump_enabled_p ())
2455 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2456 "could not find an appropriate vector mask type.\n");
2457 return false;
2460 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2461 TYPE_VECTOR_SUBPARTS (vectype)))
2463 if (dump_enabled_p ())
2464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2465 "vector mask type %T"
2466 " does not match vector data type %T.\n",
2467 mask_vectype, vectype);
2469 return false;
2472 *mask_dt_out = mask_dt;
2473 *mask_vectype_out = mask_vectype;
2474 if (mask_node)
2475 *mask_node = mask_node_1;
2476 return true;
2479 /* Return true if stored value RHS is suitable for vectorizing store
2480 statement STMT_INFO. When returning true, store the type of the
2481 definition in *RHS_DT_OUT, the type of the vectorized store value in
2482 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2484 static bool
2485 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2486 slp_tree slp_node, tree rhs,
2487 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2488 vec_load_store_type *vls_type_out)
2490 /* In the case this is a store from a constant make sure
2491 native_encode_expr can handle it. */
2492 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2494 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2496 "cannot encode constant as a byte sequence.\n");
2497 return false;
2500 int op_no = 0;
2501 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2503 if (gimple_call_internal_p (call)
2504 && internal_store_fn_p (gimple_call_internal_fn (call)))
2505 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2506 if (slp_node)
2507 op_no = vect_slp_child_index_for_operand (call, op_no);
2510 enum vect_def_type rhs_dt;
2511 tree rhs_vectype;
2512 slp_tree slp_op;
2513 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2514 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2516 if (dump_enabled_p ())
2517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2518 "use not simple.\n");
2519 return false;
2522 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2523 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2525 if (dump_enabled_p ())
2526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2527 "incompatible vector types.\n");
2528 return false;
2531 *rhs_dt_out = rhs_dt;
2532 *rhs_vectype_out = rhs_vectype;
2533 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2534 *vls_type_out = VLS_STORE_INVARIANT;
2535 else
2536 *vls_type_out = VLS_STORE;
2537 return true;
2540 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2541 Note that we support masks with floating-point type, in which case the
2542 floats are interpreted as a bitmask. */
2544 static tree
2545 vect_build_all_ones_mask (vec_info *vinfo,
2546 stmt_vec_info stmt_info, tree masktype)
2548 if (TREE_CODE (masktype) == INTEGER_TYPE)
2549 return build_int_cst (masktype, -1);
2550 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2552 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2553 mask = build_vector_from_val (masktype, mask);
2554 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2556 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2558 REAL_VALUE_TYPE r;
2559 long tmp[6];
2560 for (int j = 0; j < 6; ++j)
2561 tmp[j] = -1;
2562 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2563 tree mask = build_real (TREE_TYPE (masktype), r);
2564 mask = build_vector_from_val (masktype, mask);
2565 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2567 gcc_unreachable ();
2570 /* Build an all-zero merge value of type VECTYPE while vectorizing
2571 STMT_INFO as a gather load. */
2573 static tree
2574 vect_build_zero_merge_argument (vec_info *vinfo,
2575 stmt_vec_info stmt_info, tree vectype)
2577 tree merge;
2578 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2579 merge = build_int_cst (TREE_TYPE (vectype), 0);
2580 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2582 REAL_VALUE_TYPE r;
2583 long tmp[6];
2584 for (int j = 0; j < 6; ++j)
2585 tmp[j] = 0;
2586 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2587 merge = build_real (TREE_TYPE (vectype), r);
2589 else
2590 gcc_unreachable ();
2591 merge = build_vector_from_val (vectype, merge);
2592 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2595 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2596 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2597 the gather load operation. If the load is conditional, MASK is the
2598 unvectorized condition and MASK_DT is its definition type, otherwise
2599 MASK is null. */
2601 static void
2602 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2603 gimple_stmt_iterator *gsi,
2604 gimple **vec_stmt,
2605 gather_scatter_info *gs_info,
2606 tree mask,
2607 stmt_vector_for_cost *cost_vec)
2609 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2611 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2612 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2613 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2614 edge pe = loop_preheader_edge (loop);
2615 enum { NARROW, NONE, WIDEN } modifier;
2616 poly_uint64 gather_off_nunits
2617 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2619 /* FIXME: Keep the previous costing approach from vect_model_load_cost by
2620 costing N scalar loads, but it should be tweaked to use target-specific
2621 costs for the related gather load calls. */
2622 if (cost_vec)
2624 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
2625 unsigned int inside_cost;
2626 inside_cost = record_stmt_cost (cost_vec, ncopies * assumed_nunits,
2627 scalar_load, stmt_info, 0, vect_body);
2628 if (dump_enabled_p ())
2629 dump_printf_loc (MSG_NOTE, vect_location,
2630 "vect_model_load_cost: inside_cost = %d, "
2631 "prologue_cost = 0 .\n",
2632 inside_cost);
2633 return;
2636 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2637 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2638 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2639 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2640 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2641 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2642 tree scaletype = TREE_VALUE (arglist);
2643 tree real_masktype = masktype;
2644 gcc_checking_assert (types_compatible_p (srctype, rettype)
2645 && (!mask
2646 || TREE_CODE (masktype) == INTEGER_TYPE
2647 || types_compatible_p (srctype, masktype)));
2648 if (mask)
2649 masktype = truth_type_for (srctype);
2651 tree mask_halftype = masktype;
2652 tree perm_mask = NULL_TREE;
2653 tree mask_perm_mask = NULL_TREE;
2654 if (known_eq (nunits, gather_off_nunits))
2655 modifier = NONE;
2656 else if (known_eq (nunits * 2, gather_off_nunits))
2658 modifier = WIDEN;
2660 /* Currently widening gathers and scatters are only supported for
2661 fixed-length vectors. */
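/* Each offset vector covers two data vectors' worth of offsets; the permutation below moves the high half of the offsets into the low element positions for the odd-numbered copies (e.g. { 2, 3, 2, 3 } for a four-element offset vector). */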
2662 int count = gather_off_nunits.to_constant ();
2663 vec_perm_builder sel (count, count, 1);
2664 for (int i = 0; i < count; ++i)
2665 sel.quick_push (i | (count / 2));
2667 vec_perm_indices indices (sel, 1, count);
2668 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2669 indices);
2671 else if (known_eq (nunits, gather_off_nunits * 2))
2673 modifier = NARROW;
2675 /* Currently narrowing gathers and scatters are only supported for
2676 fixed-length vectors. */
2677 int count = nunits.to_constant ();
2678 vec_perm_builder sel (count, count, 1);
2679 sel.quick_grow (count);
2680 for (int i = 0; i < count; ++i)
2681 sel[i] = i < count / 2 ? i : i + count / 2;
2682 vec_perm_indices indices (sel, 2, count);
2683 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2685 ncopies *= 2;
2687 if (mask && VECTOR_TYPE_P (real_masktype))
2689 for (int i = 0; i < count; ++i)
2690 sel[i] = i | (count / 2);
2691 indices.new_vector (sel, 2, count);
2692 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2694 else if (mask)
2695 mask_halftype = truth_type_for (gs_info->offset_vectype);
2697 else
2698 gcc_unreachable ();
2700 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2701 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2703 tree ptr = fold_convert (ptrtype, gs_info->base);
2704 if (!is_gimple_min_invariant (ptr))
2706 gimple_seq seq;
2707 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2708 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2709 gcc_assert (!new_bb);
2712 tree scale = build_int_cst (scaletype, gs_info->scale);
2714 tree vec_oprnd0 = NULL_TREE;
2715 tree vec_mask = NULL_TREE;
2716 tree src_op = NULL_TREE;
2717 tree mask_op = NULL_TREE;
2718 tree prev_res = NULL_TREE;
2720 if (!mask)
2722 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2723 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2726 auto_vec<tree> vec_oprnds0;
2727 auto_vec<tree> vec_masks;
2728 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2729 modifier == WIDEN ? ncopies / 2 : ncopies,
2730 gs_info->offset, &vec_oprnds0);
2731 if (mask)
2732 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2733 modifier == NARROW ? ncopies / 2 : ncopies,
2734 mask, &vec_masks, masktype);
2735 for (int j = 0; j < ncopies; ++j)
2737 tree op, var;
2738 if (modifier == WIDEN && (j & 1))
2739 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2740 perm_mask, stmt_info, gsi);
2741 else
2742 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2744 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2746 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2747 TYPE_VECTOR_SUBPARTS (idxtype)));
2748 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2749 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2750 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2751 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2752 op = var;
2755 if (mask)
2757 if (mask_perm_mask && (j & 1))
2758 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2759 mask_perm_mask, stmt_info, gsi);
2760 else
2762 if (modifier == NARROW)
2764 if ((j & 1) == 0)
2765 vec_mask = vec_masks[j / 2];
2767 else
2768 vec_mask = vec_masks[j];
2770 mask_op = vec_mask;
2771 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2773 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2774 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2775 gcc_assert (known_eq (sub1, sub2));
2776 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2777 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2778 gassign *new_stmt
2779 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2780 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2781 mask_op = var;
2784 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2786 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2787 gassign *new_stmt
2788 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2789 : VEC_UNPACK_LO_EXPR,
2790 mask_op);
2791 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2792 mask_op = var;
2794 src_op = mask_op;
2797 tree mask_arg = mask_op;
2798 if (masktype != real_masktype)
2800 tree utype, optype = TREE_TYPE (mask_op);
2801 if (VECTOR_TYPE_P (real_masktype)
2802 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2803 utype = real_masktype;
2804 else
2805 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2806 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2807 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2808 gassign *new_stmt
2809 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2810 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2811 mask_arg = var;
2812 if (!useless_type_conversion_p (real_masktype, utype))
2814 gcc_assert (TYPE_PRECISION (utype)
2815 <= TYPE_PRECISION (real_masktype));
2816 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2817 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2818 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2819 mask_arg = var;
2821 src_op = build_zero_cst (srctype);
2823 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2824 mask_arg, scale);
2826 if (!useless_type_conversion_p (vectype, rettype))
2828 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2829 TYPE_VECTOR_SUBPARTS (rettype)));
2830 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2831 gimple_call_set_lhs (new_stmt, op);
2832 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2833 var = make_ssa_name (vec_dest);
2834 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2835 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2836 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2838 else
2840 var = make_ssa_name (vec_dest, new_stmt);
2841 gimple_call_set_lhs (new_stmt, var);
2842 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2845 if (modifier == NARROW)
2847 if ((j & 1) == 0)
2849 prev_res = var;
2850 continue;
2852 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2853 stmt_info, gsi);
2854 new_stmt = SSA_NAME_DEF_STMT (var);
2857 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2859 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2862 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2863 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2864 the scatter store operation. If the store is conditional, MASK is the
2865 unvectorized condition, otherwise MASK is null. */
2867 static void
2868 vect_build_scatter_store_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2869 gimple_stmt_iterator *gsi, gimple **vec_stmt,
2870 gather_scatter_info *gs_info, tree mask,
2871 stmt_vector_for_cost *cost_vec)
2873 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2874 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2875 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2876 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2877 enum { NARROW, NONE, WIDEN } modifier;
2878 poly_uint64 scatter_off_nunits
2879 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2881 /* FIXME: Keep the previous costing approach from vect_model_store_cost by
2882 costing N scalar stores, but it should be tweaked to use target-specific
2883 costs for the related scatter store calls. */
2884 if (cost_vec)
2886 tree op = vect_get_store_rhs (stmt_info);
2887 enum vect_def_type dt;
2888 gcc_assert (vect_is_simple_use (op, vinfo, &dt));
2889 unsigned int inside_cost, prologue_cost = 0;
2890 if (dt == vect_constant_def || dt == vect_external_def)
2891 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
2892 stmt_info, 0, vect_prologue);
2893 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
2894 inside_cost = record_stmt_cost (cost_vec, ncopies * assumed_nunits,
2895 scalar_store, stmt_info, 0, vect_body);
2897 if (dump_enabled_p ())
2898 dump_printf_loc (MSG_NOTE, vect_location,
2899 "vect_model_store_cost: inside_cost = %d, "
2900 "prologue_cost = %d .\n",
2901 inside_cost, prologue_cost);
2902 return;
2905 tree perm_mask = NULL_TREE, mask_halfvectype = NULL_TREE;
2906 if (known_eq (nunits, scatter_off_nunits))
2907 modifier = NONE;
2908 else if (known_eq (nunits * 2, scatter_off_nunits))
2910 modifier = WIDEN;
2912 /* Currently gathers and scatters are only supported for
2913 fixed-length vectors. */
2914 unsigned int count = scatter_off_nunits.to_constant ();
2915 vec_perm_builder sel (count, count, 1);
2916 for (unsigned i = 0; i < (unsigned int) count; ++i)
2917 sel.quick_push (i | (count / 2));
2919 vec_perm_indices indices (sel, 1, count);
2920 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype, indices);
2921 gcc_assert (perm_mask != NULL_TREE);
2923 else if (known_eq (nunits, scatter_off_nunits * 2))
2925 modifier = NARROW;
2927 /* Currently gathers and scatters are only supported for
2928 fixed-length vectors. */
2929 unsigned int count = nunits.to_constant ();
2930 vec_perm_builder sel (count, count, 1);
2931 for (unsigned i = 0; i < (unsigned int) count; ++i)
2932 sel.quick_push (i | (count / 2));
2934 vec_perm_indices indices (sel, 2, count);
2935 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2936 gcc_assert (perm_mask != NULL_TREE);
2937 ncopies *= 2;
2939 if (mask)
2940 mask_halfvectype = truth_type_for (gs_info->offset_vectype);
2942 else
2943 gcc_unreachable ();
2945 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2946 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2947 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2948 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2949 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2950 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2951 tree scaletype = TREE_VALUE (arglist);
2953 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2954 && TREE_CODE (rettype) == VOID_TYPE);
2956 tree ptr = fold_convert (ptrtype, gs_info->base);
2957 if (!is_gimple_min_invariant (ptr))
2959 gimple_seq seq;
2960 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2961 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2962 edge pe = loop_preheader_edge (loop);
2963 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2964 gcc_assert (!new_bb);
2967 tree mask_arg = NULL_TREE;
2968 if (mask == NULL_TREE)
2970 mask_arg = build_int_cst (masktype, -1);
2971 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
2974 tree scale = build_int_cst (scaletype, gs_info->scale);
2976 auto_vec<tree> vec_oprnds0;
2977 auto_vec<tree> vec_oprnds1;
2978 auto_vec<tree> vec_masks;
2979 if (mask)
2981 tree mask_vectype = truth_type_for (vectype);
2982 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2983 modifier == NARROW ? ncopies / 2 : ncopies,
2984 mask, &vec_masks, mask_vectype);
2986 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2987 modifier == WIDEN ? ncopies / 2 : ncopies,
2988 gs_info->offset, &vec_oprnds0);
2989 tree op = vect_get_store_rhs (stmt_info);
2990 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2991 modifier == NARROW ? ncopies / 2 : ncopies, op,
2992 &vec_oprnds1);
2994 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2995 tree mask_op = NULL_TREE;
2996 tree src, vec_mask;
2997 for (int j = 0; j < ncopies; ++j)
2999 if (modifier == WIDEN)
3001 if (j & 1)
3002 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0, perm_mask,
3003 stmt_info, gsi);
3004 else
3005 op = vec_oprnd0 = vec_oprnds0[j / 2];
3006 src = vec_oprnd1 = vec_oprnds1[j];
3007 if (mask)
3008 mask_op = vec_mask = vec_masks[j];
3010 else if (modifier == NARROW)
3012 if (j & 1)
3013 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
3014 perm_mask, stmt_info, gsi);
3015 else
3016 src = vec_oprnd1 = vec_oprnds1[j / 2];
3017 op = vec_oprnd0 = vec_oprnds0[j];
3018 if (mask)
3019 mask_op = vec_mask = vec_masks[j / 2];
3021 else
3023 op = vec_oprnd0 = vec_oprnds0[j];
3024 src = vec_oprnd1 = vec_oprnds1[j];
3025 if (mask)
3026 mask_op = vec_mask = vec_masks[j];
3029 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
3031 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
3032 TYPE_VECTOR_SUBPARTS (srctype)));
3033 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
3034 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
3035 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
3036 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3037 src = var;
3040 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
3042 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
3043 TYPE_VECTOR_SUBPARTS (idxtype)));
3044 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3045 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
3046 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3047 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3048 op = var;
3051 if (mask)
3053 tree utype;
3054 mask_arg = mask_op;
3055 if (modifier == NARROW)
3057 tree var
3058 = vect_get_new_ssa_name (mask_halfvectype, vect_simple_var);
3059 gassign *new_stmt
3060 = gimple_build_assign (var,
3061 (j & 1) ? VEC_UNPACK_HI_EXPR
3062 : VEC_UNPACK_LO_EXPR,
3063 mask_op);
3064 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3065 mask_arg = var;
3067 tree optype = TREE_TYPE (mask_arg);
3068 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
3069 utype = masktype;
3070 else
3071 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
3072 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
3073 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
3074 gassign *new_stmt
3075 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
3076 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3077 mask_arg = var;
3078 if (!useless_type_conversion_p (masktype, utype))
3080 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
3081 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
3082 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
3083 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3084 mask_arg = var;
3088 gcall *new_stmt
3089 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
3090 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3092 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3094 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3097 /* Prepare the base and offset in GS_INFO for vectorization.
3098 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3099 to the vectorized offset argument for the first copy of STMT_INFO.
3100 STMT_INFO is the statement described by GS_INFO and LOOP is the
3101 containing loop. */
3103 static void
3104 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3105 class loop *loop, stmt_vec_info stmt_info,
3106 slp_tree slp_node, gather_scatter_info *gs_info,
3107 tree *dataref_ptr, vec<tree> *vec_offset)
3109 gimple_seq stmts = NULL;
3110 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3111 if (stmts != NULL)
3113 basic_block new_bb;
3114 edge pe = loop_preheader_edge (loop);
3115 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3116 gcc_assert (!new_bb);
3118 if (slp_node)
3119 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3120 else
3122 unsigned ncopies
3123 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3124 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3125 gs_info->offset, vec_offset,
3126 gs_info->offset_vectype);
3130 /* Prepare to implement a grouped or strided load or store using
3131 the gather load or scatter store operation described by GS_INFO.
3132 STMT_INFO is the load or store statement.
3134 Set *DATAREF_BUMP to the amount that should be added to the base
3135 address after each copy of the vectorized statement. Set *VEC_OFFSET
3136 to an invariant offset vector in which element I has the value
3137 I * DR_STEP / SCALE. */
3139 static void
3140 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3141 loop_vec_info loop_vinfo,
3142 gimple_stmt_iterator *gsi,
3143 gather_scatter_info *gs_info,
3144 tree *dataref_bump, tree *vec_offset,
3145 vec_loop_lens *loop_lens)
3147 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3148 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3150 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3152 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
3153 ivtmp_8 = _31 * 16 (step in bytes);
3154 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
3155 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
3156 tree loop_len
3157 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
3158 tree tmp
3159 = fold_build2 (MULT_EXPR, sizetype,
3160 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3161 loop_len);
3162 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
3163 GSI_SAME_STMT);
3165 else
3167 tree bump
3168 = size_binop (MULT_EXPR,
3169 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3170 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3171 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3174 /* The offset given in GS_INFO can have pointer type, so use the element
3175 type of the vector instead. */
3176 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3178 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3179 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3180 ssize_int (gs_info->scale));
3181 step = fold_convert (offset_type, step);
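/* For example, with DR_STEP 16 and SCALE 4, X is 4 and the series below is { 0, 4, 8, 12, ... }; the gather/scatter rescales it back to byte offsets { 0, 16, 32, ... }. */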
3183 /* Create {0, X, X*2, X*3, ...}. */
3184 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3185 build_zero_cst (offset_type), step);
3186 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3189 /* Prepare the pointer IVs which need to be updated by a variable amount.
3190 That variable amount is the outcome of .SELECT_VL. In this case each
3191 iteration may process a flexible number of elements, as long as that
3192 number is <= VF elements.
3194 Return the data reference increment according to .SELECT_VL.
3195 If new statements are needed, insert them before GSI. */
3197 static tree
3198 vect_get_loop_variant_data_ptr_increment (
3199 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
3200 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
3201 vect_memory_access_type memory_access_type)
3203 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3204 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3206 /* gather/scatter never reach here. */
3207 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
3209 /* When the SELECT_VL pattern is in use, we adjust the memory
3210 address dynamically based on the .SELECT_VL result.
3212 The result of .SELECT_VL is the number of elements to be
3213 processed in each iteration, so the memory address
3214 adjustment operation should be:
3216 addr = addr + .SELECT_VL (ARG..) * step;
3218 tree loop_len
3219 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
3220 tree len_type = TREE_TYPE (loop_len);
3221 /* Since the outcome of .SELECT_VL is a number of elements, scale it
3222 by the step to obtain the byte-sized amount by which the pointer
3223 IVs are adjusted. */
3224 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
3225 wide_int_to_tree (len_type, wi::to_widest (step)));
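/* For example, a .SELECT_VL result of 4 with a 16-byte step yields a bump of 64 bytes. */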
3226 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
3227 gassign *assign = gimple_build_assign (bump, tmp);
3228 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
3229 return bump;
3232 /* Return the amount that should be added to a vector pointer to move
3233 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3234 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3235 vectorization. */
3237 static tree
3238 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
3239 dr_vec_info *dr_info, tree aggr_type,
3240 vect_memory_access_type memory_access_type,
3241 vec_loop_lens *loop_lens = nullptr)
3243 if (memory_access_type == VMAT_INVARIANT)
3244 return size_zero_node;
3246 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3247 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3248 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
3249 loop_lens, dr_info,
3250 memory_access_type);
3252 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3253 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3254 if (tree_int_cst_sgn (step) == -1)
3255 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3256 return iv_step;
3259 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3261 static bool
3262 vectorizable_bswap (vec_info *vinfo,
3263 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3264 gimple **vec_stmt, slp_tree slp_node,
3265 slp_tree *slp_op,
3266 tree vectype_in, stmt_vector_for_cost *cost_vec)
3268 tree op, vectype;
3269 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3270 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3271 unsigned ncopies;
3273 op = gimple_call_arg (stmt, 0);
3274 vectype = STMT_VINFO_VECTYPE (stmt_info);
3275 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3277 /* Multiple types in SLP are handled by creating the appropriate number of
3278 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3279 case of SLP. */
3280 if (slp_node)
3281 ncopies = 1;
3282 else
3283 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3285 gcc_assert (ncopies >= 1);
3287 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3288 if (! char_vectype)
3289 return false;
3291 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3292 unsigned word_bytes;
3293 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3294 return false;
3296 /* The encoding uses one stepped pattern for each byte in the word. */
3297 vec_perm_builder elts (num_bytes, word_bytes, 3);
3298 for (unsigned i = 0; i < 3; ++i)
3299 for (unsigned j = 0; j < word_bytes; ++j)
3300 elts.quick_push ((i + 1) * word_bytes - j - 1);
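/* For a 4-byte word on V16QI (word_bytes == 4) this expands to { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }. */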
3302 vec_perm_indices indices (elts, 1, num_bytes);
3303 machine_mode vmode = TYPE_MODE (char_vectype);
3304 if (!can_vec_perm_const_p (vmode, vmode, indices))
3305 return false;
3307 if (! vec_stmt)
3309 if (slp_node
3310 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3312 if (dump_enabled_p ())
3313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3314 "incompatible vector types for invariants\n");
3315 return false;
3318 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3319 DUMP_VECT_SCOPE ("vectorizable_bswap");
3320 record_stmt_cost (cost_vec,
3321 1, vector_stmt, stmt_info, 0, vect_prologue);
3322 record_stmt_cost (cost_vec,
3323 slp_node
3324 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3325 vec_perm, stmt_info, 0, vect_body);
3326 return true;
3329 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3331 /* Transform. */
3332 vec<tree> vec_oprnds = vNULL;
3333 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3334 op, &vec_oprnds);
3335 /* Arguments are ready. Create the new vector stmt. */
3336 unsigned i;
3337 tree vop;
3338 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3340 gimple *new_stmt;
3341 tree tem = make_ssa_name (char_vectype);
3342 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3343 char_vectype, vop));
3344 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3345 tree tem2 = make_ssa_name (char_vectype);
3346 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3347 tem, tem, bswap_vconst);
3348 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3349 tem = make_ssa_name (vectype);
3350 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3351 vectype, tem2));
3352 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3353 if (slp_node)
3354 slp_node->push_vec_def (new_stmt);
3355 else
3356 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3359 if (!slp_node)
3360 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3362 vec_oprnds.release ();
3363 return true;
3366 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3367 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3368 in a single step. On success, store the binary pack code in
3369 *CONVERT_CODE. */
3371 static bool
3372 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3373 code_helper *convert_code)
3375 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3376 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3377 return false;
3379 code_helper code;
3380 int multi_step_cvt = 0;
3381 auto_vec <tree, 8> interm_types;
3382 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3383 &code, &multi_step_cvt, &interm_types)
3384 || multi_step_cvt)
3385 return false;
3387 *convert_code = code;
3388 return true;
3391 /* Function vectorizable_call.
3393 Check if STMT_INFO performs a function call that can be vectorized.
3394 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3395 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3396 Return true if STMT_INFO is vectorizable in this way. */
3398 static bool
3399 vectorizable_call (vec_info *vinfo,
3400 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3401 gimple **vec_stmt, slp_tree slp_node,
3402 stmt_vector_for_cost *cost_vec)
3404 gcall *stmt;
3405 tree vec_dest;
3406 tree scalar_dest;
3407 tree op;
3408 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3409 tree vectype_out, vectype_in;
3410 poly_uint64 nunits_in;
3411 poly_uint64 nunits_out;
3412 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3413 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3414 tree fndecl, new_temp, rhs_type;
3415 enum vect_def_type dt[4]
3416 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3417 vect_unknown_def_type };
3418 tree vectypes[ARRAY_SIZE (dt)] = {};
3419 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3420 int ndts = ARRAY_SIZE (dt);
3421 int ncopies, j;
3422 auto_vec<tree, 8> vargs;
3423 enum { NARROW, NONE, WIDEN } modifier;
3424 size_t i, nargs;
3425 tree lhs;
3427 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3428 return false;
3430 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3431 && ! vec_stmt)
3432 return false;
3434 /* Is STMT_INFO a vectorizable call? */
3435 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3436 if (!stmt)
3437 return false;
3439 if (gimple_call_internal_p (stmt)
3440 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3441 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3442 /* Handled by vectorizable_load and vectorizable_store. */
3443 return false;
3445 if (gimple_call_lhs (stmt) == NULL_TREE
3446 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3447 return false;
3449 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3451 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3453 /* Process function arguments. */
3454 rhs_type = NULL_TREE;
3455 vectype_in = NULL_TREE;
3456 nargs = gimple_call_num_args (stmt);
3458 /* Bail out if the function has more than four arguments; we do not have
3459 interesting builtin functions to vectorize with more than two arguments
3460 except for fma. Calls with no arguments are not handled either. */
3461 if (nargs == 0 || nargs > 4)
3462 return false;
3464 /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. */
3465 combined_fn cfn = gimple_call_combined_fn (stmt);
3466 if (cfn == CFN_GOMP_SIMD_LANE)
3468 nargs = 0;
3469 rhs_type = unsigned_type_node;
3472 int mask_opno = -1;
3473 if (internal_fn_p (cfn))
3474 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3476 for (i = 0; i < nargs; i++)
3478 if ((int) i == mask_opno)
3480 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3481 &op, &slp_op[i], &dt[i], &vectypes[i]))
3482 return false;
3483 continue;
3486 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3487 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3489 if (dump_enabled_p ())
3490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3491 "use not simple.\n");
3492 return false;
3495 /* We can only handle calls with arguments of the same type. */
3496 if (rhs_type
3497 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3499 if (dump_enabled_p ())
3500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3501 "argument types differ.\n");
3502 return false;
3504 if (!rhs_type)
3505 rhs_type = TREE_TYPE (op);
3507 if (!vectype_in)
3508 vectype_in = vectypes[i];
3509 else if (vectypes[i]
3510 && !types_compatible_p (vectypes[i], vectype_in))
3512 if (dump_enabled_p ())
3513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3514 "argument vector types differ.\n");
3515 return false;
3518 /* If all arguments are external or constant defs, infer the vector type
3519 from the scalar type. */
3520 if (!vectype_in)
3521 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3522 if (vec_stmt)
3523 gcc_assert (vectype_in);
3524 if (!vectype_in)
3526 if (dump_enabled_p ())
3527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3528 "no vectype for scalar type %T\n", rhs_type);
3530 return false;
3532 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3533 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3534 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3535 by a pack of the two vectors into an SI vector. We would need
3536 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3537 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3539 if (dump_enabled_p ())
3540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3541 "mismatched vector sizes %T and %T\n",
3542 vectype_in, vectype_out);
3543 return false;
3546 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3547 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3549 if (dump_enabled_p ())
3550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3551 "mixed mask and nonmask vector types\n");
3552 return false;
3555 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3557 if (dump_enabled_p ())
3558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3559 "use emulated vector type for call\n");
3560 return false;
3563 /* FORNOW */
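/* Classify the call by the ratio of lanes: equal lane counts need no
modifier; twice as many output lanes as input lanes means the result
elements are narrower than the inputs (NARROW); half as many means
they are wider (WIDEN). */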
3564 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3565 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3566 if (known_eq (nunits_in * 2, nunits_out))
3567 modifier = NARROW;
3568 else if (known_eq (nunits_out, nunits_in))
3569 modifier = NONE;
3570 else if (known_eq (nunits_out * 2, nunits_in))
3571 modifier = WIDEN;
3572 else
3573 return false;
3575 /* We only handle functions that do not read or clobber memory. */
3576 if (gimple_vuse (stmt))
3578 if (dump_enabled_p ())
3579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3580 "function reads from or writes to memory.\n");
3581 return false;
3584 /* For now, we only vectorize functions if a target specific builtin
3585 is available. TODO -- in some cases, it might be profitable to
3586 insert the calls for pieces of the vector, in order to be able
3587 to vectorize other operations in the loop. */
3588 fndecl = NULL_TREE;
3589 internal_fn ifn = IFN_LAST;
3590 tree callee = gimple_call_fndecl (stmt);
3592 /* First try using an internal function. */
3593 code_helper convert_code = MAX_TREE_CODES;
3594 if (cfn != CFN_LAST
3595 && (modifier == NONE
3596 || (modifier == NARROW
3597 && simple_integer_narrowing (vectype_out, vectype_in,
3598 &convert_code))))
3599 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3600 vectype_in);
3602 /* If that fails, try asking for a target-specific built-in function. */
3603 if (ifn == IFN_LAST)
3605 if (cfn != CFN_LAST)
3606 fndecl = targetm.vectorize.builtin_vectorized_function
3607 (cfn, vectype_out, vectype_in);
3608 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3609 fndecl = targetm.vectorize.builtin_md_vectorized_function
3610 (callee, vectype_out, vectype_in);
3613 if (ifn == IFN_LAST && !fndecl)
3615 if (cfn == CFN_GOMP_SIMD_LANE
3616 && !slp_node
3617 && loop_vinfo
3618 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3619 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3620 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3621 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3623 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3624 { 0, 1, 2, ... vf - 1 } vector. */
3625 gcc_assert (nargs == 0);
3627 else if (modifier == NONE
3628 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3629 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3630 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3631 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3632 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3633 slp_op, vectype_in, cost_vec);
3634 else
3636 if (dump_enabled_p ())
3637 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3638 "function is not vectorizable.\n");
3639 return false;
3643 if (slp_node)
3644 ncopies = 1;
3645 else if (modifier == NARROW && ifn == IFN_LAST)
3646 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3647 else
3648 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3650 /* Sanity check: make sure that at least one copy of the vectorized stmt
3651 needs to be generated. */
3652 gcc_assert (ncopies >= 1);
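/* Look up the conditional and length-controlled variants of the
internal function; they are needed below when the loop may operate
on partial vectors. */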
3654 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3655 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3656 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3657 int len_opno = internal_fn_len_index (cond_len_fn);
3658 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3659 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3660 if (!vec_stmt) /* transformation not required. */
3662 if (slp_node)
3663 for (i = 0; i < nargs; ++i)
3664 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3665 vectypes[i]
3666 ? vectypes[i] : vectype_in))
3668 if (dump_enabled_p ())
3669 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3670 "incompatible vector types for invariants\n");
3671 return false;
3673 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3674 DUMP_VECT_SCOPE ("vectorizable_call");
3675 vect_model_simple_cost (vinfo, stmt_info,
3676 ncopies, dt, ndts, slp_node, cost_vec);
3677 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3678 record_stmt_cost (cost_vec, ncopies / 2,
3679 vec_promote_demote, stmt_info, 0, vect_body);
3681 if (loop_vinfo
3682 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3683 && (reduc_idx >= 0 || mask_opno >= 0))
3685 if (reduc_idx >= 0
3686 && (cond_fn == IFN_LAST
3687 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3688 OPTIMIZE_FOR_SPEED))
3689 && (cond_len_fn == IFN_LAST
3690 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3691 OPTIMIZE_FOR_SPEED)))
3693 if (dump_enabled_p ())
3694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3695 "can't use a fully-masked loop because no"
3696 " conditional operation is available.\n");
3697 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3699 else
3701 unsigned int nvectors
3702 = (slp_node
3703 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3704 : ncopies);
3705 tree scalar_mask = NULL_TREE;
3706 if (mask_opno >= 0)
3707 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3708 if (cond_len_fn != IFN_LAST
3709 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3710 OPTIMIZE_FOR_SPEED))
3711 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3712 1);
3713 else
3714 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3715 scalar_mask);
3718 return true;
3721 /* Transform. */
3723 if (dump_enabled_p ())
3724 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3726 /* Handle def. */
3727 scalar_dest = gimple_call_lhs (stmt);
3728 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3730 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3731 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3732 unsigned int vect_nargs = nargs;
3733 if (len_loop_p)
3735 if (len_opno >= 0)
3737 ifn = cond_len_fn;
3738 /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN and BIAS. */
3739 vect_nargs += 2;
3741 else if (reduc_idx >= 0)
3742 gcc_unreachable ();
3744 else if (masked_loop_p && reduc_idx >= 0)
3746 ifn = cond_fn;
3747 vect_nargs += 2;
3750 if (modifier == NONE || ifn != IFN_LAST)
3752 tree prev_res = NULL_TREE;
3753 vargs.safe_grow (vect_nargs, true);
3754 auto_vec<vec<tree> > vec_defs (nargs);
3755 for (j = 0; j < ncopies; ++j)
3757 /* Build argument list for the vectorized call. */
3758 if (slp_node)
3760 vec<tree> vec_oprnds0;
3762 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3763 vec_oprnds0 = vec_defs[0];
3765 /* Arguments are ready. Create the new vector stmt. */
3766 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3768 int varg = 0;
3769 if (masked_loop_p && reduc_idx >= 0)
3771 unsigned int vec_num = vec_oprnds0.length ();
3772 /* Always true for SLP. */
3773 gcc_assert (ncopies == 1);
3774 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3775 gsi, masks, vec_num,
3776 vectype_out, i);
3778 size_t k;
3779 for (k = 0; k < nargs; k++)
3781 vec<tree> vec_oprndsk = vec_defs[k];
3782 vargs[varg++] = vec_oprndsk[i];
3784 if (masked_loop_p && reduc_idx >= 0)
3785 vargs[varg++] = vargs[reduc_idx + 1];
3786 gimple *new_stmt;
3787 if (modifier == NARROW)
3789 /* We don't define any narrowing conditional functions
3790 at present. */
3791 gcc_assert (mask_opno < 0);
3792 tree half_res = make_ssa_name (vectype_in);
3793 gcall *call
3794 = gimple_build_call_internal_vec (ifn, vargs);
3795 gimple_call_set_lhs (call, half_res);
3796 gimple_call_set_nothrow (call, true);
3797 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3798 if ((i & 1) == 0)
3800 prev_res = half_res;
3801 continue;
3803 new_temp = make_ssa_name (vec_dest);
3804 new_stmt = vect_gimple_build (new_temp, convert_code,
3805 prev_res, half_res);
3806 vect_finish_stmt_generation (vinfo, stmt_info,
3807 new_stmt, gsi);
3809 else
3811 if (len_opno >= 0 && len_loop_p)
3813 unsigned int vec_num = vec_oprnds0.length ();
3814 /* Always true for SLP. */
3815 gcc_assert (ncopies == 1);
3816 tree len
3817 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3818 vectype_out, i, 1);
3819 signed char biasval
3820 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3821 tree bias = build_int_cst (intQI_type_node, biasval);
3822 vargs[len_opno] = len;
3823 vargs[len_opno + 1] = bias;
3825 else if (mask_opno >= 0 && masked_loop_p)
3827 unsigned int vec_num = vec_oprnds0.length ();
3828 /* Always true for SLP. */
3829 gcc_assert (ncopies == 1);
3830 tree mask = vect_get_loop_mask (loop_vinfo,
3831 gsi, masks, vec_num,
3832 vectype_out, i);
3833 vargs[mask_opno] = prepare_vec_mask
3834 (loop_vinfo, TREE_TYPE (mask), mask,
3835 vargs[mask_opno], gsi);
3838 gcall *call;
3839 if (ifn != IFN_LAST)
3840 call = gimple_build_call_internal_vec (ifn, vargs);
3841 else
3842 call = gimple_build_call_vec (fndecl, vargs);
3843 new_temp = make_ssa_name (vec_dest, call);
3844 gimple_call_set_lhs (call, new_temp);
3845 gimple_call_set_nothrow (call, true);
3846 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3847 new_stmt = call;
3849 slp_node->push_vec_def (new_stmt);
3851 continue;
3854 int varg = 0;
3855 if (masked_loop_p && reduc_idx >= 0)
3856 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3857 vectype_out, j);
3858 for (i = 0; i < nargs; i++)
3860 op = gimple_call_arg (stmt, i);
3861 if (j == 0)
3863 vec_defs.quick_push (vNULL);
3864 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3865 op, &vec_defs[i],
3866 vectypes[i]);
3868 vargs[varg++] = vec_defs[i][j];
3870 if (masked_loop_p && reduc_idx >= 0)
3871 vargs[varg++] = vargs[reduc_idx + 1];
3873 if (len_opno >= 0 && len_loop_p)
3875 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3876 vectype_out, j, 1);
3877 signed char biasval
3878 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3879 tree bias = build_int_cst (intQI_type_node, biasval);
3880 vargs[len_opno] = len;
3881 vargs[len_opno + 1] = bias;
3883 else if (mask_opno >= 0 && masked_loop_p)
3885 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3886 vectype_out, j);
3887 vargs[mask_opno]
3888 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3889 vargs[mask_opno], gsi);
3892 gimple *new_stmt;
3893 if (cfn == CFN_GOMP_SIMD_LANE)
3895 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3896 tree new_var
3897 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3898 gimple *init_stmt = gimple_build_assign (new_var, cst);
3899 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3900 new_temp = make_ssa_name (vec_dest);
3901 new_stmt = gimple_build_assign (new_temp, new_var);
3902 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3904 else if (modifier == NARROW)
3906 /* We don't define any narrowing conditional functions at
3907 present. */
3908 gcc_assert (mask_opno < 0);
3909 tree half_res = make_ssa_name (vectype_in);
3910 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3911 gimple_call_set_lhs (call, half_res);
3912 gimple_call_set_nothrow (call, true);
3913 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3914 if ((j & 1) == 0)
3916 prev_res = half_res;
3917 continue;
3919 new_temp = make_ssa_name (vec_dest);
3920 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3921 half_res);
3922 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3924 else
3926 gcall *call;
3927 if (ifn != IFN_LAST)
3928 call = gimple_build_call_internal_vec (ifn, vargs);
3929 else
3930 call = gimple_build_call_vec (fndecl, vargs);
3931 new_temp = make_ssa_name (vec_dest, call);
3932 gimple_call_set_lhs (call, new_temp);
3933 gimple_call_set_nothrow (call, true);
3934 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3935 new_stmt = call;
3938 if (j == (modifier == NARROW ? 1 : 0))
3939 *vec_stmt = new_stmt;
3940 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3942 for (i = 0; i < nargs; i++)
3944 vec<tree> vec_oprndsi = vec_defs[i];
3945 vec_oprndsi.release ();
3948 else if (modifier == NARROW)
3950 auto_vec<vec<tree> > vec_defs (nargs);
3951 /* We don't define any narrowing conditional functions at present. */
3952 gcc_assert (mask_opno < 0);
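/* Each narrowing call consumes two vector defs per scalar argument and
produces a single narrowed vector result, so the argument list holds
2 * NARGS entries. */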
3953 for (j = 0; j < ncopies; ++j)
3955 /* Build argument list for the vectorized call. */
3956 if (j == 0)
3957 vargs.create (nargs * 2);
3958 else
3959 vargs.truncate (0);
3961 if (slp_node)
3963 vec<tree> vec_oprnds0;
3965 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3966 vec_oprnds0 = vec_defs[0];
3968 /* Arguments are ready. Create the new vector stmt. */
3969 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3971 size_t k;
3972 vargs.truncate (0);
3973 for (k = 0; k < nargs; k++)
3975 vec<tree> vec_oprndsk = vec_defs[k];
3976 vargs.quick_push (vec_oprndsk[i]);
3977 vargs.quick_push (vec_oprndsk[i + 1]);
3979 gcall *call;
3980 if (ifn != IFN_LAST)
3981 call = gimple_build_call_internal_vec (ifn, vargs);
3982 else
3983 call = gimple_build_call_vec (fndecl, vargs);
3984 new_temp = make_ssa_name (vec_dest, call);
3985 gimple_call_set_lhs (call, new_temp);
3986 gimple_call_set_nothrow (call, true);
3987 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3988 slp_node->push_vec_def (call);
3990 continue;
3993 for (i = 0; i < nargs; i++)
3995 op = gimple_call_arg (stmt, i);
3996 if (j == 0)
3998 vec_defs.quick_push (vNULL);
3999 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
4000 op, &vec_defs[i], vectypes[i]);
4002 vec_oprnd0 = vec_defs[i][2*j];
4003 vec_oprnd1 = vec_defs[i][2*j+1];
4005 vargs.quick_push (vec_oprnd0);
4006 vargs.quick_push (vec_oprnd1);
4009 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
4010 new_temp = make_ssa_name (vec_dest, new_stmt);
4011 gimple_call_set_lhs (new_stmt, new_temp);
4012 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4014 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4017 if (!slp_node)
4018 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
4020 for (i = 0; i < nargs; i++)
4022 vec<tree> vec_oprndsi = vec_defs[i];
4023 vec_oprndsi.release ();
4026 else
4027 /* No current target implements this case. */
4028 return false;
4030 vargs.release ();
4032 /* The call in STMT might prevent it from being removed in dce.
4033 We however cannot remove it here, due to the way the ssa name
4034 it defines is mapped to the new definition. So just replace
4035 rhs of the statement with something harmless. */
4037 if (slp_node)
4038 return true;
4040 stmt_info = vect_orig_stmt (stmt_info);
4041 lhs = gimple_get_lhs (stmt_info->stmt);
4043 gassign *new_stmt
4044 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4045 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
4047 return true;
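/* Per-argument information collected while analyzing a call to a SIMD
clone: the operand and its vector type, its def kind, a constant
linear step if the value is linear in the loop or in the SIMD lane,
and its known alignment. */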
4051 struct simd_call_arg_info
4053 tree vectype;
4054 tree op;
4055 HOST_WIDE_INT linear_step;
4056 enum vect_def_type dt;
4057 unsigned int align;
4058 bool simd_lane_linear;
4061 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
4062 is linear within simd lane (but not within whole loop), note it in
4063 *ARGINFO. */
4065 static void
4066 vect_simd_lane_linear (tree op, class loop *loop,
4067 struct simd_call_arg_info *arginfo)
4069 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
4071 if (!is_gimple_assign (def_stmt)
4072 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
4073 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
4074 return;
4076 tree base = gimple_assign_rhs1 (def_stmt);
4077 HOST_WIDE_INT linear_step = 0;
4078 tree v = gimple_assign_rhs2 (def_stmt);
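/* Walk the definition chain of the offset: fold constant additions into
the base, pick up a constant step from a multiplication, look through
conversions, and succeed if the chain ends in the IFN_GOMP_SIMD_LANE
call for this loop's simduid. */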
4079 while (TREE_CODE (v) == SSA_NAME)
4081 tree t;
4082 def_stmt = SSA_NAME_DEF_STMT (v);
4083 if (is_gimple_assign (def_stmt))
4084 switch (gimple_assign_rhs_code (def_stmt))
4086 case PLUS_EXPR:
4087 t = gimple_assign_rhs2 (def_stmt);
4088 if (linear_step || TREE_CODE (t) != INTEGER_CST)
4089 return;
4090 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
4091 v = gimple_assign_rhs1 (def_stmt);
4092 continue;
4093 case MULT_EXPR:
4094 t = gimple_assign_rhs2 (def_stmt);
4095 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
4096 return;
4097 linear_step = tree_to_shwi (t);
4098 v = gimple_assign_rhs1 (def_stmt);
4099 continue;
4100 CASE_CONVERT:
4101 t = gimple_assign_rhs1 (def_stmt);
4102 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
4103 || (TYPE_PRECISION (TREE_TYPE (v))
4104 < TYPE_PRECISION (TREE_TYPE (t))))
4105 return;
4106 if (!linear_step)
4107 linear_step = 1;
4108 v = t;
4109 continue;
4110 default:
4111 return;
4113 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
4114 && loop->simduid
4115 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
4116 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
4117 == loop->simduid))
4119 if (!linear_step)
4120 linear_step = 1;
4121 arginfo->linear_step = linear_step;
4122 arginfo->op = base;
4123 arginfo->simd_lane_linear = true;
4124 return;
4129 /* Return the number of elements in vector type VECTYPE, which is associated
4130 with a SIMD clone. At present these vectors always have a constant
4131 length. */
4133 static unsigned HOST_WIDE_INT
4134 simd_clone_subparts (tree vectype)
4136 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
4139 /* Function vectorizable_simd_clone_call.
4141 Check if STMT_INFO performs a function call that can be vectorized
4142 by calling a simd clone of the function.
4143 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4144 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4145 Return true if STMT_INFO is vectorizable in this way. */
4147 static bool
4148 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
4149 gimple_stmt_iterator *gsi,
4150 gimple **vec_stmt, slp_tree slp_node,
4151 stmt_vector_for_cost *)
4153 tree vec_dest;
4154 tree scalar_dest;
4155 tree op, type;
4156 tree vec_oprnd0 = NULL_TREE;
4157 tree vectype;
4158 poly_uint64 nunits;
4159 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4160 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4161 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
4162 tree fndecl, new_temp;
4163 int ncopies, j;
4164 auto_vec<simd_call_arg_info> arginfo;
4165 vec<tree> vargs = vNULL;
4166 size_t i, nargs;
4167 tree lhs, rtype, ratype;
4168 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
4169 int arg_offset = 0;
4171 /* Is STMT a vectorizable call? */
4172 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4173 if (!stmt)
4174 return false;
4176 fndecl = gimple_call_fndecl (stmt);
4177 if (fndecl == NULL_TREE
4178 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4180 fndecl = gimple_call_arg (stmt, 0);
4181 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4182 fndecl = TREE_OPERAND (fndecl, 0);
4183 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4184 arg_offset = 1;
4186 if (fndecl == NULL_TREE)
4187 return false;
4189 struct cgraph_node *node = cgraph_node::get (fndecl);
4190 if (node == NULL || node->simd_clones == NULL)
4191 return false;
4193 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4194 return false;
4196 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4197 && ! vec_stmt)
4198 return false;
4200 if (gimple_call_lhs (stmt)
4201 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4202 return false;
4204 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4206 vectype = STMT_VINFO_VECTYPE (stmt_info);
4208 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4209 return false;
4211 /* Process function arguments. */
4212 nargs = gimple_call_num_args (stmt) - arg_offset;
4214 /* Bail out if the function has zero arguments. */
4215 if (nargs == 0)
4216 return false;
4218 arginfo.reserve (nargs, true);
4219 auto_vec<slp_tree> slp_op;
4220 slp_op.safe_grow_cleared (nargs);
4222 for (i = 0; i < nargs; i++)
4224 simd_call_arg_info thisarginfo;
4225 affine_iv iv;
4227 thisarginfo.linear_step = 0;
4228 thisarginfo.align = 0;
4229 thisarginfo.op = NULL_TREE;
4230 thisarginfo.simd_lane_linear = false;
4232 int op_no = i + arg_offset;
4233 if (slp_node)
4234 op_no = vect_slp_child_index_for_operand (stmt, op_no);
4235 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4236 op_no, &op, &slp_op[i],
4237 &thisarginfo.dt, &thisarginfo.vectype)
4238 || thisarginfo.dt == vect_uninitialized_def)
4240 if (dump_enabled_p ())
4241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4242 "use not simple.\n");
4243 return false;
4246 if (thisarginfo.dt == vect_constant_def
4247 || thisarginfo.dt == vect_external_def)
4249 gcc_assert (vec_stmt || thisarginfo.vectype == NULL_TREE);
4250 if (!vec_stmt)
4251 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
4252 TREE_TYPE (op),
4253 slp_node);
4255 else
4256 gcc_assert (thisarginfo.vectype != NULL_TREE);
4258 /* For linear arguments, the analyze phase should have saved
4259 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4260 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4261 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4263 gcc_assert (vec_stmt);
4264 thisarginfo.linear_step
4265 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4266 thisarginfo.op
4267 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4268 thisarginfo.simd_lane_linear
4269 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4270 == boolean_true_node);
4271 /* If loop has been peeled for alignment, we need to adjust it. */
4272 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4273 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4274 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4276 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4277 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4278 tree opt = TREE_TYPE (thisarginfo.op);
4279 bias = fold_convert (TREE_TYPE (step), bias);
4280 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4281 thisarginfo.op
4282 = fold_build2 (POINTER_TYPE_P (opt)
4283 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4284 thisarginfo.op, bias);
4287 else if (!vec_stmt
4288 && thisarginfo.dt != vect_constant_def
4289 && thisarginfo.dt != vect_external_def
4290 && loop_vinfo
4291 && TREE_CODE (op) == SSA_NAME
4292 && simple_iv (loop, loop_containing_stmt (stmt), op,
4293 &iv, false)
4294 && tree_fits_shwi_p (iv.step))
4296 thisarginfo.linear_step = tree_to_shwi (iv.step);
4297 thisarginfo.op = iv.base;
4299 else if ((thisarginfo.dt == vect_constant_def
4300 || thisarginfo.dt == vect_external_def)
4301 && POINTER_TYPE_P (TREE_TYPE (op)))
4302 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4303 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4304 linear too. */
4305 if (POINTER_TYPE_P (TREE_TYPE (op))
4306 && !thisarginfo.linear_step
4307 && !vec_stmt
4308 && thisarginfo.dt != vect_constant_def
4309 && thisarginfo.dt != vect_external_def
4310 && loop_vinfo
4311 && TREE_CODE (op) == SSA_NAME)
4312 vect_simd_lane_linear (op, loop, &thisarginfo);
4314 arginfo.quick_push (thisarginfo);
4317 if (loop_vinfo
4318 && !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ())
4320 if (dump_enabled_p ())
4321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4322 "not considering SIMD clones; not yet supported"
4323 " for variable-width vectors.\n");
4324 return false;
4327 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4328 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
4329 unsigned int badness = 0;
4330 struct cgraph_node *bestn = NULL;
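/* Reuse the clone chosen during analysis if there is one; otherwise
score every SIMD clone of the callee and pick the one with the lowest
badness. */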
4331 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4332 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4333 else
4334 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4335 n = n->simdclone->next_clone)
4337 unsigned int this_badness = 0;
4338 unsigned int num_calls;
4339 if (!constant_multiple_p (vf * group_size,
4340 n->simdclone->simdlen, &num_calls)
4341 || n->simdclone->nargs != nargs)
4342 continue;
4343 if (num_calls != 1)
4344 this_badness += exact_log2 (num_calls) * 4096;
4345 if (n->simdclone->inbranch)
4346 this_badness += 8192;
4347 int target_badness = targetm.simd_clone.usable (n);
4348 if (target_badness < 0)
4349 continue;
4350 this_badness += target_badness * 512;
4351 for (i = 0; i < nargs; i++)
4353 switch (n->simdclone->args[i].arg_type)
4355 case SIMD_CLONE_ARG_TYPE_VECTOR:
4356 if (!useless_type_conversion_p
4357 (n->simdclone->args[i].orig_type,
4358 TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
4359 i = -1;
4360 else if (arginfo[i].dt == vect_constant_def
4361 || arginfo[i].dt == vect_external_def
4362 || arginfo[i].linear_step)
4363 this_badness += 64;
4364 break;
4365 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4366 if (arginfo[i].dt != vect_constant_def
4367 && arginfo[i].dt != vect_external_def)
4368 i = -1;
4369 break;
4370 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4371 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4372 if (arginfo[i].dt == vect_constant_def
4373 || arginfo[i].dt == vect_external_def
4374 || (arginfo[i].linear_step
4375 != n->simdclone->args[i].linear_step))
4376 i = -1;
4377 break;
4378 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4379 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4380 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4381 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4382 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4383 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4384 /* FORNOW */
4385 i = -1;
4386 break;
4387 case SIMD_CLONE_ARG_TYPE_MASK:
4388 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4389 != SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4390 i = -1;
4391 break;
4393 if (i == (size_t) -1)
4394 break;
4395 if (n->simdclone->args[i].alignment > arginfo[i].align)
4397 i = -1;
4398 break;
4400 if (arginfo[i].align)
4401 this_badness += (exact_log2 (arginfo[i].align)
4402 - exact_log2 (n->simdclone->args[i].alignment));
4404 if (i == (size_t) -1)
4405 continue;
4406 if (bestn == NULL || this_badness < badness)
4408 bestn = n;
4409 badness = this_badness;
4413 if (bestn == NULL)
4414 return false;
4416 unsigned int num_mask_args = 0;
4417 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4418 for (i = 0; i < nargs; i++)
4419 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4420 num_mask_args++;
4422 for (i = 0; i < nargs; i++)
4424 if ((arginfo[i].dt == vect_constant_def
4425 || arginfo[i].dt == vect_external_def)
4426 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4428 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
4429 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4430 slp_node);
4431 if (arginfo[i].vectype == NULL
4432 || !constant_multiple_p (bestn->simdclone->simdlen,
4433 simd_clone_subparts (arginfo[i].vectype)))
4434 return false;
4437 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4438 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4440 if (dump_enabled_p ())
4441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4442 "vector mask arguments are not supported.\n");
4443 return false;
4446 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4448 if (bestn->simdclone->mask_mode == VOIDmode)
4450 if (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
4451 != simd_clone_subparts (arginfo[i].vectype))
4453 /* FORNOW we only have partial support for vector-type masks
4454 that can't hold all of simdlen. */
4455 if (dump_enabled_p ())
4456 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4457 vect_location,
4458 "in-branch vector clones are not yet"
4459 " supported for mismatched vector sizes.\n");
4460 return false;
4463 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4465 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4466 || maybe_ne (exact_div (bestn->simdclone->simdlen,
4467 num_mask_args),
4468 simd_clone_subparts (arginfo[i].vectype)))
4470 /* FORNOW we only have partial support for integer-type masks
4471 that represent the same number of lanes as the
4472 vectorized mask inputs. */
4473 if (dump_enabled_p ())
4474 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4475 vect_location,
4476 "in-branch vector clones are not yet "
4477 "supported for mismatched vector sizes.\n");
4478 return false;
4481 else
4483 if (dump_enabled_p ())
4484 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4485 vect_location,
4486 "in-branch vector clones not supported"
4487 " on this target.\n");
4488 return false;
4493 fndecl = bestn->decl;
4494 nunits = bestn->simdclone->simdlen;
4495 if (slp_node)
4496 ncopies = vector_unroll_factor (vf * group_size, nunits);
4497 else
4498 ncopies = vector_unroll_factor (vf, nunits);
4500 /* If the function isn't const, only allow it in simd loops where the user
4501 has asserted that at least nunits consecutive iterations can be
4502 performed using SIMD instructions. */
4503 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4504 && gimple_vuse (stmt))
4505 return false;
4507 /* Sanity check: make sure that at least one copy of the vectorized stmt
4508 needs to be generated. */
4509 gcc_assert (ncopies >= 1);
4511 if (!vec_stmt) /* transformation not required. */
4513 if (slp_node)
4514 for (unsigned i = 0; i < nargs; ++i)
4515 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4517 if (dump_enabled_p ())
4518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4519 "incompatible vector types for invariants\n");
4520 return false;
4522 /* When the original call is pure or const but the SIMD ABI dictates
4523 an aggregate return, we will have to use a virtual definition and
4524 in a loop eventually even need to add a virtual PHI. That's
4525 not straightforward, so allow this to be fixed up via renaming. */
4526 if (gimple_call_lhs (stmt)
4527 && !gimple_vdef (stmt)
4528 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4529 vinfo->any_known_not_updated_vssa = true;
4530 /* ??? For SLP code-gen we end up inserting after the last
4531 vector argument def rather than at the original call position
4532 so automagic virtual operand updating doesn't work. */
4533 if (gimple_vuse (stmt) && slp_node)
4534 vinfo->any_known_not_updated_vssa = true;
4535 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4536 for (i = 0; i < nargs; i++)
4537 if ((bestn->simdclone->args[i].arg_type
4538 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4539 || (bestn->simdclone->args[i].arg_type
4540 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4542 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4543 + 1,
4544 true);
4545 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4546 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4547 ? size_type_node : TREE_TYPE (arginfo[i].op);
4548 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4549 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4550 tree sll = arginfo[i].simd_lane_linear
4551 ? boolean_true_node : boolean_false_node;
4552 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4554 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4555 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4556 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4557 dt, slp_node, cost_vec); */
4558 return true;
4561 /* Transform. */
4563 if (dump_enabled_p ())
4564 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4566 /* Handle def. */
4567 scalar_dest = gimple_call_lhs (stmt);
4568 vec_dest = NULL_TREE;
4569 rtype = NULL_TREE;
4570 ratype = NULL_TREE;
4571 if (scalar_dest)
4573 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4574 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4575 if (TREE_CODE (rtype) == ARRAY_TYPE)
4577 ratype = rtype;
4578 rtype = TREE_TYPE (ratype);
4582 auto_vec<vec<tree> > vec_oprnds;
4583 auto_vec<unsigned> vec_oprnds_i;
4584 vec_oprnds_i.safe_grow_cleared (nargs, true);
4585 if (slp_node)
4587 vec_oprnds.reserve_exact (nargs);
4588 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4590 else
4591 vec_oprnds.safe_grow_cleared (nargs, true);
4592 for (j = 0; j < ncopies; ++j)
4594 /* Build argument list for the vectorized call. */
4595 if (j == 0)
4596 vargs.create (nargs);
4597 else
4598 vargs.truncate (0);
4600 for (i = 0; i < nargs; i++)
4602 unsigned int k, l, m, o;
4603 tree atype;
4604 op = gimple_call_arg (stmt, i + arg_offset);
4605 switch (bestn->simdclone->args[i].arg_type)
4607 case SIMD_CLONE_ARG_TYPE_VECTOR:
4608 atype = bestn->simdclone->args[i].vector_type;
4609 o = vector_unroll_factor (nunits,
4610 simd_clone_subparts (atype));
4611 for (m = j * o; m < (j + 1) * o; m++)
4613 if (simd_clone_subparts (atype)
4614 < simd_clone_subparts (arginfo[i].vectype))
4616 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4617 k = (simd_clone_subparts (arginfo[i].vectype)
4618 / simd_clone_subparts (atype));
4619 gcc_assert ((k & (k - 1)) == 0);
4620 if (m == 0)
4622 if (!slp_node)
4623 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4624 ncopies * o / k, op,
4625 &vec_oprnds[i]);
4626 vec_oprnds_i[i] = 0;
4627 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4629 else
4631 vec_oprnd0 = arginfo[i].op;
4632 if ((m & (k - 1)) == 0)
4633 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4635 arginfo[i].op = vec_oprnd0;
4636 vec_oprnd0
4637 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4638 bitsize_int (prec),
4639 bitsize_int ((m & (k - 1)) * prec));
4640 gassign *new_stmt
4641 = gimple_build_assign (make_ssa_name (atype),
4642 vec_oprnd0);
4643 vect_finish_stmt_generation (vinfo, stmt_info,
4644 new_stmt, gsi);
4645 vargs.safe_push (gimple_assign_lhs (new_stmt));
4647 else
4649 k = (simd_clone_subparts (atype)
4650 / simd_clone_subparts (arginfo[i].vectype));
4651 gcc_assert ((k & (k - 1)) == 0);
4652 vec<constructor_elt, va_gc> *ctor_elts;
4653 if (k != 1)
4654 vec_alloc (ctor_elts, k);
4655 else
4656 ctor_elts = NULL;
4657 for (l = 0; l < k; l++)
4659 if (m == 0 && l == 0)
4661 if (!slp_node)
4662 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4663 k * o * ncopies,
4664 op,
4665 &vec_oprnds[i]);
4666 vec_oprnds_i[i] = 0;
4667 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4669 else
4670 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4671 arginfo[i].op = vec_oprnd0;
4672 if (k == 1)
4673 break;
4674 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4675 vec_oprnd0);
4677 if (k == 1)
4678 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4679 atype))
4681 vec_oprnd0
4682 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4683 gassign *new_stmt
4684 = gimple_build_assign (make_ssa_name (atype),
4685 vec_oprnd0);
4686 vect_finish_stmt_generation (vinfo, stmt_info,
4687 new_stmt, gsi);
4688 vargs.safe_push (gimple_assign_lhs (new_stmt));
4690 else
4691 vargs.safe_push (vec_oprnd0);
4692 else
4694 vec_oprnd0 = build_constructor (atype, ctor_elts);
4695 gassign *new_stmt
4696 = gimple_build_assign (make_ssa_name (atype),
4697 vec_oprnd0);
4698 vect_finish_stmt_generation (vinfo, stmt_info,
4699 new_stmt, gsi);
4700 vargs.safe_push (gimple_assign_lhs (new_stmt));
4704 break;
4705 case SIMD_CLONE_ARG_TYPE_MASK:
4706 if (bestn->simdclone->mask_mode == VOIDmode)
4708 atype = bestn->simdclone->args[i].vector_type;
4709 tree elt_type = TREE_TYPE (atype);
4710 tree one = fold_convert (elt_type, integer_one_node);
4711 tree zero = fold_convert (elt_type, integer_zero_node);
4712 o = vector_unroll_factor (nunits,
4713 simd_clone_subparts (atype));
4714 for (m = j * o; m < (j + 1) * o; m++)
4716 if (simd_clone_subparts (atype)
4717 < simd_clone_subparts (arginfo[i].vectype))
4719 /* The mask type has fewer elements than simdlen. */
4721 /* FORNOW */
4722 gcc_unreachable ();
4724 else if (simd_clone_subparts (atype)
4725 == simd_clone_subparts (arginfo[i].vectype))
4727 /* The SIMD clone function has the same number of
4728 elements as the current function. */
4729 if (m == 0)
4731 if (!slp_node)
4732 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4733 o * ncopies,
4734 op,
4735 &vec_oprnds[i]);
4736 vec_oprnds_i[i] = 0;
4738 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4739 vec_oprnd0
4740 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4741 build_vector_from_val (atype, one),
4742 build_vector_from_val (atype, zero));
4743 gassign *new_stmt
4744 = gimple_build_assign (make_ssa_name (atype),
4745 vec_oprnd0);
4746 vect_finish_stmt_generation (vinfo, stmt_info,
4747 new_stmt, gsi);
4748 vargs.safe_push (gimple_assign_lhs (new_stmt));
4750 else
4752 /* The mask type has more elements than simdlen. */
4754 /* FORNOW */
4755 gcc_unreachable ();
4759 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4761 atype = bestn->simdclone->args[i].vector_type;
4762 /* Guess the number of lanes represented by atype. */
4763 unsigned HOST_WIDE_INT atype_subparts
4764 = exact_div (bestn->simdclone->simdlen,
4765 num_mask_args).to_constant ();
4766 o = vector_unroll_factor (nunits, atype_subparts);
4767 for (m = j * o; m < (j + 1) * o; m++)
4769 if (m == 0)
4771 if (!slp_node)
4772 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4773 o * ncopies,
4774 op,
4775 &vec_oprnds[i]);
4776 vec_oprnds_i[i] = 0;
4778 if (atype_subparts
4779 < simd_clone_subparts (arginfo[i].vectype))
4781 /* The mask argument has fewer elements than the
4782 input vector. */
4783 /* FORNOW */
4784 gcc_unreachable ();
4786 else if (atype_subparts
4787 == simd_clone_subparts (arginfo[i].vectype))
4789 /* The vector mask argument matches the input
4790 in the number of lanes, but not necessarily
4791 in the mode. */
4792 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4793 tree st = lang_hooks.types.type_for_mode
4794 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4795 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4796 vec_oprnd0);
4797 gassign *new_stmt
4798 = gimple_build_assign (make_ssa_name (st),
4799 vec_oprnd0);
4800 vect_finish_stmt_generation (vinfo, stmt_info,
4801 new_stmt, gsi);
4802 if (!types_compatible_p (atype, st))
4804 new_stmt
4805 = gimple_build_assign (make_ssa_name (atype),
4806 NOP_EXPR,
4807 gimple_assign_lhs
4808 (new_stmt));
4809 vect_finish_stmt_generation (vinfo, stmt_info,
4810 new_stmt, gsi);
4812 vargs.safe_push (gimple_assign_lhs (new_stmt));
4814 else
4816 /* The mask argument has more elements than the
4817 input vector. */
4818 /* FORNOW */
4819 gcc_unreachable ();
4823 else
4824 gcc_unreachable ();
4825 break;
4826 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4827 vargs.safe_push (op);
4828 break;
4829 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4830 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
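/* A linearly-stepped argument is passed as its value for the first lane
of this copy. For the first copy, unless the value is already linear
in the SIMD lane, build a loop-header PHI that advances by
STEP * NUNITS * NCOPIES each iteration; later copies add
J * NUNITS * STEP to the PHI result. */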
4831 if (j == 0)
4833 gimple_seq stmts;
4834 arginfo[i].op
4835 = force_gimple_operand (unshare_expr (arginfo[i].op),
4836 &stmts, true, NULL_TREE);
4837 if (stmts != NULL)
4839 basic_block new_bb;
4840 edge pe = loop_preheader_edge (loop);
4841 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4842 gcc_assert (!new_bb);
4844 if (arginfo[i].simd_lane_linear)
4846 vargs.safe_push (arginfo[i].op);
4847 break;
4849 tree phi_res = copy_ssa_name (op);
4850 gphi *new_phi = create_phi_node (phi_res, loop->header);
4851 add_phi_arg (new_phi, arginfo[i].op,
4852 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4853 enum tree_code code
4854 = POINTER_TYPE_P (TREE_TYPE (op))
4855 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4856 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4857 ? sizetype : TREE_TYPE (op);
4858 poly_widest_int cst
4859 = wi::mul (bestn->simdclone->args[i].linear_step,
4860 ncopies * nunits);
4861 tree tcst = wide_int_to_tree (type, cst);
4862 tree phi_arg = copy_ssa_name (op);
4863 gassign *new_stmt
4864 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4865 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4866 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4867 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4868 UNKNOWN_LOCATION);
4869 arginfo[i].op = phi_res;
4870 vargs.safe_push (phi_res);
4872 else
4874 enum tree_code code
4875 = POINTER_TYPE_P (TREE_TYPE (op))
4876 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4877 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4878 ? sizetype : TREE_TYPE (op);
4879 poly_widest_int cst
4880 = wi::mul (bestn->simdclone->args[i].linear_step,
4881 j * nunits);
4882 tree tcst = wide_int_to_tree (type, cst);
4883 new_temp = make_ssa_name (TREE_TYPE (op));
4884 gassign *new_stmt
4885 = gimple_build_assign (new_temp, code,
4886 arginfo[i].op, tcst);
4887 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4888 vargs.safe_push (new_temp);
4890 break;
4891 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4892 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4893 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4894 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4895 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4896 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4897 default:
4898 gcc_unreachable ();
4902 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4903 if (vec_dest)
4905 gcc_assert (ratype
4906 || known_eq (simd_clone_subparts (rtype), nunits));
4907 if (ratype)
4908 new_temp = create_tmp_var (ratype);
4909 else if (useless_type_conversion_p (vectype, rtype))
4910 new_temp = make_ssa_name (vec_dest, new_call);
4911 else
4912 new_temp = make_ssa_name (rtype, new_call);
4913 gimple_call_set_lhs (new_call, new_temp);
4915 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4916 gimple *new_stmt = new_call;
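/* The clone may return its result in an array (RATYPE) or in a vector
whose lane count differs from VECTYPE; the code below extracts or
concatenates pieces so that each recorded def has type VECTYPE. */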
4918 if (vec_dest)
4920 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4922 unsigned int k, l;
4923 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4924 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4925 k = vector_unroll_factor (nunits,
4926 simd_clone_subparts (vectype));
4927 gcc_assert ((k & (k - 1)) == 0);
4928 for (l = 0; l < k; l++)
4930 tree t;
4931 if (ratype)
4933 t = build_fold_addr_expr (new_temp);
4934 t = build2 (MEM_REF, vectype, t,
4935 build_int_cst (TREE_TYPE (t), l * bytes));
4937 else
4938 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4939 bitsize_int (prec), bitsize_int (l * prec));
4940 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4941 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4943 if (j == 0 && l == 0)
4944 *vec_stmt = new_stmt;
4945 if (slp_node)
4946 SLP_TREE_VEC_DEFS (slp_node)
4947 .quick_push (gimple_assign_lhs (new_stmt));
4948 else
4949 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4952 if (ratype)
4953 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4954 continue;
4956 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4958 unsigned int k = (simd_clone_subparts (vectype)
4959 / simd_clone_subparts (rtype));
4960 gcc_assert ((k & (k - 1)) == 0);
4961 if ((j & (k - 1)) == 0)
4962 vec_alloc (ret_ctor_elts, k);
4963 if (ratype)
4965 unsigned int m, o;
4966 o = vector_unroll_factor (nunits,
4967 simd_clone_subparts (rtype));
4968 for (m = 0; m < o; m++)
4970 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4971 size_int (m), NULL_TREE, NULL_TREE);
4972 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4973 tem);
4974 vect_finish_stmt_generation (vinfo, stmt_info,
4975 new_stmt, gsi);
4976 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4977 gimple_assign_lhs (new_stmt));
4979 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4981 else
4982 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4983 if ((j & (k - 1)) != k - 1)
4984 continue;
4985 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4986 new_stmt
4987 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4988 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4990 if ((unsigned) j == k - 1)
4991 *vec_stmt = new_stmt;
4992 if (slp_node)
4993 SLP_TREE_VEC_DEFS (slp_node)
4994 .quick_push (gimple_assign_lhs (new_stmt));
4995 else
4996 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4997 continue;
4999 else if (ratype)
5001 tree t = build_fold_addr_expr (new_temp);
5002 t = build2 (MEM_REF, vectype, t,
5003 build_int_cst (TREE_TYPE (t), 0));
5004 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
5005 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5006 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
5008 else if (!useless_type_conversion_p (vectype, rtype))
5010 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
5011 new_stmt
5012 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
5013 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5017 if (j == 0)
5018 *vec_stmt = new_stmt;
5019 if (slp_node)
5020 SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
5021 else
5022 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5025 for (i = 0; i < nargs; ++i)
5027 vec<tree> oprndsi = vec_oprnds[i];
5028 oprndsi.release ();
5030 vargs.release ();
5032 /* Mark the clone as no longer being a candidate for GC. */
5033 bestn->gc_candidate = false;
5035 /* The call in STMT might prevent it from being removed in dce.
5036 We however cannot remove it here, due to the way the ssa name
5037 it defines is mapped to the new definition. So just replace
5038 rhs of the statement with something harmless. */
5040 if (slp_node)
5041 return true;
5043 gimple *new_stmt;
5044 if (scalar_dest)
5046 type = TREE_TYPE (scalar_dest);
5047 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
5048 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
5050 else
5051 new_stmt = gimple_build_nop ();
5052 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
5053 unlink_stmt_vdef (stmt);
5055 return true;
5059 /* Function vect_gen_widened_results_half
5061 Create a vector stmt whose code, type, number of arguments, and result
5062 variable are CH, OP_TYPE, and VEC_DEST, and whose arguments are
5063 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
5064 In the case that CH is an internal function, a call to it is created
5065 instead of an assignment.
5066 STMT_INFO is the original scalar stmt that we are vectorizing. */
5068 static gimple *
5069 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
5070 tree vec_oprnd0, tree vec_oprnd1, int op_type,
5071 tree vec_dest, gimple_stmt_iterator *gsi,
5072 stmt_vec_info stmt_info)
5074 gimple *new_stmt;
5075 tree new_temp;
5077 /* Generate half of the widened result: */
5078 if (op_type != binary_op)
5079 vec_oprnd1 = NULL;
5080 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
5081 new_temp = make_ssa_name (vec_dest, new_stmt);
5082 gimple_set_lhs (new_stmt, new_temp);
5083 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5085 return new_stmt;
5089 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
5090 For multi-step conversions store the resulting vectors and call the function
5091 recursively. When NARROW_SRC_P is true, there's still a conversion after
5092 narrowing, so don't store the vectors in the SLP_NODE or in the vector
5093 info of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
5095 static void
5096 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
5097 int multi_step_cvt,
5098 stmt_vec_info stmt_info,
5099 vec<tree> &vec_dsts,
5100 gimple_stmt_iterator *gsi,
5101 slp_tree slp_node, code_helper code,
5102 bool narrow_src_p)
5104 unsigned int i;
5105 tree vop0, vop1, new_tmp, vec_dest;
5107 vec_dest = vec_dsts.pop ();
5109 for (i = 0; i < vec_oprnds->length (); i += 2)
5111 /* Create demotion operation. */
5112 vop0 = (*vec_oprnds)[i];
5113 vop1 = (*vec_oprnds)[i + 1];
5114 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
5115 new_tmp = make_ssa_name (vec_dest, new_stmt);
5116 gimple_set_lhs (new_stmt, new_tmp);
5117 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5118 if (multi_step_cvt || narrow_src_p)
5119 /* Store the resulting vector for the next recursive call,
5120 or return the resulting vector_tmp for a NARROW FLOAT_EXPR. */
5121 (*vec_oprnds)[i/2] = new_tmp;
5122 else
5124 /* This is the last step of the conversion sequence. Store the
5125 vectors in SLP_NODE or in vector info of the scalar statement
5126 (or in STMT_VINFO_RELATED_STMT chain). */
5127 if (slp_node)
5128 slp_node->push_vec_def (new_stmt);
5129 else
5130 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5134 /* For multi-step demotion operations we first generate demotion operations
5135 from the source type to the intermediate types, and then combine the
5136 results (stored in VEC_OPRNDS) with a demotion operation to the
5137 destination type. */
5138 if (multi_step_cvt)
5140 /* At each level of recursion we have half of the operands we had at the
5141 previous level. */
5142 vec_oprnds->truncate ((i+1)/2);
5143 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5144 multi_step_cvt - 1,
5145 stmt_info, vec_dsts, gsi,
5146 slp_node, VEC_PACK_TRUNC_EXPR,
5147 narrow_src_p);
5150 vec_dsts.quick_push (vec_dest);
5154 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5155 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5156 STMT_INFO. For multi-step conversions store the resulting vectors and
5157 call the function recursively. */
5159 static void
5160 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5161 vec<tree> *vec_oprnds0,
5162 vec<tree> *vec_oprnds1,
5163 stmt_vec_info stmt_info, tree vec_dest,
5164 gimple_stmt_iterator *gsi,
5165 code_helper ch1,
5166 code_helper ch2, int op_type)
5168 int i;
5169 tree vop0, vop1, new_tmp1, new_tmp2;
5170 gimple *new_stmt1, *new_stmt2;
5171 vec<tree> vec_tmp = vNULL;
5173 vec_tmp.create (vec_oprnds0->length () * 2);
5174 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5176 if (op_type == binary_op)
5177 vop1 = (*vec_oprnds1)[i];
5178 else
5179 vop1 = NULL_TREE;
5181 /* Generate the two halves of promotion operation. */
5182 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5183 op_type, vec_dest, gsi,
5184 stmt_info);
5185 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5186 op_type, vec_dest, gsi,
5187 stmt_info);
5188 if (is_gimple_call (new_stmt1))
5190 new_tmp1 = gimple_call_lhs (new_stmt1);
5191 new_tmp2 = gimple_call_lhs (new_stmt2);
5193 else
5195 new_tmp1 = gimple_assign_lhs (new_stmt1);
5196 new_tmp2 = gimple_assign_lhs (new_stmt2);
5199 /* Store the results for the next step. */
5200 vec_tmp.quick_push (new_tmp1);
5201 vec_tmp.quick_push (new_tmp2);
5204 vec_oprnds0->release ();
5205 *vec_oprnds0 = vec_tmp;
5208 /* Create vectorized promotion stmts for widening stmts using only half the
5209 potential vector size for input. */
5210 static void
5211 vect_create_half_widening_stmts (vec_info *vinfo,
5212 vec<tree> *vec_oprnds0,
5213 vec<tree> *vec_oprnds1,
5214 stmt_vec_info stmt_info, tree vec_dest,
5215 gimple_stmt_iterator *gsi,
5216 code_helper code1,
5217 int op_type)
5219 int i;
5220 tree vop0, vop1;
5221 gimple *new_stmt1;
5222 gimple *new_stmt2;
5223 gimple *new_stmt3;
5224 vec<tree> vec_tmp = vNULL;
5226 vec_tmp.create (vec_oprnds0->length ());
5227 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5229 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5231 gcc_assert (op_type == binary_op);
5232 vop1 = (*vec_oprnds1)[i];
5234 /* Widen the first vector input. */
5235 out_type = TREE_TYPE (vec_dest);
5236 new_tmp1 = make_ssa_name (out_type);
5237 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5238 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5239 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5241 /* Widen the second vector input. */
5242 new_tmp2 = make_ssa_name (out_type);
5243 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5244 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5245 /* Perform the operation with both vector inputs widened. */
5246 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5248 else
5250 /* Perform the operation with the single vector input widened. */
5251 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5254 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5255 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5256 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5258 /* Store the results for the next step. */
5259 vec_tmp.quick_push (new_tmp3);
5262 vec_oprnds0->release ();
5263 *vec_oprnds0 = vec_tmp;
5267 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5268 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5269 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5270 Return true if STMT_INFO is vectorizable in this way. */
5272 static bool
5273 vectorizable_conversion (vec_info *vinfo,
5274 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5275 gimple **vec_stmt, slp_tree slp_node,
5276 stmt_vector_for_cost *cost_vec)
5278 tree vec_dest, cvt_op = NULL_TREE;
5279 tree scalar_dest;
5280 tree op0, op1 = NULL_TREE;
5281 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5282 tree_code tc1, tc2;
5283 code_helper code, code1, code2;
5284 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5285 tree new_temp;
5286 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5287 int ndts = 2;
5288 poly_uint64 nunits_in;
5289 poly_uint64 nunits_out;
5290 tree vectype_out, vectype_in;
5291 int ncopies, i;
5292 tree lhs_type, rhs_type;
5293 /* For conversions between floating point and integer there are two NARROW
5294 cases. NARROW_SRC is used for FLOAT_EXPR and means
5295 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5296 This is safe when the range of the source integer fits into the lower
5297 precision. NARROW_DST is used for FIX_TRUNC_EXPR and means
5298 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5299 For other conversions that narrow, NARROW_DST is used by
5300 default. */
5301 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5302 vec<tree> vec_oprnds0 = vNULL;
5303 vec<tree> vec_oprnds1 = vNULL;
5304 tree vop0;
5305 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5306 int multi_step_cvt = 0;
5307 vec<tree> interm_types = vNULL;
5308 tree intermediate_type, cvt_type = NULL_TREE;
5309 int op_type;
5310 unsigned short fltsz;
5312 /* Is STMT a vectorizable conversion? */
5314 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5315 return false;
5317 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5318 && ! vec_stmt)
5319 return false;
5321 gimple* stmt = stmt_info->stmt;
5322 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5323 return false;
5325 if (gimple_get_lhs (stmt) == NULL_TREE
5326 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5327 return false;
5332 if (is_gimple_assign (stmt))
5334 code = gimple_assign_rhs_code (stmt);
5335 op_type = TREE_CODE_LENGTH ((tree_code) code);
5337 else if (gimple_call_internal_p (stmt))
5339 code = gimple_call_internal_fn (stmt);
5340 op_type = gimple_call_num_args (stmt);
5342 else
5343 return false;
5345 bool widen_arith = (code == WIDEN_MULT_EXPR
5346 || code == WIDEN_LSHIFT_EXPR
5347 || widening_fn_p (code));
5349 if (!widen_arith
5350 && !CONVERT_EXPR_CODE_P (code)
5351 && code != FIX_TRUNC_EXPR
5352 && code != FLOAT_EXPR)
5353 return false;
5355 /* Check types of lhs and rhs. */
5356 scalar_dest = gimple_get_lhs (stmt);
5357 lhs_type = TREE_TYPE (scalar_dest);
5358 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5360 /* Check the operands of the operation. */
5361 slp_tree slp_op0, slp_op1 = NULL;
5362 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5363 0, &op0, &slp_op0, &dt[0], &vectype_in))
5365 if (dump_enabled_p ())
5366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5367 "use not simple.\n");
5368 return false;
5371 rhs_type = TREE_TYPE (op0);
5372 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5373 && !((INTEGRAL_TYPE_P (lhs_type)
5374 && INTEGRAL_TYPE_P (rhs_type))
5375 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5376 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5377 return false;
5379 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5380 && ((INTEGRAL_TYPE_P (lhs_type)
5381 && !type_has_mode_precision_p (lhs_type))
5382 || (INTEGRAL_TYPE_P (rhs_type)
5383 && !type_has_mode_precision_p (rhs_type))))
5385 if (dump_enabled_p ())
5386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5387 "type conversion to/from bit-precision unsupported."
5388 "\n");
5389 return false;
5392 if (op_type == binary_op)
5394 gcc_assert (code == WIDEN_MULT_EXPR
5395 || code == WIDEN_LSHIFT_EXPR
5396 || widening_fn_p (code));
5398 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5399 gimple_call_arg (stmt, 0);
5400 tree vectype1_in;
5401 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5402 &op1, &slp_op1, &dt[1], &vectype1_in))
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5406 "use not simple.\n");
5407 return false;
5409 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5410 OP1. */
5411 if (!vectype_in)
5412 vectype_in = vectype1_in;
5415 /* If op0 is an external or constant def, infer the vector type
5416 from the scalar type. */
5417 if (!vectype_in)
5418 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5419 if (vec_stmt)
5420 gcc_assert (vectype_in);
5421 if (!vectype_in)
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5425 "no vectype for scalar type %T\n", rhs_type);
5427 return false;
5430 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5431 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5433 if (dump_enabled_p ())
5434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5435 "can't convert between boolean and non "
5436 "boolean vectors %T\n", rhs_type);
5438 return false;
5441 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5442 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
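/* Classify the conversion by lane counts: equal counts mean a plain
conversion or a half-widening operation, more output lanes than input
lanes mean the destination elements are narrower (demotion), and more
input lanes mean promotion. */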
5443 if (known_eq (nunits_out, nunits_in))
5444 if (widen_arith)
5445 modifier = WIDEN;
5446 else
5447 modifier = NONE;
5448 else if (multiple_p (nunits_out, nunits_in))
5449 modifier = NARROW_DST;
5450 else
5452 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5453 modifier = WIDEN;
5456 /* Multiple types in SLP are handled by creating the appropriate number of
5457 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5458 case of SLP. */
5459 if (slp_node)
5460 ncopies = 1;
5461 else if (modifier == NARROW_DST)
5462 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5463 else
5464 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5466 /* Sanity check: make sure that at least one copy of the vectorized stmt
5467 needs to be generated. */
5468 gcc_assert (ncopies >= 1);
5470 bool found_mode = false;
5471 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5472 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5473 opt_scalar_mode rhs_mode_iter;
5475 /* Supportable by target? */
5476 switch (modifier)
5478 case NONE:
5479 if (code != FIX_TRUNC_EXPR
5480 && code != FLOAT_EXPR
5481 && !CONVERT_EXPR_CODE_P (code))
5482 return false;
5483 gcc_assert (code.is_tree_code ());
5484 if (supportable_convert_operation ((tree_code) code, vectype_out,
5485 vectype_in, &tc1))
5487 code1 = tc1;
5488 break;
5491 /* For conversions between float and integer types try whether
5492 we can use intermediate signed integer types to support the
5493 conversion. */
5494 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
5495 && (code == FLOAT_EXPR
5496 || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5498 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5499 bool float_expr_p = code == FLOAT_EXPR;
5500 unsigned short target_size;
5501 scalar_mode intermediate_mode;
5502 if (demotion)
5504 intermediate_mode = lhs_mode;
5505 target_size = GET_MODE_SIZE (rhs_mode);
5507 else
5509 target_size = GET_MODE_SIZE (lhs_mode);
5510 if (!int_mode_for_size
5511 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5512 goto unsupported;
5514 code1 = float_expr_p ? code : NOP_EXPR;
5515 codecvt1 = float_expr_p ? NOP_EXPR : code;
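/* CODECVT1 is the first step (VECTYPE_IN -> CVT_TYPE) and CODE1 the
second (CVT_TYPE -> VECTYPE_OUT): for FLOAT_EXPR the integer width
change happens first, for FIX_TRUNC_EXPR it happens last. */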
5516 opt_scalar_mode mode_iter;
5517 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5519 intermediate_mode = mode_iter.require ();
5521 if (GET_MODE_SIZE (intermediate_mode) > target_size)
5522 break;
5524 scalar_mode cvt_mode;
5525 if (!int_mode_for_size
5526 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
5527 break;
5529 cvt_type = build_nonstandard_integer_type
5530 (GET_MODE_BITSIZE (cvt_mode), 0);
5532 /* Check if the intermediate type can hold OP0's range.
5533 When converting from float to integer this is not necessary
5534 because values that do not fit the (smaller) target type are
5535 unspecified anyway. */
5536 if (demotion && float_expr_p)
5538 wide_int op_min_value, op_max_value;
5539 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5540 break;
5542 if (cvt_type == NULL_TREE
5543 || (wi::min_precision (op_max_value, SIGNED)
5544 > TYPE_PRECISION (cvt_type))
5545 || (wi::min_precision (op_min_value, SIGNED)
5546 > TYPE_PRECISION (cvt_type)))
5547 continue;
5550 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5551 /* This should only happen for SLP, as long as the loop vectorizer
5552 only supports same-sized vectors. */
5553 if (cvt_type == NULL_TREE
5554 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5555 || !supportable_convert_operation ((tree_code) code1,
5556 vectype_out,
5557 cvt_type, &tc1)
5558 || !supportable_convert_operation ((tree_code) codecvt1,
5559 cvt_type,
5560 vectype_in, &tc2))
5561 continue;
5563 found_mode = true;
5564 break;
5567 if (found_mode)
5569 multi_step_cvt++;
5570 interm_types.safe_push (cvt_type);
5571 cvt_type = NULL_TREE;
5572 code1 = tc1;
5573 codecvt1 = tc2;
5574 break;
5577 /* FALLTHRU */
5578 unsupported:
5579 if (dump_enabled_p ())
5580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5581 "conversion not supported by target.\n");
5582 return false;
5584 case WIDEN:
5585 if (known_eq (nunits_in, nunits_out))
5587 if (!(code.is_tree_code ()
5588 && supportable_half_widening_operation ((tree_code) code,
5589 vectype_out, vectype_in,
5590 &tc1)))
5591 goto unsupported;
5592 code1 = tc1;
5593 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5594 break;
5596 if (supportable_widening_operation (vinfo, code, stmt_info,
5597 vectype_out, vectype_in, &code1,
5598 &code2, &multi_step_cvt,
5599 &interm_types))
5601 /* Binary widening operation can only be supported directly by the
5602 architecture. */
5603 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5604 break;
5607 if (code != FLOAT_EXPR
5608 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5609 goto unsupported;
5611 fltsz = GET_MODE_SIZE (lhs_mode);
5612 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5614 rhs_mode = rhs_mode_iter.require ();
5615 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5616 break;
5618 cvt_type
5619 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5620 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5621 if (cvt_type == NULL_TREE)
5622 goto unsupported;
5624 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5626 tc1 = ERROR_MARK;
5627 gcc_assert (code.is_tree_code ());
5628 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5629 cvt_type, &tc1))
5630 goto unsupported;
5631 codecvt1 = tc1;
5633 else if (!supportable_widening_operation (vinfo, code,
5634 stmt_info, vectype_out,
5635 cvt_type, &codecvt1,
5636 &codecvt2, &multi_step_cvt,
5637 &interm_types))
5638 continue;
5639 else
5640 gcc_assert (multi_step_cvt == 0);
5642 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5643 cvt_type,
5644 vectype_in, &code1,
5645 &code2, &multi_step_cvt,
5646 &interm_types))
5648 found_mode = true;
5649 break;
5653 if (!found_mode)
5654 goto unsupported;
5656 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5657 codecvt2 = ERROR_MARK;
5658 else
5660 multi_step_cvt++;
5661 interm_types.safe_push (cvt_type);
5662 cvt_type = NULL_TREE;
5664 break;
5666 case NARROW_DST:
5667 gcc_assert (op_type == unary_op);
5668 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5669 &code1, &multi_step_cvt,
5670 &interm_types))
5671 break;
5673 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5674 goto unsupported;
5676 if (code == FIX_TRUNC_EXPR)
5678 cvt_type
5679 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5680 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5681 if (cvt_type == NULL_TREE)
5682 goto unsupported;
5683 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5684 &tc1))
5685 codecvt1 = tc1;
5686 else
5687 goto unsupported;
5688 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5689 &code1, &multi_step_cvt,
5690 &interm_types))
5691 break;
5693 /* If op0 can be represented by a low-precision integer,
5694 truncate it to cvt_type and then do the FLOAT_EXPR. */
5695 else if (code == FLOAT_EXPR)
5697 wide_int op_min_value, op_max_value;
5698 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5699 goto unsupported;
5701 cvt_type
5702 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5703 if (cvt_type == NULL_TREE
5704 || (wi::min_precision (op_max_value, SIGNED)
5705 > TYPE_PRECISION (cvt_type))
5706 || (wi::min_precision (op_min_value, SIGNED)
5707 > TYPE_PRECISION (cvt_type)))
5708 goto unsupported;
5710 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5711 if (cvt_type == NULL_TREE)
5712 goto unsupported;
5713 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5714 &code1, &multi_step_cvt,
5715 &interm_types))
5716 goto unsupported;
5717 if (supportable_convert_operation ((tree_code) code, vectype_out,
5718 cvt_type, &tc1))
5720 codecvt1 = tc1;
5721 modifier = NARROW_SRC;
5722 break;
5726 goto unsupported;
5728 default:
5729 gcc_unreachable ();
5732 if (!vec_stmt) /* transformation not required. */
5734 if (slp_node
5735 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5736 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5738 if (dump_enabled_p ())
5739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5740 "incompatible vector types for invariants\n");
5741 return false;
5743 DUMP_VECT_SCOPE ("vectorizable_conversion");
5744 if (modifier == NONE)
5746 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5747 vect_model_simple_cost (vinfo, stmt_info,
5748 ncopies * (1 + multi_step_cvt),
5749 dt, ndts, slp_node, cost_vec);
5751 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5753 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5754 /* The final packing step produces one vector result per copy. */
5755 unsigned int nvectors
5756 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5757 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5758 multi_step_cvt, cost_vec,
5759 widen_arith);
5761 else
5763 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5764 /* The initial unpacking step produces two vector results
5765 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5766 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5767 unsigned int nvectors
5768 = (slp_node
5769 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5770 : ncopies * 2);
5771 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5772 multi_step_cvt, cost_vec,
5773 widen_arith);
5775 interm_types.release ();
5776 return true;
5779 /* Transform. */
5780 if (dump_enabled_p ())
5781 dump_printf_loc (MSG_NOTE, vect_location,
5782 "transform conversion. ncopies = %d.\n", ncopies);
5784 if (op_type == binary_op)
5786 if (CONSTANT_CLASS_P (op0))
5787 op0 = fold_convert (TREE_TYPE (op1), op0);
5788 else if (CONSTANT_CLASS_P (op1))
5789 op1 = fold_convert (TREE_TYPE (op0), op1);
5792 /* In case of multi-step conversion, we first generate conversion operations
5793 to the intermediate types, and then from those types to the final one.
5794 We create vector destinations for the intermediate type (TYPES) received
5795 from supportable_*_operation, and store them in the correct order
5796 for future use in vect_create_vectorized_*_stmts (). */
5797 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5798 bool widen_or_narrow_float_p
5799 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5800 vec_dest = vect_create_destination_var (scalar_dest,
5801 widen_or_narrow_float_p
5802 ? cvt_type : vectype_out);
5803 vec_dsts.quick_push (vec_dest);
5805 if (multi_step_cvt)
5807 for (i = interm_types.length () - 1;
5808 interm_types.iterate (i, &intermediate_type); i--)
5810 vec_dest = vect_create_destination_var (scalar_dest,
5811 intermediate_type);
5812 vec_dsts.quick_push (vec_dest);
5816 if (cvt_type)
5817 vec_dest = vect_create_destination_var (scalar_dest,
5818 widen_or_narrow_float_p
5819 ? vectype_out : cvt_type);
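/* NINPUTS is the number of input vector defs consumed per copy in the
non-SLP case: narrowing packs two inputs into each output, and every
extra conversion step doubles that again. */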
5821 int ninputs = 1;
5822 if (!slp_node)
5824 if (modifier == WIDEN)
5826 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5828 if (multi_step_cvt)
5829 ninputs = vect_pow2 (multi_step_cvt);
5830 ninputs *= 2;
5834 switch (modifier)
5836 case NONE:
5837 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5838 op0, &vec_oprnds0);
5839 /* With multi_step_cvt, vec_dest holds the intermediate (cvt_type) destination; save it in cvt_op and switch vec_dest to the final destination. */
5840 if (multi_step_cvt)
5842 cvt_op = vec_dest;
5843 vec_dest = vec_dsts[0];
5846 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5848 /* Arguments are ready, create the new vector stmt. */
5849 gimple* new_stmt;
5850 if (multi_step_cvt)
5852 gcc_assert (multi_step_cvt == 1);
5853 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5854 new_temp = make_ssa_name (cvt_op, new_stmt);
5855 gimple_assign_set_lhs (new_stmt, new_temp);
5856 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5857 vop0 = new_temp;
5859 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5860 new_temp = make_ssa_name (vec_dest, new_stmt);
5861 gimple_set_lhs (new_stmt, new_temp);
5862 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5864 if (slp_node)
5865 slp_node->push_vec_def (new_stmt);
5866 else
5867 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5869 break;
5871 case WIDEN:
5872 /* In case the vectorization factor (VF) is bigger than the number
5873 of elements that we can fit in a vectype (nunits), we have to
5874 generate more than one vector stmt - i.e., we need to "unroll"
5875 the vector stmt by a factor VF/nunits. */
5876 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5877 op0, &vec_oprnds0,
5878 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5879 &vec_oprnds1);
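/* For WIDEN_LSHIFT_EXPR the shift amount is a scalar invariant; no
vector defs were created for it above, so replicate OP1 once per
vector def of OP0. */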
5880 if (code == WIDEN_LSHIFT_EXPR)
5882 int oprnds_size = vec_oprnds0.length ();
5883 vec_oprnds1.create (oprnds_size);
5884 for (i = 0; i < oprnds_size; ++i)
5885 vec_oprnds1.quick_push (op1);
5887 /* Arguments are ready. Create the new vector stmts. */
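/* Each iteration promotes into the next destination type in VEC_DSTS and
stores its results back into VEC_OPRNDS0 for the following step; the
last step (i == 0) switches to CODECVT1/CODECVT2 when a separate float
conversion is required. */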
5888 for (i = multi_step_cvt; i >= 0; i--)
5890 tree this_dest = vec_dsts[i];
5891 code_helper c1 = code1, c2 = code2;
5892 if (i == 0 && codecvt2 != ERROR_MARK)
5894 c1 = codecvt1;
5895 c2 = codecvt2;
5897 if (known_eq (nunits_out, nunits_in))
5898 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5899 stmt_info, this_dest, gsi, c1,
5900 op_type);
5901 else
5902 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5903 &vec_oprnds1, stmt_info,
5904 this_dest, gsi,
5905 c1, c2, op_type);
5908 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5910 gimple *new_stmt;
5911 if (cvt_type)
5913 new_temp = make_ssa_name (vec_dest);
5914 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5915 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5917 else
5918 new_stmt = SSA_NAME_DEF_STMT (vop0);
5920 if (slp_node)
5921 slp_node->push_vec_def (new_stmt);
5922 else
5923 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5925 break;
5927 case NARROW_SRC:
5928 case NARROW_DST:
5929 /* In case the vectorization factor (VF) is bigger than the number
5930 of elements that we can fit in a vectype (nunits), we have to
5931 generate more than one vector stmt - i.e., we need to "unroll"
5932 the vector stmt by a factor VF/nunits. */
5933 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5934 op0, &vec_oprnds0);
5935 /* Arguments are ready. Create the new vector stmts. */
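/* For NARROW_DST with an intermediate type, first convert each input
from the float type to the same-width integer CVT_TYPE (CODECVT1);
the integer demotion follows below. */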
5936 if (cvt_type && modifier == NARROW_DST)
5937 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5939 new_temp = make_ssa_name (vec_dest);
5940 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5941 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5942 vec_oprnds0[i] = new_temp;
5945 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5946 multi_step_cvt,
5947 stmt_info, vec_dsts, gsi,
5948 slp_node, code1,
5949 modifier == NARROW_SRC);
5950 /* After demoting op0 to cvt_type, convert it to dest. */
5951 if (cvt_type && code == FLOAT_EXPR)
5953 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5955 /* Arguments are ready, create the new vector stmt. */
5956 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5957 gimple *new_stmt
5958 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5959 new_temp = make_ssa_name (vec_dest, new_stmt);
5960 gimple_set_lhs (new_stmt, new_temp);
5961 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5963 /* This is the last step of the conversion sequence. Store the
5964 vectors in SLP_NODE or in vector info of the scalar statement
5965 (or in STMT_VINFO_RELATED_STMT chain). */
5966 if (slp_node)
5967 slp_node->push_vec_def (new_stmt);
5968 else
5969 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5972 break;
5974 if (!slp_node)
5975 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5977 vec_oprnds0.release ();
5978 vec_oprnds1.release ();
5979 interm_types.release ();
5981 return true;
5984 /* Return true if we can assume from the scalar form of STMT_INFO that
5985 neither the scalar nor the vector forms will generate code. STMT_INFO
5986 is known not to involve a data reference. */
5988 bool
5989 vect_nop_conversion_p (stmt_vec_info stmt_info)
5991 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5992 if (!stmt)
5993 return false;
5995 tree lhs = gimple_assign_lhs (stmt);
5996 tree_code code = gimple_assign_rhs_code (stmt);
5997 tree rhs = gimple_assign_rhs1 (stmt);
5999 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
6000 return true;
6002 if (CONVERT_EXPR_CODE_P (code))
6003 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
6005 return false;
6008 /* Function vectorizable_assignment.
6010 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
6011 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6012 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6013 Return true if STMT_INFO is vectorizable in this way. */
6015 static bool
6016 vectorizable_assignment (vec_info *vinfo,
6017 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6018 gimple **vec_stmt, slp_tree slp_node,
6019 stmt_vector_for_cost *cost_vec)
6021 tree vec_dest;
6022 tree scalar_dest;
6023 tree op;
6024 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6025 tree new_temp;
6026 enum vect_def_type dt[1] = {vect_unknown_def_type};
6027 int ndts = 1;
6028 int ncopies;
6029 int i;
6030 vec<tree> vec_oprnds = vNULL;
6031 tree vop;
6032 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6033 enum tree_code code;
6034 tree vectype_in;
6036 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6037 return false;
6039 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6040 && ! vec_stmt)
6041 return false;
6043 /* Is vectorizable assignment? */
6044 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6045 if (!stmt)
6046 return false;
6048 scalar_dest = gimple_assign_lhs (stmt);
6049 if (TREE_CODE (scalar_dest) != SSA_NAME)
6050 return false;
6052 if (STMT_VINFO_DATA_REF (stmt_info))
6053 return false;
6055 code = gimple_assign_rhs_code (stmt);
6056 if (!(gimple_assign_single_p (stmt)
6057 || code == PAREN_EXPR
6058 || CONVERT_EXPR_CODE_P (code)))
6059 return false;
6061 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6062 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6064 /* Multiple types in SLP are handled by creating the appropriate number of
6065 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6066 case of SLP. */
6067 if (slp_node)
6068 ncopies = 1;
6069 else
6070 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6072 gcc_assert (ncopies >= 1);
6074 slp_tree slp_op;
6075 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
6076 &dt[0], &vectype_in))
6078 if (dump_enabled_p ())
6079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6080 "use not simple.\n");
6081 return false;
6083 if (!vectype_in)
6084 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
6086 /* We can handle NOP_EXPR conversions that do not change the number
6087 of elements or the vector size. */
6088 if ((CONVERT_EXPR_CODE_P (code)
6089 || code == VIEW_CONVERT_EXPR)
6090 && (!vectype_in
6091 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
6092 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
6093 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
6094 return false;
6096 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
6098 if (dump_enabled_p ())
6099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6100 "can't convert between boolean and non "
6101 "boolean vectors %T\n", TREE_TYPE (op));
6103 return false;
6106 /* We do not handle bit-precision changes. */
6107 if ((CONVERT_EXPR_CODE_P (code)
6108 || code == VIEW_CONVERT_EXPR)
6109 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6110 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6111 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
6112 && !type_has_mode_precision_p (TREE_TYPE (op))))
6113 /* But a conversion that does not change the bit-pattern is ok. */
6114 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6115 && INTEGRAL_TYPE_P (TREE_TYPE (op))
6116 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
6117 > TYPE_PRECISION (TREE_TYPE (op)))
6118 && TYPE_UNSIGNED (TREE_TYPE (op))))
6120 if (dump_enabled_p ())
6121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6122 "type conversion to/from bit-precision "
6123 "unsupported.\n");
6124 return false;
6127 if (!vec_stmt) /* transformation not required. */
6129 if (slp_node
6130 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
6132 if (dump_enabled_p ())
6133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6134 "incompatible vector types for invariants\n");
6135 return false;
6137 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
6138 DUMP_VECT_SCOPE ("vectorizable_assignment");
6139 if (!vect_nop_conversion_p (stmt_info))
6140 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
6141 cost_vec);
6142 return true;
6145 /* Transform. */
6146 if (dump_enabled_p ())
6147 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6149 /* Handle def. */
6150 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6152 /* Handle use. */
6153 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
6155 /* Arguments are ready. Create the new vector stmt. */
6156 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
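/* For (nop) conversions the copy just reinterprets the bits, which is
expressed as a VIEW_CONVERT_EXPR to the destination vector type;
plain copies use VOP directly. */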
6158 if (CONVERT_EXPR_CODE_P (code)
6159 || code == VIEW_CONVERT_EXPR)
6160 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6161 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6162 new_temp = make_ssa_name (vec_dest, new_stmt);
6163 gimple_assign_set_lhs (new_stmt, new_temp);
6164 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6165 if (slp_node)
6166 slp_node->push_vec_def (new_stmt);
6167 else
6168 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6170 if (!slp_node)
6171 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6173 vec_oprnds.release ();
6174 return true;
6178 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6179 either as shift by a scalar or by a vector. */
6181 bool
6182 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6185 machine_mode vec_mode;
6186 optab optab;
6187 int icode;
6188 tree vectype;
6190 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6191 if (!vectype)
6192 return false;
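/* Prefer a shift-by-scalar optab; if the target lacks one, fall back to
shift-by-vector. */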
6194 optab = optab_for_tree_code (code, vectype, optab_scalar);
6195 if (!optab
6196 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6198 optab = optab_for_tree_code (code, vectype, optab_vector);
6199 if (!optab
6200 || (optab_handler (optab, TYPE_MODE (vectype))
6201 == CODE_FOR_nothing))
6202 return false;
6205 vec_mode = TYPE_MODE (vectype);
6206 icode = (int) optab_handler (optab, vec_mode);
6207 if (icode == CODE_FOR_nothing)
6208 return false;
6210 return true;
6214 /* Function vectorizable_shift.
6216 Check if STMT_INFO performs a shift operation that can be vectorized.
6217 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6218 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6219 Return true if STMT_INFO is vectorizable in this way. */
6221 static bool
6222 vectorizable_shift (vec_info *vinfo,
6223 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6224 gimple **vec_stmt, slp_tree slp_node,
6225 stmt_vector_for_cost *cost_vec)
6227 tree vec_dest;
6228 tree scalar_dest;
6229 tree op0, op1 = NULL;
6230 tree vec_oprnd1 = NULL_TREE;
6231 tree vectype;
6232 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6233 enum tree_code code;
6234 machine_mode vec_mode;
6235 tree new_temp;
6236 optab optab;
6237 int icode;
6238 machine_mode optab_op2_mode;
6239 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6240 int ndts = 2;
6241 poly_uint64 nunits_in;
6242 poly_uint64 nunits_out;
6243 tree vectype_out;
6244 tree op1_vectype;
6245 int ncopies;
6246 int i;
6247 vec<tree> vec_oprnds0 = vNULL;
6248 vec<tree> vec_oprnds1 = vNULL;
6249 tree vop0, vop1;
6250 unsigned int k;
6251 bool scalar_shift_arg = true;
6252 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6253 bool incompatible_op1_vectype_p = false;
6255 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6256 return false;
6258 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6259 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6260 && ! vec_stmt)
6261 return false;
6263 /* Is STMT a vectorizable binary/unary operation? */
6264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6265 if (!stmt)
6266 return false;
6268 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6269 return false;
6271 code = gimple_assign_rhs_code (stmt);
6273 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6274 || code == RROTATE_EXPR))
6275 return false;
6277 scalar_dest = gimple_assign_lhs (stmt);
6278 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6279 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6281 if (dump_enabled_p ())
6282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6283 "bit-precision shifts not supported.\n");
6284 return false;
6287 slp_tree slp_op0;
6288 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6289 0, &op0, &slp_op0, &dt[0], &vectype))
6291 if (dump_enabled_p ())
6292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6293 "use not simple.\n");
6294 return false;
6296 /* If op0 is an external or constant def, infer the vector type
6297 from the scalar type. */
6298 if (!vectype)
6299 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6300 if (vec_stmt)
6301 gcc_assert (vectype);
6302 if (!vectype)
6304 if (dump_enabled_p ())
6305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6306 "no vectype for scalar type\n");
6307 return false;
6310 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6311 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6312 if (maybe_ne (nunits_out, nunits_in))
6313 return false;
6315 stmt_vec_info op1_def_stmt_info;
6316 slp_tree slp_op1;
6317 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6318 &dt[1], &op1_vectype, &op1_def_stmt_info))
6320 if (dump_enabled_p ())
6321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6322 "use not simple.\n");
6323 return false;
6326 /* Multiple types in SLP are handled by creating the appropriate number of
6327 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6328 case of SLP. */
6329 if (slp_node)
6330 ncopies = 1;
6331 else
6332 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6334 gcc_assert (ncopies >= 1);
6336 /* Determine whether the shift amount is a vector, or scalar. If the
6337 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6339 if ((dt[1] == vect_internal_def
6340 || dt[1] == vect_induction_def
6341 || dt[1] == vect_nested_cycle)
6342 && !slp_node)
6343 scalar_shift_arg = false;
6344 else if (dt[1] == vect_constant_def
6345 || dt[1] == vect_external_def
6346 || dt[1] == vect_internal_def)
6348 /* In SLP we need to check whether the shift count is the same
6349 in all of the statements; in loops, a constant or invariant
6350 shift count is always a scalar shift. */
6351 if (slp_node)
6353 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6354 stmt_vec_info slpstmt_info;
6356 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6358 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6359 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6360 scalar_shift_arg = false;
6363 /* For internal SLP defs we have to make sure we see scalar stmts
6364 for all vector elements.
6365 ??? For different vectors we could resort to a different
6366 scalar shift operand but code-generation below simply always
6367 takes the first. */
6368 if (dt[1] == vect_internal_def
6369 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6370 stmts.length ()))
6371 scalar_shift_arg = false;
6374 /* If the shift amount is computed by a pattern stmt we cannot
6375 use the scalar amount directly thus give up and use a vector
6376 shift. */
6377 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6378 scalar_shift_arg = false;
6380 else
6382 if (dump_enabled_p ())
6383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6384 "operand mode requires invariant argument.\n");
6385 return false;
6388 /* Vector shifted by vector. */
6389 bool was_scalar_shift_arg = scalar_shift_arg;
6390 if (!scalar_shift_arg)
6392 optab = optab_for_tree_code (code, vectype, optab_vector);
6393 if (dump_enabled_p ())
6394 dump_printf_loc (MSG_NOTE, vect_location,
6395 "vector/vector shift/rotate found.\n");
6397 if (!op1_vectype)
6398 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6399 slp_op1);
6400 incompatible_op1_vectype_p
6401 = (op1_vectype == NULL_TREE
6402 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6403 TYPE_VECTOR_SUBPARTS (vectype))
6404 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6405 if (incompatible_op1_vectype_p
6406 && (!slp_node
6407 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6408 || slp_op1->refcnt != 1))
6410 if (dump_enabled_p ())
6411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6412 "unusable type for last operand in"
6413 " vector/vector shift/rotate.\n");
6414 return false;
6417 /* See if the machine has a vector shifted by scalar insn and if not
6418 then see if it has a vector shifted by vector insn. */
6419 else
6421 optab = optab_for_tree_code (code, vectype, optab_scalar);
6422 if (optab
6423 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6425 if (dump_enabled_p ())
6426 dump_printf_loc (MSG_NOTE, vect_location,
6427 "vector/scalar shift/rotate found.\n");
6429 else
6431 optab = optab_for_tree_code (code, vectype, optab_vector);
6432 if (optab
6433 && (optab_handler (optab, TYPE_MODE (vectype))
6434 != CODE_FOR_nothing))
6436 scalar_shift_arg = false;
6438 if (dump_enabled_p ())
6439 dump_printf_loc (MSG_NOTE, vect_location,
6440 "vector/vector shift/rotate found.\n");
6442 if (!op1_vectype)
6443 op1_vectype = get_vectype_for_scalar_type (vinfo,
6444 TREE_TYPE (op1),
6445 slp_op1);
6447 /* Unlike the other binary operators, shifts/rotates have
6448 the rhs being int, instead of the same type as the lhs,
6449 so make sure the scalar is the right type if we are
6450 dealing with vectors of long long/long/short/char. */
6451 incompatible_op1_vectype_p
6452 = (!op1_vectype
6453 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6454 TREE_TYPE (op1)));
6455 if (incompatible_op1_vectype_p
6456 && dt[1] == vect_internal_def)
6458 if (dump_enabled_p ())
6459 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6460 "unusable type for last operand in"
6461 " vector/vector shift/rotate.\n");
6462 return false;
6468 /* Supportable by target? */
6469 if (!optab)
6471 if (dump_enabled_p ())
6472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6473 "no optab.\n");
6474 return false;
6476 vec_mode = TYPE_MODE (vectype);
6477 icode = (int) optab_handler (optab, vec_mode);
6478 if (icode == CODE_FOR_nothing)
6480 if (dump_enabled_p ())
6481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6482 "op not supported by target.\n");
6483 return false;
6485 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6486 if (vect_emulated_vector_p (vectype))
6487 return false;
6489 if (!vec_stmt) /* transformation not required. */
6491 if (slp_node
6492 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6493 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6494 && (!incompatible_op1_vectype_p
6495 || dt[1] == vect_constant_def)
6496 && !vect_maybe_update_slp_op_vectype
6497 (slp_op1,
6498 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6500 if (dump_enabled_p ())
6501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6502 "incompatible vector types for invariants\n");
6503 return false;
6505 /* Now adjust the constant shift amount in place. */
6506 if (slp_node
6507 && incompatible_op1_vectype_p
6508 && dt[1] == vect_constant_def)
6510 for (unsigned i = 0;
6511 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6513 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6514 = fold_convert (TREE_TYPE (vectype),
6515 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6516 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6517 == INTEGER_CST));
6520 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6521 DUMP_VECT_SCOPE ("vectorizable_shift");
6522 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6523 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6524 return true;
6527 /* Transform. */
6529 if (dump_enabled_p ())
6530 dump_printf_loc (MSG_NOTE, vect_location,
6531 "transform binary/unary operation.\n");
6533 if (incompatible_op1_vectype_p && !slp_node)
6535 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6536 op1 = fold_convert (TREE_TYPE (vectype), op1);
6537 if (dt[1] != vect_constant_def)
6538 op1 = vect_init_vector (vinfo, stmt_info, op1,
6539 TREE_TYPE (vectype), NULL);
6542 /* Handle def. */
6543 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6545 if (scalar_shift_arg && dt[1] != vect_internal_def)
6547 /* Vector shl and shr insn patterns can be defined with scalar
6548 operand 2 (shift operand). In this case, use constant or loop
6549 invariant op1 directly, without extending it to vector mode
6550 first. */
6551 optab_op2_mode = insn_data[icode].operand[2].mode;
6552 if (!VECTOR_MODE_P (optab_op2_mode))
6554 if (dump_enabled_p ())
6555 dump_printf_loc (MSG_NOTE, vect_location,
6556 "operand 1 using scalar mode.\n");
6557 vec_oprnd1 = op1;
6558 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6559 vec_oprnds1.quick_push (vec_oprnd1);
6560 /* Store vec_oprnd1 for every vector stmt to be created.
6561 We check during the analysis that all the shift arguments
6562 are the same.
6563 TODO: Allow different constants for different vector
6564 stmts generated for an SLP instance. */
6565 for (k = 0;
6566 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6567 vec_oprnds1.quick_push (vec_oprnd1);
6570 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6572 if (was_scalar_shift_arg)
6574 /* If the argument was the same in all lanes create
6575 the correctly typed vector shift amount directly. */
6576 op1 = fold_convert (TREE_TYPE (vectype), op1);
6577 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6578 !loop_vinfo ? gsi : NULL);
6579 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6580 !loop_vinfo ? gsi : NULL);
6581 vec_oprnds1.create (slp_node->vec_stmts_size);
6582 for (k = 0; k < slp_node->vec_stmts_size; k++)
6583 vec_oprnds1.quick_push (vec_oprnd1);
6585 else if (dt[1] == vect_constant_def)
6586 /* The constant shift amount has been adjusted in place. */
6588 else
6589 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6592 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6593 (a special case for certain kinds of vector shifts);
6594 operand 1 should be of a vector type (the usual case). */
6595 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6596 op0, &vec_oprnds0,
6597 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6599 /* Arguments are ready. Create the new vector stmt. */
6600 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6602 /* For internal defs where we need to use a scalar shift arg
6603 extract the first lane. */
6604 if (scalar_shift_arg && dt[1] == vect_internal_def)
6606 vop1 = vec_oprnds1[0];
6607 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6608 gassign *new_stmt
6609 = gimple_build_assign (new_temp,
6610 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6611 vop1,
6612 TYPE_SIZE (TREE_TYPE (new_temp)),
6613 bitsize_zero_node));
6614 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6615 vop1 = new_temp;
6617 else
6618 vop1 = vec_oprnds1[i];
6619 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6620 new_temp = make_ssa_name (vec_dest, new_stmt);
6621 gimple_assign_set_lhs (new_stmt, new_temp);
6622 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6623 if (slp_node)
6624 slp_node->push_vec_def (new_stmt);
6625 else
6626 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6629 if (!slp_node)
6630 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6632 vec_oprnds0.release ();
6633 vec_oprnds1.release ();
6635 return true;
6638 /* Function vectorizable_operation.
6640 Check if STMT_INFO performs a binary, unary or ternary operation that can
6641 be vectorized.
6642 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6643 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6644 Return true if STMT_INFO is vectorizable in this way. */
6646 static bool
6647 vectorizable_operation (vec_info *vinfo,
6648 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6649 gimple **vec_stmt, slp_tree slp_node,
6650 stmt_vector_for_cost *cost_vec)
6652 tree vec_dest;
6653 tree scalar_dest;
6654 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6655 tree vectype;
6656 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6657 enum tree_code code, orig_code;
6658 machine_mode vec_mode;
6659 tree new_temp;
6660 int op_type;
6661 optab optab;
6662 bool target_support_p;
6663 enum vect_def_type dt[3]
6664 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6665 int ndts = 3;
6666 poly_uint64 nunits_in;
6667 poly_uint64 nunits_out;
6668 tree vectype_out;
6669 int ncopies, vec_num;
6670 int i;
6671 vec<tree> vec_oprnds0 = vNULL;
6672 vec<tree> vec_oprnds1 = vNULL;
6673 vec<tree> vec_oprnds2 = vNULL;
6674 tree vop0, vop1, vop2;
6675 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6677 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6678 return false;
6680 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6681 && ! vec_stmt)
6682 return false;
6684 /* Is STMT a vectorizable binary/unary operation? */
6685 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6686 if (!stmt)
6687 return false;
6689 /* Loads and stores are handled in vectorizable_{load,store}. */
6690 if (STMT_VINFO_DATA_REF (stmt_info))
6691 return false;
6693 orig_code = code = gimple_assign_rhs_code (stmt);
6695 /* Shifts are handled in vectorizable_shift. */
6696 if (code == LSHIFT_EXPR
6697 || code == RSHIFT_EXPR
6698 || code == LROTATE_EXPR
6699 || code == RROTATE_EXPR)
6700 return false;
6702 /* Comparisons are handled in vectorizable_comparison. */
6703 if (TREE_CODE_CLASS (code) == tcc_comparison)
6704 return false;
6706 /* Conditions are handled in vectorizable_condition. */
6707 if (code == COND_EXPR)
6708 return false;
6710 /* For pointer addition and subtraction, we should use the normal
6711 plus and minus for the vector operation. */
6712 if (code == POINTER_PLUS_EXPR)
6713 code = PLUS_EXPR;
6714 if (code == POINTER_DIFF_EXPR)
6715 code = MINUS_EXPR;
6717 /* Support only unary or binary operations. */
6718 op_type = TREE_CODE_LENGTH (code);
6719 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6721 if (dump_enabled_p ())
6722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6723 "num. args = %d (not unary/binary/ternary op).\n",
6724 op_type);
6725 return false;
6728 scalar_dest = gimple_assign_lhs (stmt);
6729 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6731 /* Most operations cannot handle bit-precision types without extra
6732 truncations. */
6733 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6734 if (!mask_op_p
6735 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6736 /* Exception are bitwise binary operations. */
6737 && code != BIT_IOR_EXPR
6738 && code != BIT_XOR_EXPR
6739 && code != BIT_AND_EXPR)
6741 if (dump_enabled_p ())
6742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6743 "bit-precision arithmetic not supported.\n");
6744 return false;
6747 slp_tree slp_op0;
6748 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6749 0, &op0, &slp_op0, &dt[0], &vectype))
6751 if (dump_enabled_p ())
6752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6753 "use not simple.\n");
6754 return false;
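/* Track whether every operand is external or constant; such invariant
operations compute the same value in every lane and so do not need
the inactive-lane masking considered below. */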
6756 bool is_invariant = (dt[0] == vect_external_def
6757 || dt[0] == vect_constant_def);
6758 /* If op0 is an external or constant def, infer the vector type
6759 from the scalar type. */
6760 if (!vectype)
6762 /* For boolean type we cannot determine vectype by
6763 invariant value (don't know whether it is a vector
6764 of booleans or vector of integers). We use output
6765 vectype because operations on boolean don't change
6766 type. */
6767 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6769 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6771 if (dump_enabled_p ())
6772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6773 "not supported operation on bool value.\n");
6774 return false;
6776 vectype = vectype_out;
6778 else
6779 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6780 slp_node);
6782 if (vec_stmt)
6783 gcc_assert (vectype);
6784 if (!vectype)
6786 if (dump_enabled_p ())
6787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6788 "no vectype for scalar type %T\n",
6789 TREE_TYPE (op0));
6791 return false;
6794 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6795 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6796 if (maybe_ne (nunits_out, nunits_in))
6797 return false;
6799 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6800 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6801 if (op_type == binary_op || op_type == ternary_op)
6803 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6804 1, &op1, &slp_op1, &dt[1], &vectype2))
6806 if (dump_enabled_p ())
6807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6808 "use not simple.\n");
6809 return false;
6811 is_invariant &= (dt[1] == vect_external_def
6812 || dt[1] == vect_constant_def);
6813 if (vectype2
6814 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6815 return false;
6817 if (op_type == ternary_op)
6819 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6820 2, &op2, &slp_op2, &dt[2], &vectype3))
6822 if (dump_enabled_p ())
6823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6824 "use not simple.\n");
6825 return false;
6827 is_invariant &= (dt[2] == vect_external_def
6828 || dt[2] == vect_constant_def);
6829 if (vectype3
6830 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6831 return false;
6834 /* Multiple types in SLP are handled by creating the appropriate number of
6835 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6836 case of SLP. */
6837 if (slp_node)
6839 ncopies = 1;
6840 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6842 else
6844 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6845 vec_num = 1;
6848 gcc_assert (ncopies >= 1);
6850 /* Reject attempts to combine mask types with nonmask types, e.g. if
6851 we have an AND between a (nonmask) boolean loaded from memory and
6852 a (mask) boolean result of a comparison.
6854 TODO: We could easily fix these cases up using pattern statements. */
6855 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6856 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6857 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6859 if (dump_enabled_p ())
6860 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6861 "mixed mask and nonmask vector types\n");
6862 return false;
6865 /* Supportable by target? */
6867 vec_mode = TYPE_MODE (vectype);
6868 if (code == MULT_HIGHPART_EXPR)
6869 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6870 else
6872 optab = optab_for_tree_code (code, vectype, optab_default);
6873 if (!optab)
6875 if (dump_enabled_p ())
6876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6877 "no optab.\n");
6878 return false;
6880 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6881 || optab_libfunc (optab, vec_mode));
6884 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6885 if (!target_support_p || using_emulated_vectors_p)
6887 if (dump_enabled_p ())
6888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6889 "op not supported by target.\n");
6890 /* When vec_mode is not a vector mode and we have verified that the
6891 ops we do not have to lower (like AND) are natively supported, let
6892 those through even when the mode isn't word_mode. For ops we do
6893 have to lower, the lowering code assumes we are dealing with
6894 word_mode. */
6895 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6896 || !target_support_p)
6897 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6898 /* Check only during analysis. */
6899 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6901 if (dump_enabled_p ())
6902 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6903 return false;
6905 if (dump_enabled_p ())
6906 dump_printf_loc (MSG_NOTE, vect_location,
6907 "proceeding using word mode.\n");
6908 using_emulated_vectors_p = true;
6911 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6912 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6913 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6914 internal_fn cond_fn = get_conditional_internal_fn (code);
6915 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6917 /* If operating on inactive elements could generate spurious traps,
6918 we need to restrict the operation to active lanes. Note that this
6919 specifically doesn't apply to unhoisted invariants, since they
6920 operate on the same value for every lane.
6922 Similarly, if this operation is part of a reduction, a fully-masked
6923 loop should only change the active lanes of the reduction chain,
6924 keeping the inactive lanes as-is. */
6925 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6926 || reduc_idx >= 0);
6928 if (!vec_stmt) /* transformation not required. */
6930 if (loop_vinfo
6931 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6932 && mask_out_inactive)
6934 if (cond_len_fn != IFN_LAST
6935 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6936 OPTIMIZE_FOR_SPEED))
6937 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6939 else if (cond_fn != IFN_LAST
6940 && direct_internal_fn_supported_p (cond_fn, vectype,
6941 OPTIMIZE_FOR_SPEED))
6942 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6943 vectype, NULL);
6944 else
6946 if (dump_enabled_p ())
6947 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6948 "can't use a fully-masked loop because no"
6949 " conditional operation is available.\n");
6950 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6954 /* Put types on constant and invariant SLP children. */
6955 if (slp_node
6956 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6957 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6958 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6960 if (dump_enabled_p ())
6961 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6962 "incompatible vector types for invariants\n");
6963 return false;
6966 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6967 DUMP_VECT_SCOPE ("vectorizable_operation");
6968 vect_model_simple_cost (vinfo, stmt_info,
6969 ncopies, dt, ndts, slp_node, cost_vec);
6970 if (using_emulated_vectors_p)
6972 /* The above vect_model_simple_cost call handles constants
6973 in the prologue and (mis-)costs one of the stmts as
6974 vector stmt. See below for the actual lowering that will
6975 be applied. */
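/* The factors below approximate how many word-mode stmts that lowering
needs beyond the one already costed as a vector stmt. */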
6976 unsigned n
6977 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6978 switch (code)
6980 case PLUS_EXPR:
6981 n *= 5;
6982 break;
6983 case MINUS_EXPR:
6984 n *= 6;
6985 break;
6986 case NEGATE_EXPR:
6987 n *= 4;
6988 break;
6989 default:
6990 /* Bit operations do not have extra cost and are accounted
6991 as vector stmt by vect_model_simple_cost. */
6992 n = 0;
6993 break;
6995 if (n != 0)
6997 /* We also need to materialize two large constants. */
6998 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6999 0, vect_prologue);
7000 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
7001 0, vect_body);
7004 return true;
7007 /* Transform. */
7009 if (dump_enabled_p ())
7010 dump_printf_loc (MSG_NOTE, vect_location,
7011 "transform binary/unary operation.\n");
7013 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
7014 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
7016 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
7017 vectors with unsigned elements, but the result is signed. So, we
7018 need to compute the MINUS_EXPR into a vectype temporary and
7019 VIEW_CONVERT_EXPR it into the final vectype_out result. */
7020 tree vec_cvt_dest = NULL_TREE;
7021 if (orig_code == POINTER_DIFF_EXPR)
7023 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7024 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
7026 /* Handle def. */
7027 else
7028 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
7030 /* In case the vectorization factor (VF) is bigger than the number
7031 of elements that we can fit in a vectype (nunits), we have to generate
7032 more than one vector stmt - i.e., we need to "unroll" the
7033 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7034 from one copy of the vector stmt to the next, in the field
7035 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7036 stages to find the correct vector defs to be used when vectorizing
7037 stmts that use the defs of the current stmt. The example below
7038 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
7039 we need to create 4 vectorized stmts):
7041 before vectorization:
7042 RELATED_STMT VEC_STMT
7043 S1: x = memref - -
7044 S2: z = x + 1 - -
7046 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
7047 there):
7048 RELATED_STMT VEC_STMT
7049 VS1_0: vx0 = memref0 VS1_1 -
7050 VS1_1: vx1 = memref1 VS1_2 -
7051 VS1_2: vx2 = memref2 VS1_3 -
7052 VS1_3: vx3 = memref3 - -
7053 S1: x = load - VS1_0
7054 S2: z = x + 1 - -
7056 step2: vectorize stmt S2 (done here):
7057 To vectorize stmt S2 we first need to find the relevant vector
7058 def for the first operand 'x'. This is, as usual, obtained from
7059 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
7060 that defines 'x' (S1). This way we find the stmt VS1_0, and the
7061 relevant vector def 'vx0'. Having found 'vx0' we can generate
7062 the vector stmt VS2_0, and as usual, record it in the
7063 STMT_VINFO_VEC_STMT of stmt S2.
7064 When creating the second copy (VS2_1), we obtain the relevant vector
7065 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
7066 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
7067 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
7068 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
7069 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
7070 chain of stmts and pointers:
7071 RELATED_STMT VEC_STMT
7072 VS1_0: vx0 = memref0 VS1_1 -
7073 VS1_1: vx1 = memref1 VS1_2 -
7074 VS1_2: vx2 = memref2 VS1_3 -
7075 VS1_3: vx3 = memref3 - -
7076 S1: x = load - VS1_0
7077 VS2_0: vz0 = vx0 + v1 VS2_1 -
7078 VS2_1: vz1 = vx1 + v1 VS2_2 -
7079 VS2_2: vz2 = vx2 + v1 VS2_3 -
7080 VS2_3: vz3 = vx3 + v1 - -
7081 S2: z = x + 1 - VS2_0 */
7083 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
7084 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
7085 /* Arguments are ready. Create the new vector stmt. */
7086 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
7088 gimple *new_stmt = NULL;
7089 vop1 = ((op_type == binary_op || op_type == ternary_op)
7090 ? vec_oprnds1[i] : NULL_TREE);
7091 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
7092 if (using_emulated_vectors_p
7093 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
7095 /* Lower the operation. This follows vector lowering. */
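/* A sketch of the word-mode (SWAR) lowering built below, assuming for
   illustration a 64-bit word and 8-bit elements, so that
   low_bits = 0x7f7f...7f and high_bits = 0x8080...80:
     a + b  ->  ((a & low_bits) + (b & low_bits)) ^ ((a ^ b) & high_bits)
     a - b  ->  ((a | high_bits) - (b & low_bits)) ^ (~(a ^ b) & high_bits)
     -a     ->  (high_bits - (a & low_bits)) ^ (~a & high_bits)
   i.e. the per-element results are computed without letting carries or
   borrows cross element boundaries, and the top bit of each element is
   fixed up separately.  */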
7096 unsigned int width = vector_element_bits (vectype);
7097 tree inner_type = TREE_TYPE (vectype);
7098 tree word_type
7099 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
7100 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
7101 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
7102 tree high_bits
7103 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
7104 tree wvop0 = make_ssa_name (word_type);
7105 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
7106 build1 (VIEW_CONVERT_EXPR,
7107 word_type, vop0));
7108 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7109 tree result_low, signs;
7110 if (code == PLUS_EXPR || code == MINUS_EXPR)
7112 tree wvop1 = make_ssa_name (word_type);
7113 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
7114 build1 (VIEW_CONVERT_EXPR,
7115 word_type, vop1));
7116 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7117 signs = make_ssa_name (word_type);
7118 new_stmt = gimple_build_assign (signs,
7119 BIT_XOR_EXPR, wvop0, wvop1);
7120 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7121 tree b_low = make_ssa_name (word_type);
7122 new_stmt = gimple_build_assign (b_low,
7123 BIT_AND_EXPR, wvop1, low_bits);
7124 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7125 tree a_low = make_ssa_name (word_type);
7126 if (code == PLUS_EXPR)
7127 new_stmt = gimple_build_assign (a_low,
7128 BIT_AND_EXPR, wvop0, low_bits);
7129 else
7130 new_stmt = gimple_build_assign (a_low,
7131 BIT_IOR_EXPR, wvop0, high_bits);
7132 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7133 if (code == MINUS_EXPR)
7135 new_stmt = gimple_build_assign (NULL_TREE,
7136 BIT_NOT_EXPR, signs);
7137 signs = make_ssa_name (word_type);
7138 gimple_assign_set_lhs (new_stmt, signs);
7139 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7141 new_stmt = gimple_build_assign (NULL_TREE,
7142 BIT_AND_EXPR, signs, high_bits);
7143 signs = make_ssa_name (word_type);
7144 gimple_assign_set_lhs (new_stmt, signs);
7145 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7146 result_low = make_ssa_name (word_type);
7147 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
7148 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7150 else
7152 tree a_low = make_ssa_name (word_type);
7153 new_stmt = gimple_build_assign (a_low,
7154 BIT_AND_EXPR, wvop0, low_bits);
7155 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7156 signs = make_ssa_name (word_type);
7157 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7158 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7159 new_stmt = gimple_build_assign (NULL_TREE,
7160 BIT_AND_EXPR, signs, high_bits);
7161 signs = make_ssa_name (word_type);
7162 gimple_assign_set_lhs (new_stmt, signs);
7163 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7164 result_low = make_ssa_name (word_type);
7165 new_stmt = gimple_build_assign (result_low,
7166 MINUS_EXPR, high_bits, a_low);
7167 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7169 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
7170 signs);
7171 result_low = make_ssa_name (word_type);
7172 gimple_assign_set_lhs (new_stmt, result_low);
7173 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7174 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7175 build1 (VIEW_CONVERT_EXPR,
7176 vectype, result_low));
7177 new_temp = make_ssa_name (vectype);
7178 gimple_assign_set_lhs (new_stmt, new_temp);
7179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7181 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7183 tree mask;
7184 if (masked_loop_p)
7185 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7186 vec_num * ncopies, vectype, i);
7187 else
7188 /* Dummy mask. */
7189 mask = build_minus_one_cst (truth_type_for (vectype));
7190 auto_vec<tree> vops (6);
7191 vops.quick_push (mask);
7192 vops.quick_push (vop0);
7193 if (vop1)
7194 vops.quick_push (vop1);
7195 if (vop2)
7196 vops.quick_push (vop2);
7197 if (reduc_idx >= 0)
7199 /* Perform the operation on active elements only and take
7200 inactive elements from the reduction chain input. */
7201 gcc_assert (!vop2);
7202 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7204 else
7206 auto else_value = targetm.preferred_else_value
7207 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7208 vops.quick_push (else_value);
7210 if (len_loop_p)
7212 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7213 vec_num * ncopies, vectype, i, 1);
7214 signed char biasval
7215 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7216 tree bias = build_int_cst (intQI_type_node, biasval);
7217 vops.quick_push (len);
7218 vops.quick_push (bias);
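/* The internal function call built below then has a shape roughly like
     lhs = .COND_ADD (mask, op0, op1, else_value)
   or, for a length-controlled loop,
     lhs = .COND_LEN_ADD (dummy_mask, op0, op1, else_value, len, bias)
   with the conditional ifn chosen to match CODE (PLUS_EXPR is used here
   only as an example).  */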
7220 gcall *call
7221 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7222 : cond_len_fn,
7223 vops);
7224 new_temp = make_ssa_name (vec_dest, call);
7225 gimple_call_set_lhs (call, new_temp);
7226 gimple_call_set_nothrow (call, true);
7227 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7228 new_stmt = call;
7230 else
7232 tree mask = NULL_TREE;
7233 /* When combining two masks, check whether either of them is elsewhere
7234 combined with a loop mask; if so, we can mark the new combined mask
7235 as not needing to be combined with a loop mask again. */
7236 if (masked_loop_p
7237 && code == BIT_AND_EXPR
7238 && VECTOR_BOOLEAN_TYPE_P (vectype))
7240 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7241 ncopies}))
7243 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7244 vec_num * ncopies, vectype, i);
7246 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7247 vop0, gsi);
7250 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7251 ncopies }))
7253 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7254 vec_num * ncopies, vectype, i);
7256 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7257 vop1, gsi);
7261 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7262 new_temp = make_ssa_name (vec_dest, new_stmt);
7263 gimple_assign_set_lhs (new_stmt, new_temp);
7264 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7265 if (using_emulated_vectors_p)
7266 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7268 /* Enter the combined value into the vector cond hash so we don't
7269 AND it with a loop mask again. */
7270 if (mask)
7271 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7274 if (vec_cvt_dest)
7276 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7277 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7278 new_temp);
7279 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7280 gimple_assign_set_lhs (new_stmt, new_temp);
7281 vect_finish_stmt_generation (vinfo, stmt_info,
7282 new_stmt, gsi);
7285 if (slp_node)
7286 slp_node->push_vec_def (new_stmt);
7287 else
7288 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7291 if (!slp_node)
7292 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7294 vec_oprnds0.release ();
7295 vec_oprnds1.release ();
7296 vec_oprnds2.release ();
7298 return true;
7301 /* A helper function to ensure data reference DR_INFO's base alignment. */
7303 static void
7304 ensure_base_align (dr_vec_info *dr_info)
7306 /* Alignment is only analyzed for the first element of a DR group;
7307 use that to determine the base alignment we need to enforce. */
7308 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7309 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7311 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7313 if (dr_info->base_misaligned)
7315 tree base_decl = dr_info->base_decl;
7317 // We should only be able to increase the alignment of a base object if
7318 // we know what its new alignment should be at compile time.
7319 unsigned HOST_WIDE_INT align_base_to =
7320 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7322 if (decl_in_symtab_p (base_decl))
7323 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7324 else if (DECL_ALIGN (base_decl) < align_base_to)
7326 SET_DECL_ALIGN (base_decl, align_base_to);
7327 DECL_USER_ALIGN (base_decl) = 1;
7329 dr_info->base_misaligned = false;
7334 /* Function get_group_alias_ptr_type.
7336 Return the alias type for the group starting at FIRST_STMT_INFO. */
7338 static tree
7339 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7341 struct data_reference *first_dr, *next_dr;
7343 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7344 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7345 while (next_stmt_info)
7347 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7348 if (get_alias_set (DR_REF (first_dr))
7349 != get_alias_set (DR_REF (next_dr)))
7351 if (dump_enabled_p ())
7352 dump_printf_loc (MSG_NOTE, vect_location,
7353 "conflicting alias set types.\n");
7354 return ptr_type_node;
7356 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7358 return reference_alias_ptr_type (DR_REF (first_dr));
7362 /* Function scan_operand_equal_p.
7364 Helper function for check_scan_store. Compare two references
7365 with .GOMP_SIMD_LANE bases. */
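/* As a rough example (cf. the IL sketched in check_scan_store below),
   both references typically look like D.2042[_20] where _20 is the
   result of a .GOMP_SIMD_LANE call, possibly with a conversion and/or
   a multiplication by a constant applied to the index.  */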
7367 static bool
7368 scan_operand_equal_p (tree ref1, tree ref2)
7370 tree ref[2] = { ref1, ref2 };
7371 poly_int64 bitsize[2], bitpos[2];
7372 tree offset[2], base[2];
7373 for (int i = 0; i < 2; ++i)
7375 machine_mode mode;
7376 int unsignedp, reversep, volatilep = 0;
7377 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7378 &offset[i], &mode, &unsignedp,
7379 &reversep, &volatilep);
7380 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7381 return false;
7382 if (TREE_CODE (base[i]) == MEM_REF
7383 && offset[i] == NULL_TREE
7384 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7386 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7387 if (is_gimple_assign (def_stmt)
7388 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7389 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7390 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7392 if (maybe_ne (mem_ref_offset (base[i]), 0))
7393 return false;
7394 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7395 offset[i] = gimple_assign_rhs2 (def_stmt);
7400 if (!operand_equal_p (base[0], base[1], 0))
7401 return false;
7402 if (maybe_ne (bitsize[0], bitsize[1]))
7403 return false;
7404 if (offset[0] != offset[1])
7406 if (!offset[0] || !offset[1])
7407 return false;
7408 if (!operand_equal_p (offset[0], offset[1], 0))
7410 tree step[2];
7411 for (int i = 0; i < 2; ++i)
7413 step[i] = integer_one_node;
7414 if (TREE_CODE (offset[i]) == SSA_NAME)
7416 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7417 if (is_gimple_assign (def_stmt)
7418 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7419 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7420 == INTEGER_CST))
7422 step[i] = gimple_assign_rhs2 (def_stmt);
7423 offset[i] = gimple_assign_rhs1 (def_stmt);
7426 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7428 step[i] = TREE_OPERAND (offset[i], 1);
7429 offset[i] = TREE_OPERAND (offset[i], 0);
7431 tree rhs1 = NULL_TREE;
7432 if (TREE_CODE (offset[i]) == SSA_NAME)
7434 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7435 if (gimple_assign_cast_p (def_stmt))
7436 rhs1 = gimple_assign_rhs1 (def_stmt);
7438 else if (CONVERT_EXPR_P (offset[i]))
7439 rhs1 = TREE_OPERAND (offset[i], 0);
7440 if (rhs1
7441 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7442 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7443 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7444 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7445 offset[i] = rhs1;
7447 if (!operand_equal_p (offset[0], offset[1], 0)
7448 || !operand_equal_p (step[0], step[1], 0))
7449 return false;
7452 return true;
7456 enum scan_store_kind {
7457 /* Normal permutation. */
7458 scan_store_kind_perm,
7460 /* Whole vector left shift permutation with zero init. */
7461 scan_store_kind_lshift_zero,
7463 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7464 scan_store_kind_lshift_cond
7467 /* Function scan_store_can_perm_p.
7469 Verify if we can perform the needed permutations or whole vector shifts.
7470 Return -1 on failure, otherwise exact log2 of vectype's nunits.
7471 USE_WHOLE_VECTOR, if non-NULL, records which scan_store_kind
7472 operation to use at each step. */
7474 static int
7475 scan_store_can_perm_p (tree vectype, tree init,
7476 vec<enum scan_store_kind> *use_whole_vector = NULL)
7478 enum machine_mode vec_mode = TYPE_MODE (vectype);
7479 unsigned HOST_WIDE_INT nunits;
7480 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7481 return -1;
7482 int units_log2 = exact_log2 (nunits);
7483 if (units_log2 <= 0)
7484 return -1;
7486 int i;
7487 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7488 for (i = 0; i <= units_log2; ++i)
7490 unsigned HOST_WIDE_INT j, k;
7491 enum scan_store_kind kind = scan_store_kind_perm;
7492 vec_perm_builder sel (nunits, nunits, 1);
7493 sel.quick_grow (nunits);
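/* For i < units_log2 the selector built below is, e.g. for nunits == 8
   and i == 1, { 0, 1, 8, 9, 10, 11, 12, 13 }: take the first 2**i lanes
   from the first input and the remaining lanes from the start of the
   second input, i.e. a whole-vector shift by 2**i lanes with the first
   input providing the shifted-in values.  The final iteration
   (i == units_log2) instead builds a broadcast of the last lane, cf. the
   { 7, 7, ... } VEC_PERM_EXPRs in the example in check_scan_store.  */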
7494 if (i == units_log2)
7496 for (j = 0; j < nunits; ++j)
7497 sel[j] = nunits - 1;
7499 else
7501 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7502 sel[j] = j;
7503 for (k = 0; j < nunits; ++j, ++k)
7504 sel[j] = nunits + k;
7506 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7507 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7509 if (i == units_log2)
7510 return -1;
7512 if (whole_vector_shift_kind == scan_store_kind_perm)
7514 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7515 return -1;
7516 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7517 /* Whole vector shifts shift in zeros, so if init is an all-zeros
7518 constant, there is no need to do anything further. */
7519 if ((TREE_CODE (init) != INTEGER_CST
7520 && TREE_CODE (init) != REAL_CST)
7521 || !initializer_zerop (init))
7523 tree masktype = truth_type_for (vectype);
7524 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7525 return -1;
7526 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7529 kind = whole_vector_shift_kind;
7531 if (use_whole_vector)
7533 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7534 use_whole_vector->safe_grow_cleared (i, true);
7535 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7536 use_whole_vector->safe_push (kind);
7540 return units_log2;
7544 /* Function check_scan_store.
7546 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7548 static bool
7549 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7550 enum vect_def_type rhs_dt, bool slp, tree mask,
7551 vect_memory_access_type memory_access_type)
7553 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7554 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7555 tree ref_type;
7557 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7558 if (slp
7559 || mask
7560 || memory_access_type != VMAT_CONTIGUOUS
7561 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7562 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7563 || loop_vinfo == NULL
7564 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7565 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7566 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7567 || !integer_zerop (DR_INIT (dr_info->dr))
7568 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7569 || !alias_sets_conflict_p (get_alias_set (vectype),
7570 get_alias_set (TREE_TYPE (ref_type))))
7572 if (dump_enabled_p ())
7573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7574 "unsupported OpenMP scan store.\n");
7575 return false;
7578 /* We need to pattern match code built by OpenMP lowering and simplified
7579 by subsequent optimizations into something we can handle.
7580 #pragma omp simd reduction(inscan,+:r)
7581 for (...)
7583 r += something ();
7584 #pragma omp scan inclusive (r)
7585 use (r);
7587 shall have body with:
7588 // Initialization for input phase, store the reduction initializer:
7589 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7590 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7591 D.2042[_21] = 0;
7592 // Actual input phase:
7594 r.0_5 = D.2042[_20];
7595 _6 = _4 + r.0_5;
7596 D.2042[_20] = _6;
7597 // Initialization for scan phase:
7598 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7599 _26 = D.2043[_25];
7600 _27 = D.2042[_25];
7601 _28 = _26 + _27;
7602 D.2043[_25] = _28;
7603 D.2042[_25] = _28;
7604 // Actual scan phase:
7606 r.1_8 = D.2042[_20];
7608 The "omp simd array" variable D.2042 holds the privatized copy used
7609 inside of the loop and D.2043 is another one that holds copies of
7610 the current original list item. The separate GOMP_SIMD_LANE ifn
7611 kinds are there in order to allow optimizing the initializer store
7612 and combiner sequence, e.g. if it is originally some C++ish user
7613 defined reduction, but allow the vectorizer to pattern recognize it
7614 and turn it into the appropriate vectorized scan.
7616 For exclusive scan, this is slightly different:
7617 #pragma omp simd reduction(inscan,+:r)
7618 for (...)
7620 use (r);
7621 #pragma omp scan exclusive (r)
7622 r += something ();
7624 shall have body with:
7625 // Initialization for input phase, store the reduction initializer:
7626 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7627 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7628 D.2042[_21] = 0;
7629 // Actual input phase:
7631 r.0_5 = D.2042[_20];
7632 _6 = _4 + r.0_5;
7633 D.2042[_20] = _6;
7634 // Initialization for scan phase:
7635 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7636 _26 = D.2043[_25];
7637 D.2044[_25] = _26;
7638 _27 = D.2042[_25];
7639 _28 = _26 + _27;
7640 D.2043[_25] = _28;
7641 // Actual scan phase:
7643 r.1_8 = D.2044[_20];
7644 ... */
7646 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7648 /* Match the D.2042[_21] = 0; store above. Just require that
7649 it is a constant or external definition store. */
7650 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7652 fail_init:
7653 if (dump_enabled_p ())
7654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7655 "unsupported OpenMP scan initializer store.\n");
7656 return false;
7659 if (! loop_vinfo->scan_map)
7660 loop_vinfo->scan_map = new hash_map<tree, tree>;
7661 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7662 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7663 if (cached)
7664 goto fail_init;
7665 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7667 /* These stores can be vectorized normally. */
7668 return true;
7671 if (rhs_dt != vect_internal_def)
7673 fail:
7674 if (dump_enabled_p ())
7675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7676 "unsupported OpenMP scan combiner pattern.\n");
7677 return false;
7680 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7681 tree rhs = gimple_assign_rhs1 (stmt);
7682 if (TREE_CODE (rhs) != SSA_NAME)
7683 goto fail;
7685 gimple *other_store_stmt = NULL;
7686 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7687 bool inscan_var_store
7688 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7690 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7692 if (!inscan_var_store)
7694 use_operand_p use_p;
7695 imm_use_iterator iter;
7696 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7698 gimple *use_stmt = USE_STMT (use_p);
7699 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7700 continue;
7701 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7702 || !is_gimple_assign (use_stmt)
7703 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7704 || other_store_stmt
7705 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7706 goto fail;
7707 other_store_stmt = use_stmt;
7709 if (other_store_stmt == NULL)
7710 goto fail;
7711 rhs = gimple_assign_lhs (other_store_stmt);
7712 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7713 goto fail;
7716 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7718 use_operand_p use_p;
7719 imm_use_iterator iter;
7720 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7722 gimple *use_stmt = USE_STMT (use_p);
7723 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7724 continue;
7725 if (other_store_stmt)
7726 goto fail;
7727 other_store_stmt = use_stmt;
7730 else
7731 goto fail;
7733 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7734 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7735 || !is_gimple_assign (def_stmt)
7736 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7737 goto fail;
7739 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7740 /* For pointer addition, we should use the normal plus for the vector
7741 operation. */
7742 switch (code)
7744 case POINTER_PLUS_EXPR:
7745 code = PLUS_EXPR;
7746 break;
7747 case MULT_HIGHPART_EXPR:
7748 goto fail;
7749 default:
7750 break;
7752 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7753 goto fail;
7755 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7756 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7757 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7758 goto fail;
7760 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7761 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7762 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7763 || !gimple_assign_load_p (load1_stmt)
7764 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7765 || !gimple_assign_load_p (load2_stmt))
7766 goto fail;
7768 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7769 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7770 if (load1_stmt_info == NULL
7771 || load2_stmt_info == NULL
7772 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7773 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7774 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7775 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7776 goto fail;
7778 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7780 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7781 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7782 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7783 goto fail;
7784 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7785 tree lrhs;
7786 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7787 lrhs = rhs1;
7788 else
7789 lrhs = rhs2;
7790 use_operand_p use_p;
7791 imm_use_iterator iter;
7792 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7794 gimple *use_stmt = USE_STMT (use_p);
7795 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7796 continue;
7797 if (other_store_stmt)
7798 goto fail;
7799 other_store_stmt = use_stmt;
7803 if (other_store_stmt == NULL)
7804 goto fail;
7805 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7806 || !gimple_store_p (other_store_stmt))
7807 goto fail;
7809 stmt_vec_info other_store_stmt_info
7810 = loop_vinfo->lookup_stmt (other_store_stmt);
7811 if (other_store_stmt_info == NULL
7812 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7813 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7814 goto fail;
7816 gimple *stmt1 = stmt;
7817 gimple *stmt2 = other_store_stmt;
7818 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7819 std::swap (stmt1, stmt2);
7820 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7821 gimple_assign_rhs1 (load2_stmt)))
7823 std::swap (rhs1, rhs2);
7824 std::swap (load1_stmt, load2_stmt);
7825 std::swap (load1_stmt_info, load2_stmt_info);
7827 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7828 gimple_assign_rhs1 (load1_stmt)))
7829 goto fail;
7831 tree var3 = NULL_TREE;
7832 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7833 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7834 gimple_assign_rhs1 (load2_stmt)))
7835 goto fail;
7836 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7838 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7839 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7840 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7841 goto fail;
7842 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7843 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7844 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7845 || lookup_attribute ("omp simd inscan exclusive",
7846 DECL_ATTRIBUTES (var3)))
7847 goto fail;
7850 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7851 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7852 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7853 goto fail;
7855 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7856 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7857 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7858 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7859 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7860 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7861 goto fail;
7863 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7864 std::swap (var1, var2);
7866 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7868 if (!lookup_attribute ("omp simd inscan exclusive",
7869 DECL_ATTRIBUTES (var1)))
7870 goto fail;
7871 var1 = var3;
7874 if (loop_vinfo->scan_map == NULL)
7875 goto fail;
7876 tree *init = loop_vinfo->scan_map->get (var1);
7877 if (init == NULL)
7878 goto fail;
7880 /* The IL is as expected; now check if we can actually vectorize it.
7881 Inclusive scan:
7882 _26 = D.2043[_25];
7883 _27 = D.2042[_25];
7884 _28 = _26 + _27;
7885 D.2043[_25] = _28;
7886 D.2042[_25] = _28;
7887 should be vectorized as (where _40 is the vectorized rhs
7888 from the D.2042[_21] = 0; store):
7889 _30 = MEM <vector(8) int> [(int *)&D.2043];
7890 _31 = MEM <vector(8) int> [(int *)&D.2042];
7891 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7892 _33 = _31 + _32;
7893 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7894 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7895 _35 = _33 + _34;
7896 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7897 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7898 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7899 _37 = _35 + _36;
7900 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7901 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7902 _38 = _30 + _37;
7903 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7904 MEM <vector(8) int> [(int *)&D.2043] = _39;
7905 MEM <vector(8) int> [(int *)&D.2042] = _38;
7906 Exclusive scan:
7907 _26 = D.2043[_25];
7908 D.2044[_25] = _26;
7909 _27 = D.2042[_25];
7910 _28 = _26 + _27;
7911 D.2043[_25] = _28;
7912 should be vectorized as (where _40 is the vectorized rhs
7913 from the D.2042[_21] = 0; store):
7914 _30 = MEM <vector(8) int> [(int *)&D.2043];
7915 _31 = MEM <vector(8) int> [(int *)&D.2042];
7916 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7917 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7918 _34 = _32 + _33;
7919 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7920 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7921 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7922 _36 = _34 + _35;
7923 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7924 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7925 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7926 _38 = _36 + _37;
7927 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7928 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7929 _39 = _30 + _38;
7930 _50 = _31 + _39;
7931 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7932 MEM <vector(8) int> [(int *)&D.2044] = _39;
7933 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7934 enum machine_mode vec_mode = TYPE_MODE (vectype);
7935 optab optab = optab_for_tree_code (code, vectype, optab_default);
7936 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7937 goto fail;
7939 int units_log2 = scan_store_can_perm_p (vectype, *init);
7940 if (units_log2 == -1)
7941 goto fail;
7943 return true;
7947 /* Function vectorizable_scan_store.
7949 Helper of vectorizable_store; the arguments are like those of vectorizable_store.
7950 Handle only the transformation; the checking is done in check_scan_store. */
7952 static bool
7953 vectorizable_scan_store (vec_info *vinfo,
7954 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7955 gimple **vec_stmt, int ncopies)
7957 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7958 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7959 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7960 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7962 if (dump_enabled_p ())
7963 dump_printf_loc (MSG_NOTE, vect_location,
7964 "transform scan store. ncopies = %d\n", ncopies);
7966 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7967 tree rhs = gimple_assign_rhs1 (stmt);
7968 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7970 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7971 bool inscan_var_store
7972 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7974 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7976 use_operand_p use_p;
7977 imm_use_iterator iter;
7978 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7980 gimple *use_stmt = USE_STMT (use_p);
7981 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7982 continue;
7983 rhs = gimple_assign_lhs (use_stmt);
7984 break;
7988 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7989 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7990 if (code == POINTER_PLUS_EXPR)
7991 code = PLUS_EXPR;
7992 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7993 && commutative_tree_code (code));
7994 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7995 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7996 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7997 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7998 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7999 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
8000 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
8001 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
8002 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
8003 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
8004 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
8006 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
8008 std::swap (rhs1, rhs2);
8009 std::swap (var1, var2);
8010 std::swap (load1_dr_info, load2_dr_info);
8013 tree *init = loop_vinfo->scan_map->get (var1);
8014 gcc_assert (init);
8016 unsigned HOST_WIDE_INT nunits;
8017 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
8018 gcc_unreachable ();
8019 auto_vec<enum scan_store_kind, 16> use_whole_vector;
8020 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
8021 gcc_assert (units_log2 > 0);
8022 auto_vec<tree, 16> perms;
8023 perms.quick_grow (units_log2 + 1);
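/* perms[0 .. units_log2 - 1] are the per-step permutation masks used
   to shift the running scan vector by 2**i lanes (filling from the
   init vector), and perms[units_log2] broadcasts the last lane; see
   the example in the comment in check_scan_store.  */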
8024 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
8025 for (int i = 0; i <= units_log2; ++i)
8027 unsigned HOST_WIDE_INT j, k;
8028 vec_perm_builder sel (nunits, nunits, 1);
8029 sel.quick_grow (nunits);
8030 if (i == units_log2)
8031 for (j = 0; j < nunits; ++j)
8032 sel[j] = nunits - 1;
8033 else
8035 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
8036 sel[j] = j;
8037 for (k = 0; j < nunits; ++j, ++k)
8038 sel[j] = nunits + k;
8040 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
8041 if (!use_whole_vector.is_empty ()
8042 && use_whole_vector[i] != scan_store_kind_perm)
8044 if (zero_vec == NULL_TREE)
8045 zero_vec = build_zero_cst (vectype);
8046 if (masktype == NULL_TREE
8047 && use_whole_vector[i] == scan_store_kind_lshift_cond)
8048 masktype = truth_type_for (vectype);
8049 perms[i] = vect_gen_perm_mask_any (vectype, indices);
8051 else
8052 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
8055 tree vec_oprnd1 = NULL_TREE;
8056 tree vec_oprnd2 = NULL_TREE;
8057 tree vec_oprnd3 = NULL_TREE;
8058 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
8059 tree dataref_offset = build_int_cst (ref_type, 0);
8060 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
8061 vectype, VMAT_CONTIGUOUS);
8062 tree ldataref_ptr = NULL_TREE;
8063 tree orig = NULL_TREE;
8064 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
8065 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
8066 auto_vec<tree> vec_oprnds1;
8067 auto_vec<tree> vec_oprnds2;
8068 auto_vec<tree> vec_oprnds3;
8069 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
8070 *init, &vec_oprnds1,
8071 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
8072 rhs2, &vec_oprnds3);
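/* After this, vec_oprnds1 holds the vectorized reduction initializer
   (*init), vec_oprnds2 the vectorized first load operand (unless it is
   re-loaded through ldataref_ptr below for exclusive scans), and
   vec_oprnds3 the vectorized second load operand.  */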
8073 for (int j = 0; j < ncopies; j++)
8075 vec_oprnd1 = vec_oprnds1[j];
8076 if (ldataref_ptr == NULL)
8077 vec_oprnd2 = vec_oprnds2[j];
8078 vec_oprnd3 = vec_oprnds3[j];
8079 if (j == 0)
8080 orig = vec_oprnd3;
8081 else if (!inscan_var_store)
8082 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8084 if (ldataref_ptr)
8086 vec_oprnd2 = make_ssa_name (vectype);
8087 tree data_ref = fold_build2 (MEM_REF, vectype,
8088 unshare_expr (ldataref_ptr),
8089 dataref_offset);
8090 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
8091 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
8092 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8093 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8094 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8097 tree v = vec_oprnd2;
8098 for (int i = 0; i < units_log2; ++i)
8100 tree new_temp = make_ssa_name (vectype);
8101 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
8102 (zero_vec
8103 && (use_whole_vector[i]
8104 != scan_store_kind_perm))
8105 ? zero_vec : vec_oprnd1, v,
8106 perms[i]);
8107 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8108 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8109 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8111 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
8113 /* The whole-vector shift shifted in zeros, but if *init
8114 is not initializer_zerop, we need to replace those elements
8115 with elements from vec_oprnd1. */
8116 tree_vector_builder vb (masktype, nunits, 1);
8117 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
8118 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
8119 ? boolean_false_node : boolean_true_node);
8121 tree new_temp2 = make_ssa_name (vectype);
8122 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
8123 new_temp, vec_oprnd1);
8124 vect_finish_stmt_generation (vinfo, stmt_info,
8125 g, gsi);
8126 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8127 new_temp = new_temp2;
8130 /* For exclusive scan, perform the perms[i] permutation once
8131 more. */
8132 if (i == 0
8133 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
8134 && v == vec_oprnd2)
8136 v = new_temp;
8137 --i;
8138 continue;
8141 tree new_temp2 = make_ssa_name (vectype);
8142 g = gimple_build_assign (new_temp2, code, v, new_temp);
8143 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8144 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8146 v = new_temp2;
8149 tree new_temp = make_ssa_name (vectype);
8150 gimple *g = gimple_build_assign (new_temp, code, orig, v);
8151 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8152 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8154 tree last_perm_arg = new_temp;
8155 /* For exclusive scan, new_temp computed above is the exclusive scan
8156 prefix sum. Turn it into the inclusive prefix sum for the broadcast
8157 of the last element into orig. */
8158 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8160 last_perm_arg = make_ssa_name (vectype);
8161 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8162 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8163 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8166 orig = make_ssa_name (vectype);
8167 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8168 last_perm_arg, perms[units_log2]);
8169 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8170 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8172 if (!inscan_var_store)
8174 tree data_ref = fold_build2 (MEM_REF, vectype,
8175 unshare_expr (dataref_ptr),
8176 dataref_offset);
8177 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8178 g = gimple_build_assign (data_ref, new_temp);
8179 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8180 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8184 if (inscan_var_store)
8185 for (int j = 0; j < ncopies; j++)
8187 if (j != 0)
8188 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8190 tree data_ref = fold_build2 (MEM_REF, vectype,
8191 unshare_expr (dataref_ptr),
8192 dataref_offset);
8193 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8194 gimple *g = gimple_build_assign (data_ref, orig);
8195 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8196 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8198 return true;
8202 /* Function vectorizable_store.
8204 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
8205 that can be vectorized.
8206 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8207 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8208 Return true if STMT_INFO is vectorizable in this way. */
8210 static bool
8211 vectorizable_store (vec_info *vinfo,
8212 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8213 gimple **vec_stmt, slp_tree slp_node,
8214 stmt_vector_for_cost *cost_vec)
8216 tree data_ref;
8217 tree op;
8218 tree vec_oprnd = NULL_TREE;
8219 tree elem_type;
8220 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8221 class loop *loop = NULL;
8222 machine_mode vec_mode;
8223 tree dummy;
8224 enum vect_def_type rhs_dt = vect_unknown_def_type;
8225 enum vect_def_type mask_dt = vect_unknown_def_type;
8226 tree dataref_ptr = NULL_TREE;
8227 tree dataref_offset = NULL_TREE;
8228 gimple *ptr_incr = NULL;
8229 int ncopies;
8230 int j;
8231 stmt_vec_info first_stmt_info;
8232 bool grouped_store;
8233 unsigned int group_size, i;
8234 bool slp = (slp_node != NULL);
8235 unsigned int vec_num;
8236 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8237 tree aggr_type;
8238 gather_scatter_info gs_info;
8239 poly_uint64 vf;
8240 vec_load_store_type vls_type;
8241 tree ref_type;
8243 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8244 return false;
8246 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8247 && ! vec_stmt)
8248 return false;
8250 /* Is vectorizable store? */
8252 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8253 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8255 tree scalar_dest = gimple_assign_lhs (assign);
8256 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8257 && is_pattern_stmt_p (stmt_info))
8258 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8259 if (TREE_CODE (scalar_dest) != ARRAY_REF
8260 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8261 && TREE_CODE (scalar_dest) != INDIRECT_REF
8262 && TREE_CODE (scalar_dest) != COMPONENT_REF
8263 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8264 && TREE_CODE (scalar_dest) != REALPART_EXPR
8265 && TREE_CODE (scalar_dest) != MEM_REF)
8266 return false;
8268 else
8270 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8271 if (!call || !gimple_call_internal_p (call))
8272 return false;
8274 internal_fn ifn = gimple_call_internal_fn (call);
8275 if (!internal_store_fn_p (ifn))
8276 return false;
8278 int mask_index = internal_fn_mask_index (ifn);
8279 if (mask_index >= 0 && slp_node)
8280 mask_index = vect_slp_child_index_for_operand (call, mask_index);
8281 if (mask_index >= 0
8282 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8283 &mask, NULL, &mask_dt, &mask_vectype))
8284 return false;
8287 op = vect_get_store_rhs (stmt_info);
8289 /* Cannot have hybrid store SLP -- that would mean storing to the
8290 same location twice. */
8291 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8293 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8294 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8296 if (loop_vinfo)
8298 loop = LOOP_VINFO_LOOP (loop_vinfo);
8299 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8301 else
8302 vf = 1;
8304 /* Multiple types in SLP are handled by creating the appropriate number of
8305 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8306 case of SLP. */
8307 if (slp)
8308 ncopies = 1;
8309 else
8310 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8312 gcc_assert (ncopies >= 1);
8314 /* FORNOW. This restriction should be relaxed. */
8315 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8317 if (dump_enabled_p ())
8318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8319 "multiple types in nested loop.\n");
8320 return false;
8323 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8324 op, &rhs_dt, &rhs_vectype, &vls_type))
8325 return false;
8327 elem_type = TREE_TYPE (vectype);
8328 vec_mode = TYPE_MODE (vectype);
8330 if (!STMT_VINFO_DATA_REF (stmt_info))
8331 return false;
8333 vect_memory_access_type memory_access_type;
8334 enum dr_alignment_support alignment_support_scheme;
8335 int misalignment;
8336 poly_int64 poffset;
8337 internal_fn lanes_ifn;
8338 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8339 ncopies, &memory_access_type, &poffset,
8340 &alignment_support_scheme, &misalignment, &gs_info,
8341 &lanes_ifn))
8342 return false;
8344 if (mask)
8346 if (memory_access_type == VMAT_CONTIGUOUS)
8348 if (!VECTOR_MODE_P (vec_mode)
8349 || !can_vec_mask_load_store_p (vec_mode,
8350 TYPE_MODE (mask_vectype), false))
8351 return false;
8353 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8354 && (memory_access_type != VMAT_GATHER_SCATTER
8355 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8357 if (dump_enabled_p ())
8358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8359 "unsupported access type for masked store.\n");
8360 return false;
8362 else if (memory_access_type == VMAT_GATHER_SCATTER
8363 && gs_info.ifn == IFN_LAST
8364 && !gs_info.decl)
8366 if (dump_enabled_p ())
8367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8368 "unsupported masked emulated scatter.\n");
8369 return false;
8372 else
8374 /* FORNOW. In some cases we can vectorize even if the data type is not
8375 supported (e.g. array initialization with 0). */
8376 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8377 return false;
8380 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8381 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8382 && memory_access_type != VMAT_GATHER_SCATTER
8383 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8384 if (grouped_store)
8386 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8387 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8388 group_size = DR_GROUP_SIZE (first_stmt_info);
8390 else
8392 first_stmt_info = stmt_info;
8393 first_dr_info = dr_info;
8394 group_size = vec_num = 1;
8397 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8399 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8400 memory_access_type))
8401 return false;
8404 bool costing_p = !vec_stmt;
8405 if (costing_p) /* transformation not required. */
8407 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8409 if (loop_vinfo
8410 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8411 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8412 vls_type, group_size,
8413 memory_access_type, &gs_info,
8414 mask);
8416 if (slp_node
8417 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8418 vectype))
8420 if (dump_enabled_p ())
8421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8422 "incompatible vector types for invariants\n");
8423 return false;
8426 if (dump_enabled_p ()
8427 && memory_access_type != VMAT_ELEMENTWISE
8428 && memory_access_type != VMAT_GATHER_SCATTER
8429 && alignment_support_scheme != dr_aligned)
8430 dump_printf_loc (MSG_NOTE, vect_location,
8431 "Vectorizing an unaligned access.\n");
8433 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8435 /* As function vect_transform_stmt shows, for interleaving stores
8436 the whole chain is vectorized when the last store in the chain
8437 is reached, the other stores in the group are skipped. So we
8438 want to cost only the last one here.  However, it is not trivial to
8439 get the last one; since costing the first one is equivalent, use
8440 the first one instead. */
8441 if (grouped_store
8442 && !slp
8443 && first_stmt_info != stmt_info)
8444 return true;
8446 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8448 /* Transform. */
8450 ensure_base_align (dr_info);
8452 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8454 vect_build_scatter_store_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info,
8455 mask, cost_vec);
8456 return true;
8458 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8460 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8461 gcc_assert (!slp);
8462 if (costing_p)
8464 unsigned int inside_cost = 0, prologue_cost = 0;
8465 if (vls_type == VLS_STORE_INVARIANT)
8466 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8467 stmt_info, 0, vect_prologue);
8468 vect_get_store_cost (vinfo, stmt_info, ncopies,
8469 alignment_support_scheme, misalignment,
8470 &inside_cost, cost_vec);
8472 if (dump_enabled_p ())
8473 dump_printf_loc (MSG_NOTE, vect_location,
8474 "vect_model_store_cost: inside_cost = %d, "
8475 "prologue_cost = %d .\n",
8476 inside_cost, prologue_cost);
8478 return true;
8480 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8483 if (grouped_store)
8485 /* FORNOW */
8486 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8488 if (slp)
8490 grouped_store = false;
8491 /* VEC_NUM is the number of vect stmts to be created for this
8492 group. */
8493 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8494 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8495 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8496 == first_stmt_info);
8497 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8498 op = vect_get_store_rhs (first_stmt_info);
8500 else
8501 /* VEC_NUM is the number of vect stmts to be created for this
8502 group. */
8503 vec_num = group_size;
8505 ref_type = get_group_alias_ptr_type (first_stmt_info);
8507 else
8508 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8510 if (!costing_p && dump_enabled_p ())
8511 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8512 ncopies);
8514 /* Check if we need to update the prologue cost for an invariant,
8515 and update it accordingly if so. If it's not an
8516 interleaving store, we can just check vls_type; but if it
8517 is an interleaving store, we need to check the def_type
8518 of the stored value since the current vls_type is just
8519 for first_stmt_info. */
8520 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8522 gcc_assert (costing_p);
8523 if (slp)
8524 return;
8525 if (grouped_store)
8527 gcc_assert (store_rhs);
8528 enum vect_def_type cdt;
8529 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8530 if (cdt != vect_constant_def && cdt != vect_external_def)
8531 return;
8533 else if (vls_type != VLS_STORE_INVARIANT)
8534 return;
8535 *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
8536 0, vect_prologue);
8539 if (memory_access_type == VMAT_ELEMENTWISE
8540 || memory_access_type == VMAT_STRIDED_SLP)
8542 unsigned inside_cost = 0, prologue_cost = 0;
8543 gimple_stmt_iterator incr_gsi;
8544 bool insert_after;
8545 gimple *incr;
8546 tree offvar;
8547 tree ivstep;
8548 tree running_off;
8549 tree stride_base, stride_step, alias_off;
8550 tree vec_oprnd = NULL_TREE;
8551 tree dr_offset;
8552 unsigned int g;
8553 /* Checked by get_load_store_type. */
8554 unsigned int const_nunits = nunits.to_constant ();
8556 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8557 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8559 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8560 stride_base
8561 = fold_build_pointer_plus
8562 (DR_BASE_ADDRESS (first_dr_info->dr),
8563 size_binop (PLUS_EXPR,
8564 convert_to_ptrofftype (dr_offset),
8565 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8566 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8568 /* For a store with a loop-invariant stride that is not a power of two
8569 (i.e. not a grouped access) like so:
8571 for (i = 0; i < n; i += stride)
8572 array[i] = ...;
8574 we generate a new induction variable and new stores from
8575 the components of the (vectorized) rhs:
8577 for (j = 0; ; j += VF*stride)
8578 vectemp = ...;
8579 tmp1 = vectemp[0];
8580 array[j] = tmp1;
8581 tmp2 = vectemp[1];
8582 array[j + stride] = tmp2;
8586 unsigned nstores = const_nunits;
8587 unsigned lnel = 1;
8588 tree ltype = elem_type;
8589 tree lvectype = vectype;
8590 if (slp)
8592 if (group_size < const_nunits
8593 && const_nunits % group_size == 0)
8595 nstores = const_nunits / group_size;
8596 lnel = group_size;
8597 ltype = build_vector_type (elem_type, group_size);
8598 lvectype = vectype;
8600 /* First check if vec_extract optab doesn't support extraction
8601 of vector elts directly. */
8602 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8603 machine_mode vmode;
8604 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8605 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8606 group_size).exists (&vmode)
8607 || (convert_optab_handler (vec_extract_optab,
8608 TYPE_MODE (vectype), vmode)
8609 == CODE_FOR_nothing))
8611 /* Try to avoid emitting an extract of vector elements
8612 by performing the extracts using an integer type of the
8613 same size, extracting from a vector of those and then
8614 re-interpreting it as the original vector type if
8615 supported. */
8616 unsigned lsize
8617 = group_size * GET_MODE_BITSIZE (elmode);
8618 unsigned int lnunits = const_nunits / group_size;
8619 /* If we can't construct such a vector fall back to
8620 element extracts from the original vector type and
8621 element size stores. */
8622 if (int_mode_for_size (lsize, 0).exists (&elmode)
8623 && VECTOR_MODE_P (TYPE_MODE (vectype))
8624 && related_vector_mode (TYPE_MODE (vectype), elmode,
8625 lnunits).exists (&vmode)
8626 && (convert_optab_handler (vec_extract_optab,
8627 vmode, elmode)
8628 != CODE_FOR_nothing))
8630 nstores = lnunits;
8631 lnel = group_size;
8632 ltype = build_nonstandard_integer_type (lsize, 1);
8633 lvectype = build_vector_type (ltype, nstores);
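/* E.g. for a V4SI vector and group_size == 2 this would extract
   two 64-bit integer chunks from a V2DI view of the vector rather
   than four SImode elements.  */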
8635 /* Else fall back to vector extraction anyway.
8636 Fewer stores are more important than avoiding spilling
8637 of the vector we extract from. Compared to the
8638 construction case in vectorizable_load no store-forwarding
8639 issue exists here for reasonable archs. */
8642 else if (group_size >= const_nunits
8643 && group_size % const_nunits == 0)
8645 int mis_align = dr_misalignment (first_dr_info, vectype);
8646 dr_alignment_support dr_align
8647 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8648 mis_align);
8649 if (dr_align == dr_aligned
8650 || dr_align == dr_unaligned_supported)
8652 nstores = 1;
8653 lnel = const_nunits;
8654 ltype = vectype;
8655 lvectype = vectype;
8656 alignment_support_scheme = dr_align;
8657 misalignment = mis_align;
8660 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8661 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8664 if (!costing_p)
8666 ivstep = stride_step;
8667 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8668 build_int_cst (TREE_TYPE (ivstep), vf));
8670 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8672 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8673 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8674 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8675 insert_after, &offvar, NULL);
8676 incr = gsi_stmt (incr_gsi);
8678 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8681 alias_off = build_int_cst (ref_type, 0);
8682 stmt_vec_info next_stmt_info = first_stmt_info;
8683 auto_vec<tree> vec_oprnds (ncopies);
8684 for (g = 0; g < group_size; g++)
8686 running_off = offvar;
8687 if (!costing_p)
8689 if (g)
8691 tree size = TYPE_SIZE_UNIT (ltype);
8692 tree pos
8693 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8694 tree newoff = copy_ssa_name (running_off, NULL);
8695 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8696 running_off, pos);
8697 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8698 running_off = newoff;
8701 if (!slp)
8702 op = vect_get_store_rhs (next_stmt_info);
8703 if (!costing_p)
8704 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
8705 &vec_oprnds);
8706 else
8707 update_prologue_cost (&prologue_cost, op);
8708 unsigned int group_el = 0;
8709 unsigned HOST_WIDE_INT
8710 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8711 for (j = 0; j < ncopies; j++)
8713 if (!costing_p)
8715 vec_oprnd = vec_oprnds[j];
8716 /* Pun the vector to extract from if necessary. */
8717 if (lvectype != vectype)
8719 tree tem = make_ssa_name (lvectype);
8720 tree cvt
8721 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8722 gimple *pun = gimple_build_assign (tem, cvt);
8723 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8724 vec_oprnd = tem;
8727 for (i = 0; i < nstores; i++)
8729 if (costing_p)
8731 /* We only need vector extraction when there is more
8732 than one store. */
8733 if (nstores > 1)
8734 inside_cost
8735 += record_stmt_cost (cost_vec, 1, vec_to_scalar,
8736 stmt_info, 0, vect_body);
8737 /* Treat a single-lane vector type store as a scalar
8738 store to avoid ICEs like PR110776. */
8739 if (VECTOR_TYPE_P (ltype)
8740 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8741 vect_get_store_cost (vinfo, stmt_info, 1,
8742 alignment_support_scheme,
8743 misalignment, &inside_cost,
8744 cost_vec);
8745 else
8746 inside_cost
8747 += record_stmt_cost (cost_vec, 1, scalar_store,
8748 stmt_info, 0, vect_body);
8749 continue;
8751 tree newref, newoff;
8752 gimple *incr, *assign;
8753 tree size = TYPE_SIZE (ltype);
8754 /* Extract the i'th component. */
8755 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8756 bitsize_int (i), size);
8757 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8758 size, pos);
8760 elem = force_gimple_operand_gsi (gsi, elem, true,
8761 NULL_TREE, true,
8762 GSI_SAME_STMT);
8764 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8765 group_el * elsz);
8766 newref = build2 (MEM_REF, ltype,
8767 running_off, this_off);
8768 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8770 /* And store it to *running_off. */
8771 assign = gimple_build_assign (newref, elem);
8772 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8774 group_el += lnel;
8775 if (! slp
8776 || group_el == group_size)
8778 newoff = copy_ssa_name (running_off, NULL);
8779 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8780 running_off, stride_step);
8781 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8783 running_off = newoff;
8784 group_el = 0;
8786 if (g == group_size - 1
8787 && !slp)
8789 if (j == 0 && i == 0)
8790 *vec_stmt = assign;
8791 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8795 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8796 vec_oprnds.truncate(0);
8797 if (slp)
8798 break;
8801 if (costing_p && dump_enabled_p ())
8802 dump_printf_loc (MSG_NOTE, vect_location,
8803 "vect_model_store_cost: inside_cost = %d, "
8804 "prologue_cost = %d .\n",
8805 inside_cost, prologue_cost);
8807 return true;
8810 gcc_assert (alignment_support_scheme);
8811 vec_loop_masks *loop_masks
8812 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8813 ? &LOOP_VINFO_MASKS (loop_vinfo)
8814 : NULL);
8815 vec_loop_lens *loop_lens
8816 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8817 ? &LOOP_VINFO_LENS (loop_vinfo)
8818 : NULL);
 8820   /* Shouldn't use a length-based approach if fully masked.  */
8821 gcc_assert (!loop_lens || !loop_masks);
8823 /* Targets with store-lane instructions must not require explicit
8824 realignment. vect_supportable_dr_alignment always returns either
8825 dr_aligned or dr_unaligned_supported for masked operations. */
8826 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8827 && !mask
8828 && !loop_masks)
8829 || alignment_support_scheme == dr_aligned
8830 || alignment_support_scheme == dr_unaligned_supported);
8832 tree offset = NULL_TREE;
8833 if (!known_eq (poffset, 0))
8834 offset = size_int (poffset);
8836 tree bump;
8837 tree vec_offset = NULL_TREE;
8838 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8840 aggr_type = NULL_TREE;
8841 bump = NULL_TREE;
8843 else if (memory_access_type == VMAT_GATHER_SCATTER)
8845 aggr_type = elem_type;
8846 if (!costing_p)
8847 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8848 &bump, &vec_offset, loop_lens);
8850 else
8852 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8853 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8854 else
8855 aggr_type = vectype;
8856 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8857 memory_access_type, loop_lens);
8860 if (mask && !costing_p)
8861 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8863 /* In case the vectorization factor (VF) is bigger than the number
8864 of elements that we can fit in a vectype (nunits), we have to generate
 8865      more than one vector stmt, i.e. we need to "unroll" the
8866 vector stmt by a factor VF/nunits. */
8868 /* In case of interleaving (non-unit grouped access):
8870 S1: &base + 2 = x2
8871 S2: &base = x0
8872 S3: &base + 1 = x1
8873 S4: &base + 3 = x3
 8875      We create vectorized stores starting from the base address (the access of
 8876      the first stmt in the chain, S2 in the above example) when the last store
 8877      stmt of the chain (S4) is reached:
8879 VS1: &base = vx2
8880 VS2: &base + vec_size*1 = vx0
8881 VS3: &base + vec_size*2 = vx1
8882 VS4: &base + vec_size*3 = vx3
8884 Then permutation statements are generated:
8886 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8887 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8890 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8891 (the order of the data-refs in the output of vect_permute_store_chain
8892 corresponds to the order of scalar stmts in the interleaving chain - see
8893 the documentation of vect_permute_store_chain()).
8895 In case of both multiple types and interleaving, above vector stores and
8896 permutation stmts are created for every copy. The result vector stmts are
8897 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8898 STMT_VINFO_RELATED_STMT for the next copies.
8901 auto_vec<tree> dr_chain (group_size);
8902 auto_vec<tree> vec_masks;
8903 tree vec_mask = NULL;
8904 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8905 for (i = 0; i < group_size; i++)
8906 gvec_oprnds.quick_push (new auto_vec<tree> (ncopies));
8908 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8910 gcc_assert (!slp && grouped_store);
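      /* Illustrative sketch: for an interleaved group the loop below collects
	 the per-lane vector operands into an array and emits a single
	 store-lanes internal call, e.g.

	   MEM <...> = .STORE_LANES (vec_array);

	 (or its MASK_/MASK_LEN_ variants), which targets such as AArch64
	 implement with st2/st3/st4-style instructions.  */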
8911 unsigned inside_cost = 0, prologue_cost = 0;
8912 for (j = 0; j < ncopies; j++)
8914 gimple *new_stmt;
8915 if (j == 0)
8917 /* For interleaved stores we collect vectorized defs for all
8918 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8919 as an input to vect_permute_store_chain(). */
8920 stmt_vec_info next_stmt_info = first_stmt_info;
8921 for (i = 0; i < group_size; i++)
8923 /* Since gaps are not supported for interleaved stores,
8924 DR_GROUP_SIZE is the exact number of stmts in the
8925 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8926 op = vect_get_store_rhs (next_stmt_info);
8927 if (costing_p)
8928 update_prologue_cost (&prologue_cost, op);
8929 else
8931 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8932 ncopies, op,
8933 gvec_oprnds[i]);
8934 vec_oprnd = (*gvec_oprnds[i])[0];
8935 dr_chain.quick_push (vec_oprnd);
8937 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8940 if (!costing_p)
8942 if (mask)
8944 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8945 mask, &vec_masks,
8946 mask_vectype);
8947 vec_mask = vec_masks[0];
 8950           /* We should have caught mismatched types earlier.  */
8951 gcc_assert (
8952 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8953 dataref_ptr
8954 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8955 aggr_type, NULL, offset, &dummy,
8956 gsi, &ptr_incr, false, bump);
8959 else if (!costing_p)
8961 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8962 /* DR_CHAIN is then used as an input to
8963 vect_permute_store_chain(). */
8964 for (i = 0; i < group_size; i++)
8966 vec_oprnd = (*gvec_oprnds[i])[j];
8967 dr_chain[i] = vec_oprnd;
8969 if (mask)
8970 vec_mask = vec_masks[j];
8971 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8972 stmt_info, bump);
8975 if (costing_p)
8977 for (i = 0; i < vec_num; i++)
8978 vect_get_store_cost (vinfo, stmt_info, 1,
8979 alignment_support_scheme, misalignment,
8980 &inside_cost, cost_vec);
8981 continue;
8984 /* Get an array into which we can store the individual vectors. */
8985 tree vec_array = create_vector_array (vectype, vec_num);
8987 /* Invalidate the current contents of VEC_ARRAY. This should
8988 become an RTL clobber too, which prevents the vector registers
8989 from being upward-exposed. */
8990 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8992 /* Store the individual vectors into the array. */
8993 for (i = 0; i < vec_num; i++)
8995 vec_oprnd = dr_chain[i];
8996 write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
9000 tree final_mask = NULL;
9001 tree final_len = NULL;
9002 tree bias = NULL;
9003 if (loop_masks)
9004 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9005 ncopies, vectype, j);
9006 if (vec_mask)
9007 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9008 vec_mask, gsi);
9010 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
9012 if (loop_lens)
9013 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9014 ncopies, vectype, j, 1);
9015 else
9016 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9017 signed char biasval
9018 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9019 bias = build_int_cst (intQI_type_node, biasval);
9020 if (!final_mask)
9022 mask_vectype = truth_type_for (vectype);
9023 final_mask = build_minus_one_cst (mask_vectype);
9027 gcall *call;
9028 if (final_len && final_mask)
9030 /* Emit:
9031 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
9032 LEN, BIAS, VEC_ARRAY). */
9033 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9034 tree alias_ptr = build_int_cst (ref_type, align);
9035 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
9036 dataref_ptr, alias_ptr,
9037 final_mask, final_len, bias,
9038 vec_array);
9040 else if (final_mask)
9042 /* Emit:
9043 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
9044 VEC_ARRAY). */
9045 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9046 tree alias_ptr = build_int_cst (ref_type, align);
9047 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
9048 dataref_ptr, alias_ptr,
9049 final_mask, vec_array);
9051 else
9053 /* Emit:
9054 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
9055 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9056 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
9057 gimple_call_set_lhs (call, data_ref);
9059 gimple_call_set_nothrow (call, true);
9060 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9061 new_stmt = call;
9063 /* Record that VEC_ARRAY is now dead. */
9064 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9065 if (j == 0)
9066 *vec_stmt = new_stmt;
9067 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9070 if (costing_p && dump_enabled_p ())
9071 dump_printf_loc (MSG_NOTE, vect_location,
9072 "vect_model_store_cost: inside_cost = %d, "
9073 "prologue_cost = %d .\n",
9074 inside_cost, prologue_cost);
9076 return true;
9079 if (memory_access_type == VMAT_GATHER_SCATTER)
9081 gcc_assert (!slp && !grouped_store);
9082 auto_vec<tree> vec_offsets;
9083 unsigned int inside_cost = 0, prologue_cost = 0;
9084 for (j = 0; j < ncopies; j++)
9086 gimple *new_stmt;
9087 if (j == 0)
9089 if (costing_p && vls_type == VLS_STORE_INVARIANT)
9090 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
9091 stmt_info, 0, vect_prologue);
9092 else if (!costing_p)
9094 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
9095 DR_CHAIN is of size 1. */
9096 gcc_assert (group_size == 1);
9097 op = vect_get_store_rhs (first_stmt_info);
9098 vect_get_vec_defs_for_operand (vinfo, first_stmt_info,
9099 ncopies, op, gvec_oprnds[0]);
9100 vec_oprnd = (*gvec_oprnds[0])[0];
9101 dr_chain.quick_push (vec_oprnd);
9102 if (mask)
9104 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9105 mask, &vec_masks,
9106 mask_vectype);
9107 vec_mask = vec_masks[0];
 9110           /* We should have caught mismatched types earlier.  */
9111 gcc_assert (
9112 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
9113 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9114 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9115 slp_node, &gs_info,
9116 &dataref_ptr, &vec_offsets);
9117 else
9118 dataref_ptr
9119 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
9120 aggr_type, NULL, offset,
9121 &dummy, gsi, &ptr_incr, false,
9122 bump);
9125 else if (!costing_p)
9127 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9128 vec_oprnd = (*gvec_oprnds[0])[j];
9129 dr_chain[0] = vec_oprnd;
9130 if (mask)
9131 vec_mask = vec_masks[j];
9132 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9133 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9134 gsi, stmt_info, bump);
9137 new_stmt = NULL;
9138 unsigned HOST_WIDE_INT align;
9139 tree final_mask = NULL_TREE;
9140 tree final_len = NULL_TREE;
9141 tree bias = NULL_TREE;
9142 if (!costing_p)
9144 if (loop_masks)
9145 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9146 ncopies, vectype, j);
9147 if (vec_mask)
9148 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9149 final_mask, vec_mask, gsi);
9152 if (gs_info.ifn != IFN_LAST)
9154 if (costing_p)
9156 unsigned int cnunits = vect_nunits_for_cost (vectype);
9157 inside_cost
9158 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9159 stmt_info, 0, vect_body);
9160 continue;
9163 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9164 vec_offset = vec_offsets[j];
9165 tree scale = size_int (gs_info.scale);
9167 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9169 if (loop_lens)
9170 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9171 ncopies, vectype, j, 1);
9172 else
9173 final_len = build_int_cst (sizetype,
9174 TYPE_VECTOR_SUBPARTS (vectype));
9175 signed char biasval
9176 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9177 bias = build_int_cst (intQI_type_node, biasval);
9178 if (!final_mask)
9180 mask_vectype = truth_type_for (vectype);
9181 final_mask = build_minus_one_cst (mask_vectype);
9185 gcall *call;
9186 if (final_len && final_mask)
9187 call = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
9188 7, dataref_ptr, vec_offset,
9189 scale, vec_oprnd, final_mask,
9190 final_len, bias);
9191 else if (final_mask)
9192 call
9193 = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
9194 dataref_ptr, vec_offset, scale,
9195 vec_oprnd, final_mask);
9196 else
9197 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9198 dataref_ptr, vec_offset,
9199 scale, vec_oprnd);
9200 gimple_call_set_nothrow (call, true);
9201 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9202 new_stmt = call;
9204 else
9206 /* Emulated scatter. */
9207 gcc_assert (!final_mask);
9208 if (costing_p)
9210 unsigned int cnunits = vect_nunits_for_cost (vectype);
 9211               /* For an emulated scatter, N offset vector element extracts
 9212                  (we assume the scalar scaling and ptr + offset add is
 9213                  consumed by the store).  */
9214 inside_cost
9215 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9216 stmt_info, 0, vect_body);
9217 /* N scalar stores plus extracting the elements. */
9218 inside_cost
9219 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9220 stmt_info, 0, vect_body);
9221 inside_cost
9222 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9223 stmt_info, 0, vect_body);
9224 continue;
9227 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9228 unsigned HOST_WIDE_INT const_offset_nunits
9229 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
9230 vec<constructor_elt, va_gc> *ctor_elts;
9231 vec_alloc (ctor_elts, const_nunits);
9232 gimple_seq stmts = NULL;
9233 tree elt_type = TREE_TYPE (vectype);
9234 unsigned HOST_WIDE_INT elt_size
9235 = tree_to_uhwi (TYPE_SIZE (elt_type));
9236 /* We support offset vectors with more elements
9237 than the data vector for now. */
9238 unsigned HOST_WIDE_INT factor
9239 = const_offset_nunits / const_nunits;
9240 vec_offset = vec_offsets[j / factor];
9241 unsigned elt_offset = (j % factor) * const_nunits;
9242 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9243 tree scale = size_int (gs_info.scale);
9244 align = get_object_alignment (DR_REF (first_dr_info->dr));
9245 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
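	      /* Illustrative sketch of the per-lane expansion emitted by the
		 loop below, assuming V4SI data and a V4SI offset vector;
		 lane K becomes

		   idx_K = BIT_FIELD_REF <vec_offset, 32, K*32>;
		   ptr_K = dataref_ptr + (sizetype) idx_K * scale;
		   elt_K = BIT_FIELD_REF <vec_oprnd, 32, K*32>;
		   MEM[(int *) ptr_K] = elt_K;  */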
9246 for (unsigned k = 0; k < const_nunits; ++k)
9248 /* Compute the offsetted pointer. */
9249 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9250 bitsize_int (k + elt_offset));
9251 tree idx
9252 = gimple_build (&stmts, BIT_FIELD_REF, idx_type, vec_offset,
9253 TYPE_SIZE (idx_type), boff);
9254 idx = gimple_convert (&stmts, sizetype, idx);
9255 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx, scale);
9256 tree ptr
9257 = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (dataref_ptr),
9258 dataref_ptr, idx);
9259 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9260 /* Extract the element to be stored. */
9261 tree elt
9262 = gimple_build (&stmts, BIT_FIELD_REF, TREE_TYPE (vectype),
9263 vec_oprnd, TYPE_SIZE (elt_type),
9264 bitsize_int (k * elt_size));
9265 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9266 stmts = NULL;
9267 tree ref
9268 = build2 (MEM_REF, ltype, ptr, build_int_cst (ref_type, 0));
9269 new_stmt = gimple_build_assign (ref, elt);
9270 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9273 if (j == 0)
9274 *vec_stmt = new_stmt;
9275 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9278 if (costing_p && dump_enabled_p ())
9279 dump_printf_loc (MSG_NOTE, vect_location,
9280 "vect_model_store_cost: inside_cost = %d, "
9281 "prologue_cost = %d .\n",
9282 inside_cost, prologue_cost);
9284 return true;
9287 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9288 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9289 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9290 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9292 unsigned inside_cost = 0, prologue_cost = 0;
9293 auto_vec<tree> result_chain (group_size);
9294 auto_vec<tree, 1> vec_oprnds;
9295 for (j = 0; j < ncopies; j++)
9297 gimple *new_stmt;
9298 if (j == 0)
9300 if (slp && !costing_p)
9302 /* Get vectorized arguments for SLP_NODE. */
9303 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
9304 &vec_oprnds, mask, &vec_masks);
9305 vec_oprnd = vec_oprnds[0];
9306 if (mask)
9307 vec_mask = vec_masks[0];
9309 else
9311 /* For interleaved stores we collect vectorized defs for all the
9312 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9313 input to vect_permute_store_chain().
9315 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9316 is of size 1. */
9317 stmt_vec_info next_stmt_info = first_stmt_info;
9318 for (i = 0; i < group_size; i++)
9320 /* Since gaps are not supported for interleaved stores,
9321 DR_GROUP_SIZE is the exact number of stmts in the chain.
 9322                  Therefore, NEXT_STMT_INFO can't be NULL_TREE.  If there
 9323                  is no interleaving, DR_GROUP_SIZE is 1,
9324 and only one iteration of the loop will be executed. */
9325 op = vect_get_store_rhs (next_stmt_info);
9326 if (costing_p)
9327 update_prologue_cost (&prologue_cost, op);
9328 else
9330 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
9331 ncopies, op,
9332 gvec_oprnds[i]);
9333 vec_oprnd = (*gvec_oprnds[i])[0];
9334 dr_chain.quick_push (vec_oprnd);
9336 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9338 if (mask && !costing_p)
9340 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9341 mask, &vec_masks,
9342 mask_vectype);
9343 vec_mask = vec_masks[0];
 9347       /* We should have caught mismatched types earlier.  */
9348 gcc_assert (costing_p
9349 || useless_type_conversion_p (vectype,
9350 TREE_TYPE (vec_oprnd)));
9351 bool simd_lane_access_p
9352 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9353 if (!costing_p
9354 && simd_lane_access_p
9355 && !loop_masks
9356 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9357 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9358 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9359 && integer_zerop (DR_INIT (first_dr_info->dr))
9360 && alias_sets_conflict_p (get_alias_set (aggr_type),
9361 get_alias_set (TREE_TYPE (ref_type))))
9363 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9364 dataref_offset = build_int_cst (ref_type, 0);
9366 else if (!costing_p)
9367 dataref_ptr
9368 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9369 simd_lane_access_p ? loop : NULL,
9370 offset, &dummy, gsi, &ptr_incr,
9371 simd_lane_access_p, bump);
9373 else if (!costing_p)
9375 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9376 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9377 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9378 of size 1. */
9379 for (i = 0; i < group_size; i++)
9381 vec_oprnd = (*gvec_oprnds[i])[j];
9382 dr_chain[i] = vec_oprnd;
9384 if (mask)
9385 vec_mask = vec_masks[j];
9386 if (dataref_offset)
9387 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9388 else
9389 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9390 stmt_info, bump);
9393 new_stmt = NULL;
9394 if (grouped_store)
9396 /* Permute. */
9397 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9398 if (costing_p)
9400 int group_size = DR_GROUP_SIZE (first_stmt_info);
9401 int nstmts = ceil_log2 (group_size) * group_size;
9402 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
9403 stmt_info, 0, vect_body);
9404 if (dump_enabled_p ())
9405 dump_printf_loc (MSG_NOTE, vect_location,
9406 "vect_model_store_cost: "
9407 "strided group_size = %d .\n",
9408 group_size);
9410 else
9411 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9412 gsi, &result_chain);
9415 stmt_vec_info next_stmt_info = first_stmt_info;
9416 for (i = 0; i < vec_num; i++)
9418 if (!costing_p)
9420 if (slp)
9421 vec_oprnd = vec_oprnds[i];
9422 else if (grouped_store)
9423 /* For grouped stores vectorized defs are interleaved in
9424 vect_permute_store_chain(). */
9425 vec_oprnd = result_chain[i];
9428 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9430 if (costing_p)
9431 inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9432 stmt_info, 0, vect_body);
9433 else
9435 tree perm_mask = perm_mask_for_reverse (vectype);
9436 tree perm_dest = vect_create_destination_var (
9437 vect_get_store_rhs (stmt_info), vectype);
9438 tree new_temp = make_ssa_name (perm_dest);
9440 /* Generate the permute statement. */
9441 gimple *perm_stmt
9442 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9443 vec_oprnd, perm_mask);
9444 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
9445 gsi);
9447 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9448 vec_oprnd = new_temp;
9452 if (costing_p)
9454 vect_get_store_cost (vinfo, stmt_info, 1,
9455 alignment_support_scheme, misalignment,
9456 &inside_cost, cost_vec);
9458 if (!slp)
9460 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9461 if (!next_stmt_info)
9462 break;
9465 continue;
9468 tree final_mask = NULL_TREE;
9469 tree final_len = NULL_TREE;
9470 tree bias = NULL_TREE;
9471 if (loop_masks)
9472 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9473 vec_num * ncopies, vectype,
9474 vec_num * j + i);
9475 if (slp && vec_mask)
9476 vec_mask = vec_masks[i];
9477 if (vec_mask)
9478 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9479 vec_mask, gsi);
9481 if (i > 0)
9482 /* Bump the vector pointer. */
9483 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9484 stmt_info, bump);
9486 unsigned misalign;
9487 unsigned HOST_WIDE_INT align;
9488 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9489 if (alignment_support_scheme == dr_aligned)
9490 misalign = 0;
9491 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9493 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9494 misalign = 0;
9496 else
9497 misalign = misalignment;
9498 if (dataref_offset == NULL_TREE
9499 && TREE_CODE (dataref_ptr) == SSA_NAME)
9500 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9501 misalign);
9502 align = least_bit_hwi (misalign | align);
 9504       /* Compute the partial-access IFN when LOOP_LENS or FINAL_MASK is valid.  */
9505 machine_mode vmode = TYPE_MODE (vectype);
9506 machine_mode new_vmode = vmode;
9507 internal_fn partial_ifn = IFN_LAST;
9508 if (loop_lens)
9510 opt_machine_mode new_ovmode
9511 = get_len_load_store_mode (vmode, false, &partial_ifn);
9512 new_vmode = new_ovmode.require ();
9513 unsigned factor
9514 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9515 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9516 vec_num * ncopies, vectype,
9517 vec_num * j + i, factor);
9519 else if (final_mask)
9521 if (!can_vec_mask_load_store_p (
9522 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9523 &partial_ifn))
9524 gcc_unreachable ();
9527 if (partial_ifn == IFN_MASK_LEN_STORE)
9529 if (!final_len)
 9531               /* Pass the number of vector lanes as the 'len'
 9532                  argument of MASK_LEN_STORE if LOOP_LENS is invalid.  */
9533 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9535 if (!final_mask)
 9537               /* Pass an all-ones value as the 'mask' argument of
 9538                  MASK_LEN_STORE if FINAL_MASK is invalid.  */
9539 mask_vectype = truth_type_for (vectype);
9540 final_mask = build_minus_one_cst (mask_vectype);
9543 if (final_len)
9545 signed char biasval
9546 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9548 bias = build_int_cst (intQI_type_node, biasval);
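	      /* The bias is a target-specific constant (currently 0 or -1)
		 that is applied to the length operand of the len_load/
		 len_store family; e.g. the call built below has the shape

		   .MASK_LEN_STORE (dataref_ptr, align, mask, len, bias, vec);

		 (illustrative only).  */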
9551 /* Arguments are ready. Create the new vector stmt. */
9552 if (final_len)
9554 gcall *call;
9555 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9556 /* Need conversion if it's wrapped with VnQI. */
9557 if (vmode != new_vmode)
9559 tree new_vtype
9560 = build_vector_type_for_mode (unsigned_intQI_type_node,
9561 new_vmode);
9562 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9563 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9564 gassign *new_stmt
9565 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9566 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9567 vec_oprnd = var;
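	      /* For example (illustrative), when the target only provides a
		 length-controlled store for byte vectors, a V4SI operand is
		 punned as

		   varQI_1 = VIEW_CONVERT_EXPR<vector(16) unsigned char>(vec_oprnd);

		 and the length above is scaled to bytes via FACTOR.  */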
9570 if (partial_ifn == IFN_MASK_LEN_STORE)
9571 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9572 dataref_ptr, ptr, final_mask,
9573 final_len, bias, vec_oprnd);
9574 else
9575 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9576 dataref_ptr, ptr, final_len,
9577 bias, vec_oprnd);
9578 gimple_call_set_nothrow (call, true);
9579 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9580 new_stmt = call;
9582 else if (final_mask)
9584 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9585 gcall *call
9586 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9587 ptr, final_mask, vec_oprnd);
9588 gimple_call_set_nothrow (call, true);
9589 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9590 new_stmt = call;
9592 else
9594 data_ref
9595 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9596 dataref_offset ? dataref_offset
9597 : build_int_cst (ref_type, 0));
9598 if (alignment_support_scheme == dr_aligned)
9600 else
9601 TREE_TYPE (data_ref)
9602 = build_aligned_type (TREE_TYPE (data_ref),
9603 align * BITS_PER_UNIT);
9604 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9605 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9606 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9609 if (slp)
9610 continue;
9612 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9613 if (!next_stmt_info)
9614 break;
9616 if (!slp && !costing_p)
9618 if (j == 0)
9619 *vec_stmt = new_stmt;
9620 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9624 if (costing_p)
 9626       /* When vectorizing a store into the function result, assign
 9627          a penalty if the function returns in a multi-register location.
 9628          In this case we assume we'll end up having to spill the
 9629          vector result and do piecewise loads as a conservative estimate.  */
9630 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9631 if (base
9632 && (TREE_CODE (base) == RESULT_DECL
9633 || (DECL_P (base) && cfun_returns (base)))
9634 && !aggregate_value_p (base, cfun->decl))
9636 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9637 /* ??? Handle PARALLEL in some way. */
9638 if (REG_P (reg))
9640 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
 9641               /* Assume that a single reg-reg move is possible and cheap,
 9642                  and do not account for the vector to GP register move cost.  */
9643 if (nregs > 1)
9645 /* Spill. */
9646 prologue_cost
9647 += record_stmt_cost (cost_vec, ncopies, vector_store,
9648 stmt_info, 0, vect_epilogue);
9649 /* Loads. */
9650 prologue_cost
9651 += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
9652 stmt_info, 0, vect_epilogue);
9656 if (dump_enabled_p ())
9657 dump_printf_loc (MSG_NOTE, vect_location,
9658 "vect_model_store_cost: inside_cost = %d, "
9659 "prologue_cost = %d .\n",
9660 inside_cost, prologue_cost);
9663 return true;
9666 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9667 VECTOR_CST mask. No checks are made that the target platform supports the
9668 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9669 vect_gen_perm_mask_checked. */
9671 tree
9672 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9674 tree mask_type;
9676 poly_uint64 nunits = sel.length ();
9677 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9679 mask_type = build_vector_type (ssizetype, nunits);
9680 return vec_perm_indices_to_tree (mask_type, sel);
9683 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9684 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9686 tree
9687 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9689 machine_mode vmode = TYPE_MODE (vectype);
9690 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9691 return vect_gen_perm_mask_any (vectype, sel);
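/* Usage sketch (illustrative only; compare perm_mask_for_reverse): to build
   a reversing mask one might write

     vec_perm_builder sel (nunits, nunits, 1);
     for (unsigned i = 0; i < nunits; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   MASK can then be used as the third operand of a VEC_PERM_EXPR, as done by
   permute_vec_elements below.  */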
 9694 /* Given vector variables X and Y that were generated for the scalar
 9695    STMT_INFO, generate instructions to permute the vector elements of X and Y
 9696    using the permutation mask MASK_VEC, insert them at *GSI and return the
 9697    permuted vector variable.  */
9699 static tree
9700 permute_vec_elements (vec_info *vinfo,
9701 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9702 gimple_stmt_iterator *gsi)
9704 tree vectype = TREE_TYPE (x);
9705 tree perm_dest, data_ref;
9706 gimple *perm_stmt;
9708 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9709 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9710 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9711 else
9712 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9713 data_ref = make_ssa_name (perm_dest);
9715 /* Generate the permute statement. */
9716 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9717 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9719 return data_ref;
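/* The statement generated above has the form (illustrative)

     perm_dest_N = VEC_PERM_EXPR <x, y, mask_vec>;

   where MASK_VEC is typically built by vect_gen_perm_mask_checked.  */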
 9722 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
 9723    inserting them on the loop's preheader edge.  Returns true if we
 9724    were successful in doing so (and thus STMT_INFO can then be moved),
 9725    otherwise returns false.  HOIST_P indicates whether we actually want to
 9726    hoist the definitions; it is false when we are only costing.  */
9728 static bool
9729 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9731 ssa_op_iter i;
9732 tree op;
9733 bool any = false;
9735 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9737 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9738 if (!gimple_nop_p (def_stmt)
9739 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
 9741           /* Make sure we don't need to recurse.  While we could do
 9742              so in simple cases, when there are more complex use webs
 9743              we don't have an easy way to preserve stmt order to fulfil
 9744              dependencies within them.  */
9745 tree op2;
9746 ssa_op_iter i2;
9747 if (gimple_code (def_stmt) == GIMPLE_PHI)
9748 return false;
9749 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9751 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9752 if (!gimple_nop_p (def_stmt2)
9753 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9754 return false;
9756 any = true;
9760 if (!any)
9761 return true;
9763 if (!hoist_p)
9764 return true;
9766 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9768 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9769 if (!gimple_nop_p (def_stmt)
9770 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9772 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9773 gsi_remove (&gsi, false);
9774 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9778 return true;
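/* For example (illustrative), given an invariant load in LOOP

     off_1 = n_2 * 4;
     x_3 = MEM[base_4 + off_1];

   the definition of off_1 is moved to the preheader edge so that the
   invariant load itself can subsequently be emitted there as well.  */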
9781 /* vectorizable_load.
 9783    Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9784 that can be vectorized.
9785 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9786 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9787 Return true if STMT_INFO is vectorizable in this way. */
9789 static bool
9790 vectorizable_load (vec_info *vinfo,
9791 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9792 gimple **vec_stmt, slp_tree slp_node,
9793 stmt_vector_for_cost *cost_vec)
9795 tree scalar_dest;
9796 tree vec_dest = NULL;
9797 tree data_ref = NULL;
9798 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9799 class loop *loop = NULL;
9800 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9801 bool nested_in_vect_loop = false;
9802 tree elem_type;
 9803   /* Avoid a false-positive uninitialized warning; see PR110652.  */
9804 tree new_temp = NULL_TREE;
9805 machine_mode mode;
9806 tree dummy;
9807 tree dataref_ptr = NULL_TREE;
9808 tree dataref_offset = NULL_TREE;
9809 gimple *ptr_incr = NULL;
9810 int ncopies;
9811 int i, j;
9812 unsigned int group_size;
9813 poly_uint64 group_gap_adj;
9814 tree msq = NULL_TREE, lsq;
9815 tree realignment_token = NULL_TREE;
9816 gphi *phi = NULL;
9817 vec<tree> dr_chain = vNULL;
9818 bool grouped_load = false;
9819 stmt_vec_info first_stmt_info;
9820 stmt_vec_info first_stmt_info_for_drptr = NULL;
9821 bool compute_in_loop = false;
9822 class loop *at_loop;
9823 int vec_num;
9824 bool slp = (slp_node != NULL);
9825 bool slp_perm = false;
9826 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9827 poly_uint64 vf;
9828 tree aggr_type;
9829 gather_scatter_info gs_info;
9830 tree ref_type;
9831 enum vect_def_type mask_dt = vect_unknown_def_type;
9833 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9834 return false;
9836 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9837 && ! vec_stmt)
9838 return false;
9840 if (!STMT_VINFO_DATA_REF (stmt_info))
9841 return false;
9843 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9844 int mask_index = -1;
9845 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9847 scalar_dest = gimple_assign_lhs (assign);
9848 if (TREE_CODE (scalar_dest) != SSA_NAME)
9849 return false;
9851 tree_code code = gimple_assign_rhs_code (assign);
9852 if (code != ARRAY_REF
9853 && code != BIT_FIELD_REF
9854 && code != INDIRECT_REF
9855 && code != COMPONENT_REF
9856 && code != IMAGPART_EXPR
9857 && code != REALPART_EXPR
9858 && code != MEM_REF
9859 && TREE_CODE_CLASS (code) != tcc_declaration)
9860 return false;
9862 else
9864 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9865 if (!call || !gimple_call_internal_p (call))
9866 return false;
9868 internal_fn ifn = gimple_call_internal_fn (call);
9869 if (!internal_load_fn_p (ifn))
9870 return false;
9872 scalar_dest = gimple_call_lhs (call);
9873 if (!scalar_dest)
9874 return false;
9876 mask_index = internal_fn_mask_index (ifn);
9877 if (mask_index >= 0 && slp_node)
9878 mask_index = vect_slp_child_index_for_operand (call, mask_index);
9879 if (mask_index >= 0
9880 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9881 &mask, NULL, &mask_dt, &mask_vectype))
9882 return false;
9885 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9886 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9888 if (loop_vinfo)
9890 loop = LOOP_VINFO_LOOP (loop_vinfo);
9891 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9892 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9894 else
9895 vf = 1;
9897 /* Multiple types in SLP are handled by creating the appropriate number of
9898 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9899 case of SLP. */
9900 if (slp)
9901 ncopies = 1;
9902 else
9903 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9905 gcc_assert (ncopies >= 1);
9907 /* FORNOW. This restriction should be relaxed. */
9908 if (nested_in_vect_loop && ncopies > 1)
9910 if (dump_enabled_p ())
9911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9912 "multiple types in nested loop.\n");
9913 return false;
9916 /* Invalidate assumptions made by dependence analysis when vectorization
9917 on the unrolled body effectively re-orders stmts. */
9918 if (ncopies > 1
9919 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9920 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9921 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9923 if (dump_enabled_p ())
9924 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9925 "cannot perform implicit CSE when unrolling "
9926 "with negative dependence distance\n");
9927 return false;
9930 elem_type = TREE_TYPE (vectype);
9931 mode = TYPE_MODE (vectype);
 9933   /* FORNOW. In some cases we can vectorize even if the data type is not
 9934      supported (e.g. data copies).  */
9935 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9937 if (dump_enabled_p ())
9938 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9939 "Aligned load, but unsupported type.\n");
9940 return false;
9943 /* Check if the load is a part of an interleaving chain. */
9944 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9946 grouped_load = true;
9947 /* FORNOW */
9948 gcc_assert (!nested_in_vect_loop);
9949 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9951 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9952 group_size = DR_GROUP_SIZE (first_stmt_info);
9954 /* Refuse non-SLP vectorization of SLP-only groups. */
9955 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9957 if (dump_enabled_p ())
9958 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9959 "cannot vectorize load in non-SLP mode.\n");
9960 return false;
9963 /* Invalidate assumptions made by dependence analysis when vectorization
9964 on the unrolled body effectively re-orders stmts. */
9965 if (!PURE_SLP_STMT (stmt_info)
9966 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9967 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9968 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9970 if (dump_enabled_p ())
9971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9972 "cannot perform implicit CSE when performing "
9973 "group loads with negative dependence distance\n");
9974 return false;
9977 else
9978 group_size = 1;
9980 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9982 slp_perm = true;
9984 if (!loop_vinfo)
9986 /* In BB vectorization we may not actually use a loaded vector
9987 accessing elements in excess of DR_GROUP_SIZE. */
9988 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9989 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9990 unsigned HOST_WIDE_INT nunits;
9991 unsigned j, k, maxk = 0;
9992 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9993 if (k > maxk)
9994 maxk = k;
9995 tree vectype = SLP_TREE_VECTYPE (slp_node);
9996 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9997 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9999 if (dump_enabled_p ())
10000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10001 "BB vectorization with gaps at the end of "
10002 "a load is not supported\n");
10003 return false;
10007 auto_vec<tree> tem;
10008 unsigned n_perms;
10009 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
10010 true, &n_perms))
10012 if (dump_enabled_p ())
10013 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
10014 vect_location,
10015 "unsupported load permutation\n");
10016 return false;
10020 vect_memory_access_type memory_access_type;
10021 enum dr_alignment_support alignment_support_scheme;
10022 int misalignment;
10023 poly_int64 poffset;
10024 internal_fn lanes_ifn;
10025 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
10026 ncopies, &memory_access_type, &poffset,
10027 &alignment_support_scheme, &misalignment, &gs_info,
10028 &lanes_ifn))
10029 return false;
10031 if (mask)
10033 if (memory_access_type == VMAT_CONTIGUOUS)
10035 machine_mode vec_mode = TYPE_MODE (vectype);
10036 if (!VECTOR_MODE_P (vec_mode)
10037 || !can_vec_mask_load_store_p (vec_mode,
10038 TYPE_MODE (mask_vectype), true))
10039 return false;
10041 else if (memory_access_type != VMAT_LOAD_STORE_LANES
10042 && memory_access_type != VMAT_GATHER_SCATTER)
10044 if (dump_enabled_p ())
10045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10046 "unsupported access type for masked load.\n");
10047 return false;
10049 else if (memory_access_type == VMAT_GATHER_SCATTER
10050 && gs_info.ifn == IFN_LAST
10051 && !gs_info.decl)
10053 if (dump_enabled_p ())
10054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10055 "unsupported masked emulated gather.\n");
10056 return false;
10060 bool costing_p = !vec_stmt;
10062 if (costing_p) /* transformation not required. */
10064 if (slp_node
10065 && mask
10066 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
10067 mask_vectype))
10069 if (dump_enabled_p ())
10070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10071 "incompatible vector types for invariants\n");
10072 return false;
10075 if (!slp)
10076 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10078 if (loop_vinfo
10079 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10080 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10081 VLS_LOAD, group_size,
10082 memory_access_type, &gs_info,
10083 mask);
10085 if (dump_enabled_p ()
10086 && memory_access_type != VMAT_ELEMENTWISE
10087 && memory_access_type != VMAT_GATHER_SCATTER
10088 && alignment_support_scheme != dr_aligned)
10089 dump_printf_loc (MSG_NOTE, vect_location,
10090 "Vectorizing an unaligned access.\n");
10092 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10093 vinfo->any_known_not_updated_vssa = true;
10095 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10098 if (!slp)
10099 gcc_assert (memory_access_type
10100 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10102 if (dump_enabled_p () && !costing_p)
10103 dump_printf_loc (MSG_NOTE, vect_location,
10104 "transform load. ncopies = %d\n", ncopies);
10106 /* Transform. */
10108 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10109 ensure_base_align (dr_info);
10111 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
10113 vect_build_gather_load_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info,
10114 mask, cost_vec);
10115 return true;
10118 if (memory_access_type == VMAT_INVARIANT)
10120 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10121 /* If we have versioned for aliasing or the loop doesn't
10122 have any data dependencies that would preclude this,
10123 then we are sure this is a loop invariant load and
10124 thus we can insert it on the preheader edge.
10125          TODO: hoist_defs_of_uses should ideally be computed
10126          once at analysis time, remembered and re-used at
10127          transform time.  */
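      /* For example (illustrative):

	   for (i = 0; i < n; ++i)
	     x[i] = a[j];   <-- a[j] is invariant in the loop

	 the scalar load of a[j] is emitted once, on the preheader edge when
	 hoisting is safe and in the loop body otherwise, and the loaded
	 value is then broadcast into a vector.  */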
10128 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10129 && !nested_in_vect_loop
10130 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
10131 if (costing_p)
10133 enum vect_cost_model_location cost_loc
10134 = hoist_p ? vect_prologue : vect_body;
10135 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10136 stmt_info, 0, cost_loc);
10137 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
10138 cost_loc);
10139 unsigned int prologue_cost = hoist_p ? cost : 0;
10140 unsigned int inside_cost = hoist_p ? 0 : cost;
10141 if (dump_enabled_p ())
10142 dump_printf_loc (MSG_NOTE, vect_location,
10143 "vect_model_load_cost: inside_cost = %d, "
10144 "prologue_cost = %d .\n",
10145 inside_cost, prologue_cost);
10146 return true;
10148 if (hoist_p)
10150 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
10151 if (dump_enabled_p ())
10152 dump_printf_loc (MSG_NOTE, vect_location,
10153 "hoisting out of the vectorized loop: %G",
10154 (gimple *) stmt);
10155 scalar_dest = copy_ssa_name (scalar_dest);
10156 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10157 edge pe = loop_preheader_edge (loop);
10158 gphi *vphi = get_virtual_phi (loop->header);
10159 tree vuse;
10160 if (vphi)
10161 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10162 else
10163 vuse = gimple_vuse (gsi_stmt (*gsi));
10164 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10165 gimple_set_vuse (new_stmt, vuse);
10166 gsi_insert_on_edge_immediate (pe, new_stmt);
10168 /* These copies are all equivalent. */
10169 if (hoist_p)
10170 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10171 vectype, NULL);
10172 else
10174 gimple_stmt_iterator gsi2 = *gsi;
10175 gsi_next (&gsi2);
10176 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10177 vectype, &gsi2);
10179 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10180 if (slp)
10181 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10182 slp_node->push_vec_def (new_stmt);
10183 else
10185 for (j = 0; j < ncopies; ++j)
10186 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10187 *vec_stmt = new_stmt;
10189 return true;
10192 if (memory_access_type == VMAT_ELEMENTWISE
10193 || memory_access_type == VMAT_STRIDED_SLP)
10195 gimple_stmt_iterator incr_gsi;
10196 bool insert_after;
10197 tree offvar;
10198 tree ivstep;
10199 tree running_off;
10200 vec<constructor_elt, va_gc> *v = NULL;
10201 tree stride_base, stride_step, alias_off;
10202 /* Checked by get_load_store_type. */
10203 unsigned int const_nunits = nunits.to_constant ();
10204 unsigned HOST_WIDE_INT cst_offset = 0;
10205 tree dr_offset;
10206 unsigned int inside_cost = 0;
10208 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10209 gcc_assert (!nested_in_vect_loop);
10211 if (grouped_load)
10213 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10214 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10216 else
10218 first_stmt_info = stmt_info;
10219 first_dr_info = dr_info;
10222 if (slp && grouped_load)
10224 group_size = DR_GROUP_SIZE (first_stmt_info);
10225 ref_type = get_group_alias_ptr_type (first_stmt_info);
10227 else
10229 if (grouped_load)
10230 cst_offset
10231 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10232 * vect_get_place_in_interleaving_chain (stmt_info,
10233 first_stmt_info));
10234 group_size = 1;
10235 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10238 if (!costing_p)
10240 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10241 stride_base = fold_build_pointer_plus (
10242 DR_BASE_ADDRESS (first_dr_info->dr),
10243 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10244 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10245 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10247 /* For a load with loop-invariant (but other than power-of-2)
10248 stride (i.e. not a grouped access) like so:
10250 for (i = 0; i < n; i += stride)
10251 ... = array[i];
10253 we generate a new induction variable and new accesses to
10254 form a new vector (or vectors, depending on ncopies):
10256 for (j = 0; ; j += VF*stride)
10257 tmp1 = array[j];
10258 tmp2 = array[j + stride];
10260 vectemp = {tmp1, tmp2, ...}
10263 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10264 build_int_cst (TREE_TYPE (stride_step), vf));
10266 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10268 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10269 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10270 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10271 loop, &incr_gsi, insert_after,
10272 &offvar, NULL);
10274 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10277 running_off = offvar;
10278 alias_off = build_int_cst (ref_type, 0);
10279 int nloads = const_nunits;
10280 int lnel = 1;
10281 tree ltype = TREE_TYPE (vectype);
10282 tree lvectype = vectype;
10283 auto_vec<tree> dr_chain;
10284 if (memory_access_type == VMAT_STRIDED_SLP)
10286 if (group_size < const_nunits)
10288 /* First check if vec_init optab supports construction from vector
10289 elts directly. Otherwise avoid emitting a constructor of
10290 vector elements by performing the loads using an integer type
10291 of the same size, constructing a vector of those and then
10292 re-interpreting it as the original vector type. This avoids a
10293 huge runtime penalty due to the general inability to perform
10294 store forwarding from smaller stores to a larger load. */
10295 tree ptype;
10296 tree vtype
10297 = vector_vector_composition_type (vectype,
10298 const_nunits / group_size,
10299 &ptype);
10300 if (vtype != NULL_TREE)
10302 nloads = const_nunits / group_size;
10303 lnel = group_size;
10304 lvectype = vtype;
10305 ltype = ptype;
10308 else
10310 nloads = 1;
10311 lnel = const_nunits;
10312 ltype = vectype;
10314 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10316 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
10317 else if (nloads == 1)
10318 ltype = vectype;
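      /* Illustrative example of the composition trick above: with a V8HI
	 vectype and group_size == 2, vector_vector_composition_type may
	 return a 4-element vector of 32-bit integers, so each group of two
	 HImode elements is fetched by one 32-bit scalar load and the
	 constructed vector is later VIEW_CONVERTed back to V8HI.  */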
10320 if (slp)
10322 /* For SLP permutation support we need to load the whole group,
10323 not only the number of vector stmts the permutation result
10324 fits in. */
10325 if (slp_perm)
10327 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10328 variable VF. */
10329 unsigned int const_vf = vf.to_constant ();
10330 ncopies = CEIL (group_size * const_vf, const_nunits);
10331 dr_chain.create (ncopies);
10333 else
10334 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10336 unsigned int group_el = 0;
10337 unsigned HOST_WIDE_INT
10338 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10339 unsigned int n_groups = 0;
10340 for (j = 0; j < ncopies; j++)
10342 if (nloads > 1 && !costing_p)
10343 vec_alloc (v, nloads);
10344 gimple *new_stmt = NULL;
10345 for (i = 0; i < nloads; i++)
10347 if (costing_p)
10349               /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10350                  avoid an ICE; see PR110776.  */
10351 if (VECTOR_TYPE_P (ltype)
10352 && memory_access_type != VMAT_ELEMENTWISE)
10353 vect_get_load_cost (vinfo, stmt_info, 1,
10354 alignment_support_scheme, misalignment,
10355 false, &inside_cost, nullptr, cost_vec,
10356 cost_vec, true);
10357 else
10358 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10359 stmt_info, 0, vect_body);
10360 continue;
10362 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10363 group_el * elsz + cst_offset);
10364 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10365 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10366 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
10367 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10368 if (nloads > 1)
10369 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10370 gimple_assign_lhs (new_stmt));
10372 group_el += lnel;
10373 if (! slp
10374 || group_el == group_size)
10376 n_groups++;
10377               /* When doing SLP make sure not to load elements from
10378                  the next vector iteration; those will not be accessed,
10379                  so just use the last element again.  See PR107451.  */
10380 if (!slp || known_lt (n_groups, vf))
10382 tree newoff = copy_ssa_name (running_off);
10383 gimple *incr
10384 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10385 running_off, stride_step);
10386 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10387 running_off = newoff;
10389 group_el = 0;
10393 if (nloads > 1)
10395 if (costing_p)
10396 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10397 stmt_info, 0, vect_body);
10398 else
10400 tree vec_inv = build_constructor (lvectype, v);
10401 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10402 lvectype, gsi);
10403 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10404 if (lvectype != vectype)
10406 new_stmt
10407 = gimple_build_assign (make_ssa_name (vectype),
10408 VIEW_CONVERT_EXPR,
10409 build1 (VIEW_CONVERT_EXPR,
10410 vectype, new_temp));
10411 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10412 gsi);
10417 if (!costing_p)
10419 if (slp)
10421 if (slp_perm)
10422 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10423 else
10424 slp_node->push_vec_def (new_stmt);
10426 else
10428 if (j == 0)
10429 *vec_stmt = new_stmt;
10430 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10434 if (slp_perm)
10436 unsigned n_perms;
10437 if (costing_p)
10439 unsigned n_loads;
10440 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10441 true, &n_perms, &n_loads);
10442 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10443 first_stmt_info, 0, vect_body);
10445 else
10446 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10447 false, &n_perms);
10450 if (costing_p && dump_enabled_p ())
10451 dump_printf_loc (MSG_NOTE, vect_location,
10452 "vect_model_load_cost: inside_cost = %u, "
10453 "prologue_cost = 0 .\n",
10454 inside_cost);
10456 return true;
10459 if (memory_access_type == VMAT_GATHER_SCATTER
10460 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10461 grouped_load = false;
10463 if (grouped_load
10464 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10466 if (grouped_load)
10468 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10469 group_size = DR_GROUP_SIZE (first_stmt_info);
10471 else
10473 first_stmt_info = stmt_info;
10474 group_size = 1;
10476 /* For SLP vectorization we directly vectorize a subchain
10477 without permutation. */
10478 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10479 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10480 /* For BB vectorization always use the first stmt to base
10481 the data ref pointer on. */
10482 if (bb_vinfo)
10483 first_stmt_info_for_drptr
10484 = vect_find_first_scalar_stmt_in_slp (slp_node);
10486 /* Check if the chain of loads is already vectorized. */
10487 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10488 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10489 ??? But we can only do so if there is exactly one
10490 as we have no way to get at the rest. Leave the CSE
10491 opportunity alone.
10492 ??? With the group load eventually participating
10493 in multiple different permutations (having multiple
10494 slp nodes which refer to the same group) the CSE
10495 is even wrong code. See PR56270. */
10496 && !slp)
10498 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10499 return true;
10501 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10502 group_gap_adj = 0;
10504 /* VEC_NUM is the number of vect stmts to be created for this group. */
10505 if (slp)
10507 grouped_load = false;
10508 /* If an SLP permutation is from N elements to N elements,
10509 and if one vector holds a whole number of N, we can load
10510 the inputs to the permutation in the same way as an
10511 unpermuted sequence. In other cases we need to load the
10512 whole group, not only the number of vector stmts the
10513 permutation result fits in. */
10514 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10515 if (slp_perm
10516 && (group_size != scalar_lanes
10517 || !multiple_p (nunits, group_size)))
10519 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10520 variable VF; see vect_transform_slp_perm_load. */
10521 unsigned int const_vf = vf.to_constant ();
10522 unsigned int const_nunits = nunits.to_constant ();
10523 vec_num = CEIL (group_size * const_vf, const_nunits);
10524 group_gap_adj = vf * group_size - nunits * vec_num;
10526 else
10528 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10529 group_gap_adj
10530 = group_size - scalar_lanes;
10533 else
10534 vec_num = group_size;
10536 ref_type = get_group_alias_ptr_type (first_stmt_info);
10538 else
10540 first_stmt_info = stmt_info;
10541 first_dr_info = dr_info;
10542 group_size = vec_num = 1;
10543 group_gap_adj = 0;
10544 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10545 if (slp)
10546 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10549 gcc_assert (alignment_support_scheme);
10550 vec_loop_masks *loop_masks
10551 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10552 ? &LOOP_VINFO_MASKS (loop_vinfo)
10553 : NULL);
10554 vec_loop_lens *loop_lens
10555 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10556 ? &LOOP_VINFO_LENS (loop_vinfo)
10557 : NULL);
10559   /* Shouldn't use a length-based approach if fully masked.  */
10560 gcc_assert (!loop_lens || !loop_masks);
10562   /* Targets with load-lane instructions must not require explicit
10563      realignment.  vect_supportable_dr_alignment always returns either
10564      dr_aligned or dr_unaligned_supported for masked operations.  */
10565 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10566 && !mask
10567 && !loop_masks)
10568 || alignment_support_scheme == dr_aligned
10569 || alignment_support_scheme == dr_unaligned_supported);
10571 /* In case the vectorization factor (VF) is bigger than the number
10572 of elements that we can fit in a vectype (nunits), we have to generate
10573      more than one vector stmt, i.e. we need to "unroll" the
10574 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10575 from one copy of the vector stmt to the next, in the field
10576 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10577 stages to find the correct vector defs to be used when vectorizing
10578 stmts that use the defs of the current stmt. The example below
10579 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10580 need to create 4 vectorized stmts):
10582 before vectorization:
10583 RELATED_STMT VEC_STMT
10584 S1: x = memref - -
10585 S2: z = x + 1 - -
10587 step 1: vectorize stmt S1:
10588 We first create the vector stmt VS1_0, and, as usual, record a
10589 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10590 Next, we create the vector stmt VS1_1, and record a pointer to
10591 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10592 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10593 stmts and pointers:
10594 RELATED_STMT VEC_STMT
10595 VS1_0: vx0 = memref0 VS1_1 -
10596 VS1_1: vx1 = memref1 VS1_2 -
10597 VS1_2: vx2 = memref2 VS1_3 -
10598 VS1_3: vx3 = memref3 - -
10599 S1: x = load - VS1_0
10600 S2: z = x + 1 - -
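For instance, with the scalar sources of S1 and S2 being

     for (i = 0; i < N; i++)
       z[i] = x[i] + 1;

 a VF of 16 and 4-element vectors give the four loads VS1_0..VS1_3
 shown above; S2 is then vectorized into four adds that locate their
 inputs by following the RELATED_STMT chain.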
10603 /* In case of interleaving (non-unit grouped access):
10605 S1: x2 = &base + 2
10606 S2: x0 = &base
10607 S3: x1 = &base + 1
10608 S4: x3 = &base + 3
10610 Vectorized loads are created in the order of memory accesses
10611 starting from the access of the first stmt of the chain:
10613 VS1: vx0 = &base
10614 VS2: vx1 = &base + vec_size*1
10615 VS3: vx3 = &base + vec_size*2
10616 VS4: vx4 = &base + vec_size*3
10618 Then permutation statements are generated:
10620 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10621 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10624 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10625 (the order of the data-refs in the output of vect_permute_load_chain
10626 corresponds to the order of scalar stmts in the interleaving chain - see
10627 the documentation of vect_permute_load_chain()).
10628 The generation of permutation stmts and recording them in
10629 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10631 In case of both multiple types and interleaving, the vector loads and
10632 permutation stmts above are created for every copy. The result vector
10633 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10634 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
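/* For instance, with a group of size two whose memory holds the
   interleaved elements a0 b0 a1 b1 a2 b2 a3 b3 (names made up for the
   example), the even permutation { 0, 2, 4, 6 } produces
   { a0, a1, a2, a3 } and the odd permutation { 1, 3, 5, 7 } produces
   { b0, b1, b2, b3 }, i.e. one result vector per scalar stmt of the
   interleaving chain.  */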
10636 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10637 on a target that supports unaligned accesses (dr_unaligned_supported)
10638 we generate the following code:
10639 p = initial_addr;
10640 indx = 0;
10641 loop {
10642 p = p + indx * vectype_size;
10643 vec_dest = *(p);
10644 indx = indx + 1;
10647 Otherwise, the data reference is potentially unaligned on a target that
10648 does not support unaligned accesses (dr_explicit_realign_optimized) -
10649 then generate the following code, in which the data in each iteration is
10650 obtained by two vector loads, one from the previous iteration, and one
10651 from the current iteration:
10652 p1 = initial_addr;
10653 msq_init = *(floor(p1))
10654 p2 = initial_addr + VS - 1;
10655 realignment_token = call target_builtin;
10656 indx = 0;
10657 loop {
10658 p2 = p2 + indx * vectype_size
10659 lsq = *(floor(p2))
10660 vec_dest = realign_load (msq, lsq, realignment_token)
10661 indx = indx + 1;
10662 msq = lsq;
10663 } */
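/* As a rough illustration, with 16-byte vectors and an initial address
   that is misaligned by 4 bytes, msq covers bytes -4..11 of the
   requested data and lsq covers bytes 12..27; realign_load merges the
   two aligned loads to recover bytes 0..15.  The exact merge is
   target-specific and driven by realignment_token.  */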
10665 /* If the misalignment remains the same throughout the execution of the
10666 loop, we can create the init_addr and permutation mask at the loop
10667 preheader. Otherwise, it needs to be created inside the loop.
10668 This can only occur when vectorizing memory accesses in the inner-loop
10669 nested within an outer-loop that is being vectorized. */
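/* For example, if the data-ref advances by 12 bytes per iteration of
   the vectorized outer-loop while the vector size is 16 bytes, the
   misalignment changes on every iteration, so the realignment data has
   to be computed inside the loop rather than hoisted to the
   preheader.  */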
10671 if (nested_in_vect_loop
10672 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10673 GET_MODE_SIZE (TYPE_MODE (vectype))))
10675 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10676 compute_in_loop = true;
10679 bool diff_first_stmt_info
10680 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10682 tree offset = NULL_TREE;
10683 if ((alignment_support_scheme == dr_explicit_realign_optimized
10684 || alignment_support_scheme == dr_explicit_realign)
10685 && !compute_in_loop)
10687 /* If we have a different first_stmt_info, we can't set up realignment
10688 here, since we can't guarantee that first_stmt_info's DR has been
10689 initialized yet; instead use first_stmt_info_for_drptr's DR, bumping
10690 by the distance from first_stmt_info's DR as below. */
10691 if (!costing_p)
10693 if (!diff_first_stmt_info)
10694 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10695 &realignment_token,
10696 alignment_support_scheme, NULL_TREE,
10697 &at_loop);
10698 if (alignment_support_scheme == dr_explicit_realign_optimized)
10700 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10701 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10702 size_one_node);
10703 gcc_assert (!first_stmt_info_for_drptr);
10707 else
10708 at_loop = loop;
10710 if (!known_eq (poffset, 0))
10711 offset = (offset
10712 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10713 : size_int (poffset));
10715 tree bump;
10716 tree vec_offset = NULL_TREE;
10717 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10719 aggr_type = NULL_TREE;
10720 bump = NULL_TREE;
10722 else if (memory_access_type == VMAT_GATHER_SCATTER)
10724 aggr_type = elem_type;
10725 if (!costing_p)
10726 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10727 &bump, &vec_offset, loop_lens);
10729 else
10731 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10732 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10733 else
10734 aggr_type = vectype;
10735 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10736 memory_access_type, loop_lens);
10739 auto_vec<tree> vec_offsets;
10740 auto_vec<tree> vec_masks;
10741 if (mask && !costing_p)
10743 if (slp_node)
10744 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10745 &vec_masks);
10746 else
10747 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10748 &vec_masks, mask_vectype);
10751 tree vec_mask = NULL_TREE;
10752 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10754 gcc_assert (alignment_support_scheme == dr_aligned
10755 || alignment_support_scheme == dr_unaligned_supported);
10756 gcc_assert (grouped_load && !slp);
10758 unsigned int inside_cost = 0, prologue_cost = 0;
10759 for (j = 0; j < ncopies; j++)
10761 if (costing_p)
10763 /* An IFN_LOAD_LANES will load all its vector results,
10764 regardless of which ones we actually need. Account
10765 for the cost of unused results. */
10766 if (first_stmt_info == stmt_info)
10768 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10769 stmt_vec_info next_stmt_info = first_stmt_info;
10772 gaps -= 1;
10773 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10775 while (next_stmt_info);
10776 if (gaps)
10778 if (dump_enabled_p ())
10779 dump_printf_loc (MSG_NOTE, vect_location,
10780 "vect_model_load_cost: %d "
10781 "unused vectors.\n",
10782 gaps);
10783 vect_get_load_cost (vinfo, stmt_info, gaps,
10784 alignment_support_scheme,
10785 misalignment, false, &inside_cost,
10786 &prologue_cost, cost_vec, cost_vec,
10787 true);
10790 vect_get_load_cost (vinfo, stmt_info, 1, alignment_support_scheme,
10791 misalignment, false, &inside_cost,
10792 &prologue_cost, cost_vec, cost_vec, true);
10793 continue;
10796 /* 1. Create the vector or array pointer update chain. */
10797 if (j == 0)
10798 dataref_ptr
10799 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10800 at_loop, offset, &dummy, gsi,
10801 &ptr_incr, false, bump);
10802 else
10804 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10805 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10806 stmt_info, bump);
10808 if (mask)
10809 vec_mask = vec_masks[j];
10811 tree vec_array = create_vector_array (vectype, vec_num);
10813 tree final_mask = NULL_TREE;
10814 tree final_len = NULL_TREE;
10815 tree bias = NULL_TREE;
10816 if (loop_masks)
10817 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10818 ncopies, vectype, j);
10819 if (vec_mask)
10820 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10821 vec_mask, gsi);
10823 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10825 if (loop_lens)
10826 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10827 ncopies, vectype, j, 1);
10828 else
10829 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10830 signed char biasval
10831 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10832 bias = build_int_cst (intQI_type_node, biasval);
10833 if (!final_mask)
10835 mask_vectype = truth_type_for (vectype);
10836 final_mask = build_minus_one_cst (mask_vectype);
10840 gcall *call;
10841 if (final_len && final_mask)
10843 /* Emit:
10844 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10845 VEC_MASK, LEN, BIAS). */
10846 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10847 tree alias_ptr = build_int_cst (ref_type, align);
10848 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10849 dataref_ptr, alias_ptr,
10850 final_mask, final_len, bias);
10852 else if (final_mask)
10854 /* Emit:
10855 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10856 VEC_MASK). */
10857 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10858 tree alias_ptr = build_int_cst (ref_type, align);
10859 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10860 dataref_ptr, alias_ptr,
10861 final_mask);
10863 else
10865 /* Emit:
10866 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10867 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10868 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10870 gimple_call_set_lhs (call, vec_array);
10871 gimple_call_set_nothrow (call, true);
10872 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10874 dr_chain.create (vec_num);
10875 /* Extract each vector into an SSA_NAME. */
10876 for (i = 0; i < vec_num; i++)
10878 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10879 vec_array, i);
10880 dr_chain.quick_push (new_temp);
10883 /* Record the mapping between SSA_NAMEs and statements. */
10884 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10886 /* Record that VEC_ARRAY is now dead. */
10887 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10889 dr_chain.release ();
10891 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10894 if (costing_p && dump_enabled_p ())
10895 dump_printf_loc (MSG_NOTE, vect_location,
10896 "vect_model_load_cost: inside_cost = %u, "
10897 "prologue_cost = %u .\n",
10898 inside_cost, prologue_cost);
10900 return true;
10903 if (memory_access_type == VMAT_GATHER_SCATTER)
10905 gcc_assert (alignment_support_scheme == dr_aligned
10906 || alignment_support_scheme == dr_unaligned_supported);
10907 gcc_assert (!grouped_load && !slp_perm);
10909 unsigned int inside_cost = 0, prologue_cost = 0;
10910 for (j = 0; j < ncopies; j++)
10912 /* 1. Create the vector or array pointer update chain. */
10913 if (j == 0 && !costing_p)
10915 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10916 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10917 slp_node, &gs_info, &dataref_ptr,
10918 &vec_offsets);
10919 else
10920 dataref_ptr
10921 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10922 at_loop, offset, &dummy, gsi,
10923 &ptr_incr, false, bump);
10925 else if (!costing_p)
10927 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10928 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10929 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10930 gsi, stmt_info, bump);
10933 if (mask && !costing_p)
10934 vec_mask = vec_masks[j];
10936 gimple *new_stmt = NULL;
10937 for (i = 0; i < vec_num; i++)
10939 tree final_mask = NULL_TREE;
10940 tree final_len = NULL_TREE;
10941 tree bias = NULL_TREE;
10942 if (!costing_p)
10944 if (loop_masks)
10945 final_mask
10946 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10947 vec_num * ncopies, vectype,
10948 vec_num * j + i);
10949 if (vec_mask)
10950 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10951 final_mask, vec_mask, gsi);
10953 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10954 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10955 gsi, stmt_info, bump);
10958 /* 2. Create the vector-load in the loop. */
10959 unsigned HOST_WIDE_INT align;
10960 if (gs_info.ifn != IFN_LAST)
10962 if (costing_p)
10964 unsigned int cnunits = vect_nunits_for_cost (vectype);
10965 inside_cost
10966 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10967 stmt_info, 0, vect_body);
10968 continue;
10970 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10971 vec_offset = vec_offsets[vec_num * j + i];
10972 tree zero = build_zero_cst (vectype);
10973 tree scale = size_int (gs_info.scale);
10975 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10977 if (loop_lens)
10978 final_len
10979 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10980 vec_num * ncopies, vectype,
10981 vec_num * j + i, 1);
10982 else
10983 final_len
10984 = build_int_cst (sizetype,
10985 TYPE_VECTOR_SUBPARTS (vectype));
10986 signed char biasval
10987 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10988 bias = build_int_cst (intQI_type_node, biasval);
10989 if (!final_mask)
10991 mask_vectype = truth_type_for (vectype);
10992 final_mask = build_minus_one_cst (mask_vectype);
10996 gcall *call;
10997 if (final_len && final_mask)
10998 call
10999 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
11000 dataref_ptr, vec_offset,
11001 scale, zero, final_mask,
11002 final_len, bias);
11003 else if (final_mask)
11004 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
11005 dataref_ptr, vec_offset,
11006 scale, zero, final_mask);
11007 else
11008 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
11009 dataref_ptr, vec_offset,
11010 scale, zero);
11011 gimple_call_set_nothrow (call, true);
11012 new_stmt = call;
11013 data_ref = NULL_TREE;
11015 else
11017 /* Emulated gather-scatter. */
11018 gcc_assert (!final_mask);
11019 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11020 if (costing_p)
11022 /* For emulated gathers, N offset vector element extracts
11023 (the offset add is consumed by the load). */
11024 inside_cost = record_stmt_cost (cost_vec, const_nunits,
11025 vec_to_scalar, stmt_info,
11026 0, vect_body);
11027 /* N scalar loads plus gathering them into a
11028 vector. */
11029 inside_cost
11030 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11031 stmt_info, 0, vect_body);
11032 inside_cost
11033 = record_stmt_cost (cost_vec, 1, vec_construct,
11034 stmt_info, 0, vect_body);
11035 continue;
11037 unsigned HOST_WIDE_INT const_offset_nunits
11038 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11039 .to_constant ();
11040 vec<constructor_elt, va_gc> *ctor_elts;
11041 vec_alloc (ctor_elts, const_nunits);
11042 gimple_seq stmts = NULL;
11043 /* We support offset vectors with more elements
11044 than the data vector for now. */
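/* For instance, with a 4-element data vector and an 8-element offset
   vector, FACTOR is 2: copies j = 0 and j = 1 both read
   vec_offsets[0], the second one starting at element offset 4.  */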
11045 unsigned HOST_WIDE_INT factor
11046 = const_offset_nunits / const_nunits;
11047 vec_offset = vec_offsets[j / factor];
11048 unsigned elt_offset = (j % factor) * const_nunits;
11049 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11050 tree scale = size_int (gs_info.scale);
11051 align = get_object_alignment (DR_REF (first_dr_info->dr));
11052 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
11053 for (unsigned k = 0; k < const_nunits; ++k)
11055 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11056 bitsize_int (k + elt_offset));
11057 tree idx
11058 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11059 vec_offset, TYPE_SIZE (idx_type), boff);
11060 idx = gimple_convert (&stmts, sizetype, idx);
11061 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
11062 scale);
11063 tree ptr = gimple_build (&stmts, PLUS_EXPR,
11064 TREE_TYPE (dataref_ptr),
11065 dataref_ptr, idx);
11066 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11067 tree elt = make_ssa_name (TREE_TYPE (vectype));
11068 tree ref = build2 (MEM_REF, ltype, ptr,
11069 build_int_cst (ref_type, 0));
11070 new_stmt = gimple_build_assign (elt, ref);
11071 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11072 gimple_seq_add_stmt (&stmts, new_stmt);
11073 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11075 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11076 new_stmt = gimple_build_assign (
11077 NULL_TREE, build_constructor (vectype, ctor_elts));
11078 data_ref = NULL_TREE;
11081 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11082 /* DATA_REF is null if we've already built the statement. */
11083 if (data_ref)
11085 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11086 new_stmt = gimple_build_assign (vec_dest, data_ref);
11088 new_temp = make_ssa_name (vec_dest, new_stmt);
11089 gimple_set_lhs (new_stmt, new_temp);
11090 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11092 /* Store vector loads in the corresponding SLP_NODE. */
11093 if (slp)
11094 slp_node->push_vec_def (new_stmt);
11097 if (!slp && !costing_p)
11098 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11101 if (!slp && !costing_p)
11102 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11104 if (costing_p && dump_enabled_p ())
11105 dump_printf_loc (MSG_NOTE, vect_location,
11106 "vect_model_load_cost: inside_cost = %u, "
11107 "prologue_cost = %u .\n",
11108 inside_cost, prologue_cost);
11109 return true;
11112 poly_uint64 group_elt = 0;
11113 unsigned int inside_cost = 0, prologue_cost = 0;
11114 for (j = 0; j < ncopies; j++)
11116 /* 1. Create the vector or array pointer update chain. */
11117 if (j == 0 && !costing_p)
11119 bool simd_lane_access_p
11120 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11121 if (simd_lane_access_p
11122 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11123 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11124 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11125 && integer_zerop (DR_INIT (first_dr_info->dr))
11126 && alias_sets_conflict_p (get_alias_set (aggr_type),
11127 get_alias_set (TREE_TYPE (ref_type)))
11128 && (alignment_support_scheme == dr_aligned
11129 || alignment_support_scheme == dr_unaligned_supported))
11131 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11132 dataref_offset = build_int_cst (ref_type, 0);
11134 else if (diff_first_stmt_info)
11136 dataref_ptr
11137 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11138 aggr_type, at_loop, offset, &dummy,
11139 gsi, &ptr_incr, simd_lane_access_p,
11140 bump);
11141 /* Adjust the pointer by the difference to first_stmt. */
11142 data_reference_p ptrdr
11143 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11144 tree diff
11145 = fold_convert (sizetype,
11146 size_binop (MINUS_EXPR,
11147 DR_INIT (first_dr_info->dr),
11148 DR_INIT (ptrdr)));
11149 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11150 stmt_info, diff);
11151 if (alignment_support_scheme == dr_explicit_realign)
11153 msq = vect_setup_realignment (vinfo,
11154 first_stmt_info_for_drptr, gsi,
11155 &realignment_token,
11156 alignment_support_scheme,
11157 dataref_ptr, &at_loop);
11158 gcc_assert (!compute_in_loop);
11161 else
11162 dataref_ptr
11163 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11164 at_loop,
11165 offset, &dummy, gsi, &ptr_incr,
11166 simd_lane_access_p, bump);
11167 if (mask)
11168 vec_mask = vec_masks[0];
11170 else if (!costing_p)
11172 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11173 if (dataref_offset)
11174 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11175 bump);
11176 else
11177 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11178 stmt_info, bump);
11179 if (mask)
11180 vec_mask = vec_masks[j];
11183 if (grouped_load || slp_perm)
11184 dr_chain.create (vec_num);
11186 gimple *new_stmt = NULL;
11187 for (i = 0; i < vec_num; i++)
11189 tree final_mask = NULL_TREE;
11190 tree final_len = NULL_TREE;
11191 tree bias = NULL_TREE;
11192 if (!costing_p)
11194 if (loop_masks)
11195 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11196 vec_num * ncopies, vectype,
11197 vec_num * j + i);
11198 if (vec_mask)
11199 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11200 final_mask, vec_mask, gsi);
11202 if (i > 0)
11203 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11204 gsi, stmt_info, bump);
11207 /* 2. Create the vector-load in the loop. */
11208 switch (alignment_support_scheme)
11210 case dr_aligned:
11211 case dr_unaligned_supported:
11213 if (costing_p)
11214 break;
11216 unsigned int misalign;
11217 unsigned HOST_WIDE_INT align;
11218 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11219 if (alignment_support_scheme == dr_aligned)
11220 misalign = 0;
11221 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11223 align
11224 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11225 misalign = 0;
11227 else
11228 misalign = misalignment;
11229 if (dataref_offset == NULL_TREE
11230 && TREE_CODE (dataref_ptr) == SSA_NAME)
11231 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11232 misalign);
11233 align = least_bit_hwi (misalign | align);
11235 /* Compute the IFN to use when LOOP_LENS or FINAL_MASK is valid. */
11236 machine_mode vmode = TYPE_MODE (vectype);
11237 machine_mode new_vmode = vmode;
11238 internal_fn partial_ifn = IFN_LAST;
11239 if (loop_lens)
11241 opt_machine_mode new_ovmode
11242 = get_len_load_store_mode (vmode, true, &partial_ifn);
11243 new_vmode = new_ovmode.require ();
11244 unsigned factor
11245 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
11246 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11247 vec_num * ncopies, vectype,
11248 vec_num * j + i, factor);
11250 else if (final_mask)
11252 if (!can_vec_mask_load_store_p (
11253 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11254 &partial_ifn))
11255 gcc_unreachable ();
11258 if (partial_ifn == IFN_MASK_LEN_LOAD)
11260 if (!final_len)
11262 /* Pass VF value to 'len' argument of
11263 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11264 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11266 if (!final_mask)
11268 /* Pass all ones value to 'mask' argument of
11269 MASK_LEN_LOAD if final_mask is invalid. */
11270 mask_vectype = truth_type_for (vectype);
11271 final_mask = build_minus_one_cst (mask_vectype);
11274 if (final_len)
11276 signed char biasval
11277 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11279 bias = build_int_cst (intQI_type_node, biasval);
11282 if (final_len)
11284 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11285 gcall *call;
11286 if (partial_ifn == IFN_MASK_LEN_LOAD)
11287 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11288 dataref_ptr, ptr,
11289 final_mask, final_len,
11290 bias);
11291 else
11292 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11293 dataref_ptr, ptr,
11294 final_len, bias);
11295 gimple_call_set_nothrow (call, true);
11296 new_stmt = call;
11297 data_ref = NULL_TREE;
11299 /* Need conversion if it's wrapped with VnQI. */
11300 if (vmode != new_vmode)
11302 tree new_vtype = build_vector_type_for_mode (
11303 unsigned_intQI_type_node, new_vmode);
11304 tree var
11305 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11306 gimple_set_lhs (call, var);
11307 vect_finish_stmt_generation (vinfo, stmt_info, call,
11308 gsi);
11309 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11310 new_stmt = gimple_build_assign (vec_dest,
11311 VIEW_CONVERT_EXPR, op);
11314 else if (final_mask)
11316 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11317 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11318 dataref_ptr, ptr,
11319 final_mask);
11320 gimple_call_set_nothrow (call, true);
11321 new_stmt = call;
11322 data_ref = NULL_TREE;
11324 else
11326 tree ltype = vectype;
11327 tree new_vtype = NULL_TREE;
11328 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11329 unsigned int vect_align
11330 = vect_known_alignment_in_bytes (first_dr_info, vectype);
11331 unsigned int scalar_dr_size
11332 = vect_get_scalar_dr_size (first_dr_info);
11333 /* If there's no peeling for gaps but we have a gap
11334 with slp loads then load the lower half of the
11335 vector only. See get_group_load_store_type for
11336 when we apply this optimization. */
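/* For example, with a 4-element vector, DR_GROUP_SIZE 4 and
   DR_GROUP_GAP 2 (and the alignment test below passing), only the
   first two elements are live, so a 2-element half vector is loaded
   and the remaining lanes are filled with zeros further down.  */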
11337 if (slp
11338 && loop_vinfo
11339 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
11340 && known_eq (nunits, (group_size - gap) * 2)
11341 && known_eq (nunits, group_size)
11342 && gap >= (vect_align / scalar_dr_size))
11344 tree half_vtype;
11345 new_vtype
11346 = vector_vector_composition_type (vectype, 2,
11347 &half_vtype);
11348 if (new_vtype != NULL_TREE)
11349 ltype = half_vtype;
11351 tree offset
11352 = (dataref_offset ? dataref_offset
11353 : build_int_cst (ref_type, 0));
11354 if (ltype != vectype
11355 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11357 unsigned HOST_WIDE_INT gap_offset
11358 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
11359 tree gapcst = build_int_cst (ref_type, gap_offset);
11360 offset = size_binop (PLUS_EXPR, offset, gapcst);
11362 data_ref
11363 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11364 if (alignment_support_scheme == dr_aligned)
11366 else
11367 TREE_TYPE (data_ref)
11368 = build_aligned_type (TREE_TYPE (data_ref),
11369 align * BITS_PER_UNIT);
11370 if (ltype != vectype)
11372 vect_copy_ref_info (data_ref,
11373 DR_REF (first_dr_info->dr));
11374 tree tem = make_ssa_name (ltype);
11375 new_stmt = gimple_build_assign (tem, data_ref);
11376 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11377 gsi);
11378 data_ref = NULL;
11379 vec<constructor_elt, va_gc> *v;
11380 vec_alloc (v, 2);
11381 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11383 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11384 build_zero_cst (ltype));
11385 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11387 else
11389 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11390 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11391 build_zero_cst (ltype));
11393 gcc_assert (new_vtype != NULL_TREE);
11394 if (new_vtype == vectype)
11395 new_stmt = gimple_build_assign (
11396 vec_dest, build_constructor (vectype, v));
11397 else
11399 tree new_vname = make_ssa_name (new_vtype);
11400 new_stmt = gimple_build_assign (
11401 new_vname, build_constructor (new_vtype, v));
11402 vect_finish_stmt_generation (vinfo, stmt_info,
11403 new_stmt, gsi);
11404 new_stmt = gimple_build_assign (
11405 vec_dest,
11406 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11410 break;
11412 case dr_explicit_realign:
11414 if (costing_p)
11415 break;
11416 tree ptr, bump;
11418 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11420 if (compute_in_loop)
11421 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11422 &realignment_token,
11423 dr_explicit_realign,
11424 dataref_ptr, NULL);
11426 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11427 ptr = copy_ssa_name (dataref_ptr);
11428 else
11429 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11430 // For explicit realign the target alignment should be
11431 // known at compile time.
11432 unsigned HOST_WIDE_INT align
11433 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11434 new_stmt = gimple_build_assign (
11435 ptr, BIT_AND_EXPR, dataref_ptr,
11436 build_int_cst (TREE_TYPE (dataref_ptr),
11437 -(HOST_WIDE_INT) align));
11438 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11439 data_ref
11440 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11441 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11442 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11443 new_stmt = gimple_build_assign (vec_dest, data_ref);
11444 new_temp = make_ssa_name (vec_dest, new_stmt);
11445 gimple_assign_set_lhs (new_stmt, new_temp);
11446 gimple_move_vops (new_stmt, stmt_info->stmt);
11447 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11448 msq = new_temp;
11450 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11451 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11452 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11453 bump);
11454 new_stmt = gimple_build_assign (
11455 NULL_TREE, BIT_AND_EXPR, ptr,
11456 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11457 if (TREE_CODE (ptr) == SSA_NAME)
11458 ptr = copy_ssa_name (ptr, new_stmt);
11459 else
11460 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11461 gimple_assign_set_lhs (new_stmt, ptr);
11462 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11463 data_ref
11464 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11465 break;
11467 case dr_explicit_realign_optimized:
11469 if (costing_p)
11470 break;
11471 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11472 new_temp = copy_ssa_name (dataref_ptr);
11473 else
11474 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11475 // We should only be doing this if we know the target
11476 // alignment at compile time.
11477 unsigned HOST_WIDE_INT align
11478 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11479 new_stmt = gimple_build_assign (
11480 new_temp, BIT_AND_EXPR, dataref_ptr,
11481 build_int_cst (TREE_TYPE (dataref_ptr),
11482 -(HOST_WIDE_INT) align));
11483 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11484 data_ref = build2 (MEM_REF, vectype, new_temp,
11485 build_int_cst (ref_type, 0));
11486 break;
11488 default:
11489 gcc_unreachable ();
11492 /* A single common place to cost the vector load above for the
11493 different alignment support schemes. */
11494 if (costing_p)
11496 /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load, we only
11497 need to take care of the first stmt, whose stmt_info is
11498 first_stmt_info; iterating vec_num times on it covers the
11499 cost of the remaining stmts, which is consistent with the
11500 transform phase. The prologue cost for realignment only
11501 needs to be counted once for the whole group. */
11502 bool first_stmt_info_p = first_stmt_info == stmt_info;
11503 bool add_realign_cost = first_stmt_info_p && i == 0;
11504 if (memory_access_type == VMAT_CONTIGUOUS
11505 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11506 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11507 && (!grouped_load || first_stmt_info_p)))
11508 vect_get_load_cost (vinfo, stmt_info, 1,
11509 alignment_support_scheme, misalignment,
11510 add_realign_cost, &inside_cost,
11511 &prologue_cost, cost_vec, cost_vec, true);
11513 else
11515 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11516 /* DATA_REF is null if we've already built the statement. */
11517 if (data_ref)
11519 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11520 new_stmt = gimple_build_assign (vec_dest, data_ref);
11522 new_temp = make_ssa_name (vec_dest, new_stmt);
11523 gimple_set_lhs (new_stmt, new_temp);
11524 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11527 /* 3. Handle explicit realignment if necessary/supported.
11528 Create in loop:
11529 vec_dest = realign_load (msq, lsq, realignment_token) */
11530 if (!costing_p
11531 && (alignment_support_scheme == dr_explicit_realign_optimized
11532 || alignment_support_scheme == dr_explicit_realign))
11534 lsq = gimple_assign_lhs (new_stmt);
11535 if (!realignment_token)
11536 realignment_token = dataref_ptr;
11537 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11538 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11539 lsq, realignment_token);
11540 new_temp = make_ssa_name (vec_dest, new_stmt);
11541 gimple_assign_set_lhs (new_stmt, new_temp);
11542 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11544 if (alignment_support_scheme == dr_explicit_realign_optimized)
11546 gcc_assert (phi);
11547 if (i == vec_num - 1 && j == ncopies - 1)
11548 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11549 UNKNOWN_LOCATION);
11550 msq = lsq;
11554 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11556 if (costing_p)
11557 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11558 stmt_info, 0, vect_body);
11559 else
11561 tree perm_mask = perm_mask_for_reverse (vectype);
11562 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11563 perm_mask, stmt_info, gsi);
11564 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11568 /* Collect vector loads and later create their permutation in
11569 vect_transform_grouped_load (). */
11570 if (!costing_p && (grouped_load || slp_perm))
11571 dr_chain.quick_push (new_temp);
11573 /* Store vector loads in the corresponding SLP_NODE. */
11574 if (!costing_p && slp && !slp_perm)
11575 slp_node->push_vec_def (new_stmt);
11577 /* With an SLP permutation we load the gaps as well; without
11578 one we need to skip the gaps after we manage to load all
11579 the elements. group_gap_adj is DR_GROUP_SIZE here. */
11580 group_elt += nunits;
11581 if (!costing_p
11582 && maybe_ne (group_gap_adj, 0U)
11583 && !slp_perm
11584 && known_eq (group_elt, group_size - group_gap_adj))
11586 poly_wide_int bump_val
11587 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11588 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11589 == -1)
11590 bump_val = -bump_val;
11591 tree bump = wide_int_to_tree (sizetype, bump_val);
11592 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11593 stmt_info, bump);
11594 group_elt = 0;
11597 /* Bump the vector pointer to account for a gap or for excess
11598 elements loaded for a permuted SLP load. */
11599 if (!costing_p
11600 && maybe_ne (group_gap_adj, 0U)
11601 && slp_perm)
11603 poly_wide_int bump_val
11604 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11605 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11606 bump_val = -bump_val;
11607 tree bump = wide_int_to_tree (sizetype, bump_val);
11608 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11609 stmt_info, bump);
11612 if (slp && !slp_perm)
11613 continue;
11615 if (slp_perm)
11617 unsigned n_perms;
11618 /* For SLP we know we've seen all possible uses of dr_chain so
11619 direct vect_transform_slp_perm_load to DCE the unused parts.
11620 ??? This is a hack to prevent compile-time issues as seen
11621 in PR101120 and friends. */
11622 if (costing_p)
11624 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11625 true, &n_perms, nullptr);
11626 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11627 stmt_info, 0, vect_body);
11629 else
11631 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11632 gsi, vf, false, &n_perms,
11633 nullptr, true);
11634 gcc_assert (ok);
11637 else
11639 if (grouped_load)
11641 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11642 /* We assume that the cost of a single load-lanes instruction
11643 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11644 If a grouped access is instead being provided by a
11645 load-and-permute operation, include the cost of the
11646 permutes. */
11647 if (costing_p && first_stmt_info == stmt_info)
11649 /* Uses even and odd extract operations or shuffle
11650 operations for each needed permute. */
11651 int group_size = DR_GROUP_SIZE (first_stmt_info);
11652 int nstmts = ceil_log2 (group_size) * group_size;
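/* For instance, a group of size 8 is costed as
   ceil_log2 (8) * 8 = 24 permute stmts.  */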
11653 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11654 stmt_info, 0, vect_body);
11656 if (dump_enabled_p ())
11657 dump_printf_loc (MSG_NOTE, vect_location,
11658 "vect_model_load_cost:"
11659 "strided group_size = %d .\n",
11660 group_size);
11662 else if (!costing_p)
11664 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11665 group_size, gsi);
11666 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11669 else if (!costing_p)
11670 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11672 dr_chain.release ();
11674 if (!slp && !costing_p)
11675 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11677 if (costing_p)
11679 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11680 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11681 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11682 if (dump_enabled_p ())
11683 dump_printf_loc (MSG_NOTE, vect_location,
11684 "vect_model_load_cost: inside_cost = %u, "
11685 "prologue_cost = %u .\n",
11686 inside_cost, prologue_cost);
11689 return true;
11692 /* Function vect_is_simple_cond.
11694 Input:
11695 LOOP - the loop that is being vectorized.
11696 COND - Condition that is checked for simple use.
11698 Output:
11699 *COMP_VECTYPE - the vector type for the comparison.
11700 *DTS - The def types for the arguments of the comparison
11702 Returns whether a COND can be vectorized. Checks whether the
11703 condition operands are supportable using vect_is_simple_use. */
11705 static bool
11706 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11707 slp_tree slp_node, tree *comp_vectype,
11708 enum vect_def_type *dts, tree vectype)
11710 tree lhs, rhs;
11711 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11712 slp_tree slp_op;
11714 /* Mask case. */
11715 if (TREE_CODE (cond) == SSA_NAME
11716 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11718 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11719 &slp_op, &dts[0], comp_vectype)
11720 || !*comp_vectype
11721 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11722 return false;
11723 return true;
11726 if (!COMPARISON_CLASS_P (cond))
11727 return false;
11729 lhs = TREE_OPERAND (cond, 0);
11730 rhs = TREE_OPERAND (cond, 1);
11732 if (TREE_CODE (lhs) == SSA_NAME)
11734 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11735 &lhs, &slp_op, &dts[0], &vectype1))
11736 return false;
11738 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11739 || TREE_CODE (lhs) == FIXED_CST)
11740 dts[0] = vect_constant_def;
11741 else
11742 return false;
11744 if (TREE_CODE (rhs) == SSA_NAME)
11746 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11747 &rhs, &slp_op, &dts[1], &vectype2))
11748 return false;
11750 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11751 || TREE_CODE (rhs) == FIXED_CST)
11752 dts[1] = vect_constant_def;
11753 else
11754 return false;
11756 if (vectype1 && vectype2
11757 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11758 TYPE_VECTOR_SUBPARTS (vectype2)))
11759 return false;
11761 *comp_vectype = vectype1 ? vectype1 : vectype2;
11762 /* Invariant comparison. */
11763 if (! *comp_vectype)
11765 tree scalar_type = TREE_TYPE (lhs);
11766 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11767 *comp_vectype = truth_type_for (vectype);
11768 else
11770 /* If we can widen the comparison to match vectype do so. */
11771 if (INTEGRAL_TYPE_P (scalar_type)
11772 && !slp_node
11773 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11774 TYPE_SIZE (TREE_TYPE (vectype))))
11775 scalar_type = build_nonstandard_integer_type
11776 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
11777 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11778 slp_node);
11782 return true;
11785 /* vectorizable_condition.
11787 Check if STMT_INFO is conditional modify expression that can be vectorized.
11788 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11789 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11790 at GSI.
11792 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11794 Return true if STMT_INFO is vectorizable in this way. */
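/* For instance (with made-up SSA names), a scalar statement

       x_1 = a_2 < b_3 ? c_4 : d_5;

   is, in the non-reduction case, vectorized into

       vect_x = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;

   possibly with the comparison emitted as a separate vector stmt when
   the target requires a mask operand.  */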
11796 static bool
11797 vectorizable_condition (vec_info *vinfo,
11798 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11799 gimple **vec_stmt,
11800 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11802 tree scalar_dest = NULL_TREE;
11803 tree vec_dest = NULL_TREE;
11804 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
11805 tree then_clause, else_clause;
11806 tree comp_vectype = NULL_TREE;
11807 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
11808 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
11809 tree vec_compare;
11810 tree new_temp;
11811 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11812 enum vect_def_type dts[4]
11813 = {vect_unknown_def_type, vect_unknown_def_type,
11814 vect_unknown_def_type, vect_unknown_def_type};
11815 int ndts = 4;
11816 int ncopies;
11817 int vec_num;
11818 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11819 int i;
11820 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11821 vec<tree> vec_oprnds0 = vNULL;
11822 vec<tree> vec_oprnds1 = vNULL;
11823 vec<tree> vec_oprnds2 = vNULL;
11824 vec<tree> vec_oprnds3 = vNULL;
11825 tree vec_cmp_type;
11826 bool masked = false;
11828 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11829 return false;
11831 /* Is vectorizable conditional operation? */
11832 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11833 if (!stmt)
11834 return false;
11836 code = gimple_assign_rhs_code (stmt);
11837 if (code != COND_EXPR)
11838 return false;
11840 stmt_vec_info reduc_info = NULL;
11841 int reduc_index = -1;
11842 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
11843 bool for_reduction
11844 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
11845 if (for_reduction)
11847 if (slp_node)
11848 return false;
11849 reduc_info = info_for_reduction (vinfo, stmt_info);
11850 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
11851 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
11852 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
11853 || reduc_index != -1);
11855 else
11857 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11858 return false;
11861 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11862 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11864 if (slp_node)
11866 ncopies = 1;
11867 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
11869 else
11871 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11872 vec_num = 1;
11875 gcc_assert (ncopies >= 1);
11876 if (for_reduction && ncopies > 1)
11877 return false; /* FORNOW */
11879 cond_expr = gimple_assign_rhs1 (stmt);
11881 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
11882 &comp_vectype, &dts[0], vectype)
11883 || !comp_vectype)
11884 return false;
11886 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
11887 slp_tree then_slp_node, else_slp_node;
11888 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
11889 &then_clause, &then_slp_node, &dts[2], &vectype1))
11890 return false;
11891 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
11892 &else_clause, &else_slp_node, &dts[3], &vectype2))
11893 return false;
11895 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
11896 return false;
11898 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
11899 return false;
11901 masked = !COMPARISON_CLASS_P (cond_expr);
11902 vec_cmp_type = truth_type_for (comp_vectype);
11904 if (vec_cmp_type == NULL_TREE)
11905 return false;
11907 cond_code = TREE_CODE (cond_expr);
11908 if (!masked)
11910 cond_expr0 = TREE_OPERAND (cond_expr, 0);
11911 cond_expr1 = TREE_OPERAND (cond_expr, 1);
11914 /* For conditional reductions, the "then" value needs to be the candidate
11915 value calculated by this iteration while the "else" value needs to be
11916 the result carried over from previous iterations. If the COND_EXPR
11917 is the other way around, we need to swap it. */
11918 bool must_invert_cmp_result = false;
11919 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
11921 if (masked)
11922 must_invert_cmp_result = true;
11923 else
11925 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
11926 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
11927 if (new_code == ERROR_MARK)
11928 must_invert_cmp_result = true;
11929 else
11931 cond_code = new_code;
11932 /* Make sure we don't accidentally use the old condition. */
11933 cond_expr = NULL_TREE;
11936 std::swap (then_clause, else_clause);
11939 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
11941 /* Boolean values may have another representation in vectors
11942 and therefore we prefer bit operations over comparison for
11943 them (which also works for scalar masks). We store opcodes
11944 to use in bitop1 and bitop2. Statement is vectorized as
11945 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
11946 depending on bitop1 and bitop2 arity. */
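/* For boolean a and b the mappings below therefore implement, e.g.:
     a >  b  <=>  a & ~b
     a >= b  <=>  a | ~b
     a <  b  <=>  b & ~a
     a <= b  <=>  b | ~a
     a != b  <=>  a ^ b
     a == b  <=>  ~(a ^ b)  */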
11947 switch (cond_code)
11949 case GT_EXPR:
11950 bitop1 = BIT_NOT_EXPR;
11951 bitop2 = BIT_AND_EXPR;
11952 break;
11953 case GE_EXPR:
11954 bitop1 = BIT_NOT_EXPR;
11955 bitop2 = BIT_IOR_EXPR;
11956 break;
11957 case LT_EXPR:
11958 bitop1 = BIT_NOT_EXPR;
11959 bitop2 = BIT_AND_EXPR;
11960 std::swap (cond_expr0, cond_expr1);
11961 break;
11962 case LE_EXPR:
11963 bitop1 = BIT_NOT_EXPR;
11964 bitop2 = BIT_IOR_EXPR;
11965 std::swap (cond_expr0, cond_expr1);
11966 break;
11967 case NE_EXPR:
11968 bitop1 = BIT_XOR_EXPR;
11969 break;
11970 case EQ_EXPR:
11971 bitop1 = BIT_XOR_EXPR;
11972 bitop2 = BIT_NOT_EXPR;
11973 break;
11974 default:
11975 return false;
11977 cond_code = SSA_NAME;
11980 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
11981 && reduction_type == EXTRACT_LAST_REDUCTION
11982 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
11984 if (dump_enabled_p ())
11985 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11986 "reduction comparison operation not supported.\n");
11987 return false;
11990 if (!vec_stmt)
11992 if (bitop1 != NOP_EXPR)
11994 machine_mode mode = TYPE_MODE (comp_vectype);
11995 optab optab;
11997 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
11998 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11999 return false;
12001 if (bitop2 != NOP_EXPR)
12003 optab = optab_for_tree_code (bitop2, comp_vectype,
12004 optab_default);
12005 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12006 return false;
12010 vect_cost_for_stmt kind = vector_stmt;
12011 if (reduction_type == EXTRACT_LAST_REDUCTION)
12012 /* Count one reduction-like operation per vector. */
12013 kind = vec_to_scalar;
12014 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12015 && (masked
12016 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12017 cond_code)
12018 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12019 ERROR_MARK))))
12020 return false;
12022 if (slp_node
12023 && (!vect_maybe_update_slp_op_vectype
12024 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12025 || (op_adjust == 1
12026 && !vect_maybe_update_slp_op_vectype
12027 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12028 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12029 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12031 if (dump_enabled_p ())
12032 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12033 "incompatible vector types for invariants\n");
12034 return false;
12037 if (loop_vinfo && for_reduction
12038 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12040 if (reduction_type == EXTRACT_LAST_REDUCTION)
12042 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12043 vectype, OPTIMIZE_FOR_SPEED))
12044 vect_record_loop_len (loop_vinfo,
12045 &LOOP_VINFO_LENS (loop_vinfo),
12046 ncopies * vec_num, vectype, 1);
12047 else
12048 vect_record_loop_mask (loop_vinfo,
12049 &LOOP_VINFO_MASKS (loop_vinfo),
12050 ncopies * vec_num, vectype, NULL);
12052 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12053 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12055 if (dump_enabled_p ())
12056 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12057 "conditional reduction prevents the use"
12058 " of partial vectors.\n");
12059 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12063 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12064 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
12065 cost_vec, kind);
12066 return true;
12069 /* Transform. */
12071 /* Handle def. */
12072 scalar_dest = gimple_assign_lhs (stmt);
12073 if (reduction_type != EXTRACT_LAST_REDUCTION)
12074 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12076 bool swap_cond_operands = false;
12078 /* See whether another part of the vectorized code applies a loop
12079 mask to the condition, or to its inverse. */
12081 vec_loop_masks *masks = NULL;
12082 vec_loop_lens *lens = NULL;
12083 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12085 if (reduction_type == EXTRACT_LAST_REDUCTION)
12086 lens = &LOOP_VINFO_LENS (loop_vinfo);
12088 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12090 if (reduction_type == EXTRACT_LAST_REDUCTION)
12091 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12092 else
12094 scalar_cond_masked_key cond (cond_expr, ncopies);
12095 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12096 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12097 else
12099 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12100 tree_code orig_code = cond.code;
12101 cond.code = invert_tree_comparison (cond.code, honor_nans);
12102 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12104 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12105 cond_code = cond.code;
12106 swap_cond_operands = true;
12108 else
12110 /* Try the inverse of the current mask. We check if the
12111 inverse mask is live and if so we generate a negate of
12112 the current mask such that we still honor NaNs. */
12113 cond.inverted_p = true;
12114 cond.code = orig_code;
12115 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12117 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12118 cond_code = cond.code;
12119 swap_cond_operands = true;
12120 must_invert_cmp_result = true;
12127 /* Handle cond expr. */
12128 if (masked)
12129 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12130 cond_expr, &vec_oprnds0, comp_vectype,
12131 then_clause, &vec_oprnds2, vectype,
12132 reduction_type != EXTRACT_LAST_REDUCTION
12133 ? else_clause : NULL, &vec_oprnds3, vectype);
12134 else
12135 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12136 cond_expr0, &vec_oprnds0, comp_vectype,
12137 cond_expr1, &vec_oprnds1, comp_vectype,
12138 then_clause, &vec_oprnds2, vectype,
12139 reduction_type != EXTRACT_LAST_REDUCTION
12140 ? else_clause : NULL, &vec_oprnds3, vectype);
12142 /* Arguments are ready. Create the new vector stmt. */
12143 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12145 vec_then_clause = vec_oprnds2[i];
12146 if (reduction_type != EXTRACT_LAST_REDUCTION)
12147 vec_else_clause = vec_oprnds3[i];
12149 if (swap_cond_operands)
12150 std::swap (vec_then_clause, vec_else_clause);
12152 if (masked)
12153 vec_compare = vec_cond_lhs;
12154 else
12156 vec_cond_rhs = vec_oprnds1[i];
12157 if (bitop1 == NOP_EXPR)
12159 gimple_seq stmts = NULL;
12160 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12161 vec_cond_lhs, vec_cond_rhs);
12162 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12164 else
12166 new_temp = make_ssa_name (vec_cmp_type);
12167 gassign *new_stmt;
12168 if (bitop1 == BIT_NOT_EXPR)
12169 new_stmt = gimple_build_assign (new_temp, bitop1,
12170 vec_cond_rhs);
12171 else
12172 new_stmt
12173 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12174 vec_cond_rhs);
12175 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12176 if (bitop2 == NOP_EXPR)
12177 vec_compare = new_temp;
12178 else if (bitop2 == BIT_NOT_EXPR
12179 && reduction_type != EXTRACT_LAST_REDUCTION)
12181 /* Instead of doing ~x ? y : z do x ? z : y. */
12182 vec_compare = new_temp;
12183 std::swap (vec_then_clause, vec_else_clause);
12185 else
12187 vec_compare = make_ssa_name (vec_cmp_type);
12188 if (bitop2 == BIT_NOT_EXPR)
12189 new_stmt
12190 = gimple_build_assign (vec_compare, bitop2, new_temp);
12191 else
12192 new_stmt
12193 = gimple_build_assign (vec_compare, bitop2,
12194 vec_cond_lhs, new_temp);
12195 vect_finish_stmt_generation (vinfo, stmt_info,
12196 new_stmt, gsi);
12201 /* If we decided to apply a loop mask to the result of the vector
12202 comparison, AND the comparison with the mask now. Later passes
12203 should then be able to reuse the AND results between multiple
12204 vector statements.
12206 For example:
12207 for (int i = 0; i < 100; ++i)
12208 x[i] = y[i] ? z[i] : 10;
12210 results in following optimized GIMPLE:
12212 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12213 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12214 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12215 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12216 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12217 vect_iftmp.11_47, { 10, ... }>;
12219 instead of using masked and unmasked forms of
12220 vec != { 0, ... } (masked in the MASK_LOAD,
12221 unmasked in the VEC_COND_EXPR). */
12223 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12224 in cases where that's necessary. */
12226 tree len = NULL_TREE, bias = NULL_TREE;
12227 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12229 if (!is_gimple_val (vec_compare))
12231 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12232 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12233 vec_compare);
12234 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12235 vec_compare = vec_compare_name;
12238 if (must_invert_cmp_result)
12240 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12241 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12242 BIT_NOT_EXPR,
12243 vec_compare);
12244 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12245 vec_compare = vec_compare_name;
12248 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12249 vectype, OPTIMIZE_FOR_SPEED))
12251 if (lens)
12253 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12254 vec_num * ncopies, vectype, i, 1);
12255 signed char biasval
12256 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12257 bias = build_int_cst (intQI_type_node, biasval);
12259 else
12261 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12262 bias = build_int_cst (intQI_type_node, 0);
12265 if (masks)
12267 tree loop_mask
12268 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12269 vectype, i);
12270 tree tmp2 = make_ssa_name (vec_cmp_type);
12271 gassign *g
12272 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12273 loop_mask);
12274 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12275 vec_compare = tmp2;
12279 gimple *new_stmt;
12280 if (reduction_type == EXTRACT_LAST_REDUCTION)
12282 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12283 tree lhs = gimple_get_lhs (old_stmt);
12284 if (len)
12285 new_stmt = gimple_build_call_internal
12286 (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
12287 vec_then_clause, len, bias);
12288 else
12289 new_stmt = gimple_build_call_internal
12290 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
12291 vec_then_clause);
12292 gimple_call_set_lhs (new_stmt, lhs);
12293 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12294 if (old_stmt == gsi_stmt (*gsi))
12295 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12296 else
12298 /* In this case we're moving the definition to later in the
12299 block. That doesn't matter because the only uses of the
12300 lhs are in phi statements. */
12301 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12302 gsi_remove (&old_gsi, true);
12303 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12306 else
12308 new_temp = make_ssa_name (vec_dest);
12309 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12310 vec_then_clause, vec_else_clause);
12311 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12313 if (slp_node)
12314 slp_node->push_vec_def (new_stmt);
12315 else
12316 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12319 if (!slp_node)
12320 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12322 vec_oprnds0.release ();
12323 vec_oprnds1.release ();
12324 vec_oprnds2.release ();
12325 vec_oprnds3.release ();
12327 return true;
12330 /* vectorizable_comparison.
12332 Check if STMT_INFO is comparison expression that can be vectorized.
12333 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12334 comparison, put it in VEC_STMT, and insert it at GSI.
12336 Return true if STMT_INFO is vectorizable in this way. */
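/* For instance (made-up SSA names), a scalar mask definition

       mask_1 = a_2 < b_3;

   becomes a vector comparison producing a boolean-vector result, or,
   when the operands are themselves boolean vectors, the bit-operation
   sequence selected below.  */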
12338 static bool
12339 vectorizable_comparison (vec_info *vinfo,
12340 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12341 gimple **vec_stmt,
12342 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12344 tree lhs, rhs1, rhs2;
12345 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12346 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12347 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12348 tree new_temp;
12349 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12350 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12351 int ndts = 2;
12352 poly_uint64 nunits;
12353 int ncopies;
12354 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12355 int i;
12356 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12357 vec<tree> vec_oprnds0 = vNULL;
12358 vec<tree> vec_oprnds1 = vNULL;
12359 tree mask_type;
12360 tree mask;
12362 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12363 return false;
12365 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12366 return false;
12368 mask_type = vectype;
12369 nunits = TYPE_VECTOR_SUBPARTS (vectype);
12371 if (slp_node)
12372 ncopies = 1;
12373 else
12374 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12376 gcc_assert (ncopies >= 1);
12377 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12378 return false;
12380 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12381 if (!stmt)
12382 return false;
12384 code = gimple_assign_rhs_code (stmt);
12386 if (TREE_CODE_CLASS (code) != tcc_comparison)
12387 return false;
12389 slp_tree slp_rhs1, slp_rhs2;
12390 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12391 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12392 return false;
12394 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12395 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12396 return false;
12398 if (vectype1 && vectype2
12399 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12400 TYPE_VECTOR_SUBPARTS (vectype2)))
12401 return false;
12403 vectype = vectype1 ? vectype1 : vectype2;
12405 /* Invariant comparison. */
12406 if (!vectype)
12408 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
12409 vectype = mask_type;
12410 else
12411 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
12412 slp_node);
12413 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12414 return false;
12416 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12417 return false;
12419 /* Can't compare mask and non-mask types. */
12420 if (vectype1 && vectype2
12421 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12422 return false;
12424 /* Boolean values may have another representation in vectors
12425 and therefore we prefer bit operations over comparison for
12426 them (which also works for scalar masks). We store opcodes
12427 to use in bitop1 and bitop2. The statement is vectorized as
12428 BITOP2 (rhs1 BITOP1 rhs2) or
12429 rhs1 BITOP2 (BITOP1 rhs2)
12430 depending on bitop1 and bitop2 arity. */
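/* For example, with boolean vectors A and B this gives:
     A >  B  ->  A & ~B     (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR)
     A >= B  ->  A | ~B     (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR)
     A <  B  ->  B & ~A     (as for >, with the operands swapped)
     A <= B  ->  B | ~A     (as for >=, with the operands swapped)
     A == B  ->  ~(A ^ B)   (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR)
     A != B  ->  A ^ B      (bitop1 = BIT_XOR_EXPR only).  */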
12431 bool swap_p = false;
12432 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12434 if (code == GT_EXPR)
12436 bitop1 = BIT_NOT_EXPR;
12437 bitop2 = BIT_AND_EXPR;
12439 else if (code == GE_EXPR)
12441 bitop1 = BIT_NOT_EXPR;
12442 bitop2 = BIT_IOR_EXPR;
12444 else if (code == LT_EXPR)
12446 bitop1 = BIT_NOT_EXPR;
12447 bitop2 = BIT_AND_EXPR;
12448 swap_p = true;
12450 else if (code == LE_EXPR)
12452 bitop1 = BIT_NOT_EXPR;
12453 bitop2 = BIT_IOR_EXPR;
12454 swap_p = true;
12456 else
12458 bitop1 = BIT_XOR_EXPR;
12459 if (code == EQ_EXPR)
12460 bitop2 = BIT_NOT_EXPR;
12464 if (!vec_stmt)
12466 if (bitop1 == NOP_EXPR)
12468 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12469 return false;
12471 else
12473 machine_mode mode = TYPE_MODE (vectype);
12474 optab optab;
12476 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12477 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12478 return false;
12480 if (bitop2 != NOP_EXPR)
12482 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12483 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12484 return false;
12488 /* Put types on constant and invariant SLP children. */
12489 if (slp_node
12490 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12491 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12493 if (dump_enabled_p ())
12494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12495 "incompatible vector types for invariants\n");
12496 return false;
12499 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12500 vect_model_simple_cost (vinfo, stmt_info,
12501 ncopies * (1 + (bitop2 != NOP_EXPR)),
12502 dts, ndts, slp_node, cost_vec);
12503 return true;
12506 /* Transform. */
12508 /* Handle def. */
12509 lhs = gimple_assign_lhs (stmt);
12510 mask = vect_create_destination_var (lhs, mask_type);
12512 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12513 rhs1, &vec_oprnds0, vectype,
12514 rhs2, &vec_oprnds1, vectype);
12515 if (swap_p)
12516 std::swap (vec_oprnds0, vec_oprnds1);
12518 /* Arguments are ready. Create the new vector stmt. */
12519 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12521 gimple *new_stmt;
12522 vec_rhs2 = vec_oprnds1[i];
12524 new_temp = make_ssa_name (mask);
12525 if (bitop1 == NOP_EXPR)
12527 new_stmt = gimple_build_assign (new_temp, code,
12528 vec_rhs1, vec_rhs2);
12529 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12531 else
12533 if (bitop1 == BIT_NOT_EXPR)
12534 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12535 else
12536 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12537 vec_rhs2);
12538 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12539 if (bitop2 != NOP_EXPR)
12541 tree res = make_ssa_name (mask);
12542 if (bitop2 == BIT_NOT_EXPR)
12543 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12544 else
12545 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12546 new_temp);
12547 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12550 if (slp_node)
12551 slp_node->push_vec_def (new_stmt);
12552 else
12553 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12556 if (!slp_node)
12557 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12559 vec_oprnds0.release ();
12560 vec_oprnds1.release ();
12562 return true;
12565 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12566 can handle all live statements in the node. Otherwise return true
12567 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12568 VEC_STMT_P is as for vectorizable_live_operation. */
12570 static bool
12571 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
12572 slp_tree slp_node, slp_instance slp_node_instance,
12573 bool vec_stmt_p,
12574 stmt_vector_for_cost *cost_vec)
12576 if (slp_node)
12578 stmt_vec_info slp_stmt_info;
12579 unsigned int i;
12580 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
12582 if (STMT_VINFO_LIVE_P (slp_stmt_info)
12583 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
12584 slp_node_instance, i,
12585 vec_stmt_p, cost_vec))
12586 return false;
12589 else if (STMT_VINFO_LIVE_P (stmt_info)
12590 && !vectorizable_live_operation (vinfo, stmt_info,
12591 slp_node, slp_node_instance, -1,
12592 vec_stmt_p, cost_vec))
12593 return false;
12595 return true;
12598 /* Make sure the statement is vectorizable. */
12600 opt_result
12601 vect_analyze_stmt (vec_info *vinfo,
12602 stmt_vec_info stmt_info, bool *need_to_vectorize,
12603 slp_tree node, slp_instance node_instance,
12604 stmt_vector_for_cost *cost_vec)
12606 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12607 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
12608 bool ok;
12609 gimple_seq pattern_def_seq;
12611 if (dump_enabled_p ())
12612 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
12613 stmt_info->stmt);
12615 if (gimple_has_volatile_ops (stmt_info->stmt))
12616 return opt_result::failure_at (stmt_info->stmt,
12617 "not vectorized:"
12618 " stmt has volatile operands: %G\n",
12619 stmt_info->stmt);
12621 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12622 && node == NULL
12623 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
12625 gimple_stmt_iterator si;
12627 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
12629 stmt_vec_info pattern_def_stmt_info
12630 = vinfo->lookup_stmt (gsi_stmt (si));
12631 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
12632 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
12634 /* Analyze def stmt of STMT if it's a pattern stmt. */
12635 if (dump_enabled_p ())
12636 dump_printf_loc (MSG_NOTE, vect_location,
12637 "==> examining pattern def statement: %G",
12638 pattern_def_stmt_info->stmt);
12640 opt_result res
12641 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
12642 need_to_vectorize, node, node_instance,
12643 cost_vec);
12644 if (!res)
12645 return res;
12650 /* Skip stmts that do not need to be vectorized. In loops this is expected
12651 to include:
12652 - the COND_EXPR which is the loop exit condition
12653 - any LABEL_EXPRs in the loop
12654 - computations that are used only for array indexing or loop control.
12655 In basic blocks we only analyze statements that are a part of some SLP
12656 instance, therefore, all the statements are relevant.
12658 A pattern statement needs to be analyzed instead of the original statement
12659 if the original statement is not relevant. Otherwise, we analyze both
12660 statements. In basic blocks we are called from some SLP instance
12661 traversal; we don't analyze pattern stmts instead of the original ones,
12662 as the pattern stmts will already be part of an SLP instance. */
12664 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
12665 if (!STMT_VINFO_RELEVANT_P (stmt_info)
12666 && !STMT_VINFO_LIVE_P (stmt_info))
12668 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12669 && pattern_stmt_info
12670 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12671 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12673 /* Analyze PATTERN_STMT instead of the original stmt. */
12674 stmt_info = pattern_stmt_info;
12675 if (dump_enabled_p ())
12676 dump_printf_loc (MSG_NOTE, vect_location,
12677 "==> examining pattern statement: %G",
12678 stmt_info->stmt);
12680 else
12682 if (dump_enabled_p ())
12683 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
12685 return opt_result::success ();
12688 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12689 && node == NULL
12690 && pattern_stmt_info
12691 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12692 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12694 /* Analyze PATTERN_STMT too. */
12695 if (dump_enabled_p ())
12696 dump_printf_loc (MSG_NOTE, vect_location,
12697 "==> examining pattern statement: %G",
12698 pattern_stmt_info->stmt);
12700 opt_result res
12701 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
12702 node_instance, cost_vec);
12703 if (!res)
12704 return res;
12707 switch (STMT_VINFO_DEF_TYPE (stmt_info))
12709 case vect_internal_def:
12710 break;
12712 case vect_reduction_def:
12713 case vect_nested_cycle:
12714 gcc_assert (!bb_vinfo
12715 && (relevance == vect_used_in_outer
12716 || relevance == vect_used_in_outer_by_reduction
12717 || relevance == vect_used_by_reduction
12718 || relevance == vect_unused_in_scope
12719 || relevance == vect_used_only_live));
12720 break;
12722 case vect_induction_def:
12723 case vect_first_order_recurrence:
12724 gcc_assert (!bb_vinfo);
12725 break;
12727 case vect_constant_def:
12728 case vect_external_def:
12729 case vect_unknown_def_type:
12730 default:
12731 gcc_unreachable ();
12734 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12735 if (node)
12736 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
12738 if (STMT_VINFO_RELEVANT_P (stmt_info))
12740 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
12741 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12742 || (call && gimple_call_lhs (call) == NULL_TREE));
12743 *need_to_vectorize = true;
12746 if (PURE_SLP_STMT (stmt_info) && !node)
12748 if (dump_enabled_p ())
12749 dump_printf_loc (MSG_NOTE, vect_location,
12750 "handled only by SLP analysis\n");
12751 return opt_result::success ();
12754 ok = true;
12755 if (!bb_vinfo
12756 && (STMT_VINFO_RELEVANT_P (stmt_info)
12757 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
12758 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12759 -mveclibabi= takes preference over library functions with
12760 the simd attribute. */
12761 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12762 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
12763 cost_vec)
12764 || vectorizable_conversion (vinfo, stmt_info,
12765 NULL, NULL, node, cost_vec)
12766 || vectorizable_operation (vinfo, stmt_info,
12767 NULL, NULL, node, cost_vec)
12768 || vectorizable_assignment (vinfo, stmt_info,
12769 NULL, NULL, node, cost_vec)
12770 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12771 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12772 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12773 node, node_instance, cost_vec)
12774 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
12775 NULL, node, cost_vec)
12776 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12777 || vectorizable_condition (vinfo, stmt_info,
12778 NULL, NULL, node, cost_vec)
12779 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12780 cost_vec)
12781 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12782 stmt_info, NULL, node)
12783 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12784 stmt_info, NULL, node, cost_vec));
12785 else
12787 if (bb_vinfo)
12788 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12789 || vectorizable_simd_clone_call (vinfo, stmt_info,
12790 NULL, NULL, node, cost_vec)
12791 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
12792 cost_vec)
12793 || vectorizable_shift (vinfo, stmt_info,
12794 NULL, NULL, node, cost_vec)
12795 || vectorizable_operation (vinfo, stmt_info,
12796 NULL, NULL, node, cost_vec)
12797 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
12798 cost_vec)
12799 || vectorizable_load (vinfo, stmt_info,
12800 NULL, NULL, node, cost_vec)
12801 || vectorizable_store (vinfo, stmt_info,
12802 NULL, NULL, node, cost_vec)
12803 || vectorizable_condition (vinfo, stmt_info,
12804 NULL, NULL, node, cost_vec)
12805 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12806 cost_vec)
12807 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12810 if (node)
12811 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12813 if (!ok)
12814 return opt_result::failure_at (stmt_info->stmt,
12815 "not vectorized:"
12816 " relevant stmt not supported: %G",
12817 stmt_info->stmt);
12819 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
12820 need extra handling, except for vectorizable reductions. */
12821 if (!bb_vinfo
12822 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
12823 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
12824 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
12825 stmt_info, node, node_instance,
12826 false, cost_vec))
12827 return opt_result::failure_at (stmt_info->stmt,
12828 "not vectorized:"
12829 " live stmt not supported: %G",
12830 stmt_info->stmt);
12832 return opt_result::success ();
12836 /* Function vect_transform_stmt.
12838 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
12840 bool
12841 vect_transform_stmt (vec_info *vinfo,
12842 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12843 slp_tree slp_node, slp_instance slp_node_instance)
12845 bool is_store = false;
12846 gimple *vec_stmt = NULL;
12847 bool done;
12849 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
12851 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12852 if (slp_node)
12853 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
12855 switch (STMT_VINFO_TYPE (stmt_info))
12857 case type_demotion_vec_info_type:
12858 case type_promotion_vec_info_type:
12859 case type_conversion_vec_info_type:
12860 done = vectorizable_conversion (vinfo, stmt_info,
12861 gsi, &vec_stmt, slp_node, NULL);
12862 gcc_assert (done);
12863 break;
12865 case induc_vec_info_type:
12866 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
12867 stmt_info, &vec_stmt, slp_node,
12868 NULL);
12869 gcc_assert (done);
12870 break;
12872 case shift_vec_info_type:
12873 done = vectorizable_shift (vinfo, stmt_info,
12874 gsi, &vec_stmt, slp_node, NULL);
12875 gcc_assert (done);
12876 break;
12878 case op_vec_info_type:
12879 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12880 NULL);
12881 gcc_assert (done);
12882 break;
12884 case assignment_vec_info_type:
12885 done = vectorizable_assignment (vinfo, stmt_info,
12886 gsi, &vec_stmt, slp_node, NULL);
12887 gcc_assert (done);
12888 break;
12890 case load_vec_info_type:
12891 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12892 NULL);
12893 gcc_assert (done);
12894 break;
12896 case store_vec_info_type:
12897 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
12898 && !slp_node
12899 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
12900 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
12901 /* In case of interleaving, the whole chain is vectorized when the
12902 last store in the chain is reached. Store stmts before the last
12903 one are skipped, and their vec_stmt_info shouldn't be freed
12904 meanwhile. */
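/* For example, for an interleaved group of four scalar stores only the
   last member of the group reaches vectorizable_store below; the earlier
   members just bump DR_GROUP_STORE_COUNT and are skipped here.  */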
12906 else
12908 done = vectorizable_store (vinfo, stmt_info,
12909 gsi, &vec_stmt, slp_node, NULL);
12910 gcc_assert (done);
12911 is_store = true;
12913 break;
12915 case condition_vec_info_type:
12916 done = vectorizable_condition (vinfo, stmt_info,
12917 gsi, &vec_stmt, slp_node, NULL);
12918 gcc_assert (done);
12919 break;
12921 case comparison_vec_info_type:
12922 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
12923 slp_node, NULL);
12924 gcc_assert (done);
12925 break;
12927 case call_vec_info_type:
12928 done = vectorizable_call (vinfo, stmt_info,
12929 gsi, &vec_stmt, slp_node, NULL);
12930 break;
12932 case call_simd_clone_vec_info_type:
12933 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
12934 slp_node, NULL);
12935 break;
12937 case reduc_vec_info_type:
12938 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12939 gsi, &vec_stmt, slp_node);
12940 gcc_assert (done);
12941 break;
12943 case cycle_phi_info_type:
12944 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
12945 &vec_stmt, slp_node, slp_node_instance);
12946 gcc_assert (done);
12947 break;
12949 case lc_phi_info_type:
12950 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12951 stmt_info, &vec_stmt, slp_node);
12952 gcc_assert (done);
12953 break;
12955 case recurr_info_type:
12956 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12957 stmt_info, &vec_stmt, slp_node, NULL);
12958 gcc_assert (done);
12959 break;
12961 case phi_info_type:
12962 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
12963 gcc_assert (done);
12964 break;
12966 default:
12967 if (!STMT_VINFO_LIVE_P (stmt_info))
12969 if (dump_enabled_p ())
12970 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12971 "stmt not supported.\n");
12972 gcc_unreachable ();
12974 done = true;
12977 if (!slp_node && vec_stmt)
12978 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
12980 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
12982 /* Handle stmts whose DEF is used outside the loop-nest that is
12983 being vectorized. */
12984 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
12985 slp_node_instance, true, NULL);
12986 gcc_assert (done);
12989 if (slp_node)
12990 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12992 return is_store;
12996 /* Remove a group of stores (for SLP or interleaving), free their
12997 stmt_vec_info. */
12999 void
13000 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13002 stmt_vec_info next_stmt_info = first_stmt_info;
13004 while (next_stmt_info)
13006 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13007 next_stmt_info = vect_orig_stmt (next_stmt_info);
13008 /* Free the attached stmt_vec_info and remove the stmt. */
13009 vinfo->remove_stmt (next_stmt_info);
13010 next_stmt_info = tmp;
13014 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13015 elements of type SCALAR_TYPE, or null if the target doesn't support
13016 such a type.
13018 If NUNITS is zero, return a vector type that contains elements of
13019 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13021 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13022 for this vectorization region and want to "autodetect" the best choice.
13023 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13024 and we want the new type to be interoperable with it. PREVAILING_MODE
13025 in this case can be a scalar integer mode or a vector mode; when it
13026 is a vector mode, the function acts like a tree-level version of
13027 related_vector_mode. */
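/* Hedged example, assuming a typical target with 128-bit vectors: a
   PREVAILING_MODE of V16QImode and SCALAR_TYPE "int" yield V4SI when
   NUNITS is zero (the related full-width vector), or V2SI when NUNITS
   is 2, provided the target supports such a mode.  */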
13029 tree
13030 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13031 tree scalar_type, poly_uint64 nunits)
13033 tree orig_scalar_type = scalar_type;
13034 scalar_mode inner_mode;
13035 machine_mode simd_mode;
13036 tree vectype;
13038 if ((!INTEGRAL_TYPE_P (scalar_type)
13039 && !POINTER_TYPE_P (scalar_type)
13040 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13041 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13042 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13043 return NULL_TREE;
13045 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13047 /* Interoperability between modes requires one to be a constant multiple
13048 of the other, so that the number of vectors required for each operation
13049 is a compile-time constant. */
13050 if (prevailing_mode != VOIDmode
13051 && !constant_multiple_p (nunits * nbytes,
13052 GET_MODE_SIZE (prevailing_mode))
13053 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13054 nunits * nbytes))
13055 return NULL_TREE;
13057 /* For vector types of elements whose mode precision doesn't
13058 match their type's precision we use an element type of mode
13059 precision. The vectorization routines will have to make sure
13060 they support the proper result truncation/extension.
13061 We also make sure to build vector types with INTEGER_TYPE
13062 component type only. */
13063 if (INTEGRAL_TYPE_P (scalar_type)
13064 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13065 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13066 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13067 TYPE_UNSIGNED (scalar_type));
13069 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13070 When the component mode passes the above test simply use a type
13071 corresponding to that mode. The theory is that any use that
13072 would cause problems with this will disable vectorization anyway. */
13073 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13074 && !INTEGRAL_TYPE_P (scalar_type))
13075 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13077 /* We can't build a vector type of elements with alignment bigger than
13078 their size. */
13079 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13080 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13081 TYPE_UNSIGNED (scalar_type));
13083 /* If we fell back to using the mode, fail if there was
13084 no scalar type for it. */
13085 if (scalar_type == NULL_TREE)
13086 return NULL_TREE;
13088 /* If no prevailing mode was supplied, use the mode the target prefers.
13089 Otherwise lookup a vector mode based on the prevailing mode. */
13090 if (prevailing_mode == VOIDmode)
13092 gcc_assert (known_eq (nunits, 0U));
13093 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13094 if (SCALAR_INT_MODE_P (simd_mode))
13096 /* Traditional behavior is not to take the integer mode
13097 literally, but simply to use it as a way of determining
13098 the vector size. It is up to mode_for_vector to decide
13099 what the TYPE_MODE should be.
13101 Note that nunits == 1 is allowed in order to support single
13102 element vector types. */
13103 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
13104 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13105 return NULL_TREE;
13108 else if (SCALAR_INT_MODE_P (prevailing_mode)
13109 || !related_vector_mode (prevailing_mode,
13110 inner_mode, nunits).exists (&simd_mode))
13112 /* Fall back to using mode_for_vector, mostly in the hope of being
13113 able to use an integer mode. */
13114 if (known_eq (nunits, 0U)
13115 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13116 return NULL_TREE;
13118 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13119 return NULL_TREE;
13122 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13124 /* In cases where the mode was chosen by mode_for_vector, check that
13125 the target actually supports the chosen mode, or that it at least
13126 allows the vector mode to be replaced by a like-sized integer. */
13127 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13128 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13129 return NULL_TREE;
13131 /* Re-attach the address-space qualifier if we canonicalized the scalar
13132 type. */
13133 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13134 return build_qualified_type
13135 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13137 return vectype;
13140 /* Function get_vectype_for_scalar_type.
13142 Returns the vector type corresponding to SCALAR_TYPE as supported
13143 by the target. If GROUP_SIZE is nonzero and we're performing BB
13144 vectorization, make sure that the number of elements in the vector
13145 is no bigger than GROUP_SIZE. */
13147 tree
13148 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13149 unsigned int group_size)
13151 /* For BB vectorization, we should always have a group size once we've
13152 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13153 are tentative requests during things like early data reference
13154 analysis and pattern recognition. */
13155 if (is_a <bb_vec_info> (vinfo))
13156 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13157 else
13158 group_size = 0;
13160 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13161 scalar_type);
13162 if (vectype && vinfo->vector_mode == VOIDmode)
13163 vinfo->vector_mode = TYPE_MODE (vectype);
13165 /* Register the natural choice of vector type, before the group size
13166 has been applied. */
13167 if (vectype)
13168 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13170 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13171 try again with an explicit number of elements. */
13172 if (vectype
13173 && group_size
13174 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13176 /* Start with the biggest number of units that fits within
13177 GROUP_SIZE and halve it until we find a valid vector type.
13178 Usually either the first attempt will succeed or all will
13179 fail (in the latter case because GROUP_SIZE is too small
13180 for the target), but it's possible that a target could have
13181 a hole between supported vector types.
13183 If GROUP_SIZE is not a power of 2, this has the effect of
13184 trying the largest power of 2 that fits within the group,
13185 even though the group is not a multiple of that vector size.
13186 The BB vectorizer will then try to carve up the group into
13187 smaller pieces. */
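/* For example, with GROUP_SIZE == 12 the loop below tries 8 elements
   first, then 4, then 2, stopping at the first vector type the target
   supports.  */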
13188 unsigned int nunits = 1 << floor_log2 (group_size);
13191 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13192 scalar_type, nunits);
13193 nunits /= 2;
13195 while (nunits > 1 && !vectype);
13198 return vectype;
13201 /* Return the vector type corresponding to SCALAR_TYPE as supported
13202 by the target. NODE, if nonnull, is the SLP tree node that will
13203 use the returned vector type. */
13205 tree
13206 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13208 unsigned int group_size = 0;
13209 if (node)
13210 group_size = SLP_TREE_LANES (node);
13211 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13214 /* Function get_mask_type_for_scalar_type.
13216 Returns the mask type corresponding to a result of comparison
13217 of vectors of specified SCALAR_TYPE as supported by target.
13218 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13219 make sure that the number of elements in the vector is no bigger
13220 than GROUP_SIZE. */
13222 tree
13223 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13224 unsigned int group_size)
13226 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13228 if (!vectype)
13229 return NULL;
13231 return truth_type_for (vectype);
13234 /* Function get_same_sized_vectype
13236 Returns a vector type corresponding to SCALAR_TYPE of size
13237 VECTOR_TYPE if supported by the target. */
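/* For example, given SCALAR_TYPE "short" and a 128-bit VECTOR_TYPE such
   as V4SF, the result is a vector of 16 / 2 = 8 shorts (V8HI), provided
   the target supports it.  */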
13239 tree
13240 get_same_sized_vectype (tree scalar_type, tree vector_type)
13242 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13243 return truth_type_for (vector_type);
13245 poly_uint64 nunits;
13246 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
13247 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13248 return NULL_TREE;
13250 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13251 scalar_type, nunits);
13254 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13255 would not change the chosen vector modes. */
13257 bool
13258 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13260 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13261 i != vinfo->used_vector_modes.end (); ++i)
13262 if (!VECTOR_MODE_P (*i)
13263 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13264 return false;
13265 return true;
13268 /* Function vect_is_simple_use.
13270 Input:
13271 VINFO - the vect info of the loop or basic block that is being vectorized.
13272 OPERAND - operand in the loop or bb.
13273 Output:
13274 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13275 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13276 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13277 the definition could be anywhere in the function
13278 DT - the type of definition
13280 Returns whether a stmt with OPERAND can be vectorized.
13281 For loops, supportable operands are constants, loop invariants, and operands
13282 that are defined by the current iteration of the loop. Unsupportable
13283 operands are those that are defined by a previous iteration of the loop (as
13284 is the case in reduction/induction computations).
13285 For basic blocks, supportable operands are constants and bb invariants.
13286 For now, operands defined outside the basic block are not supported. */
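/* For example, when vectorizing a loop body a[i] = b[i] + x + 1, the use
   of the loaded value b[i] is a vect_internal_def, the loop invariant x
   is a vect_external_def and the literal 1 is a vect_constant_def; all
   three are supportable, whereas an operand defined by a previous
   iteration of the loop (a reduction or induction result) is not.  */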
13288 bool
13289 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13290 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
13292 if (def_stmt_info_out)
13293 *def_stmt_info_out = NULL;
13294 if (def_stmt_out)
13295 *def_stmt_out = NULL;
13296 *dt = vect_unknown_def_type;
13298 if (dump_enabled_p ())
13300 dump_printf_loc (MSG_NOTE, vect_location,
13301 "vect_is_simple_use: operand ");
13302 if (TREE_CODE (operand) == SSA_NAME
13303 && !SSA_NAME_IS_DEFAULT_DEF (operand))
13304 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13305 else
13306 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13309 if (CONSTANT_CLASS_P (operand))
13310 *dt = vect_constant_def;
13311 else if (is_gimple_min_invariant (operand))
13312 *dt = vect_external_def;
13313 else if (TREE_CODE (operand) != SSA_NAME)
13314 *dt = vect_unknown_def_type;
13315 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13316 *dt = vect_external_def;
13317 else
13319 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13320 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13321 if (!stmt_vinfo)
13322 *dt = vect_external_def;
13323 else
13325 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
13326 def_stmt = stmt_vinfo->stmt;
13327 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13328 if (def_stmt_info_out)
13329 *def_stmt_info_out = stmt_vinfo;
13331 if (def_stmt_out)
13332 *def_stmt_out = def_stmt;
13335 if (dump_enabled_p ())
13337 dump_printf (MSG_NOTE, ", type of def: ");
13338 switch (*dt)
13340 case vect_uninitialized_def:
13341 dump_printf (MSG_NOTE, "uninitialized\n");
13342 break;
13343 case vect_constant_def:
13344 dump_printf (MSG_NOTE, "constant\n");
13345 break;
13346 case vect_external_def:
13347 dump_printf (MSG_NOTE, "external\n");
13348 break;
13349 case vect_internal_def:
13350 dump_printf (MSG_NOTE, "internal\n");
13351 break;
13352 case vect_induction_def:
13353 dump_printf (MSG_NOTE, "induction\n");
13354 break;
13355 case vect_reduction_def:
13356 dump_printf (MSG_NOTE, "reduction\n");
13357 break;
13358 case vect_double_reduction_def:
13359 dump_printf (MSG_NOTE, "double reduction\n");
13360 break;
13361 case vect_nested_cycle:
13362 dump_printf (MSG_NOTE, "nested cycle\n");
13363 break;
13364 case vect_first_order_recurrence:
13365 dump_printf (MSG_NOTE, "first order recurrence\n");
13366 break;
13367 case vect_unknown_def_type:
13368 dump_printf (MSG_NOTE, "unknown\n");
13369 break;
13373 if (*dt == vect_unknown_def_type)
13375 if (dump_enabled_p ())
13376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13377 "Unsupported pattern.\n");
13378 return false;
13381 return true;
13384 /* Function vect_is_simple_use.
13386 Same as vect_is_simple_use but also determines the vector operand
13387 type of OPERAND and stores it to *VECTYPE. If the definition of
13388 OPERAND is vect_uninitialized_def, vect_constant_def or
13389 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
13390 is responsible for computing the best suited vector type for the
13391 scalar operand. */
13393 bool
13394 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13395 tree *vectype, stmt_vec_info *def_stmt_info_out,
13396 gimple **def_stmt_out)
13398 stmt_vec_info def_stmt_info;
13399 gimple *def_stmt;
13400 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
13401 return false;
13403 if (def_stmt_out)
13404 *def_stmt_out = def_stmt;
13405 if (def_stmt_info_out)
13406 *def_stmt_info_out = def_stmt_info;
13408 /* Now get a vector type if the def is internal, otherwise supply
13409 NULL_TREE and leave it up to the caller to figure out a proper
13410 type for the use stmt. */
13411 if (*dt == vect_internal_def
13412 || *dt == vect_induction_def
13413 || *dt == vect_reduction_def
13414 || *dt == vect_double_reduction_def
13415 || *dt == vect_nested_cycle
13416 || *dt == vect_first_order_recurrence)
13418 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
13419 gcc_assert (*vectype != NULL_TREE);
13420 if (dump_enabled_p ())
13421 dump_printf_loc (MSG_NOTE, vect_location,
13422 "vect_is_simple_use: vectype %T\n", *vectype);
13424 else if (*dt == vect_uninitialized_def
13425 || *dt == vect_constant_def
13426 || *dt == vect_external_def)
13427 *vectype = NULL_TREE;
13428 else
13429 gcc_unreachable ();
13431 return true;
13434 /* Function vect_is_simple_use.
13436 Same as vect_is_simple_use but determines the operand by operand
13437 position OPERAND from either STMT or SLP_NODE, filling in *OP
13438 and *SLP_DEF (when SLP_NODE is not NULL). */
13440 bool
13441 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13442 unsigned operand, tree *op, slp_tree *slp_def,
13443 enum vect_def_type *dt,
13444 tree *vectype, stmt_vec_info *def_stmt_info_out)
13446 if (slp_node)
13448 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13449 *slp_def = child;
13450 *vectype = SLP_TREE_VECTYPE (child);
13451 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13453 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13454 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
13456 else
13458 if (def_stmt_info_out)
13459 *def_stmt_info_out = NULL;
13460 *op = SLP_TREE_SCALAR_OPS (child)[0];
13461 *dt = SLP_TREE_DEF_TYPE (child);
13462 return true;
13465 else
13467 *slp_def = NULL;
13468 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
13470 if (gimple_assign_rhs_code (ass) == COND_EXPR
13471 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
13473 if (operand < 2)
13474 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
13475 else
13476 *op = gimple_op (ass, operand);
13478 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
13479 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
13480 else
13481 *op = gimple_op (ass, operand + 1);
13483 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
13484 *op = gimple_call_arg (call, operand);
13485 else
13486 gcc_unreachable ();
13487 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
13491 /* If OP is not NULL and is external or constant update its vector
13492 type with VECTYPE. Returns true if successful or false if not,
13493 for example when conflicting vector types are present. */
13495 bool
13496 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13498 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13499 return true;
13500 if (SLP_TREE_VECTYPE (op))
13501 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
13502 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P; those
13503 should be handled by patterns. Allow vect_constant_def for now. */
13504 if (VECTOR_BOOLEAN_TYPE_P (vectype)
13505 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
13506 return false;
13507 SLP_TREE_VECTYPE (op) = vectype;
13508 return true;
13511 /* Function supportable_widening_operation
13513 Check whether an operation represented by the code CODE is a
13514 widening operation that is supported by the target platform in
13515 vector form (i.e., when operating on arguments of type VECTYPE_IN
13516 producing a result of type VECTYPE_OUT).
13518 Widening operations we currently support are NOP (CONVERT), FLOAT,
13519 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13520 are supported by the target platform either directly (via vector
13521 tree-codes), or via target builtins.
13523 Output:
13524 - CODE1 and CODE2 are codes of vector operations to be used when
13525 vectorizing the operation, if available.
13526 - MULTI_STEP_CVT determines the number of required intermediate steps in
13527 case of multi-step conversion (like char->short->int - in that case
13528 MULTI_STEP_CVT will be 1).
13529 - INTERM_TYPES contains the intermediate type required to perform the
13530 widening operation (short in the above example). */
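/* For example, widening a V16QI vector to V8HI typically uses the
   VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR pair and produces two V8HI
   result vectors; widening char directly to int additionally needs a
   short intermediate step, so MULTI_STEP_CVT is 1 and INTERM_TYPES
   holds the short vector type.  */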
13532 bool
13533 supportable_widening_operation (vec_info *vinfo,
13534 code_helper code,
13535 stmt_vec_info stmt_info,
13536 tree vectype_out, tree vectype_in,
13537 code_helper *code1,
13538 code_helper *code2,
13539 int *multi_step_cvt,
13540 vec<tree> *interm_types)
13542 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
13543 class loop *vect_loop = NULL;
13544 machine_mode vec_mode;
13545 enum insn_code icode1, icode2;
13546 optab optab1 = unknown_optab, optab2 = unknown_optab;
13547 tree vectype = vectype_in;
13548 tree wide_vectype = vectype_out;
13549 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
13550 int i;
13551 tree prev_type, intermediate_type;
13552 machine_mode intermediate_mode, prev_mode;
13553 optab optab3, optab4;
13555 *multi_step_cvt = 0;
13556 if (loop_info)
13557 vect_loop = LOOP_VINFO_LOOP (loop_info);
13559 switch (code.safe_as_tree_code ())
13561 case MAX_TREE_CODES:
13562 /* Don't set c1 and c2 if code is not a tree_code. */
13563 break;
13565 case WIDEN_MULT_EXPR:
13566 /* The result of a vectorized widening operation usually requires
13567 two vectors (because the widened results do not fit into one vector).
13568 The generated vector results would normally be expected to be
13569 generated in the same order as in the original scalar computation,
13570 i.e. if 8 results are generated in each vector iteration, they are
13571 to be organized as follows:
13572 vect1: [res1,res2,res3,res4],
13573 vect2: [res5,res6,res7,res8].
13575 However, in the special case that the result of the widening
13576 operation is used in a reduction computation only, the order doesn't
13577 matter (because when vectorizing a reduction we change the order of
13578 the computation). Some targets can take advantage of this and
13579 generate more efficient code. For example, targets like Altivec,
13580 that support widen_mult using a sequence of {mult_even,mult_odd}
13581 generate the following vectors:
13582 vect1: [res1,res3,res5,res7],
13583 vect2: [res2,res4,res6,res8].
13585 When vectorizing outer-loops, we execute the inner-loop sequentially
13586 (each vectorized inner-loop iteration contributes to VF outer-loop
13587 iterations in parallel). We therefore don't allow changing the
13588 order of the computation in the inner-loop during outer-loop
13589 vectorization. */
13590 /* TODO: Another case in which order doesn't *really* matter is when we
13591 widen and then contract again, e.g. (short)((int)x * y >> 8).
13592 Normally, pack_trunc performs an even/odd permute, whereas the
13593 repack from an even/odd expansion would be an interleave, which
13594 would be significantly simpler for e.g. AVX2. */
13595 /* In any case, in order to avoid duplicating the code below, recurse
13596 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13597 are properly set up for the caller. If we fail, we'll continue with
13598 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13599 if (vect_loop
13600 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
13601 && !nested_in_vect_loop_p (vect_loop, stmt_info)
13602 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
13603 stmt_info, vectype_out,
13604 vectype_in, code1,
13605 code2, multi_step_cvt,
13606 interm_types))
13608 /* Elements in a vector with vect_used_by_reduction property cannot
13609 be reordered if the use chain with this property does not have the
13610 same operation. One such example is s += a * b, where elements
13611 in a and b cannot be reordered. Here we check if the vector defined
13612 by STMT is only directly used in the reduction statement. */
13613 tree lhs = gimple_assign_lhs (stmt_info->stmt);
13614 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
13615 if (use_stmt_info
13616 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
13617 return true;
13619 c1 = VEC_WIDEN_MULT_LO_EXPR;
13620 c2 = VEC_WIDEN_MULT_HI_EXPR;
13621 break;
13623 case DOT_PROD_EXPR:
13624 c1 = DOT_PROD_EXPR;
13625 c2 = DOT_PROD_EXPR;
13626 break;
13628 case SAD_EXPR:
13629 c1 = SAD_EXPR;
13630 c2 = SAD_EXPR;
13631 break;
13633 case VEC_WIDEN_MULT_EVEN_EXPR:
13634 /* Support the recursion induced just above. */
13635 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
13636 c2 = VEC_WIDEN_MULT_ODD_EXPR;
13637 break;
13639 case WIDEN_LSHIFT_EXPR:
13640 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
13641 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
13642 break;
13644 CASE_CONVERT:
13645 c1 = VEC_UNPACK_LO_EXPR;
13646 c2 = VEC_UNPACK_HI_EXPR;
13647 break;
13649 case FLOAT_EXPR:
13650 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
13651 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
13652 break;
13654 case FIX_TRUNC_EXPR:
13655 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
13656 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
13657 break;
13659 default:
13660 gcc_unreachable ();
13663 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
13664 std::swap (c1, c2);
13666 if (code == FIX_TRUNC_EXPR)
13669 /* The signedness is determined from the output operand. */
13669 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13670 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13672 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
13673 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13674 && VECTOR_BOOLEAN_TYPE_P (vectype)
13675 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13676 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13678 /* If the input and result modes are the same, a different optab
13679 is needed where we pass in the number of units in vectype. */
13680 optab1 = vec_unpacks_sbool_lo_optab;
13681 optab2 = vec_unpacks_sbool_hi_optab;
13684 vec_mode = TYPE_MODE (vectype);
13685 if (widening_fn_p (code))
13687 /* If this is an internal fn then we must check whether the target
13688 supports either a low-high split or an even-odd split. */
13689 internal_fn ifn = as_internal_fn ((combined_fn) code);
13691 internal_fn lo, hi, even, odd;
13692 lookup_hilo_internal_fn (ifn, &lo, &hi);
13693 *code1 = as_combined_fn (lo);
13694 *code2 = as_combined_fn (hi);
13695 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
13696 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
13698 /* If we don't support low-high, then check for even-odd. */
13699 if (!optab1
13700 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13701 || !optab2
13702 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13704 lookup_evenodd_internal_fn (ifn, &even, &odd);
13705 *code1 = as_combined_fn (even);
13706 *code2 = as_combined_fn (odd);
13707 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
13708 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
13711 else if (code.is_tree_code ())
13713 if (code == FIX_TRUNC_EXPR)
13716 /* The signedness is determined from the output operand. */
13716 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13717 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13719 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
13720 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13721 && VECTOR_BOOLEAN_TYPE_P (vectype)
13722 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13723 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13725 /* If the input and result modes are the same, a different optab
13726 is needed where we pass in the number of units in vectype. */
13727 optab1 = vec_unpacks_sbool_lo_optab;
13728 optab2 = vec_unpacks_sbool_hi_optab;
13730 else
13732 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13733 optab2 = optab_for_tree_code (c2, vectype, optab_default);
13735 *code1 = c1;
13736 *code2 = c2;
13739 if (!optab1 || !optab2)
13740 return false;
13742 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13743 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13744 return false;
13747 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13748 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13750 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13751 return true;
13752 /* For scalar masks we may have different boolean
13753 vector types having the same QImode. Thus we
13754 add an additional check for the number of elements. */
13755 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
13756 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13757 return true;
13760 /* Check if it's a multi-step conversion that can be done using intermediate
13761 types. */
13763 prev_type = vectype;
13764 prev_mode = vec_mode;
13766 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
13767 return false;
13769 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13770 intermediate steps in the promotion sequence. We try
13771 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
13772 not. */
13773 interm_types->create (MAX_INTERM_CVT_STEPS);
13774 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13776 intermediate_mode = insn_data[icode1].operand[0].mode;
13777 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13778 intermediate_type
13779 = vect_halve_mask_nunits (prev_type, intermediate_mode);
13780 else if (VECTOR_MODE_P (intermediate_mode))
13782 tree intermediate_element_type
13783 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
13784 TYPE_UNSIGNED (prev_type));
13785 intermediate_type
13786 = build_vector_type_for_mode (intermediate_element_type,
13787 intermediate_mode);
13789 else
13790 intermediate_type
13791 = lang_hooks.types.type_for_mode (intermediate_mode,
13792 TYPE_UNSIGNED (prev_type));
13794 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13795 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13796 && intermediate_mode == prev_mode
13797 && SCALAR_INT_MODE_P (prev_mode))
13799 /* If the input and result modes are the same, a different optab
13800 is needed where we pass in the number of units in vectype. */
13801 optab3 = vec_unpacks_sbool_lo_optab;
13802 optab4 = vec_unpacks_sbool_hi_optab;
13804 else
13806 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
13807 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
13810 if (!optab3 || !optab4
13811 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
13812 || insn_data[icode1].operand[0].mode != intermediate_mode
13813 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
13814 || insn_data[icode2].operand[0].mode != intermediate_mode
13815 || ((icode1 = optab_handler (optab3, intermediate_mode))
13816 == CODE_FOR_nothing)
13817 || ((icode2 = optab_handler (optab4, intermediate_mode))
13818 == CODE_FOR_nothing))
13819 break;
13821 interm_types->quick_push (intermediate_type);
13822 (*multi_step_cvt)++;
13824 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13825 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13827 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13828 return true;
13829 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
13830 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13831 return true;
13834 prev_type = intermediate_type;
13835 prev_mode = intermediate_mode;
13838 interm_types->release ();
13839 return false;
13843 /* Function supportable_narrowing_operation
13845 Check whether an operation represented by the code CODE is a
13846 narrowing operation that is supported by the target platform in
13847 vector form (i.e., when operating on arguments of type VECTYPE_IN
13848 and producing a result of type VECTYPE_OUT).
13850 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
13851 and FLOAT. This function checks if these operations are supported by
13852 the target platform directly via vector tree-codes.
13854 Output:
13855 - CODE1 is the code of a vector operation to be used when
13856 vectorizing the operation, if available.
13857 - MULTI_STEP_CVT determines the number of required intermediate steps in
13858 case of multi-step conversion (like int->short->char - in that case
13859 MULTI_STEP_CVT will be 1).
13860 - INTERM_TYPES contains the intermediate type required to perform the
13861 narrowing operation (short in the above example). */
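/* For example, narrowing two V8HI vectors into one V16QI vector uses
   VEC_PACK_TRUNC_EXPR with MULTI_STEP_CVT == 0, whereas narrowing int
   to char goes through a short intermediate step, so MULTI_STEP_CVT is
   1 and INTERM_TYPES holds the short vector type.  */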
13863 bool
13864 supportable_narrowing_operation (code_helper code,
13865 tree vectype_out, tree vectype_in,
13866 code_helper *code1, int *multi_step_cvt,
13867 vec<tree> *interm_types)
13869 machine_mode vec_mode;
13870 enum insn_code icode1;
13871 optab optab1, interm_optab;
13872 tree vectype = vectype_in;
13873 tree narrow_vectype = vectype_out;
13874 enum tree_code c1;
13875 tree intermediate_type, prev_type;
13876 machine_mode intermediate_mode, prev_mode;
13877 int i;
13878 unsigned HOST_WIDE_INT n_elts;
13879 bool uns;
13881 if (!code.is_tree_code ())
13882 return false;
13884 *multi_step_cvt = 0;
13885 switch ((tree_code) code)
13887 CASE_CONVERT:
13888 c1 = VEC_PACK_TRUNC_EXPR;
13889 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
13890 && VECTOR_BOOLEAN_TYPE_P (vectype)
13891 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
13892 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
13893 && n_elts < BITS_PER_UNIT)
13894 optab1 = vec_pack_sbool_trunc_optab;
13895 else
13896 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13897 break;
13899 case FIX_TRUNC_EXPR:
13900 c1 = VEC_PACK_FIX_TRUNC_EXPR;
13902 /* The signedness is determined from the output operand. */
13902 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13903 break;
13905 case FLOAT_EXPR:
13906 c1 = VEC_PACK_FLOAT_EXPR;
13907 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13908 break;
13910 default:
13911 gcc_unreachable ();
13914 if (!optab1)
13915 return false;
13917 vec_mode = TYPE_MODE (vectype);
13918 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
13919 return false;
13921 *code1 = c1;
13923 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
13925 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13926 return true;
13927 /* For scalar masks we may have different boolean
13928 vector types having the same QImode. Thus we
13929 add an additional check for the number of elements. */
13930 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
13931 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
13932 return true;
13935 if (code == FLOAT_EXPR)
13936 return false;
13938 /* Check if it's a multi-step conversion that can be done using intermediate
13939 types. */
13940 prev_mode = vec_mode;
13941 prev_type = vectype;
13942 if (code == FIX_TRUNC_EXPR)
13943 uns = TYPE_UNSIGNED (vectype_out);
13944 else
13945 uns = TYPE_UNSIGNED (vectype);
13947 /* For a multi-step FIX_TRUNC_EXPR, prefer a signed floating-point to integer
13948 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often more
13949 costly than signed. */
13950 if (code == FIX_TRUNC_EXPR && uns)
13952 enum insn_code icode2;
13954 intermediate_type
13955 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
13956 interm_optab
13957 = optab_for_tree_code (c1, intermediate_type, optab_default);
13958 if (interm_optab != unknown_optab
13959 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
13960 && insn_data[icode1].operand[0].mode
13961 == insn_data[icode2].operand[0].mode)
13963 uns = false;
13964 optab1 = interm_optab;
13965 icode1 = icode2;
13969 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13970 intermediate steps in the promotion sequence. We try
13971 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
13972 interm_types->create (MAX_INTERM_CVT_STEPS);
13973 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13975 intermediate_mode = insn_data[icode1].operand[0].mode;
13976 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13977 intermediate_type
13978 = vect_double_mask_nunits (prev_type, intermediate_mode);
13979 else
13980 intermediate_type
13981 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
13982 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13983 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13984 && SCALAR_INT_MODE_P (prev_mode)
13985 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
13986 && n_elts < BITS_PER_UNIT)
13987 interm_optab = vec_pack_sbool_trunc_optab;
13988 else
13989 interm_optab
13990 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
13991 optab_default);
13992 if (!interm_optab
13993 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
13994 || insn_data[icode1].operand[0].mode != intermediate_mode
13995 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
13996 == CODE_FOR_nothing))
13997 break;
13999 interm_types->quick_push (intermediate_type);
14000 (*multi_step_cvt)++;
14002 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14004 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14005 return true;
14006 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14007 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14008 return true;
14011 prev_mode = intermediate_mode;
14012 prev_type = intermediate_type;
14013 optab1 = interm_optab;
14016 interm_types->release ();
14017 return false;
14020 /* Generate and return a vector mask of MASK_TYPE such that
14021 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14022 Add the statements to SEQ. */
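/* For example, with START_INDEX == i and END_INDEX == n this emits
   IFN_WHILE_ULT (i, n, {0, ...}) whose result has element I set iff
   i + I < n; this is how the final, partially filled iteration of a
   fully-masked loop is handled.  */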
14024 tree
14025 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14026 tree end_index, const char *name)
14028 tree cmp_type = TREE_TYPE (start_index);
14029 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14030 cmp_type, mask_type,
14031 OPTIMIZE_FOR_SPEED));
14032 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14033 start_index, end_index,
14034 build_zero_cst (mask_type));
14035 tree tmp;
14036 if (name)
14037 tmp = make_temp_ssa_name (mask_type, NULL, name);
14038 else
14039 tmp = make_ssa_name (mask_type);
14040 gimple_call_set_lhs (call, tmp);
14041 gimple_seq_add_stmt (seq, call);
14042 return tmp;
14045 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14046 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
14048 tree
14049 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14050 tree end_index)
14052 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14053 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
14056 /* Try to compute the vector types required to vectorize STMT_INFO,
14057 returning true on success and false if vectorization isn't possible.
14058 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14059 make sure that the number of elements in the vectors is no bigger
14060 than GROUP_SIZE.
14062 On success:
14064 - Set *STMT_VECTYPE_OUT to:
14065 - NULL_TREE if the statement doesn't need to be vectorized;
14066 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14068 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14069 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14070 statement does not help to determine the overall number of units. */
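/* Hedged example, assuming a 128-bit target: for a widening conversion
   int_7 = (int) short_3, *STMT_VECTYPE_OUT would be V4SI (the vector
   type of the result) while *NUNITS_VECTYPE_OUT would be V8HI, based on
   the narrower short type, so this statement alone requires a
   vectorization factor of at least 8.  */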
14072 opt_result
14073 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14074 tree *stmt_vectype_out,
14075 tree *nunits_vectype_out,
14076 unsigned int group_size)
14077 {
14078 gimple *stmt = stmt_info->stmt;
14079
14080 /* For BB vectorization, we should always have a group size once we've
14081 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14082 are tentative requests during things like early data reference
14083 analysis and pattern recognition. */
14084 if (is_a <bb_vec_info> (vinfo))
14085 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
14086 else
14087 group_size = 0;
14088
14089 *stmt_vectype_out = NULL_TREE;
14090 *nunits_vectype_out = NULL_TREE;
14091
14092 if (gimple_get_lhs (stmt) == NULL_TREE
14093 /* MASK_STORE has no lhs, but is ok. */
14094 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
14095 {
14096 if (is_a <gcall *> (stmt))
14097 {
14098 /* Ignore calls with no lhs. These must be calls to
14099 #pragma omp simd functions, and what vectorization factor
14100 it really needs can't be determined until
14101 vectorizable_simd_clone_call. */
14102 if (dump_enabled_p ())
14103 dump_printf_loc (MSG_NOTE, vect_location,
14104 "defer to SIMD clone analysis.\n");
14105 return opt_result::success ();
14106 }
14107
14108 return opt_result::failure_at (stmt,
14109 "not vectorized: irregular stmt.%G", stmt);
14110 }
14111
14112 tree vectype;
14113 tree scalar_type = NULL_TREE;
14114 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
14115 {
14116 vectype = STMT_VINFO_VECTYPE (stmt_info);
14117 if (dump_enabled_p ())
14118 dump_printf_loc (MSG_NOTE, vect_location,
14119 "precomputed vectype: %T\n", vectype);
14120 }
14121 else if (vect_use_mask_type_p (stmt_info))
14122 {
14123 unsigned int precision = stmt_info->mask_precision;
14124 scalar_type = build_nonstandard_integer_type (precision, 1);
14125 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
14126 if (!vectype)
14127 return opt_result::failure_at (stmt, "not vectorized: unsupported"
14128 " data-type %T\n", scalar_type);
14129 if (dump_enabled_p ())
14130 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14131 }
14132 else
14133 {
14134 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
14135 scalar_type = TREE_TYPE (DR_REF (dr));
14136 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
14137 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
14138 else
14139 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
14140
14141 if (dump_enabled_p ())
14142 {
14143 if (group_size)
14144 dump_printf_loc (MSG_NOTE, vect_location,
14145 "get vectype for scalar type (group size %d):"
14146 " %T\n", group_size, scalar_type);
14147 else
14148 dump_printf_loc (MSG_NOTE, vect_location,
14149 "get vectype for scalar type: %T\n", scalar_type);
14150 }
14151 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
14152 if (!vectype)
14153 return opt_result::failure_at (stmt,
14154 "not vectorized:"
14155 " unsupported data-type %T\n",
14156 scalar_type);
14157
14158 if (dump_enabled_p ())
14159 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14160 }
14161
14162 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
14163 return opt_result::failure_at (stmt,
14164 "not vectorized: vector stmt in loop:%G",
14165 stmt);
14166
14167 *stmt_vectype_out = vectype;
14168
14169 /* Don't try to compute scalar types if the stmt produces a boolean
14170 vector; use the existing vector type instead. */
14171 tree nunits_vectype = vectype;
14172 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14173 {
14174 /* The number of units is set according to the smallest scalar
14175 type (or the largest vector size, but we only support one
14176 vector size per vectorization). */
14177 scalar_type = vect_get_smallest_scalar_type (stmt_info,
14178 TREE_TYPE (vectype));
14179 if (scalar_type != TREE_TYPE (vectype))
14180 {
14181 if (dump_enabled_p ())
14182 dump_printf_loc (MSG_NOTE, vect_location,
14183 "get vectype for smallest scalar type: %T\n",
14184 scalar_type);
14185 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
14186 group_size);
14187 if (!nunits_vectype)
14188 return opt_result::failure_at
14189 (stmt, "not vectorized: unsupported data-type %T\n",
14190 scalar_type);
14191 if (dump_enabled_p ())
14192 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
14193 nunits_vectype);
14194 }
14195 }
14196
14197 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
14198 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
14199 return opt_result::failure_at (stmt,
14200 "Not vectorized: Incompatible number "
14201 "of vector subparts between %T and %T\n",
14202 nunits_vectype, *stmt_vectype_out);
14203
14204 if (dump_enabled_p ())
14205 {
14206 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
14207 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
14208 dump_printf (MSG_NOTE, "\n");
14209 }
14210
14211 *nunits_vectype_out = nunits_vectype;
14212 return opt_result::success ();
14213 }
14214
14215 /* Generate and return a statement sequence that sets the vector length LEN to:
14216
14217 min_of_start_and_end = min (START_INDEX, END_INDEX);
14218 left_len = END_INDEX - min_of_start_and_end;
14219 rhs = min (left_len, LEN_LIMIT);
14220 LEN = rhs;
14221
14222 Note: the cost of the code generated by this function is modeled
14223 by vect_estimate_min_profitable_iters, so changes here may need
14224 corresponding changes there. */
14225
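/* Illustrative example (numbers are hypothetical): with START_INDEX = 14,
   END_INDEX = 17 and LEN_LIMIT = 4, the sequence computes
   min_of_start_and_end = 14, left_len = 17 - 14 = 3 and
   LEN = min (3, 4) = 3, i.e. only three lanes remain to be processed;
   with START_INDEX = 8 it would instead yield the full LEN_LIMIT of 4.  */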
14226 gimple_seq
14227 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
14228 {
14229 gimple_seq stmts = NULL;
14230 tree len_type = TREE_TYPE (len);
14231 gcc_assert (TREE_TYPE (start_index) == len_type);
14232
14233 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
14234 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
14235 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
14236 gimple* stmt = gimple_build_assign (len, rhs);
14237 gimple_seq_add_stmt (&stmts, stmt);
14238
14239 return stmts;