Skip analyzer strndup test on hppa*-*-hpux*
[official-gcc.git] / gcc / tree-vect-stmts.cc
blobbf8c99779aee4b92f57ec63ec610eed85d87468e
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
57 #include "regs.h"
58 #include "attribs.h"
59 #include "optabs-libfuncs.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
64 /* Return the vectorized type for the given statement. */
66 tree
67 stmt_vectype (class _stmt_vec_info *stmt_info)
69 return STMT_VINFO_VECTYPE (stmt_info);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74 bool
75 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
80 class loop* loop;
82 if (!loop_vinfo)
83 return false;
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
87 return (bb->loop_father == loop->inner);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
94 static unsigned
95 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
108 stmt_info_for_cost si
109 = { count, kind, where, stmt_info, node, vectype, misalign };
110 body_cost_vec->safe_push (si);
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
116 unsigned
117 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
126 unsigned
127 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
136 unsigned
137 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, 0, where);
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
149 static tree
150 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
161 static tree
162 read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (vect, new_stmt);
178 gimple_assign_set_lhs (new_stmt, vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
181 return vect_name;
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
188 static void
189 write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
193 tree array_ref;
194 gimple *new_stmt;
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
208 static tree
209 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
211 tree mem_ref;
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
222 static void
223 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
233 /* Function vect_mark_relevant.
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
237 static void
238 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern, in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
271 if (live_p && relevant == vect_unused_in_scope)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
299 worklist->safe_push (stmt_info);
303 /* Function is_simple_and_all_uses_invariant
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
307 bool
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
311 tree op;
312 ssa_op_iter iter;
314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
315 if (!stmt)
316 return false;
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
320 enum vect_def_type dt = vect_uninitialized_def;
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
333 return true;
336 /* Function vect_stmt_relevant_p.
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - control stmts in the loop (except for the exit condition).
346 CHECKME: what other side effects would the vectorizer allow? */
348 static bool
349 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
350 enum vect_relevant *relevant, bool *live_p)
352 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
353 ssa_op_iter op_iter;
354 imm_use_iterator imm_iter;
355 use_operand_p use_p;
356 def_operand_p def_p;
358 *relevant = vect_unused_in_scope;
359 *live_p = false;
361 /* cond stmt other than loop exit cond. */
362 if (is_ctrl_stmt (stmt_info->stmt)
363 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
364 *relevant = vect_used_in_scope;
366 /* changing memory. */
367 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
368 if (gimple_vdef (stmt_info->stmt)
369 && !gimple_clobber_p (stmt_info->stmt))
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: stmt has vdefs.\n");
374 *relevant = vect_used_in_scope;
377 /* uses outside the loop. */
378 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
380 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
382 basic_block bb = gimple_bb (USE_STMT (use_p));
383 if (!flow_bb_inside_loop_p (loop, bb))
385 if (is_gimple_debug (USE_STMT (use_p)))
386 continue;
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: used out of loop.\n");
392 /* We expect all such uses to be in the loop exit phis
393 (because of loop closed form) */
394 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
395 gcc_assert (bb == single_exit (loop)->dest);
397 *live_p = true;
402 if (*live_p && *relevant == vect_unused_in_scope
403 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_NOTE, vect_location,
407 "vec_stmt_relevant_p: stmt live but not relevant.\n");
408 *relevant = vect_used_only_live;
411 return (*live_p || *relevant);
415 /* Function exist_non_indexing_operands_for_use_p
417 USE is one of the uses attached to STMT_INFO. Check if USE is
418 used in STMT_INFO for anything other than indexing an array. */
420 static bool
421 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
423 tree operand;
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info))
429 return true;
431 /* STMT has a data_ref. FORNOW this means that its of one of
432 the following forms:
433 -1- ARRAY_REF = var
434 -2- var = ARRAY_REF
435 (This should have been verified in analyze_data_refs).
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
439 for array indexing.
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
444 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
445 if (!assign || !gimple_assign_copy_p (assign))
447 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
448 if (call && gimple_call_internal_p (call))
450 internal_fn ifn = gimple_call_internal_fn (call);
451 int mask_index = internal_fn_mask_index (ifn);
452 if (mask_index >= 0
453 && use == gimple_call_arg (call, mask_index))
454 return true;
455 int stored_value_index = internal_fn_stored_value_index (ifn);
456 if (stored_value_index >= 0
457 && use == gimple_call_arg (call, stored_value_index))
458 return true;
459 if (internal_gather_scatter_fn_p (ifn)
460 && use == gimple_call_arg (call, 1))
461 return true;
463 return false;
466 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
467 return false;
468 operand = gimple_assign_rhs1 (assign);
469 if (TREE_CODE (operand) != SSA_NAME)
470 return false;
472 if (operand == use)
473 return true;
475 return false;
480 Function process_use.
482 Inputs:
483 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
484 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
485 that defined USE. This is done by calling mark_relevant and passing it
486 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
487 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
488 be performed.
490 Outputs:
491 Generally, LIVE_P and RELEVANT are used to define the liveness and
492 relevance info of the DEF_STMT of this USE:
493 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
494 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
495 Exceptions:
496 - case 1: If USE is used only for address computations (e.g. array indexing),
497 which does not need to be directly vectorized, then the liveness/relevance
498 of the respective DEF_STMT is left unchanged.
499 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
500 we skip DEF_STMT cause it had already been processed.
501 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
502 "relevant" will be modified accordingly.
504 Return true if everything is as expected. Return false otherwise. */
506 static opt_result
507 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
508 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
509 bool force)
511 stmt_vec_info dstmt_vinfo;
512 enum vect_def_type dt;
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
517 return opt_result::success ();
519 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
520 return opt_result::failure_at (stmt_vinfo->stmt,
521 "not vectorized:"
522 " unsupported use in stmt.\n");
524 if (!dstmt_vinfo)
525 return opt_result::success ();
527 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
528 basic_block bb = gimple_bb (stmt_vinfo->stmt);
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
537 && bb->loop_father == def_bb->loop_father)
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
542 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
543 return opt_result::success ();
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
548 d = dstmt_vinfo
549 inner-loop:
550 stmt # use (d)
551 outer-loop-tail-bb:
552 ... */
553 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
559 switch (relevant)
561 case vect_unused_in_scope:
562 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
563 vect_used_in_scope : vect_unused_in_scope;
564 break;
566 case vect_used_in_outer_by_reduction:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_by_reduction;
569 break;
571 case vect_used_in_outer:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
573 relevant = vect_used_in_scope;
574 break;
576 case vect_used_in_scope:
577 break;
579 default:
580 gcc_unreachable ();
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
587 inner-loop:
588 d = dstmt_vinfo
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
590 stmt # use (d) */
591 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE, vect_location,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
597 switch (relevant)
599 case vect_unused_in_scope:
600 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
602 vect_used_in_outer_by_reduction : vect_unused_in_scope;
603 break;
605 case vect_used_by_reduction:
606 case vect_used_only_live:
607 relevant = vect_used_in_outer_by_reduction;
608 break;
610 case vect_used_in_scope:
611 relevant = vect_used_in_outer;
612 break;
614 default:
615 gcc_unreachable ();
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
621 of course. */
622 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
626 loop_latch_edge (bb->loop_father))
627 == use))
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location,
631 "induction value on backedge.\n");
632 return opt_result::success ();
636 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
637 return opt_result::success ();
641 /* Function vect_mark_stmts_to_be_vectorized.
643 Not all stmts in the loop need to be vectorized. For example:
645 for i...
646 for j...
647 1. T0 = i + j
648 2. T1 = a[T0]
650 3. j = j + 1
652 Stmt 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
655 This pass detects such stmts. */
657 opt_result
658 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
660 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
661 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
662 unsigned int nbbs = loop->num_nodes;
663 gimple_stmt_iterator si;
664 unsigned int i;
665 basic_block bb;
666 bool live_p;
667 enum vect_relevant relevant;
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
671 auto_vec<stmt_vec_info, 64> worklist;
673 /* 1. Init worklist. */
674 for (i = 0; i < nbbs; i++)
676 bb = bbs[i];
677 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
679 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
682 phi_info->stmt);
684 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
685 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
687 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
689 if (is_gimple_debug (gsi_stmt (si)))
690 continue;
691 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "init: stmt relevant? %G", stmt_info->stmt);
696 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
697 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
701 /* 2. Process_worklist */
702 while (worklist.length () > 0)
704 use_operand_p use_p;
705 ssa_op_iter iter;
707 stmt_vec_info stmt_vinfo = worklist.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "worklist: examine stmt: %G", stmt_vinfo->stmt);
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
714 of STMT. */
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
730 case vect_reduction_def:
731 gcc_assert (relevant != vect_unused_in_scope);
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
736 return opt_result::failure_at
737 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
738 break;
740 case vect_nested_cycle:
741 if (relevant != vect_unused_in_scope
742 && relevant != vect_used_in_outer_by_reduction
743 && relevant != vect_used_in_outer)
744 return opt_result::failure_at
745 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
746 break;
748 case vect_double_reduction_def:
749 if (relevant != vect_unused_in_scope
750 && relevant != vect_used_by_reduction
751 && relevant != vect_used_only_live)
752 return opt_result::failure_at
753 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
754 break;
756 default:
757 break;
760 if (is_pattern_stmt_p (stmt_vinfo))
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
767 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
768 tree op = gimple_assign_rhs1 (assign);
770 i = 1;
771 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
773 opt_result res
774 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
775 loop_vinfo, relevant, &worklist, false);
776 if (!res)
777 return res;
778 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
779 loop_vinfo, relevant, &worklist, false);
780 if (!res)
781 return res;
782 i = 2;
784 for (; i < gimple_num_ops (assign); i++)
786 op = gimple_op (assign, i);
787 if (TREE_CODE (op) == SSA_NAME)
789 opt_result res
790 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
791 &worklist, false);
792 if (!res)
793 return res;
797 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
799 for (i = 0; i < gimple_call_num_args (call); i++)
801 tree arg = gimple_call_arg (call, i);
802 opt_result res
803 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
804 &worklist, false);
805 if (!res)
806 return res;
810 else
811 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
813 tree op = USE_FROM_PTR (use_p);
814 opt_result res
815 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
816 &worklist, false);
817 if (!res)
818 return res;
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
823 gather_scatter_info gs_info;
824 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
825 gcc_unreachable ();
826 opt_result res
827 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
828 &worklist, true);
829 if (!res)
831 if (fatal)
832 *fatal = false;
833 return res;
836 } /* while worklist */
838 return opt_result::success ();
841 /* Function vect_model_simple_cost.
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
847 static void
848 vect_model_simple_cost (vec_info *,
849 stmt_vec_info stmt_info, int ncopies,
850 enum vect_def_type *dt,
851 int ndts,
852 slp_tree node,
853 stmt_vector_for_cost *cost_vec,
854 vect_cost_for_stmt kind = vector_stmt)
856 int inside_cost = 0, prologue_cost = 0;
858 gcc_assert (cost_vec != NULL);
860 /* ??? Somehow we need to fix this at the callers. */
861 if (node)
862 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
864 if (!node)
865 /* Cost the "broadcast" of a scalar operand in to a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
867 cost model. */
868 for (int i = 0; i < ndts; i++)
869 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
870 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
871 stmt_info, 0, vect_prologue);
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
875 stmt_info, 0, vect_body);
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost, prologue_cost);
884 /* Model cost for type demotion and promotion operations. PWR is
885 normally zero for single-step promotions and demotions. It will be
886 one if two-step promotion/demotion is required, and so on. NCOPIES
887 is the number of vector results (and thus number of instructions)
888 for the narrowest end of the operation chain. Each additional
889 step doubles the number of instructions required. If WIDEN_ARITH
890 is true the stmt is doing widening arithmetic. */
892 static void
893 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
894 enum vect_def_type *dt,
895 unsigned int ncopies, int pwr,
896 stmt_vector_for_cost *cost_vec,
897 bool widen_arith)
899 int i;
900 int inside_cost = 0, prologue_cost = 0;
902 for (i = 0; i < pwr + 1; i++)
904 inside_cost += record_stmt_cost (cost_vec, ncopies,
905 widen_arith
906 ? vector_stmt : vec_promote_demote,
907 stmt_info, 0, vect_body);
908 ncopies *= 2;
911 /* FORNOW: Assuming maximum 2 args per stmts. */
912 for (i = 0; i < 2; i++)
913 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
914 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
915 stmt_info, 0, vect_prologue);
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE, vect_location,
919 "vect_model_promotion_demotion_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost, prologue_cost);
923 /* Returns true if the current function returns DECL. */
925 static bool
926 cfun_returns (tree decl)
928 edge_iterator ei;
929 edge e;
930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
932 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
933 if (!ret)
934 continue;
935 if (gimple_return_retval (ret) == decl)
936 return true;
937 /* We often end up with an aggregate copy to the result decl,
938 handle that case as well. First skip intermediate clobbers
939 though. */
940 gimple *def = ret;
943 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
945 while (gimple_clobber_p (def));
946 if (is_a <gassign *> (def)
947 && gimple_assign_lhs (def) == gimple_return_retval (ret)
948 && gimple_assign_rhs1 (def) == decl)
949 return true;
951 return false;
954 /* Calculate cost of DR's memory access. */
955 void
956 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
957 dr_alignment_support alignment_support_scheme,
958 int misalignment,
959 unsigned int *inside_cost,
960 stmt_vector_for_cost *body_cost_vec)
962 switch (alignment_support_scheme)
964 case dr_aligned:
966 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
967 vector_store, stmt_info, 0,
968 vect_body);
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE, vect_location,
972 "vect_model_store_cost: aligned.\n");
973 break;
976 case dr_unaligned_supported:
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
980 unaligned_store, stmt_info,
981 misalignment, vect_body);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE, vect_location,
984 "vect_model_store_cost: unaligned supported by "
985 "hardware.\n");
986 break;
989 case dr_unaligned_unsupported:
991 *inside_cost = VECT_MAX_COST;
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
995 "vect_model_store_cost: unsupported access.\n");
996 break;
999 default:
1000 gcc_unreachable ();
1004 /* Calculate cost of DR's memory access. */
1005 void
1006 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1007 dr_alignment_support alignment_support_scheme,
1008 int misalignment,
1009 bool add_realign_cost, unsigned int *inside_cost,
1010 unsigned int *prologue_cost,
1011 stmt_vector_for_cost *prologue_cost_vec,
1012 stmt_vector_for_cost *body_cost_vec,
1013 bool record_prologue_costs)
1015 switch (alignment_support_scheme)
1017 case dr_aligned:
1019 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1020 stmt_info, 0, vect_body);
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_NOTE, vect_location,
1024 "vect_model_load_cost: aligned.\n");
1026 break;
1028 case dr_unaligned_supported:
1030 /* Here, we assign an additional cost for the unaligned load. */
1031 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1032 unaligned_load, stmt_info,
1033 misalignment, vect_body);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_load_cost: unaligned supported by "
1038 "hardware.\n");
1040 break;
1042 case dr_explicit_realign:
1044 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1045 vector_load, stmt_info, 0, vect_body);
1046 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1047 vec_perm, stmt_info, 0, vect_body);
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1051 prologue costs. */
1052 if (targetm.vectorize.builtin_mask_for_load)
1053 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1054 stmt_info, 0, vect_body);
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_NOTE, vect_location,
1058 "vect_model_load_cost: explicit realign\n");
1060 break;
1062 case dr_explicit_realign_optimized:
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_NOTE, vect_location,
1066 "vect_model_load_cost: unaligned software "
1067 "pipelined.\n");
1069 /* Unaligned software pipeline has a load of an address, an initial
1070 load, and possibly a mask operation to "prime" the loop. However,
1071 if this is an access in a group of loads, which provide grouped
1072 access, then the above cost should only be considered for one
1073 access in the group. Inside the loop, there is a load op
1074 and a realignment op. */
1076 if (add_realign_cost && record_prologue_costs)
1078 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1079 vector_stmt, stmt_info,
1080 0, vect_prologue);
1081 if (targetm.vectorize.builtin_mask_for_load)
1082 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1083 vector_stmt, stmt_info,
1084 0, vect_prologue);
1087 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1088 stmt_info, 0, vect_body);
1089 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1090 stmt_info, 0, vect_body);
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE, vect_location,
1094 "vect_model_load_cost: explicit realign optimized"
1095 "\n");
1097 break;
1100 case dr_unaligned_unsupported:
1102 *inside_cost = VECT_MAX_COST;
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1106 "vect_model_load_cost: unsupported access.\n");
1107 break;
1110 default:
1111 gcc_unreachable ();
1115 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1116 the loop preheader for the vectorized stmt STMT_VINFO. */
1118 static void
1119 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1120 gimple_stmt_iterator *gsi)
1122 if (gsi)
1123 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1124 else
1125 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE, vect_location,
1129 "created new init_stmt: %G", new_stmt);
1132 /* Function vect_init_vector.
1134 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1135 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1136 vector type a vector with all elements equal to VAL is created first.
1137 Place the initialization at GSI if it is not NULL. Otherwise, place the
1138 initialization at the loop preheader.
1139 Return the DEF of INIT_STMT.
1140 It will be used in the vectorization of STMT_INFO. */
1142 tree
1143 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1144 gimple_stmt_iterator *gsi)
1146 gimple *init_stmt;
1147 tree new_temp;
1149 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1150 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1152 gcc_assert (VECTOR_TYPE_P (type));
1153 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1155 /* Scalar boolean value should be transformed into
1156 all zeros or all ones value before building a vector. */
1157 if (VECTOR_BOOLEAN_TYPE_P (type))
1159 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1160 tree false_val = build_zero_cst (TREE_TYPE (type));
1162 if (CONSTANT_CLASS_P (val))
1163 val = integer_zerop (val) ? false_val : true_val;
1164 else
1166 new_temp = make_ssa_name (TREE_TYPE (type));
1167 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1168 val, true_val, false_val);
1169 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1170 val = new_temp;
1173 else
1175 gimple_seq stmts = NULL;
1176 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1177 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1178 TREE_TYPE (type), val);
1179 else
1180 /* ??? Condition vectorization expects us to do
1181 promotion of invariant/external defs. */
1182 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1183 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1184 !gsi_end_p (gsi2); )
1186 init_stmt = gsi_stmt (gsi2);
1187 gsi_remove (&gsi2, false);
1188 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1192 val = build_vector_from_val (type, val);
1195 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1196 init_stmt = gimple_build_assign (new_temp, val);
1197 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1198 return new_temp;
1202 /* Function vect_get_vec_defs_for_operand.
1204 OP is an operand in STMT_VINFO. This function returns a vector of
1205 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1207 In the case that OP is an SSA_NAME which is defined in the loop, then
1208 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1210 In case OP is an invariant or constant, a new stmt that creates a vector def
1211 needs to be introduced. VECTYPE may be used to specify a required type for
1212 vector invariant. */
1214 void
1215 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1216 unsigned ncopies,
1217 tree op, vec<tree> *vec_oprnds, tree vectype)
1219 gimple *def_stmt;
1220 enum vect_def_type dt;
1221 bool is_simple_use;
1222 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_get_vec_defs_for_operand: %T\n", op);
1228 stmt_vec_info def_stmt_info;
1229 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1230 &def_stmt_info, &def_stmt);
1231 gcc_assert (is_simple_use);
1232 if (def_stmt && dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1235 vec_oprnds->create (ncopies);
1236 if (dt == vect_constant_def || dt == vect_external_def)
1238 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1239 tree vector_type;
1241 if (vectype)
1242 vector_type = vectype;
1243 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1244 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1245 vector_type = truth_type_for (stmt_vectype);
1246 else
1247 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1249 gcc_assert (vector_type);
1250 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1251 while (ncopies--)
1252 vec_oprnds->quick_push (vop);
1254 else
1256 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1257 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1258 for (unsigned i = 0; i < ncopies; ++i)
1259 vec_oprnds->quick_push (gimple_get_lhs
1260 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1265 /* Get vectorized definitions for OP0 and OP1. */
1267 void
1268 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1269 unsigned ncopies,
1270 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1271 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1272 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1273 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1275 if (slp_node)
1277 if (op0)
1278 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1279 if (op1)
1280 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1281 if (op2)
1282 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1283 if (op3)
1284 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1286 else
1288 if (op0)
1289 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1290 op0, vec_oprnds0, vectype0);
1291 if (op1)
1292 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1293 op1, vec_oprnds1, vectype1);
1294 if (op2)
1295 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1296 op2, vec_oprnds2, vectype2);
1297 if (op3)
1298 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1299 op3, vec_oprnds3, vectype3);
1303 void
1304 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1305 unsigned ncopies,
1306 tree op0, vec<tree> *vec_oprnds0,
1307 tree op1, vec<tree> *vec_oprnds1,
1308 tree op2, vec<tree> *vec_oprnds2,
1309 tree op3, vec<tree> *vec_oprnds3)
1311 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1312 op0, vec_oprnds0, NULL_TREE,
1313 op1, vec_oprnds1, NULL_TREE,
1314 op2, vec_oprnds2, NULL_TREE,
1315 op3, vec_oprnds3, NULL_TREE);
1318 /* Helper function called by vect_finish_replace_stmt and
1319 vect_finish_stmt_generation. Set the location of the new
1320 statement and create and return a stmt_vec_info for it. */
1322 static void
1323 vect_finish_stmt_generation_1 (vec_info *,
1324 stmt_vec_info stmt_info, gimple *vec_stmt)
1326 if (dump_enabled_p ())
1327 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1329 if (stmt_info)
1331 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1333 /* While EH edges will generally prevent vectorization, stmt might
1334 e.g. be in a must-not-throw region. Ensure newly created stmts
1335 that could throw are part of the same region. */
1336 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1337 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1338 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1340 else
1341 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1344 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1345 which sets the same scalar result as STMT_INFO did. Create and return a
1346 stmt_vec_info for VEC_STMT. */
1348 void
1349 vect_finish_replace_stmt (vec_info *vinfo,
1350 stmt_vec_info stmt_info, gimple *vec_stmt)
1352 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1353 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1355 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1356 gsi_replace (&gsi, vec_stmt, true);
1358 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1361 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1362 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1364 void
1365 vect_finish_stmt_generation (vec_info *vinfo,
1366 stmt_vec_info stmt_info, gimple *vec_stmt,
1367 gimple_stmt_iterator *gsi)
1369 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1371 if (!gsi_end_p (*gsi)
1372 && gimple_has_mem_ops (vec_stmt))
1374 gimple *at_stmt = gsi_stmt (*gsi);
1375 tree vuse = gimple_vuse (at_stmt);
1376 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1378 tree vdef = gimple_vdef (at_stmt);
1379 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1380 gimple_set_modified (vec_stmt, true);
1381 /* If we have an SSA vuse and insert a store, update virtual
1382 SSA form to avoid triggering the renamer. Do so only
1383 if we can easily see all uses - which is what almost always
1384 happens with the way vectorized stmts are inserted. */
1385 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1386 && ((is_gimple_assign (vec_stmt)
1387 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1388 || (is_gimple_call (vec_stmt)
1389 && (!(gimple_call_flags (vec_stmt)
1390 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1391 || (gimple_call_lhs (vec_stmt)
1392 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1394 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1395 gimple_set_vdef (vec_stmt, new_vdef);
1396 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1400 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1401 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1404 /* We want to vectorize a call to combined function CFN with function
1405 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1406 as the types of all inputs. Check whether this is possible using
1407 an internal function, returning its code if so or IFN_LAST if not. */
1409 static internal_fn
1410 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1411 tree vectype_out, tree vectype_in)
1413 internal_fn ifn;
1414 if (internal_fn_p (cfn))
1415 ifn = as_internal_fn (cfn);
1416 else
1417 ifn = associated_internal_fn (fndecl);
1418 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1420 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1421 if (info.vectorizable)
1423 bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
1424 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1425 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1427 /* The type size of both the vectype_in and vectype_out should be
1428 exactly the same when vectype_out isn't participating the optab.
1429 While there is no restriction for type size when vectype_out
1430 is part of the optab query. */
1431 if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
1432 return IFN_LAST;
1434 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1435 OPTIMIZE_FOR_SPEED))
1436 return ifn;
1439 return IFN_LAST;
1443 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1444 gimple_stmt_iterator *);
1446 /* Check whether a load or store statement in the loop described by
1447 LOOP_VINFO is possible in a loop using partial vectors. This is
1448 testing whether the vectorizer pass has the appropriate support,
1449 as well as whether the target does.
1451 VLS_TYPE says whether the statement is a load or store and VECTYPE
1452 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1453 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1454 says how the load or store is going to be implemented and GROUP_SIZE
1455 is the number of load or store statements in the containing group.
1456 If the access is a gather load or scatter store, GS_INFO describes
1457 its arguments. If the load or store is conditional, SCALAR_MASK is the
1458 condition under which it occurs.
1460 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1461 vectors is not supported, otherwise record the required rgroup control
1462 types. */
1464 static void
1465 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1466 slp_tree slp_node,
1467 vec_load_store_type vls_type,
1468 int group_size,
1469 vect_memory_access_type
1470 memory_access_type,
1471 gather_scatter_info *gs_info,
1472 tree scalar_mask)
1474 /* Invariant loads need no special support. */
1475 if (memory_access_type == VMAT_INVARIANT)
1476 return;
1478 unsigned int nvectors;
1479 if (slp_node)
1480 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1481 else
1482 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1484 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1485 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1486 machine_mode vecmode = TYPE_MODE (vectype);
1487 bool is_load = (vls_type == VLS_LOAD);
1488 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1490 internal_fn ifn
1491 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1492 : vect_store_lanes_supported (vectype, group_size, true));
1493 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1494 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1495 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1496 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1497 scalar_mask);
1498 else
1500 if (dump_enabled_p ())
1501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1502 "can't operate on partial vectors because"
1503 " the target doesn't have an appropriate"
1504 " load/store-lanes instruction.\n");
1505 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1507 return;
1510 if (memory_access_type == VMAT_GATHER_SCATTER)
1512 internal_fn ifn = (is_load
1513 ? IFN_MASK_GATHER_LOAD
1514 : IFN_MASK_SCATTER_STORE);
1515 internal_fn len_ifn = (is_load
1516 ? IFN_MASK_LEN_GATHER_LOAD
1517 : IFN_MASK_LEN_SCATTER_STORE);
1518 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1519 gs_info->memory_type,
1520 gs_info->offset_vectype,
1521 gs_info->scale))
1522 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1523 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1524 gs_info->memory_type,
1525 gs_info->offset_vectype,
1526 gs_info->scale))
1527 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1528 scalar_mask);
1529 else
1531 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1533 "can't operate on partial vectors because"
1534 " the target doesn't have an appropriate"
1535 " gather load or scatter store instruction.\n");
1536 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1538 return;
1541 if (memory_access_type != VMAT_CONTIGUOUS
1542 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1544 /* Element X of the data must come from iteration i * VF + X of the
1545 scalar loop. We need more work to support other mappings. */
1546 if (dump_enabled_p ())
1547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1548 "can't operate on partial vectors because an"
1549 " access isn't contiguous.\n");
1550 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1551 return;
1554 if (!VECTOR_MODE_P (vecmode))
1556 if (dump_enabled_p ())
1557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1558 "can't operate on partial vectors when emulating"
1559 " vector operations.\n");
1560 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1561 return;
1564 /* We might load more scalars than we need for permuting SLP loads.
1565 We checked in get_group_load_store_type that the extra elements
1566 don't leak into a new vector. */
1567 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1569 unsigned int nvectors;
1570 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1571 return nvectors;
1572 gcc_unreachable ();
1575 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1576 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1577 machine_mode mask_mode;
1578 machine_mode vmode;
1579 bool using_partial_vectors_p = false;
1580 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1582 nvectors = group_memory_nvectors (group_size * vf, nunits);
1583 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1584 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1585 using_partial_vectors_p = true;
1587 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1588 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1590 nvectors = group_memory_nvectors (group_size * vf, nunits);
1591 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1592 using_partial_vectors_p = true;
1595 if (!using_partial_vectors_p)
1597 if (dump_enabled_p ())
1598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1599 "can't operate on partial vectors because the"
1600 " target doesn't have the appropriate partial"
1601 " vectorization load or store.\n");
1602 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1606 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1607 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1608 that needs to be applied to all loads and stores in a vectorized loop.
1609 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1610 otherwise return VEC_MASK & LOOP_MASK.
1612 MASK_TYPE is the type of both masks. If new statements are needed,
1613 insert them before GSI. */
1615 static tree
1616 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1617 tree vec_mask, gimple_stmt_iterator *gsi)
1619 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1620 if (!loop_mask)
1621 return vec_mask;
1623 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1625 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1626 return vec_mask;
1628 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1629 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1630 vec_mask, loop_mask);
1632 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1633 return and_res;
1636 /* Determine whether we can use a gather load or scatter store to vectorize
1637 strided load or store STMT_INFO by truncating the current offset to a
1638 smaller width. We need to be able to construct an offset vector:
1640 { 0, X, X*2, X*3, ... }
1642 without loss of precision, where X is STMT_INFO's DR_STEP.
1644 Return true if this is possible, describing the gather load or scatter
1645 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1647 static bool
1648 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1649 loop_vec_info loop_vinfo, bool masked_p,
1650 gather_scatter_info *gs_info)
1652 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1653 data_reference *dr = dr_info->dr;
1654 tree step = DR_STEP (dr);
1655 if (TREE_CODE (step) != INTEGER_CST)
1657 /* ??? Perhaps we could use range information here? */
1658 if (dump_enabled_p ())
1659 dump_printf_loc (MSG_NOTE, vect_location,
1660 "cannot truncate variable step.\n");
1661 return false;
1664 /* Get the number of bits in an element. */
1665 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1666 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1667 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1669 /* Set COUNT to the upper limit on the number of elements - 1.
1670 Start with the maximum vectorization factor. */
1671 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1673 /* Try lowering COUNT to the number of scalar latch iterations. */
1674 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1675 widest_int max_iters;
1676 if (max_loop_iterations (loop, &max_iters)
1677 && max_iters < count)
1678 count = max_iters.to_shwi ();
1680 /* Try scales of 1 and the element size. */
1681 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1682 wi::overflow_type overflow = wi::OVF_NONE;
1683 for (int i = 0; i < 2; ++i)
1685 int scale = scales[i];
1686 widest_int factor;
1687 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1688 continue;
1690 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1691 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1692 if (overflow)
1693 continue;
1694 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1695 unsigned int min_offset_bits = wi::min_precision (range, sign);
1697 /* Find the narrowest viable offset type. */
1698 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1699 tree offset_type = build_nonstandard_integer_type (offset_bits,
1700 sign == UNSIGNED);
1702 /* See whether the target supports the operation with an offset
1703 no narrower than OFFSET_TYPE. */
1704 tree memory_type = TREE_TYPE (DR_REF (dr));
1705 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1706 vectype, memory_type, offset_type, scale,
1707 &gs_info->ifn, &gs_info->offset_vectype)
1708 || gs_info->ifn == IFN_LAST)
1709 continue;
1711 gs_info->decl = NULL_TREE;
1712 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1713 but we don't need to store that here. */
1714 gs_info->base = NULL_TREE;
1715 gs_info->element_type = TREE_TYPE (vectype);
1716 gs_info->offset = fold_convert (offset_type, step);
1717 gs_info->offset_dt = vect_constant_def;
1718 gs_info->scale = scale;
1719 gs_info->memory_type = memory_type;
1720 return true;
1723 if (overflow && dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE, vect_location,
1725 "truncating gather/scatter offset to %d bits"
1726 " might change its value.\n", element_bits);
1728 return false;
1731 /* Return true if we can use gather/scatter internal functions to
1732 vectorize STMT_INFO, which is a grouped or strided load or store.
1733 MASKED_P is true if load or store is conditional. When returning
1734 true, fill in GS_INFO with the information required to perform the
1735 operation. */
1737 static bool
1738 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1739 loop_vec_info loop_vinfo, bool masked_p,
1740 gather_scatter_info *gs_info)
1742 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1743 || gs_info->ifn == IFN_LAST)
1744 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1745 masked_p, gs_info);
1747 tree old_offset_type = TREE_TYPE (gs_info->offset);
1748 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1750 gcc_assert (TYPE_PRECISION (new_offset_type)
1751 >= TYPE_PRECISION (old_offset_type));
1752 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1754 if (dump_enabled_p ())
1755 dump_printf_loc (MSG_NOTE, vect_location,
1756 "using gather/scatter for strided/grouped access,"
1757 " scale = %d\n", gs_info->scale);
1759 return true;
1762 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1763 elements with a known constant step. Return -1 if that step
1764 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1766 static int
1767 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1769 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1770 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1771 size_zero_node);
1774 /* If the target supports a permute mask that reverses the elements in
1775 a vector of type VECTYPE, return that mask, otherwise return null. */
1777 static tree
1778 perm_mask_for_reverse (tree vectype)
1780 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1782 /* The encoding has a single stepped pattern. */
1783 vec_perm_builder sel (nunits, 1, 3);
1784 for (int i = 0; i < 3; ++i)
1785 sel.quick_push (nunits - 1 - i);
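/* For example, with eight elements the explicit encoding is { 7, 6, 5 }
   and the stepped pattern extends it to the full reversal
   { 7, 6, 5, 4, 3, 2, 1, 0 }. (Illustrative element count.) */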
1787 vec_perm_indices indices (sel, 1, nunits);
1788 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1789 indices))
1790 return NULL_TREE;
1791 return vect_gen_perm_mask_checked (vectype, indices);
1794 /* A subroutine of get_load_store_type, with a subset of the same
1795 arguments. Handle the case where STMT_INFO is a load or store that
1796 accesses consecutive elements with a negative step. Sets *POFFSET
1797 to the offset to be applied to the DR for the first access. */
1799 static vect_memory_access_type
1800 get_negative_load_store_type (vec_info *vinfo,
1801 stmt_vec_info stmt_info, tree vectype,
1802 vec_load_store_type vls_type,
1803 unsigned int ncopies, poly_int64 *poffset)
1805 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1806 dr_alignment_support alignment_support_scheme;
1808 if (ncopies > 1)
1810 if (dump_enabled_p ())
1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1812 "multiple types with negative step.\n");
1813 return VMAT_ELEMENTWISE;
1816 /* For backward running DRs the first access in vectype actually is
1817 N-1 elements before the address of the DR. */
1818 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1819 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
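/* For example, for V4SI (four 4-byte elements) this is (-4 + 1) * 4 = -12
   bytes, i.e. the first vector access starts three elements before the
   DR address. (Illustrative mode.) */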
1821 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1822 alignment_support_scheme
1823 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1824 if (alignment_support_scheme != dr_aligned
1825 && alignment_support_scheme != dr_unaligned_supported)
1827 if (dump_enabled_p ())
1828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1829 "negative step but alignment required.\n");
1830 *poffset = 0;
1831 return VMAT_ELEMENTWISE;
1834 if (vls_type == VLS_STORE_INVARIANT)
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_NOTE, vect_location,
1838 "negative step with invariant source;"
1839 " no permute needed.\n");
1840 return VMAT_CONTIGUOUS_DOWN;
1843 if (!perm_mask_for_reverse (vectype))
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1847 "negative step and reversing not supported.\n");
1848 *poffset = 0;
1849 return VMAT_ELEMENTWISE;
1852 return VMAT_CONTIGUOUS_REVERSE;
1855 /* STMT_INFO is either a masked or unconditional store. Return the value
1856 being stored. */
1858 tree
1859 vect_get_store_rhs (stmt_vec_info stmt_info)
1861 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1863 gcc_assert (gimple_assign_single_p (assign));
1864 return gimple_assign_rhs1 (assign);
1866 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1868 internal_fn ifn = gimple_call_internal_fn (call);
1869 int index = internal_fn_stored_value_index (ifn);
1870 gcc_assert (index >= 0);
1871 return gimple_call_arg (call, index);
1873 gcc_unreachable ();
1876 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1878 This function returns a vector type which can be composed from NELTS pieces,
1879 whose type is recorded in PTYPE. VTYPE should be a vector type and have the
1880 same vector size as the returned vector. It first checks whether the target
1881 supports a piece-sized vector mode for the construction; if not, it then
1882 checks a piece-sized scalar mode. It returns NULL_TREE if no suitable
1883 composition can be found.
1885 For example, for (vtype=V16QI, nelts=4), we can probably get:
1886 - V16QI with PTYPE V4QI.
1887 - V4SI with PTYPE SI.
1888 - NULL_TREE. */
1890 static tree
1891 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1893 gcc_assert (VECTOR_TYPE_P (vtype));
1894 gcc_assert (known_gt (nelts, 0U));
1896 machine_mode vmode = TYPE_MODE (vtype);
1897 if (!VECTOR_MODE_P (vmode))
1898 return NULL_TREE;
1900 /* When we are asked to compose the vector from its components let
1901 that happen directly. */
1902 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1904 *ptype = TREE_TYPE (vtype);
1905 return vtype;
1908 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
1909 unsigned int pbsize;
1910 if (constant_multiple_p (vbsize, nelts, &pbsize))
1912 /* First check if vec_init optab supports construction from
1913 vector pieces directly. */
1914 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1915 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
1916 machine_mode rmode;
1917 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
1918 && (convert_optab_handler (vec_init_optab, vmode, rmode)
1919 != CODE_FOR_nothing))
1921 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1922 return vtype;
1925 /* Otherwise check whether an integer type of the same piece size exists
1926 and whether the vec_init optab supports construction from it directly. */
1927 if (int_mode_for_size (pbsize, 0).exists (&elmode)
1928 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
1929 && (convert_optab_handler (vec_init_optab, rmode, elmode)
1930 != CODE_FOR_nothing))
1932 *ptype = build_nonstandard_integer_type (pbsize, 1);
1933 return build_vector_type (*ptype, nelts);
1937 return NULL_TREE;
1940 /* A subroutine of get_load_store_type, with a subset of the same
1941 arguments. Handle the case where STMT_INFO is part of a grouped load
1942 or store.
1944 For stores, the statements in the group are all consecutive
1945 and there is no gap at the end. For loads, the statements in the
1946 group might not be consecutive; there can be gaps between statements
1947 as well as at the end. */
1949 static bool
1950 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1951 tree vectype, slp_tree slp_node,
1952 bool masked_p, vec_load_store_type vls_type,
1953 vect_memory_access_type *memory_access_type,
1954 poly_int64 *poffset,
1955 dr_alignment_support *alignment_support_scheme,
1956 int *misalignment,
1957 gather_scatter_info *gs_info,
1958 internal_fn *lanes_ifn)
1960 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1961 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1962 stmt_vec_info first_stmt_info;
1963 unsigned int group_size;
1964 unsigned HOST_WIDE_INT gap;
1965 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1967 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1968 group_size = DR_GROUP_SIZE (first_stmt_info);
1969 gap = DR_GROUP_GAP (first_stmt_info);
1971 else
1973 first_stmt_info = stmt_info;
1974 group_size = 1;
1975 gap = 0;
1977 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
1978 bool single_element_p = (stmt_info == first_stmt_info
1979 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
1980 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1982 /* True if the vectorized statements would access beyond the last
1983 statement in the group. */
1984 bool overrun_p = false;
1986 /* True if we can cope with such overrun by peeling for gaps, so that
1987 there is at least one final scalar iteration after the vector loop. */
1988 bool can_overrun_p = (!masked_p
1989 && vls_type == VLS_LOAD
1990 && loop_vinfo
1991 && !loop->inner);
1993 /* There can only be a gap at the end of the group if the stride is
1994 known at compile time. */
1995 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
1997 /* Stores can't yet have gaps. */
1998 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2000 if (slp_node)
2002 /* For SLP vectorization we directly vectorize a subchain
2003 without permutation. */
2004 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2005 first_dr_info
2006 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2007 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2009 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2010 separated by the stride, until we have a complete vector.
2011 Fall back to scalar accesses if that isn't possible. */
2012 if (multiple_p (nunits, group_size))
2013 *memory_access_type = VMAT_STRIDED_SLP;
2014 else
2015 *memory_access_type = VMAT_ELEMENTWISE;
2017 else
2019 overrun_p = loop_vinfo && gap != 0;
2020 if (overrun_p && vls_type != VLS_LOAD)
2022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2023 "Grouped store with gaps requires"
2024 " non-consecutive accesses\n");
2025 return false;
2027 /* An overrun is fine if the trailing elements are smaller
2028 than the alignment boundary B. Every vector access will
2029 be a multiple of B and so we are guaranteed to access a
2030 non-gap element in the same B-sized block. */
2031 if (overrun_p
2032 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2033 vectype)
2034 / vect_get_scalar_dr_size (first_dr_info)))
2035 overrun_p = false;
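/* For example, with a known 16-byte alignment and 4-byte scalar elements,
   a trailing gap of up to three elements cannot cross into the next
   16-byte block. (Illustrative sizes.) */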
2037 /* If the gap splits the vector in half and the target
2038 can do half-vector operations avoid the epilogue peeling
2039 by simply loading half of the vector only. Usually
2040 the construction with an upper zero half will be elided. */
2041 dr_alignment_support alss;
2042 int misalign = dr_misalignment (first_dr_info, vectype);
2043 tree half_vtype;
2044 if (overrun_p
2045 && !masked_p
2046 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2047 vectype, misalign)))
2048 == dr_aligned
2049 || alss == dr_unaligned_supported)
2050 && known_eq (nunits, (group_size - gap) * 2)
2051 && known_eq (nunits, group_size)
2052 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2053 != NULL_TREE))
2054 overrun_p = false;
2056 if (overrun_p && !can_overrun_p)
2058 if (dump_enabled_p ())
2059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2060 "Peeling for outer loop is not supported\n");
2061 return false;
2063 int cmp = compare_step_with_zero (vinfo, stmt_info);
2064 if (cmp < 0)
2066 if (single_element_p)
2067 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2068 only correct for single element "interleaving" SLP. */
2069 *memory_access_type = get_negative_load_store_type
2070 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2071 else
2073 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2074 separated by the stride, until we have a complete vector.
2075 Fall back to scalar accesses if that isn't possible. */
2076 if (multiple_p (nunits, group_size))
2077 *memory_access_type = VMAT_STRIDED_SLP;
2078 else
2079 *memory_access_type = VMAT_ELEMENTWISE;
2082 else if (cmp == 0 && loop_vinfo)
2084 gcc_assert (vls_type == VLS_LOAD);
2085 *memory_access_type = VMAT_INVARIANT;
2086 /* Invariant accesses perform only component accesses, alignment
2087 is irrelevant for them. */
2088 *alignment_support_scheme = dr_unaligned_supported;
2090 else
2091 *memory_access_type = VMAT_CONTIGUOUS;
2093 /* When we have a contiguous access across loop iterations
2094 but the access in the loop doesn't cover the full vector
2095 we can end up with no gap recorded but still excess
2096 elements accessed, see PR103116. Make sure we peel for
2097 gaps if necessary and sufficient and give up if not.
2099 If there is a combination of the access not covering the full
2100 vector and a gap recorded then we may need to peel twice. */
2101 if (loop_vinfo
2102 && *memory_access_type == VMAT_CONTIGUOUS
2103 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2104 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2105 nunits))
2107 unsigned HOST_WIDE_INT cnunits, cvf;
2108 if (!can_overrun_p
2109 || !nunits.is_constant (&cnunits)
2110 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2111 /* Peeling for gaps assumes that a single scalar iteration
2112 is enough to make sure the last vector iteration doesn't
2113 access excess elements.
2114 ??? Enhancements include peeling multiple iterations
2115 or using masked loads with a static mask. */
2116 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2118 if (dump_enabled_p ())
2119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2120 "peeling for gaps insufficient for "
2121 "access\n");
2122 return false;
2124 overrun_p = true;
2128 else
2130 /* We can always handle this case using elementwise accesses,
2131 but see if something more efficient is available. */
2132 *memory_access_type = VMAT_ELEMENTWISE;
2134 /* If there is a gap at the end of the group then these optimizations
2135 would access excess elements in the last iteration. */
2136 bool would_overrun_p = (gap != 0);
2137 /* An overrun is fine if the trailing elements are smaller than the
2138 alignment boundary B. Every vector access will be a multiple of B
2139 and so we are guaranteed to access a non-gap element in the
2140 same B-sized block. */
2141 if (would_overrun_p
2142 && !masked_p
2143 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2144 / vect_get_scalar_dr_size (first_dr_info)))
2145 would_overrun_p = false;
2147 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2148 && (can_overrun_p || !would_overrun_p)
2149 && compare_step_with_zero (vinfo, stmt_info) > 0)
2151 /* First cope with the degenerate case of a single-element
2152 vector. */
2153 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2156 else
2158 /* Otherwise try using LOAD/STORE_LANES. */
2159 *lanes_ifn
2160 = vls_type == VLS_LOAD
2161 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2162 : vect_store_lanes_supported (vectype, group_size,
2163 masked_p);
2164 if (*lanes_ifn != IFN_LAST)
2166 *memory_access_type = VMAT_LOAD_STORE_LANES;
2167 overrun_p = would_overrun_p;
2170 /* If that fails, try using permuting loads. */
2171 else if (vls_type == VLS_LOAD
2172 ? vect_grouped_load_supported (vectype,
2173 single_element_p,
2174 group_size)
2175 : vect_grouped_store_supported (vectype, group_size))
2177 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2178 overrun_p = would_overrun_p;
2183 /* As a last resort, try using a gather load or scatter store.
2185 ??? Although the code can handle all group sizes correctly,
2186 it probably isn't a win to use separate strided accesses based
2187 on nearby locations. Or, even if it's a win over scalar code,
2188 it might not be a win over vectorizing at a lower VF, if that
2189 allows us to use contiguous accesses. */
2190 if (*memory_access_type == VMAT_ELEMENTWISE
2191 && single_element_p
2192 && loop_vinfo
2193 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2194 masked_p, gs_info))
2195 *memory_access_type = VMAT_GATHER_SCATTER;
2198 if (*memory_access_type == VMAT_GATHER_SCATTER
2199 || *memory_access_type == VMAT_ELEMENTWISE)
2201 *alignment_support_scheme = dr_unaligned_supported;
2202 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2204 else
2206 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2207 *alignment_support_scheme
2208 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2209 *misalignment);
2212 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2214 /* STMT is the leader of the group. Check the operands of all the
2215 stmts of the group. */
2216 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2217 while (next_stmt_info)
2219 tree op = vect_get_store_rhs (next_stmt_info);
2220 enum vect_def_type dt;
2221 if (!vect_is_simple_use (op, vinfo, &dt))
2223 if (dump_enabled_p ())
2224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2225 "use not simple.\n");
2226 return false;
2228 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2232 if (overrun_p)
2234 gcc_assert (can_overrun_p);
2235 if (dump_enabled_p ())
2236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2237 "Data access with gaps requires scalar "
2238 "epilogue loop\n");
2239 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2242 return true;
2245 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2246 if there is a memory access type that the vectorized form can use,
2247 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2248 or scatters, fill in GS_INFO accordingly. In addition
2249 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2250 the target does not support the alignment scheme. *MISALIGNMENT
2251 is set according to the alignment of the access (including
2252 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2254 SLP says whether we're performing SLP rather than loop vectorization.
2255 MASKED_P is true if the statement is conditional on a vectorized mask.
2256 VECTYPE is the vector type that the vectorized statements will use.
2257 NCOPIES is the number of vector statements that will be needed. */
2259 static bool
2260 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2261 tree vectype, slp_tree slp_node,
2262 bool masked_p, vec_load_store_type vls_type,
2263 unsigned int ncopies,
2264 vect_memory_access_type *memory_access_type,
2265 poly_int64 *poffset,
2266 dr_alignment_support *alignment_support_scheme,
2267 int *misalignment,
2268 gather_scatter_info *gs_info,
2269 internal_fn *lanes_ifn)
2271 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2272 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2273 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2274 *poffset = 0;
2275 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2277 *memory_access_type = VMAT_GATHER_SCATTER;
2278 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2279 gcc_unreachable ();
2280 /* When using internal functions, we rely on pattern recognition
2281 to convert the type of the offset to the type that the target
2282 requires, with the result being a call to an internal function.
2283 If that failed for some reason (e.g. because another pattern
2284 took priority), just handle cases in which the offset already
2285 has the right type. */
2286 else if (gs_info->ifn != IFN_LAST
2287 && !is_gimple_call (stmt_info->stmt)
2288 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2289 TREE_TYPE (gs_info->offset_vectype)))
2291 if (dump_enabled_p ())
2292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2293 "%s offset requires a conversion\n",
2294 vls_type == VLS_LOAD ? "gather" : "scatter");
2295 return false;
2297 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2298 &gs_info->offset_dt,
2299 &gs_info->offset_vectype))
2301 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2303 "%s index use not simple.\n",
2304 vls_type == VLS_LOAD ? "gather" : "scatter");
2305 return false;
2307 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2309 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2310 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2311 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2312 (gs_info->offset_vectype),
2313 TYPE_VECTOR_SUBPARTS (vectype)))
2315 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2317 "unsupported vector types for emulated "
2318 "gather.\n");
2319 return false;
2322 /* Gather-scatter accesses perform only component accesses, alignment
2323 is irrelevant for them. */
2324 *alignment_support_scheme = dr_unaligned_supported;
2326 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2328 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2329 masked_p,
2330 vls_type, memory_access_type, poffset,
2331 alignment_support_scheme,
2332 misalignment, gs_info, lanes_ifn))
2333 return false;
2335 else if (STMT_VINFO_STRIDED_P (stmt_info))
2337 gcc_assert (!slp_node);
2338 if (loop_vinfo
2339 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2340 masked_p, gs_info))
2341 *memory_access_type = VMAT_GATHER_SCATTER;
2342 else
2343 *memory_access_type = VMAT_ELEMENTWISE;
2344 /* Alignment is irrelevant here. */
2345 *alignment_support_scheme = dr_unaligned_supported;
2347 else
2349 int cmp = compare_step_with_zero (vinfo, stmt_info);
2350 if (cmp == 0)
2352 gcc_assert (vls_type == VLS_LOAD);
2353 *memory_access_type = VMAT_INVARIANT;
2354 /* Invariant accesses perform only component accesses, alignment
2355 is irrelevant for them. */
2356 *alignment_support_scheme = dr_unaligned_supported;
2358 else
2360 if (cmp < 0)
2361 *memory_access_type = get_negative_load_store_type
2362 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2363 else
2364 *memory_access_type = VMAT_CONTIGUOUS;
2365 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2366 vectype, *poffset);
2367 *alignment_support_scheme
2368 = vect_supportable_dr_alignment (vinfo,
2369 STMT_VINFO_DR_INFO (stmt_info),
2370 vectype, *misalignment);
2374 if ((*memory_access_type == VMAT_ELEMENTWISE
2375 || *memory_access_type == VMAT_STRIDED_SLP)
2376 && !nunits.is_constant ())
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2380 "Not using elementwise accesses due to variable "
2381 "vectorization factor.\n");
2382 return false;
2385 if (*alignment_support_scheme == dr_unaligned_unsupported)
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2389 "unsupported unaligned access\n");
2390 return false;
2393 /* FIXME: At the moment the cost model seems to underestimate the
2394 cost of using elementwise accesses. This check preserves the
2395 traditional behavior until that can be fixed. */
2396 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2397 if (!first_stmt_info)
2398 first_stmt_info = stmt_info;
2399 if (*memory_access_type == VMAT_ELEMENTWISE
2400 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2401 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2402 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2403 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "not falling back to elementwise accesses\n");
2408 return false;
2410 return true;
2413 /* Return true if the boolean argument at MASK_INDEX is suitable for vectorizing
2414 conditional operation STMT_INFO. When returning true, store the mask
2415 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2416 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2417 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2419 static bool
2420 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2421 slp_tree slp_node, unsigned mask_index,
2422 tree *mask, slp_tree *mask_node,
2423 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2425 enum vect_def_type mask_dt;
2426 tree mask_vectype;
2427 slp_tree mask_node_1;
2428 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2429 mask, &mask_node_1, &mask_dt, &mask_vectype))
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2433 "mask use not simple.\n");
2434 return false;
2437 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2439 if (dump_enabled_p ())
2440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2441 "mask argument is not a boolean.\n");
2442 return false;
2445 /* If the caller is not prepared to adjust an external/constant
2446 SLP mask vector type, fail. */
2447 if (slp_node
2448 && !mask_node
2449 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2451 if (dump_enabled_p ())
2452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2453 "SLP mask argument is not vectorized.\n");
2454 return false;
2457 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2458 if (!mask_vectype)
2459 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
2460 mask_node_1);
2462 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2464 if (dump_enabled_p ())
2465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2466 "could not find an appropriate vector mask type.\n");
2467 return false;
2470 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2471 TYPE_VECTOR_SUBPARTS (vectype)))
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2475 "vector mask type %T"
2476 " does not match vector data type %T.\n",
2477 mask_vectype, vectype);
2479 return false;
2482 *mask_dt_out = mask_dt;
2483 *mask_vectype_out = mask_vectype;
2484 if (mask_node)
2485 *mask_node = mask_node_1;
2486 return true;
2489 /* Return true if stored value is suitable for vectorizing store
2490 statement STMT_INFO. When returning true, store the scalar stored
2491 in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2492 the type of the vectorized store value in
2493 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2495 static bool
2496 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2497 slp_tree slp_node, tree *rhs, slp_tree *rhs_node,
2498 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2499 vec_load_store_type *vls_type_out)
2501 int op_no = 0;
2502 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2504 if (gimple_call_internal_p (call)
2505 && internal_store_fn_p (gimple_call_internal_fn (call)))
2506 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2508 if (slp_node)
2509 op_no = vect_slp_child_index_for_operand
2510 (stmt_info->stmt, op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
2512 enum vect_def_type rhs_dt;
2513 tree rhs_vectype;
2514 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2515 rhs, rhs_node, &rhs_dt, &rhs_vectype))
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "use not simple.\n");
2520 return false;
2523 /* If this is a store from a constant, make sure
2524 native_encode_expr can handle it. */
2525 if (CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
2527 if (dump_enabled_p ())
2528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2529 "cannot encode constant as a byte sequence.\n");
2530 return false;
2533 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2534 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2536 if (dump_enabled_p ())
2537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2538 "incompatible vector types.\n");
2539 return false;
2542 *rhs_dt_out = rhs_dt;
2543 *rhs_vectype_out = rhs_vectype;
2544 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2545 *vls_type_out = VLS_STORE_INVARIANT;
2546 else
2547 *vls_type_out = VLS_STORE;
2548 return true;
2551 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2552 Note that we support masks with floating-point type, in which case the
2553 floats are interpreted as a bitmask. */
2555 static tree
2556 vect_build_all_ones_mask (vec_info *vinfo,
2557 stmt_vec_info stmt_info, tree masktype)
2559 if (TREE_CODE (masktype) == INTEGER_TYPE)
2560 return build_int_cst (masktype, -1);
2561 else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2562 || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2564 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2565 mask = build_vector_from_val (masktype, mask);
2566 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2568 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2570 REAL_VALUE_TYPE r;
2571 long tmp[6];
2572 for (int j = 0; j < 6; ++j)
2573 tmp[j] = -1;
2574 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2575 tree mask = build_real (TREE_TYPE (masktype), r);
2576 mask = build_vector_from_val (masktype, mask);
2577 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2579 gcc_unreachable ();
2582 /* Build an all-zero merge value of type VECTYPE while vectorizing
2583 STMT_INFO as a gather load. */
2585 static tree
2586 vect_build_zero_merge_argument (vec_info *vinfo,
2587 stmt_vec_info stmt_info, tree vectype)
2589 tree merge;
2590 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2591 merge = build_int_cst (TREE_TYPE (vectype), 0);
2592 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2594 REAL_VALUE_TYPE r;
2595 long tmp[6];
2596 for (int j = 0; j < 6; ++j)
2597 tmp[j] = 0;
2598 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2599 merge = build_real (TREE_TYPE (vectype), r);
2601 else
2602 gcc_unreachable ();
2603 merge = build_vector_from_val (vectype, merge);
2604 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2607 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2608 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2609 the gather load operation. If the load is conditional, MASK is the
2610 vectorized condition, otherwise MASK is null. PTR is the base
2611 pointer and OFFSET is the vectorized offset. */
2613 static gimple *
2614 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2615 gimple_stmt_iterator *gsi,
2616 gather_scatter_info *gs_info,
2617 tree ptr, tree offset, tree mask)
2619 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2620 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2621 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2622 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2623 /* ptrtype */ arglist = TREE_CHAIN (arglist);
2624 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2625 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2626 tree scaletype = TREE_VALUE (arglist);
2627 tree var;
2628 gcc_checking_assert (types_compatible_p (srctype, rettype)
2629 && (!mask
2630 || TREE_CODE (masktype) == INTEGER_TYPE
2631 || types_compatible_p (srctype, masktype)));
2633 tree op = offset;
2634 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2636 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2637 TYPE_VECTOR_SUBPARTS (idxtype)));
2638 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2639 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2640 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2641 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2642 op = var;
2645 tree src_op = NULL_TREE;
2646 tree mask_op = NULL_TREE;
2647 if (mask)
2649 if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2651 tree utype, optype = TREE_TYPE (mask);
2652 if (VECTOR_TYPE_P (masktype)
2653 || TYPE_MODE (masktype) == TYPE_MODE (optype))
2654 utype = masktype;
2655 else
2656 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2657 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2658 tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
2659 gassign *new_stmt
2660 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2661 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2662 mask_arg = var;
2663 if (!useless_type_conversion_p (masktype, utype))
2665 gcc_assert (TYPE_PRECISION (utype)
2666 <= TYPE_PRECISION (masktype));
2667 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2668 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2669 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2670 mask_arg = var;
2672 src_op = build_zero_cst (srctype);
2673 mask_op = mask_arg;
2675 else
2677 src_op = mask;
2678 mask_op = mask;
2681 else
2683 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2684 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2687 tree scale = build_int_cst (scaletype, gs_info->scale);
2688 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2689 mask_op, scale);
2691 if (!useless_type_conversion_p (vectype, rettype))
2693 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2694 TYPE_VECTOR_SUBPARTS (rettype)));
2695 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2696 gimple_call_set_lhs (new_stmt, op);
2697 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2698 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2699 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
2702 return new_stmt;
2705 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2706 instructions before GSI. GS_INFO describes the scatter store operation.
2707 PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
2708 vectorized data to store.
2709 If the store is conditional, MASK is the vectorized condition, otherwise
2710 MASK is null. */
2712 static gimple *
2713 vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
2714 gimple_stmt_iterator *gsi,
2715 gather_scatter_info *gs_info,
2716 tree ptr, tree offset, tree oprnd, tree mask)
2718 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2719 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2720 /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
2721 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2722 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2723 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2724 tree scaletype = TREE_VALUE (arglist);
2725 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2726 && TREE_CODE (rettype) == VOID_TYPE);
2728 tree mask_arg = NULL_TREE;
2729 if (mask)
2731 mask_arg = mask;
2732 tree optype = TREE_TYPE (mask_arg);
2733 tree utype;
2734 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
2735 utype = masktype;
2736 else
2737 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2738 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
2739 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
2740 gassign *new_stmt
2741 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2742 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2743 mask_arg = var;
2744 if (!useless_type_conversion_p (masktype, utype))
2746 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
2747 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2748 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2749 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2750 mask_arg = var;
2753 else
2755 mask_arg = build_int_cst (masktype, -1);
2756 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
2759 tree src = oprnd;
2760 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
2762 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
2763 TYPE_VECTOR_SUBPARTS (srctype)));
2764 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
2765 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
2766 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
2767 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2768 src = var;
2771 tree op = offset;
2772 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2774 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2775 TYPE_VECTOR_SUBPARTS (idxtype)));
2776 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2777 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2778 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2779 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2780 op = var;
2783 tree scale = build_int_cst (scaletype, gs_info->scale);
2784 gcall *new_stmt
2785 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
2786 return new_stmt;
2789 /* Prepare the base and offset in GS_INFO for vectorization.
2790 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2791 to the vectorized offset argument for the first copy of STMT_INFO.
2792 STMT_INFO is the statement described by GS_INFO and LOOP is the
2793 containing loop. */
2795 static void
2796 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2797 class loop *loop, stmt_vec_info stmt_info,
2798 slp_tree slp_node, gather_scatter_info *gs_info,
2799 tree *dataref_ptr, vec<tree> *vec_offset)
2801 gimple_seq stmts = NULL;
2802 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2803 if (stmts != NULL)
2805 basic_block new_bb;
2806 edge pe = loop_preheader_edge (loop);
2807 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2808 gcc_assert (!new_bb);
2810 if (slp_node)
2811 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2812 else
2814 unsigned ncopies
2815 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2816 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2817 gs_info->offset, vec_offset,
2818 gs_info->offset_vectype);
2822 /* Prepare to implement a grouped or strided load or store using
2823 the gather load or scatter store operation described by GS_INFO.
2824 STMT_INFO is the load or store statement.
2826 Set *DATAREF_BUMP to the amount that should be added to the base
2827 address after each copy of the vectorized statement. Set *VEC_OFFSET
2828 to an invariant offset vector in which element I has the value
2829 I * DR_STEP / SCALE. */
2831 static void
2832 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2833 loop_vec_info loop_vinfo,
2834 gimple_stmt_iterator *gsi,
2835 gather_scatter_info *gs_info,
2836 tree *dataref_bump, tree *vec_offset,
2837 vec_loop_lens *loop_lens)
2839 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2840 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2842 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2844 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2845 ivtmp_8 = _31 * 16 (step in bytes);
2846 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2847 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2848 tree loop_len
2849 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
2850 tree tmp
2851 = fold_build2 (MULT_EXPR, sizetype,
2852 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2853 loop_len);
2854 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
2855 GSI_SAME_STMT);
2857 else
2859 tree bump
2860 = size_binop (MULT_EXPR,
2861 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2862 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2863 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2866 /* The offset given in GS_INFO can have pointer type, so use the element
2867 type of the vector instead. */
2868 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2870 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2871 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2872 ssize_int (gs_info->scale));
2873 step = fold_convert (offset_type, step);
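/* For example, with a DR_STEP of 32 bytes and a scale of 8, X is 4 and the
   series built below is { 0, 4, 8, 12, ... }. (Illustrative values.) */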
2875 /* Create {0, X, X*2, X*3, ...}. */
2876 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2877 build_zero_cst (offset_type), step);
2878 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2881 /* Prepare the pointer IVs which need to be updated by a variable amount.
2882 That variable amount is the outcome of .SELECT_VL. In this case, each
2883 iteration may process a flexible number of elements, as long as that
2884 number is <= VF.
2886 Return the data reference increment according to SELECT_VL.
2887 If new statements are needed, insert them before GSI. */
2889 static tree
2890 vect_get_loop_variant_data_ptr_increment (
2891 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
2892 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
2893 vect_memory_access_type memory_access_type)
2895 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2896 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2898 /* gather/scatter never reach here. */
2899 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
2901 /* When we use the SELECT_VL pattern, we dynamically adjust
2902 the memory address by the .SELECT_VL result.
2904 The result of .SELECT_VL is the number of elements to
2905 be processed in each iteration. So the memory address
2906 adjustment operation should be:
2908 addr = addr + .SELECT_VL (ARG..) * step;
2910 tree loop_len
2911 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
2912 tree len_type = TREE_TYPE (loop_len);
2913 /* Since the outcome of .SELECT_VL is a number of elements, scale it by
2914 the byte step so that it can be used to adjust the pointer IVs by a
2915 variable amount.
2916 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
2917 wide_int_to_tree (len_type, wi::to_widest (step)));
2918 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
2919 gassign *assign = gimple_build_assign (bump, tmp);
2920 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
2921 return bump;
2924 /* Return the amount that should be added to a vector pointer to move
2925 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2926 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2927 vectorization. */
2929 static tree
2930 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
2931 dr_vec_info *dr_info, tree aggr_type,
2932 vect_memory_access_type memory_access_type,
2933 vec_loop_lens *loop_lens = nullptr)
2935 if (memory_access_type == VMAT_INVARIANT)
2936 return size_zero_node;
2938 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2939 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2940 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
2941 loop_lens, dr_info,
2942 memory_access_type);
2944 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2945 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2946 if (tree_int_cst_sgn (step) == -1)
2947 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
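/* For example, for a 16-byte AGGR_TYPE and a backward (negative-step) DR
   the increment is -16 bytes. (Illustrative size.) */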
2948 return iv_step;
2951 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2953 static bool
2954 vectorizable_bswap (vec_info *vinfo,
2955 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2956 gimple **vec_stmt, slp_tree slp_node,
2957 slp_tree *slp_op,
2958 tree vectype_in, stmt_vector_for_cost *cost_vec)
2960 tree op, vectype;
2961 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2962 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2963 unsigned ncopies;
2965 op = gimple_call_arg (stmt, 0);
2966 vectype = STMT_VINFO_VECTYPE (stmt_info);
2967 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2969 /* Multiple types in SLP are handled by creating the appropriate number of
2970 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2971 case of SLP. */
2972 if (slp_node)
2973 ncopies = 1;
2974 else
2975 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2977 gcc_assert (ncopies >= 1);
2979 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2980 if (! char_vectype)
2981 return false;
2983 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2984 unsigned word_bytes;
2985 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2986 return false;
2988 /* The encoding uses one stepped pattern for each byte in the word. */
2989 vec_perm_builder elts (num_bytes, word_bytes, 3);
2990 for (unsigned i = 0; i < 3; ++i)
2991 for (unsigned j = 0; j < word_bytes; ++j)
2992 elts.quick_push ((i + 1) * word_bytes - j - 1);
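/* For example, for 4-byte words the selector is
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... }, i.e. it reverses the
   bytes within each word. (Illustrative word size.) */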
2994 vec_perm_indices indices (elts, 1, num_bytes);
2995 machine_mode vmode = TYPE_MODE (char_vectype);
2996 if (!can_vec_perm_const_p (vmode, vmode, indices))
2997 return false;
2999 if (! vec_stmt)
3001 if (slp_node
3002 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3004 if (dump_enabled_p ())
3005 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3006 "incompatible vector types for invariants\n");
3007 return false;
3010 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3011 DUMP_VECT_SCOPE ("vectorizable_bswap");
3012 record_stmt_cost (cost_vec,
3013 1, vector_stmt, stmt_info, 0, vect_prologue);
3014 record_stmt_cost (cost_vec,
3015 slp_node
3016 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3017 vec_perm, stmt_info, 0, vect_body);
3018 return true;
3021 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3023 /* Transform. */
3024 vec<tree> vec_oprnds = vNULL;
3025 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3026 op, &vec_oprnds);
3027 /* Arguments are ready. Create the new vector stmt. */
3028 unsigned i;
3029 tree vop;
3030 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3032 gimple *new_stmt;
3033 tree tem = make_ssa_name (char_vectype);
3034 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3035 char_vectype, vop));
3036 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3037 tree tem2 = make_ssa_name (char_vectype);
3038 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3039 tem, tem, bswap_vconst);
3040 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3041 tem = make_ssa_name (vectype);
3042 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3043 vectype, tem2));
3044 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3045 if (slp_node)
3046 slp_node->push_vec_def (new_stmt);
3047 else
3048 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3051 if (!slp_node)
3052 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3054 vec_oprnds.release ();
3055 return true;
3058 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3059 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3060 in a single step. On success, store the binary pack code in
3061 *CONVERT_CODE. */
3063 static bool
3064 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3065 code_helper *convert_code)
3067 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3068 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3069 return false;
3071 code_helper code;
3072 int multi_step_cvt = 0;
3073 auto_vec <tree, 8> interm_types;
3074 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3075 &code, &multi_step_cvt, &interm_types)
3076 || multi_step_cvt)
3077 return false;
3079 *convert_code = code;
3080 return true;
3083 /* Function vectorizable_call.
3085 Check if STMT_INFO performs a function call that can be vectorized.
3086 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3087 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3088 Return true if STMT_INFO is vectorizable in this way. */
3090 static bool
3091 vectorizable_call (vec_info *vinfo,
3092 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3093 gimple **vec_stmt, slp_tree slp_node,
3094 stmt_vector_for_cost *cost_vec)
3096 gcall *stmt;
3097 tree vec_dest;
3098 tree scalar_dest;
3099 tree op;
3100 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3101 tree vectype_out, vectype_in;
3102 poly_uint64 nunits_in;
3103 poly_uint64 nunits_out;
3104 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3105 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3106 tree fndecl, new_temp, rhs_type;
3107 enum vect_def_type dt[4]
3108 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3109 vect_unknown_def_type };
3110 tree vectypes[ARRAY_SIZE (dt)] = {};
3111 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3112 int ndts = ARRAY_SIZE (dt);
3113 int ncopies, j;
3114 auto_vec<tree, 8> vargs;
3115 enum { NARROW, NONE, WIDEN } modifier;
3116 size_t i, nargs;
3117 tree lhs;
3118 tree clz_ctz_arg1 = NULL_TREE;
3120 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3121 return false;
3123 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3124 && ! vec_stmt)
3125 return false;
3127 /* Is STMT_INFO a vectorizable call? */
3128 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3129 if (!stmt)
3130 return false;
3132 if (gimple_call_internal_p (stmt)
3133 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3134 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3135 /* Handled by vectorizable_load and vectorizable_store. */
3136 return false;
3138 if (gimple_call_lhs (stmt) == NULL_TREE
3139 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3140 return false;
3142 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3144 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3146 /* Process function arguments. */
3147 rhs_type = NULL_TREE;
3148 vectype_in = NULL_TREE;
3149 nargs = gimple_call_num_args (stmt);
3151 /* Bail out if the function has more than four arguments; we do not have
3152 interesting builtin functions to vectorize with more than two arguments
3153 except for fma. Having no arguments is not handled either. */
3154 if (nargs == 0 || nargs > 4)
3155 return false;
3157 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3158 combined_fn cfn = gimple_call_combined_fn (stmt);
3159 if (cfn == CFN_GOMP_SIMD_LANE)
3161 nargs = 0;
3162 rhs_type = unsigned_type_node;
3164 /* Similarly, pretend IFN_CLZ and IFN_CTZ only have one argument; the second
3165 argument just says whether the operation is well-defined at zero and what
3166 value should be returned in that case. */
3167 if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
3169 nargs = 1;
3170 clz_ctz_arg1 = gimple_call_arg (stmt, 1);
3173 int mask_opno = -1;
3174 if (internal_fn_p (cfn))
3175 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3177 for (i = 0; i < nargs; i++)
3179 if ((int) i == mask_opno)
3181 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3182 &op, &slp_op[i], &dt[i], &vectypes[i]))
3183 return false;
3184 continue;
3187 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3188 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3190 if (dump_enabled_p ())
3191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3192 "use not simple.\n");
3193 return false;
3196 /* We can only handle calls with arguments of the same type. */
3197 if (rhs_type
3198 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3200 if (dump_enabled_p ())
3201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3202 "argument types differ.\n");
3203 return false;
3205 if (!rhs_type)
3206 rhs_type = TREE_TYPE (op);
3208 if (!vectype_in)
3209 vectype_in = vectypes[i];
3210 else if (vectypes[i]
3211 && !types_compatible_p (vectypes[i], vectype_in))
3213 if (dump_enabled_p ())
3214 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3215 "argument vector types differ.\n");
3216 return false;
3219 /* If all arguments are external or constant defs, infer the vector type
3220 from the scalar type. */
3221 if (!vectype_in)
3222 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3223 if (vec_stmt)
3224 gcc_assert (vectype_in);
3225 if (!vectype_in)
3227 if (dump_enabled_p ())
3228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3229 "no vectype for scalar type %T\n", rhs_type);
3231 return false;
3234 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3235 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3237 if (dump_enabled_p ())
3238 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3239 "mixed mask and nonmask vector types\n");
3240 return false;
3243 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3245 if (dump_enabled_p ())
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3247 "use emulated vector type for call\n");
3248 return false;
3251 /* FORNOW */
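/* Classify the call by comparing element counts: for example, a V4SI input
   with a V8HI output (twice as many, narrower elements) is a NARROW call.
   (Illustrative modes.) */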
3252 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3253 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3254 if (known_eq (nunits_in * 2, nunits_out))
3255 modifier = NARROW;
3256 else if (known_eq (nunits_out, nunits_in))
3257 modifier = NONE;
3258 else if (known_eq (nunits_out * 2, nunits_in))
3259 modifier = WIDEN;
3260 else
3261 return false;
3263 /* We only handle functions that do not read or clobber memory. */
3264 if (gimple_vuse (stmt))
3266 if (dump_enabled_p ())
3267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3268 "function reads from or writes to memory.\n");
3269 return false;
3272 /* For now, we only vectorize functions if a target specific builtin
3273 is available. TODO -- in some cases, it might be profitable to
3274 insert the calls for pieces of the vector, in order to be able
3275 to vectorize other operations in the loop. */
3276 fndecl = NULL_TREE;
3277 internal_fn ifn = IFN_LAST;
3278 tree callee = gimple_call_fndecl (stmt);
3280 /* First try using an internal function. */
3281 code_helper convert_code = MAX_TREE_CODES;
3282 if (cfn != CFN_LAST
3283 && (modifier == NONE
3284 || (modifier == NARROW
3285 && simple_integer_narrowing (vectype_out, vectype_in,
3286 &convert_code))))
3287 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3288 vectype_in);
3290 /* If that fails, try asking for a target-specific built-in function. */
3291 if (ifn == IFN_LAST)
3293 if (cfn != CFN_LAST)
3294 fndecl = targetm.vectorize.builtin_vectorized_function
3295 (cfn, vectype_out, vectype_in);
3296 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3297 fndecl = targetm.vectorize.builtin_md_vectorized_function
3298 (callee, vectype_out, vectype_in);
3301 if (ifn == IFN_LAST && !fndecl)
3303 if (cfn == CFN_GOMP_SIMD_LANE
3304 && !slp_node
3305 && loop_vinfo
3306 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3307 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3308 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3309 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3311 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3312 { 0, 1, 2, ... vf - 1 } vector. */
3313 gcc_assert (nargs == 0);
3315 else if (modifier == NONE
3316 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3317 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3318 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3319 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3320 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3321 slp_op, vectype_in, cost_vec);
3322 else
3324 if (dump_enabled_p ())
3325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3326 "function is not vectorizable.\n");
3327 return false;
3331 if (slp_node)
3332 ncopies = 1;
3333 else if (modifier == NARROW && ifn == IFN_LAST)
3334 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3335 else
3336 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3338 /* Sanity check: make sure that at least one copy of the vectorized stmt
3339 needs to be generated. */
3340 gcc_assert (ncopies >= 1);
3342 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3343 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3344 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3345 int len_opno = internal_fn_len_index (cond_len_fn);
3346 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3347 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3348 if (!vec_stmt) /* transformation not required. */
3350 if (slp_node)
3351 for (i = 0; i < nargs; ++i)
3352 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3353 vectypes[i]
3354 ? vectypes[i] : vectype_in))
3356 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3358 "incompatible vector types for invariants\n");
3359 return false;
3361 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3362 DUMP_VECT_SCOPE ("vectorizable_call");
3363 vect_model_simple_cost (vinfo, stmt_info,
3364 ncopies, dt, ndts, slp_node, cost_vec);
3365 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3366 record_stmt_cost (cost_vec, ncopies / 2,
3367 vec_promote_demote, stmt_info, 0, vect_body);
3369 if (loop_vinfo
3370 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3371 && (reduc_idx >= 0 || mask_opno >= 0))
3373 if (reduc_idx >= 0
3374 && (cond_fn == IFN_LAST
3375 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3376 OPTIMIZE_FOR_SPEED))
3377 && (cond_len_fn == IFN_LAST
3378 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3379 OPTIMIZE_FOR_SPEED)))
3381 if (dump_enabled_p ())
3382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3383 "can't use a fully-masked loop because no"
3384 " conditional operation is available.\n");
3385 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3387 else
3389 unsigned int nvectors
3390 = (slp_node
3391 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3392 : ncopies);
3393 tree scalar_mask = NULL_TREE;
3394 if (mask_opno >= 0)
3395 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3396 if (cond_len_fn != IFN_LAST
3397 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3398 OPTIMIZE_FOR_SPEED))
3399 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3401 else
3402 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3403 scalar_mask);
3406 return true;
3409 /* Transform. */
3411 if (dump_enabled_p ())
3412 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3414 /* Handle def. */
3415 scalar_dest = gimple_call_lhs (stmt);
3416 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3418 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3419 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3420 unsigned int vect_nargs = nargs;
3421 if (len_loop_p)
3423 if (len_opno >= 0)
3425 ifn = cond_len_fn;
3426 /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS. */
3427 vect_nargs += 2;
3429 else if (reduc_idx >= 0)
3430 gcc_unreachable ();
3432 else if (masked_loop_p && reduc_idx >= 0)
3434 ifn = cond_fn;
3435 vect_nargs += 2;
3437 if (clz_ctz_arg1)
3438 ++vect_nargs;
3440 if (modifier == NONE || ifn != IFN_LAST)
3442 tree prev_res = NULL_TREE;
3443 vargs.safe_grow (vect_nargs, true);
3444 auto_vec<vec<tree> > vec_defs (nargs);
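/* For SLP, NCOPIES is 1 and the vector operands come from the SLP
children; otherwise copy J below uses the J-th vector def of each
argument. */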
3445 for (j = 0; j < ncopies; ++j)
3447 /* Build argument list for the vectorized call. */
3448 if (slp_node)
3450 vec<tree> vec_oprnds0;
3452 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3453 vec_oprnds0 = vec_defs[0];
3455 /* Arguments are ready. Create the new vector stmt. */
3456 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3458 int varg = 0;
3459 if (masked_loop_p && reduc_idx >= 0)
3461 unsigned int vec_num = vec_oprnds0.length ();
3462 /* Always true for SLP. */
3463 gcc_assert (ncopies == 1);
3464 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3465 gsi, masks, vec_num,
3466 vectype_out, i);
3468 size_t k;
3469 for (k = 0; k < nargs; k++)
3471 vec<tree> vec_oprndsk = vec_defs[k];
3472 vargs[varg++] = vec_oprndsk[i];
3474 if (masked_loop_p && reduc_idx >= 0)
3475 vargs[varg++] = vargs[reduc_idx + 1];
3476 if (clz_ctz_arg1)
3477 vargs[varg++] = clz_ctz_arg1;
3479 gimple *new_stmt;
3480 if (modifier == NARROW)
3482 /* We don't define any narrowing conditional functions
3483 at present. */
3484 gcc_assert (mask_opno < 0);
3485 tree half_res = make_ssa_name (vectype_in);
3486 gcall *call
3487 = gimple_build_call_internal_vec (ifn, vargs);
3488 gimple_call_set_lhs (call, half_res);
3489 gimple_call_set_nothrow (call, true);
3490 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
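/* Narrowing pairs up the half-width results: even-numbered ones are
only saved in PREV_RES, odd-numbered ones are combined with PREV_RES
using CONVERT_CODE to form the narrowed vector. */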
3491 if ((i & 1) == 0)
3493 prev_res = half_res;
3494 continue;
3496 new_temp = make_ssa_name (vec_dest);
3497 new_stmt = vect_gimple_build (new_temp, convert_code,
3498 prev_res, half_res);
3499 vect_finish_stmt_generation (vinfo, stmt_info,
3500 new_stmt, gsi);
3502 else
3504 if (len_opno >= 0 && len_loop_p)
3506 unsigned int vec_num = vec_oprnds0.length ();
3507 /* Always true for SLP. */
3508 gcc_assert (ncopies == 1);
3509 tree len
3510 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3511 vectype_out, i, 1);
3512 signed char biasval
3513 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3514 tree bias = build_int_cst (intQI_type_node, biasval);
3515 vargs[len_opno] = len;
3516 vargs[len_opno + 1] = bias;
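/* LEN and BIAS fill the two extra argument slots added above for the
COND_LEN_* internal function. */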
3518 else if (mask_opno >= 0 && masked_loop_p)
3520 unsigned int vec_num = vec_oprnds0.length ();
3521 /* Always true for SLP. */
3522 gcc_assert (ncopies == 1);
3523 tree mask = vect_get_loop_mask (loop_vinfo,
3524 gsi, masks, vec_num,
3525 vectype_out, i);
3526 vargs[mask_opno] = prepare_vec_mask
3527 (loop_vinfo, TREE_TYPE (mask), mask,
3528 vargs[mask_opno], gsi);
3531 gcall *call;
3532 if (ifn != IFN_LAST)
3533 call = gimple_build_call_internal_vec (ifn, vargs);
3534 else
3535 call = gimple_build_call_vec (fndecl, vargs);
3536 new_temp = make_ssa_name (vec_dest, call);
3537 gimple_call_set_lhs (call, new_temp);
3538 gimple_call_set_nothrow (call, true);
3539 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3540 new_stmt = call;
3542 slp_node->push_vec_def (new_stmt);
3544 continue;
3547 int varg = 0;
3548 if (masked_loop_p && reduc_idx >= 0)
3549 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3550 vectype_out, j);
3551 for (i = 0; i < nargs; i++)
3553 op = gimple_call_arg (stmt, i);
3554 if (j == 0)
3556 vec_defs.quick_push (vNULL);
3557 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3558 op, &vec_defs[i],
3559 vectypes[i]);
3561 vargs[varg++] = vec_defs[i][j];
3563 if (masked_loop_p && reduc_idx >= 0)
3564 vargs[varg++] = vargs[reduc_idx + 1];
3565 if (clz_ctz_arg1)
3566 vargs[varg++] = clz_ctz_arg1;
3568 if (len_opno >= 0 && len_loop_p)
3570 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3571 vectype_out, j, 1);
3572 signed char biasval
3573 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3574 tree bias = build_int_cst (intQI_type_node, biasval);
3575 vargs[len_opno] = len;
3576 vargs[len_opno + 1] = bias;
3578 else if (mask_opno >= 0 && masked_loop_p)
3580 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3581 vectype_out, j);
3582 vargs[mask_opno]
3583 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3584 vargs[mask_opno], gsi);
3587 gimple *new_stmt;
3588 if (cfn == CFN_GOMP_SIMD_LANE)
3590 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
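/* CST is the { J*NUNITS_OUT, J*NUNITS_OUT + 1, ... } lane-index vector
for this copy; e.g. with four lanes the first copy gets { 0, 1, 2, 3 }
and the second { 4, 5, 6, 7 }. */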
3591 tree new_var
3592 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3593 gimple *init_stmt = gimple_build_assign (new_var, cst);
3594 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3595 new_temp = make_ssa_name (vec_dest);
3596 new_stmt = gimple_build_assign (new_temp, new_var);
3597 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3599 else if (modifier == NARROW)
3601 /* We don't define any narrowing conditional functions at
3602 present. */
3603 gcc_assert (mask_opno < 0);
3604 tree half_res = make_ssa_name (vectype_in);
3605 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3606 gimple_call_set_lhs (call, half_res);
3607 gimple_call_set_nothrow (call, true);
3608 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3609 if ((j & 1) == 0)
3611 prev_res = half_res;
3612 continue;
3614 new_temp = make_ssa_name (vec_dest);
3615 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3616 half_res);
3617 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3619 else
3621 gcall *call;
3622 if (ifn != IFN_LAST)
3623 call = gimple_build_call_internal_vec (ifn, vargs);
3624 else
3625 call = gimple_build_call_vec (fndecl, vargs);
3626 new_temp = make_ssa_name (vec_dest, call);
3627 gimple_call_set_lhs (call, new_temp);
3628 gimple_call_set_nothrow (call, true);
3629 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3630 new_stmt = call;
3633 if (j == (modifier == NARROW ? 1 : 0))
3634 *vec_stmt = new_stmt;
3635 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3637 for (i = 0; i < nargs; i++)
3639 vec<tree> vec_oprndsi = vec_defs[i];
3640 vec_oprndsi.release ();
3643 else if (modifier == NARROW)
3645 auto_vec<vec<tree> > vec_defs (nargs);
3646 /* We don't define any narrowing conditional functions at present. */
3647 gcc_assert (mask_opno < 0);
3648 for (j = 0; j < ncopies; ++j)
3650 /* Build argument list for the vectorized call. */
3651 if (j == 0)
3652 vargs.create (nargs * 2);
3653 else
3654 vargs.truncate (0);
3656 if (slp_node)
3658 vec<tree> vec_oprnds0;
3660 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3661 vec_oprnds0 = vec_defs[0];
3663 /* Arguments are ready. Create the new vector stmt. */
3664 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3666 size_t k;
3667 vargs.truncate (0);
3668 for (k = 0; k < nargs; k++)
3670 vec<tree> vec_oprndsk = vec_defs[k];
3671 vargs.quick_push (vec_oprndsk[i]);
3672 vargs.quick_push (vec_oprndsk[i + 1]);
3674 gcall *call;
3675 if (ifn != IFN_LAST)
3676 call = gimple_build_call_internal_vec (ifn, vargs);
3677 else
3678 call = gimple_build_call_vec (fndecl, vargs);
3679 new_temp = make_ssa_name (vec_dest, call);
3680 gimple_call_set_lhs (call, new_temp);
3681 gimple_call_set_nothrow (call, true);
3682 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3683 slp_node->push_vec_def (call);
3685 continue;
3688 for (i = 0; i < nargs; i++)
3690 op = gimple_call_arg (stmt, i);
3691 if (j == 0)
3693 vec_defs.quick_push (vNULL);
3694 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3695 op, &vec_defs[i], vectypes[i]);
3697 vec_oprnd0 = vec_defs[i][2*j];
3698 vec_oprnd1 = vec_defs[i][2*j+1];
3700 vargs.quick_push (vec_oprnd0);
3701 vargs.quick_push (vec_oprnd1);
3704 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3705 new_temp = make_ssa_name (vec_dest, new_stmt);
3706 gimple_call_set_lhs (new_stmt, new_temp);
3707 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3709 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3712 if (!slp_node)
3713 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3715 for (i = 0; i < nargs; i++)
3717 vec<tree> vec_oprndsi = vec_defs[i];
3718 vec_oprndsi.release ();
3721 else
3722 /* No current target implements this case. */
3723 return false;
3725 vargs.release ();
3727 /* The call in STMT might prevent it from being removed in dce.
3728 We cannot remove it here, however, because of the way the ssa name
3729 it defines is mapped to the new definition. So just replace the
3730 rhs of the statement with something harmless. */
3732 if (slp_node)
3733 return true;
3735 stmt_info = vect_orig_stmt (stmt_info);
3736 lhs = gimple_get_lhs (stmt_info->stmt);
3738 gassign *new_stmt
3739 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3740 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3742 return true;
3746 struct simd_call_arg_info
3748 tree vectype;
3749 tree op;
3750 HOST_WIDE_INT linear_step;
3751 enum vect_def_type dt;
3752 unsigned int align;
3753 bool simd_lane_linear;
3756 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3757 is linear within the simd lane (but not within the whole loop), note it
3758 in *ARGINFO. */
3760 static void
3761 vect_simd_lane_linear (tree op, class loop *loop,
3762 struct simd_call_arg_info *arginfo)
3764 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3766 if (!is_gimple_assign (def_stmt)
3767 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3768 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3769 return;
3771 tree base = gimple_assign_rhs1 (def_stmt);
3772 HOST_WIDE_INT linear_step = 0;
3773 tree v = gimple_assign_rhs2 (def_stmt);
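/* Walk the definition chain of V: fold constant additions into BASE,
record a single multiplicative step in LINEAR_STEP, look through
widening integer conversions, and succeed only if we reach the
GOMP_SIMD_LANE call for this loop's simduid. */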
3774 while (TREE_CODE (v) == SSA_NAME)
3776 tree t;
3777 def_stmt = SSA_NAME_DEF_STMT (v);
3778 if (is_gimple_assign (def_stmt))
3779 switch (gimple_assign_rhs_code (def_stmt))
3781 case PLUS_EXPR:
3782 t = gimple_assign_rhs2 (def_stmt);
3783 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3784 return;
3785 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3786 v = gimple_assign_rhs1 (def_stmt);
3787 continue;
3788 case MULT_EXPR:
3789 t = gimple_assign_rhs2 (def_stmt);
3790 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3791 return;
3792 linear_step = tree_to_shwi (t);
3793 v = gimple_assign_rhs1 (def_stmt);
3794 continue;
3795 CASE_CONVERT:
3796 t = gimple_assign_rhs1 (def_stmt);
3797 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3798 || (TYPE_PRECISION (TREE_TYPE (v))
3799 < TYPE_PRECISION (TREE_TYPE (t))))
3800 return;
3801 if (!linear_step)
3802 linear_step = 1;
3803 v = t;
3804 continue;
3805 default:
3806 return;
3808 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3809 && loop->simduid
3810 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3811 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3812 == loop->simduid))
3814 if (!linear_step)
3815 linear_step = 1;
3816 arginfo->linear_step = linear_step;
3817 arginfo->op = base;
3818 arginfo->simd_lane_linear = true;
3819 return;
3824 /* Function vectorizable_simd_clone_call.
3826 Check if STMT_INFO performs a function call that can be vectorized
3827 by calling a simd clone of the function.
3828 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3829 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3830 Return true if STMT_INFO is vectorizable in this way. */
3832 static bool
3833 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3834 gimple_stmt_iterator *gsi,
3835 gimple **vec_stmt, slp_tree slp_node,
3836 stmt_vector_for_cost *)
3838 tree vec_dest;
3839 tree scalar_dest;
3840 tree op, type;
3841 tree vec_oprnd0 = NULL_TREE;
3842 tree vectype;
3843 poly_uint64 nunits;
3844 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3845 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3846 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3847 tree fndecl, new_temp;
3848 int ncopies, j;
3849 auto_vec<simd_call_arg_info> arginfo;
3850 vec<tree> vargs = vNULL;
3851 size_t i, nargs;
3852 tree lhs, rtype, ratype;
3853 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3854 int masked_call_offset = 0;
3856 /* Is STMT a vectorizable call? */
3857 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3858 if (!stmt)
3859 return false;
3861 fndecl = gimple_call_fndecl (stmt);
3862 if (fndecl == NULL_TREE
3863 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
3865 fndecl = gimple_call_arg (stmt, 0);
3866 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
3867 fndecl = TREE_OPERAND (fndecl, 0);
3868 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
3869 masked_call_offset = 1;
3871 if (fndecl == NULL_TREE)
3872 return false;
3874 struct cgraph_node *node = cgraph_node::get (fndecl);
3875 if (node == NULL || node->simd_clones == NULL)
3876 return false;
3878 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3879 return false;
3881 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3882 && ! vec_stmt)
3883 return false;
3885 if (gimple_call_lhs (stmt)
3886 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3887 return false;
3889 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3891 vectype = STMT_VINFO_VECTYPE (stmt_info);
3893 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3894 return false;
3896 /* Process function arguments. */
3897 nargs = gimple_call_num_args (stmt) - masked_call_offset;
3899 /* Bail out if the function has zero arguments. */
3900 if (nargs == 0)
3901 return false;
3903 vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
3904 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
3905 arginfo.reserve (nargs, true);
3906 auto_vec<slp_tree> slp_op;
3907 slp_op.safe_grow_cleared (nargs);
3909 for (i = 0; i < nargs; i++)
3911 simd_call_arg_info thisarginfo;
3912 affine_iv iv;
3914 thisarginfo.linear_step = 0;
3915 thisarginfo.align = 0;
3916 thisarginfo.op = NULL_TREE;
3917 thisarginfo.simd_lane_linear = false;
3919 int op_no = i + masked_call_offset;
3920 if (slp_node)
3921 op_no = vect_slp_child_index_for_operand (stmt, op_no, false);
3922 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3923 op_no, &op, &slp_op[i],
3924 &thisarginfo.dt, &thisarginfo.vectype)
3925 || thisarginfo.dt == vect_uninitialized_def)
3927 if (dump_enabled_p ())
3928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3929 "use not simple.\n");
3930 return false;
3933 if (thisarginfo.dt == vect_constant_def
3934 || thisarginfo.dt == vect_external_def)
3936 /* With SLP we determine the vector type of constants/externals
3937 at analysis time, handling conflicts via
3938 vect_maybe_update_slp_op_vectype. At transform time
3939 we have a vector type recorded for SLP. */
3940 gcc_assert (!vec_stmt
3941 || !slp_node
3942 || thisarginfo.vectype != NULL_TREE);
3943 if (!vec_stmt)
3944 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
3945 TREE_TYPE (op),
3946 slp_node);
3948 else
3949 gcc_assert (thisarginfo.vectype != NULL_TREE);
3951 /* For linear arguments, the analyze phase should have saved
3952 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
3953 if (i * 3 + 4 <= simd_clone_info.length ()
3954 && simd_clone_info[i * 3 + 2])
3956 gcc_assert (vec_stmt);
3957 thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
3958 thisarginfo.op = simd_clone_info[i * 3 + 1];
3959 thisarginfo.simd_lane_linear
3960 = (simd_clone_info[i * 3 + 3] == boolean_true_node);
3961 /* If loop has been peeled for alignment, we need to adjust it. */
3962 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3963 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3964 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3966 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3967 tree step = simd_clone_info[i * 3 + 2];
3968 tree opt = TREE_TYPE (thisarginfo.op);
3969 bias = fold_convert (TREE_TYPE (step), bias);
3970 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3971 thisarginfo.op
3972 = fold_build2 (POINTER_TYPE_P (opt)
3973 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3974 thisarginfo.op, bias);
3977 else if (!vec_stmt
3978 && thisarginfo.dt != vect_constant_def
3979 && thisarginfo.dt != vect_external_def
3980 && loop_vinfo
3981 && TREE_CODE (op) == SSA_NAME
3982 && simple_iv (loop, loop_containing_stmt (stmt), op,
3983 &iv, false)
3984 && tree_fits_shwi_p (iv.step))
3986 thisarginfo.linear_step = tree_to_shwi (iv.step);
3987 thisarginfo.op = iv.base;
3989 else if ((thisarginfo.dt == vect_constant_def
3990 || thisarginfo.dt == vect_external_def)
3991 && POINTER_TYPE_P (TREE_TYPE (op)))
3992 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3993 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3994 linear too. */
3995 if (POINTER_TYPE_P (TREE_TYPE (op))
3996 && !thisarginfo.linear_step
3997 && !vec_stmt
3998 && thisarginfo.dt != vect_constant_def
3999 && thisarginfo.dt != vect_external_def
4000 && loop_vinfo
4001 && TREE_CODE (op) == SSA_NAME)
4002 vect_simd_lane_linear (op, loop, &thisarginfo);
4004 arginfo.quick_push (thisarginfo);
4007 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4008 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
4009 unsigned int badness = 0;
4010 struct cgraph_node *bestn = NULL;
4011 if (simd_clone_info.exists ())
4012 bestn = cgraph_node::get (simd_clone_info[0]);
4013 else
4014 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4015 n = n->simdclone->next_clone)
4017 unsigned int this_badness = 0;
4018 unsigned int num_calls;
4019 /* The number of arguments in the call and the number of parameters in
4020 the simdclone should match. However, when the simdclone is
4021 'inbranch', it could have one more parameter than nargs when using
4022 an inbranch simdclone for a non-inbranch call, either in a
4023 non-masked loop using an all-true constant mask, or inside a masked
4024 loop using its mask. */
4025 size_t simd_nargs = n->simdclone->nargs;
4026 if (!masked_call_offset && n->simdclone->inbranch)
4027 simd_nargs--;
4028 if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
4029 &num_calls)
4030 || (!n->simdclone->inbranch && (masked_call_offset > 0))
4031 || (nargs != simd_nargs))
4032 continue;
4033 if (num_calls != 1)
4034 this_badness += exact_log2 (num_calls) * 4096;
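/* E.g. a clone whose simdlen is half of VF * GROUP_SIZE needs two
calls, so exact_log2 (2) * 4096 == 4096 is added here. */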
4035 if (n->simdclone->inbranch)
4036 this_badness += 8192;
4037 int target_badness = targetm.simd_clone.usable (n);
4038 if (target_badness < 0)
4039 continue;
4040 this_badness += target_badness * 512;
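/* Now check each argument: a mismatch in kind rejects this clone
(I is set to -1 below), while workable but less ideal matches merely
add to THIS_BADNESS. */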
4041 for (i = 0; i < nargs; i++)
4043 switch (n->simdclone->args[i].arg_type)
4045 case SIMD_CLONE_ARG_TYPE_VECTOR:
4046 if (!useless_type_conversion_p
4047 (n->simdclone->args[i].orig_type,
4048 TREE_TYPE (gimple_call_arg (stmt,
4049 i + masked_call_offset))))
4050 i = -1;
4051 else if (arginfo[i].dt == vect_constant_def
4052 || arginfo[i].dt == vect_external_def
4053 || arginfo[i].linear_step)
4054 this_badness += 64;
4055 break;
4056 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4057 if (arginfo[i].dt != vect_constant_def
4058 && arginfo[i].dt != vect_external_def)
4059 i = -1;
4060 break;
4061 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4062 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4063 if (arginfo[i].dt == vect_constant_def
4064 || arginfo[i].dt == vect_external_def
4065 || (arginfo[i].linear_step
4066 != n->simdclone->args[i].linear_step))
4067 i = -1;
4068 break;
4069 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4070 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4071 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4072 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4073 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4074 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4075 /* FORNOW */
4076 i = -1;
4077 break;
4078 case SIMD_CLONE_ARG_TYPE_MASK:
4079 /* While we can create a traditional data vector from
4080 an incoming integer mode mask, we have no good way to
4081 force generation of an integer mode mask from a traditional
4082 boolean vector input. */
4083 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4084 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4085 i = -1;
4086 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4087 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4088 this_badness += 2048;
4089 break;
4091 if (i == (size_t) -1)
4092 break;
4093 if (n->simdclone->args[i].alignment > arginfo[i].align)
4095 i = -1;
4096 break;
4098 if (arginfo[i].align)
4099 this_badness += (exact_log2 (arginfo[i].align)
4100 - exact_log2 (n->simdclone->args[i].alignment));
4102 if (i == (size_t) -1)
4103 continue;
4104 if (masked_call_offset == 0
4105 && n->simdclone->inbranch
4106 && n->simdclone->nargs > nargs)
4108 gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4109 SIMD_CLONE_ARG_TYPE_MASK);
4110 /* Penalize using a masked SIMD clone in a non-masked loop that is
4111 not in a branch, since we'd have to construct an all-true mask. */
4112 if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4113 this_badness += 64;
4115 if (bestn == NULL || this_badness < badness)
4117 bestn = n;
4118 badness = this_badness;
4122 if (bestn == NULL)
4123 return false;
4125 unsigned int num_mask_args = 0;
4126 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4127 for (i = 0; i < nargs; i++)
4128 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4129 num_mask_args++;
4131 for (i = 0; i < nargs; i++)
4133 if ((arginfo[i].dt == vect_constant_def
4134 || arginfo[i].dt == vect_external_def)
4135 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4137 tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
4138 i + masked_call_offset));
4139 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4140 slp_node);
4141 if (arginfo[i].vectype == NULL
4142 || !constant_multiple_p (bestn->simdclone->simdlen,
4143 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4144 return false;
4147 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4148 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4150 if (dump_enabled_p ())
4151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4152 "vector mask arguments are not supported.\n");
4153 return false;
4156 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4158 tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
4159 if (bestn->simdclone->mask_mode == VOIDmode)
4161 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype),
4162 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4164 /* FORNOW we only have partial support for vector-type masks
4165 that can't hold all of simdlen. */
4166 if (dump_enabled_p ())
4167 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4168 vect_location,
4169 "in-branch vector clones are not yet"
4170 " supported for mismatched vector sizes.\n");
4171 return false;
4174 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4176 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4177 || maybe_ne (exact_div (bestn->simdclone->simdlen,
4178 num_mask_args),
4179 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4181 /* FORNOW we only have partial support for integer-type masks
4182 that represent the same number of lanes as the
4183 vectorized mask inputs. */
4184 if (dump_enabled_p ())
4185 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4186 vect_location,
4187 "in-branch vector clones are not yet "
4188 "supported for mismatched vector sizes.\n");
4189 return false;
4192 else
4194 if (dump_enabled_p ())
4195 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4196 vect_location,
4197 "in-branch vector clones not supported"
4198 " on this target.\n");
4199 return false;
4204 fndecl = bestn->decl;
4205 nunits = bestn->simdclone->simdlen;
4206 if (slp_node)
4207 ncopies = vector_unroll_factor (vf * group_size, nunits);
4208 else
4209 ncopies = vector_unroll_factor (vf, nunits);
4211 /* If the function isn't const, only allow it in simd loops where the
4212 user has asserted that at least nunits consecutive iterations can be
4213 performed using SIMD instructions. */
4214 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4215 && gimple_vuse (stmt))
4216 return false;
4218 /* Sanity check: make sure that at least one copy of the vectorized stmt
4219 needs to be generated. */
4220 gcc_assert (ncopies >= 1);
4222 if (!vec_stmt) /* transformation not required. */
4224 if (slp_node)
4225 for (unsigned i = 0; i < nargs; ++i)
4226 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4228 if (dump_enabled_p ())
4229 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4230 "incompatible vector types for invariants\n");
4231 return false;
4233 /* When the original call is pure or const but the SIMD ABI dictates
4234 an aggregate return, we will have to use a virtual definition and
4235 in a loop eventually even need to add a virtual PHI. That's
4236 not straightforward, so allow this to be fixed up via renaming. */
4237 if (gimple_call_lhs (stmt)
4238 && !gimple_vdef (stmt)
4239 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4240 vinfo->any_known_not_updated_vssa = true;
4241 /* ??? For SLP code-gen we end up inserting after the last
4242 vector argument def rather than at the original call position
4243 so automagic virtual operand updating doesn't work. */
4244 if (gimple_vuse (stmt) && slp_node)
4245 vinfo->any_known_not_updated_vssa = true;
4246 simd_clone_info.safe_push (bestn->decl);
4247 for (i = 0; i < bestn->simdclone->nargs; i++)
4249 switch (bestn->simdclone->args[i].arg_type)
4251 default:
4252 continue;
4253 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4254 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4256 simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
4257 simd_clone_info.safe_push (arginfo[i].op);
4258 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4259 ? size_type_node : TREE_TYPE (arginfo[i].op);
4260 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4261 simd_clone_info.safe_push (ls);
4262 tree sll = arginfo[i].simd_lane_linear
4263 ? boolean_true_node : boolean_false_node;
4264 simd_clone_info.safe_push (sll);
4266 break;
4267 case SIMD_CLONE_ARG_TYPE_MASK:
4268 if (loop_vinfo
4269 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4270 vect_record_loop_mask (loop_vinfo,
4271 &LOOP_VINFO_MASKS (loop_vinfo),
4272 ncopies, vectype, op);
4274 break;
4278 if (!bestn->simdclone->inbranch && loop_vinfo)
4280 if (dump_enabled_p ()
4281 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4282 dump_printf_loc (MSG_NOTE, vect_location,
4283 "can't use a fully-masked loop because a"
4284 " non-masked simd clone was selected.\n");
4285 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4288 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4289 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4290 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4291 dt, slp_node, cost_vec); */
4292 return true;
4295 /* Transform. */
4297 if (dump_enabled_p ())
4298 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4300 /* Handle def. */
4301 scalar_dest = gimple_call_lhs (stmt);
4302 vec_dest = NULL_TREE;
4303 rtype = NULL_TREE;
4304 ratype = NULL_TREE;
4305 if (scalar_dest)
4307 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4308 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4309 if (TREE_CODE (rtype) == ARRAY_TYPE)
4311 ratype = rtype;
4312 rtype = TREE_TYPE (ratype);
4316 auto_vec<vec<tree> > vec_oprnds;
4317 auto_vec<unsigned> vec_oprnds_i;
4318 vec_oprnds_i.safe_grow_cleared (nargs, true);
4319 if (slp_node)
4321 vec_oprnds.reserve_exact (nargs);
4322 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4324 else
4325 vec_oprnds.safe_grow_cleared (nargs, true);
4326 for (j = 0; j < ncopies; ++j)
4328 poly_uint64 callee_nelements;
4329 poly_uint64 caller_nelements;
4330 /* Build argument list for the vectorized call. */
4331 if (j == 0)
4332 vargs.create (nargs);
4333 else
4334 vargs.truncate (0);
4336 for (i = 0; i < nargs; i++)
4338 unsigned int k, l, m, o;
4339 tree atype;
4340 op = gimple_call_arg (stmt, i + masked_call_offset);
4341 switch (bestn->simdclone->args[i].arg_type)
4343 case SIMD_CLONE_ARG_TYPE_VECTOR:
4344 atype = bestn->simdclone->args[i].vector_type;
4345 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4346 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4347 o = vector_unroll_factor (nunits, callee_nelements);
4348 for (m = j * o; m < (j + 1) * o; m++)
4350 if (known_lt (callee_nelements, caller_nelements))
4352 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4353 if (!constant_multiple_p (caller_nelements,
4354 callee_nelements, &k))
4355 gcc_unreachable ();
4357 gcc_assert ((k & (k - 1)) == 0);
4358 if (m == 0)
4360 if (!slp_node)
4361 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4362 ncopies * o / k, op,
4363 &vec_oprnds[i]);
4364 vec_oprnds_i[i] = 0;
4365 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4367 else
4369 vec_oprnd0 = arginfo[i].op;
4370 if ((m & (k - 1)) == 0)
4371 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4373 arginfo[i].op = vec_oprnd0;
4374 vec_oprnd0
4375 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4376 bitsize_int (prec),
4377 bitsize_int ((m & (k - 1)) * prec));
4378 gassign *new_stmt
4379 = gimple_build_assign (make_ssa_name (atype),
4380 vec_oprnd0);
4381 vect_finish_stmt_generation (vinfo, stmt_info,
4382 new_stmt, gsi);
4383 vargs.safe_push (gimple_assign_lhs (new_stmt));
4385 else
4387 if (!constant_multiple_p (callee_nelements,
4388 caller_nelements, &k))
4389 gcc_unreachable ();
4390 gcc_assert ((k & (k - 1)) == 0);
4391 vec<constructor_elt, va_gc> *ctor_elts;
4392 if (k != 1)
4393 vec_alloc (ctor_elts, k);
4394 else
4395 ctor_elts = NULL;
4396 for (l = 0; l < k; l++)
4398 if (m == 0 && l == 0)
4400 if (!slp_node)
4401 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4402 k * o * ncopies,
4404 &vec_oprnds[i]);
4405 vec_oprnds_i[i] = 0;
4406 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4408 else
4409 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4410 arginfo[i].op = vec_oprnd0;
4411 if (k == 1)
4412 break;
4413 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4414 vec_oprnd0);
4416 if (k == 1)
4417 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4418 atype))
4420 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4421 vec_oprnd0);
4422 gassign *new_stmt
4423 = gimple_build_assign (make_ssa_name (atype),
4424 vec_oprnd0);
4425 vect_finish_stmt_generation (vinfo, stmt_info,
4426 new_stmt, gsi);
4427 vargs.safe_push (gimple_get_lhs (new_stmt));
4429 else
4430 vargs.safe_push (vec_oprnd0);
4431 else
4433 vec_oprnd0 = build_constructor (atype, ctor_elts);
4434 gassign *new_stmt
4435 = gimple_build_assign (make_ssa_name (atype),
4436 vec_oprnd0);
4437 vect_finish_stmt_generation (vinfo, stmt_info,
4438 new_stmt, gsi);
4439 vargs.safe_push (gimple_assign_lhs (new_stmt));
4443 break;
4444 case SIMD_CLONE_ARG_TYPE_MASK:
4445 if (bestn->simdclone->mask_mode == VOIDmode)
4447 atype = bestn->simdclone->args[i].vector_type;
4448 tree elt_type = TREE_TYPE (atype);
4449 tree one = fold_convert (elt_type, integer_one_node);
4450 tree zero = fold_convert (elt_type, integer_zero_node);
4451 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4452 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4453 o = vector_unroll_factor (nunits, callee_nelements);
4454 for (m = j * o; m < (j + 1) * o; m++)
4456 if (maybe_lt (callee_nelements, caller_nelements))
4458 /* The mask type has fewer elements than simdlen. */
4460 /* FORNOW */
4461 gcc_unreachable ();
4463 else if (known_eq (callee_nelements, caller_nelements))
4465 /* The SIMD clone's mask type has the same number of
4466 elements as the caller's vector type. */
4467 if (m == 0)
4469 if (!slp_node)
4470 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4471 o * ncopies,
4473 &vec_oprnds[i]);
4474 vec_oprnds_i[i] = 0;
4476 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4477 if (loop_vinfo
4478 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4480 vec_loop_masks *loop_masks
4481 = &LOOP_VINFO_MASKS (loop_vinfo);
4482 tree loop_mask
4483 = vect_get_loop_mask (loop_vinfo, gsi,
4484 loop_masks, ncopies,
4485 vectype, j);
4486 vec_oprnd0
4487 = prepare_vec_mask (loop_vinfo,
4488 TREE_TYPE (loop_mask),
4489 loop_mask, vec_oprnd0,
4490 gsi);
4491 loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
4492 loop_mask });
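/* Turn the boolean mask VEC_OPRND0 into a data vector of ATYPE with
1 in the active lanes and 0 in the inactive ones. */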
4495 vec_oprnd0
4496 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4497 build_vector_from_val (atype, one),
4498 build_vector_from_val (atype, zero));
4499 gassign *new_stmt
4500 = gimple_build_assign (make_ssa_name (atype),
4501 vec_oprnd0);
4502 vect_finish_stmt_generation (vinfo, stmt_info,
4503 new_stmt, gsi);
4504 vargs.safe_push (gimple_assign_lhs (new_stmt));
4506 else
4508 /* The mask type has more elements than simdlen. */
4510 /* FORNOW */
4511 gcc_unreachable ();
4515 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4517 atype = bestn->simdclone->args[i].vector_type;
4518 /* Guess the number of lanes represented by atype. */
4519 poly_uint64 atype_subparts
4520 = exact_div (bestn->simdclone->simdlen,
4521 num_mask_args);
4522 o = vector_unroll_factor (nunits, atype_subparts);
4523 for (m = j * o; m < (j + 1) * o; m++)
4525 if (m == 0)
4527 if (!slp_node)
4528 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4529 o * ncopies,
4531 &vec_oprnds[i]);
4532 vec_oprnds_i[i] = 0;
4534 if (maybe_lt (atype_subparts,
4535 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4537 /* The mask argument has fewer elements than the
4538 input vector. */
4539 /* FORNOW */
4540 gcc_unreachable ();
4542 else if (known_eq (atype_subparts,
4543 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4545 /* The vector mask argument matches the input
4546 in the number of lanes, but not necessarily
4547 in the mode. */
4548 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4549 tree st = lang_hooks.types.type_for_mode
4550 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4551 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4552 vec_oprnd0);
4553 gassign *new_stmt
4554 = gimple_build_assign (make_ssa_name (st),
4555 vec_oprnd0);
4556 vect_finish_stmt_generation (vinfo, stmt_info,
4557 new_stmt, gsi);
4558 if (!types_compatible_p (atype, st))
4560 new_stmt
4561 = gimple_build_assign (make_ssa_name (atype),
4562 NOP_EXPR,
4563 gimple_assign_lhs
4564 (new_stmt));
4565 vect_finish_stmt_generation (vinfo, stmt_info,
4566 new_stmt, gsi);
4568 vargs.safe_push (gimple_assign_lhs (new_stmt));
4570 else
4572 /* The mask argument has more elements than the
4573 input vector. */
4574 /* FORNOW */
4575 gcc_unreachable ();
4579 else
4580 gcc_unreachable ();
4581 break;
4582 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4583 vargs.safe_push (op);
4584 break;
4585 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4586 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4587 if (j == 0)
4589 gimple_seq stmts;
4590 arginfo[i].op
4591 = force_gimple_operand (unshare_expr (arginfo[i].op),
4592 &stmts, true, NULL_TREE);
4593 if (stmts != NULL)
4595 basic_block new_bb;
4596 edge pe = loop_preheader_edge (loop);
4597 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4598 gcc_assert (!new_bb);
4600 if (arginfo[i].simd_lane_linear)
4602 vargs.safe_push (arginfo[i].op);
4603 break;
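/* Otherwise materialize the linear argument as an induction: the PHI
result starts from the invariant base in the preheader and is advanced
by LINEAR_STEP * NCOPIES * NUNITS on the latch edge. */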
4605 tree phi_res = copy_ssa_name (op);
4606 gphi *new_phi = create_phi_node (phi_res, loop->header);
4607 add_phi_arg (new_phi, arginfo[i].op,
4608 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4609 enum tree_code code
4610 = POINTER_TYPE_P (TREE_TYPE (op))
4611 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4612 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4613 ? sizetype : TREE_TYPE (op);
4614 poly_widest_int cst
4615 = wi::mul (bestn->simdclone->args[i].linear_step,
4616 ncopies * nunits);
4617 tree tcst = wide_int_to_tree (type, cst);
4618 tree phi_arg = copy_ssa_name (op);
4619 gassign *new_stmt
4620 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4621 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4622 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4623 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4624 UNKNOWN_LOCATION);
4625 arginfo[i].op = phi_res;
4626 vargs.safe_push (phi_res);
4628 else
4630 enum tree_code code
4631 = POINTER_TYPE_P (TREE_TYPE (op))
4632 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4633 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4634 ? sizetype : TREE_TYPE (op);
4635 poly_widest_int cst
4636 = wi::mul (bestn->simdclone->args[i].linear_step,
4637 j * nunits);
4638 tree tcst = wide_int_to_tree (type, cst);
4639 new_temp = make_ssa_name (TREE_TYPE (op));
4640 gassign *new_stmt
4641 = gimple_build_assign (new_temp, code,
4642 arginfo[i].op, tcst);
4643 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4644 vargs.safe_push (new_temp);
4646 break;
4647 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4648 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4649 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4650 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4651 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4652 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4653 default:
4654 gcc_unreachable ();
4658 if (masked_call_offset == 0
4659 && bestn->simdclone->inbranch
4660 && bestn->simdclone->nargs > nargs)
4662 unsigned long m, o;
4663 size_t mask_i = bestn->simdclone->nargs - 1;
4664 tree mask;
4665 gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4666 SIMD_CLONE_ARG_TYPE_MASK);
4668 tree masktype = bestn->simdclone->args[mask_i].vector_type;
4669 callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4670 o = vector_unroll_factor (nunits, callee_nelements);
4671 for (m = j * o; m < (j + 1) * o; m++)
4673 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4675 vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4676 mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4677 ncopies, vectype, j);
4679 else
4680 mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
4682 gassign *new_stmt;
4683 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4685 /* This means we are dealing with integer mask modes.
4686 First convert to an integer type with the same size as
4687 the current vector type. */
4688 unsigned HOST_WIDE_INT intermediate_size
4689 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4690 tree mid_int_type =
4691 build_nonstandard_integer_type (intermediate_size, 1);
4692 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4693 new_stmt
4694 = gimple_build_assign (make_ssa_name (mid_int_type),
4695 mask);
4696 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4697 /* Then zero-extend to the mask mode. */
4698 mask = fold_build1 (NOP_EXPR, masktype,
4699 gimple_get_lhs (new_stmt));
4701 else if (bestn->simdclone->mask_mode == VOIDmode)
4703 tree one = fold_convert (TREE_TYPE (masktype),
4704 integer_one_node);
4705 tree zero = fold_convert (TREE_TYPE (masktype),
4706 integer_zero_node);
4707 mask = build3 (VEC_COND_EXPR, masktype, mask,
4708 build_vector_from_val (masktype, one),
4709 build_vector_from_val (masktype, zero));
4711 else
4712 gcc_unreachable ();
4714 new_stmt = gimple_build_assign (make_ssa_name (masktype), mask);
4715 vect_finish_stmt_generation (vinfo, stmt_info,
4716 new_stmt, gsi);
4717 mask = gimple_assign_lhs (new_stmt);
4718 vargs.safe_push (mask);
4722 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4723 if (vec_dest)
4725 gcc_assert (ratype
4726 || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4727 if (ratype)
4728 new_temp = create_tmp_var (ratype);
4729 else if (useless_type_conversion_p (vectype, rtype))
4730 new_temp = make_ssa_name (vec_dest, new_call);
4731 else
4732 new_temp = make_ssa_name (rtype, new_call);
4733 gimple_call_set_lhs (new_call, new_temp);
4735 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4736 gimple *new_stmt = new_call;
4738 if (vec_dest)
4740 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
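/* The clone's simdlen covers several of the caller's vectors, so split
the returned value (an array when RATYPE is set, otherwise a wide
vector) into K pieces of VECTYPE via MEM_REFs or BIT_FIELD_REFs. */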
4742 unsigned int k, l;
4743 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4744 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4745 k = vector_unroll_factor (nunits,
4746 TYPE_VECTOR_SUBPARTS (vectype));
4747 gcc_assert ((k & (k - 1)) == 0);
4748 for (l = 0; l < k; l++)
4750 tree t;
4751 if (ratype)
4753 t = build_fold_addr_expr (new_temp);
4754 t = build2 (MEM_REF, vectype, t,
4755 build_int_cst (TREE_TYPE (t), l * bytes));
4757 else
4758 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4759 bitsize_int (prec), bitsize_int (l * prec));
4760 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4761 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4763 if (j == 0 && l == 0)
4764 *vec_stmt = new_stmt;
4765 if (slp_node)
4766 SLP_TREE_VEC_DEFS (slp_node)
4767 .quick_push (gimple_assign_lhs (new_stmt));
4768 else
4769 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4772 if (ratype)
4773 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4774 continue;
4776 else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
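/* The clone returns fewer lanes than VECTYPE holds: accumulate K call
results in RET_CTOR_ELTS and emit one CONSTRUCTOR of VECTYPE every
K-th copy. */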
4778 unsigned int k;
4779 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
4780 TYPE_VECTOR_SUBPARTS (rtype), &k))
4781 gcc_unreachable ();
4782 gcc_assert ((k & (k - 1)) == 0);
4783 if ((j & (k - 1)) == 0)
4784 vec_alloc (ret_ctor_elts, k);
4785 if (ratype)
4787 unsigned int m, o;
4788 o = vector_unroll_factor (nunits,
4789 TYPE_VECTOR_SUBPARTS (rtype));
4790 for (m = 0; m < o; m++)
4792 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4793 size_int (m), NULL_TREE, NULL_TREE);
4794 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4795 tem);
4796 vect_finish_stmt_generation (vinfo, stmt_info,
4797 new_stmt, gsi);
4798 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4799 gimple_assign_lhs (new_stmt));
4801 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4803 else
4804 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4805 if ((j & (k - 1)) != k - 1)
4806 continue;
4807 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4808 new_stmt
4809 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4810 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4812 if ((unsigned) j == k - 1)
4813 *vec_stmt = new_stmt;
4814 if (slp_node)
4815 SLP_TREE_VEC_DEFS (slp_node)
4816 .quick_push (gimple_assign_lhs (new_stmt));
4817 else
4818 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4819 continue;
4821 else if (ratype)
4823 tree t = build_fold_addr_expr (new_temp);
4824 t = build2 (MEM_REF, vectype, t,
4825 build_int_cst (TREE_TYPE (t), 0));
4826 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4827 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4828 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4830 else if (!useless_type_conversion_p (vectype, rtype))
4832 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4833 new_stmt
4834 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4835 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4839 if (j == 0)
4840 *vec_stmt = new_stmt;
4841 if (slp_node)
4842 SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
4843 else
4844 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4847 for (i = 0; i < nargs; ++i)
4849 vec<tree> oprndsi = vec_oprnds[i];
4850 oprndsi.release ();
4852 vargs.release ();
4854 /* Mark the clone as no longer being a candidate for GC. */
4855 bestn->gc_candidate = false;
4857 /* The call in STMT might prevent it from being removed in dce.
4858 We cannot remove it here, however, because of the way the ssa name
4859 it defines is mapped to the new definition. So just replace the
4860 rhs of the statement with something harmless. */
4862 if (slp_node)
4863 return true;
4865 gimple *new_stmt;
4866 if (scalar_dest)
4868 type = TREE_TYPE (scalar_dest);
4869 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4870 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4872 else
4873 new_stmt = gimple_build_nop ();
4874 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4875 unlink_stmt_vdef (stmt);
4877 return true;
4881 /* Function vect_gen_widened_results_half
4883 Create a vector stmt whose code is CH and whose result variable is
4884 VEC_DEST; its arguments are VEC_OPRND0 and, when OP_TYPE is binary_op,
4885 VEC_OPRND1. The new vector stmt is inserted at GSI. CH may be a
4886 tree code or an internal function; in the latter case a call to that
4887 internal function is built instead of an assignment.
4888 STMT_INFO is the original scalar stmt that we are vectorizing. */
4890 static gimple *
4891 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
4892 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4893 tree vec_dest, gimple_stmt_iterator *gsi,
4894 stmt_vec_info stmt_info)
4896 gimple *new_stmt;
4897 tree new_temp;
4899 /* Generate half of the widened result: */
4900 if (op_type != binary_op)
4901 vec_oprnd1 = NULL;
4902 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
4903 new_temp = make_ssa_name (vec_dest, new_stmt);
4904 gimple_set_lhs (new_stmt, new_temp);
4905 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4907 return new_stmt;
4911 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4912 For multi-step conversions store the resulting vectors and call the function
4913 recursively. When NARROW_SRC_P is true, there is still a conversion after
4914 the narrowing, so don't store the vectors in SLP_NODE or in the vector info
4915 of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
4917 static void
4918 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4919 int multi_step_cvt,
4920 stmt_vec_info stmt_info,
4921 vec<tree> &vec_dsts,
4922 gimple_stmt_iterator *gsi,
4923 slp_tree slp_node, code_helper code,
4924 bool narrow_src_p)
4926 unsigned int i;
4927 tree vop0, vop1, new_tmp, vec_dest;
4929 vec_dest = vec_dsts.pop ();
4931 for (i = 0; i < vec_oprnds->length (); i += 2)
4933 /* Create demotion operation. */
4934 vop0 = (*vec_oprnds)[i];
4935 vop1 = (*vec_oprnds)[i + 1];
4936 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
4937 new_tmp = make_ssa_name (vec_dest, new_stmt);
4938 gimple_set_lhs (new_stmt, new_tmp);
4939 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4940 if (multi_step_cvt || narrow_src_p)
4941 /* Store the resulting vector for the next recursive call,
4942 or return the resulting vector_tmp for a NARROW FLOAT_EXPR. */
4943 (*vec_oprnds)[i/2] = new_tmp;
4944 else
4946 /* This is the last step of the conversion sequence. Store the
4947 vectors in SLP_NODE or in vector info of the scalar statement
4948 (or in STMT_VINFO_RELATED_STMT chain). */
4949 if (slp_node)
4950 slp_node->push_vec_def (new_stmt);
4951 else
4952 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4956 /* For multi-step demotion operations we first generate demotion operations
4957 from the source type to the intermediate types, and then combine the
4958 results (stored in VEC_OPRNDS) with a demotion operation to the
4959 destination type. */
4960 if (multi_step_cvt)
4962 /* At each level of recursion we have half of the operands we had at the
4963 previous level. */
4964 vec_oprnds->truncate ((i+1)/2);
4965 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4966 multi_step_cvt - 1,
4967 stmt_info, vec_dsts, gsi,
4968 slp_node, VEC_PACK_TRUNC_EXPR,
4969 narrow_src_p);
4972 vec_dsts.quick_push (vec_dest);
4976 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4977 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4978 STMT_INFO. For multi-step conversions store the resulting vectors and
4979 call the function recursively. */
4981 static void
4982 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4983 vec<tree> *vec_oprnds0,
4984 vec<tree> *vec_oprnds1,
4985 stmt_vec_info stmt_info, tree vec_dest,
4986 gimple_stmt_iterator *gsi,
4987 code_helper ch1,
4988 code_helper ch2, int op_type)
4990 int i;
4991 tree vop0, vop1, new_tmp1, new_tmp2;
4992 gimple *new_stmt1, *new_stmt2;
4993 vec<tree> vec_tmp = vNULL;
4995 vec_tmp.create (vec_oprnds0->length () * 2);
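/* Each input vector yields two widened result vectors, one built with
CH1 and one with CH2; both are queued in VEC_TMP for the next step. */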
4996 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4998 if (op_type == binary_op)
4999 vop1 = (*vec_oprnds1)[i];
5000 else
5001 vop1 = NULL_TREE;
5003 /* Generate the two halves of the promotion operation. */
5004 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5005 op_type, vec_dest, gsi,
5006 stmt_info);
5007 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5008 op_type, vec_dest, gsi,
5009 stmt_info);
5010 if (is_gimple_call (new_stmt1))
5012 new_tmp1 = gimple_call_lhs (new_stmt1);
5013 new_tmp2 = gimple_call_lhs (new_stmt2);
5015 else
5017 new_tmp1 = gimple_assign_lhs (new_stmt1);
5018 new_tmp2 = gimple_assign_lhs (new_stmt2);
5021 /* Store the results for the next step. */
5022 vec_tmp.quick_push (new_tmp1);
5023 vec_tmp.quick_push (new_tmp2);
5026 vec_oprnds0->release ();
5027 *vec_oprnds0 = vec_tmp;
5030 /* Create vectorized promotion stmts for widening stmts using only half the
5031 potential vector size for input. */
5032 static void
5033 vect_create_half_widening_stmts (vec_info *vinfo,
5034 vec<tree> *vec_oprnds0,
5035 vec<tree> *vec_oprnds1,
5036 stmt_vec_info stmt_info, tree vec_dest,
5037 gimple_stmt_iterator *gsi,
5038 code_helper code1,
5039 int op_type)
5041 int i;
5042 tree vop0, vop1;
5043 gimple *new_stmt1;
5044 gimple *new_stmt2;
5045 gimple *new_stmt3;
5046 vec<tree> vec_tmp = vNULL;
5048 vec_tmp.create (vec_oprnds0->length ());
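/* For half widening the vector inputs are NOP-converted to the output
vector type and CODE1 is applied once, so each input vector yields a
single result vector. */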
5049 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5051 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5053 gcc_assert (op_type == binary_op);
5054 vop1 = (*vec_oprnds1)[i];
5056 /* Widen the first vector input. */
5057 out_type = TREE_TYPE (vec_dest);
5058 new_tmp1 = make_ssa_name (out_type);
5059 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5060 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5061 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5063 /* Widen the second vector input. */
5064 new_tmp2 = make_ssa_name (out_type);
5065 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5066 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5067 /* Perform the operation with both vector inputs widened. */
5068 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5070 else
5072 /* Perform the operation with the single vector input widened. */
5073 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5076 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5077 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5078 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5080 /* Store the results for the next step. */
5081 vec_tmp.quick_push (new_tmp3);
5084 vec_oprnds0->release ();
5085 *vec_oprnds0 = vec_tmp;
5089 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5090 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5091 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5092 Return true if STMT_INFO is vectorizable in this way. */
5094 static bool
5095 vectorizable_conversion (vec_info *vinfo,
5096 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5097 gimple **vec_stmt, slp_tree slp_node,
5098 stmt_vector_for_cost *cost_vec)
5100 tree vec_dest, cvt_op = NULL_TREE;
5101 tree scalar_dest;
5102 tree op0, op1 = NULL_TREE;
5103 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5104 tree_code tc1, tc2;
5105 code_helper code, code1, code2;
5106 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5107 tree new_temp;
5108 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5109 int ndts = 2;
5110 poly_uint64 nunits_in;
5111 poly_uint64 nunits_out;
5112 tree vectype_out, vectype_in;
5113 int ncopies, i;
5114 tree lhs_type, rhs_type;
5115 /* For conversions between floating point and integer, there are two
5116 NARROW cases. NARROW_SRC is for FLOAT_EXPR, meaning
5117 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5118 This is safe when the range of the source integer fits into the lower
5119 precision. NARROW_DST is for FIX_TRUNC_EXPR, meaning
5120 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5121 For other conversions, when there is narrowing, NARROW_DST is used
5122 by default. */
5123 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5124 vec<tree> vec_oprnds0 = vNULL;
5125 vec<tree> vec_oprnds1 = vNULL;
5126 tree vop0;
5127 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5128 int multi_step_cvt = 0;
5129 vec<tree> interm_types = vNULL;
5130 tree intermediate_type, cvt_type = NULL_TREE;
5131 int op_type;
5132 unsigned short fltsz;
5134 /* Is STMT a vectorizable conversion? */
5136 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5137 return false;
5139 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5140 && ! vec_stmt)
5141 return false;
5143 gimple* stmt = stmt_info->stmt;
5144 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5145 return false;
5147 if (gimple_get_lhs (stmt) == NULL_TREE
5148 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5149 return false;
5151 if (TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5152 return false;
5154 if (is_gimple_assign (stmt))
5156 code = gimple_assign_rhs_code (stmt);
5157 op_type = TREE_CODE_LENGTH ((tree_code) code);
5159 else if (gimple_call_internal_p (stmt))
5161 code = gimple_call_internal_fn (stmt);
5162 op_type = gimple_call_num_args (stmt);
5164 else
5165 return false;
5167 bool widen_arith = (code == WIDEN_MULT_EXPR
5168 || code == WIDEN_LSHIFT_EXPR
5169 || widening_fn_p (code));
5171 if (!widen_arith
5172 && !CONVERT_EXPR_CODE_P (code)
5173 && code != FIX_TRUNC_EXPR
5174 && code != FLOAT_EXPR)
5175 return false;
5177 /* Check types of lhs and rhs. */
5178 scalar_dest = gimple_get_lhs (stmt);
5179 lhs_type = TREE_TYPE (scalar_dest);
5180 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5182 /* Check the operands of the operation. */
5183 slp_tree slp_op0, slp_op1 = NULL;
5184 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5185 0, &op0, &slp_op0, &dt[0], &vectype_in))
5187 if (dump_enabled_p ())
5188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5189 "use not simple.\n");
5190 return false;
5193 rhs_type = TREE_TYPE (op0);
5194 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5195 && !((INTEGRAL_TYPE_P (lhs_type)
5196 && INTEGRAL_TYPE_P (rhs_type))
5197 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5198 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5199 return false;
5201 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5202 && ((INTEGRAL_TYPE_P (lhs_type)
5203 && !type_has_mode_precision_p (lhs_type))
5204 || (INTEGRAL_TYPE_P (rhs_type)
5205 && !type_has_mode_precision_p (rhs_type))))
5207 if (dump_enabled_p ())
5208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5209 "type conversion to/from bit-precision unsupported."
5210 "\n");
5211 return false;
5214 if (op_type == binary_op)
5216 gcc_assert (code == WIDEN_MULT_EXPR
5217 || code == WIDEN_LSHIFT_EXPR
5218 || widening_fn_p (code));
5220 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5221 gimple_call_arg (stmt, 0);
5222 tree vectype1_in;
5223 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5224 &op1, &slp_op1, &dt[1], &vectype1_in))
5226 if (dump_enabled_p ())
5227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5228 "use not simple.\n");
5229 return false;
5231 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5232 OP1. */
5233 if (!vectype_in)
5234 vectype_in = vectype1_in;
5237 /* If op0 is an external or constant def, infer the vector type
5238 from the scalar type. */
5239 if (!vectype_in)
5240 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5241 if (vec_stmt)
5242 gcc_assert (vectype_in);
5243 if (!vectype_in)
5245 if (dump_enabled_p ())
5246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5247 "no vectype for scalar type %T\n", rhs_type);
5249 return false;
5252 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5253 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5255 if (dump_enabled_p ())
5256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5257 "can't convert between boolean and non "
5258 "boolean vectors %T\n", rhs_type);
5260 return false;
5263 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5264 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5265 if (known_eq (nunits_out, nunits_in))
5266 if (widen_arith)
5267 modifier = WIDEN;
5268 else
5269 modifier = NONE;
5270 else if (multiple_p (nunits_out, nunits_in))
5271 modifier = NARROW_DST;
5272 else
5274 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5275 modifier = WIDEN;
5278 /* Multiple types in SLP are handled by creating the appropriate number of
5279 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5280 case of SLP. */
5281 if (slp_node)
5282 ncopies = 1;
5283 else if (modifier == NARROW_DST)
5284 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5285 else
5286 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5288 /* Sanity check: make sure that at least one copy of the vectorized stmt
5289 needs to be generated. */
5290 gcc_assert (ncopies >= 1);
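/* As an illustration, assuming 128-bit vectors and a vectorization
factor of 4 (neither is implied by the code above): for int -> long,
vectype_in = V4SI and vectype_out = V2DI, so nunits_in (4) is a
multiple of nunits_out (2), modifier = WIDEN and ncopies = VF /
nunits_in = 1; one V4SI input is unpacked into two V2DI results.
For long -> int the roles swap, modifier = NARROW_DST and two V2DI
inputs are packed into one V4SI result. */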
5292 bool found_mode = false;
5293 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5294 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5295 opt_scalar_mode rhs_mode_iter;
5297 /* Supportable by target? */
5298 switch (modifier)
5300 case NONE:
5301 if (code != FIX_TRUNC_EXPR
5302 && code != FLOAT_EXPR
5303 && !CONVERT_EXPR_CODE_P (code))
5304 return false;
5305 gcc_assert (code.is_tree_code ());
5306 if (supportable_convert_operation ((tree_code) code, vectype_out,
5307 vectype_in, &tc1))
5309 code1 = tc1;
5310 break;
5313 /* For conversions between float and integer types try whether
5314 we can use intermediate signed integer types to support the
5315 conversion. */
5316 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
5317 && (code == FLOAT_EXPR
5318 || (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5320 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5321 bool float_expr_p = code == FLOAT_EXPR;
5322 unsigned short target_size;
5323 scalar_mode intermediate_mode;
5324 if (demotion)
5326 intermediate_mode = lhs_mode;
5327 target_size = GET_MODE_SIZE (rhs_mode);
5329 else
5331 target_size = GET_MODE_SIZE (lhs_mode);
5332 if (!int_mode_for_size
5333 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5334 goto unsupported;
5336 code1 = float_expr_p ? code : NOP_EXPR;
5337 codecvt1 = float_expr_p ? NOP_EXPR : code;
5338 opt_scalar_mode mode_iter;
5339 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5341 intermediate_mode = mode_iter.require ();
5343 if (GET_MODE_SIZE (intermediate_mode) > target_size)
5344 break;
5346 scalar_mode cvt_mode;
5347 if (!int_mode_for_size
5348 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
5349 break;
5351 cvt_type = build_nonstandard_integer_type
5352 (GET_MODE_BITSIZE (cvt_mode), 0);
5354 /* Check if the intermediate type can hold OP0's range.
5355 When converting from float to integer this is not necessary
5356 because values that do not fit the (smaller) target type are
5357 unspecified anyway. */
5358 if (demotion && float_expr_p)
5360 wide_int op_min_value, op_max_value;
5361 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5362 break;
5364 if (cvt_type == NULL_TREE
5365 || (wi::min_precision (op_max_value, SIGNED)
5366 > TYPE_PRECISION (cvt_type))
5367 || (wi::min_precision (op_min_value, SIGNED)
5368 > TYPE_PRECISION (cvt_type)))
5369 continue;
5372 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5373 /* This should only happen for SLP as long as the loop vectorizer
5374 only supports same-sized vectors. */
5375 if (cvt_type == NULL_TREE
5376 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5377 || !supportable_convert_operation ((tree_code) code1,
5378 vectype_out,
5379 cvt_type, &tc1)
5380 || !supportable_convert_operation ((tree_code) codecvt1,
5381 cvt_type,
5382 vectype_in, &tc2))
5383 continue;
5385 found_mode = true;
5386 break;
5389 if (found_mode)
5391 multi_step_cvt++;
5392 interm_types.safe_push (cvt_type);
5393 cvt_type = NULL_TREE;
5394 code1 = tc1;
5395 codecvt1 = tc2;
5396 break;
5399 /* FALLTHRU */
5400 unsupported:
5401 if (dump_enabled_p ())
5402 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5403 "conversion not supported by target.\n");
5404 return false;
5406 case WIDEN:
5407 if (known_eq (nunits_in, nunits_out))
5409 if (!(code.is_tree_code ()
5410 && supportable_half_widening_operation ((tree_code) code,
5411 vectype_out, vectype_in,
5412 &tc1)))
5413 goto unsupported;
5414 code1 = tc1;
5415 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5416 break;
5418 if (supportable_widening_operation (vinfo, code, stmt_info,
5419 vectype_out, vectype_in, &code1,
5420 &code2, &multi_step_cvt,
5421 &interm_types))
5423 /* Binary widening operation can only be supported directly by the
5424 architecture. */
5425 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5426 break;
5429 if (code != FLOAT_EXPR
5430 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5431 goto unsupported;
5433 fltsz = GET_MODE_SIZE (lhs_mode);
5434 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5436 rhs_mode = rhs_mode_iter.require ();
5437 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5438 break;
5440 cvt_type
5441 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5442 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5443 if (cvt_type == NULL_TREE)
5444 goto unsupported;
5446 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5448 tc1 = ERROR_MARK;
5449 gcc_assert (code.is_tree_code ());
5450 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5451 cvt_type, &tc1))
5452 goto unsupported;
5453 codecvt1 = tc1;
5455 else if (!supportable_widening_operation (vinfo, code,
5456 stmt_info, vectype_out,
5457 cvt_type, &codecvt1,
5458 &codecvt2, &multi_step_cvt,
5459 &interm_types))
5460 continue;
5461 else
5462 gcc_assert (multi_step_cvt == 0);
5464 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5465 cvt_type,
5466 vectype_in, &code1,
5467 &code2, &multi_step_cvt,
5468 &interm_types))
5470 found_mode = true;
5471 break;
5475 if (!found_mode)
5476 goto unsupported;
5478 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5479 codecvt2 = ERROR_MARK;
5480 else
5482 multi_step_cvt++;
5483 interm_types.safe_push (cvt_type);
5484 cvt_type = NULL_TREE;
5486 break;
5488 case NARROW_DST:
5489 gcc_assert (op_type == unary_op);
5490 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5491 &code1, &multi_step_cvt,
5492 &interm_types))
5493 break;
5495 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5496 goto unsupported;
5498 if (code == FIX_TRUNC_EXPR)
5500 cvt_type
5501 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5502 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5503 if (cvt_type == NULL_TREE)
5504 goto unsupported;
5505 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5506 &tc1))
5507 codecvt1 = tc1;
5508 else
5509 goto unsupported;
5510 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5511 &code1, &multi_step_cvt,
5512 &interm_types))
5513 break;
5515 /* If op0 can be represented with a low-precision integer,
5516 truncate it to cvt_type and then do the FLOAT_EXPR. */
5517 else if (code == FLOAT_EXPR)
5519 wide_int op_min_value, op_max_value;
5520 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5521 goto unsupported;
5523 cvt_type
5524 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5525 if (cvt_type == NULL_TREE
5526 || (wi::min_precision (op_max_value, SIGNED)
5527 > TYPE_PRECISION (cvt_type))
5528 || (wi::min_precision (op_min_value, SIGNED)
5529 > TYPE_PRECISION (cvt_type)))
5530 goto unsupported;
5532 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5533 if (cvt_type == NULL_TREE)
5534 goto unsupported;
5535 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5536 &code1, &multi_step_cvt,
5537 &interm_types))
5538 goto unsupported;
5539 if (supportable_convert_operation ((tree_code) code, vectype_out,
5540 cvt_type, &tc1))
5542 codecvt1 = tc1;
5543 modifier = NARROW_SRC;
5544 break;
5548 goto unsupported;
5550 default:
5551 gcc_unreachable ();
5554 if (!vec_stmt) /* transformation not required. */
5556 if (slp_node
5557 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5558 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5560 if (dump_enabled_p ())
5561 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5562 "incompatible vector types for invariants\n");
5563 return false;
5565 DUMP_VECT_SCOPE ("vectorizable_conversion");
5566 if (modifier == NONE)
5568 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5569 vect_model_simple_cost (vinfo, stmt_info,
5570 ncopies * (1 + multi_step_cvt),
5571 dt, ndts, slp_node, cost_vec);
5573 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5575 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5576 /* The final packing step produces one vector result per copy. */
5577 unsigned int nvectors
5578 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5579 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5580 multi_step_cvt, cost_vec,
5581 widen_arith);
5583 else
5585 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5586 /* The initial unpacking step produces two vector results
5587 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5588 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5589 unsigned int nvectors
5590 = (slp_node
5591 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5592 : ncopies * 2);
5593 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5594 multi_step_cvt, cost_vec,
5595 widen_arith);
5597 interm_types.release ();
5598 return true;
5601 /* Transform. */
5602 if (dump_enabled_p ())
5603 dump_printf_loc (MSG_NOTE, vect_location,
5604 "transform conversion. ncopies = %d.\n", ncopies);
5606 if (op_type == binary_op)
5608 if (CONSTANT_CLASS_P (op0))
5609 op0 = fold_convert (TREE_TYPE (op1), op0);
5610 else if (CONSTANT_CLASS_P (op1))
5611 op1 = fold_convert (TREE_TYPE (op0), op1);
5614 /* In case of multi-step conversion, we first generate conversion operations
5615 to the intermediate types, and then from those types to the final one.
5616 We create vector destinations for the intermediate type (TYPES) received
5617 from supportable_*_operation, and store them in the correct order
5618 for future use in vect_create_vectorized_*_stmts (). */
5619 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5620 bool widen_or_narrow_float_p
5621 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5622 vec_dest = vect_create_destination_var (scalar_dest,
5623 widen_or_narrow_float_p
5624 ? cvt_type : vectype_out);
5625 vec_dsts.quick_push (vec_dest);
5627 if (multi_step_cvt)
5629 for (i = interm_types.length () - 1;
5630 interm_types.iterate (i, &intermediate_type); i--)
5632 vec_dest = vect_create_destination_var (scalar_dest,
5633 intermediate_type);
5634 vec_dsts.quick_push (vec_dest);
5638 if (cvt_type)
5639 vec_dest = vect_create_destination_var (scalar_dest,
5640 widen_or_narrow_float_p
5641 ? vectype_out : cvt_type);
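/* For example, for a char -> int promotion done in two steps through
an intermediate short vector type (an assumed scenario, not implied
by the code above), vec_dsts now holds the final int-vector
destination followed by the intermediate short-vector destination;
the transform code below consumes it from the back, intermediate
type first. */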
5643 int ninputs = 1;
5644 if (!slp_node)
5646 if (modifier == WIDEN)
5648 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5650 if (multi_step_cvt)
5651 ninputs = vect_pow2 (multi_step_cvt);
5652 ninputs *= 2;
5656 switch (modifier)
5658 case NONE:
5659 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5660 op0, &vec_oprnds0);
5661 /* When multi_step_cvt, vec_dest is the intermediate-type operand; vec_dsts[0] is the final destination. */
5662 if (multi_step_cvt)
5664 cvt_op = vec_dest;
5665 vec_dest = vec_dsts[0];
5668 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5670 /* Arguments are ready, create the new vector stmt. */
5671 gimple* new_stmt;
5672 if (multi_step_cvt)
5674 gcc_assert (multi_step_cvt == 1);
5675 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5676 new_temp = make_ssa_name (cvt_op, new_stmt);
5677 gimple_assign_set_lhs (new_stmt, new_temp);
5678 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5679 vop0 = new_temp;
5681 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5682 new_temp = make_ssa_name (vec_dest, new_stmt);
5683 gimple_set_lhs (new_stmt, new_temp);
5684 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5686 if (slp_node)
5687 slp_node->push_vec_def (new_stmt);
5688 else
5689 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5691 break;
5693 case WIDEN:
5694 /* In case the vectorization factor (VF) is bigger than the number
5695 of elements that we can fit in a vectype (nunits), we have to
5696 generate more than one vector stmt - i.e., we need to "unroll"
5697 the vector stmt by a factor VF/nunits. */
5698 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5699 op0, &vec_oprnds0,
5700 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5701 &vec_oprnds1);
5702 if (code == WIDEN_LSHIFT_EXPR)
5704 int oprnds_size = vec_oprnds0.length ();
5705 vec_oprnds1.create (oprnds_size);
5706 for (i = 0; i < oprnds_size; ++i)
5707 vec_oprnds1.quick_push (op1);
5709 /* Arguments are ready. Create the new vector stmts. */
5710 for (i = multi_step_cvt; i >= 0; i--)
5712 tree this_dest = vec_dsts[i];
5713 code_helper c1 = code1, c2 = code2;
5714 if (i == 0 && codecvt2 != ERROR_MARK)
5716 c1 = codecvt1;
5717 c2 = codecvt2;
5719 if (known_eq (nunits_out, nunits_in))
5720 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5721 stmt_info, this_dest, gsi, c1,
5722 op_type);
5723 else
5724 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5725 &vec_oprnds1, stmt_info,
5726 this_dest, gsi,
5727 c1, c2, op_type);
5730 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5732 gimple *new_stmt;
5733 if (cvt_type)
5735 new_temp = make_ssa_name (vec_dest);
5736 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5737 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5739 else
5740 new_stmt = SSA_NAME_DEF_STMT (vop0);
5742 if (slp_node)
5743 slp_node->push_vec_def (new_stmt);
5744 else
5745 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5747 break;
5749 case NARROW_SRC:
5750 case NARROW_DST:
5751 /* In case the vectorization factor (VF) is bigger than the number
5752 of elements that we can fit in a vectype (nunits), we have to
5753 generate more than one vector stmt - i.e., we need to "unroll"
5754 the vector stmt by a factor VF/nunits. */
5755 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5756 op0, &vec_oprnds0);
5757 /* Arguments are ready. Create the new vector stmts. */
5758 if (cvt_type && modifier == NARROW_DST)
5759 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5761 new_temp = make_ssa_name (vec_dest);
5762 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5763 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5764 vec_oprnds0[i] = new_temp;
5767 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5768 multi_step_cvt,
5769 stmt_info, vec_dsts, gsi,
5770 slp_node, code1,
5771 modifier == NARROW_SRC);
5772 /* After demoting op0 to cvt_type, convert it to dest. */
5773 if (cvt_type && code == FLOAT_EXPR)
5775 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5777 /* Arguments are ready, create the new vector stmt. */
5778 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5779 gimple *new_stmt
5780 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5781 new_temp = make_ssa_name (vec_dest, new_stmt);
5782 gimple_set_lhs (new_stmt, new_temp);
5783 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5785 /* This is the last step of the conversion sequence. Store the
5786 vectors in SLP_NODE or in vector info of the scalar statement
5787 (or in STMT_VINFO_RELATED_STMT chain). */
5788 if (slp_node)
5789 slp_node->push_vec_def (new_stmt);
5790 else
5791 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5794 break;
5796 if (!slp_node)
5797 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5799 vec_oprnds0.release ();
5800 vec_oprnds1.release ();
5801 interm_types.release ();
5803 return true;
5806 /* Return true if we can assume from the scalar form of STMT_INFO that
5807 neither the scalar nor the vector forms will generate code. STMT_INFO
5808 is known not to involve a data reference. */
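/* For instance, a plain SSA copy, a VIEW_CONVERT_EXPR, or a cast
between int and unsigned int (same mode and precision) generates no
code in either the scalar or the vector form (illustrative examples
only). */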
5810 bool
5811 vect_nop_conversion_p (stmt_vec_info stmt_info)
5813 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5814 if (!stmt)
5815 return false;
5817 tree lhs = gimple_assign_lhs (stmt);
5818 tree_code code = gimple_assign_rhs_code (stmt);
5819 tree rhs = gimple_assign_rhs1 (stmt);
5821 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5822 return true;
5824 if (CONVERT_EXPR_CODE_P (code))
5825 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5827 return false;
5830 /* Function vectorizable_assignment.
5832 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5833 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5834 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5835 Return true if STMT_INFO is vectorizable in this way. */
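/* Typical candidates are plain SSA copies, PAREN_EXPR, and
conversions that leave the vector layout unchanged; e.g.
(illustrative only) unsigned int u = (unsigned int) s for int s,
which the transform below turns into one assignment with a
VIEW_CONVERT_EXPR on the right-hand side per vector copy. */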
5837 static bool
5838 vectorizable_assignment (vec_info *vinfo,
5839 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5840 gimple **vec_stmt, slp_tree slp_node,
5841 stmt_vector_for_cost *cost_vec)
5843 tree vec_dest;
5844 tree scalar_dest;
5845 tree op;
5846 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5847 tree new_temp;
5848 enum vect_def_type dt[1] = {vect_unknown_def_type};
5849 int ndts = 1;
5850 int ncopies;
5851 int i;
5852 vec<tree> vec_oprnds = vNULL;
5853 tree vop;
5854 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5855 enum tree_code code;
5856 tree vectype_in;
5858 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5859 return false;
5861 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5862 && ! vec_stmt)
5863 return false;
5865 /* Is vectorizable assignment? */
5866 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5867 if (!stmt)
5868 return false;
5870 scalar_dest = gimple_assign_lhs (stmt);
5871 if (TREE_CODE (scalar_dest) != SSA_NAME)
5872 return false;
5874 if (STMT_VINFO_DATA_REF (stmt_info))
5875 return false;
5877 code = gimple_assign_rhs_code (stmt);
5878 if (!(gimple_assign_single_p (stmt)
5879 || code == PAREN_EXPR
5880 || CONVERT_EXPR_CODE_P (code)))
5881 return false;
5883 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5884 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5886 /* Multiple types in SLP are handled by creating the appropriate number of
5887 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5888 case of SLP. */
5889 if (slp_node)
5890 ncopies = 1;
5891 else
5892 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5894 gcc_assert (ncopies >= 1);
5896 slp_tree slp_op;
5897 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5898 &dt[0], &vectype_in))
5900 if (dump_enabled_p ())
5901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5902 "use not simple.\n");
5903 return false;
5905 if (!vectype_in)
5906 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5908 /* We can handle NOP_EXPR conversions that do not change the number
5909 of elements or the vector size. */
5910 if ((CONVERT_EXPR_CODE_P (code)
5911 || code == VIEW_CONVERT_EXPR)
5912 && (!vectype_in
5913 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5914 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5915 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5916 return false;
5918 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5920 if (dump_enabled_p ())
5921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5922 "can't convert between boolean and non "
5923 "boolean vectors %T\n", TREE_TYPE (op));
5925 return false;
5928 /* We do not handle bit-precision changes. */
5929 if ((CONVERT_EXPR_CODE_P (code)
5930 || code == VIEW_CONVERT_EXPR)
5931 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5932 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5933 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
5934 && !type_has_mode_precision_p (TREE_TYPE (op))))
5935 /* But a conversion that does not change the bit-pattern is ok. */
5936 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5937 && INTEGRAL_TYPE_P (TREE_TYPE (op))
5938 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5939 > TYPE_PRECISION (TREE_TYPE (op)))
5940 && TYPE_UNSIGNED (TREE_TYPE (op)))
5941 || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5942 == TYPE_PRECISION (TREE_TYPE (op))))))
5944 if (dump_enabled_p ())
5945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5946 "type conversion to/from bit-precision "
5947 "unsupported.\n");
5948 return false;
5951 if (!vec_stmt) /* transformation not required. */
5953 if (slp_node
5954 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5956 if (dump_enabled_p ())
5957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5958 "incompatible vector types for invariants\n");
5959 return false;
5961 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5962 DUMP_VECT_SCOPE ("vectorizable_assignment");
5963 if (!vect_nop_conversion_p (stmt_info))
5964 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5965 cost_vec);
5966 return true;
5969 /* Transform. */
5970 if (dump_enabled_p ())
5971 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5973 /* Handle def. */
5974 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5976 /* Handle use. */
5977 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5979 /* Arguments are ready. Create the new vector stmt. */
5980 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5982 if (CONVERT_EXPR_CODE_P (code)
5983 || code == VIEW_CONVERT_EXPR)
5984 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5985 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5986 new_temp = make_ssa_name (vec_dest, new_stmt);
5987 gimple_assign_set_lhs (new_stmt, new_temp);
5988 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5989 if (slp_node)
5990 slp_node->push_vec_def (new_stmt);
5991 else
5992 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5994 if (!slp_node)
5995 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5997 vec_oprnds.release ();
5998 return true;
6002 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6003 either as shift by a scalar or by a vector. */
6005 bool
6006 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6009 machine_mode vec_mode;
6010 optab optab;
6011 int icode;
6012 tree vectype;
6014 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6015 if (!vectype)
6016 return false;
6018 optab = optab_for_tree_code (code, vectype, optab_scalar);
6019 if (!optab
6020 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6022 optab = optab_for_tree_code (code, vectype, optab_vector);
6023 if (!optab
6024 || (optab_handler (optab, TYPE_MODE (vectype))
6025 == CODE_FOR_nothing))
6026 return false;
6029 vec_mode = TYPE_MODE (vectype);
6030 icode = (int) optab_handler (optab, vec_mode);
6031 if (icode == CODE_FOR_nothing)
6032 return false;
6034 return true;
6038 /* Function vectorizable_shift.
6040 Check if STMT_INFO performs a shift operation that can be vectorized.
6041 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6042 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6043 Return true if STMT_INFO is vectorizable in this way. */
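/* As a sketch, assuming a V8HI vector type (not implied by this
function itself): for x = a[i] << 3 the shift amount is invariant,
so the vector/scalar shift optab is tried first and each copy
becomes vect_x = vect_a << 3; for x = a[i] << b[i] the amount is a
vector def and the vector/vector optab must be used instead. */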
6045 static bool
6046 vectorizable_shift (vec_info *vinfo,
6047 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6048 gimple **vec_stmt, slp_tree slp_node,
6049 stmt_vector_for_cost *cost_vec)
6051 tree vec_dest;
6052 tree scalar_dest;
6053 tree op0, op1 = NULL;
6054 tree vec_oprnd1 = NULL_TREE;
6055 tree vectype;
6056 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6057 enum tree_code code;
6058 machine_mode vec_mode;
6059 tree new_temp;
6060 optab optab;
6061 int icode;
6062 machine_mode optab_op2_mode;
6063 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6064 int ndts = 2;
6065 poly_uint64 nunits_in;
6066 poly_uint64 nunits_out;
6067 tree vectype_out;
6068 tree op1_vectype;
6069 int ncopies;
6070 int i;
6071 vec<tree> vec_oprnds0 = vNULL;
6072 vec<tree> vec_oprnds1 = vNULL;
6073 tree vop0, vop1;
6074 unsigned int k;
6075 bool scalar_shift_arg = true;
6076 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6077 bool incompatible_op1_vectype_p = false;
6079 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6080 return false;
6082 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6083 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6084 && ! vec_stmt)
6085 return false;
6087 /* Is STMT a vectorizable binary/unary operation? */
6088 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6089 if (!stmt)
6090 return false;
6092 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6093 return false;
6095 code = gimple_assign_rhs_code (stmt);
6097 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6098 || code == RROTATE_EXPR))
6099 return false;
6101 scalar_dest = gimple_assign_lhs (stmt);
6102 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6103 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6105 if (dump_enabled_p ())
6106 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6107 "bit-precision shifts not supported.\n");
6108 return false;
6111 slp_tree slp_op0;
6112 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6113 0, &op0, &slp_op0, &dt[0], &vectype))
6115 if (dump_enabled_p ())
6116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6117 "use not simple.\n");
6118 return false;
6120 /* If op0 is an external or constant def, infer the vector type
6121 from the scalar type. */
6122 if (!vectype)
6123 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6124 if (vec_stmt)
6125 gcc_assert (vectype);
6126 if (!vectype)
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6130 "no vectype for scalar type\n");
6131 return false;
6134 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6135 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6136 if (maybe_ne (nunits_out, nunits_in))
6137 return false;
6139 stmt_vec_info op1_def_stmt_info;
6140 slp_tree slp_op1;
6141 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6142 &dt[1], &op1_vectype, &op1_def_stmt_info))
6144 if (dump_enabled_p ())
6145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6146 "use not simple.\n");
6147 return false;
6150 /* Multiple types in SLP are handled by creating the appropriate number of
6151 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6152 case of SLP. */
6153 if (slp_node)
6154 ncopies = 1;
6155 else
6156 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6158 gcc_assert (ncopies >= 1);
6160 /* Determine whether the shift amount is a vector, or scalar. If the
6161 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6163 if ((dt[1] == vect_internal_def
6164 || dt[1] == vect_induction_def
6165 || dt[1] == vect_nested_cycle)
6166 && !slp_node)
6167 scalar_shift_arg = false;
6168 else if (dt[1] == vect_constant_def
6169 || dt[1] == vect_external_def
6170 || dt[1] == vect_internal_def)
6172 /* In SLP, we need to check whether the shift count is the same
6173 in all stmts; in loops, if it is a constant or invariant, it is
6174 always a scalar shift. */
6175 if (slp_node)
6177 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6178 stmt_vec_info slpstmt_info;
6180 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6182 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6183 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6184 scalar_shift_arg = false;
6187 /* For internal SLP defs we have to make sure we see scalar stmts
6188 for all vector elements.
6189 ??? For different vectors we could resort to a different
6190 scalar shift operand but code-generation below simply always
6191 takes the first. */
6192 if (dt[1] == vect_internal_def
6193 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6194 stmts.length ()))
6195 scalar_shift_arg = false;
6198 /* If the shift amount is computed by a pattern stmt we cannot
6199 use the scalar amount directly thus give up and use a vector
6200 shift. */
6201 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6202 scalar_shift_arg = false;
6204 else
6206 if (dump_enabled_p ())
6207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6208 "operand mode requires invariant argument.\n");
6209 return false;
6212 /* Vector shifted by vector. */
6213 bool was_scalar_shift_arg = scalar_shift_arg;
6214 if (!scalar_shift_arg)
6216 optab = optab_for_tree_code (code, vectype, optab_vector);
6217 if (dump_enabled_p ())
6218 dump_printf_loc (MSG_NOTE, vect_location,
6219 "vector/vector shift/rotate found.\n");
6221 if (!op1_vectype)
6222 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6223 slp_op1);
6224 incompatible_op1_vectype_p
6225 = (op1_vectype == NULL_TREE
6226 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6227 TYPE_VECTOR_SUBPARTS (vectype))
6228 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6229 if (incompatible_op1_vectype_p
6230 && (!slp_node
6231 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6232 || slp_op1->refcnt != 1))
6234 if (dump_enabled_p ())
6235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6236 "unusable type for last operand in"
6237 " vector/vector shift/rotate.\n");
6238 return false;
6241 /* See if the machine has a vector shifted by scalar insn and if not
6242 then see if it has a vector shifted by vector insn. */
6243 else
6245 optab = optab_for_tree_code (code, vectype, optab_scalar);
6246 if (optab
6247 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6249 if (dump_enabled_p ())
6250 dump_printf_loc (MSG_NOTE, vect_location,
6251 "vector/scalar shift/rotate found.\n");
6253 else
6255 optab = optab_for_tree_code (code, vectype, optab_vector);
6256 if (optab
6257 && (optab_handler (optab, TYPE_MODE (vectype))
6258 != CODE_FOR_nothing))
6260 scalar_shift_arg = false;
6262 if (dump_enabled_p ())
6263 dump_printf_loc (MSG_NOTE, vect_location,
6264 "vector/vector shift/rotate found.\n");
6266 if (!op1_vectype)
6267 op1_vectype = get_vectype_for_scalar_type (vinfo,
6268 TREE_TYPE (op1),
6269 slp_op1);
6271 /* Unlike the other binary operators, shifts/rotates have
6272 the rhs being int, instead of the same type as the lhs,
6273 so make sure the scalar is the right type if we are
6274 dealing with vectors of long long/long/short/char. */
6275 incompatible_op1_vectype_p
6276 = (!op1_vectype
6277 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6278 TREE_TYPE (op1)));
6279 if (incompatible_op1_vectype_p
6280 && dt[1] == vect_internal_def)
6282 if (dump_enabled_p ())
6283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6284 "unusable type for last operand in"
6285 " vector/vector shift/rotate.\n");
6286 return false;
6292 /* Supportable by target? */
6293 if (!optab)
6295 if (dump_enabled_p ())
6296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6297 "no optab.\n");
6298 return false;
6300 vec_mode = TYPE_MODE (vectype);
6301 icode = (int) optab_handler (optab, vec_mode);
6302 if (icode == CODE_FOR_nothing)
6304 if (dump_enabled_p ())
6305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6306 "op not supported by target.\n");
6307 return false;
6309 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6310 if (vect_emulated_vector_p (vectype))
6311 return false;
6313 if (!vec_stmt) /* transformation not required. */
6315 if (slp_node
6316 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6317 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6318 && (!incompatible_op1_vectype_p
6319 || dt[1] == vect_constant_def)
6320 && !vect_maybe_update_slp_op_vectype
6321 (slp_op1,
6322 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6324 if (dump_enabled_p ())
6325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6326 "incompatible vector types for invariants\n");
6327 return false;
6329 /* Now adjust the constant shift amount in place. */
6330 if (slp_node
6331 && incompatible_op1_vectype_p
6332 && dt[1] == vect_constant_def)
6334 for (unsigned i = 0;
6335 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6337 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6338 = fold_convert (TREE_TYPE (vectype),
6339 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6340 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6341 == INTEGER_CST));
6344 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6345 DUMP_VECT_SCOPE ("vectorizable_shift");
6346 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6347 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6348 return true;
6351 /* Transform. */
6353 if (dump_enabled_p ())
6354 dump_printf_loc (MSG_NOTE, vect_location,
6355 "transform binary/unary operation.\n");
6357 if (incompatible_op1_vectype_p && !slp_node)
6359 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6360 op1 = fold_convert (TREE_TYPE (vectype), op1);
6361 if (dt[1] != vect_constant_def)
6362 op1 = vect_init_vector (vinfo, stmt_info, op1,
6363 TREE_TYPE (vectype), NULL);
6366 /* Handle def. */
6367 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6369 if (scalar_shift_arg && dt[1] != vect_internal_def)
6371 /* Vector shl and shr insn patterns can be defined with scalar
6372 operand 2 (shift operand). In this case, use constant or loop
6373 invariant op1 directly, without extending it to vector mode
6374 first. */
6375 optab_op2_mode = insn_data[icode].operand[2].mode;
6376 if (!VECTOR_MODE_P (optab_op2_mode))
6378 if (dump_enabled_p ())
6379 dump_printf_loc (MSG_NOTE, vect_location,
6380 "operand 1 using scalar mode.\n");
6381 vec_oprnd1 = op1;
6382 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6383 vec_oprnds1.quick_push (vec_oprnd1);
6384 /* Store vec_oprnd1 for every vector stmt to be created.
6385 We check during the analysis that all the shift arguments
6386 are the same.
6387 TODO: Allow different constants for different vector
6388 stmts generated for an SLP instance. */
6389 for (k = 0;
6390 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6391 vec_oprnds1.quick_push (vec_oprnd1);
6394 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6396 if (was_scalar_shift_arg)
6398 /* If the argument was the same in all lanes, create
6399 the correctly typed vector shift amount directly. */
6400 op1 = fold_convert (TREE_TYPE (vectype), op1);
6401 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6402 !loop_vinfo ? gsi : NULL);
6403 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6404 !loop_vinfo ? gsi : NULL);
6405 vec_oprnds1.create (slp_node->vec_stmts_size);
6406 for (k = 0; k < slp_node->vec_stmts_size; k++)
6407 vec_oprnds1.quick_push (vec_oprnd1);
6409 else if (dt[1] == vect_constant_def)
6410 /* The constant shift amount has been adjusted in place. */
6412 else
6413 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6416 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6417 (a special case for certain kind of vector shifts); otherwise,
6418 operand 1 should be of a vector type (the usual case). */
6419 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6420 op0, &vec_oprnds0,
6421 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6423 /* Arguments are ready. Create the new vector stmt. */
6424 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6426 /* For internal defs where we need to use a scalar shift arg,
6427 extract the first lane. */
6428 if (scalar_shift_arg && dt[1] == vect_internal_def)
6430 vop1 = vec_oprnds1[0];
6431 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6432 gassign *new_stmt
6433 = gimple_build_assign (new_temp,
6434 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6435 vop1,
6436 TYPE_SIZE (TREE_TYPE (new_temp)),
6437 bitsize_zero_node));
6438 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6439 vop1 = new_temp;
6441 else
6442 vop1 = vec_oprnds1[i];
6443 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6444 new_temp = make_ssa_name (vec_dest, new_stmt);
6445 gimple_assign_set_lhs (new_stmt, new_temp);
6446 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6447 if (slp_node)
6448 slp_node->push_vec_def (new_stmt);
6449 else
6450 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6453 if (!slp_node)
6454 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6456 vec_oprnds0.release ();
6457 vec_oprnds1.release ();
6459 return true;
6462 /* Function vectorizable_operation.
6464 Check if STMT_INFO performs a binary, unary or ternary operation that can
6465 be vectorized.
6466 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6467 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6468 Return true if STMT_INFO is vectorizable in this way. */
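/* As a minimal example (illustrative, not derived from this
function): z = x + y on ints with a V4SI vector type becomes one
vector addition per copy, provided the target has a handler for the
corresponding optab; special paths below deal with emulated vectors
and with masked or length-controlled loops. */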
6470 static bool
6471 vectorizable_operation (vec_info *vinfo,
6472 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6473 gimple **vec_stmt, slp_tree slp_node,
6474 stmt_vector_for_cost *cost_vec)
6476 tree vec_dest;
6477 tree scalar_dest;
6478 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6479 tree vectype;
6480 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6481 enum tree_code code, orig_code;
6482 machine_mode vec_mode;
6483 tree new_temp;
6484 int op_type;
6485 optab optab;
6486 bool target_support_p;
6487 enum vect_def_type dt[3]
6488 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6489 int ndts = 3;
6490 poly_uint64 nunits_in;
6491 poly_uint64 nunits_out;
6492 tree vectype_out;
6493 int ncopies, vec_num;
6494 int i;
6495 vec<tree> vec_oprnds0 = vNULL;
6496 vec<tree> vec_oprnds1 = vNULL;
6497 vec<tree> vec_oprnds2 = vNULL;
6498 tree vop0, vop1, vop2;
6499 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6501 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6502 return false;
6504 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6505 && ! vec_stmt)
6506 return false;
6508 /* Is STMT a vectorizable binary/unary operation? */
6509 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6510 if (!stmt)
6511 return false;
6513 /* Loads and stores are handled in vectorizable_{load,store}. */
6514 if (STMT_VINFO_DATA_REF (stmt_info))
6515 return false;
6517 orig_code = code = gimple_assign_rhs_code (stmt);
6519 /* Shifts are handled in vectorizable_shift. */
6520 if (code == LSHIFT_EXPR
6521 || code == RSHIFT_EXPR
6522 || code == LROTATE_EXPR
6523 || code == RROTATE_EXPR)
6524 return false;
6526 /* Comparisons are handled in vectorizable_comparison. */
6527 if (TREE_CODE_CLASS (code) == tcc_comparison)
6528 return false;
6530 /* Conditions are handled in vectorizable_condition. */
6531 if (code == COND_EXPR)
6532 return false;
6534 /* For pointer addition and subtraction, we should use the normal
6535 plus and minus for the vector operation. */
6536 if (code == POINTER_PLUS_EXPR)
6537 code = PLUS_EXPR;
6538 if (code == POINTER_DIFF_EXPR)
6539 code = MINUS_EXPR;
6541 /* Support only unary or binary operations. */
6542 op_type = TREE_CODE_LENGTH (code);
6543 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6545 if (dump_enabled_p ())
6546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6547 "num. args = %d (not unary/binary/ternary op).\n",
6548 op_type);
6549 return false;
6552 scalar_dest = gimple_assign_lhs (stmt);
6553 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6555 /* Most operations cannot handle bit-precision types without extra
6556 truncations. */
6557 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6558 if (!mask_op_p
6559 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6560 /* Exceptions are bitwise binary operations. */
6561 && code != BIT_IOR_EXPR
6562 && code != BIT_XOR_EXPR
6563 && code != BIT_AND_EXPR)
6565 if (dump_enabled_p ())
6566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6567 "bit-precision arithmetic not supported.\n");
6568 return false;
6571 slp_tree slp_op0;
6572 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6573 0, &op0, &slp_op0, &dt[0], &vectype))
6575 if (dump_enabled_p ())
6576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6577 "use not simple.\n");
6578 return false;
6580 bool is_invariant = (dt[0] == vect_external_def
6581 || dt[0] == vect_constant_def);
6582 /* If op0 is an external or constant def, infer the vector type
6583 from the scalar type. */
6584 if (!vectype)
6586 /* For a boolean type we cannot determine the vectype from an
6587 invariant value (we don't know whether it is a vector of
6588 booleans or a vector of integers). We use the output
6589 vectype because operations on booleans don't change the
6590 type. */
6591 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6593 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6595 if (dump_enabled_p ())
6596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6597 "not supported operation on bool value.\n");
6598 return false;
6600 vectype = vectype_out;
6602 else
6603 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6604 slp_node);
6606 if (vec_stmt)
6607 gcc_assert (vectype);
6608 if (!vectype)
6610 if (dump_enabled_p ())
6611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6612 "no vectype for scalar type %T\n",
6613 TREE_TYPE (op0));
6615 return false;
6618 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6619 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6620 if (maybe_ne (nunits_out, nunits_in))
6621 return false;
6623 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6624 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6625 if (op_type == binary_op || op_type == ternary_op)
6627 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6628 1, &op1, &slp_op1, &dt[1], &vectype2))
6630 if (dump_enabled_p ())
6631 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6632 "use not simple.\n");
6633 return false;
6635 is_invariant &= (dt[1] == vect_external_def
6636 || dt[1] == vect_constant_def);
6637 if (vectype2
6638 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6639 return false;
6641 if (op_type == ternary_op)
6643 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6644 2, &op2, &slp_op2, &dt[2], &vectype3))
6646 if (dump_enabled_p ())
6647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6648 "use not simple.\n");
6649 return false;
6651 is_invariant &= (dt[2] == vect_external_def
6652 || dt[2] == vect_constant_def);
6653 if (vectype3
6654 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6655 return false;
6658 /* Multiple types in SLP are handled by creating the appropriate number of
6659 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6660 case of SLP. */
6661 if (slp_node)
6663 ncopies = 1;
6664 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6666 else
6668 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6669 vec_num = 1;
6672 gcc_assert (ncopies >= 1);
6674 /* Reject attempts to combine mask types with nonmask types, e.g. if
6675 we have an AND between a (nonmask) boolean loaded from memory and
6676 a (mask) boolean result of a comparison.
6678 TODO: We could easily fix these cases up using pattern statements. */
6679 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6680 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6681 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6683 if (dump_enabled_p ())
6684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6685 "mixed mask and nonmask vector types\n");
6686 return false;
6689 /* Supportable by target? */
6691 vec_mode = TYPE_MODE (vectype);
6692 if (code == MULT_HIGHPART_EXPR)
6693 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6694 else
6696 optab = optab_for_tree_code (code, vectype, optab_default);
6697 if (!optab)
6699 if (dump_enabled_p ())
6700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6701 "no optab.\n");
6702 return false;
6704 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6705 || optab_libfunc (optab, vec_mode));
6708 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6709 if (!target_support_p || using_emulated_vectors_p)
6711 if (dump_enabled_p ())
6712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6713 "op not supported by target.\n");
6714 /* When vec_mode is not a vector mode and we have verified that the
6715 ops we do not have to lower (like AND) are natively supported, let
6716 those through even when the mode isn't word_mode. For ops we do
6717 have to lower, the lowering code assumes we are dealing with
6718 word_mode. */
6719 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6720 || !target_support_p)
6721 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6722 /* Check only during analysis. */
6723 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6725 if (dump_enabled_p ())
6726 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6727 return false;
6729 if (dump_enabled_p ())
6730 dump_printf_loc (MSG_NOTE, vect_location,
6731 "proceeding using word mode.\n");
6732 using_emulated_vectors_p = true;
6735 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6736 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6737 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6738 internal_fn cond_fn = get_conditional_internal_fn (code);
6739 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6741 /* If operating on inactive elements could generate spurious traps,
6742 we need to restrict the operation to active lanes. Note that this
6743 specifically doesn't apply to unhoisted invariants, since they
6744 operate on the same value for every lane.
6746 Similarly, if this operation is part of a reduction, a fully-masked
6747 loop should only change the active lanes of the reduction chain,
6748 keeping the inactive lanes as-is. */
6749 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6750 || reduc_idx >= 0);
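/* For instance, an integer division whose scalar form could trap on
a zero divisor must only be executed on the active lanes of a loop
using partial vectors (masks or lengths), whereas an unhoisted
invariant addition computes the same value in every lane and needs
no masking (illustrative example). */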
6752 if (!vec_stmt) /* transformation not required. */
6754 if (loop_vinfo
6755 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6756 && mask_out_inactive)
6758 if (cond_len_fn != IFN_LAST
6759 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6760 OPTIMIZE_FOR_SPEED))
6761 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6763 else if (cond_fn != IFN_LAST
6764 && direct_internal_fn_supported_p (cond_fn, vectype,
6765 OPTIMIZE_FOR_SPEED))
6766 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6767 vectype, NULL);
6768 else
6770 if (dump_enabled_p ())
6771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6772 "can't use a fully-masked loop because no"
6773 " conditional operation is available.\n");
6774 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6778 /* Put types on constant and invariant SLP children. */
6779 if (slp_node
6780 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6781 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6782 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6784 if (dump_enabled_p ())
6785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6786 "incompatible vector types for invariants\n");
6787 return false;
6790 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6791 DUMP_VECT_SCOPE ("vectorizable_operation");
6792 vect_model_simple_cost (vinfo, stmt_info,
6793 ncopies, dt, ndts, slp_node, cost_vec);
6794 if (using_emulated_vectors_p)
6796 /* The above vect_model_simple_cost call handles constants
6797 in the prologue and (mis-)costs one of the stmts as
6798 vector stmt. See below for the actual lowering that will
6799 be applied. */
6800 unsigned n
6801 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6802 switch (code)
6804 case PLUS_EXPR:
6805 n *= 5;
6806 break;
6807 case MINUS_EXPR:
6808 n *= 6;
6809 break;
6810 case NEGATE_EXPR:
6811 n *= 4;
6812 break;
6813 default:
6814 /* Bit operations do not have extra cost and are accounted
6815 as vector stmt by vect_model_simple_cost. */
6816 n = 0;
6817 break;
6819 if (n != 0)
6821 /* We also need to materialize two large constants. */
6822 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6823 0, vect_prologue);
6824 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6825 0, vect_body);
6828 return true;
6831 /* Transform. */
6833 if (dump_enabled_p ())
6834 dump_printf_loc (MSG_NOTE, vect_location,
6835 "transform binary/unary operation.\n");
6837 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6838 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6840 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6841 vectors with unsigned elements, but the result is signed. So, we
6842 need to compute the MINUS_EXPR into vectype temporary and
6843 VIEW_CONVERT_EXPR it into the final vectype_out result. */
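/* E.g. for d = p - q on two int pointers (illustrative), the
subtraction is emitted on the unsigned vector type and the signed
ptrdiff result is produced by the VIEW_CONVERT_EXPR added near the
end of the transform loop below. */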
6844 tree vec_cvt_dest = NULL_TREE;
6845 if (orig_code == POINTER_DIFF_EXPR)
6847 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6848 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6850 /* Handle def. */
6851 else
6852 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6854 /* In case the vectorization factor (VF) is bigger than the number
6855 of elements that we can fit in a vectype (nunits), we have to generate
6856 more than one vector stmt - i.e., we need to "unroll" the
6857 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6858 from one copy of the vector stmt to the next, in the field
6859 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6860 stages to find the correct vector defs to be used when vectorizing
6861 stmts that use the defs of the current stmt. The example below
6862 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6863 we need to create 4 vectorized stmts):
6865 before vectorization:
6866 RELATED_STMT VEC_STMT
6867 S1: x = memref - -
6868 S2: z = x + 1 - -
6870 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6871 there):
6872 RELATED_STMT VEC_STMT
6873 VS1_0: vx0 = memref0 VS1_1 -
6874 VS1_1: vx1 = memref1 VS1_2 -
6875 VS1_2: vx2 = memref2 VS1_3 -
6876 VS1_3: vx3 = memref3 - -
6877 S1: x = load - VS1_0
6878 S2: z = x + 1 - -
6880 step 2: vectorize stmt S2 (done here):
6881 To vectorize stmt S2 we first need to find the relevant vector
6882 def for the first operand 'x'. This is, as usual, obtained from
6883 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6884 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6885 relevant vector def 'vx0'. Having found 'vx0' we can generate
6886 the vector stmt VS2_0, and as usual, record it in the
6887 STMT_VINFO_VEC_STMT of stmt S2.
6888 When creating the second copy (VS2_1), we obtain the relevant vector
6889 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6890 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6891 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6892 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6893 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6894 chain of stmts and pointers:
6895 RELATED_STMT VEC_STMT
6896 VS1_0: vx0 = memref0 VS1_1 -
6897 VS1_1: vx1 = memref1 VS1_2 -
6898 VS1_2: vx2 = memref2 VS1_3 -
6899 VS1_3: vx3 = memref3 - -
6900 S1: x = load - VS1_0
6901 VS2_0: vz0 = vx0 + v1 VS2_1 -
6902 VS2_1: vz1 = vx1 + v1 VS2_2 -
6903 VS2_2: vz2 = vx2 + v1 VS2_3 -
6904 VS2_3: vz3 = vx3 + v1 - -
6905 S2: z = x + 1 - VS2_0 */
6907 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6908 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6909 /* Arguments are ready. Create the new vector stmt. */
6910 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6912 gimple *new_stmt = NULL;
6913 vop1 = ((op_type == binary_op || op_type == ternary_op)
6914 ? vec_oprnds1[i] : NULL_TREE);
6915 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6916 if (using_emulated_vectors_p
6917 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6919 /* Lower the operation. This follows vector lowering. */
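/* The scheme, sketched here for PLUS_EXPR (MINUS and NEGATE use the
same masks with different fix-ups below): low_bits masks off the
most significant bit of every element and high_bits keeps only those
bits. Adding the masked operands in word_mode cannot carry across
element boundaries, and the element MSBs are then restored by
XOR-ing in (wvop0 ^ wvop1) & high_bits. */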
6920 unsigned int width = vector_element_bits (vectype);
6921 tree inner_type = TREE_TYPE (vectype);
6922 tree word_type
6923 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
6924 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6925 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
6926 tree high_bits
6927 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
6928 tree wvop0 = make_ssa_name (word_type);
6929 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6930 build1 (VIEW_CONVERT_EXPR,
6931 word_type, vop0));
6932 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6933 tree result_low, signs;
6934 if (code == PLUS_EXPR || code == MINUS_EXPR)
6936 tree wvop1 = make_ssa_name (word_type);
6937 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6938 build1 (VIEW_CONVERT_EXPR,
6939 word_type, vop1));
6940 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6941 signs = make_ssa_name (word_type);
6942 new_stmt = gimple_build_assign (signs,
6943 BIT_XOR_EXPR, wvop0, wvop1);
6944 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6945 tree b_low = make_ssa_name (word_type);
6946 new_stmt = gimple_build_assign (b_low,
6947 BIT_AND_EXPR, wvop1, low_bits);
6948 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6949 tree a_low = make_ssa_name (word_type);
6950 if (code == PLUS_EXPR)
6951 new_stmt = gimple_build_assign (a_low,
6952 BIT_AND_EXPR, wvop0, low_bits);
6953 else
6954 new_stmt = gimple_build_assign (a_low,
6955 BIT_IOR_EXPR, wvop0, high_bits);
6956 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6957 if (code == MINUS_EXPR)
6959 new_stmt = gimple_build_assign (NULL_TREE,
6960 BIT_NOT_EXPR, signs);
6961 signs = make_ssa_name (word_type);
6962 gimple_assign_set_lhs (new_stmt, signs);
6963 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6965 new_stmt = gimple_build_assign (NULL_TREE,
6966 BIT_AND_EXPR, signs, high_bits);
6967 signs = make_ssa_name (word_type);
6968 gimple_assign_set_lhs (new_stmt, signs);
6969 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6970 result_low = make_ssa_name (word_type);
6971 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
6972 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6974 else
6976 tree a_low = make_ssa_name (word_type);
6977 new_stmt = gimple_build_assign (a_low,
6978 BIT_AND_EXPR, wvop0, low_bits);
6979 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6980 signs = make_ssa_name (word_type);
6981 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
6982 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6983 new_stmt = gimple_build_assign (NULL_TREE,
6984 BIT_AND_EXPR, signs, high_bits);
6985 signs = make_ssa_name (word_type);
6986 gimple_assign_set_lhs (new_stmt, signs);
6987 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6988 result_low = make_ssa_name (word_type);
6989 new_stmt = gimple_build_assign (result_low,
6990 MINUS_EXPR, high_bits, a_low);
6991 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6993 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
6994 signs);
6995 result_low = make_ssa_name (word_type);
6996 gimple_assign_set_lhs (new_stmt, result_low);
6997 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6998 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
6999 build1 (VIEW_CONVERT_EXPR,
7000 vectype, result_low));
7001 new_temp = make_ssa_name (vectype);
7002 gimple_assign_set_lhs (new_stmt, new_temp);
7003 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7005 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7007 tree mask;
7008 if (masked_loop_p)
7009 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7010 vec_num * ncopies, vectype, i);
7011 else
7012 /* Dummy mask. */
7013 mask = build_minus_one_cst (truth_type_for (vectype));
7014 auto_vec<tree> vops (6);
7015 vops.quick_push (mask);
7016 vops.quick_push (vop0);
7017 if (vop1)
7018 vops.quick_push (vop1);
7019 if (vop2)
7020 vops.quick_push (vop2);
7021 if (reduc_idx >= 0)
7023 /* Perform the operation on active elements only and take
7024 inactive elements from the reduction chain input. */
7025 gcc_assert (!vop2);
7026 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7028 else
7030 auto else_value = targetm.preferred_else_value
7031 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7032 vops.quick_push (else_value);
7034 if (len_loop_p)
7036 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7037 vec_num * ncopies, vectype, i, 1);
7038 signed char biasval
7039 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7040 tree bias = build_int_cst (intQI_type_node, biasval);
7041 vops.quick_push (len);
7042 vops.quick_push (bias);
7044 gcall *call
7045 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7046 : cond_len_fn,
7047 vops);
7048 new_temp = make_ssa_name (vec_dest, call);
7049 gimple_call_set_lhs (call, new_temp);
7050 gimple_call_set_nothrow (call, true);
7051 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7052 new_stmt = call;
7054 else
7056 tree mask = NULL_TREE;
7057 /* When combining two masks, check if either of them is elsewhere
7058 combined with a loop mask; if that's the case we can mark that the
7059 new combined mask doesn't need to be combined with a loop mask. */
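/* E.g. (illustrative only): when vectorizing
     if (a[i] < x && b[i] > y) ...
   in a fully-masked loop, the scalar condition a[i] < x may already be
   recorded as being AND-ed with a loop mask elsewhere (say for a masked
   load); folding the loop mask into one operand here means the combined
   mask already includes the loop mask and need not be AND-ed with it
   again.  */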
7060 if (masked_loop_p
7061 && code == BIT_AND_EXPR
7062 && VECTOR_BOOLEAN_TYPE_P (vectype))
7064 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7065 ncopies}))
7067 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7068 vec_num * ncopies, vectype, i);
7070 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7071 vop0, gsi);
7074 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7075 ncopies }))
7077 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7078 vec_num * ncopies, vectype, i);
7080 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7081 vop1, gsi);
7085 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7086 new_temp = make_ssa_name (vec_dest, new_stmt);
7087 gimple_assign_set_lhs (new_stmt, new_temp);
7088 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7089 if (using_emulated_vectors_p)
7090 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7092 /* Enter the combined value into the vector cond hash so we don't
7093 AND it with a loop mask again. */
7094 if (mask)
7095 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7098 if (vec_cvt_dest)
7100 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7101 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7102 new_temp);
7103 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7104 gimple_assign_set_lhs (new_stmt, new_temp);
7105 vect_finish_stmt_generation (vinfo, stmt_info,
7106 new_stmt, gsi);
7109 if (slp_node)
7110 slp_node->push_vec_def (new_stmt);
7111 else
7112 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7115 if (!slp_node)
7116 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7118 vec_oprnds0.release ();
7119 vec_oprnds1.release ();
7120 vec_oprnds2.release ();
7122 return true;
7125 /* A helper function to ensure data reference DR_INFO's base alignment. */
7127 static void
7128 ensure_base_align (dr_vec_info *dr_info)
7130 /* Alignment is only analyzed for the first element of a DR group;
7131 use that to determine the base alignment we need to enforce. */
7132 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7133 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7135 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7137 if (dr_info->base_misaligned)
7139 tree base_decl = dr_info->base_decl;
7141 // We should only be able to increase the alignment of a base object if
7142 // we know what its new alignment should be at compile time.
7143 unsigned HOST_WIDE_INT align_base_to =
7144 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7146 if (decl_in_symtab_p (base_decl))
7147 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7148 else if (DECL_ALIGN (base_decl) < align_base_to)
7150 SET_DECL_ALIGN (base_decl, align_base_to);
7151 DECL_USER_ALIGN (base_decl) = 1;
7153 dr_info->base_misaligned = false;
7158 /* Function get_group_alias_ptr_type.
7160 Return the alias type for the group starting at FIRST_STMT_INFO. */
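/* E.g. (hypothetical): if one member of the group stores through an
   int * view of a buffer and another through a float * view, their
   alias sets conflict and we fall back to ptr_type_node, which
   conflicts with (aliases) everything.  */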
7162 static tree
7163 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7165 struct data_reference *first_dr, *next_dr;
7167 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7168 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7169 while (next_stmt_info)
7171 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7172 if (get_alias_set (DR_REF (first_dr))
7173 != get_alias_set (DR_REF (next_dr)))
7175 if (dump_enabled_p ())
7176 dump_printf_loc (MSG_NOTE, vect_location,
7177 "conflicting alias set types.\n");
7178 return ptr_type_node;
7180 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7182 return reference_alias_ptr_type (DR_REF (first_dr));
7186 /* Function scan_operand_equal_p.
7188 Helper function for check_scan_store. Compare two references
7189 with .GOMP_SIMD_LANE bases. */
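/* E.g. (illustrative): the store D.2042[_21] from the input phase and
   a later occurrence that earlier passes rewrote as
     MEM[(int *)&D.2042 + _21 * 4]
   should compare equal; the MEM_REF/POINTER_PLUS_EXPR wrapping and the
   offset-times-element-size form are peeled off before comparing.  */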
7191 static bool
7192 scan_operand_equal_p (tree ref1, tree ref2)
7194 tree ref[2] = { ref1, ref2 };
7195 poly_int64 bitsize[2], bitpos[2];
7196 tree offset[2], base[2];
7197 for (int i = 0; i < 2; ++i)
7199 machine_mode mode;
7200 int unsignedp, reversep, volatilep = 0;
7201 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7202 &offset[i], &mode, &unsignedp,
7203 &reversep, &volatilep);
7204 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7205 return false;
7206 if (TREE_CODE (base[i]) == MEM_REF
7207 && offset[i] == NULL_TREE
7208 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7210 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7211 if (is_gimple_assign (def_stmt)
7212 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7213 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7214 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7216 if (maybe_ne (mem_ref_offset (base[i]), 0))
7217 return false;
7218 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7219 offset[i] = gimple_assign_rhs2 (def_stmt);
7224 if (!operand_equal_p (base[0], base[1], 0))
7225 return false;
7226 if (maybe_ne (bitsize[0], bitsize[1]))
7227 return false;
7228 if (offset[0] != offset[1])
7230 if (!offset[0] || !offset[1])
7231 return false;
7232 if (!operand_equal_p (offset[0], offset[1], 0))
7234 tree step[2];
7235 for (int i = 0; i < 2; ++i)
7237 step[i] = integer_one_node;
7238 if (TREE_CODE (offset[i]) == SSA_NAME)
7240 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7241 if (is_gimple_assign (def_stmt)
7242 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7243 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7244 == INTEGER_CST))
7246 step[i] = gimple_assign_rhs2 (def_stmt);
7247 offset[i] = gimple_assign_rhs1 (def_stmt);
7250 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7252 step[i] = TREE_OPERAND (offset[i], 1);
7253 offset[i] = TREE_OPERAND (offset[i], 0);
7255 tree rhs1 = NULL_TREE;
7256 if (TREE_CODE (offset[i]) == SSA_NAME)
7258 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7259 if (gimple_assign_cast_p (def_stmt))
7260 rhs1 = gimple_assign_rhs1 (def_stmt);
7262 else if (CONVERT_EXPR_P (offset[i]))
7263 rhs1 = TREE_OPERAND (offset[i], 0);
7264 if (rhs1
7265 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7266 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7267 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7268 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7269 offset[i] = rhs1;
7271 if (!operand_equal_p (offset[0], offset[1], 0)
7272 || !operand_equal_p (step[0], step[1], 0))
7273 return false;
7276 return true;
7280 enum scan_store_kind {
7281 /* Normal permutation. */
7282 scan_store_kind_perm,
7284 /* Whole vector left shift permutation with zero init. */
7285 scan_store_kind_lshift_zero,
7287 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7288 scan_store_kind_lshift_cond
7291 /* Function scan_store_can_perm_p.
7293 Verify if we can perform the needed permutations or whole vector shifts.
7294 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
7295 USE_WHOLE_VECTOR is a vector of enum scan_store_kind recording which
7296 operation to do at each step. */
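/* For instance (a sketch assuming an 8-element vector, so
   units_log2 == 3), the permutations whose support is checked are
     { 0, 8, 9, 10, 11, 12, 13, 14 }   (step 0)
     { 0, 1, 8, 9, 10, 11, 12, 13 }    (step 1)
     { 0, 1, 2, 3, 8, 9, 10, 11 }      (step 2)
     { 7, 7, 7, 7, 7, 7, 7, 7 }        (final broadcast)
   matching the VEC_PERM_EXPR sequence shown in the comment inside
   check_scan_store below.  */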
7298 static int
7299 scan_store_can_perm_p (tree vectype, tree init,
7300 vec<enum scan_store_kind> *use_whole_vector = NULL)
7302 enum machine_mode vec_mode = TYPE_MODE (vectype);
7303 unsigned HOST_WIDE_INT nunits;
7304 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7305 return -1;
7306 int units_log2 = exact_log2 (nunits);
7307 if (units_log2 <= 0)
7308 return -1;
7310 int i;
7311 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7312 for (i = 0; i <= units_log2; ++i)
7314 unsigned HOST_WIDE_INT j, k;
7315 enum scan_store_kind kind = scan_store_kind_perm;
7316 vec_perm_builder sel (nunits, nunits, 1);
7317 sel.quick_grow (nunits);
7318 if (i == units_log2)
7320 for (j = 0; j < nunits; ++j)
7321 sel[j] = nunits - 1;
7323 else
7325 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7326 sel[j] = j;
7327 for (k = 0; j < nunits; ++j, ++k)
7328 sel[j] = nunits + k;
7330 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7331 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7333 if (i == units_log2)
7334 return -1;
7336 if (whole_vector_shift_kind == scan_store_kind_perm)
7338 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7339 return -1;
7340 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7341 /* Whole vector shifts shift in zeros, so if INIT is an all-zeros
7342 constant, there is no need to do anything further. */
7343 if ((TREE_CODE (init) != INTEGER_CST
7344 && TREE_CODE (init) != REAL_CST)
7345 || !initializer_zerop (init))
7347 tree masktype = truth_type_for (vectype);
7348 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7349 return -1;
7350 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7353 kind = whole_vector_shift_kind;
7355 if (use_whole_vector)
7357 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7358 use_whole_vector->safe_grow_cleared (i, true);
7359 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7360 use_whole_vector->safe_push (kind);
7364 return units_log2;
7368 /* Function check_scan_store.
7370 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7372 static bool
7373 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7374 enum vect_def_type rhs_dt, bool slp, tree mask,
7375 vect_memory_access_type memory_access_type)
7377 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7378 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7379 tree ref_type;
7381 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7382 if (slp
7383 || mask
7384 || memory_access_type != VMAT_CONTIGUOUS
7385 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7386 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7387 || loop_vinfo == NULL
7388 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7389 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7390 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7391 || !integer_zerop (DR_INIT (dr_info->dr))
7392 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7393 || !alias_sets_conflict_p (get_alias_set (vectype),
7394 get_alias_set (TREE_TYPE (ref_type))))
7396 if (dump_enabled_p ())
7397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7398 "unsupported OpenMP scan store.\n");
7399 return false;
7402 /* We need to pattern match code built by OpenMP lowering and simplified
7403 by following optimizations into something we can handle.
7404 #pragma omp simd reduction(inscan,+:r)
7405 for (...)
7407 r += something ();
7408 #pragma omp scan inclusive (r)
7409 use (r);
7411 shall have body with:
7412 // Initialization for input phase, store the reduction initializer:
7413 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7414 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7415 D.2042[_21] = 0;
7416 // Actual input phase:
7418 r.0_5 = D.2042[_20];
7419 _6 = _4 + r.0_5;
7420 D.2042[_20] = _6;
7421 // Initialization for scan phase:
7422 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7423 _26 = D.2043[_25];
7424 _27 = D.2042[_25];
7425 _28 = _26 + _27;
7426 D.2043[_25] = _28;
7427 D.2042[_25] = _28;
7428 // Actual scan phase:
7430 r.1_8 = D.2042[_20];
7432 The "omp simd array" variable D.2042 holds the privatized copy used
7433 inside of the loop and D.2043 is another one that holds copies of
7434 the current original list item. The separate GOMP_SIMD_LANE ifn
7435 kinds are there in order to allow optimizing the initializer store
7436 and combiner sequence, e.g. if it is originally some C++ish user
7437 defined reduction, but allow the vectorizer to pattern recognize it
7438 and turn into the appropriate vectorized scan.
7440 For exclusive scan, this is slightly different:
7441 #pragma omp simd reduction(inscan,+:r)
7442 for (...)
7444 use (r);
7445 #pragma omp scan exclusive (r)
7446 r += something ();
7448 shall have body with:
7449 // Initialization for input phase, store the reduction initializer:
7450 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7451 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7452 D.2042[_21] = 0;
7453 // Actual input phase:
7455 r.0_5 = D.2042[_20];
7456 _6 = _4 + r.0_5;
7457 D.2042[_20] = _6;
7458 // Initialization for scan phase:
7459 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7460 _26 = D.2043[_25];
7461 D.2044[_25] = _26;
7462 _27 = D.2042[_25];
7463 _28 = _26 + _27;
7464 D.2043[_25] = _28;
7465 // Actual scan phase:
7467 r.1_8 = D.2044[_20];
7468 ... */
7470 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7472 /* Match the D.2042[_21] = 0; store above. Just require that
7473 it is a constant or external definition store. */
7474 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7476 fail_init:
7477 if (dump_enabled_p ())
7478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7479 "unsupported OpenMP scan initializer store.\n");
7480 return false;
7483 if (! loop_vinfo->scan_map)
7484 loop_vinfo->scan_map = new hash_map<tree, tree>;
7485 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7486 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7487 if (cached)
7488 goto fail_init;
7489 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7491 /* These stores can be vectorized normally. */
7492 return true;
7495 if (rhs_dt != vect_internal_def)
7497 fail:
7498 if (dump_enabled_p ())
7499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7500 "unsupported OpenMP scan combiner pattern.\n");
7501 return false;
7504 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7505 tree rhs = gimple_assign_rhs1 (stmt);
7506 if (TREE_CODE (rhs) != SSA_NAME)
7507 goto fail;
7509 gimple *other_store_stmt = NULL;
7510 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7511 bool inscan_var_store
7512 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7514 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7516 if (!inscan_var_store)
7518 use_operand_p use_p;
7519 imm_use_iterator iter;
7520 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7522 gimple *use_stmt = USE_STMT (use_p);
7523 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7524 continue;
7525 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7526 || !is_gimple_assign (use_stmt)
7527 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7528 || other_store_stmt
7529 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7530 goto fail;
7531 other_store_stmt = use_stmt;
7533 if (other_store_stmt == NULL)
7534 goto fail;
7535 rhs = gimple_assign_lhs (other_store_stmt);
7536 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7537 goto fail;
7540 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7542 use_operand_p use_p;
7543 imm_use_iterator iter;
7544 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7546 gimple *use_stmt = USE_STMT (use_p);
7547 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7548 continue;
7549 if (other_store_stmt)
7550 goto fail;
7551 other_store_stmt = use_stmt;
7554 else
7555 goto fail;
7557 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7558 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7559 || !is_gimple_assign (def_stmt)
7560 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7561 goto fail;
7563 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7564 /* For pointer addition, we should use the normal plus for the vector
7565 operation. */
7566 switch (code)
7568 case POINTER_PLUS_EXPR:
7569 code = PLUS_EXPR;
7570 break;
7571 case MULT_HIGHPART_EXPR:
7572 goto fail;
7573 default:
7574 break;
7576 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7577 goto fail;
7579 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7580 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7581 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7582 goto fail;
7584 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7585 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7586 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7587 || !gimple_assign_load_p (load1_stmt)
7588 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7589 || !gimple_assign_load_p (load2_stmt))
7590 goto fail;
7592 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7593 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7594 if (load1_stmt_info == NULL
7595 || load2_stmt_info == NULL
7596 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7597 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7598 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7599 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7600 goto fail;
7602 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7604 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7605 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7606 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7607 goto fail;
7608 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7609 tree lrhs;
7610 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7611 lrhs = rhs1;
7612 else
7613 lrhs = rhs2;
7614 use_operand_p use_p;
7615 imm_use_iterator iter;
7616 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7618 gimple *use_stmt = USE_STMT (use_p);
7619 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7620 continue;
7621 if (other_store_stmt)
7622 goto fail;
7623 other_store_stmt = use_stmt;
7627 if (other_store_stmt == NULL)
7628 goto fail;
7629 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7630 || !gimple_store_p (other_store_stmt))
7631 goto fail;
7633 stmt_vec_info other_store_stmt_info
7634 = loop_vinfo->lookup_stmt (other_store_stmt);
7635 if (other_store_stmt_info == NULL
7636 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7637 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7638 goto fail;
7640 gimple *stmt1 = stmt;
7641 gimple *stmt2 = other_store_stmt;
7642 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7643 std::swap (stmt1, stmt2);
7644 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7645 gimple_assign_rhs1 (load2_stmt)))
7647 std::swap (rhs1, rhs2);
7648 std::swap (load1_stmt, load2_stmt);
7649 std::swap (load1_stmt_info, load2_stmt_info);
7651 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7652 gimple_assign_rhs1 (load1_stmt)))
7653 goto fail;
7655 tree var3 = NULL_TREE;
7656 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7657 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7658 gimple_assign_rhs1 (load2_stmt)))
7659 goto fail;
7660 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7662 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7663 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7664 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7665 goto fail;
7666 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7667 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7668 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7669 || lookup_attribute ("omp simd inscan exclusive",
7670 DECL_ATTRIBUTES (var3)))
7671 goto fail;
7674 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7675 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7676 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7677 goto fail;
7679 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7680 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7681 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7682 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7683 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7684 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7685 goto fail;
7687 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7688 std::swap (var1, var2);
7690 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7692 if (!lookup_attribute ("omp simd inscan exclusive",
7693 DECL_ATTRIBUTES (var1)))
7694 goto fail;
7695 var1 = var3;
7698 if (loop_vinfo->scan_map == NULL)
7699 goto fail;
7700 tree *init = loop_vinfo->scan_map->get (var1);
7701 if (init == NULL)
7702 goto fail;
7704 /* The IL is as expected, now check if we can actually vectorize it.
7705 Inclusive scan:
7706 _26 = D.2043[_25];
7707 _27 = D.2042[_25];
7708 _28 = _26 + _27;
7709 D.2043[_25] = _28;
7710 D.2042[_25] = _28;
7711 should be vectorized as (where _40 is the vectorized rhs
7712 from the D.2042[_21] = 0; store):
7713 _30 = MEM <vector(8) int> [(int *)&D.2043];
7714 _31 = MEM <vector(8) int> [(int *)&D.2042];
7715 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7716 _33 = _31 + _32;
7717 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7718 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7719 _35 = _33 + _34;
7720 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7721 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7722 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7723 _37 = _35 + _36;
7724 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7725 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7726 _38 = _30 + _37;
7727 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7728 MEM <vector(8) int> [(int *)&D.2043] = _39;
7729 MEM <vector(8) int> [(int *)&D.2042] = _38;
7730 Exclusive scan:
7731 _26 = D.2043[_25];
7732 D.2044[_25] = _26;
7733 _27 = D.2042[_25];
7734 _28 = _26 + _27;
7735 D.2043[_25] = _28;
7736 should be vectorized as (where _40 is the vectorized rhs
7737 from the D.2042[_21] = 0; store):
7738 _30 = MEM <vector(8) int> [(int *)&D.2043];
7739 _31 = MEM <vector(8) int> [(int *)&D.2042];
7740 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7741 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7742 _34 = _32 + _33;
7743 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7744 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7745 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7746 _36 = _34 + _35;
7747 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7748 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7749 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7750 _38 = _36 + _37;
7751 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7752 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7753 _39 = _30 + _38;
7754 _50 = _31 + _39;
7755 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7756 MEM <vector(8) int> [(int *)&D.2044] = _39;
7757 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7758 enum machine_mode vec_mode = TYPE_MODE (vectype);
7759 optab optab = optab_for_tree_code (code, vectype, optab_default);
7760 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7761 goto fail;
7763 int units_log2 = scan_store_can_perm_p (vectype, *init);
7764 if (units_log2 == -1)
7765 goto fail;
7767 return true;
7771 /* Function vectorizable_scan_store.
7773 Helper of vectorizable_store, with the same arguments as vectorizable_store.
7774 Handle only the transformation; checking is done in check_scan_store. */
7776 static bool
7777 vectorizable_scan_store (vec_info *vinfo,
7778 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7779 gimple **vec_stmt, int ncopies)
7781 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7782 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7783 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7784 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7786 if (dump_enabled_p ())
7787 dump_printf_loc (MSG_NOTE, vect_location,
7788 "transform scan store. ncopies = %d\n", ncopies);
7790 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7791 tree rhs = gimple_assign_rhs1 (stmt);
7792 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7794 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7795 bool inscan_var_store
7796 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7798 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7800 use_operand_p use_p;
7801 imm_use_iterator iter;
7802 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7804 gimple *use_stmt = USE_STMT (use_p);
7805 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7806 continue;
7807 rhs = gimple_assign_lhs (use_stmt);
7808 break;
7812 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7813 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7814 if (code == POINTER_PLUS_EXPR)
7815 code = PLUS_EXPR;
7816 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7817 && commutative_tree_code (code));
7818 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7819 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7820 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7821 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7822 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7823 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7824 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7825 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7826 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7827 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7828 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7830 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7832 std::swap (rhs1, rhs2);
7833 std::swap (var1, var2);
7834 std::swap (load1_dr_info, load2_dr_info);
7837 tree *init = loop_vinfo->scan_map->get (var1);
7838 gcc_assert (init);
7840 unsigned HOST_WIDE_INT nunits;
7841 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7842 gcc_unreachable ();
7843 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7844 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7845 gcc_assert (units_log2 > 0);
7846 auto_vec<tree, 16> perms;
7847 perms.quick_grow (units_log2 + 1);
7848 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7849 for (int i = 0; i <= units_log2; ++i)
7851 unsigned HOST_WIDE_INT j, k;
7852 vec_perm_builder sel (nunits, nunits, 1);
7853 sel.quick_grow (nunits);
7854 if (i == units_log2)
7855 for (j = 0; j < nunits; ++j)
7856 sel[j] = nunits - 1;
7857 else
7859 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7860 sel[j] = j;
7861 for (k = 0; j < nunits; ++j, ++k)
7862 sel[j] = nunits + k;
7864 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7865 if (!use_whole_vector.is_empty ()
7866 && use_whole_vector[i] != scan_store_kind_perm)
7868 if (zero_vec == NULL_TREE)
7869 zero_vec = build_zero_cst (vectype);
7870 if (masktype == NULL_TREE
7871 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7872 masktype = truth_type_for (vectype);
7873 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7875 else
7876 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7879 tree vec_oprnd1 = NULL_TREE;
7880 tree vec_oprnd2 = NULL_TREE;
7881 tree vec_oprnd3 = NULL_TREE;
7882 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7883 tree dataref_offset = build_int_cst (ref_type, 0);
7884 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7885 vectype, VMAT_CONTIGUOUS);
7886 tree ldataref_ptr = NULL_TREE;
7887 tree orig = NULL_TREE;
7888 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7889 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7890 auto_vec<tree> vec_oprnds1;
7891 auto_vec<tree> vec_oprnds2;
7892 auto_vec<tree> vec_oprnds3;
7893 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7894 *init, &vec_oprnds1,
7895 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7896 rhs2, &vec_oprnds3);
7897 for (int j = 0; j < ncopies; j++)
7899 vec_oprnd1 = vec_oprnds1[j];
7900 if (ldataref_ptr == NULL)
7901 vec_oprnd2 = vec_oprnds2[j];
7902 vec_oprnd3 = vec_oprnds3[j];
7903 if (j == 0)
7904 orig = vec_oprnd3;
7905 else if (!inscan_var_store)
7906 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7908 if (ldataref_ptr)
7910 vec_oprnd2 = make_ssa_name (vectype);
7911 tree data_ref = fold_build2 (MEM_REF, vectype,
7912 unshare_expr (ldataref_ptr),
7913 dataref_offset);
7914 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7915 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7916 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7917 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7918 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7921 tree v = vec_oprnd2;
7922 for (int i = 0; i < units_log2; ++i)
7924 tree new_temp = make_ssa_name (vectype);
7925 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7926 (zero_vec
7927 && (use_whole_vector[i]
7928 != scan_store_kind_perm))
7929 ? zero_vec : vec_oprnd1, v,
7930 perms[i]);
7931 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7932 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7933 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7935 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7937 /* Whole vector shift shifted in zero bits, but if *init
7938 is not initializer_zerop, we need to replace those elements
7939 with elements from vec_oprnd1. */
7940 tree_vector_builder vb (masktype, nunits, 1);
7941 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7942 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7943 ? boolean_false_node : boolean_true_node);
7945 tree new_temp2 = make_ssa_name (vectype);
7946 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7947 new_temp, vec_oprnd1);
7948 vect_finish_stmt_generation (vinfo, stmt_info,
7949 g, gsi);
7950 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7951 new_temp = new_temp2;
7954 /* For exclusive scan, perform the perms[i] permutation once
7955 more. */
7956 if (i == 0
7957 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7958 && v == vec_oprnd2)
7960 v = new_temp;
7961 --i;
7962 continue;
7965 tree new_temp2 = make_ssa_name (vectype);
7966 g = gimple_build_assign (new_temp2, code, v, new_temp);
7967 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7968 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7970 v = new_temp2;
7973 tree new_temp = make_ssa_name (vectype);
7974 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7975 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7976 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7978 tree last_perm_arg = new_temp;
7979 /* For exclusive scan, new_temp computed above is the exclusive scan
7980 prefix sum. Turn it into inclusive prefix sum for the broadcast
7981 of the last element into orig. */
7982 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7984 last_perm_arg = make_ssa_name (vectype);
7985 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7986 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7987 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7990 orig = make_ssa_name (vectype);
7991 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7992 last_perm_arg, perms[units_log2]);
7993 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7994 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7996 if (!inscan_var_store)
7998 tree data_ref = fold_build2 (MEM_REF, vectype,
7999 unshare_expr (dataref_ptr),
8000 dataref_offset);
8001 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8002 g = gimple_build_assign (data_ref, new_temp);
8003 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8004 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8008 if (inscan_var_store)
8009 for (int j = 0; j < ncopies; j++)
8011 if (j != 0)
8012 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8014 tree data_ref = fold_build2 (MEM_REF, vectype,
8015 unshare_expr (dataref_ptr),
8016 dataref_offset);
8017 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8018 gimple *g = gimple_build_assign (data_ref, orig);
8019 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8020 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8022 return true;
8026 /* Function vectorizable_store.
8028 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
8029 that can be vectorized.
8030 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8031 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8032 Return true if STMT_INFO is vectorizable in this way. */
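/* For example (an illustrative sketch, names invented): a scalar store
   a[i] = x_5 in a loop vectorized with V4SI becomes something like
     MEM <vector(4) int> [(int *)vectp_a.7] = vect_x_5.8;
   possibly masked, strided, scattered or store-laned depending on the
   memory access type chosen below.  */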
8034 static bool
8035 vectorizable_store (vec_info *vinfo,
8036 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8037 gimple **vec_stmt, slp_tree slp_node,
8038 stmt_vector_for_cost *cost_vec)
8040 tree data_ref;
8041 tree vec_oprnd = NULL_TREE;
8042 tree elem_type;
8043 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8044 class loop *loop = NULL;
8045 machine_mode vec_mode;
8046 tree dummy;
8047 enum vect_def_type rhs_dt = vect_unknown_def_type;
8048 enum vect_def_type mask_dt = vect_unknown_def_type;
8049 tree dataref_ptr = NULL_TREE;
8050 tree dataref_offset = NULL_TREE;
8051 gimple *ptr_incr = NULL;
8052 int ncopies;
8053 int j;
8054 stmt_vec_info first_stmt_info;
8055 bool grouped_store;
8056 unsigned int group_size, i;
8057 bool slp = (slp_node != NULL);
8058 unsigned int vec_num;
8059 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8060 tree aggr_type;
8061 gather_scatter_info gs_info;
8062 poly_uint64 vf;
8063 vec_load_store_type vls_type;
8064 tree ref_type;
8066 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8067 return false;
8069 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8070 && ! vec_stmt)
8071 return false;
8073 /* Is vectorizable store? */
8075 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8076 slp_tree mask_node = NULL;
8077 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8079 tree scalar_dest = gimple_assign_lhs (assign);
8080 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8081 && is_pattern_stmt_p (stmt_info))
8082 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8083 if (TREE_CODE (scalar_dest) != ARRAY_REF
8084 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8085 && TREE_CODE (scalar_dest) != INDIRECT_REF
8086 && TREE_CODE (scalar_dest) != COMPONENT_REF
8087 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8088 && TREE_CODE (scalar_dest) != REALPART_EXPR
8089 && TREE_CODE (scalar_dest) != MEM_REF)
8090 return false;
8092 else
8094 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8095 if (!call || !gimple_call_internal_p (call))
8096 return false;
8098 internal_fn ifn = gimple_call_internal_fn (call);
8099 if (!internal_store_fn_p (ifn))
8100 return false;
8102 int mask_index = internal_fn_mask_index (ifn);
8103 if (mask_index >= 0 && slp_node)
8104 mask_index = vect_slp_child_index_for_operand
8105 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8106 if (mask_index >= 0
8107 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8108 &mask, &mask_node, &mask_dt,
8109 &mask_vectype))
8110 return false;
8113 /* Cannot have hybrid store SLP -- that would mean storing to the
8114 same location twice. */
8115 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8117 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8118 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8120 if (loop_vinfo)
8122 loop = LOOP_VINFO_LOOP (loop_vinfo);
8123 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8125 else
8126 vf = 1;
8128 /* Multiple types in SLP are handled by creating the appropriate number of
8129 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8130 case of SLP. */
8131 if (slp)
8132 ncopies = 1;
8133 else
8134 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8136 gcc_assert (ncopies >= 1);
8138 /* FORNOW. This restriction should be relaxed. */
8139 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8141 if (dump_enabled_p ())
8142 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8143 "multiple types in nested loop.\n");
8144 return false;
8147 tree op;
8148 slp_tree op_node;
8149 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8150 &op, &op_node, &rhs_dt, &rhs_vectype, &vls_type))
8151 return false;
8153 elem_type = TREE_TYPE (vectype);
8154 vec_mode = TYPE_MODE (vectype);
8156 if (!STMT_VINFO_DATA_REF (stmt_info))
8157 return false;
8159 vect_memory_access_type memory_access_type;
8160 enum dr_alignment_support alignment_support_scheme;
8161 int misalignment;
8162 poly_int64 poffset;
8163 internal_fn lanes_ifn;
8164 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8165 ncopies, &memory_access_type, &poffset,
8166 &alignment_support_scheme, &misalignment, &gs_info,
8167 &lanes_ifn))
8168 return false;
8170 if (mask)
8172 if (memory_access_type == VMAT_CONTIGUOUS)
8174 if (!VECTOR_MODE_P (vec_mode)
8175 || !can_vec_mask_load_store_p (vec_mode,
8176 TYPE_MODE (mask_vectype), false))
8177 return false;
8179 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8180 && (memory_access_type != VMAT_GATHER_SCATTER
8181 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8183 if (dump_enabled_p ())
8184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8185 "unsupported access type for masked store.\n");
8186 return false;
8188 else if (memory_access_type == VMAT_GATHER_SCATTER
8189 && gs_info.ifn == IFN_LAST
8190 && !gs_info.decl)
8192 if (dump_enabled_p ())
8193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8194 "unsupported masked emulated scatter.\n");
8195 return false;
8198 else
8200 /* FORNOW. In some cases we can vectorize even if the data type is
8201 not supported (e.g. array initialization with 0). */
8202 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8203 return false;
8206 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8207 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8208 && memory_access_type != VMAT_GATHER_SCATTER
8209 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8210 if (grouped_store)
8212 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8213 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8214 group_size = DR_GROUP_SIZE (first_stmt_info);
8216 else
8218 first_stmt_info = stmt_info;
8219 first_dr_info = dr_info;
8220 group_size = vec_num = 1;
8223 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8225 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8226 memory_access_type))
8227 return false;
8230 bool costing_p = !vec_stmt;
8231 if (costing_p) /* transformation not required. */
8233 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8235 if (loop_vinfo
8236 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8237 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8238 vls_type, group_size,
8239 memory_access_type, &gs_info,
8240 mask);
8242 if (slp_node
8243 && (!vect_maybe_update_slp_op_vectype (op_node, vectype)
8244 || (mask
8245 && !vect_maybe_update_slp_op_vectype (mask_node,
8246 mask_vectype))))
8248 if (dump_enabled_p ())
8249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8250 "incompatible vector types for invariants\n");
8251 return false;
8254 if (dump_enabled_p ()
8255 && memory_access_type != VMAT_ELEMENTWISE
8256 && memory_access_type != VMAT_GATHER_SCATTER
8257 && alignment_support_scheme != dr_aligned)
8258 dump_printf_loc (MSG_NOTE, vect_location,
8259 "Vectorizing an unaligned access.\n");
8261 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8263 /* As function vect_transform_stmt shows, for interleaving stores
8264 the whole chain is vectorized when the last store in the chain
8265 is reached; the other stores in the group are skipped. So we
8266 only want to cost the last one here, but it's not trivial to
8267 get hold of the last one, and since costing the first one is
8268 equivalent, use the first one instead. */
8269 if (grouped_store
8270 && !slp
8271 && first_stmt_info != stmt_info)
8272 return true;
8274 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8276 /* Transform. */
8278 ensure_base_align (dr_info);
8280 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8282 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8283 gcc_assert (!slp);
8284 if (costing_p)
8286 unsigned int inside_cost = 0, prologue_cost = 0;
8287 if (vls_type == VLS_STORE_INVARIANT)
8288 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8289 stmt_info, 0, vect_prologue);
8290 vect_get_store_cost (vinfo, stmt_info, ncopies,
8291 alignment_support_scheme, misalignment,
8292 &inside_cost, cost_vec);
8294 if (dump_enabled_p ())
8295 dump_printf_loc (MSG_NOTE, vect_location,
8296 "vect_model_store_cost: inside_cost = %d, "
8297 "prologue_cost = %d .\n",
8298 inside_cost, prologue_cost);
8300 return true;
8302 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8305 if (grouped_store)
8307 /* FORNOW */
8308 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8310 if (slp)
8312 grouped_store = false;
8313 /* VEC_NUM is the number of vect stmts to be created for this
8314 group. */
8315 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8316 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8317 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8318 == first_stmt_info);
8319 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8320 op = vect_get_store_rhs (first_stmt_info);
8322 else
8323 /* VEC_NUM is the number of vect stmts to be created for this
8324 group. */
8325 vec_num = group_size;
8327 ref_type = get_group_alias_ptr_type (first_stmt_info);
8329 else
8330 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8332 if (!costing_p && dump_enabled_p ())
8333 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8334 ncopies);
8336 /* Check if we need to update the prologue cost for an invariant,
8337 and update it accordingly if so. If it's not for an
8338 interleaving store, we can just check vls_type; but if
8339 it's for an interleaving store, we need to check the def_type
8340 of the stored value since the current vls_type is just
8341 for first_stmt_info. */
8342 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8344 gcc_assert (costing_p);
8345 if (slp)
8346 return;
8347 if (grouped_store)
8349 gcc_assert (store_rhs);
8350 enum vect_def_type cdt;
8351 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8352 if (cdt != vect_constant_def && cdt != vect_external_def)
8353 return;
8355 else if (vls_type != VLS_STORE_INVARIANT)
8356 return;
8357 *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
8358 0, vect_prologue);
8361 if (memory_access_type == VMAT_ELEMENTWISE
8362 || memory_access_type == VMAT_STRIDED_SLP)
8364 unsigned inside_cost = 0, prologue_cost = 0;
8365 gimple_stmt_iterator incr_gsi;
8366 bool insert_after;
8367 gimple *incr;
8368 tree offvar;
8369 tree ivstep;
8370 tree running_off;
8371 tree stride_base, stride_step, alias_off;
8372 tree vec_oprnd = NULL_TREE;
8373 tree dr_offset;
8374 unsigned int g;
8375 /* Checked by get_load_store_type. */
8376 unsigned int const_nunits = nunits.to_constant ();
8378 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8379 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8381 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8382 stride_base
8383 = fold_build_pointer_plus
8384 (DR_BASE_ADDRESS (first_dr_info->dr),
8385 size_binop (PLUS_EXPR,
8386 convert_to_ptrofftype (dr_offset),
8387 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8388 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8390 /* For a store with loop-invariant (but other than power-of-2)
8391 stride (i.e. not a grouped access) like so:
8393 for (i = 0; i < n; i += stride)
8394 array[i] = ...;
8396 we generate a new induction variable and new stores from
8397 the components of the (vectorized) rhs:
8399 for (j = 0; ; j += VF*stride)
8400 vectemp = ...;
8401 tmp1 = vectemp[0];
8402 array[j] = tmp1;
8403 tmp2 = vectemp[1];
8404 array[j + stride] = tmp2;
8408 unsigned nstores = const_nunits;
8409 unsigned lnel = 1;
8410 tree ltype = elem_type;
8411 tree lvectype = vectype;
8412 if (slp)
8414 if (group_size < const_nunits
8415 && const_nunits % group_size == 0)
8417 nstores = const_nunits / group_size;
8418 lnel = group_size;
8419 ltype = build_vector_type (elem_type, group_size);
8420 lvectype = vectype;
8422 /* First check if vec_extract optab doesn't support extraction
8423 of vector elts directly. */
8424 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8425 machine_mode vmode;
8426 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8427 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8428 group_size).exists (&vmode)
8429 || (convert_optab_handler (vec_extract_optab,
8430 TYPE_MODE (vectype), vmode)
8431 == CODE_FOR_nothing))
8433 /* Try to avoid emitting an extract of vector elements
8434 by performing the extracts using an integer type of the
8435 same size, extracting from a vector of those and then
8436 re-interpreting it as the original vector type if
8437 supported. */
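/* A hypothetical example: for stores of groups of 2 floats out of a
   V8SF vector, reinterpret it as V4DI and extract/store DImode
   chunks, so each store covers one 2-element group (lsize == 64,
   nstores == 4).  */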
8438 unsigned lsize
8439 = group_size * GET_MODE_BITSIZE (elmode);
8440 unsigned int lnunits = const_nunits / group_size;
8441 /* If we can't construct such a vector fall back to
8442 element extracts from the original vector type and
8443 element size stores. */
8444 if (int_mode_for_size (lsize, 0).exists (&elmode)
8445 && VECTOR_MODE_P (TYPE_MODE (vectype))
8446 && related_vector_mode (TYPE_MODE (vectype), elmode,
8447 lnunits).exists (&vmode)
8448 && (convert_optab_handler (vec_extract_optab,
8449 vmode, elmode)
8450 != CODE_FOR_nothing))
8452 nstores = lnunits;
8453 lnel = group_size;
8454 ltype = build_nonstandard_integer_type (lsize, 1);
8455 lvectype = build_vector_type (ltype, nstores);
8457 /* Else fall back to vector extraction anyway.
8458 Fewer stores are more important than avoiding spilling
8459 of the vector we extract from. Compared to the
8460 construction case in vectorizable_load, no store-forwarding
8461 issue exists here for reasonable archs. */
8464 else if (group_size >= const_nunits
8465 && group_size % const_nunits == 0)
8467 int mis_align = dr_misalignment (first_dr_info, vectype);
8468 dr_alignment_support dr_align
8469 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8470 mis_align);
8471 if (dr_align == dr_aligned
8472 || dr_align == dr_unaligned_supported)
8474 nstores = 1;
8475 lnel = const_nunits;
8476 ltype = vectype;
8477 lvectype = vectype;
8478 alignment_support_scheme = dr_align;
8479 misalignment = mis_align;
8482 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8483 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8486 if (!costing_p)
8488 ivstep = stride_step;
8489 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8490 build_int_cst (TREE_TYPE (ivstep), vf));
8492 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8494 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8495 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8496 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8497 insert_after, &offvar, NULL);
8498 incr = gsi_stmt (incr_gsi);
8500 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8503 alias_off = build_int_cst (ref_type, 0);
8504 stmt_vec_info next_stmt_info = first_stmt_info;
8505 auto_vec<tree> vec_oprnds (ncopies);
8506 /* For costing some adjacent vector stores, we'd like to cost them
8507 once with their total number instead of costing each one by one. */
8508 unsigned int n_adjacent_stores = 0;
8509 for (g = 0; g < group_size; g++)
8511 running_off = offvar;
8512 if (!costing_p)
8514 if (g)
8516 tree size = TYPE_SIZE_UNIT (ltype);
8517 tree pos
8518 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8519 tree newoff = copy_ssa_name (running_off, NULL);
8520 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8521 running_off, pos);
8522 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8523 running_off = newoff;
8526 if (!slp)
8527 op = vect_get_store_rhs (next_stmt_info);
8528 if (!costing_p)
8529 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
8530 &vec_oprnds);
8531 else
8532 update_prologue_cost (&prologue_cost, op);
8533 unsigned int group_el = 0;
8534 unsigned HOST_WIDE_INT
8535 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8536 for (j = 0; j < ncopies; j++)
8538 if (!costing_p)
8540 vec_oprnd = vec_oprnds[j];
8541 /* Pun the vector to extract from if necessary. */
8542 if (lvectype != vectype)
8544 tree tem = make_ssa_name (lvectype);
8545 tree cvt
8546 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8547 gimple *pun = gimple_build_assign (tem, cvt);
8548 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8549 vec_oprnd = tem;
8552 for (i = 0; i < nstores; i++)
8554 if (costing_p)
8556 /* Only need vector extracting when there is more
8557 than one store. */
8558 if (nstores > 1)
8559 inside_cost
8560 += record_stmt_cost (cost_vec, 1, vec_to_scalar,
8561 stmt_info, 0, vect_body);
8562 /* Treat a single-lane vector type store as a scalar
8563 store to avoid ICEs like PR 110776. */
8564 if (VECTOR_TYPE_P (ltype)
8565 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8566 n_adjacent_stores++;
8567 else
8568 inside_cost
8569 += record_stmt_cost (cost_vec, 1, scalar_store,
8570 stmt_info, 0, vect_body);
8571 continue;
8573 tree newref, newoff;
8574 gimple *incr, *assign;
8575 tree size = TYPE_SIZE (ltype);
8576 /* Extract the i'th component. */
8577 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8578 bitsize_int (i), size);
8579 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8580 size, pos);
8582 elem = force_gimple_operand_gsi (gsi, elem, true,
8583 NULL_TREE, true,
8584 GSI_SAME_STMT);
8586 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8587 group_el * elsz);
8588 newref = build2 (MEM_REF, ltype,
8589 running_off, this_off);
8590 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8592 /* And store it to *running_off. */
8593 assign = gimple_build_assign (newref, elem);
8594 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8596 group_el += lnel;
8597 if (! slp
8598 || group_el == group_size)
8600 newoff = copy_ssa_name (running_off, NULL);
8601 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8602 running_off, stride_step);
8603 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8605 running_off = newoff;
8606 group_el = 0;
8608 if (g == group_size - 1
8609 && !slp)
8611 if (j == 0 && i == 0)
8612 *vec_stmt = assign;
8613 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8617 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8618 vec_oprnds.truncate(0);
8619 if (slp)
8620 break;
8623 if (costing_p)
8625 if (n_adjacent_stores > 0)
8626 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8627 alignment_support_scheme, misalignment,
8628 &inside_cost, cost_vec);
8629 if (dump_enabled_p ())
8630 dump_printf_loc (MSG_NOTE, vect_location,
8631 "vect_model_store_cost: inside_cost = %d, "
8632 "prologue_cost = %d .\n",
8633 inside_cost, prologue_cost);
8636 return true;
8639 gcc_assert (alignment_support_scheme);
8640 vec_loop_masks *loop_masks
8641 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8642 ? &LOOP_VINFO_MASKS (loop_vinfo)
8643 : NULL);
8644 vec_loop_lens *loop_lens
8645 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8646 ? &LOOP_VINFO_LENS (loop_vinfo)
8647 : NULL);
8649 /* Shouldn't go with length-based approach if fully masked. */
8650 gcc_assert (!loop_lens || !loop_masks);
8652 /* Targets with store-lane instructions must not require explicit
8653 realignment. vect_supportable_dr_alignment always returns either
8654 dr_aligned or dr_unaligned_supported for masked operations. */
8655 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8656 && !mask
8657 && !loop_masks)
8658 || alignment_support_scheme == dr_aligned
8659 || alignment_support_scheme == dr_unaligned_supported);
8661 tree offset = NULL_TREE;
8662 if (!known_eq (poffset, 0))
8663 offset = size_int (poffset);
8665 tree bump;
8666 tree vec_offset = NULL_TREE;
8667 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8669 aggr_type = NULL_TREE;
8670 bump = NULL_TREE;
8672 else if (memory_access_type == VMAT_GATHER_SCATTER)
8674 aggr_type = elem_type;
8675 if (!costing_p)
8676 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8677 &bump, &vec_offset, loop_lens);
8679 else
8681 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8682 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8683 else
8684 aggr_type = vectype;
8685 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8686 memory_access_type, loop_lens);
8689 if (mask && !costing_p)
8690 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8692 /* In case the vectorization factor (VF) is bigger than the number
8693 of elements that we can fit in a vectype (nunits), we have to generate
8694 more than one vector stmt - i.e. we need to "unroll" the
8695 vector stmt by a factor VF/nunits. */
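/* E.g. with VF == 8 and a V4SI vectype, ncopies == 2 vector stores
   are generated for each scalar store (illustrative numbers).  */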
8697 /* In case of interleaving (non-unit grouped access):
8699 S1: &base + 2 = x2
8700 S2: &base = x0
8701 S3: &base + 1 = x1
8702 S4: &base + 3 = x3
8704 We create vectorized stores starting from base address (the access of the
8705 first stmt in the chain (S2 in the above example), when the last store stmt
8706 of the chain (S4) is reached:
8708 VS1: &base = vx2
8709 VS2: &base + vec_size*1 = vx0
8710 VS3: &base + vec_size*2 = vx1
8711 VS4: &base + vec_size*3 = vx3
8713 Then permutation statements are generated:
8715 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8716 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8719 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8720 (the order of the data-refs in the output of vect_permute_store_chain
8721 corresponds to the order of scalar stmts in the interleaving chain - see
8722 the documentation of vect_permute_store_chain()).
8724 In case of both multiple types and interleaving, above vector stores and
8725 permutation stmts are created for every copy. The result vector stmts are
8726 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8727 STMT_VINFO_RELATED_STMT for the next copies.
8730 auto_vec<tree> dr_chain (group_size);
8731 auto_vec<tree> vec_masks;
8732 tree vec_mask = NULL;
8733 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8734 for (i = 0; i < group_size; i++)
8735 gvec_oprnds.quick_push (new auto_vec<tree> (ncopies));
8737 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8739 gcc_assert (!slp && grouped_store);
8740 unsigned inside_cost = 0, prologue_cost = 0;
8741 /* For costing some adjacent vector stores, we'd like to cost them
8742 once with their total number instead of costing each one by one. */
8743 unsigned int n_adjacent_stores = 0;
8744 for (j = 0; j < ncopies; j++)
8746 gimple *new_stmt;
8747 if (j == 0)
8749 /* For interleaved stores we collect vectorized defs for all
8750 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8751 as an input to vect_permute_store_chain(). */
8752 stmt_vec_info next_stmt_info = first_stmt_info;
8753 for (i = 0; i < group_size; i++)
8755 /* Since gaps are not supported for interleaved stores,
8756 DR_GROUP_SIZE is the exact number of stmts in the
8757 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8758 op = vect_get_store_rhs (next_stmt_info);
8759 if (costing_p)
8760 update_prologue_cost (&prologue_cost, op);
8761 else
8763 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8764 ncopies, op,
8765 gvec_oprnds[i]);
8766 vec_oprnd = (*gvec_oprnds[i])[0];
8767 dr_chain.quick_push (vec_oprnd);
8769 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8772 if (!costing_p)
8774 if (mask)
8776 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8777 mask, &vec_masks,
8778 mask_vectype);
8779 vec_mask = vec_masks[0];
8782 /* We should have caught mismatched types earlier. */
8783 gcc_assert (
8784 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8785 dataref_ptr
8786 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8787 aggr_type, NULL, offset, &dummy,
8788 gsi, &ptr_incr, false, bump);
8791 else if (!costing_p)
8793 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8794 /* DR_CHAIN is then used as an input to
8795 vect_permute_store_chain(). */
8796 for (i = 0; i < group_size; i++)
8798 vec_oprnd = (*gvec_oprnds[i])[j];
8799 dr_chain[i] = vec_oprnd;
8801 if (mask)
8802 vec_mask = vec_masks[j];
8803 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8804 stmt_info, bump);
8807 if (costing_p)
8809 n_adjacent_stores += vec_num;
8810 continue;
8813 /* Get an array into which we can store the individual vectors. */
8814 tree vec_array = create_vector_array (vectype, vec_num);
8816 /* Invalidate the current contents of VEC_ARRAY. This should
8817 become an RTL clobber too, which prevents the vector registers
8818 from being upward-exposed. */
8819 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8821 /* Store the individual vectors into the array. */
8822 for (i = 0; i < vec_num; i++)
8824 vec_oprnd = dr_chain[i];
8825 write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
8829 tree final_mask = NULL;
8830 tree final_len = NULL;
8831 tree bias = NULL;
8832 if (loop_masks)
8833 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8834 ncopies, vectype, j);
8835 if (vec_mask)
8836 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8837 vec_mask, gsi);
8839 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8841 if (loop_lens)
8842 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8843 ncopies, vectype, j, 1);
8844 else
8845 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8846 signed char biasval
8847 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8848 bias = build_int_cst (intQI_type_node, biasval);
8849 if (!final_mask)
8851 mask_vectype = truth_type_for (vectype);
8852 final_mask = build_minus_one_cst (mask_vectype);
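/* That is, when only a length is available we still emit the MASK_LEN
   variant and pass an all-ones mask; the number of lanes actually
   stored is then controlled by FINAL_LEN (adjusted by BIAS). */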
8856 gcall *call;
8857 if (final_len && final_mask)
8859 /* Emit:
8860 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8861 LEN, BIAS, VEC_ARRAY). */
8862 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8863 tree alias_ptr = build_int_cst (ref_type, align);
8864 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8865 dataref_ptr, alias_ptr,
8866 final_mask, final_len, bias,
8867 vec_array);
8869 else if (final_mask)
8871 /* Emit:
8872 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8873 VEC_ARRAY). */
8874 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8875 tree alias_ptr = build_int_cst (ref_type, align);
8876 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8877 dataref_ptr, alias_ptr,
8878 final_mask, vec_array);
8880 else
8882 /* Emit:
8883 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8884 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8885 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
8886 gimple_call_set_lhs (call, data_ref);
8888 gimple_call_set_nothrow (call, true);
8889 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8890 new_stmt = call;
8892 /* Record that VEC_ARRAY is now dead. */
8893 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8894 if (j == 0)
8895 *vec_stmt = new_stmt;
8896 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8899 if (costing_p)
8901 if (n_adjacent_stores > 0)
8902 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8903 alignment_support_scheme, misalignment,
8904 &inside_cost, cost_vec);
8905 if (dump_enabled_p ())
8906 dump_printf_loc (MSG_NOTE, vect_location,
8907 "vect_model_store_cost: inside_cost = %d, "
8908 "prologue_cost = %d .\n",
8909 inside_cost, prologue_cost);
8912 return true;
8915 if (memory_access_type == VMAT_GATHER_SCATTER)
8917 gcc_assert (!grouped_store);
8918 auto_vec<tree> vec_offsets;
8919 unsigned int inside_cost = 0, prologue_cost = 0;
8920 for (j = 0; j < ncopies; j++)
8922 gimple *new_stmt;
8923 if (j == 0)
8925 if (costing_p && vls_type == VLS_STORE_INVARIANT)
8926 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8927 stmt_info, 0, vect_prologue);
8928 else if (!costing_p)
8930 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8931 DR_CHAIN is of size 1. */
8932 gcc_assert (group_size == 1);
8933 if (slp_node)
8934 vect_get_slp_defs (op_node, gvec_oprnds[0]);
8935 else
8936 vect_get_vec_defs_for_operand (vinfo, first_stmt_info,
8937 ncopies, op, gvec_oprnds[0]);
8938 if (mask)
8940 if (slp_node)
8941 vect_get_slp_defs (mask_node, &vec_masks);
8942 else
8943 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8944 ncopies,
8945 mask, &vec_masks,
8946 mask_vectype);
8949 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8950 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8951 slp_node, &gs_info,
8952 &dataref_ptr, &vec_offsets);
8953 else
8954 dataref_ptr
8955 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8956 aggr_type, NULL, offset,
8957 &dummy, gsi, &ptr_incr, false,
8958 bump);
8961 else if (!costing_p)
8963 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8964 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8965 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8966 gsi, stmt_info, bump);
8969 new_stmt = NULL;
8970 for (i = 0; i < vec_num; ++i)
8972 if (!costing_p)
8974 vec_oprnd = (*gvec_oprnds[0])[vec_num * j + i];
8975 if (mask)
8976 vec_mask = vec_masks[vec_num * j + i];
8977 /* We should have caught mismatched types earlier. */
8978 gcc_assert (useless_type_conversion_p (vectype,
8979 TREE_TYPE (vec_oprnd)));
8981 unsigned HOST_WIDE_INT align;
8982 tree final_mask = NULL_TREE;
8983 tree final_len = NULL_TREE;
8984 tree bias = NULL_TREE;
8985 if (!costing_p)
8987 if (loop_masks)
8988 final_mask = vect_get_loop_mask (loop_vinfo, gsi,
8989 loop_masks, ncopies,
8990 vectype, j);
8991 if (vec_mask)
8992 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8993 final_mask, vec_mask, gsi);
8996 if (gs_info.ifn != IFN_LAST)
8998 if (costing_p)
9000 unsigned int cnunits = vect_nunits_for_cost (vectype);
9001 inside_cost
9002 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9003 stmt_info, 0, vect_body);
9004 continue;
9007 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9008 vec_offset = vec_offsets[vec_num * j + i];
9009 tree scale = size_int (gs_info.scale);
9011 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9013 if (loop_lens)
9014 final_len = vect_get_loop_len (loop_vinfo, gsi,
9015 loop_lens, ncopies,
9016 vectype, j, 1);
9017 else
9018 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9019 signed char biasval
9020 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9021 bias = build_int_cst (intQI_type_node, biasval);
9022 if (!final_mask)
9024 mask_vectype = truth_type_for (vectype);
9025 final_mask = build_minus_one_cst (mask_vectype);
9029 gcall *call;
9030 if (final_len && final_mask)
9031 call = gimple_build_call_internal
9032 (IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
9033 vec_offset, scale, vec_oprnd, final_mask,
9034 final_len, bias);
9035 else if (final_mask)
9036 call = gimple_build_call_internal
9037 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
9038 vec_offset, scale, vec_oprnd, final_mask);
9039 else
9040 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9041 dataref_ptr, vec_offset,
9042 scale, vec_oprnd);
9043 gimple_call_set_nothrow (call, true);
9044 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9045 new_stmt = call;
9047 else if (gs_info.decl)
9049 /* The builtin decls path for scatter is legacy, x86 only. */
9050 gcc_assert (nunits.is_constant ()
9051 && (!final_mask
9052 || SCALAR_INT_MODE_P
9053 (TYPE_MODE (TREE_TYPE (final_mask)))));
9054 if (costing_p)
9056 unsigned int cnunits = vect_nunits_for_cost (vectype);
9057 inside_cost
9058 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9059 stmt_info, 0, vect_body);
9060 continue;
9062 poly_uint64 offset_nunits
9063 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
9064 if (known_eq (nunits, offset_nunits))
9066 new_stmt = vect_build_one_scatter_store_call
9067 (vinfo, stmt_info, gsi, &gs_info,
9068 dataref_ptr, vec_offsets[vec_num * j + i],
9069 vec_oprnd, final_mask);
9070 vect_finish_stmt_generation (vinfo, stmt_info,
9071 new_stmt, gsi);
9073 else if (known_eq (nunits, offset_nunits * 2))
9075 /* We have an offset vector with half the number of
9076 lanes but the builtins will store full vectype
9077 data from the lower lanes. */
9078 new_stmt = vect_build_one_scatter_store_call
9079 (vinfo, stmt_info, gsi, &gs_info,
9080 dataref_ptr,
9081 vec_offsets[2 * vec_num * j + 2 * i],
9082 vec_oprnd, final_mask);
9083 vect_finish_stmt_generation (vinfo, stmt_info,
9084 new_stmt, gsi);
9085 int count = nunits.to_constant ();
9086 vec_perm_builder sel (count, count, 1);
9087 sel.quick_grow (count);
9088 for (int i = 0; i < count; ++i)
9089 sel[i] = i | (count / 2);
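/* For example, with COUNT == 4 this builds the selector {2, 3, 2, 3};
   the VEC_PERM_EXPR below thus moves the upper half of VEC_OPRND into
   the lower lanes so that the second scatter call stores the remaining
   elements. */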
9090 vec_perm_indices indices (sel, 2, count);
9091 tree perm_mask
9092 = vect_gen_perm_mask_checked (vectype, indices);
9093 new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
9094 vec_oprnd, vec_oprnd,
9095 perm_mask);
9096 vec_oprnd = make_ssa_name (vectype);
9097 gimple_set_lhs (new_stmt, vec_oprnd);
9098 vect_finish_stmt_generation (vinfo, stmt_info,
9099 new_stmt, gsi);
9100 if (final_mask)
9102 new_stmt = gimple_build_assign (NULL_TREE,
9103 VEC_UNPACK_HI_EXPR,
9104 final_mask);
9105 final_mask = make_ssa_name
9106 (truth_type_for (gs_info.offset_vectype));
9107 gimple_set_lhs (new_stmt, final_mask);
9108 vect_finish_stmt_generation (vinfo, stmt_info,
9109 new_stmt, gsi);
9111 new_stmt = vect_build_one_scatter_store_call
9112 (vinfo, stmt_info, gsi, &gs_info,
9113 dataref_ptr,
9114 vec_offsets[2 * vec_num * j + 2 * i + 1],
9115 vec_oprnd, final_mask);
9116 vect_finish_stmt_generation (vinfo, stmt_info,
9117 new_stmt, gsi);
9119 else if (known_eq (nunits * 2, offset_nunits))
9121 /* We have an offset vector with double the number of
9122 lanes. Select the low/high part accordingly. */
9123 vec_offset = vec_offsets[(vec_num * j + i) / 2];
9124 if ((vec_num * j + i) & 1)
9126 int count = offset_nunits.to_constant ();
9127 vec_perm_builder sel (count, count, 1);
9128 sel.quick_grow (count);
9129 for (int i = 0; i < count; ++i)
9130 sel[i] = i | (count / 2);
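/* Likewise, this selector moves the upper half of the offset vector
   into the lower lanes; that half belongs to the odd-numbered data
   vector being stored here. */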
9131 vec_perm_indices indices (sel, 2, count);
9132 tree perm_mask = vect_gen_perm_mask_checked
9133 (TREE_TYPE (vec_offset), indices);
9134 new_stmt = gimple_build_assign (NULL_TREE,
9135 VEC_PERM_EXPR,
9136 vec_offset,
9137 vec_offset,
9138 perm_mask);
9139 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
9140 gimple_set_lhs (new_stmt, vec_offset);
9141 vect_finish_stmt_generation (vinfo, stmt_info,
9142 new_stmt, gsi);
9144 new_stmt = vect_build_one_scatter_store_call
9145 (vinfo, stmt_info, gsi, &gs_info,
9146 dataref_ptr, vec_offset,
9147 vec_oprnd, final_mask);
9148 vect_finish_stmt_generation (vinfo, stmt_info,
9149 new_stmt, gsi);
9151 else
9152 gcc_unreachable ();
9154 else
9156 /* Emulated scatter. */
9157 gcc_assert (!final_mask);
9158 if (costing_p)
9160 unsigned int cnunits = vect_nunits_for_cost (vectype);
9161 /* For emulated scatter N offset vector element extracts
9162 (we assume the scalar scaling and ptr + offset add are
9163 consumed by the store). */
9164 inside_cost
9165 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9166 stmt_info, 0, vect_body);
9167 /* N scalar stores plus extracting the elements. */
9168 inside_cost
9169 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9170 stmt_info, 0, vect_body);
9171 inside_cost
9172 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9173 stmt_info, 0, vect_body);
9174 continue;
9177 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9178 unsigned HOST_WIDE_INT const_offset_nunits
9179 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
9180 vec<constructor_elt, va_gc> *ctor_elts;
9181 vec_alloc (ctor_elts, const_nunits);
9182 gimple_seq stmts = NULL;
9183 tree elt_type = TREE_TYPE (vectype);
9184 unsigned HOST_WIDE_INT elt_size
9185 = tree_to_uhwi (TYPE_SIZE (elt_type));
9186 /* We support offset vectors with more elements
9187 than the data vector for now. */
9188 unsigned HOST_WIDE_INT factor
9189 = const_offset_nunits / const_nunits;
9190 vec_offset = vec_offsets[(vec_num * j + i) / factor];
9191 unsigned elt_offset
9192 = ((vec_num * j + i) % factor) * const_nunits;
9193 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9194 tree scale = size_int (gs_info.scale);
9195 align = get_object_alignment (DR_REF (first_dr_info->dr));
9196 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
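/* Emulated scatter: for each lane K extract the K-th offset element,
   scale it, add it to the base pointer, extract the K-th data element
   and emit a scalar store to the computed address. */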
9197 for (unsigned k = 0; k < const_nunits; ++k)
9199 /* Compute the offsetted pointer. */
9200 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9201 bitsize_int (k + elt_offset));
9202 tree idx
9203 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
9204 vec_offset, TYPE_SIZE (idx_type), boff);
9205 idx = gimple_convert (&stmts, sizetype, idx);
9206 idx = gimple_build (&stmts, MULT_EXPR, sizetype,
9207 idx, scale);
9208 tree ptr
9209 = gimple_build (&stmts, PLUS_EXPR,
9210 TREE_TYPE (dataref_ptr),
9211 dataref_ptr, idx);
9212 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9213 /* Extract the element to be stored. */
9214 tree elt
9215 = gimple_build (&stmts, BIT_FIELD_REF,
9216 TREE_TYPE (vectype),
9217 vec_oprnd, TYPE_SIZE (elt_type),
9218 bitsize_int (k * elt_size));
9219 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9220 stmts = NULL;
9221 tree ref
9222 = build2 (MEM_REF, ltype, ptr,
9223 build_int_cst (ref_type, 0));
9224 new_stmt = gimple_build_assign (ref, elt);
9225 vect_finish_stmt_generation (vinfo, stmt_info,
9226 new_stmt, gsi);
9228 if (slp)
9229 slp_node->push_vec_def (new_stmt);
9232 if (!slp && !costing_p)
9233 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9236 if (!slp && !costing_p)
9237 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9239 if (costing_p && dump_enabled_p ())
9240 dump_printf_loc (MSG_NOTE, vect_location,
9241 "vect_model_store_cost: inside_cost = %d, "
9242 "prologue_cost = %d .\n",
9243 inside_cost, prologue_cost);
9245 return true;
9248 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9249 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9250 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9251 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9253 unsigned inside_cost = 0, prologue_cost = 0;
9254 /* For costing some adjacent vector stores, we'd like to cost with
9255 the total number of them once instead of costing each one by one. */
9256 unsigned int n_adjacent_stores = 0;
9257 auto_vec<tree> result_chain (group_size);
9258 auto_vec<tree, 1> vec_oprnds;
9259 for (j = 0; j < ncopies; j++)
9261 gimple *new_stmt;
9262 if (j == 0)
9264 if (slp && !costing_p)
9266 /* Get vectorized arguments for SLP_NODE. */
9267 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
9268 &vec_oprnds, mask, &vec_masks);
9269 vec_oprnd = vec_oprnds[0];
9270 if (mask)
9271 vec_mask = vec_masks[0];
9273 else
9275 /* For interleaved stores we collect vectorized defs for all the
9276 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9277 input to vect_permute_store_chain().
9279 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9280 is of size 1. */
9281 stmt_vec_info next_stmt_info = first_stmt_info;
9282 for (i = 0; i < group_size; i++)
9284 /* Since gaps are not supported for interleaved stores,
9285 DR_GROUP_SIZE is the exact number of stmts in the chain.
9286 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9287 that there is no interleaving, DR_GROUP_SIZE is 1,
9288 and only one iteration of the loop will be executed. */
9289 op = vect_get_store_rhs (next_stmt_info);
9290 if (costing_p)
9291 update_prologue_cost (&prologue_cost, op);
9292 else
9294 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
9295 ncopies, op,
9296 gvec_oprnds[i]);
9297 vec_oprnd = (*gvec_oprnds[i])[0];
9298 dr_chain.quick_push (vec_oprnd);
9300 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9302 if (mask && !costing_p)
9304 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9305 mask, &vec_masks,
9306 mask_vectype);
9307 vec_mask = vec_masks[0];
9311 /* We should have caught mismatched types earlier. */
9312 gcc_assert (costing_p
9313 || useless_type_conversion_p (vectype,
9314 TREE_TYPE (vec_oprnd)));
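/* If this is a simd-lane access to a local array at offset zero we can
   address the array directly instead of creating a data-ref pointer IV;
   the running offset is then kept in DATAREF_OFFSET. */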
9315 bool simd_lane_access_p
9316 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9317 if (!costing_p
9318 && simd_lane_access_p
9319 && !loop_masks
9320 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9321 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9322 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9323 && integer_zerop (DR_INIT (first_dr_info->dr))
9324 && alias_sets_conflict_p (get_alias_set (aggr_type),
9325 get_alias_set (TREE_TYPE (ref_type))))
9327 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9328 dataref_offset = build_int_cst (ref_type, 0);
9330 else if (!costing_p)
9331 dataref_ptr
9332 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9333 simd_lane_access_p ? loop : NULL,
9334 offset, &dummy, gsi, &ptr_incr,
9335 simd_lane_access_p, bump);
9337 else if (!costing_p)
9339 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9340 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9341 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9342 of size 1. */
9343 for (i = 0; i < group_size; i++)
9345 vec_oprnd = (*gvec_oprnds[i])[j];
9346 dr_chain[i] = vec_oprnd;
9348 if (mask)
9349 vec_mask = vec_masks[j];
9350 if (dataref_offset)
9351 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9352 else
9353 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9354 stmt_info, bump);
9357 new_stmt = NULL;
9358 if (grouped_store)
9360 /* Permute. */
9361 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9362 if (costing_p)
9364 int group_size = DR_GROUP_SIZE (first_stmt_info);
9365 int nstmts = ceil_log2 (group_size) * group_size;
9366 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
9367 stmt_info, 0, vect_body);
9368 if (dump_enabled_p ())
9369 dump_printf_loc (MSG_NOTE, vect_location,
9370 "vect_model_store_cost: "
9371 "strided group_size = %d .\n",
9372 group_size);
9374 else
9375 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9376 gsi, &result_chain);
9379 stmt_vec_info next_stmt_info = first_stmt_info;
9380 for (i = 0; i < vec_num; i++)
9382 if (!costing_p)
9384 if (slp)
9385 vec_oprnd = vec_oprnds[i];
9386 else if (grouped_store)
9387 /* For grouped stores vectorized defs are interleaved in
9388 vect_permute_store_chain(). */
9389 vec_oprnd = result_chain[i];
9392 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9394 if (costing_p)
9395 inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9396 stmt_info, 0, vect_body);
9397 else
9399 tree perm_mask = perm_mask_for_reverse (vectype);
9400 tree perm_dest = vect_create_destination_var (
9401 vect_get_store_rhs (stmt_info), vectype);
9402 tree new_temp = make_ssa_name (perm_dest);
9404 /* Generate the permute statement. */
9405 gimple *perm_stmt
9406 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9407 vec_oprnd, perm_mask);
9408 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
9409 gsi);
9411 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9412 vec_oprnd = new_temp;
9416 if (costing_p)
9418 n_adjacent_stores++;
9420 if (!slp)
9422 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9423 if (!next_stmt_info)
9424 break;
9427 continue;
9430 tree final_mask = NULL_TREE;
9431 tree final_len = NULL_TREE;
9432 tree bias = NULL_TREE;
9433 if (loop_masks)
9434 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9435 vec_num * ncopies, vectype,
9436 vec_num * j + i);
9437 if (slp && vec_mask)
9438 vec_mask = vec_masks[i];
9439 if (vec_mask)
9440 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9441 vec_mask, gsi);
9443 if (i > 0)
9444 /* Bump the vector pointer. */
9445 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9446 stmt_info, bump);
9448 unsigned misalign;
9449 unsigned HOST_WIDE_INT align;
9450 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9451 if (alignment_support_scheme == dr_aligned)
9452 misalign = 0;
9453 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9455 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9456 misalign = 0;
9458 else
9459 misalign = misalignment;
9460 if (dataref_offset == NULL_TREE
9461 && TREE_CODE (dataref_ptr) == SSA_NAME)
9462 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9463 misalign);
9464 align = least_bit_hwi (misalign | align);
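/* ALIGN now holds the alignment in bytes the access is guaranteed to
   have, taking the known misalignment into account. */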
9466 /* Compute which IFN to use when LOOP_LENS or final_mask is valid. */
9467 machine_mode vmode = TYPE_MODE (vectype);
9468 machine_mode new_vmode = vmode;
9469 internal_fn partial_ifn = IFN_LAST;
9470 if (loop_lens)
9472 opt_machine_mode new_ovmode
9473 = get_len_load_store_mode (vmode, false, &partial_ifn);
9474 new_vmode = new_ovmode.require ();
9475 unsigned factor
9476 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
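/* When the target only supports len stores on a VnQI view of the
   vector, the length is measured in bytes, hence the scaling by the
   element size. */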
9477 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9478 vec_num * ncopies, vectype,
9479 vec_num * j + i, factor);
9481 else if (final_mask)
9483 if (!can_vec_mask_load_store_p (
9484 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9485 &partial_ifn))
9486 gcc_unreachable ();
9489 if (partial_ifn == IFN_MASK_LEN_STORE)
9491 if (!final_len)
9493 /* Pass VF value to 'len' argument of
9494 MASK_LEN_STORE if LOOP_LENS is invalid. */
9495 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9497 if (!final_mask)
9499 /* Pass all ones value to 'mask' argument of
9500 MASK_LEN_STORE if final_mask is invalid. */
9501 mask_vectype = truth_type_for (vectype);
9502 final_mask = build_minus_one_cst (mask_vectype);
9505 if (final_len)
9507 signed char biasval
9508 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9510 bias = build_int_cst (intQI_type_node, biasval);
9513 /* Arguments are ready. Create the new vector stmt. */
9514 if (final_len)
9516 gcall *call;
9517 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9518 /* Need conversion if it's wrapped with VnQI. */
9519 if (vmode != new_vmode)
9521 tree new_vtype
9522 = build_vector_type_for_mode (unsigned_intQI_type_node,
9523 new_vmode);
9524 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9525 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9526 gassign *new_stmt
9527 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9528 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9529 vec_oprnd = var;
9532 if (partial_ifn == IFN_MASK_LEN_STORE)
9533 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9534 dataref_ptr, ptr, final_mask,
9535 final_len, bias, vec_oprnd);
9536 else
9537 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9538 dataref_ptr, ptr, final_len,
9539 bias, vec_oprnd);
9540 gimple_call_set_nothrow (call, true);
9541 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9542 new_stmt = call;
9544 else if (final_mask)
9546 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9547 gcall *call
9548 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9549 ptr, final_mask, vec_oprnd);
9550 gimple_call_set_nothrow (call, true);
9551 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9552 new_stmt = call;
9554 else
9556 data_ref
9557 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9558 dataref_offset ? dataref_offset
9559 : build_int_cst (ref_type, 0));
9560 if (alignment_support_scheme == dr_aligned)
9562 else
9563 TREE_TYPE (data_ref)
9564 = build_aligned_type (TREE_TYPE (data_ref),
9565 align * BITS_PER_UNIT);
9566 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9567 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9568 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9571 if (slp)
9572 continue;
9574 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9575 if (!next_stmt_info)
9576 break;
9578 if (!slp && !costing_p)
9580 if (j == 0)
9581 *vec_stmt = new_stmt;
9582 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9586 if (costing_p)
9588 if (n_adjacent_stores > 0)
9589 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
9590 alignment_support_scheme, misalignment,
9591 &inside_cost, cost_vec);
9593 /* When vectorizing a store into the function result assign
9594 a penalty if the function returns in a multi-register location.
9595 In this case we assume we'll end up having to spill the
9596 vector result and do piecewise loads as a conservative estimate. */
9597 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9598 if (base
9599 && (TREE_CODE (base) == RESULT_DECL
9600 || (DECL_P (base) && cfun_returns (base)))
9601 && !aggregate_value_p (base, cfun->decl))
9603 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9604 /* ??? Handle PARALLEL in some way. */
9605 if (REG_P (reg))
9607 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9608 /* Assume that a single reg-reg move is possible and cheap, and
9609 do not account for vector to gp register move cost. */
9610 if (nregs > 1)
9612 /* Spill. */
9613 prologue_cost
9614 += record_stmt_cost (cost_vec, ncopies, vector_store,
9615 stmt_info, 0, vect_epilogue);
9616 /* Loads. */
9617 prologue_cost
9618 += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
9619 stmt_info, 0, vect_epilogue);
9623 if (dump_enabled_p ())
9624 dump_printf_loc (MSG_NOTE, vect_location,
9625 "vect_model_store_cost: inside_cost = %d, "
9626 "prologue_cost = %d .\n",
9627 inside_cost, prologue_cost);
9630 return true;
9633 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9634 VECTOR_CST mask. No checks are made that the target platform supports the
9635 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9636 vect_gen_perm_mask_checked. */
9638 tree
9639 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9641 tree mask_type;
9643 poly_uint64 nunits = sel.length ();
9644 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9646 mask_type = build_vector_type (ssizetype, nunits);
9647 return vec_perm_indices_to_tree (mask_type, sel);
9650 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9651 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9653 tree
9654 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9656 machine_mode vmode = TYPE_MODE (vectype);
9657 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9658 return vect_gen_perm_mask_any (vectype, sel);
9661 /* Given vector variables X and Y that were generated for the scalar
9662 STMT_INFO, generate instructions to permute the vector elements of X and Y
9663 using permutation mask MASK_VEC, insert them at *GSI and return the
9664 permuted vector variable. */
9666 static tree
9667 permute_vec_elements (vec_info *vinfo,
9668 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9669 gimple_stmt_iterator *gsi)
9671 tree vectype = TREE_TYPE (x);
9672 tree perm_dest, data_ref;
9673 gimple *perm_stmt;
9675 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9676 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9677 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9678 else
9679 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9680 data_ref = make_ssa_name (perm_dest);
9682 /* Generate the permute statement. */
9683 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9684 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9686 return data_ref;
9689 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9690 inserting them on the loop's preheader edge. Returns true if we
9691 were successful in doing so (and thus STMT_INFO can then be moved),
9692 otherwise returns false. HOIST_P indicates whether we want to hoist the
9693 definitions of all SSA uses; it is false when we are only costing.
9695 static bool
9696 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9698 ssa_op_iter i;
9699 tree op;
9700 bool any = false;
9702 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9704 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9705 if (!gimple_nop_p (def_stmt)
9706 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9708 /* Make sure we don't need to recurse. While we could do
9709 so in simple cases, when there are more complex use webs
9710 we don't have an easy way to preserve stmt order to fulfil
9711 dependencies within them. */
9712 tree op2;
9713 ssa_op_iter i2;
9714 if (gimple_code (def_stmt) == GIMPLE_PHI)
9715 return false;
9716 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9718 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9719 if (!gimple_nop_p (def_stmt2)
9720 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9721 return false;
9723 any = true;
9727 if (!any)
9728 return true;
9730 if (!hoist_p)
9731 return true;
9733 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9735 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9736 if (!gimple_nop_p (def_stmt)
9737 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9739 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9740 gsi_remove (&gsi, false);
9741 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9745 return true;
9748 /* vectorizable_load.
9750 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9751 that can be vectorized.
9752 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9753 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9754 Return true if STMT_INFO is vectorizable in this way. */
9756 static bool
9757 vectorizable_load (vec_info *vinfo,
9758 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9759 gimple **vec_stmt, slp_tree slp_node,
9760 stmt_vector_for_cost *cost_vec)
9762 tree scalar_dest;
9763 tree vec_dest = NULL;
9764 tree data_ref = NULL;
9765 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9766 class loop *loop = NULL;
9767 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9768 bool nested_in_vect_loop = false;
9769 tree elem_type;
9770 /* Avoid false positive uninitialized warning, see PR110652. */
9771 tree new_temp = NULL_TREE;
9772 machine_mode mode;
9773 tree dummy;
9774 tree dataref_ptr = NULL_TREE;
9775 tree dataref_offset = NULL_TREE;
9776 gimple *ptr_incr = NULL;
9777 int ncopies;
9778 int i, j;
9779 unsigned int group_size;
9780 poly_uint64 group_gap_adj;
9781 tree msq = NULL_TREE, lsq;
9782 tree realignment_token = NULL_TREE;
9783 gphi *phi = NULL;
9784 vec<tree> dr_chain = vNULL;
9785 bool grouped_load = false;
9786 stmt_vec_info first_stmt_info;
9787 stmt_vec_info first_stmt_info_for_drptr = NULL;
9788 bool compute_in_loop = false;
9789 class loop *at_loop;
9790 int vec_num;
9791 bool slp = (slp_node != NULL);
9792 bool slp_perm = false;
9793 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9794 poly_uint64 vf;
9795 tree aggr_type;
9796 gather_scatter_info gs_info;
9797 tree ref_type;
9798 enum vect_def_type mask_dt = vect_unknown_def_type;
9800 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9801 return false;
9803 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9804 && ! vec_stmt)
9805 return false;
9807 if (!STMT_VINFO_DATA_REF (stmt_info))
9808 return false;
9810 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9811 int mask_index = -1;
9812 slp_tree slp_op = NULL;
9813 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9815 scalar_dest = gimple_assign_lhs (assign);
9816 if (TREE_CODE (scalar_dest) != SSA_NAME)
9817 return false;
9819 tree_code code = gimple_assign_rhs_code (assign);
9820 if (code != ARRAY_REF
9821 && code != BIT_FIELD_REF
9822 && code != INDIRECT_REF
9823 && code != COMPONENT_REF
9824 && code != IMAGPART_EXPR
9825 && code != REALPART_EXPR
9826 && code != MEM_REF
9827 && TREE_CODE_CLASS (code) != tcc_declaration)
9828 return false;
9830 else
9832 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9833 if (!call || !gimple_call_internal_p (call))
9834 return false;
9836 internal_fn ifn = gimple_call_internal_fn (call);
9837 if (!internal_load_fn_p (ifn))
9838 return false;
9840 scalar_dest = gimple_call_lhs (call);
9841 if (!scalar_dest)
9842 return false;
9844 mask_index = internal_fn_mask_index (ifn);
9845 if (mask_index >= 0 && slp_node)
9846 mask_index = vect_slp_child_index_for_operand
9847 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9848 if (mask_index >= 0
9849 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9850 &mask, &slp_op, &mask_dt, &mask_vectype))
9851 return false;
9854 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9855 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9857 if (loop_vinfo)
9859 loop = LOOP_VINFO_LOOP (loop_vinfo);
9860 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9861 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9863 else
9864 vf = 1;
9866 /* Multiple types in SLP are handled by creating the appropriate number of
9867 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9868 case of SLP. */
9869 if (slp)
9870 ncopies = 1;
9871 else
9872 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9874 gcc_assert (ncopies >= 1);
9876 /* FORNOW. This restriction should be relaxed. */
9877 if (nested_in_vect_loop && ncopies > 1)
9879 if (dump_enabled_p ())
9880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9881 "multiple types in nested loop.\n");
9882 return false;
9885 /* Invalidate assumptions made by dependence analysis when vectorization
9886 on the unrolled body effectively re-orders stmts. */
9887 if (ncopies > 1
9888 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9889 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9890 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9892 if (dump_enabled_p ())
9893 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9894 "cannot perform implicit CSE when unrolling "
9895 "with negative dependence distance\n");
9896 return false;
9899 elem_type = TREE_TYPE (vectype);
9900 mode = TYPE_MODE (vectype);
9902 /* FORNOW. In some cases we can vectorize even if the data type is not
9903 supported (e.g. data copies). */
9904 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9906 if (dump_enabled_p ())
9907 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9908 "Aligned load, but unsupported type.\n");
9909 return false;
9912 /* Check if the load is a part of an interleaving chain. */
9913 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9915 grouped_load = true;
9916 /* FORNOW */
9917 gcc_assert (!nested_in_vect_loop);
9918 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9920 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9921 group_size = DR_GROUP_SIZE (first_stmt_info);
9923 /* Refuse non-SLP vectorization of SLP-only groups. */
9924 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9926 if (dump_enabled_p ())
9927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9928 "cannot vectorize load in non-SLP mode.\n");
9929 return false;
9932 /* Invalidate assumptions made by dependence analysis when vectorization
9933 on the unrolled body effectively re-orders stmts. */
9934 if (!PURE_SLP_STMT (stmt_info)
9935 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9936 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9937 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9939 if (dump_enabled_p ())
9940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9941 "cannot perform implicit CSE when performing "
9942 "group loads with negative dependence distance\n");
9943 return false;
9946 else
9947 group_size = 1;
9949 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9951 slp_perm = true;
9953 if (!loop_vinfo)
9955 /* In BB vectorization we may not actually use a loaded vector
9956 accessing elements in excess of DR_GROUP_SIZE. */
9957 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9958 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9959 unsigned HOST_WIDE_INT nunits;
9960 unsigned j, k, maxk = 0;
9961 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9962 if (k > maxk)
9963 maxk = k;
9964 tree vectype = SLP_TREE_VECTYPE (slp_node);
9965 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9966 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9968 if (dump_enabled_p ())
9969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9970 "BB vectorization with gaps at the end of "
9971 "a load is not supported\n");
9972 return false;
9976 auto_vec<tree> tem;
9977 unsigned n_perms;
9978 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9979 true, &n_perms))
9981 if (dump_enabled_p ())
9982 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9983 vect_location,
9984 "unsupported load permutation\n");
9985 return false;
9989 vect_memory_access_type memory_access_type;
9990 enum dr_alignment_support alignment_support_scheme;
9991 int misalignment;
9992 poly_int64 poffset;
9993 internal_fn lanes_ifn;
9994 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
9995 ncopies, &memory_access_type, &poffset,
9996 &alignment_support_scheme, &misalignment, &gs_info,
9997 &lanes_ifn))
9998 return false;
10000 if (mask)
10002 if (memory_access_type == VMAT_CONTIGUOUS)
10004 machine_mode vec_mode = TYPE_MODE (vectype);
10005 if (!VECTOR_MODE_P (vec_mode)
10006 || !can_vec_mask_load_store_p (vec_mode,
10007 TYPE_MODE (mask_vectype), true))
10008 return false;
10010 else if (memory_access_type != VMAT_LOAD_STORE_LANES
10011 && memory_access_type != VMAT_GATHER_SCATTER)
10013 if (dump_enabled_p ())
10014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10015 "unsupported access type for masked load.\n");
10016 return false;
10018 else if (memory_access_type == VMAT_GATHER_SCATTER
10019 && gs_info.ifn == IFN_LAST
10020 && !gs_info.decl)
10022 if (dump_enabled_p ())
10023 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10024 "unsupported masked emulated gather.\n");
10025 return false;
10029 bool costing_p = !vec_stmt;
10031 if (costing_p) /* transformation not required. */
10033 if (slp_node
10034 && mask
10035 && !vect_maybe_update_slp_op_vectype (slp_op,
10036 mask_vectype))
10038 if (dump_enabled_p ())
10039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10040 "incompatible vector types for invariants\n");
10041 return false;
10044 if (!slp)
10045 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10047 if (loop_vinfo
10048 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10049 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10050 VLS_LOAD, group_size,
10051 memory_access_type, &gs_info,
10052 mask);
10054 if (dump_enabled_p ()
10055 && memory_access_type != VMAT_ELEMENTWISE
10056 && memory_access_type != VMAT_GATHER_SCATTER
10057 && alignment_support_scheme != dr_aligned)
10058 dump_printf_loc (MSG_NOTE, vect_location,
10059 "Vectorizing an unaligned access.\n");
10061 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10062 vinfo->any_known_not_updated_vssa = true;
10064 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10067 if (!slp)
10068 gcc_assert (memory_access_type
10069 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10071 if (dump_enabled_p () && !costing_p)
10072 dump_printf_loc (MSG_NOTE, vect_location,
10073 "transform load. ncopies = %d\n", ncopies);
10075 /* Transform. */
10077 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10078 ensure_base_align (dr_info);
10080 if (memory_access_type == VMAT_INVARIANT)
10082 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10083 /* If we have versioned for aliasing or the loop doesn't
10084 have any data dependencies that would preclude this,
10085 then we are sure this is a loop invariant load and
10086 thus we can insert it on the preheader edge.
10087 TODO: hoist_defs_of_uses should ideally be computed
10088 once at analysis time, remembered and used at
10089 transform time. */
10090 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10091 && !nested_in_vect_loop
10092 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
10093 if (costing_p)
10095 enum vect_cost_model_location cost_loc
10096 = hoist_p ? vect_prologue : vect_body;
10097 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10098 stmt_info, 0, cost_loc);
10099 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
10100 cost_loc);
10101 unsigned int prologue_cost = hoist_p ? cost : 0;
10102 unsigned int inside_cost = hoist_p ? 0 : cost;
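/* A hoisted invariant load is paid for once in the prologue; otherwise
   the scalar load and broadcast are paid for on each iteration of the
   vectorized loop body. */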
10103 if (dump_enabled_p ())
10104 dump_printf_loc (MSG_NOTE, vect_location,
10105 "vect_model_load_cost: inside_cost = %d, "
10106 "prologue_cost = %d .\n",
10107 inside_cost, prologue_cost);
10108 return true;
10110 if (hoist_p)
10112 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
10113 if (dump_enabled_p ())
10114 dump_printf_loc (MSG_NOTE, vect_location,
10115 "hoisting out of the vectorized loop: %G",
10116 (gimple *) stmt);
10117 scalar_dest = copy_ssa_name (scalar_dest);
10118 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10119 edge pe = loop_preheader_edge (loop);
10120 gphi *vphi = get_virtual_phi (loop->header);
10121 tree vuse;
10122 if (vphi)
10123 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10124 else
10125 vuse = gimple_vuse (gsi_stmt (*gsi));
10126 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10127 gimple_set_vuse (new_stmt, vuse);
10128 gsi_insert_on_edge_immediate (pe, new_stmt);
10130 /* These copies are all equivalent. */
10131 if (hoist_p)
10132 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10133 vectype, NULL);
10134 else
10136 gimple_stmt_iterator gsi2 = *gsi;
10137 gsi_next (&gsi2);
10138 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10139 vectype, &gsi2);
10141 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10142 if (slp)
10143 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10144 slp_node->push_vec_def (new_stmt);
10145 else
10147 for (j = 0; j < ncopies; ++j)
10148 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10149 *vec_stmt = new_stmt;
10151 return true;
10154 if (memory_access_type == VMAT_ELEMENTWISE
10155 || memory_access_type == VMAT_STRIDED_SLP)
10157 gimple_stmt_iterator incr_gsi;
10158 bool insert_after;
10159 tree offvar;
10160 tree ivstep;
10161 tree running_off;
10162 vec<constructor_elt, va_gc> *v = NULL;
10163 tree stride_base, stride_step, alias_off;
10164 /* Checked by get_load_store_type. */
10165 unsigned int const_nunits = nunits.to_constant ();
10166 unsigned HOST_WIDE_INT cst_offset = 0;
10167 tree dr_offset;
10168 unsigned int inside_cost = 0;
10170 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10171 gcc_assert (!nested_in_vect_loop);
10173 if (grouped_load)
10175 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10176 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10178 else
10180 first_stmt_info = stmt_info;
10181 first_dr_info = dr_info;
10184 if (slp && grouped_load)
10186 group_size = DR_GROUP_SIZE (first_stmt_info);
10187 ref_type = get_group_alias_ptr_type (first_stmt_info);
10189 else
10191 if (grouped_load)
10192 cst_offset
10193 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10194 * vect_get_place_in_interleaving_chain (stmt_info,
10195 first_stmt_info));
10196 group_size = 1;
10197 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10200 if (!costing_p)
10202 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10203 stride_base = fold_build_pointer_plus (
10204 DR_BASE_ADDRESS (first_dr_info->dr),
10205 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10206 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10207 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10209 /* For a load with loop-invariant (but other than power-of-2)
10210 stride (i.e. not a grouped access) like so:
10212 for (i = 0; i < n; i += stride)
10213 ... = array[i];
10215 we generate a new induction variable and new accesses to
10216 form a new vector (or vectors, depending on ncopies):
10218 for (j = 0; ; j += VF*stride)
10219 tmp1 = array[j];
10220 tmp2 = array[j + stride];
10222 vectemp = {tmp1, tmp2, ...}
10225 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10226 build_int_cst (TREE_TYPE (stride_step), vf));
10228 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10230 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10231 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10232 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10233 loop, &incr_gsi, insert_after,
10234 &offvar, NULL);
10236 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10239 running_off = offvar;
10240 alias_off = build_int_cst (ref_type, 0);
10241 int nloads = const_nunits;
10242 int lnel = 1;
10243 tree ltype = TREE_TYPE (vectype);
10244 tree lvectype = vectype;
10245 auto_vec<tree> dr_chain;
10246 if (memory_access_type == VMAT_STRIDED_SLP)
10248 if (group_size < const_nunits)
10250 /* First check if vec_init optab supports construction from vector
10251 elts directly. Otherwise avoid emitting a constructor of
10252 vector elements by performing the loads using an integer type
10253 of the same size, constructing a vector of those and then
10254 re-interpreting it as the original vector type. This avoids a
10255 huge runtime penalty due to the general inability to perform
10256 store forwarding from smaller stores to a larger load. */
10257 tree ptype;
10258 tree vtype
10259 = vector_vector_composition_type (vectype,
10260 const_nunits / group_size,
10261 &ptype);
10262 if (vtype != NULL_TREE)
10264 nloads = const_nunits / group_size;
10265 lnel = group_size;
10266 lvectype = vtype;
10267 ltype = ptype;
10270 else
10272 nloads = 1;
10273 lnel = const_nunits;
10274 ltype = vectype;
10276 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10278 /* Load vector(1) scalar_type directly if the vectype has just one element. */
10279 else if (nloads == 1)
10280 ltype = vectype;
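/* At this point NLOADS is the number of loads used to assemble one
   vector, LNEL the number of group elements each load covers, LTYPE the
   type of each individual load and LVECTYPE the type the loads are
   assembled into (view-converted to VECTYPE if the two differ). */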
10282 if (slp)
10284 /* For SLP permutation support we need to load the whole group,
10285 not only the number of vector stmts the permutation result
10286 fits in. */
10287 if (slp_perm)
10289 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10290 variable VF. */
10291 unsigned int const_vf = vf.to_constant ();
10292 ncopies = CEIL (group_size * const_vf, const_nunits);
10293 dr_chain.create (ncopies);
10295 else
10296 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10298 unsigned int group_el = 0;
10299 unsigned HOST_WIDE_INT
10300 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10301 unsigned int n_groups = 0;
10302 /* For costing some adjacent vector loads, we'd like to cost with
10303 the total number of them once instead of costing each one by one. */
10304 unsigned int n_adjacent_loads = 0;
10305 for (j = 0; j < ncopies; j++)
10307 if (nloads > 1 && !costing_p)
10308 vec_alloc (v, nloads);
10309 gimple *new_stmt = NULL;
10310 for (i = 0; i < nloads; i++)
10312 if (costing_p)
10314 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10315 avoid ICE, see PR110776. */
10316 if (VECTOR_TYPE_P (ltype)
10317 && memory_access_type != VMAT_ELEMENTWISE)
10318 n_adjacent_loads++;
10319 else
10320 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10321 stmt_info, 0, vect_body);
10322 continue;
10324 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10325 group_el * elsz + cst_offset);
10326 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10327 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10328 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
10329 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10330 if (nloads > 1)
10331 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10332 gimple_assign_lhs (new_stmt));
10334 group_el += lnel;
10335 if (! slp
10336 || group_el == group_size)
10338 n_groups++;
10339 /* When doing SLP make sure to not load elements from
10340 the next vector iteration; those will not be accessed
10341 so just use the last element again. See PR107451. */
10342 if (!slp || known_lt (n_groups, vf))
10344 tree newoff = copy_ssa_name (running_off);
10345 gimple *incr
10346 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10347 running_off, stride_step);
10348 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10349 running_off = newoff;
10351 group_el = 0;
10355 if (nloads > 1)
10357 if (costing_p)
10358 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10359 stmt_info, 0, vect_body);
10360 else
10362 tree vec_inv = build_constructor (lvectype, v);
10363 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10364 lvectype, gsi);
10365 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10366 if (lvectype != vectype)
10368 new_stmt
10369 = gimple_build_assign (make_ssa_name (vectype),
10370 VIEW_CONVERT_EXPR,
10371 build1 (VIEW_CONVERT_EXPR,
10372 vectype, new_temp));
10373 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10374 gsi);
10379 if (!costing_p)
10381 if (slp)
10383 if (slp_perm)
10384 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10385 else
10386 slp_node->push_vec_def (new_stmt);
10388 else
10390 if (j == 0)
10391 *vec_stmt = new_stmt;
10392 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10396 if (slp_perm)
10398 unsigned n_perms;
10399 if (costing_p)
10401 unsigned n_loads;
10402 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10403 true, &n_perms, &n_loads);
10404 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10405 first_stmt_info, 0, vect_body);
10407 else
10408 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10409 false, &n_perms);
10412 if (costing_p)
10414 if (n_adjacent_loads > 0)
10415 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10416 alignment_support_scheme, misalignment, false,
10417 &inside_cost, nullptr, cost_vec, cost_vec,
10418 true);
10419 if (dump_enabled_p ())
10420 dump_printf_loc (MSG_NOTE, vect_location,
10421 "vect_model_load_cost: inside_cost = %u, "
10422 "prologue_cost = 0 .\n",
10423 inside_cost);
10426 return true;
10429 if (memory_access_type == VMAT_GATHER_SCATTER
10430 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10431 grouped_load = false;
10433 if (grouped_load
10434 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10436 if (grouped_load)
10438 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10439 group_size = DR_GROUP_SIZE (first_stmt_info);
10441 else
10443 first_stmt_info = stmt_info;
10444 group_size = 1;
10446 /* For SLP vectorization we directly vectorize a subchain
10447 without permutation. */
10448 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10449 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10450 /* For BB vectorization always use the first stmt to base
10451 the data ref pointer on. */
10452 if (bb_vinfo)
10453 first_stmt_info_for_drptr
10454 = vect_find_first_scalar_stmt_in_slp (slp_node);
10456 /* Check if the chain of loads is already vectorized. */
10457 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10458 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10459 ??? But we can only do so if there is exactly one
10460 as we have no way to get at the rest. Leave the CSE
10461 opportunity alone.
10462 ??? With the group load eventually participating
10463 in multiple different permutations (having multiple
10464 slp nodes which refer to the same group) the CSE
10465 is even wrong code. See PR56270. */
10466 && !slp)
10468 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10469 return true;
10471 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10472 group_gap_adj = 0;
10474 /* VEC_NUM is the number of vect stmts to be created for this group. */
10475 if (slp)
10477 grouped_load = false;
10478 /* If an SLP permutation is from N elements to N elements,
10479 and if one vector holds a whole number of N, we can load
10480 the inputs to the permutation in the same way as an
10481 unpermuted sequence. In other cases we need to load the
10482 whole group, not only the number of vector stmts the
10483 permutation result fits in. */
10484 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10485 if (slp_perm
10486 && (group_size != scalar_lanes
10487 || !multiple_p (nunits, group_size)))
10489 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10490 variable VF; see vect_transform_slp_perm_load. */
10491 unsigned int const_vf = vf.to_constant ();
10492 unsigned int const_nunits = nunits.to_constant ();
10493 vec_num = CEIL (group_size * const_vf, const_nunits);
10494 group_gap_adj = vf * group_size - nunits * vec_num;
10496 else
10498 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10499 group_gap_adj
10500 = group_size - scalar_lanes;
10503 else
10504 vec_num = group_size;
10506 ref_type = get_group_alias_ptr_type (first_stmt_info);
10508 else
10510 first_stmt_info = stmt_info;
10511 first_dr_info = dr_info;
10512 group_size = vec_num = 1;
10513 group_gap_adj = 0;
10514 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10515 if (slp)
10516 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10519 gcc_assert (alignment_support_scheme);
10520 vec_loop_masks *loop_masks
10521 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10522 ? &LOOP_VINFO_MASKS (loop_vinfo)
10523 : NULL);
10524 vec_loop_lens *loop_lens
10525 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10526 ? &LOOP_VINFO_LENS (loop_vinfo)
10527 : NULL);
10529 /* Shouldn't go with length-based approach if fully masked. */
10530 gcc_assert (!loop_lens || !loop_masks);
10532 /* Targets with load-lanes instructions must not require explicit
10533 realignment. vect_supportable_dr_alignment always returns either
10534 dr_aligned or dr_unaligned_supported for masked operations. */
10535 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10536 && !mask
10537 && !loop_masks)
10538 || alignment_support_scheme == dr_aligned
10539 || alignment_support_scheme == dr_unaligned_supported);
10541 /* In case the vectorization factor (VF) is bigger than the number
10542 of elements that we can fit in a vectype (nunits), we have to generate
10543 more than one vector stmt - i.e., we need to "unroll" the
10544 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10545 from one copy of the vector stmt to the next, in the field
10546 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10547 stages to find the correct vector defs to be used when vectorizing
10548 stmts that use the defs of the current stmt. The example below
10549 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10550 need to create 4 vectorized stmts):
10552 before vectorization:
10553 RELATED_STMT VEC_STMT
10554 S1: x = memref - -
10555 S2: z = x + 1 - -
10557 step 1: vectorize stmt S1:
10558 We first create the vector stmt VS1_0, and, as usual, record a
10559 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10560 Next, we create the vector stmt VS1_1, and record a pointer to
10561 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10562 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10563 stmts and pointers:
10564 RELATED_STMT VEC_STMT
10565 VS1_0: vx0 = memref0 VS1_1 -
10566 VS1_1: vx1 = memref1 VS1_2 -
10567 VS1_2: vx2 = memref2 VS1_3 -
10568 VS1_3: vx3 = memref3 - -
10569 S1: x = load - VS1_0
10570 S2: z = x + 1 - -
10573 /* In case of interleaving (non-unit grouped access):
10575 S1: x2 = &base + 2
10576 S2: x0 = &base
10577 S3: x1 = &base + 1
10578 S4: x3 = &base + 3
10580 Vectorized loads are created in the order of memory accesses
10581 starting from the access of the first stmt of the chain:
10583 VS1: vx0 = &base
10584 VS2: vx1 = &base + vec_size*1
10585 VS3: vx3 = &base + vec_size*2
10586 VS4: vx4 = &base + vec_size*3
10588 Then permutation statements are generated:
10590 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10591 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10594 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10595 (the order of the data-refs in the output of vect_permute_load_chain
10596 corresponds to the order of scalar stmts in the interleaving chain - see
10597 the documentation of vect_permute_load_chain()).
10598 The generation of permutation stmts and recording them in
10599 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10601 In case of both multiple types and interleaving, the vector loads and
10602 permutation stmts above are created for every copy. The result vector
10603 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10604 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10606 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10607 on a target that supports unaligned accesses (dr_unaligned_supported)
10608 we generate the following code:
10609 p = initial_addr;
10610 indx = 0;
10611 loop {
10612 p = p + indx * vectype_size;
10613 vec_dest = *(p);
10614 indx = indx + 1;
10617 Otherwise, the data reference is potentially unaligned on a target that
10618 does not support unaligned accesses (dr_explicit_realign_optimized) -
10619 then generate the following code, in which the data in each iteration is
10620 obtained by two vector loads, one from the previous iteration, and one
10621 from the current iteration:
10622 p1 = initial_addr;
10623 msq_init = *(floor(p1))
10624 p2 = initial_addr + VS - 1;
10625 realignment_token = call target_builtin;
10626 indx = 0;
10627 loop {
10628 p2 = p2 + indx * vectype_size
10629 lsq = *(floor(p2))
10630 vec_dest = realign_load (msq, lsq, realignment_token)
10631 indx = indx + 1;
10632 msq = lsq;
10633 } */
10635 /* If the misalignment remains the same throughout the execution of the
10636 loop, we can create the init_addr and permutation mask at the loop
10637 preheader. Otherwise, it needs to be created inside the loop.
10638 This can only occur when vectorizing memory accesses in the inner-loop
10639 nested within an outer-loop that is being vectorized. */
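/* A hedged illustration of that nested case (made-up loop, not taken from
   this file): when vectorizing the outer loop of
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
         ... = a[i][j];
   the inner-loop access starts at a different address in every outer
   iteration, so unless the outer step is a multiple of the vector size the
   misalignment of the first inner access can change between outer
   iterations and the realignment data must be computed inside the loop.  */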
10641 if (nested_in_vect_loop
10642 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10643 GET_MODE_SIZE (TYPE_MODE (vectype))))
10645 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10646 compute_in_loop = true;
10649 bool diff_first_stmt_info
10650 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10652 tree offset = NULL_TREE;
10653 if ((alignment_support_scheme == dr_explicit_realign_optimized
10654 || alignment_support_scheme == dr_explicit_realign)
10655 && !compute_in_loop)
10657 /* If we have a different first_stmt_info, we can't set up realignment
10658 here, since we can't guarantee that the first_stmt_info DR has been
10659 initialized yet; instead use the first_stmt_info_for_drptr DR, bumping it
10660 by the distance from the first_stmt_info DR, as below. */
10661 if (!costing_p)
10663 if (!diff_first_stmt_info)
10664 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10665 &realignment_token,
10666 alignment_support_scheme, NULL_TREE,
10667 &at_loop);
10668 if (alignment_support_scheme == dr_explicit_realign_optimized)
10670 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10671 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10672 size_one_node);
10673 gcc_assert (!first_stmt_info_for_drptr);
10677 else
10678 at_loop = loop;
10680 if (!known_eq (poffset, 0))
10681 offset = (offset
10682 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10683 : size_int (poffset));
10685 tree bump;
10686 tree vec_offset = NULL_TREE;
10687 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10689 aggr_type = NULL_TREE;
10690 bump = NULL_TREE;
10692 else if (memory_access_type == VMAT_GATHER_SCATTER)
10694 aggr_type = elem_type;
10695 if (!costing_p)
10696 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10697 &bump, &vec_offset, loop_lens);
10699 else
10701 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10702 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10703 else
10704 aggr_type = vectype;
10705 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10706 memory_access_type, loop_lens);
10709 auto_vec<tree> vec_offsets;
10710 auto_vec<tree> vec_masks;
10711 if (mask && !costing_p)
10713 if (slp_node)
10714 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10715 &vec_masks);
10716 else
10717 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10718 &vec_masks, mask_vectype);
10721 tree vec_mask = NULL_TREE;
10722 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10724 gcc_assert (alignment_support_scheme == dr_aligned
10725 || alignment_support_scheme == dr_unaligned_supported);
10726 gcc_assert (grouped_load && !slp);
10728 unsigned int inside_cost = 0, prologue_cost = 0;
10729 /* When costing adjacent vector loads, we'd like to account for the
10730 total number of them at once instead of costing each one separately. */
10731 unsigned int n_adjacent_loads = 0;
10732 for (j = 0; j < ncopies; j++)
10734 if (costing_p)
10736 /* An IFN_LOAD_LANES will load all its vector results,
10737 regardless of which ones we actually need. Account
10738 for the cost of unused results. */
10739 if (first_stmt_info == stmt_info)
10741 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10742 stmt_vec_info next_stmt_info = first_stmt_info;
10745 gaps -= 1;
10746 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10748 while (next_stmt_info);
10749 if (gaps)
10751 if (dump_enabled_p ())
10752 dump_printf_loc (MSG_NOTE, vect_location,
10753 "vect_model_load_cost: %d "
10754 "unused vectors.\n",
10755 gaps);
10756 vect_get_load_cost (vinfo, stmt_info, gaps,
10757 alignment_support_scheme,
10758 misalignment, false, &inside_cost,
10759 &prologue_cost, cost_vec, cost_vec,
10760 true);
10763 n_adjacent_loads++;
10764 continue;
10767 /* 1. Create the vector or array pointer update chain. */
10768 if (j == 0)
10769 dataref_ptr
10770 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10771 at_loop, offset, &dummy, gsi,
10772 &ptr_incr, false, bump);
10773 else
10775 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10776 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10777 stmt_info, bump);
10779 if (mask)
10780 vec_mask = vec_masks[j];
10782 tree vec_array = create_vector_array (vectype, vec_num);
10784 tree final_mask = NULL_TREE;
10785 tree final_len = NULL_TREE;
10786 tree bias = NULL_TREE;
10787 if (loop_masks)
10788 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10789 ncopies, vectype, j);
10790 if (vec_mask)
10791 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10792 vec_mask, gsi);
10794 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10796 if (loop_lens)
10797 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10798 ncopies, vectype, j, 1);
10799 else
10800 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10801 signed char biasval
10802 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10803 bias = build_int_cst (intQI_type_node, biasval);
10804 if (!final_mask)
10806 mask_vectype = truth_type_for (vectype);
10807 final_mask = build_minus_one_cst (mask_vectype);
10811 gcall *call;
10812 if (final_len && final_mask)
10814 /* Emit:
10815 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10816 VEC_MASK, LEN, BIAS). */
10817 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10818 tree alias_ptr = build_int_cst (ref_type, align);
10819 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10820 dataref_ptr, alias_ptr,
10821 final_mask, final_len, bias);
10823 else if (final_mask)
10825 /* Emit:
10826 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10827 VEC_MASK). */
10828 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10829 tree alias_ptr = build_int_cst (ref_type, align);
10830 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10831 dataref_ptr, alias_ptr,
10832 final_mask);
10834 else
10836 /* Emit:
10837 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10838 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10839 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10841 gimple_call_set_lhs (call, vec_array);
10842 gimple_call_set_nothrow (call, true);
10843 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10845 dr_chain.create (vec_num);
10846 /* Extract each vector into an SSA_NAME. */
10847 for (i = 0; i < vec_num; i++)
10849 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10850 vec_array, i);
10851 dr_chain.quick_push (new_temp);
10854 /* Record the mapping between SSA_NAMEs and statements. */
10855 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10857 /* Record that VEC_ARRAY is now dead. */
10858 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
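/* A sketch of the resulting GIMPLE for a two-vector group (illustrative
   only; SSA names and types are invented):
     vect_array = .LOAD_LANES (MEM <int[8]> [(int *) vectp_1]);
     vect_x0 = vect_array[0];
     vect_x1 = vect_array[1];
     vect_array ={v} {CLOBBER};
   with .MASK_LOAD_LANES or .MASK_LEN_LOAD_LANES used instead when a mask
   and/or length was applied above.  */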
10860 dr_chain.release ();
10862 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10865 if (costing_p)
10867 if (n_adjacent_loads > 0)
10868 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10869 alignment_support_scheme, misalignment, false,
10870 &inside_cost, &prologue_cost, cost_vec,
10871 cost_vec, true);
10872 if (dump_enabled_p ())
10873 dump_printf_loc (MSG_NOTE, vect_location,
10874 "vect_model_load_cost: inside_cost = %u, "
10875 "prologue_cost = %u .\n",
10876 inside_cost, prologue_cost);
10879 return true;
10882 if (memory_access_type == VMAT_GATHER_SCATTER)
10884 gcc_assert (alignment_support_scheme == dr_aligned
10885 || alignment_support_scheme == dr_unaligned_supported);
10886 gcc_assert (!grouped_load && !slp_perm);
10888 unsigned int inside_cost = 0, prologue_cost = 0;
10889 for (j = 0; j < ncopies; j++)
10891 /* 1. Create the vector or array pointer update chain. */
10892 if (j == 0 && !costing_p)
10894 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10895 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10896 slp_node, &gs_info, &dataref_ptr,
10897 &vec_offsets);
10898 else
10899 dataref_ptr
10900 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10901 at_loop, offset, &dummy, gsi,
10902 &ptr_incr, false, bump);
10904 else if (!costing_p)
10906 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10907 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10908 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10909 gsi, stmt_info, bump);
10912 gimple *new_stmt = NULL;
10913 for (i = 0; i < vec_num; i++)
10915 tree final_mask = NULL_TREE;
10916 tree final_len = NULL_TREE;
10917 tree bias = NULL_TREE;
10918 if (!costing_p)
10920 if (mask)
10921 vec_mask = vec_masks[vec_num * j + i];
10922 if (loop_masks)
10923 final_mask
10924 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10925 vec_num * ncopies, vectype,
10926 vec_num * j + i);
10927 if (vec_mask)
10928 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10929 final_mask, vec_mask, gsi);
10931 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10932 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10933 gsi, stmt_info, bump);
10936 /* 2. Create the vector-load in the loop. */
10937 unsigned HOST_WIDE_INT align;
10938 if (gs_info.ifn != IFN_LAST)
10940 if (costing_p)
10942 unsigned int cnunits = vect_nunits_for_cost (vectype);
10943 inside_cost
10944 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10945 stmt_info, 0, vect_body);
10946 continue;
10948 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10949 vec_offset = vec_offsets[vec_num * j + i];
10950 tree zero = build_zero_cst (vectype);
10951 tree scale = size_int (gs_info.scale);
10953 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10955 if (loop_lens)
10956 final_len
10957 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10958 vec_num * ncopies, vectype,
10959 vec_num * j + i, 1);
10960 else
10961 final_len
10962 = build_int_cst (sizetype,
10963 TYPE_VECTOR_SUBPARTS (vectype));
10964 signed char biasval
10965 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10966 bias = build_int_cst (intQI_type_node, biasval);
10967 if (!final_mask)
10969 mask_vectype = truth_type_for (vectype);
10970 final_mask = build_minus_one_cst (mask_vectype);
10974 gcall *call;
10975 if (final_len && final_mask)
10976 call
10977 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
10978 dataref_ptr, vec_offset,
10979 scale, zero, final_mask,
10980 final_len, bias);
10981 else if (final_mask)
10982 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
10983 dataref_ptr, vec_offset,
10984 scale, zero, final_mask);
10985 else
10986 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
10987 dataref_ptr, vec_offset,
10988 scale, zero);
10989 gimple_call_set_nothrow (call, true);
10990 new_stmt = call;
10991 data_ref = NULL_TREE;
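/* For illustration (a hand-written sketch, not a real dump): a scalar
   loop such as
     for (i = 0; i < n; i++)
       out[i] = data[idx[i]];
   reaches this point as something like
     vect_out = .MASK_GATHER_LOAD (&data, vect_idx, 4, { 0, ... }, mask);
   where 4 is the element scale applied to each offset.  */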
10993 else if (gs_info.decl)
10995 /* The builtin decls path for gather is legacy, x86 only. */
10996 gcc_assert (!final_len && nunits.is_constant ());
10997 if (costing_p)
10999 unsigned int cnunits = vect_nunits_for_cost (vectype);
11000 inside_cost
11001 = record_stmt_cost (cost_vec, cnunits, scalar_load,
11002 stmt_info, 0, vect_body);
11003 continue;
11005 poly_uint64 offset_nunits
11006 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
11007 if (known_eq (nunits, offset_nunits))
11009 new_stmt = vect_build_one_gather_load_call
11010 (vinfo, stmt_info, gsi, &gs_info,
11011 dataref_ptr, vec_offsets[vec_num * j + i],
11012 final_mask);
11013 data_ref = NULL_TREE;
11015 else if (known_eq (nunits, offset_nunits * 2))
11017 /* We have an offset vector with half the number of
11018 lanes but the builtins will produce full vectype
11019 data with just the lower lanes filled. */
11020 new_stmt = vect_build_one_gather_load_call
11021 (vinfo, stmt_info, gsi, &gs_info,
11022 dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
11023 final_mask);
11024 tree low = make_ssa_name (vectype);
11025 gimple_set_lhs (new_stmt, low);
11026 vect_finish_stmt_generation (vinfo, stmt_info,
11027 new_stmt, gsi);
11029 /* Now move the upper half of final_mask into its lower half. */
11030 if (final_mask
11031 && !SCALAR_INT_MODE_P
11032 (TYPE_MODE (TREE_TYPE (final_mask))))
11034 int count = nunits.to_constant ();
11035 vec_perm_builder sel (count, count, 1);
11036 sel.quick_grow (count);
11037 for (int i = 0; i < count; ++i)
11038 sel[i] = i | (count / 2);
11039 vec_perm_indices indices (sel, 2, count);
11040 tree perm_mask = vect_gen_perm_mask_checked
11041 (TREE_TYPE (final_mask), indices);
11042 new_stmt = gimple_build_assign (NULL_TREE,
11043 VEC_PERM_EXPR,
11044 final_mask,
11045 final_mask,
11046 perm_mask);
11047 final_mask = make_ssa_name (TREE_TYPE (final_mask));
11048 gimple_set_lhs (new_stmt, final_mask);
11049 vect_finish_stmt_generation (vinfo, stmt_info,
11050 new_stmt, gsi);
11052 else if (final_mask)
11054 new_stmt = gimple_build_assign (NULL_TREE,
11055 VEC_UNPACK_HI_EXPR,
11056 final_mask);
11057 final_mask = make_ssa_name
11058 (truth_type_for (gs_info.offset_vectype));
11059 gimple_set_lhs (new_stmt, final_mask);
11060 vect_finish_stmt_generation (vinfo, stmt_info,
11061 new_stmt, gsi);
11064 new_stmt = vect_build_one_gather_load_call
11065 (vinfo, stmt_info, gsi, &gs_info,
11066 dataref_ptr,
11067 vec_offsets[2 * vec_num * j + 2 * i + 1],
11068 final_mask);
11069 tree high = make_ssa_name (vectype);
11070 gimple_set_lhs (new_stmt, high);
11071 vect_finish_stmt_generation (vinfo, stmt_info,
11072 new_stmt, gsi);
11074 /* Compose low + high. */
11075 int count = nunits.to_constant ();
11076 vec_perm_builder sel (count, count, 1);
11077 sel.quick_grow (count);
11078 for (int i = 0; i < count; ++i)
11079 sel[i] = i < count / 2 ? i : i + count / 2;
11080 vec_perm_indices indices (sel, 2, count);
11081 tree perm_mask
11082 = vect_gen_perm_mask_checked (vectype, indices);
11083 new_stmt = gimple_build_assign (NULL_TREE,
11084 VEC_PERM_EXPR,
11085 low, high, perm_mask);
11086 data_ref = NULL_TREE;
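/* An illustrative example of the composition above (assumed numbers):
   with nunits == 4 and offset_nunits == 2, the two builtin gathers fill
   only the low lanes of LOW and HIGH, and the permute selector is
   { 0, 1, 4, 5 }, i.e. the two valid lanes of LOW followed by the two
   valid lanes of HIGH.  */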
11088 else if (known_eq (nunits * 2, offset_nunits))
11090 /* We have an offset vector with double the number of
11091 lanes. Select the low/high part accordingly. */
11092 vec_offset = vec_offsets[(vec_num * j + i) / 2];
11093 if ((vec_num * j + i) & 1)
11095 int count = offset_nunits.to_constant ();
11096 vec_perm_builder sel (count, count, 1);
11097 sel.quick_grow (count);
11098 for (int i = 0; i < count; ++i)
11099 sel[i] = i | (count / 2);
11100 vec_perm_indices indices (sel, 2, count);
11101 tree perm_mask = vect_gen_perm_mask_checked
11102 (TREE_TYPE (vec_offset), indices);
11103 new_stmt = gimple_build_assign (NULL_TREE,
11104 VEC_PERM_EXPR,
11105 vec_offset,
11106 vec_offset,
11107 perm_mask);
11108 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11109 gimple_set_lhs (new_stmt, vec_offset);
11110 vect_finish_stmt_generation (vinfo, stmt_info,
11111 new_stmt, gsi);
11113 new_stmt = vect_build_one_gather_load_call
11114 (vinfo, stmt_info, gsi, &gs_info,
11115 dataref_ptr, vec_offset, final_mask);
11116 data_ref = NULL_TREE;
11118 else
11119 gcc_unreachable ();
11121 else
11123 /* Emulated gather-scatter. */
11124 gcc_assert (!final_mask);
11125 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11126 if (costing_p)
11128 /* For emulated gathers, cost N offset vector element extracts (the
11129 scalar scaling and pointer + offset add are assumed to be consumed by the load). */
11130 inside_cost = record_stmt_cost (cost_vec, const_nunits,
11131 vec_to_scalar, stmt_info,
11132 0, vect_body);
11133 /* N scalar loads plus gathering them into a
11134 vector. */
11135 inside_cost
11136 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11137 stmt_info, 0, vect_body);
11138 inside_cost
11139 = record_stmt_cost (cost_vec, 1, vec_construct,
11140 stmt_info, 0, vect_body);
11141 continue;
11143 unsigned HOST_WIDE_INT const_offset_nunits
11144 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11145 .to_constant ();
11146 vec<constructor_elt, va_gc> *ctor_elts;
11147 vec_alloc (ctor_elts, const_nunits);
11148 gimple_seq stmts = NULL;
11149 /* We support offset vectors with more elements
11150 than the data vector for now. */
11151 unsigned HOST_WIDE_INT factor
11152 = const_offset_nunits / const_nunits;
11153 vec_offset = vec_offsets[(vec_num * j + i) / factor];
11154 unsigned elt_offset
11155 = ((vec_num * j + i) % factor) * const_nunits;
11156 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11157 tree scale = size_int (gs_info.scale);
11158 align = get_object_alignment (DR_REF (first_dr_info->dr));
11159 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
11160 for (unsigned k = 0; k < const_nunits; ++k)
11162 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11163 bitsize_int (k + elt_offset));
11164 tree idx
11165 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11166 vec_offset, TYPE_SIZE (idx_type), boff);
11167 idx = gimple_convert (&stmts, sizetype, idx);
11168 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
11169 scale);
11170 tree ptr = gimple_build (&stmts, PLUS_EXPR,
11171 TREE_TYPE (dataref_ptr),
11172 dataref_ptr, idx);
11173 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11174 tree elt = make_ssa_name (TREE_TYPE (vectype));
11175 tree ref = build2 (MEM_REF, ltype, ptr,
11176 build_int_cst (ref_type, 0));
11177 new_stmt = gimple_build_assign (elt, ref);
11178 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11179 gimple_seq_add_stmt (&stmts, new_stmt);
11180 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11182 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11183 new_stmt = gimple_build_assign (
11184 NULL_TREE, build_constructor (vectype, ctor_elts));
11185 data_ref = NULL_TREE;
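/* A sketch of the emulated gather for one lane K (names invented, types
   simplified):
     idx = BIT_FIELD_REF <vec_offset, isize, K * isize>;
     idx = (sizetype) idx * scale;
     ptr = dataref_ptr + idx;
     elt_K = MEM <elem_type> [(elem_type *) ptr];
   the CONST_NUNITS scalar elements are then combined into one vector via
   the CONSTRUCTOR built directly above.  */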
11188 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11189 /* DATA_REF is null if we've already built the statement. */
11190 if (data_ref)
11192 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11193 new_stmt = gimple_build_assign (vec_dest, data_ref);
11195 new_temp = make_ssa_name (vec_dest, new_stmt);
11196 gimple_set_lhs (new_stmt, new_temp);
11197 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11199 /* Store vector loads in the corresponding SLP_NODE. */
11200 if (slp)
11201 slp_node->push_vec_def (new_stmt);
11204 if (!slp && !costing_p)
11205 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11208 if (!slp && !costing_p)
11209 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11211 if (costing_p && dump_enabled_p ())
11212 dump_printf_loc (MSG_NOTE, vect_location,
11213 "vect_model_load_cost: inside_cost = %u, "
11214 "prologue_cost = %u .\n",
11215 inside_cost, prologue_cost);
11216 return true;
11219 poly_uint64 group_elt = 0;
11220 unsigned int inside_cost = 0, prologue_cost = 0;
11221 /* When costing adjacent vector loads, we'd like to account for the
11222 total number of them at once instead of costing each one separately. */
11223 unsigned int n_adjacent_loads = 0;
11224 for (j = 0; j < ncopies; j++)
11226 /* 1. Create the vector or array pointer update chain. */
11227 if (j == 0 && !costing_p)
11229 bool simd_lane_access_p
11230 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11231 if (simd_lane_access_p
11232 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11233 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11234 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11235 && integer_zerop (DR_INIT (first_dr_info->dr))
11236 && alias_sets_conflict_p (get_alias_set (aggr_type),
11237 get_alias_set (TREE_TYPE (ref_type)))
11238 && (alignment_support_scheme == dr_aligned
11239 || alignment_support_scheme == dr_unaligned_supported))
11241 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11242 dataref_offset = build_int_cst (ref_type, 0);
11244 else if (diff_first_stmt_info)
11246 dataref_ptr
11247 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11248 aggr_type, at_loop, offset, &dummy,
11249 gsi, &ptr_incr, simd_lane_access_p,
11250 bump);
11251 /* Adjust the pointer by the difference to first_stmt. */
11252 data_reference_p ptrdr
11253 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11254 tree diff
11255 = fold_convert (sizetype,
11256 size_binop (MINUS_EXPR,
11257 DR_INIT (first_dr_info->dr),
11258 DR_INIT (ptrdr)));
11259 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11260 stmt_info, diff);
11261 if (alignment_support_scheme == dr_explicit_realign)
11263 msq = vect_setup_realignment (vinfo,
11264 first_stmt_info_for_drptr, gsi,
11265 &realignment_token,
11266 alignment_support_scheme,
11267 dataref_ptr, &at_loop);
11268 gcc_assert (!compute_in_loop);
11271 else
11272 dataref_ptr
11273 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11274 at_loop,
11275 offset, &dummy, gsi, &ptr_incr,
11276 simd_lane_access_p, bump);
11278 else if (!costing_p)
11280 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11281 if (dataref_offset)
11282 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11283 bump);
11284 else
11285 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11286 stmt_info, bump);
11289 if (grouped_load || slp_perm)
11290 dr_chain.create (vec_num);
11292 gimple *new_stmt = NULL;
11293 for (i = 0; i < vec_num; i++)
11295 tree final_mask = NULL_TREE;
11296 tree final_len = NULL_TREE;
11297 tree bias = NULL_TREE;
11298 if (!costing_p)
11300 if (mask)
11301 vec_mask = vec_masks[vec_num * j + i];
11302 if (loop_masks)
11303 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11304 vec_num * ncopies, vectype,
11305 vec_num * j + i);
11306 if (vec_mask)
11307 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11308 final_mask, vec_mask, gsi);
11310 if (i > 0)
11311 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11312 gsi, stmt_info, bump);
11315 /* 2. Create the vector-load in the loop. */
11316 switch (alignment_support_scheme)
11318 case dr_aligned:
11319 case dr_unaligned_supported:
11321 if (costing_p)
11322 break;
11324 unsigned int misalign;
11325 unsigned HOST_WIDE_INT align;
11326 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11327 if (alignment_support_scheme == dr_aligned)
11328 misalign = 0;
11329 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11331 align
11332 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11333 misalign = 0;
11335 else
11336 misalign = misalignment;
11337 if (dataref_offset == NULL_TREE
11338 && TREE_CODE (dataref_ptr) == SSA_NAME)
11339 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11340 misalign);
11341 align = least_bit_hwi (misalign | align);
11343 /* Compute the partial load IFN to use when LOOP_LENS or final_mask is valid. */
11344 machine_mode vmode = TYPE_MODE (vectype);
11345 machine_mode new_vmode = vmode;
11346 internal_fn partial_ifn = IFN_LAST;
11347 if (loop_lens)
11349 opt_machine_mode new_ovmode
11350 = get_len_load_store_mode (vmode, true, &partial_ifn);
11351 new_vmode = new_ovmode.require ();
11352 unsigned factor
11353 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
11354 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11355 vec_num * ncopies, vectype,
11356 vec_num * j + i, factor);
11358 else if (final_mask)
11360 if (!can_vec_mask_load_store_p (
11361 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11362 &partial_ifn))
11363 gcc_unreachable ();
11366 if (partial_ifn == IFN_MASK_LEN_LOAD)
11368 if (!final_len)
11370 /* Pass VF value to 'len' argument of
11371 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11372 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11374 if (!final_mask)
11376 /* Pass all ones value to 'mask' argument of
11377 MASK_LEN_LOAD if final_mask is invalid. */
11378 mask_vectype = truth_type_for (vectype);
11379 final_mask = build_minus_one_cst (mask_vectype);
11382 if (final_len)
11384 signed char biasval
11385 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11387 bias = build_int_cst (intQI_type_node, biasval);
11390 if (final_len)
11392 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11393 gcall *call;
11394 if (partial_ifn == IFN_MASK_LEN_LOAD)
11395 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11396 dataref_ptr, ptr,
11397 final_mask, final_len,
11398 bias);
11399 else
11400 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11401 dataref_ptr, ptr,
11402 final_len, bias);
11403 gimple_call_set_nothrow (call, true);
11404 new_stmt = call;
11405 data_ref = NULL_TREE;
11407 /* Need conversion if it's wrapped with VnQI. */
11408 if (vmode != new_vmode)
11410 tree new_vtype = build_vector_type_for_mode (
11411 unsigned_intQI_type_node, new_vmode);
11412 tree var
11413 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11414 gimple_set_lhs (call, var);
11415 vect_finish_stmt_generation (vinfo, stmt_info, call,
11416 gsi);
11417 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11418 new_stmt = gimple_build_assign (vec_dest,
11419 VIEW_CONVERT_EXPR, op);
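/* Illustrative example of the VnQI wrapping (assumed target behaviour):
   if the target only provides len_load with QImode elements, a V4SI load
   is emitted as
     tmp_v16qi = .LEN_LOAD (ptr, align, len_in_bytes, bias);
     vect = VIEW_CONVERT_EXPR <vector(4) int> (tmp_v16qi);
   with FACTOR above scaling the loop length from elements to bytes.  */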
11422 else if (final_mask)
11424 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11425 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11426 dataref_ptr, ptr,
11427 final_mask);
11428 gimple_call_set_nothrow (call, true);
11429 new_stmt = call;
11430 data_ref = NULL_TREE;
11432 else
11434 tree ltype = vectype;
11435 tree new_vtype = NULL_TREE;
11436 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11437 unsigned int vect_align
11438 = vect_known_alignment_in_bytes (first_dr_info, vectype);
11439 unsigned int scalar_dr_size
11440 = vect_get_scalar_dr_size (first_dr_info);
11441 /* If there's no peeling for gaps but we have a gap
11442 with SLP loads, then load only the lower half of the
11443 vector. See get_group_load_store_type for
11444 when we apply this optimization. */
11445 if (slp
11446 && loop_vinfo
11447 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
11448 && known_eq (nunits, (group_size - gap) * 2)
11449 && known_eq (nunits, group_size)
11450 && gap >= (vect_align / scalar_dr_size))
11452 tree half_vtype;
11453 new_vtype
11454 = vector_vector_composition_type (vectype, 2,
11455 &half_vtype);
11456 if (new_vtype != NULL_TREE)
11457 ltype = half_vtype;
11459 tree offset
11460 = (dataref_offset ? dataref_offset
11461 : build_int_cst (ref_type, 0));
11462 if (ltype != vectype
11463 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11465 unsigned HOST_WIDE_INT gap_offset
11466 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
11467 tree gapcst = build_int_cst (ref_type, gap_offset);
11468 offset = size_binop (PLUS_EXPR, offset, gapcst);
11470 data_ref
11471 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11472 if (alignment_support_scheme == dr_aligned)
11474 else
11475 TREE_TYPE (data_ref)
11476 = build_aligned_type (TREE_TYPE (data_ref),
11477 align * BITS_PER_UNIT);
11478 if (ltype != vectype)
11480 vect_copy_ref_info (data_ref,
11481 DR_REF (first_dr_info->dr));
11482 tree tem = make_ssa_name (ltype);
11483 new_stmt = gimple_build_assign (tem, data_ref);
11484 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11485 gsi);
11486 data_ref = NULL;
11487 vec<constructor_elt, va_gc> *v;
11488 vec_alloc (v, 2);
11489 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11491 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11492 build_zero_cst (ltype));
11493 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11495 else
11497 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11498 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11499 build_zero_cst (ltype));
11501 gcc_assert (new_vtype != NULL_TREE);
11502 if (new_vtype == vectype)
11503 new_stmt = gimple_build_assign (
11504 vec_dest, build_constructor (vectype, v));
11505 else
11507 tree new_vname = make_ssa_name (new_vtype);
11508 new_stmt = gimple_build_assign (
11509 new_vname, build_constructor (new_vtype, v));
11510 vect_finish_stmt_generation (vinfo, stmt_info,
11511 new_stmt, gsi);
11512 new_stmt = gimple_build_assign (
11513 vec_dest,
11514 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
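/* For illustration (assumed numbers): with group_size == 4, gap == 2 and
   nunits == 4 only the first half of the vector is loaded:
     tem = MEM <vector(2) T> [ptr];
     vect = VIEW_CONVERT_EXPR <vector(4) T> ({ tem, { 0, ... } });
   for VMAT_CONTIGUOUS_REVERSE the zero half comes first and the load is
   offset past the gap.  */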
11518 break;
11520 case dr_explicit_realign:
11522 if (costing_p)
11523 break;
11524 tree ptr, bump;
11526 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11528 if (compute_in_loop)
11529 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11530 &realignment_token,
11531 dr_explicit_realign,
11532 dataref_ptr, NULL);
11534 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11535 ptr = copy_ssa_name (dataref_ptr);
11536 else
11537 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11538 // For explicit realign the target alignment should be
11539 // known at compile time.
11540 unsigned HOST_WIDE_INT align
11541 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11542 new_stmt = gimple_build_assign (
11543 ptr, BIT_AND_EXPR, dataref_ptr,
11544 build_int_cst (TREE_TYPE (dataref_ptr),
11545 -(HOST_WIDE_INT) align));
11546 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11547 data_ref
11548 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11549 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11550 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11551 new_stmt = gimple_build_assign (vec_dest, data_ref);
11552 new_temp = make_ssa_name (vec_dest, new_stmt);
11553 gimple_assign_set_lhs (new_stmt, new_temp);
11554 gimple_move_vops (new_stmt, stmt_info->stmt);
11555 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11556 msq = new_temp;
11558 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11559 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11560 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11561 bump);
11562 new_stmt = gimple_build_assign (
11563 NULL_TREE, BIT_AND_EXPR, ptr,
11564 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11565 if (TREE_CODE (ptr) == SSA_NAME)
11566 ptr = copy_ssa_name (ptr, new_stmt);
11567 else
11568 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11569 gimple_assign_set_lhs (new_stmt, ptr);
11570 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11571 data_ref
11572 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11573 break;
11575 case dr_explicit_realign_optimized:
11577 if (costing_p)
11578 break;
11579 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11580 new_temp = copy_ssa_name (dataref_ptr);
11581 else
11582 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11583 // We should only be doing this if we know the target
11584 // alignment at compile time.
11585 unsigned HOST_WIDE_INT align
11586 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11587 new_stmt = gimple_build_assign (
11588 new_temp, BIT_AND_EXPR, dataref_ptr,
11589 build_int_cst (TREE_TYPE (dataref_ptr),
11590 -(HOST_WIDE_INT) align));
11591 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11592 data_ref = build2 (MEM_REF, vectype, new_temp,
11593 build_int_cst (ref_type, 0));
11594 break;
11596 default:
11597 gcc_unreachable ();
11600 /* One common place to cost the above vect load for different
11601 alignment support schemes. */
11602 if (costing_p)
11604 /* For a grouped VMAT_CONTIGUOUS_PERMUTE load we only
11605 need to take care of the first stmt, whose
11606 stmt_info is first_stmt_info; iterating vec_num times on it
11607 covers the cost of the remaining stmts, which is consistent
11608 with the transform. The prologue cost for realign
11609 only needs to be counted once for the whole group. */
11610 bool first_stmt_info_p = first_stmt_info == stmt_info;
11611 bool add_realign_cost = first_stmt_info_p && i == 0;
11612 if (memory_access_type == VMAT_CONTIGUOUS
11613 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11614 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11615 && (!grouped_load || first_stmt_info_p)))
11617 /* Leave realign cases alone to keep them simple. */
11618 if (alignment_support_scheme == dr_explicit_realign_optimized
11619 || alignment_support_scheme == dr_explicit_realign)
11620 vect_get_load_cost (vinfo, stmt_info, 1,
11621 alignment_support_scheme, misalignment,
11622 add_realign_cost, &inside_cost,
11623 &prologue_cost, cost_vec, cost_vec,
11624 true);
11625 else
11626 n_adjacent_loads++;
11629 else
11631 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11632 /* DATA_REF is null if we've already built the statement. */
11633 if (data_ref)
11635 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11636 new_stmt = gimple_build_assign (vec_dest, data_ref);
11638 new_temp = make_ssa_name (vec_dest, new_stmt);
11639 gimple_set_lhs (new_stmt, new_temp);
11640 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11643 /* 3. Handle explicit realignment if necessary/supported.
11644 Create in loop:
11645 vec_dest = realign_load (msq, lsq, realignment_token) */
11646 if (!costing_p
11647 && (alignment_support_scheme == dr_explicit_realign_optimized
11648 || alignment_support_scheme == dr_explicit_realign))
11650 lsq = gimple_assign_lhs (new_stmt);
11651 if (!realignment_token)
11652 realignment_token = dataref_ptr;
11653 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11654 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11655 lsq, realignment_token);
11656 new_temp = make_ssa_name (vec_dest, new_stmt);
11657 gimple_assign_set_lhs (new_stmt, new_temp);
11658 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11660 if (alignment_support_scheme == dr_explicit_realign_optimized)
11662 gcc_assert (phi);
11663 if (i == vec_num - 1 && j == ncopies - 1)
11664 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11665 UNKNOWN_LOCATION);
11666 msq = lsq;
11670 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11672 if (costing_p)
11673 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11674 stmt_info, 0, vect_body);
11675 else
11677 tree perm_mask = perm_mask_for_reverse (vectype);
11678 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11679 perm_mask, stmt_info, gsi);
11680 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11684 /* Collect vector loads and later create their permutation in
11685 vect_transform_grouped_load (). */
11686 if (!costing_p && (grouped_load || slp_perm))
11687 dr_chain.quick_push (new_temp);
11689 /* Store vector loads in the corresponding SLP_NODE. */
11690 if (!costing_p && slp && !slp_perm)
11691 slp_node->push_vec_def (new_stmt);
11693 /* With SLP permutation we load the gaps as well; without
11694 it we need to skip the gaps once we have fully loaded
11695 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11696 group_elt += nunits;
11697 if (!costing_p
11698 && maybe_ne (group_gap_adj, 0U)
11699 && !slp_perm
11700 && known_eq (group_elt, group_size - group_gap_adj))
11702 poly_wide_int bump_val
11703 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11704 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11705 == -1)
11706 bump_val = -bump_val;
11707 tree bump = wide_int_to_tree (sizetype, bump_val);
11708 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11709 stmt_info, bump);
11710 group_elt = 0;
11713 /* Bump the vector pointer to account for a gap or for excess
11714 elements loaded for a permuted SLP load. */
11715 if (!costing_p
11716 && maybe_ne (group_gap_adj, 0U)
11717 && slp_perm)
11719 poly_wide_int bump_val
11720 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11721 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11722 bump_val = -bump_val;
11723 tree bump = wide_int_to_tree (sizetype, bump_val);
11724 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11725 stmt_info, bump);
11728 if (slp && !slp_perm)
11729 continue;
11731 if (slp_perm)
11733 unsigned n_perms;
11734 /* For SLP we know we've seen all possible uses of dr_chain so
11735 direct vect_transform_slp_perm_load to DCE the unused parts.
11736 ??? This is a hack to prevent compile-time issues as seen
11737 in PR101120 and friends. */
11738 if (costing_p)
11740 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11741 true, &n_perms, nullptr);
11742 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11743 stmt_info, 0, vect_body);
11745 else
11747 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11748 gsi, vf, false, &n_perms,
11749 nullptr, true);
11750 gcc_assert (ok);
11753 else
11755 if (grouped_load)
11757 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11758 /* We assume that the cost of a single load-lanes instruction
11759 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11760 If a grouped access is instead being provided by a
11761 load-and-permute operation, include the cost of the
11762 permutes. */
11763 if (costing_p && first_stmt_info == stmt_info)
11765 /* Uses even and odd extract operations or shuffle
11766 operations for each needed permute. */
11767 int group_size = DR_GROUP_SIZE (first_stmt_info);
11768 int nstmts = ceil_log2 (group_size) * group_size;
11769 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11770 stmt_info, 0, vect_body);
11772 if (dump_enabled_p ())
11773 dump_printf_loc (MSG_NOTE, vect_location,
11774 "vect_model_load_cost:"
11775 "strided group_size = %d .\n",
11776 group_size);
11778 else if (!costing_p)
11780 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11781 group_size, gsi);
11782 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11785 else if (!costing_p)
11786 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11788 dr_chain.release ();
11790 if (!slp && !costing_p)
11791 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11793 if (costing_p)
11795 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11796 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11797 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11798 if (n_adjacent_loads > 0)
11799 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
11800 alignment_support_scheme, misalignment, false,
11801 &inside_cost, &prologue_cost, cost_vec, cost_vec,
11802 true);
11803 if (dump_enabled_p ())
11804 dump_printf_loc (MSG_NOTE, vect_location,
11805 "vect_model_load_cost: inside_cost = %u, "
11806 "prologue_cost = %u .\n",
11807 inside_cost, prologue_cost);
11810 return true;
11813 /* Function vect_is_simple_cond.
11815 Input:
11816 LOOP - the loop that is being vectorized.
11817 COND - Condition that is checked for simple use.
11819 Output:
11820 *COMP_VECTYPE - the vector type for the comparison.
11821 *DTS - The def types for the arguments of the comparison
11823 Returns whether a COND can be vectorized. Checks whether
11824 condition operands are supportable using vect_is_simple_use. */
11826 static bool
11827 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11828 slp_tree slp_node, tree *comp_vectype,
11829 enum vect_def_type *dts, tree vectype)
11831 tree lhs, rhs;
11832 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11833 slp_tree slp_op;
11835 /* Mask case. */
11836 if (TREE_CODE (cond) == SSA_NAME
11837 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11839 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11840 &slp_op, &dts[0], comp_vectype)
11841 || !*comp_vectype
11842 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11843 return false;
11844 return true;
11847 if (!COMPARISON_CLASS_P (cond))
11848 return false;
11850 lhs = TREE_OPERAND (cond, 0);
11851 rhs = TREE_OPERAND (cond, 1);
11853 if (TREE_CODE (lhs) == SSA_NAME)
11855 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11856 &lhs, &slp_op, &dts[0], &vectype1))
11857 return false;
11859 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11860 || TREE_CODE (lhs) == FIXED_CST)
11861 dts[0] = vect_constant_def;
11862 else
11863 return false;
11865 if (TREE_CODE (rhs) == SSA_NAME)
11867 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11868 &rhs, &slp_op, &dts[1], &vectype2))
11869 return false;
11871 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11872 || TREE_CODE (rhs) == FIXED_CST)
11873 dts[1] = vect_constant_def;
11874 else
11875 return false;
11877 if (vectype1 && vectype2
11878 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11879 TYPE_VECTOR_SUBPARTS (vectype2)))
11880 return false;
11882 *comp_vectype = vectype1 ? vectype1 : vectype2;
11883 /* Invariant comparison. */
11884 if (! *comp_vectype)
11886 tree scalar_type = TREE_TYPE (lhs);
11887 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11888 *comp_vectype = truth_type_for (vectype);
11889 else
11891 /* If we can widen the comparison to match vectype do so. */
11892 if (INTEGRAL_TYPE_P (scalar_type)
11893 && !slp_node
11894 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11895 TYPE_SIZE (TREE_TYPE (vectype))))
11896 scalar_type = build_nonstandard_integer_type
11897 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
11898 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11899 slp_node);
11903 return true;
11906 /* vectorizable_condition.
11908 Check if STMT_INFO is a conditional modify expression that can be vectorized.
11909 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11910 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11911 at GSI.
11913 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11915 Return true if STMT_INFO is vectorizable in this way. */
11917 static bool
11918 vectorizable_condition (vec_info *vinfo,
11919 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11920 gimple **vec_stmt,
11921 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11923 tree scalar_dest = NULL_TREE;
11924 tree vec_dest = NULL_TREE;
11925 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
11926 tree then_clause, else_clause;
11927 tree comp_vectype = NULL_TREE;
11928 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
11929 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
11930 tree vec_compare;
11931 tree new_temp;
11932 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11933 enum vect_def_type dts[4]
11934 = {vect_unknown_def_type, vect_unknown_def_type,
11935 vect_unknown_def_type, vect_unknown_def_type};
11936 int ndts = 4;
11937 int ncopies;
11938 int vec_num;
11939 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11940 int i;
11941 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11942 vec<tree> vec_oprnds0 = vNULL;
11943 vec<tree> vec_oprnds1 = vNULL;
11944 vec<tree> vec_oprnds2 = vNULL;
11945 vec<tree> vec_oprnds3 = vNULL;
11946 tree vec_cmp_type;
11947 bool masked = false;
11949 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11950 return false;
11952 /* Is this a vectorizable conditional operation? */
11953 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11954 if (!stmt)
11955 return false;
11957 code = gimple_assign_rhs_code (stmt);
11958 if (code != COND_EXPR)
11959 return false;
11961 stmt_vec_info reduc_info = NULL;
11962 int reduc_index = -1;
11963 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
11964 bool for_reduction
11965 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
11966 if (for_reduction)
11968 if (slp_node)
11969 return false;
11970 reduc_info = info_for_reduction (vinfo, stmt_info);
11971 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
11972 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
11973 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
11974 || reduc_index != -1);
11976 else
11978 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11979 return false;
11982 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11983 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11985 if (slp_node)
11987 ncopies = 1;
11988 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
11990 else
11992 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11993 vec_num = 1;
11996 gcc_assert (ncopies >= 1);
11997 if (for_reduction && ncopies > 1)
11998 return false; /* FORNOW */
12000 cond_expr = gimple_assign_rhs1 (stmt);
12002 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
12003 &comp_vectype, &dts[0], vectype)
12004 || !comp_vectype)
12005 return false;
12007 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
12008 slp_tree then_slp_node, else_slp_node;
12009 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
12010 &then_clause, &then_slp_node, &dts[2], &vectype1))
12011 return false;
12012 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
12013 &else_clause, &else_slp_node, &dts[3], &vectype2))
12014 return false;
12016 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
12017 return false;
12019 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
12020 return false;
12022 masked = !COMPARISON_CLASS_P (cond_expr);
12023 vec_cmp_type = truth_type_for (comp_vectype);
12025 if (vec_cmp_type == NULL_TREE)
12026 return false;
12028 cond_code = TREE_CODE (cond_expr);
12029 if (!masked)
12031 cond_expr0 = TREE_OPERAND (cond_expr, 0);
12032 cond_expr1 = TREE_OPERAND (cond_expr, 1);
12035 /* For conditional reductions, the "then" value needs to be the candidate
12036 value calculated by this iteration while the "else" value needs to be
12037 the result carried over from previous iterations. If the COND_EXPR
12038 is the other way around, we need to swap it. */
12039 bool must_invert_cmp_result = false;
12040 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
12042 if (masked)
12043 must_invert_cmp_result = true;
12044 else
12046 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
12047 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
12048 if (new_code == ERROR_MARK)
12049 must_invert_cmp_result = true;
12050 else
12052 cond_code = new_code;
12053 /* Make sure we don't accidentally use the old condition. */
12054 cond_expr = NULL_TREE;
12057 std::swap (then_clause, else_clause);
12060 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12062 /* Boolean values may have another representation in vectors
12063 and therefore we prefer bit operations over comparison for
12064 them (which also works for scalar masks). We store opcodes
12065 to use in bitop1 and bitop2. Statement is vectorized as
12066 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12067 depending on bitop1 and bitop2 arity. */
12068 switch (cond_code)
12070 case GT_EXPR:
12071 bitop1 = BIT_NOT_EXPR;
12072 bitop2 = BIT_AND_EXPR;
12073 break;
12074 case GE_EXPR:
12075 bitop1 = BIT_NOT_EXPR;
12076 bitop2 = BIT_IOR_EXPR;
12077 break;
12078 case LT_EXPR:
12079 bitop1 = BIT_NOT_EXPR;
12080 bitop2 = BIT_AND_EXPR;
12081 std::swap (cond_expr0, cond_expr1);
12082 break;
12083 case LE_EXPR:
12084 bitop1 = BIT_NOT_EXPR;
12085 bitop2 = BIT_IOR_EXPR;
12086 std::swap (cond_expr0, cond_expr1);
12087 break;
12088 case NE_EXPR:
12089 bitop1 = BIT_XOR_EXPR;
12090 break;
12091 case EQ_EXPR:
12092 bitop1 = BIT_XOR_EXPR;
12093 bitop2 = BIT_NOT_EXPR;
12094 break;
12095 default:
12096 return false;
12098 cond_code = SSA_NAME;
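/* For example, derived from the mapping above: with boolean mask operands
   a and b,
     a >  b  is computed as  a & ~b
     a >= b  is computed as  a | ~b
     a != b  is computed as  a ^ b
     a == b  is computed as  ~(a ^ b)
   and for LT/LE the operands are swapped first.  */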
12101 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12102 && reduction_type == EXTRACT_LAST_REDUCTION
12103 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12105 if (dump_enabled_p ())
12106 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12107 "reduction comparison operation not supported.\n");
12108 return false;
12111 if (!vec_stmt)
12113 if (bitop1 != NOP_EXPR)
12115 machine_mode mode = TYPE_MODE (comp_vectype);
12116 optab optab;
12118 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12119 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12120 return false;
12122 if (bitop2 != NOP_EXPR)
12124 optab = optab_for_tree_code (bitop2, comp_vectype,
12125 optab_default);
12126 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12127 return false;
12131 vect_cost_for_stmt kind = vector_stmt;
12132 if (reduction_type == EXTRACT_LAST_REDUCTION)
12133 /* Count one reduction-like operation per vector. */
12134 kind = vec_to_scalar;
12135 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12136 && (masked
12137 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12138 cond_code)
12139 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12140 ERROR_MARK))))
12141 return false;
12143 if (slp_node
12144 && (!vect_maybe_update_slp_op_vectype
12145 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12146 || (op_adjust == 1
12147 && !vect_maybe_update_slp_op_vectype
12148 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12149 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12150 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12152 if (dump_enabled_p ())
12153 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12154 "incompatible vector types for invariants\n");
12155 return false;
12158 if (loop_vinfo && for_reduction
12159 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12161 if (reduction_type == EXTRACT_LAST_REDUCTION)
12163 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12164 vectype, OPTIMIZE_FOR_SPEED))
12165 vect_record_loop_len (loop_vinfo,
12166 &LOOP_VINFO_LENS (loop_vinfo),
12167 ncopies * vec_num, vectype, 1);
12168 else
12169 vect_record_loop_mask (loop_vinfo,
12170 &LOOP_VINFO_MASKS (loop_vinfo),
12171 ncopies * vec_num, vectype, NULL);
12173 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12174 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12176 if (dump_enabled_p ())
12177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12178 "conditional reduction prevents the use"
12179 " of partial vectors.\n");
12180 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12184 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12185 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
12186 cost_vec, kind);
12187 return true;
12190 /* Transform. */
12192 /* Handle def. */
12193 scalar_dest = gimple_assign_lhs (stmt);
12194 if (reduction_type != EXTRACT_LAST_REDUCTION)
12195 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12197 bool swap_cond_operands = false;
12199 /* See whether another part of the vectorized code applies a loop
12200 mask to the condition, or to its inverse. */
12202 vec_loop_masks *masks = NULL;
12203 vec_loop_lens *lens = NULL;
12204 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12206 if (reduction_type == EXTRACT_LAST_REDUCTION)
12207 lens = &LOOP_VINFO_LENS (loop_vinfo);
12209 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12211 if (reduction_type == EXTRACT_LAST_REDUCTION)
12212 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12213 else
12215 scalar_cond_masked_key cond (cond_expr, ncopies);
12216 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12217 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12218 else
12220 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12221 tree_code orig_code = cond.code;
12222 cond.code = invert_tree_comparison (cond.code, honor_nans);
12223 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12225 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12226 cond_code = cond.code;
12227 swap_cond_operands = true;
12229 else
12231 /* Try the inverse of the current mask. We check whether the
12232 inverse mask is live and, if so, we generate a negation of
12233 the current mask such that we still honor NaNs. */
12234 cond.inverted_p = true;
12235 cond.code = orig_code;
12236 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12238 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12239 cond_code = cond.code;
12240 swap_cond_operands = true;
12241 must_invert_cmp_result = true;
12248 /* Handle cond expr. */
12249 if (masked)
12250 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12251 cond_expr, &vec_oprnds0, comp_vectype,
12252 then_clause, &vec_oprnds2, vectype,
12253 reduction_type != EXTRACT_LAST_REDUCTION
12254 ? else_clause : NULL, &vec_oprnds3, vectype);
12255 else
12256 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12257 cond_expr0, &vec_oprnds0, comp_vectype,
12258 cond_expr1, &vec_oprnds1, comp_vectype,
12259 then_clause, &vec_oprnds2, vectype,
12260 reduction_type != EXTRACT_LAST_REDUCTION
12261 ? else_clause : NULL, &vec_oprnds3, vectype);
12263 /* Arguments are ready. Create the new vector stmt. */
12264 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12266 vec_then_clause = vec_oprnds2[i];
12267 if (reduction_type != EXTRACT_LAST_REDUCTION)
12268 vec_else_clause = vec_oprnds3[i];
12270 if (swap_cond_operands)
12271 std::swap (vec_then_clause, vec_else_clause);
12273 if (masked)
12274 vec_compare = vec_cond_lhs;
12275 else
12277 vec_cond_rhs = vec_oprnds1[i];
12278 if (bitop1 == NOP_EXPR)
12280 gimple_seq stmts = NULL;
12281 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12282 vec_cond_lhs, vec_cond_rhs);
12283 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12285 else
12287 new_temp = make_ssa_name (vec_cmp_type);
12288 gassign *new_stmt;
12289 if (bitop1 == BIT_NOT_EXPR)
12290 new_stmt = gimple_build_assign (new_temp, bitop1,
12291 vec_cond_rhs);
12292 else
12293 new_stmt
12294 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12295 vec_cond_rhs);
12296 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12297 if (bitop2 == NOP_EXPR)
12298 vec_compare = new_temp;
12299 else if (bitop2 == BIT_NOT_EXPR
12300 && reduction_type != EXTRACT_LAST_REDUCTION)
12302 /* Instead of doing ~x ? y : z do x ? z : y. */
12303 vec_compare = new_temp;
12304 std::swap (vec_then_clause, vec_else_clause);
12306 else
12308 vec_compare = make_ssa_name (vec_cmp_type);
12309 if (bitop2 == BIT_NOT_EXPR)
12310 new_stmt
12311 = gimple_build_assign (vec_compare, bitop2, new_temp);
12312 else
12313 new_stmt
12314 = gimple_build_assign (vec_compare, bitop2,
12315 vec_cond_lhs, new_temp);
12316 vect_finish_stmt_generation (vinfo, stmt_info,
12317 new_stmt, gsi);
12322 /* If we decided to apply a loop mask to the result of the vector
12323 comparison, AND the comparison with the mask now. Later passes
12324 should then be able to reuse the AND results between multiple
12325 vector statements.
12327 For example:
12328 for (int i = 0; i < 100; ++i)
12329 x[i] = y[i] ? z[i] : 10;
12331 results in following optimized GIMPLE:
12333 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12334 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12335 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12336 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12337 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12338 vect_iftmp.11_47, { 10, ... }>;
12340 instead of using separate masked and unmasked forms of
12341 vec != { 0, ... } (masked in the MASK_LOAD,
12342 unmasked in the VEC_COND_EXPR). */
12344 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12345 in cases where that's necessary. */
12347 tree len = NULL_TREE, bias = NULL_TREE;
12348 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12350 if (!is_gimple_val (vec_compare))
12352 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12353 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12354 vec_compare);
12355 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12356 vec_compare = vec_compare_name;
12359 if (must_invert_cmp_result)
12361 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12362 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12363 BIT_NOT_EXPR,
12364 vec_compare);
12365 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12366 vec_compare = vec_compare_name;
12369 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12370 vectype, OPTIMIZE_FOR_SPEED))
12372 if (lens)
12374 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12375 vec_num * ncopies, vectype, i, 1);
12376 signed char biasval
12377 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12378 bias = build_int_cst (intQI_type_node, biasval);
12380 else
12382 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12383 bias = build_int_cst (intQI_type_node, 0);
12386 if (masks)
12388 tree loop_mask
12389 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12390 vectype, i);
12391 tree tmp2 = make_ssa_name (vec_cmp_type);
12392 gassign *g
12393 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12394 loop_mask);
12395 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12396 vec_compare = tmp2;
12400 gimple *new_stmt;
12401 if (reduction_type == EXTRACT_LAST_REDUCTION)
12403 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12404 tree lhs = gimple_get_lhs (old_stmt);
12405 if (len)
12406 new_stmt = gimple_build_call_internal
12407 (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
12408 vec_then_clause, len, bias);
12409 else
12410 new_stmt = gimple_build_call_internal
12411 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
12412 vec_then_clause);
12413 gimple_call_set_lhs (new_stmt, lhs);
12414 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12415 if (old_stmt == gsi_stmt (*gsi))
12416 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12417 else
12419 /* In this case we're moving the definition to later in the
12420 block. That doesn't matter because the only uses of the
12421 lhs are in phi statements. */
12422 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12423 gsi_remove (&old_gsi, true);
12424 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12427 else
12429 new_temp = make_ssa_name (vec_dest);
12430 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12431 vec_then_clause, vec_else_clause);
12432 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12434 if (slp_node)
12435 slp_node->push_vec_def (new_stmt);
12436 else
12437 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12440 if (!slp_node)
12441 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12443 vec_oprnds0.release ();
12444 vec_oprnds1.release ();
12445 vec_oprnds2.release ();
12446 vec_oprnds3.release ();
12448 return true;
12451 /* Helper of vectorizable_comparison.
12453 Check if STMT_INFO is a comparison expression CODE that can be vectorized.
12454 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12455 comparison, put it in VEC_STMT, and insert it at GSI.
12457 Return true if STMT_INFO is vectorizable in this way. */
12459 static bool
12460 vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12461 stmt_vec_info stmt_info, tree_code code,
12462 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12463 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12465 tree lhs, rhs1, rhs2;
12466 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12467 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12468 tree new_temp;
12469 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12470 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12471 int ndts = 2;
12472 poly_uint64 nunits;
12473 int ncopies;
12474 enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12475 int i;
12476 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12477 vec<tree> vec_oprnds0 = vNULL;
12478 vec<tree> vec_oprnds1 = vNULL;
12479 tree mask_type;
12480 tree mask;
12482 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12483 return false;
12485 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12486 return false;
12488 mask_type = vectype;
12489 nunits = TYPE_VECTOR_SUBPARTS (vectype);
12491 if (slp_node)
12492 ncopies = 1;
12493 else
12494 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12496 gcc_assert (ncopies >= 1);
12498 if (TREE_CODE_CLASS (code) != tcc_comparison)
12499 return false;
12501 slp_tree slp_rhs1, slp_rhs2;
12502 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12503 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12504 return false;
12506 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12507 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12508 return false;
12510 if (vectype1 && vectype2
12511 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12512 TYPE_VECTOR_SUBPARTS (vectype2)))
12513 return false;
12515 vectype = vectype1 ? vectype1 : vectype2;
12517 /* Invariant comparison. */
12518 if (!vectype)
12520 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
12521 vectype = mask_type;
12522 else
12523 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
12524 slp_node);
12525 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12526 return false;
12528 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12529 return false;
12531 /* Can't compare mask and non-mask types. */
12532 if (vectype1 && vectype2
12533 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12534 return false;
12536 /* Boolean values may have another representation in vectors
12537 and therefore we prefer bit operations over comparison for
12538 them (which also works for scalar masks). We store opcodes
12539 to use in bitop1 and bitop2. Statement is vectorized as
12540 BITOP2 (rhs1 BITOP1 rhs2) or
12541 rhs1 BITOP2 (BITOP1 rhs2)
12542 depending on bitop1 and bitop2 arity. */
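/* For example, on boolean vectors a GT_EXPR a > b is emitted as a & ~b
   (bitop1 = BIT_NOT_EXPR applied to the second operand, then
   bitop2 = BIT_AND_EXPR), LT_EXPR uses the same opcodes with the operands
   swapped, EQ_EXPR becomes ~(a ^ b) and NE_EXPR plain a ^ b.  */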
12543 bool swap_p = false;
12544 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12546 if (code == GT_EXPR)
12548 bitop1 = BIT_NOT_EXPR;
12549 bitop2 = BIT_AND_EXPR;
12551 else if (code == GE_EXPR)
12553 bitop1 = BIT_NOT_EXPR;
12554 bitop2 = BIT_IOR_EXPR;
12556 else if (code == LT_EXPR)
12558 bitop1 = BIT_NOT_EXPR;
12559 bitop2 = BIT_AND_EXPR;
12560 swap_p = true;
12562 else if (code == LE_EXPR)
12564 bitop1 = BIT_NOT_EXPR;
12565 bitop2 = BIT_IOR_EXPR;
12566 swap_p = true;
12568 else
12570 bitop1 = BIT_XOR_EXPR;
12571 if (code == EQ_EXPR)
12572 bitop2 = BIT_NOT_EXPR;
12576 if (!vec_stmt)
12578 if (bitop1 == NOP_EXPR)
12580 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12581 return false;
12583 else
12585 machine_mode mode = TYPE_MODE (vectype);
12586 optab optab;
12588 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12589 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12590 return false;
12592 if (bitop2 != NOP_EXPR)
12594 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12595 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12596 return false;
12600 /* Put types on constant and invariant SLP children. */
12601 if (slp_node
12602 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12603 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12605 if (dump_enabled_p ())
12606 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12607 "incompatible vector types for invariants\n");
12608 return false;
12611 vect_model_simple_cost (vinfo, stmt_info,
12612 ncopies * (1 + (bitop2 != NOP_EXPR)),
12613 dts, ndts, slp_node, cost_vec);
12614 return true;
12617 /* Transform. */
12619 /* Handle def. */
12620 lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info));
12621 mask = vect_create_destination_var (lhs, mask_type);
12623 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12624 rhs1, &vec_oprnds0, vectype,
12625 rhs2, &vec_oprnds1, vectype);
12626 if (swap_p)
12627 std::swap (vec_oprnds0, vec_oprnds1);
12629 /* Arguments are ready. Create the new vector stmt. */
12630 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12632 gimple *new_stmt;
12633 vec_rhs2 = vec_oprnds1[i];
12635 new_temp = make_ssa_name (mask);
12636 if (bitop1 == NOP_EXPR)
12638 new_stmt = gimple_build_assign (new_temp, code,
12639 vec_rhs1, vec_rhs2);
12640 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12642 else
12644 if (bitop1 == BIT_NOT_EXPR)
12645 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12646 else
12647 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12648 vec_rhs2);
12649 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12650 if (bitop2 != NOP_EXPR)
12652 tree res = make_ssa_name (mask);
12653 if (bitop2 == BIT_NOT_EXPR)
12654 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12655 else
12656 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12657 new_temp);
12658 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12661 if (slp_node)
12662 slp_node->push_vec_def (new_stmt);
12663 else
12664 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12667 if (!slp_node)
12668 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12670 vec_oprnds0.release ();
12671 vec_oprnds1.release ();
12673 return true;
12676 /* vectorizable_comparison.
12678 Check if STMT_INFO is a comparison expression that can be vectorized.
12679 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12680 comparison, put it in VEC_STMT, and insert it at GSI.
12682 Return true if STMT_INFO is vectorizable in this way. */
12684 static bool
12685 vectorizable_comparison (vec_info *vinfo,
12686 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12687 gimple **vec_stmt,
12688 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12690 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12692 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12693 return false;
12695 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12696 return false;
12698 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12699 if (!stmt)
12700 return false;
12702 enum tree_code code = gimple_assign_rhs_code (stmt);
12703 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12704 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12705 vec_stmt, slp_node, cost_vec))
12706 return false;
12708 if (!vec_stmt)
12709 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12711 return true;
12714 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12715 can handle all live statements in the node. Otherwise return true
12716 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12717 VEC_STMT_P is as for vectorizable_live_operation. */
12719 static bool
12720 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
12721 slp_tree slp_node, slp_instance slp_node_instance,
12722 bool vec_stmt_p,
12723 stmt_vector_for_cost *cost_vec)
12725 if (slp_node)
12727 stmt_vec_info slp_stmt_info;
12728 unsigned int i;
12729 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
12731 if (STMT_VINFO_LIVE_P (slp_stmt_info)
12732 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
12733 slp_node_instance, i,
12734 vec_stmt_p, cost_vec))
12735 return false;
12738 else if (STMT_VINFO_LIVE_P (stmt_info)
12739 && !vectorizable_live_operation (vinfo, stmt_info,
12740 slp_node, slp_node_instance, -1,
12741 vec_stmt_p, cost_vec))
12742 return false;
12744 return true;
12747 /* Make sure the statement is vectorizable. */
12749 opt_result
12750 vect_analyze_stmt (vec_info *vinfo,
12751 stmt_vec_info stmt_info, bool *need_to_vectorize,
12752 slp_tree node, slp_instance node_instance,
12753 stmt_vector_for_cost *cost_vec)
12755 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12756 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
12757 bool ok;
12758 gimple_seq pattern_def_seq;
12760 if (dump_enabled_p ())
12761 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
12762 stmt_info->stmt);
12764 if (gimple_has_volatile_ops (stmt_info->stmt))
12765 return opt_result::failure_at (stmt_info->stmt,
12766 "not vectorized:"
12767 " stmt has volatile operands: %G\n",
12768 stmt_info->stmt);
12770 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12771 && node == NULL
12772 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
12774 gimple_stmt_iterator si;
12776 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
12778 stmt_vec_info pattern_def_stmt_info
12779 = vinfo->lookup_stmt (gsi_stmt (si));
12780 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
12781 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
12783 /* Analyze def stmt of STMT if it's a pattern stmt. */
12784 if (dump_enabled_p ())
12785 dump_printf_loc (MSG_NOTE, vect_location,
12786 "==> examining pattern def statement: %G",
12787 pattern_def_stmt_info->stmt);
12789 opt_result res
12790 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
12791 need_to_vectorize, node, node_instance,
12792 cost_vec);
12793 if (!res)
12794 return res;
12799 /* Skip stmts that do not need to be vectorized. In loops this is expected
12800 to include:
12801 - the COND_EXPR which is the loop exit condition
12802 - any LABEL_EXPRs in the loop
12803 - computations that are used only for array indexing or loop control.
12804 In basic blocks we only analyze statements that are a part of some SLP
12805 instance, therefore, all the statements are relevant.
12807 A pattern statement needs to be analyzed instead of the original statement
12808 if the original statement is not relevant. Otherwise, we analyze both
12809 statements. In basic blocks we are called from some SLP instance
12810 traversal, so don't analyze pattern stmts instead; the pattern stmts
12811 will already be part of an SLP instance. */
12813 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
12814 if (!STMT_VINFO_RELEVANT_P (stmt_info)
12815 && !STMT_VINFO_LIVE_P (stmt_info))
12817 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12818 && pattern_stmt_info
12819 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12820 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12822 /* Analyze PATTERN_STMT instead of the original stmt. */
12823 stmt_info = pattern_stmt_info;
12824 if (dump_enabled_p ())
12825 dump_printf_loc (MSG_NOTE, vect_location,
12826 "==> examining pattern statement: %G",
12827 stmt_info->stmt);
12829 else
12831 if (dump_enabled_p ())
12832 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
12834 return opt_result::success ();
12837 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12838 && node == NULL
12839 && pattern_stmt_info
12840 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12841 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12843 /* Analyze PATTERN_STMT too. */
12844 if (dump_enabled_p ())
12845 dump_printf_loc (MSG_NOTE, vect_location,
12846 "==> examining pattern statement: %G",
12847 pattern_stmt_info->stmt);
12849 opt_result res
12850 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
12851 node_instance, cost_vec);
12852 if (!res)
12853 return res;
12856 switch (STMT_VINFO_DEF_TYPE (stmt_info))
12858 case vect_internal_def:
12859 break;
12861 case vect_reduction_def:
12862 case vect_nested_cycle:
12863 gcc_assert (!bb_vinfo
12864 && (relevance == vect_used_in_outer
12865 || relevance == vect_used_in_outer_by_reduction
12866 || relevance == vect_used_by_reduction
12867 || relevance == vect_unused_in_scope
12868 || relevance == vect_used_only_live));
12869 break;
12871 case vect_induction_def:
12872 case vect_first_order_recurrence:
12873 gcc_assert (!bb_vinfo);
12874 break;
12876 case vect_constant_def:
12877 case vect_external_def:
12878 case vect_unknown_def_type:
12879 default:
12880 gcc_unreachable ();
12883 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12884 if (node)
12885 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
12887 if (STMT_VINFO_RELEVANT_P (stmt_info))
12889 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
12890 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12891 || (call && gimple_call_lhs (call) == NULL_TREE));
12892 *need_to_vectorize = true;
12895 if (PURE_SLP_STMT (stmt_info) && !node)
12897 if (dump_enabled_p ())
12898 dump_printf_loc (MSG_NOTE, vect_location,
12899 "handled only by SLP analysis\n");
12900 return opt_result::success ();
12903 ok = true;
12904 if (!bb_vinfo
12905 && (STMT_VINFO_RELEVANT_P (stmt_info)
12906 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
12907 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12908 -mveclibabi= takes preference over library functions with
12909 the simd attribute. */
12910 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12911 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
12912 cost_vec)
12913 || vectorizable_conversion (vinfo, stmt_info,
12914 NULL, NULL, node, cost_vec)
12915 || vectorizable_operation (vinfo, stmt_info,
12916 NULL, NULL, node, cost_vec)
12917 || vectorizable_assignment (vinfo, stmt_info,
12918 NULL, NULL, node, cost_vec)
12919 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12920 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12921 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12922 node, node_instance, cost_vec)
12923 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
12924 NULL, node, cost_vec)
12925 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12926 || vectorizable_condition (vinfo, stmt_info,
12927 NULL, NULL, node, cost_vec)
12928 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12929 cost_vec)
12930 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12931 stmt_info, NULL, node)
12932 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12933 stmt_info, NULL, node, cost_vec));
12934 else
12936 if (bb_vinfo)
12937 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12938 || vectorizable_simd_clone_call (vinfo, stmt_info,
12939 NULL, NULL, node, cost_vec)
12940 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
12941 cost_vec)
12942 || vectorizable_shift (vinfo, stmt_info,
12943 NULL, NULL, node, cost_vec)
12944 || vectorizable_operation (vinfo, stmt_info,
12945 NULL, NULL, node, cost_vec)
12946 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
12947 cost_vec)
12948 || vectorizable_load (vinfo, stmt_info,
12949 NULL, NULL, node, cost_vec)
12950 || vectorizable_store (vinfo, stmt_info,
12951 NULL, NULL, node, cost_vec)
12952 || vectorizable_condition (vinfo, stmt_info,
12953 NULL, NULL, node, cost_vec)
12954 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12955 cost_vec)
12956 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12959 if (node)
12960 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12962 if (!ok)
12963 return opt_result::failure_at (stmt_info->stmt,
12964 "not vectorized:"
12965 " relevant stmt not supported: %G",
12966 stmt_info->stmt);
12968 /* Stmts that are (also) "live" (i.e. that are used outside the loop)
12969 need extra handling, except for vectorizable reductions. */
12970 if (!bb_vinfo
12971 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
12972 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
12973 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
12974 stmt_info, node, node_instance,
12975 false, cost_vec))
12976 return opt_result::failure_at (stmt_info->stmt,
12977 "not vectorized:"
12978 " live stmt not supported: %G",
12979 stmt_info->stmt);
12981 return opt_result::success ();
12985 /* Function vect_transform_stmt.
12987 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
12989 bool
12990 vect_transform_stmt (vec_info *vinfo,
12991 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12992 slp_tree slp_node, slp_instance slp_node_instance)
12994 bool is_store = false;
12995 gimple *vec_stmt = NULL;
12996 bool done;
12998 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
13000 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13001 if (slp_node)
13002 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
13004 switch (STMT_VINFO_TYPE (stmt_info))
13006 case type_demotion_vec_info_type:
13007 case type_promotion_vec_info_type:
13008 case type_conversion_vec_info_type:
13009 done = vectorizable_conversion (vinfo, stmt_info,
13010 gsi, &vec_stmt, slp_node, NULL);
13011 gcc_assert (done);
13012 break;
13014 case induc_vec_info_type:
13015 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
13016 stmt_info, &vec_stmt, slp_node,
13017 NULL);
13018 gcc_assert (done);
13019 break;
13021 case shift_vec_info_type:
13022 done = vectorizable_shift (vinfo, stmt_info,
13023 gsi, &vec_stmt, slp_node, NULL);
13024 gcc_assert (done);
13025 break;
13027 case op_vec_info_type:
13028 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13029 NULL);
13030 gcc_assert (done);
13031 break;
13033 case assignment_vec_info_type:
13034 done = vectorizable_assignment (vinfo, stmt_info,
13035 gsi, &vec_stmt, slp_node, NULL);
13036 gcc_assert (done);
13037 break;
13039 case load_vec_info_type:
13040 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13041 NULL);
13042 gcc_assert (done);
13043 break;
13045 case store_vec_info_type:
13046 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
13047 && !slp_node
13048 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
13049 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
13050 /* In case of interleaving, the whole chain is vectorized when the
13051 last store in the chain is reached. Store stmts before the last
13052 one are skipped, and their vec_stmt_info shouldn't be freed
13053 meanwhile. */
13055 else
13057 done = vectorizable_store (vinfo, stmt_info,
13058 gsi, &vec_stmt, slp_node, NULL);
13059 gcc_assert (done);
13060 is_store = true;
13062 break;
13064 case condition_vec_info_type:
13065 done = vectorizable_condition (vinfo, stmt_info,
13066 gsi, &vec_stmt, slp_node, NULL);
13067 gcc_assert (done);
13068 break;
13070 case comparison_vec_info_type:
13071 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
13072 slp_node, NULL);
13073 gcc_assert (done);
13074 break;
13076 case call_vec_info_type:
13077 done = vectorizable_call (vinfo, stmt_info,
13078 gsi, &vec_stmt, slp_node, NULL);
13079 break;
13081 case call_simd_clone_vec_info_type:
13082 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
13083 slp_node, NULL);
13084 break;
13086 case reduc_vec_info_type:
13087 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13088 gsi, &vec_stmt, slp_node);
13089 gcc_assert (done);
13090 break;
13092 case cycle_phi_info_type:
13093 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
13094 &vec_stmt, slp_node, slp_node_instance);
13095 gcc_assert (done);
13096 break;
13098 case lc_phi_info_type:
13099 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13100 stmt_info, &vec_stmt, slp_node);
13101 gcc_assert (done);
13102 break;
13104 case recurr_info_type:
13105 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13106 stmt_info, &vec_stmt, slp_node, NULL);
13107 gcc_assert (done);
13108 break;
13110 case phi_info_type:
13111 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
13112 gcc_assert (done);
13113 break;
13115 default:
13116 if (!STMT_VINFO_LIVE_P (stmt_info))
13118 if (dump_enabled_p ())
13119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13120 "stmt not supported.\n");
13121 gcc_unreachable ();
13123 done = true;
13126 if (!slp_node && vec_stmt)
13127 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
13129 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
13131 /* Handle stmts whose DEF is used outside the loop-nest that is
13132 being vectorized. */
13133 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13134 slp_node_instance, true, NULL);
13135 gcc_assert (done);
13138 if (slp_node)
13139 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13141 return is_store;
13145 /* Remove a group of stores (for SLP or interleaving), free their
13146 stmt_vec_info. */
13148 void
13149 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13151 stmt_vec_info next_stmt_info = first_stmt_info;
13153 while (next_stmt_info)
13155 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13156 next_stmt_info = vect_orig_stmt (next_stmt_info);
13157 /* Free the attached stmt_vec_info and remove the stmt. */
13158 vinfo->remove_stmt (next_stmt_info);
13159 next_stmt_info = tmp;
13163 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13164 elements of type SCALAR_TYPE, or null if the target doesn't support
13165 such a type.
13167 If NUNITS is zero, return a vector type that contains elements of
13168 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13170 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13171 for this vectorization region and want to "autodetect" the best choice.
13172 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13173 and we want the new type to be interoperable with it. PREVAILING_MODE
13174 in this case can be a scalar integer mode or a vector mode; when it
13175 is a vector mode, the function acts like a tree-level version of
13176 related_vector_mode. */
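/* As a rough illustration: with a 128-bit vector PREVAILING_MODE such as
   V4SImode, SCALAR_TYPE short int and NUNITS 8, related_vector_mode would
   normally hand back V8HImode, assuming the target supports such a mode.  */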
13178 tree
13179 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13180 tree scalar_type, poly_uint64 nunits)
13182 tree orig_scalar_type = scalar_type;
13183 scalar_mode inner_mode;
13184 machine_mode simd_mode;
13185 tree vectype;
13187 if ((!INTEGRAL_TYPE_P (scalar_type)
13188 && !POINTER_TYPE_P (scalar_type)
13189 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13190 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13191 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13192 return NULL_TREE;
13194 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13196 /* Interoperability between modes requires one to be a constant multiple
13197 of the other, so that the number of vectors required for each operation
13198 is a compile-time constant. */
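/* For instance, a request for eight 4-byte elements (32 bytes) is
   interoperable with a 16-byte prevailing vector mode, whereas a request
   for three 4-byte elements (12 bytes) is not, since neither size divides
   the other.  */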
13199 if (prevailing_mode != VOIDmode
13200 && !constant_multiple_p (nunits * nbytes,
13201 GET_MODE_SIZE (prevailing_mode))
13202 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13203 nunits * nbytes))
13204 return NULL_TREE;
13206 /* For vector types of elements whose mode precision doesn't
13207 match their type's precision we use an element type of mode
13208 precision. The vectorization routines will have to make sure
13209 they support the proper result truncation/extension.
13210 We also make sure to build vector types with INTEGER_TYPE
13211 component type only. */
13212 if (INTEGRAL_TYPE_P (scalar_type)
13213 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13214 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13215 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13216 TYPE_UNSIGNED (scalar_type));
13218 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13219 When the component mode passes the above test, simply use a type
13220 corresponding to that mode. The theory is that any use that
13221 would cause problems with this will disable vectorization anyway. */
13222 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13223 && !INTEGRAL_TYPE_P (scalar_type))
13224 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13226 /* We can't build a vector type of elements with alignment bigger than
13227 their size. */
13228 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13229 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13230 TYPE_UNSIGNED (scalar_type));
13232 /* If we fell back to using the mode, fail if there was
13233 no scalar type for it. */
13234 if (scalar_type == NULL_TREE)
13235 return NULL_TREE;
13237 /* If no prevailing mode was supplied, use the mode the target prefers.
13238 Otherwise lookup a vector mode based on the prevailing mode. */
13239 if (prevailing_mode == VOIDmode)
13241 gcc_assert (known_eq (nunits, 0U));
13242 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13243 if (SCALAR_INT_MODE_P (simd_mode))
13245 /* Traditional behavior is not to take the integer mode
13246 literally, but simply to use it as a way of determining
13247 the vector size. It is up to mode_for_vector to decide
13248 what the TYPE_MODE should be.
13250 Note that nunits == 1 is allowed in order to support single
13251 element vector types. */
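/* For instance, if the preferred SIMD mode for QImode elements comes back
   as DImode, that simply asks for an 8-byte vector: NUNITS becomes 8 and
   mode_for_vector is left to pick the actual vector mode (e.g. V8QImode)
   if the target has one.  */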
13252 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
13253 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13254 return NULL_TREE;
13257 else if (SCALAR_INT_MODE_P (prevailing_mode)
13258 || !related_vector_mode (prevailing_mode,
13259 inner_mode, nunits).exists (&simd_mode))
13261 /* Fall back to using mode_for_vector, mostly in the hope of being
13262 able to use an integer mode. */
13263 if (known_eq (nunits, 0U)
13264 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13265 return NULL_TREE;
13267 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13268 return NULL_TREE;
13271 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13273 /* In cases where the mode was chosen by mode_for_vector, check that
13274 the target actually supports the chosen mode, or that it at least
13275 allows the vector mode to be replaced by a like-sized integer. */
13276 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13277 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13278 return NULL_TREE;
13280 /* Re-attach the address-space qualifier if we canonicalized the scalar
13281 type. */
13282 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13283 return build_qualified_type
13284 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13286 return vectype;
13289 /* Function get_vectype_for_scalar_type.
13291 Returns the vector type corresponding to SCALAR_TYPE as supported
13292 by the target. If GROUP_SIZE is nonzero and we're performing BB
13293 vectorization, make sure that the number of elements in the vector
13294 is no bigger than GROUP_SIZE. */
13296 tree
13297 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13298 unsigned int group_size)
13300 /* For BB vectorization, we should always have a group size once we've
13301 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13302 are tentative requests during things like early data reference
13303 analysis and pattern recognition. */
13304 if (is_a <bb_vec_info> (vinfo))
13305 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13306 else
13307 group_size = 0;
13309 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13310 scalar_type);
13311 if (vectype && vinfo->vector_mode == VOIDmode)
13312 vinfo->vector_mode = TYPE_MODE (vectype);
13314 /* Register the natural choice of vector type, before the group size
13315 has been applied. */
13316 if (vectype)
13317 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13319 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13320 try again with an explicit number of elements. */
13321 if (vectype
13322 && group_size
13323 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13325 /* Start with the biggest number of units that fits within
13326 GROUP_SIZE and halve it until we find a valid vector type.
13327 Usually either the first attempt will succeed or all will
13328 fail (in the latter case because GROUP_SIZE is too small
13329 for the target), but it's possible that a target could have
13330 a hole between supported vector types.
13332 If GROUP_SIZE is not a power of 2, this has the effect of
13333 trying the largest power of 2 that fits within the group,
13334 even though the group is not a multiple of that vector size.
13335 The BB vectorizer will then try to carve up the group into
13336 smaller pieces. */
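/* For instance, a GROUP_SIZE of 6 makes the loop below try a 4-element
   vector type first and then a 2-element one, stopping at the first type
   the target supports.  */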
13337 unsigned int nunits = 1 << floor_log2 (group_size);
13340 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13341 scalar_type, nunits);
13342 nunits /= 2;
13344 while (nunits > 1 && !vectype);
13347 return vectype;
13350 /* Return the vector type corresponding to SCALAR_TYPE as supported
13351 by the target. NODE, if nonnull, is the SLP tree node that will
13352 use the returned vector type. */
13354 tree
13355 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13357 unsigned int group_size = 0;
13358 if (node)
13359 group_size = SLP_TREE_LANES (node);
13360 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13363 /* Function get_mask_type_for_scalar_type.
13365 Returns the mask type corresponding to a result of comparison
13366 of vectors of specified SCALAR_TYPE as supported by target.
13367 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13368 make sure that the number of elements in the vector is no bigger
13369 than GROUP_SIZE. */
13371 tree
13372 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13373 unsigned int group_size)
13375 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13377 if (!vectype)
13378 return NULL;
13380 return truth_type_for (vectype);
13383 /* Function get_mask_type_for_scalar_type.
13385 Returns the mask type corresponding to a result of comparison
13386 of vectors of specified SCALAR_TYPE as supported by target.
13387 NODE, if nonnull, is the SLP tree node that will use the returned
13388 vector type. */
13390 tree
13391 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13392 slp_tree node)
13394 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
13396 if (!vectype)
13397 return NULL;
13399 return truth_type_for (vectype);
13402 /* Function get_same_sized_vectype
13404 Returns a vector type corresponding to SCALAR_TYPE of size
13405 VECTOR_TYPE if supported by the target. */
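/* For example, given SCALAR_TYPE int and a 4 x float VECTOR_TYPE, this
   would typically return a 4 x int vector type of the same 128-bit size,
   provided the target supports one.  */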
13407 tree
13408 get_same_sized_vectype (tree scalar_type, tree vector_type)
13410 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13411 return truth_type_for (vector_type);
13413 poly_uint64 nunits;
13414 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
13415 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13416 return NULL_TREE;
13418 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13419 scalar_type, nunits);
13422 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13423 would not change the chosen vector modes. */
13425 bool
13426 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13428 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13429 i != vinfo->used_vector_modes.end (); ++i)
13430 if (!VECTOR_MODE_P (*i)
13431 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13432 return false;
13433 return true;
13436 /* Function vect_is_simple_use.
13438 Input:
13439 VINFO - the vect info of the loop or basic block that is being vectorized.
13440 OPERAND - operand in the loop or bb.
13441 Output:
13442 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13443 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13444 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13445 the definition could be anywhere in the function
13446 DT - the type of definition
13448 Returns whether a stmt with OPERAND can be vectorized.
13449 For loops, supportable operands are constants, loop invariants, and operands
13450 that are defined by the current iteration of the loop. Unsupportable
13451 operands are those that are defined by a previous iteration of the loop (as
13452 is the case in reduction/induction computations).
13453 For basic blocks, supportable operands are constants and bb invariants.
13454 For now, operands defined outside the basic block are not supported. */
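/* As an illustration, in a loop computing a[i] = b[i] * c, the operand
   defined by the load of b[i] in the same iteration is vect_internal_def,
   the loop-invariant c is vect_external_def and a literal constant operand
   would be vect_constant_def.  */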
13456 bool
13457 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13458 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
13460 if (def_stmt_info_out)
13461 *def_stmt_info_out = NULL;
13462 if (def_stmt_out)
13463 *def_stmt_out = NULL;
13464 *dt = vect_unknown_def_type;
13466 if (dump_enabled_p ())
13468 dump_printf_loc (MSG_NOTE, vect_location,
13469 "vect_is_simple_use: operand ");
13470 if (TREE_CODE (operand) == SSA_NAME
13471 && !SSA_NAME_IS_DEFAULT_DEF (operand))
13472 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13473 else
13474 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13477 if (CONSTANT_CLASS_P (operand))
13478 *dt = vect_constant_def;
13479 else if (is_gimple_min_invariant (operand))
13480 *dt = vect_external_def;
13481 else if (TREE_CODE (operand) != SSA_NAME)
13482 *dt = vect_unknown_def_type;
13483 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13484 *dt = vect_external_def;
13485 else
13487 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13488 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13489 if (!stmt_vinfo)
13490 *dt = vect_external_def;
13491 else
13493 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
13494 def_stmt = stmt_vinfo->stmt;
13495 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13496 if (def_stmt_info_out)
13497 *def_stmt_info_out = stmt_vinfo;
13499 if (def_stmt_out)
13500 *def_stmt_out = def_stmt;
13503 if (dump_enabled_p ())
13505 dump_printf (MSG_NOTE, ", type of def: ");
13506 switch (*dt)
13508 case vect_uninitialized_def:
13509 dump_printf (MSG_NOTE, "uninitialized\n");
13510 break;
13511 case vect_constant_def:
13512 dump_printf (MSG_NOTE, "constant\n");
13513 break;
13514 case vect_external_def:
13515 dump_printf (MSG_NOTE, "external\n");
13516 break;
13517 case vect_internal_def:
13518 dump_printf (MSG_NOTE, "internal\n");
13519 break;
13520 case vect_induction_def:
13521 dump_printf (MSG_NOTE, "induction\n");
13522 break;
13523 case vect_reduction_def:
13524 dump_printf (MSG_NOTE, "reduction\n");
13525 break;
13526 case vect_double_reduction_def:
13527 dump_printf (MSG_NOTE, "double reduction\n");
13528 break;
13529 case vect_nested_cycle:
13530 dump_printf (MSG_NOTE, "nested cycle\n");
13531 break;
13532 case vect_first_order_recurrence:
13533 dump_printf (MSG_NOTE, "first order recurrence\n");
13534 break;
13535 case vect_unknown_def_type:
13536 dump_printf (MSG_NOTE, "unknown\n");
13537 break;
13541 if (*dt == vect_unknown_def_type)
13543 if (dump_enabled_p ())
13544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13545 "Unsupported pattern.\n");
13546 return false;
13549 return true;
13552 /* Function vect_is_simple_use.
13554 Same as vect_is_simple_use but also determines the vector operand
13555 type of OPERAND and stores it to *VECTYPE. If the definition of
13556 OPERAND is vect_uninitialized_def, vect_constant_def or
13557 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
13558 is responsible for computing the best suited vector type for the
13559 scalar operand. */
13561 bool
13562 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13563 tree *vectype, stmt_vec_info *def_stmt_info_out,
13564 gimple **def_stmt_out)
13566 stmt_vec_info def_stmt_info;
13567 gimple *def_stmt;
13568 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
13569 return false;
13571 if (def_stmt_out)
13572 *def_stmt_out = def_stmt;
13573 if (def_stmt_info_out)
13574 *def_stmt_info_out = def_stmt_info;
13576 /* Now get a vector type if the def is internal, otherwise supply
13577 NULL_TREE and leave it up to the caller to figure out a proper
13578 type for the use stmt. */
13579 if (*dt == vect_internal_def
13580 || *dt == vect_induction_def
13581 || *dt == vect_reduction_def
13582 || *dt == vect_double_reduction_def
13583 || *dt == vect_nested_cycle
13584 || *dt == vect_first_order_recurrence)
13586 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
13587 gcc_assert (*vectype != NULL_TREE);
13588 if (dump_enabled_p ())
13589 dump_printf_loc (MSG_NOTE, vect_location,
13590 "vect_is_simple_use: vectype %T\n", *vectype);
13592 else if (*dt == vect_uninitialized_def
13593 || *dt == vect_constant_def
13594 || *dt == vect_external_def)
13595 *vectype = NULL_TREE;
13596 else
13597 gcc_unreachable ();
13599 return true;
13602 /* Function vect_is_simple_use.
13604 Same as vect_is_simple_use but determines the operand by operand
13605 position OPERAND from either STMT or SLP_NODE, filling in *OP
13606 and *SLP_DEF (when SLP_NODE is not NULL). */
13608 bool
13609 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13610 unsigned operand, tree *op, slp_tree *slp_def,
13611 enum vect_def_type *dt,
13612 tree *vectype, stmt_vec_info *def_stmt_info_out)
13614 if (slp_node)
13616 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13617 *slp_def = child;
13618 *vectype = SLP_TREE_VECTYPE (child);
13619 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13621 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13622 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
13624 else
13626 if (def_stmt_info_out)
13627 *def_stmt_info_out = NULL;
13628 *op = SLP_TREE_SCALAR_OPS (child)[0];
13629 *dt = SLP_TREE_DEF_TYPE (child);
13630 return true;
13633 else
13635 *slp_def = NULL;
13636 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
13638 if (gimple_assign_rhs_code (ass) == COND_EXPR
13639 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
13641 if (operand < 2)
13642 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
13643 else
13644 *op = gimple_op (ass, operand);
13646 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
13647 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
13648 else
13649 *op = gimple_op (ass, operand + 1);
13651 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
13652 *op = gimple_call_arg (call, operand);
13653 else
13654 gcc_unreachable ();
13655 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
13659 /* If OP is not NULL and is external or constant update its vector
13660 type with VECTYPE. Returns true if successful or false if not,
13661 for example when conflicting vector types are present. */
13663 bool
13664 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13666 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13667 return true;
13668 if (SLP_TREE_VECTYPE (op))
13669 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
13670 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13671 should be handled by patterns. Allow vect_constant_def for now. */
13672 if (VECTOR_BOOLEAN_TYPE_P (vectype)
13673 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
13674 return false;
13675 SLP_TREE_VECTYPE (op) = vectype;
13676 return true;
13679 /* Function supportable_widening_operation
13681 Check whether an operation represented by the code CODE is a
13682 widening operation that is supported by the target platform in
13683 vector form (i.e., when operating on arguments of type VECTYPE_IN
13684 producing a result of type VECTYPE_OUT).
13686 Widening operations we currently support are NOP (CONVERT), FLOAT,
13687 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13688 are supported by the target platform either directly (via vector
13689 tree-codes), or via target builtins.
13691 Output:
13692 - CODE1 and CODE2 are codes of vector operations to be used when
13693 vectorizing the operation, if available.
13694 - MULTI_STEP_CVT determines the number of required intermediate steps in
13695 case of multi-step conversion (like char->short->int - in that case
13696 MULTI_STEP_CVT will be 1).
13697 - INTERM_TYPES contains the intermediate type required to perform the
13698 widening operation (short in the above example). */
13700 bool
13701 supportable_widening_operation (vec_info *vinfo,
13702 code_helper code,
13703 stmt_vec_info stmt_info,
13704 tree vectype_out, tree vectype_in,
13705 code_helper *code1,
13706 code_helper *code2,
13707 int *multi_step_cvt,
13708 vec<tree> *interm_types)
13710 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
13711 class loop *vect_loop = NULL;
13712 machine_mode vec_mode;
13713 enum insn_code icode1, icode2;
13714 optab optab1 = unknown_optab, optab2 = unknown_optab;
13715 tree vectype = vectype_in;
13716 tree wide_vectype = vectype_out;
13717 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
13718 int i;
13719 tree prev_type, intermediate_type;
13720 machine_mode intermediate_mode, prev_mode;
13721 optab optab3, optab4;
13723 *multi_step_cvt = 0;
13724 if (loop_info)
13725 vect_loop = LOOP_VINFO_LOOP (loop_info);
13727 switch (code.safe_as_tree_code ())
13729 case MAX_TREE_CODES:
13730 /* Don't set c1 and c2 if code is not a tree_code. */
13731 break;
13733 case WIDEN_MULT_EXPR:
13734 /* The result of a vectorized widening operation usually requires
13735 two vectors (because the widened results do not fit into one vector).
13736 The generated vector results would normally be expected to be
13737 generated in the same order as in the original scalar computation,
13738 i.e. if 8 results are generated in each vector iteration, they are
13739 to be organized as follows:
13740 vect1: [res1,res2,res3,res4],
13741 vect2: [res5,res6,res7,res8].
13743 However, in the special case that the result of the widening
13744 operation is used in a reduction computation only, the order doesn't
13745 matter (because when vectorizing a reduction we change the order of
13746 the computation). Some targets can take advantage of this and
13747 generate more efficient code. For example, targets like Altivec,
13748 that support widen_mult using a sequence of {mult_even,mult_odd}
13749 generate the following vectors:
13750 vect1: [res1,res3,res5,res7],
13751 vect2: [res2,res4,res6,res8].
13753 When vectorizing outer-loops, we execute the inner-loop sequentially
13754 (each vectorized inner-loop iteration contributes to VF outer-loop
13755 iterations in parallel). We therefore don't allow changing the
13756 order of the computation in the inner-loop during outer-loop
13757 vectorization. */
13758 /* TODO: Another case in which order doesn't *really* matter is when we
13759 widen and then contract again, e.g. (short)((int)x * y >> 8).
13760 Normally, pack_trunc performs an even/odd permute, whereas the
13761 repack from an even/odd expansion would be an interleave, which
13762 would be significantly simpler for e.g. AVX2. */
13763 /* In any case, in order to avoid duplicating the code below, recurse
13764 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13765 are properly set up for the caller. If we fail, we'll continue with
13766 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13767 if (vect_loop
13768 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
13769 && !nested_in_vect_loop_p (vect_loop, stmt_info)
13770 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
13771 stmt_info, vectype_out,
13772 vectype_in, code1,
13773 code2, multi_step_cvt,
13774 interm_types))
13776 /* Elements in a vector with the vect_used_by_reduction property cannot
13777 be reordered if the use chain with this property does not have the
13778 same operation. One such example is s += a * b, where elements
13779 in a and b cannot be reordered. Here we check if the vector defined
13780 by STMT is only directly used in the reduction statement. */
13781 tree lhs = gimple_assign_lhs (stmt_info->stmt);
13782 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
13783 if (use_stmt_info
13784 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
13785 return true;
13787 c1 = VEC_WIDEN_MULT_LO_EXPR;
13788 c2 = VEC_WIDEN_MULT_HI_EXPR;
13789 break;
13791 case DOT_PROD_EXPR:
13792 c1 = DOT_PROD_EXPR;
13793 c2 = DOT_PROD_EXPR;
13794 break;
13796 case SAD_EXPR:
13797 c1 = SAD_EXPR;
13798 c2 = SAD_EXPR;
13799 break;
13801 case VEC_WIDEN_MULT_EVEN_EXPR:
13802 /* Support the recursion induced just above. */
13803 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
13804 c2 = VEC_WIDEN_MULT_ODD_EXPR;
13805 break;
13807 case WIDEN_LSHIFT_EXPR:
13808 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
13809 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
13810 break;
13812 CASE_CONVERT:
13813 c1 = VEC_UNPACK_LO_EXPR;
13814 c2 = VEC_UNPACK_HI_EXPR;
13815 break;
13817 case FLOAT_EXPR:
13818 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
13819 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
13820 break;
13822 case FIX_TRUNC_EXPR:
13823 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
13824 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
13825 break;
13827 default:
13828 gcc_unreachable ();
13831 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
13832 std::swap (c1, c2);
13834 if (code == FIX_TRUNC_EXPR)
13836 /* The signedness is determined from output operand. */
13837 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13838 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13840 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
13841 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13842 && VECTOR_BOOLEAN_TYPE_P (vectype)
13843 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13844 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13846 /* If the input and result modes are the same, a different optab
13847 is needed where we pass in the number of units in vectype. */
13848 optab1 = vec_unpacks_sbool_lo_optab;
13849 optab2 = vec_unpacks_sbool_hi_optab;
13852 vec_mode = TYPE_MODE (vectype);
13853 if (widening_fn_p (code))
13855 /* If this is an internal fn then we must check whether the target
13856 supports either a low-high split or an even-odd split. */
13857 internal_fn ifn = as_internal_fn ((combined_fn) code);
13859 internal_fn lo, hi, even, odd;
13860 lookup_hilo_internal_fn (ifn, &lo, &hi);
13861 *code1 = as_combined_fn (lo);
13862 *code2 = as_combined_fn (hi);
13863 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
13864 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
13866 /* If we don't support low-high, then check for even-odd. */
13867 if (!optab1
13868 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13869 || !optab2
13870 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13872 lookup_evenodd_internal_fn (ifn, &even, &odd);
13873 *code1 = as_combined_fn (even);
13874 *code2 = as_combined_fn (odd);
13875 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
13876 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
13879 else if (code.is_tree_code ())
13881 if (code == FIX_TRUNC_EXPR)
13883 /* The signedness is determined from output operand. */
13884 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13885 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13887 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
13888 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13889 && VECTOR_BOOLEAN_TYPE_P (vectype)
13890 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13891 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13893 /* If the input and result modes are the same, a different optab
13894 is needed where we pass in the number of units in vectype. */
13895 optab1 = vec_unpacks_sbool_lo_optab;
13896 optab2 = vec_unpacks_sbool_hi_optab;
13898 else
13900 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13901 optab2 = optab_for_tree_code (c2, vectype, optab_default);
13903 *code1 = c1;
13904 *code2 = c2;
13907 if (!optab1 || !optab2)
13908 return false;
13910 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13911 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13912 return false;
13915 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13916 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13918 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13919 return true;
13920 /* For scalar masks we may have different boolean
13921 vector types having the same QImode. Thus we
13922 add an additional check on the number of elements. */
13923 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
13924 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13925 return true;
13928 /* Check if it's a multi-step conversion that can be done using intermediate
13929 types. */
13931 prev_type = vectype;
13932 prev_mode = vec_mode;
13934 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
13935 return false;
13937 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13938 intermediate steps in the promotion sequence. We try
13939 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
13940 not. */
13941 interm_types->create (MAX_INTERM_CVT_STEPS);
13942 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13944 intermediate_mode = insn_data[icode1].operand[0].mode;
13945 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13946 intermediate_type
13947 = vect_halve_mask_nunits (prev_type, intermediate_mode);
13948 else if (VECTOR_MODE_P (intermediate_mode))
13950 tree intermediate_element_type
13951 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
13952 TYPE_UNSIGNED (prev_type));
13953 intermediate_type
13954 = build_vector_type_for_mode (intermediate_element_type,
13955 intermediate_mode);
13957 else
13958 intermediate_type
13959 = lang_hooks.types.type_for_mode (intermediate_mode,
13960 TYPE_UNSIGNED (prev_type));
13962 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13963 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13964 && intermediate_mode == prev_mode
13965 && SCALAR_INT_MODE_P (prev_mode))
13967 /* If the input and result modes are the same, a different optab
13968 is needed where we pass in the number of units in vectype. */
13969 optab3 = vec_unpacks_sbool_lo_optab;
13970 optab4 = vec_unpacks_sbool_hi_optab;
13972 else
13974 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
13975 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
13978 if (!optab3 || !optab4
13979 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
13980 || insn_data[icode1].operand[0].mode != intermediate_mode
13981 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
13982 || insn_data[icode2].operand[0].mode != intermediate_mode
13983 || ((icode1 = optab_handler (optab3, intermediate_mode))
13984 == CODE_FOR_nothing)
13985 || ((icode2 = optab_handler (optab4, intermediate_mode))
13986 == CODE_FOR_nothing))
13987 break;
13989 interm_types->quick_push (intermediate_type);
13990 (*multi_step_cvt)++;
13992 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13993 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13995 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13996 return true;
13997 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
13998 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13999 return true;
14002 prev_type = intermediate_type;
14003 prev_mode = intermediate_mode;
14006 interm_types->release ();
14007 return false;
14011 /* Function supportable_narrowing_operation
14013 Check whether an operation represented by the code CODE is a
14014 narrowing operation that is supported by the target platform in
14015 vector form (i.e., when operating on arguments of type VECTYPE_IN
14016 and producing a result of type VECTYPE_OUT).
14018 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14019 and FLOAT. This function checks if these operations are supported by
14020 the target platform directly via vector tree-codes.
14022 Output:
14023 - CODE1 is the code of a vector operation to be used when
14024 vectorizing the operation, if available.
14025 - MULTI_STEP_CVT determines the number of required intermediate steps in
14026 case of multi-step conversion (like int->short->char - in that case
14027 MULTI_STEP_CVT will be 1).
14028 - INTERM_TYPES contains the intermediate type required to perform the
14029 narrowing operation (short in the above example). */
14031 bool
14032 supportable_narrowing_operation (code_helper code,
14033 tree vectype_out, tree vectype_in,
14034 code_helper *code1, int *multi_step_cvt,
14035 vec<tree> *interm_types)
14037 machine_mode vec_mode;
14038 enum insn_code icode1;
14039 optab optab1, interm_optab;
14040 tree vectype = vectype_in;
14041 tree narrow_vectype = vectype_out;
14042 enum tree_code c1;
14043 tree intermediate_type, prev_type;
14044 machine_mode intermediate_mode, prev_mode;
14045 int i;
14046 unsigned HOST_WIDE_INT n_elts;
14047 bool uns;
14049 if (!code.is_tree_code ())
14050 return false;
14052 *multi_step_cvt = 0;
14053 switch ((tree_code) code)
14055 CASE_CONVERT:
14056 c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
	  && VECTOR_BOOLEAN_TYPE_P (vectype)
	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
	  && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
	  && n_elts < BITS_PER_UNIT)
	optab1 = vec_pack_sbool_trunc_optab;
      else
	optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean vector types
	 having the same QImode, so additionally check the number of
	 elements.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	return true;
    }
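
  /* FLOAT_EXPR (integer -> float) narrowing is only supported as a single
     VEC_PACK_FLOAT_EXPR step; no multi-step scheme is attempted for it.  */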
  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
     conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
     more costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in the narrowing sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
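      /* Each iteration adds one more pack: PREV_TYPE is narrowed into
	 INTERMEDIATE_TYPE, whose mode is the result mode of the previous
	 step's pack instruction.  */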
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && SCALAR_INT_MODE_P (prev_mode)
	  && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
	  && n_elts < BITS_PER_UNIT)
	interm_optab = vec_pack_sbool_trunc_optab;
      else
	interm_optab
	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
				 optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	    return true;
	}

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}

/* Generate and return a vector mask of MASK_TYPE such that
   mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   Add the statements to SEQ.  */
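
/* For example, with START_INDEX = 6, END_INDEX = 9 and an 8-element mask,
   the result is { 1, 1, 1, 0, 0, 0, 0, 0 }: lane I is active iff I + 6 < 9,
   i.e. for the three remaining iterations.  */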

tree
vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
		tree end_index, const char *name)
{
  tree cmp_type = TREE_TYPE (start_index);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  tree tmp;
  if (name)
    tmp = make_temp_ssa_name (mask_type, NULL, name);
  else
    tmp = make_ssa_name (mask_type);
  gimple_call_set_lhs (call, tmp);
  gimple_seq_add_stmt (seq, call);
  return tmp;
}

/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}

/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */
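
/* For example, for "int_x = (int) char_y" on a target with 128-bit vectors
   (purely an illustrative assumption), *STMT_VECTYPE_OUT would be the int
   vector type with 4 units while *NUNITS_VECTYPE_OUT would be the char
   vector type with 16 units, since the narrower char operand determines
   how many units the statement needs.  */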

opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out,
				unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     they really need can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt.%G", stmt);
    }
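
  /* Compute the statement's vector type: reuse a precomputed vectype,
     build a mask vector type for statements known to produce booleans,
     or otherwise derive it from the scalar type of the data reference,
     of the stored value (for MASK_STORE), or of the lhs.  */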
  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt, "not vectorized: unsupported"
				       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
	scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
	{
	  if (group_size)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type (group size %d):"
			     " %T\n", group_size, scalar_type);
	  else
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type: %T\n", scalar_type);
	}
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }

  if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      scalar_type = vect_get_smallest_scalar_type (stmt_info,
						   TREE_TYPE (vectype));
      if (scalar_type != TREE_TYPE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for smallest scalar type: %T\n",
			     scalar_type);
	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
							group_size);
	  if (!nunits_vectype)
	    return opt_result::failure_at
	      (stmt, "not vectorized: unsupported data-type %T\n",
	       scalar_type);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
			     nunits_vectype);
	}
    }
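
  /* The number of units in the nunits vector type must be a whole multiple
     of the number of units in the statement's own vector type.  */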
  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
				   "Not vectorized: Incompatible number "
				   "of vector subparts between %T and %T\n",
				   nunits_vectype, *stmt_vectype_out);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}

/* Generate and return a statement sequence that sets vector length LEN to:

     min_of_start_and_end = min (START_INDEX, END_INDEX);
     left_len = END_INDEX - min_of_start_and_end;
     rhs = min (left_len, LEN_LIMIT);
     LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */
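
/* For instance, with START_INDEX = i, END_INDEX = n and LEN_LIMIT equal to
   the number of lanes in a vector, LEN becomes min (n - min (i, n), lanes),
   i.e. the number of remaining scalar iterations capped at one full
   vector.  */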

gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);

  return stmts;