1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
57 #include "regs.h"
58 #include "attribs.h"
59 #include "optabs-libfuncs.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
64 /* Return the vectorized type for the given statement. */
66 tree
67 stmt_vectype (class _stmt_vec_info *stmt_info)
69 return STMT_VINFO_VECTYPE (stmt_info);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74 bool
75 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
80 class loop* loop;
82 if (!loop_vinfo)
83 return false;
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
87 return (bb->loop_father == loop->inner);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
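/* For example, recording two copies of an aligned vector store for the
   loop body pushes a single { 2, vector_store, vect_body, ... } entry
   onto BODY_COST_VEC and returns
   2 * builtin_vectorization_cost (vector_store, vectype, 0) as the
   preliminary estimate.  */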
94 static unsigned
95 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
108 stmt_info_for_cost si
109 = { count, kind, where, stmt_info, node, vectype, misalign };
110 body_cost_vec->safe_push (si);
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
116 unsigned
117 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
126 unsigned
127 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
136 unsigned
137 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, 0, where);
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
149 static tree
150 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
161 static tree
162 read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (vect, new_stmt);
178 gimple_assign_set_lhs (new_stmt, vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
181 return vect_name;
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
188 static void
189 write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
193 tree array_ref;
194 gimple *new_stmt;
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
208 static tree
209 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
211 tree mem_ref;
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
222 static void
223 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
233 /* Function vect_mark_relevant.
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
237 static void
238 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern, in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
271 if (live_p && relevant == vect_unused_in_scope)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
299 worklist->safe_push (stmt_info);
303 /* Function is_simple_and_all_uses_invariant
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
307 bool
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
311 tree op;
312 ssa_op_iter iter;
314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
315 if (!stmt)
316 return false;
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
320 enum vect_def_type dt = vect_uninitialized_def;
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
333 return true;
336 /* Function vect_stmt_relevant_p.
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - control stmts in the loop (except for the exit condition).
346 CHECKME: what other side effects would the vectorizer allow? */
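/* For instance, a store such as a[i] = b[i] + 1 is relevant because it
   has a vdef, while a computation whose only uses are in the loop exit
   PHIs is marked live so that its final value can be extracted after
   the loop.  */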
348 static bool
349 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
350 enum vect_relevant *relevant, bool *live_p)
352 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
353 ssa_op_iter op_iter;
354 imm_use_iterator imm_iter;
355 use_operand_p use_p;
356 def_operand_p def_p;
358 *relevant = vect_unused_in_scope;
359 *live_p = false;
361 /* cond stmt other than loop exit cond. */
362 if (is_ctrl_stmt (stmt_info->stmt)
363 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
364 *relevant = vect_used_in_scope;
366 /* changing memory. */
367 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
368 if (gimple_vdef (stmt_info->stmt)
369 && !gimple_clobber_p (stmt_info->stmt))
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: stmt has vdefs.\n");
374 *relevant = vect_used_in_scope;
377 /* uses outside the loop. */
378 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
380 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
382 basic_block bb = gimple_bb (USE_STMT (use_p));
383 if (!flow_bb_inside_loop_p (loop, bb))
385 if (is_gimple_debug (USE_STMT (use_p)))
386 continue;
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: used out of loop.\n");
392 /* We expect all such uses to be in the loop exit phis
393 (because of loop closed form) */
394 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
395 gcc_assert (bb == single_exit (loop)->dest);
397 *live_p = true;
402 if (*live_p && *relevant == vect_unused_in_scope
403 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_NOTE, vect_location,
407 "vec_stmt_relevant_p: stmt live but not relevant.\n");
408 *relevant = vect_used_only_live;
411 return (*live_p || *relevant);
415 /* Function exist_non_indexing_operands_for_use_p
417 USE is one of the uses attached to STMT_INFO. Check if USE is
418 used in STMT_INFO for anything other than indexing an array. */
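/* For example, in a[i_1] = x_2 the use x_2 is the stored value, so the
   answer for it is true, whereas the use i_1 only feeds the address
   computation and the answer for it is false.  */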
420 static bool
421 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
423 tree operand;
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info))
429 return true;
431 /* STMT has a data_ref. FORNOW this means that it is of one of
432 the following forms:
433 -1- ARRAY_REF = var
434 -2- var = ARRAY_REF
435 (This should have been verified in analyze_data_refs).
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
439 for array indexing.
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
444 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
445 if (!assign || !gimple_assign_copy_p (assign))
447 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
448 if (call && gimple_call_internal_p (call))
450 internal_fn ifn = gimple_call_internal_fn (call);
451 int mask_index = internal_fn_mask_index (ifn);
452 if (mask_index >= 0
453 && use == gimple_call_arg (call, mask_index))
454 return true;
455 int stored_value_index = internal_fn_stored_value_index (ifn);
456 if (stored_value_index >= 0
457 && use == gimple_call_arg (call, stored_value_index))
458 return true;
459 if (internal_gather_scatter_fn_p (ifn)
460 && use == gimple_call_arg (call, 1))
461 return true;
463 return false;
466 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
467 return false;
468 operand = gimple_assign_rhs1 (assign);
469 if (TREE_CODE (operand) != SSA_NAME)
470 return false;
472 if (operand == use)
473 return true;
475 return false;
480 Function process_use.
482 Inputs:
483 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
484 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
485 that defined USE. This is done by calling mark_relevant and passing it
486 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
487 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
488 be performed.
490 Outputs:
491 Generally, LIVE_P and RELEVANT are used to define the liveness and
492 relevance info of the DEF_STMT of this USE:
493 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
494 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
495 Exceptions:
496 - case 1: If USE is used only for address computations (e.g. array indexing),
497 which does not need to be directly vectorized, then the liveness/relevance
498 of the respective DEF_STMT is left unchanged.
499 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
500 we skip DEF_STMT because it has already been processed.
501 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
502 "relevant" will be modified accordingly.
504 Return true if everything is as expected. Return false otherwise. */
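/* As an example of case 1, in a[i_1] = x_2 the use of i_1 serves only
   the address computation, so the definition of i_1 is not marked
   relevant here (unless FORCE is set).  */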
506 static opt_result
507 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
508 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
509 bool force)
511 stmt_vec_info dstmt_vinfo;
512 enum vect_def_type dt;
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
517 return opt_result::success ();
519 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
520 return opt_result::failure_at (stmt_vinfo->stmt,
521 "not vectorized:"
522 " unsupported use in stmt.\n");
524 if (!dstmt_vinfo)
525 return opt_result::success ();
527 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
528 basic_block bb = gimple_bb (stmt_vinfo->stmt);
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
537 && bb->loop_father == def_bb->loop_father)
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
542 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
543 return opt_result::success ();
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
548 d = dstmt_vinfo
549 inner-loop:
550 stmt # use (d)
551 outer-loop-tail-bb:
552 ... */
553 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
559 switch (relevant)
561 case vect_unused_in_scope:
562 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
563 vect_used_in_scope : vect_unused_in_scope;
564 break;
566 case vect_used_in_outer_by_reduction:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_by_reduction;
569 break;
571 case vect_used_in_outer:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
573 relevant = vect_used_in_scope;
574 break;
576 case vect_used_in_scope:
577 break;
579 default:
580 gcc_unreachable ();
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
587 inner-loop:
588 d = dstmt_vinfo
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
590 stmt # use (d) */
591 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE, vect_location,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
597 switch (relevant)
599 case vect_unused_in_scope:
600 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
602 vect_used_in_outer_by_reduction : vect_unused_in_scope;
603 break;
605 case vect_used_by_reduction:
606 case vect_used_only_live:
607 relevant = vect_used_in_outer_by_reduction;
608 break;
610 case vect_used_in_scope:
611 relevant = vect_used_in_outer;
612 break;
614 default:
615 gcc_unreachable ();
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
621 of course. */
622 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
626 loop_latch_edge (bb->loop_father))
627 == use))
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location,
631 "induction value on backedge.\n");
632 return opt_result::success ();
636 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
637 return opt_result::success ();
641 /* Function vect_mark_stmts_to_be_vectorized.
643 Not all stmts in the loop need to be vectorized. For example:
645 for i...
646 for j...
647 1. T0 = i + j
648 2. T1 = a[T0]
650 3. j = j + 1
652 Stmt 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
655 This pass detects such stmts. */
657 opt_result
658 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
660 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
661 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
662 unsigned int nbbs = loop->num_nodes;
663 gimple_stmt_iterator si;
664 unsigned int i;
665 basic_block bb;
666 bool live_p;
667 enum vect_relevant relevant;
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
671 auto_vec<stmt_vec_info, 64> worklist;
673 /* 1. Init worklist. */
674 for (i = 0; i < nbbs; i++)
676 bb = bbs[i];
677 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
679 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
682 phi_info->stmt);
684 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
685 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
687 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
689 if (is_gimple_debug (gsi_stmt (si)))
690 continue;
691 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "init: stmt relevant? %G", stmt_info->stmt);
696 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
697 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
701 /* 2. Process_worklist */
702 while (worklist.length () > 0)
704 use_operand_p use_p;
705 ssa_op_iter iter;
707 stmt_vec_info stmt_vinfo = worklist.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "worklist: examine stmt: %G", stmt_vinfo->stmt);
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
714 of STMT. */
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
730 case vect_reduction_def:
731 gcc_assert (relevant != vect_unused_in_scope);
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
736 return opt_result::failure_at
737 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
738 break;
740 case vect_nested_cycle:
741 if (relevant != vect_unused_in_scope
742 && relevant != vect_used_in_outer_by_reduction
743 && relevant != vect_used_in_outer)
744 return opt_result::failure_at
745 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
746 break;
748 case vect_double_reduction_def:
749 if (relevant != vect_unused_in_scope
750 && relevant != vect_used_by_reduction
751 && relevant != vect_used_only_live)
752 return opt_result::failure_at
753 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
754 break;
756 default:
757 break;
760 if (is_pattern_stmt_p (stmt_vinfo))
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
767 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
768 tree op = gimple_assign_rhs1 (assign);
770 i = 1;
771 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
773 opt_result res
774 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
775 loop_vinfo, relevant, &worklist, false);
776 if (!res)
777 return res;
778 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
779 loop_vinfo, relevant, &worklist, false);
780 if (!res)
781 return res;
782 i = 2;
784 for (; i < gimple_num_ops (assign); i++)
786 op = gimple_op (assign, i);
787 if (TREE_CODE (op) == SSA_NAME)
789 opt_result res
790 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
791 &worklist, false);
792 if (!res)
793 return res;
797 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
799 for (i = 0; i < gimple_call_num_args (call); i++)
801 tree arg = gimple_call_arg (call, i);
802 opt_result res
803 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
804 &worklist, false);
805 if (!res)
806 return res;
810 else
811 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
813 tree op = USE_FROM_PTR (use_p);
814 opt_result res
815 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
816 &worklist, false);
817 if (!res)
818 return res;
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
823 gather_scatter_info gs_info;
824 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
825 gcc_unreachable ();
826 opt_result res
827 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
828 &worklist, true);
829 if (!res)
831 if (fatal)
832 *fatal = false;
833 return res;
836 } /* while worklist */
838 return opt_result::success ();
841 /* Function vect_model_simple_cost.
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
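/* For example, with NCOPIES == 2 and one constant operand this records
   two copies of KIND (vector_stmt by default) in the loop body plus a
   single scalar_to_vec broadcast in the prologue; on the SLP path
   NCOPIES is taken from the SLP node and the prologue broadcasts are
   skipped.  */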
847 static void
848 vect_model_simple_cost (vec_info *,
849 stmt_vec_info stmt_info, int ncopies,
850 enum vect_def_type *dt,
851 int ndts,
852 slp_tree node,
853 stmt_vector_for_cost *cost_vec,
854 vect_cost_for_stmt kind = vector_stmt)
856 int inside_cost = 0, prologue_cost = 0;
858 gcc_assert (cost_vec != NULL);
860 /* ??? Somehow we need to fix this at the callers. */
861 if (node)
862 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
864 if (!node)
865 /* Cost the "broadcast" of a scalar operand in to a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
867 cost model. */
868 for (int i = 0; i < ndts; i++)
869 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
870 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
871 stmt_info, 0, vect_prologue);
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
875 stmt_info, 0, vect_body);
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost, prologue_cost);
884 /* Model cost for type demotion and promotion operations. PWR is
885 normally zero for single-step promotions and demotions. It will be
886 one if two-step promotion/demotion is required, and so on. NCOPIES
887 is the number of vector results (and thus number of instructions)
888 for the narrowest end of the operation chain. Each additional
889 step doubles the number of instructions required. If WIDEN_ARITH
890 is true the stmt is doing widening arithmetic. */
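/* For example, with NCOPIES == 1 and PWR == 2 (a three-step chain) the
   loop below records 1 + 2 + 4 = 7 vec_promote_demote (or vector_stmt)
   operations in the loop body.  */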
892 static void
893 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
894 enum vect_def_type *dt,
895 unsigned int ncopies, int pwr,
896 stmt_vector_for_cost *cost_vec,
897 bool widen_arith)
899 int i;
900 int inside_cost = 0, prologue_cost = 0;
902 for (i = 0; i < pwr + 1; i++)
904 inside_cost += record_stmt_cost (cost_vec, ncopies,
905 widen_arith
906 ? vector_stmt : vec_promote_demote,
907 stmt_info, 0, vect_body);
908 ncopies *= 2;
911 /* FORNOW: Assuming maximum 2 args per stmts. */
912 for (i = 0; i < 2; i++)
913 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
914 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
915 stmt_info, 0, vect_prologue);
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE, vect_location,
919 "vect_model_promotion_demotion_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost, prologue_cost);
923 /* Returns true if the current function returns DECL. */
925 static bool
926 cfun_returns (tree decl)
928 edge_iterator ei;
929 edge e;
930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
932 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
933 if (!ret)
934 continue;
935 if (gimple_return_retval (ret) == decl)
936 return true;
937 /* We often end up with an aggregate copy to the result decl,
938 handle that case as well. First skip intermediate clobbers
939 though. */
940 gimple *def = ret;
943 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
945 while (gimple_clobber_p (def));
946 if (is_a <gassign *> (def)
947 && gimple_assign_lhs (def) == gimple_return_retval (ret)
948 && gimple_assign_rhs1 (def) == decl)
949 return true;
951 return false;
954 /* Calculate cost of DR's memory access. */
955 void
956 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
957 dr_alignment_support alignment_support_scheme,
958 int misalignment,
959 unsigned int *inside_cost,
960 stmt_vector_for_cost *body_cost_vec)
962 switch (alignment_support_scheme)
964 case dr_aligned:
966 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
967 vector_store, stmt_info, 0,
968 vect_body);
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE, vect_location,
972 "vect_model_store_cost: aligned.\n");
973 break;
976 case dr_unaligned_supported:
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
980 unaligned_store, stmt_info,
981 misalignment, vect_body);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE, vect_location,
984 "vect_model_store_cost: unaligned supported by "
985 "hardware.\n");
986 break;
989 case dr_unaligned_unsupported:
991 *inside_cost = VECT_MAX_COST;
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
995 "vect_model_store_cost: unsupported access.\n");
996 break;
999 default:
1000 gcc_unreachable ();
1004 /* Calculate cost of DR's memory access. */
1005 void
1006 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1007 dr_alignment_support alignment_support_scheme,
1008 int misalignment,
1009 bool add_realign_cost, unsigned int *inside_cost,
1010 unsigned int *prologue_cost,
1011 stmt_vector_for_cost *prologue_cost_vec,
1012 stmt_vector_for_cost *body_cost_vec,
1013 bool record_prologue_costs)
1015 switch (alignment_support_scheme)
1017 case dr_aligned:
1019 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1020 stmt_info, 0, vect_body);
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_NOTE, vect_location,
1024 "vect_model_load_cost: aligned.\n");
1026 break;
1028 case dr_unaligned_supported:
1030 /* Here, we assign an additional cost for the unaligned load. */
1031 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1032 unaligned_load, stmt_info,
1033 misalignment, vect_body);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_load_cost: unaligned supported by "
1038 "hardware.\n");
1040 break;
1042 case dr_explicit_realign:
1044 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1045 vector_load, stmt_info, 0, vect_body);
1046 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1047 vec_perm, stmt_info, 0, vect_body);
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1051 prologue costs. */
1052 if (targetm.vectorize.builtin_mask_for_load)
1053 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1054 stmt_info, 0, vect_body);
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_NOTE, vect_location,
1058 "vect_model_load_cost: explicit realign\n");
1060 break;
1062 case dr_explicit_realign_optimized:
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_NOTE, vect_location,
1066 "vect_model_load_cost: unaligned software "
1067 "pipelined.\n");
1069 /* Unaligned software pipeline has a load of an address, an initial
1070 load, and possibly a mask operation to "prime" the loop. However,
1071 if this is an access in a group of loads, which provide grouped
1072 access, then the above cost should only be considered for one
1073 access in the group. Inside the loop, there is a load op
1074 and a realignment op. */
1076 if (add_realign_cost && record_prologue_costs)
1078 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1079 vector_stmt, stmt_info,
1080 0, vect_prologue);
1081 if (targetm.vectorize.builtin_mask_for_load)
1082 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1083 vector_stmt, stmt_info,
1084 0, vect_prologue);
1087 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1088 stmt_info, 0, vect_body);
1089 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1090 stmt_info, 0, vect_body);
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE, vect_location,
1094 "vect_model_load_cost: explicit realign optimized"
1095 "\n");
1097 break;
1100 case dr_unaligned_unsupported:
1102 *inside_cost = VECT_MAX_COST;
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1106 "vect_model_load_cost: unsupported access.\n");
1107 break;
1110 default:
1111 gcc_unreachable ();
1115 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1116 the loop preheader for the vectorized stmt STMT_VINFO. */
1118 static void
1119 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1120 gimple_stmt_iterator *gsi)
1122 if (gsi)
1123 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1124 else
1125 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE, vect_location,
1129 "created new init_stmt: %G", new_stmt);
1132 /* Function vect_init_vector.
1134 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1135 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1136 vector type a vector with all elements equal to VAL is created first.
1137 Place the initialization at GSI if it is not NULL. Otherwise, place the
1138 initialization at the loop preheader.
1139 Return the DEF of INIT_STMT.
1140 It will be used in the vectorization of STMT_INFO. */
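/* For example, initializing a V4SI vector from the scalar constant 3
   builds the vector { 3, 3, 3, 3 } with build_vector_from_val, assigns
   it to a new "cst_" temporary and places that assignment either before
   GSI or in the loop preheader.  */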
1142 tree
1143 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1144 gimple_stmt_iterator *gsi)
1146 gimple *init_stmt;
1147 tree new_temp;
1149 /* We abuse this function to push something to an SSA name with initial 'val'. */
1150 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1152 gcc_assert (VECTOR_TYPE_P (type));
1153 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1155 /* Scalar boolean value should be transformed into
1156 all zeros or all ones value before building a vector. */
1157 if (VECTOR_BOOLEAN_TYPE_P (type))
1159 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1160 tree false_val = build_zero_cst (TREE_TYPE (type));
1162 if (CONSTANT_CLASS_P (val))
1163 val = integer_zerop (val) ? false_val : true_val;
1164 else
1166 new_temp = make_ssa_name (TREE_TYPE (type));
1167 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1168 val, true_val, false_val);
1169 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1170 val = new_temp;
1173 else
1175 gimple_seq stmts = NULL;
1176 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1177 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1178 TREE_TYPE (type), val);
1179 else
1180 /* ??? Condition vectorization expects us to do
1181 promotion of invariant/external defs. */
1182 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1183 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1184 !gsi_end_p (gsi2); )
1186 init_stmt = gsi_stmt (gsi2);
1187 gsi_remove (&gsi2, false);
1188 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1192 val = build_vector_from_val (type, val);
1195 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1196 init_stmt = gimple_build_assign (new_temp, val);
1197 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1198 return new_temp;
1202 /* Function vect_get_vec_defs_for_operand.
1204 OP is an operand in STMT_VINFO. This function returns a vector of
1205 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1207 In the case that OP is an SSA_NAME which is defined in the loop, then
1208 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1210 In case OP is an invariant or constant, a new stmt that creates a vector def
1211 needs to be introduced. VECTYPE may be used to specify a required type for
1212 vector invariant. */
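/* Roughly: for a constant or external OP a single broadcast is created
   by vect_init_vector and pushed NCOPIES times, while for an OP defined
   inside the loop the LHS of each of the NCOPIES vectorized statements
   of the defining stmt is used.  */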
1214 void
1215 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1216 unsigned ncopies,
1217 tree op, vec<tree> *vec_oprnds, tree vectype)
1219 gimple *def_stmt;
1220 enum vect_def_type dt;
1221 bool is_simple_use;
1222 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_get_vec_defs_for_operand: %T\n", op);
1228 stmt_vec_info def_stmt_info;
1229 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1230 &def_stmt_info, &def_stmt);
1231 gcc_assert (is_simple_use);
1232 if (def_stmt && dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1235 vec_oprnds->create (ncopies);
1236 if (dt == vect_constant_def || dt == vect_external_def)
1238 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1239 tree vector_type;
1241 if (vectype)
1242 vector_type = vectype;
1243 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1244 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1245 vector_type = truth_type_for (stmt_vectype);
1246 else
1247 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1249 gcc_assert (vector_type);
1250 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1251 while (ncopies--)
1252 vec_oprnds->quick_push (vop);
1254 else
1256 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1257 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1258 for (unsigned i = 0; i < ncopies; ++i)
1259 vec_oprnds->quick_push (gimple_get_lhs
1260 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1265 /* Get vectorized definitions for OP0 and OP1. */
1267 void
1268 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1269 unsigned ncopies,
1270 tree op0, tree vectype0, vec<tree> *vec_oprnds0,
1271 tree op1, tree vectype1, vec<tree> *vec_oprnds1,
1272 tree op2, tree vectype2, vec<tree> *vec_oprnds2,
1273 tree op3, tree vectype3, vec<tree> *vec_oprnds3)
1275 if (slp_node)
1277 if (op0)
1278 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1279 if (op1)
1280 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1281 if (op2)
1282 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1283 if (op3)
1284 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1286 else
1288 if (op0)
1289 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1290 op0, vec_oprnds0, vectype0);
1291 if (op1)
1292 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1293 op1, vec_oprnds1, vectype1);
1294 if (op2)
1295 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1296 op2, vec_oprnds2, vectype2);
1297 if (op3)
1298 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1299 op3, vec_oprnds3, vectype3);
1303 void
1304 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1305 unsigned ncopies,
1306 tree op0, vec<tree> *vec_oprnds0,
1307 tree op1, vec<tree> *vec_oprnds1,
1308 tree op2, vec<tree> *vec_oprnds2,
1309 tree op3, vec<tree> *vec_oprnds3)
1311 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1312 op0, NULL_TREE, vec_oprnds0,
1313 op1, NULL_TREE, vec_oprnds1,
1314 op2, NULL_TREE, vec_oprnds2,
1315 op3, NULL_TREE, vec_oprnds3);
1318 /* Helper function called by vect_finish_replace_stmt and
1319 vect_finish_stmt_generation. Set the location of the new
1320 statement and create and return a stmt_vec_info for it. */
1322 static void
1323 vect_finish_stmt_generation_1 (vec_info *,
1324 stmt_vec_info stmt_info, gimple *vec_stmt)
1326 if (dump_enabled_p ())
1327 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1329 if (stmt_info)
1331 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1333 /* While EH edges will generally prevent vectorization, stmt might
1334 e.g. be in a must-not-throw region. Ensure newly created stmts
1335 that could throw are part of the same region. */
1336 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1337 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1338 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1340 else
1341 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1344 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1345 which sets the same scalar result as STMT_INFO did. Create and return a
1346 stmt_vec_info for VEC_STMT. */
1348 void
1349 vect_finish_replace_stmt (vec_info *vinfo,
1350 stmt_vec_info stmt_info, gimple *vec_stmt)
1352 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1353 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1355 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1356 gsi_replace (&gsi, vec_stmt, true);
1358 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1361 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1362 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1364 void
1365 vect_finish_stmt_generation (vec_info *vinfo,
1366 stmt_vec_info stmt_info, gimple *vec_stmt,
1367 gimple_stmt_iterator *gsi)
1369 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1371 if (!gsi_end_p (*gsi)
1372 && gimple_has_mem_ops (vec_stmt))
1374 gimple *at_stmt = gsi_stmt (*gsi);
1375 tree vuse = gimple_vuse (at_stmt);
1376 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1378 tree vdef = gimple_vdef (at_stmt);
1379 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1380 gimple_set_modified (vec_stmt, true);
1381 /* If we have an SSA vuse and insert a store, update virtual
1382 SSA form to avoid triggering the renamer. Do so only
1383 if we can easily see all uses - which is what almost always
1384 happens with the way vectorized stmts are inserted. */
1385 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1386 && ((is_gimple_assign (vec_stmt)
1387 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1388 || (is_gimple_call (vec_stmt)
1389 && (!(gimple_call_flags (vec_stmt)
1390 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1391 || (gimple_call_lhs (vec_stmt)
1392 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1394 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1395 gimple_set_vdef (vec_stmt, new_vdef);
1396 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1400 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1401 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1404 /* We want to vectorize a call to combined function CFN with function
1405 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1406 as the types of all inputs. Check whether this is possible using
1407 an internal function, returning its code if so or IFN_LAST if not. */
1409 static internal_fn
1410 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1411 tree vectype_out, tree vectype_in)
1413 internal_fn ifn;
1414 if (internal_fn_p (cfn))
1415 ifn = as_internal_fn (cfn);
1416 else
1417 ifn = associated_internal_fn (fndecl);
1418 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1420 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1421 if (info.vectorizable)
1423 bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
1424 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1425 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1427 /* The type sizes of vectype_in and vectype_out must be exactly
1428 the same when vectype_out does not participate in the optab
1429 query, while there is no size restriction on vectype_in when
1430 vectype_out is part of the optab query. */
1431 if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
1432 return IFN_LAST;
1434 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1435 OPTIMIZE_FOR_SPEED))
1436 return ifn;
1439 return IFN_LAST;
1443 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1444 gimple_stmt_iterator *);
1446 /* Check whether a load or store statement in the loop described by
1447 LOOP_VINFO is possible in a loop using partial vectors. This is
1448 testing whether the vectorizer pass has the appropriate support,
1449 as well as whether the target does.
1451 VLS_TYPE says whether the statement is a load or store and VECTYPE
1452 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1453 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1454 says how the load or store is going to be implemented and GROUP_SIZE
1455 is the number of load or store statements in the containing group.
1456 If the access is a gather load or scatter store, GS_INFO describes
1457 its arguments. If the load or store is conditional, SCALAR_MASK is the
1458 condition under which it occurs.
1460 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1461 vectors is not supported, otherwise record the required rgroup control
1462 types. */
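/* Roughly, the checks below prefer the MASK_LEN_* load/store-lanes and
   gather/scatter variants, then their mask-only forms, and for
   contiguous accesses fall back to len-based or mask-based loads and
   stores; if none of these is available the loop gives up on partial
   vectors.  */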
1464 static void
1465 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1466 slp_tree slp_node,
1467 vec_load_store_type vls_type,
1468 int group_size,
1469 vect_memory_access_type
1470 memory_access_type,
1471 gather_scatter_info *gs_info,
1472 tree scalar_mask)
1474 /* Invariant loads need no special support. */
1475 if (memory_access_type == VMAT_INVARIANT)
1476 return;
1478 unsigned int nvectors;
1479 if (slp_node)
1480 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1481 else
1482 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1484 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1485 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1486 machine_mode vecmode = TYPE_MODE (vectype);
1487 bool is_load = (vls_type == VLS_LOAD);
1488 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1490 internal_fn ifn
1491 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1492 : vect_store_lanes_supported (vectype, group_size, true));
1493 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1494 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1495 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1496 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1497 scalar_mask);
1498 else
1500 if (dump_enabled_p ())
1501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1502 "can't operate on partial vectors because"
1503 " the target doesn't have an appropriate"
1504 " load/store-lanes instruction.\n");
1505 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1507 return;
1510 if (memory_access_type == VMAT_GATHER_SCATTER)
1512 internal_fn ifn = (is_load
1513 ? IFN_MASK_GATHER_LOAD
1514 : IFN_MASK_SCATTER_STORE);
1515 internal_fn len_ifn = (is_load
1516 ? IFN_MASK_LEN_GATHER_LOAD
1517 : IFN_MASK_LEN_SCATTER_STORE);
1518 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1519 gs_info->memory_type,
1520 gs_info->offset_vectype,
1521 gs_info->scale))
1522 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1523 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1524 gs_info->memory_type,
1525 gs_info->offset_vectype,
1526 gs_info->scale))
1527 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1528 scalar_mask);
1529 else
1531 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1533 "can't operate on partial vectors because"
1534 " the target doesn't have an appropriate"
1535 " gather load or scatter store instruction.\n");
1536 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1538 return;
1541 if (memory_access_type != VMAT_CONTIGUOUS
1542 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1544 /* Element X of the data must come from iteration i * VF + X of the
1545 scalar loop. We need more work to support other mappings. */
1546 if (dump_enabled_p ())
1547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1548 "can't operate on partial vectors because an"
1549 " access isn't contiguous.\n");
1550 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1551 return;
1554 if (!VECTOR_MODE_P (vecmode))
1556 if (dump_enabled_p ())
1557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1558 "can't operate on partial vectors when emulating"
1559 " vector operations.\n");
1560 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1561 return;
1564 /* We might load more scalars than we need for permuting SLP loads.
1565 We checked in get_group_load_store_type that the extra elements
1566 don't leak into a new vector. */
1567 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1569 unsigned int nvectors;
1570 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1571 return nvectors;
1572 gcc_unreachable ();
1575 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1576 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1577 machine_mode mask_mode;
1578 machine_mode vmode;
1579 bool using_partial_vectors_p = false;
1580 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1582 nvectors = group_memory_nvectors (group_size * vf, nunits);
1583 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1584 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1585 using_partial_vectors_p = true;
1587 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1588 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1590 nvectors = group_memory_nvectors (group_size * vf, nunits);
1591 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1592 using_partial_vectors_p = true;
1595 if (!using_partial_vectors_p)
1597 if (dump_enabled_p ())
1598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1599 "can't operate on partial vectors because the"
1600 " target doesn't have the appropriate partial"
1601 " vectorization load or store.\n");
1602 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1606 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1607 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1608 that needs to be applied to all loads and stores in a vectorized loop.
1609 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1610 otherwise return VEC_MASK & LOOP_MASK.
1612 MASK_TYPE is the type of both masks. If new statements are needed,
1613 insert them before GSI. */
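/* For example, when both masks are needed this emits

     vec_mask_and_N = vec_mask & loop_mask;

   before GSI, unless the pair was already recorded in
   vec_cond_masked_set, in which case VEC_MASK is returned unchanged.  */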
1615 static tree
1616 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1617 tree vec_mask, gimple_stmt_iterator *gsi)
1619 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1620 if (!loop_mask)
1621 return vec_mask;
1623 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1625 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1626 return vec_mask;
1628 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1629 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1630 vec_mask, loop_mask);
1632 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1633 return and_res;
1636 /* Determine whether we can use a gather load or scatter store to vectorize
1637 strided load or store STMT_INFO by truncating the current offset to a
1638 smaller width. We need to be able to construct an offset vector:
1640 { 0, X, X*2, X*3, ... }
1642 without loss of precision, where X is STMT_INFO's DR_STEP.
1644 Return true if this is possible, describing the gather load or scatter
1645 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
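/* For example, with 4-byte elements, a DR_STEP of 4 and at most 255
   latch iterations the scaled offsets fit in 8 bits, so an unsigned
   8-bit offset type with scale 4 can be used, provided the target
   supports such a gather or scatter.  */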
1647 static bool
1648 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1649 loop_vec_info loop_vinfo, bool masked_p,
1650 gather_scatter_info *gs_info)
1652 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1653 data_reference *dr = dr_info->dr;
1654 tree step = DR_STEP (dr);
1655 if (TREE_CODE (step) != INTEGER_CST)
1657 /* ??? Perhaps we could use range information here? */
1658 if (dump_enabled_p ())
1659 dump_printf_loc (MSG_NOTE, vect_location,
1660 "cannot truncate variable step.\n");
1661 return false;
1664 /* Get the number of bits in an element. */
1665 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1666 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1667 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1669 /* Set COUNT to the upper limit on the number of elements - 1.
1670 Start with the maximum vectorization factor. */
1671 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1673 /* Try lowering COUNT to the number of scalar latch iterations. */
1674 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1675 widest_int max_iters;
1676 if (max_loop_iterations (loop, &max_iters)
1677 && max_iters < count)
1678 count = max_iters.to_shwi ();
1680 /* Try scales of 1 and the element size. */
1681 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1682 wi::overflow_type overflow = wi::OVF_NONE;
1683 for (int i = 0; i < 2; ++i)
1685 int scale = scales[i];
1686 widest_int factor;
1687 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1688 continue;
1690 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1691 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1692 if (overflow)
1693 continue;
1694 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1695 unsigned int min_offset_bits = wi::min_precision (range, sign);
1697 /* Find the narrowest viable offset type. */
1698 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1699 tree offset_type = build_nonstandard_integer_type (offset_bits,
1700 sign == UNSIGNED);
1702 /* See whether the target supports the operation with an offset
1703 no narrower than OFFSET_TYPE. */
1704 tree memory_type = TREE_TYPE (DR_REF (dr));
1705 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1706 vectype, memory_type, offset_type, scale,
1707 &gs_info->ifn, &gs_info->offset_vectype)
1708 || gs_info->ifn == IFN_LAST)
1709 continue;
1711 gs_info->decl = NULL_TREE;
1712 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1713 but we don't need to store that here. */
1714 gs_info->base = NULL_TREE;
1715 gs_info->element_type = TREE_TYPE (vectype);
1716 gs_info->offset = fold_convert (offset_type, step);
1717 gs_info->offset_dt = vect_constant_def;
1718 gs_info->scale = scale;
1719 gs_info->memory_type = memory_type;
1720 return true;
1723 if (overflow && dump_enabled_p ())
1724 dump_printf_loc (MSG_NOTE, vect_location,
1725 "truncating gather/scatter offset to %d bits"
1726 " might change its value.\n", element_bits);
1728 return false;
1731 /* Return true if we can use gather/scatter internal functions to
1732 vectorize STMT_INFO, which is a grouped or strided load or store.
1733 MASKED_P is true if load or store is conditional. When returning
1734 true, fill in GS_INFO with the information required to perform the
1735 operation. */
1737 static bool
1738 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1739 loop_vec_info loop_vinfo, bool masked_p,
1740 gather_scatter_info *gs_info)
1742 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1743 || gs_info->ifn == IFN_LAST)
1744 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1745 masked_p, gs_info);
1747 tree old_offset_type = TREE_TYPE (gs_info->offset);
1748 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1750 gcc_assert (TYPE_PRECISION (new_offset_type)
1751 >= TYPE_PRECISION (old_offset_type));
1752 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1754 if (dump_enabled_p ())
1755 dump_printf_loc (MSG_NOTE, vect_location,
1756 "using gather/scatter for strided/grouped access,"
1757 " scale = %d\n", gs_info->scale);
1759 return true;
1762 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1763 elements with a known constant step. Return -1 if that step
1764 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1766 static int
1767 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1769 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1770 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1771 size_zero_node);
1774 /* If the target supports a permute mask that reverses the elements in
1775 a vector of type VECTYPE, return that mask, otherwise return null. */
1777 static tree
1778 perm_mask_for_reverse (tree vectype)
1780 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1782 /* The encoding has a single stepped pattern. */
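/* For instance (illustrative): for a V8HI VECTYPE, nunits is 8 and the
   three elements pushed below are { 7, 6, 5 }; the stepped encoding
   extends them to the full reversal selector { 7, 6, 5, 4, 3, 2, 1, 0 }.  */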
1783 vec_perm_builder sel (nunits, 1, 3);
1784 for (int i = 0; i < 3; ++i)
1785 sel.quick_push (nunits - 1 - i);
1787 vec_perm_indices indices (sel, 1, nunits);
1788 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1789 indices))
1790 return NULL_TREE;
1791 return vect_gen_perm_mask_checked (vectype, indices);
1794 /* A subroutine of get_load_store_type, with a subset of the same
1795 arguments. Handle the case where STMT_INFO is a load or store that
1796 accesses consecutive elements with a negative step. Sets *POFFSET
1797 to the offset to be applied to the DR for the first access. */
1799 static vect_memory_access_type
1800 get_negative_load_store_type (vec_info *vinfo,
1801 stmt_vec_info stmt_info, tree vectype,
1802 vec_load_store_type vls_type,
1803 unsigned int ncopies, poly_int64 *poffset)
1805 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1806 dr_alignment_support alignment_support_scheme;
1808 if (ncopies > 1)
1810 if (dump_enabled_p ())
1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1812 "multiple types with negative step.\n");
1813 return VMAT_ELEMENTWISE;
1816 /* For backward running DRs the first access in vectype actually is
1817 N-1 elements before the address of the DR. */
1818 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1819 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
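/* Illustrative example (not from the source): for a V4SI VECTYPE with
   4-byte elements, *POFFSET is (-4 + 1) * 4 == -12, i.e. the vector
   access starts three elements before the scalar address of the DR.  */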
1821 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1822 alignment_support_scheme
1823 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1824 if (alignment_support_scheme != dr_aligned
1825 && alignment_support_scheme != dr_unaligned_supported)
1827 if (dump_enabled_p ())
1828 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1829 "negative step but alignment required.\n");
1830 *poffset = 0;
1831 return VMAT_ELEMENTWISE;
1834 if (vls_type == VLS_STORE_INVARIANT)
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_NOTE, vect_location,
1838 "negative step with invariant source;"
1839 " no permute needed.\n");
1840 return VMAT_CONTIGUOUS_DOWN;
1843 if (!perm_mask_for_reverse (vectype))
1845 if (dump_enabled_p ())
1846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1847 "negative step and reversing not supported.\n");
1848 *poffset = 0;
1849 return VMAT_ELEMENTWISE;
1852 return VMAT_CONTIGUOUS_REVERSE;
1855 /* STMT_INFO is either a masked or unconditional store. Return the value
1856 being stored. */
1858 tree
1859 vect_get_store_rhs (stmt_vec_info stmt_info)
1861 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1863 gcc_assert (gimple_assign_single_p (assign));
1864 return gimple_assign_rhs1 (assign);
1866 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1868 internal_fn ifn = gimple_call_internal_fn (call);
1869 int index = internal_fn_stored_value_index (ifn);
1870 gcc_assert (index >= 0);
1871 return gimple_call_arg (call, index);
1873 gcc_unreachable ();
1876 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1878 This function returns a vector type which can be composed from NELTS pieces,
1879 whose type is recorded in PTYPE. VTYPE should be a vector type and has the
1880 same vector size as the returned vector. It first checks whether the target
1881 supports a vector mode of the piece size for the construction; if not, it
1882 then checks whether a scalar mode of the piece size can be used instead.
1883 It returns NULL_TREE if no usable composition is found.
1885 For example, for (vtype=V16QI, nelts=4), we can probably get:
1886 - V16QI with PTYPE V4QI.
1887 - V4SI with PTYPE SI.
1888 - NULL_TREE. */
1890 static tree
1891 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1893 gcc_assert (VECTOR_TYPE_P (vtype));
1894 gcc_assert (known_gt (nelts, 0U));
1896 machine_mode vmode = TYPE_MODE (vtype);
1897 if (!VECTOR_MODE_P (vmode))
1898 return NULL_TREE;
1900 /* When we are asked to compose the vector from its components let
1901 that happen directly. */
1902 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1904 *ptype = TREE_TYPE (vtype);
1905 return vtype;
1908 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
1909 unsigned int pbsize;
1910 if (constant_multiple_p (vbsize, nelts, &pbsize))
1912 /* First check if vec_init optab supports construction from
1913 vector pieces directly. */
1914 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1915 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
1916 machine_mode rmode;
1917 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
1918 && (convert_optab_handler (vec_init_optab, vmode, rmode)
1919 != CODE_FOR_nothing))
1921 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1922 return vtype;
1925 /* Otherwise check if there exists an integer type of the same piece size
1926 and if the vec_init optab supports construction from it directly. */
1927 if (int_mode_for_size (pbsize, 0).exists (&elmode)
1928 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
1929 && (convert_optab_handler (vec_init_optab, rmode, elmode)
1930 != CODE_FOR_nothing))
1932 *ptype = build_nonstandard_integer_type (pbsize, 1);
1933 return build_vector_type (*ptype, nelts);
1937 return NULL_TREE;
1940 /* A subroutine of get_load_store_type, with a subset of the same
1941 arguments. Handle the case where STMT_INFO is part of a grouped load
1942 or store.
1944 For stores, the statements in the group are all consecutive
1945 and there is no gap at the end. For loads, the statements in the
1946 group might not be consecutive; there can be gaps between statements
1947 as well as at the end. */
1949 static bool
1950 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1951 tree vectype, slp_tree slp_node,
1952 bool masked_p, vec_load_store_type vls_type,
1953 vect_memory_access_type *memory_access_type,
1954 poly_int64 *poffset,
1955 dr_alignment_support *alignment_support_scheme,
1956 int *misalignment,
1957 gather_scatter_info *gs_info,
1958 internal_fn *lanes_ifn)
1960 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1961 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1962 stmt_vec_info first_stmt_info;
1963 unsigned int group_size;
1964 unsigned HOST_WIDE_INT gap;
1965 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1967 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1968 group_size = DR_GROUP_SIZE (first_stmt_info);
1969 gap = DR_GROUP_GAP (first_stmt_info);
1971 else
1973 first_stmt_info = stmt_info;
1974 group_size = 1;
1975 gap = 0;
1977 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
1978 bool single_element_p = (stmt_info == first_stmt_info
1979 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
1980 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1982 /* True if the vectorized statements would access beyond the last
1983 statement in the group. */
1984 bool overrun_p = false;
1986 /* True if we can cope with such overrun by peeling for gaps, so that
1987 there is at least one final scalar iteration after the vector loop. */
1988 bool can_overrun_p = (!masked_p
1989 && vls_type == VLS_LOAD
1990 && loop_vinfo
1991 && !loop->inner);
1993 /* There can only be a gap at the end of the group if the stride is
1994 known at compile time. */
1995 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
1997 /* Stores can't yet have gaps. */
1998 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2000 if (slp_node)
2002 /* For SLP vectorization we directly vectorize a subchain
2003 without permutation. */
2004 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2005 first_dr_info
2006 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2007 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2009 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2010 separated by the stride, until we have a complete vector.
2011 Fall back to scalar accesses if that isn't possible. */
2012 if (multiple_p (nunits, group_size))
2013 *memory_access_type = VMAT_STRIDED_SLP;
2014 else
2015 *memory_access_type = VMAT_ELEMENTWISE;
2017 else
2019 overrun_p = loop_vinfo && gap != 0;
2020 if (overrun_p && vls_type != VLS_LOAD)
2022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2023 "Grouped store with gaps requires"
2024 " non-consecutive accesses\n");
2025 return false;
2027 /* An overrun is fine if the trailing elements are smaller
2028 than the alignment boundary B. Every vector access will
2029 be a multiple of B and so we are guaranteed to access a
2030 non-gap element in the same B-sized block. */
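/* Illustrative numbers (not from the source): with 4-byte elements and a
   known vector alignment of 16 bytes, B / element-size is 4, so a trailing
   gap of at most 3 elements keeps every overrunning access within the same
   16-byte block as an accessed non-gap element and OVERRUN_P is cleared
   below.  */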
2031 if (overrun_p
2032 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2033 vectype)
2034 / vect_get_scalar_dr_size (first_dr_info)))
2035 overrun_p = false;
2037 /* If the gap splits the vector in half and the target
2038 can do half-vector operations, avoid the epilogue peeling
2039 by simply loading only half of the vector. Usually
2040 the construction with an upper zero half will be elided. */
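/* For instance (illustrative): a single-vector group with nunits ==
   group_size == 8 and a gap of 4 satisfies the size conditions below, so
   only the lower half of the vector is loaded and no peeling for gaps is
   needed.  */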
2041 dr_alignment_support alss;
2042 int misalign = dr_misalignment (first_dr_info, vectype);
2043 tree half_vtype;
2044 if (overrun_p
2045 && !masked_p
2046 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2047 vectype, misalign)))
2048 == dr_aligned
2049 || alss == dr_unaligned_supported)
2050 && known_eq (nunits, (group_size - gap) * 2)
2051 && known_eq (nunits, group_size)
2052 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2053 != NULL_TREE))
2054 overrun_p = false;
2056 if (overrun_p && !can_overrun_p)
2058 if (dump_enabled_p ())
2059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2060 "Peeling for outer loop is not supported\n");
2061 return false;
2063 int cmp = compare_step_with_zero (vinfo, stmt_info);
2064 if (cmp < 0)
2066 if (single_element_p)
2067 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2068 only correct for single element "interleaving" SLP. */
2069 *memory_access_type = get_negative_load_store_type
2070 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2071 else
2073 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2074 separated by the stride, until we have a complete vector.
2075 Fall back to scalar accesses if that isn't possible. */
2076 if (multiple_p (nunits, group_size))
2077 *memory_access_type = VMAT_STRIDED_SLP;
2078 else
2079 *memory_access_type = VMAT_ELEMENTWISE;
2082 else if (cmp == 0 && loop_vinfo)
2084 gcc_assert (vls_type == VLS_LOAD);
2085 *memory_access_type = VMAT_INVARIANT;
2086 /* Invariant accesses perform only component accesses, alignment
2087 is irrelevant for them. */
2088 *alignment_support_scheme = dr_unaligned_supported;
2090 else
2091 *memory_access_type = VMAT_CONTIGUOUS;
2093 /* When we have a contiguous access across loop iterations
2094 but the access in the loop doesn't cover the full vector
2095 we can end up with no gap recorded but still excess
2096 elements accessed, see PR103116. Make sure we peel for
2097 gaps if necessary and sufficient and give up if not.
2099 If there is a combination of the access not covering the full
2100 vector and a gap recorded then we may need to peel twice. */
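/* Worked example (illustrative, not from the source): with group_size == 3,
   gap == 0, a vectorization factor of 4 and nunits == 8, the check below
   computes (3 * 4) % 8 + 3 - 0 == 7 < 8, so a single peeled scalar
   iteration would not be enough and we give up.  */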
2101 if (loop_vinfo
2102 && *memory_access_type == VMAT_CONTIGUOUS
2103 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2104 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2105 nunits))
2107 unsigned HOST_WIDE_INT cnunits, cvf;
2108 if (!can_overrun_p
2109 || !nunits.is_constant (&cnunits)
2110 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2111 /* Peeling for gaps assumes that a single scalar iteration
2112 is enough to make sure the last vector iteration doesn't
2113 access excess elements.
2114 ??? Enhancements include peeling multiple iterations
2115 or using masked loads with a static mask. */
2116 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2118 if (dump_enabled_p ())
2119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2120 "peeling for gaps insufficient for "
2121 "access\n");
2122 return false;
2124 overrun_p = true;
2128 else
2130 /* We can always handle this case using elementwise accesses,
2131 but see if something more efficient is available. */
2132 *memory_access_type = VMAT_ELEMENTWISE;
2134 /* If there is a gap at the end of the group then these optimizations
2135 would access excess elements in the last iteration. */
2136 bool would_overrun_p = (gap != 0);
2137 /* An overrun is fine if the trailing elements are smaller than the
2138 alignment boundary B. Every vector access will be a multiple of B
2139 and so we are guaranteed to access a non-gap element in the
2140 same B-sized block. */
2141 if (would_overrun_p
2142 && !masked_p
2143 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2144 / vect_get_scalar_dr_size (first_dr_info)))
2145 would_overrun_p = false;
2147 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2148 && (can_overrun_p || !would_overrun_p)
2149 && compare_step_with_zero (vinfo, stmt_info) > 0)
2151 /* First cope with the degenerate case of a single-element
2152 vector. */
2153 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2156 else
2158 /* Otherwise try using LOAD/STORE_LANES. */
2159 *lanes_ifn
2160 = vls_type == VLS_LOAD
2161 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2162 : vect_store_lanes_supported (vectype, group_size,
2163 masked_p);
2164 if (*lanes_ifn != IFN_LAST)
2166 *memory_access_type = VMAT_LOAD_STORE_LANES;
2167 overrun_p = would_overrun_p;
2170 /* If that fails, try using permuting loads. */
2171 else if (vls_type == VLS_LOAD
2172 ? vect_grouped_load_supported (vectype,
2173 single_element_p,
2174 group_size)
2175 : vect_grouped_store_supported (vectype, group_size))
2177 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2178 overrun_p = would_overrun_p;
2183 /* As a last resort, try using a gather load or scatter store.
2185 ??? Although the code can handle all group sizes correctly,
2186 it probably isn't a win to use separate strided accesses based
2187 on nearby locations. Or, even if it's a win over scalar code,
2188 it might not be a win over vectorizing at a lower VF, if that
2189 allows us to use contiguous accesses. */
2190 if (*memory_access_type == VMAT_ELEMENTWISE
2191 && single_element_p
2192 && loop_vinfo
2193 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2194 masked_p, gs_info))
2195 *memory_access_type = VMAT_GATHER_SCATTER;
2198 if (*memory_access_type == VMAT_GATHER_SCATTER
2199 || *memory_access_type == VMAT_ELEMENTWISE)
2201 *alignment_support_scheme = dr_unaligned_supported;
2202 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2204 else
2206 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2207 *alignment_support_scheme
2208 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2209 *misalignment);
2212 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2214 /* STMT is the leader of the group. Check the operands of all the
2215 stmts of the group. */
2216 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2217 while (next_stmt_info)
2219 tree op = vect_get_store_rhs (next_stmt_info);
2220 enum vect_def_type dt;
2221 if (!vect_is_simple_use (op, vinfo, &dt))
2223 if (dump_enabled_p ())
2224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2225 "use not simple.\n");
2226 return false;
2228 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2232 if (overrun_p)
2234 gcc_assert (can_overrun_p);
2235 if (dump_enabled_p ())
2236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2237 "Data access with gaps requires scalar "
2238 "epilogue loop\n");
2239 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2242 return true;
2245 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2246 if there is a memory access type that the vectorized form can use,
2247 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2248 or scatters, fill in GS_INFO accordingly. In addition
2249 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2250 the target does not support the alignment scheme. *MISALIGNMENT
2251 is set according to the alignment of the access (including
2252 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2254 SLP says whether we're performing SLP rather than loop vectorization.
2255 MASKED_P is true if the statement is conditional on a vectorized mask.
2256 VECTYPE is the vector type that the vectorized statements will use.
2257 NCOPIES is the number of vector statements that will be needed. */
2259 static bool
2260 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2261 tree vectype, slp_tree slp_node,
2262 bool masked_p, vec_load_store_type vls_type,
2263 unsigned int ncopies,
2264 vect_memory_access_type *memory_access_type,
2265 poly_int64 *poffset,
2266 dr_alignment_support *alignment_support_scheme,
2267 int *misalignment,
2268 gather_scatter_info *gs_info,
2269 internal_fn *lanes_ifn)
2271 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2272 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2273 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2274 *poffset = 0;
2275 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2277 *memory_access_type = VMAT_GATHER_SCATTER;
2278 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2279 gcc_unreachable ();
2280 /* When using internal functions, we rely on pattern recognition
2281 to convert the type of the offset to the type that the target
2282 requires, with the result being a call to an internal function.
2283 If that failed for some reason (e.g. because another pattern
2284 took priority), just handle cases in which the offset already
2285 has the right type. */
2286 else if (gs_info->ifn != IFN_LAST
2287 && !is_gimple_call (stmt_info->stmt)
2288 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2289 TREE_TYPE (gs_info->offset_vectype)))
2291 if (dump_enabled_p ())
2292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2293 "%s offset requires a conversion\n",
2294 vls_type == VLS_LOAD ? "gather" : "scatter");
2295 return false;
2297 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2298 &gs_info->offset_dt,
2299 &gs_info->offset_vectype))
2301 if (dump_enabled_p ())
2302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2303 "%s index use not simple.\n",
2304 vls_type == VLS_LOAD ? "gather" : "scatter");
2305 return false;
2307 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2309 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2310 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2311 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2312 (gs_info->offset_vectype),
2313 TYPE_VECTOR_SUBPARTS (vectype)))
2315 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2317 "unsupported vector types for emulated "
2318 "gather.\n");
2319 return false;
2322 /* Gather-scatter accesses perform only component accesses, alignment
2323 is irrelevant for them. */
2324 *alignment_support_scheme = dr_unaligned_supported;
2326 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2328 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2329 masked_p,
2330 vls_type, memory_access_type, poffset,
2331 alignment_support_scheme,
2332 misalignment, gs_info, lanes_ifn))
2333 return false;
2335 else if (STMT_VINFO_STRIDED_P (stmt_info))
2337 gcc_assert (!slp_node);
2338 if (loop_vinfo
2339 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2340 masked_p, gs_info))
2341 *memory_access_type = VMAT_GATHER_SCATTER;
2342 else
2343 *memory_access_type = VMAT_ELEMENTWISE;
2344 /* Alignment is irrelevant here. */
2345 *alignment_support_scheme = dr_unaligned_supported;
2347 else
2349 int cmp = compare_step_with_zero (vinfo, stmt_info);
2350 if (cmp == 0)
2352 gcc_assert (vls_type == VLS_LOAD);
2353 *memory_access_type = VMAT_INVARIANT;
2354 /* Invariant accesses perform only component accesses, alignment
2355 is irrelevant for them. */
2356 *alignment_support_scheme = dr_unaligned_supported;
2358 else
2360 if (cmp < 0)
2361 *memory_access_type = get_negative_load_store_type
2362 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2363 else
2364 *memory_access_type = VMAT_CONTIGUOUS;
2365 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2366 vectype, *poffset);
2367 *alignment_support_scheme
2368 = vect_supportable_dr_alignment (vinfo,
2369 STMT_VINFO_DR_INFO (stmt_info),
2370 vectype, *misalignment);
2374 if ((*memory_access_type == VMAT_ELEMENTWISE
2375 || *memory_access_type == VMAT_STRIDED_SLP)
2376 && !nunits.is_constant ())
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2380 "Not using elementwise accesses due to variable "
2381 "vectorization factor.\n");
2382 return false;
2385 if (*alignment_support_scheme == dr_unaligned_unsupported)
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2389 "unsupported unaligned access\n");
2390 return false;
2393 /* FIXME: At the moment the cost model seems to underestimate the
2394 cost of using elementwise accesses. This check preserves the
2395 traditional behavior until that can be fixed. */
2396 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2397 if (!first_stmt_info)
2398 first_stmt_info = stmt_info;
2399 if (*memory_access_type == VMAT_ELEMENTWISE
2400 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2401 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2402 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2403 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "not falling back to elementwise accesses\n");
2408 return false;
2410 return true;
2413 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2414 conditional operation STMT_INFO. When returning true, store the mask
2415 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2416 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2417 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2419 static bool
2420 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2421 slp_tree slp_node, unsigned mask_index,
2422 tree *mask, slp_tree *mask_node,
2423 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2425 enum vect_def_type mask_dt;
2426 tree mask_vectype;
2427 slp_tree mask_node_1;
2428 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2429 mask, &mask_node_1, &mask_dt, &mask_vectype))
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2433 "mask use not simple.\n");
2434 return false;
2437 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2439 if (dump_enabled_p ())
2440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2441 "mask argument is not a boolean.\n");
2442 return false;
2445 /* If the caller is not prepared for adjusting an external/constant
2446 SLP mask vector type fail. */
2447 if (slp_node
2448 && !mask_node
2449 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2451 if (dump_enabled_p ())
2452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2453 "SLP mask argument is not vectorized.\n");
2454 return false;
2457 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2458 if (!mask_vectype)
2459 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
2460 mask_node_1);
2462 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2464 if (dump_enabled_p ())
2465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2466 "could not find an appropriate vector mask type.\n");
2467 return false;
2470 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2471 TYPE_VECTOR_SUBPARTS (vectype)))
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2475 "vector mask type %T"
2476 " does not match vector data type %T.\n",
2477 mask_vectype, vectype);
2479 return false;
2482 *mask_dt_out = mask_dt;
2483 *mask_vectype_out = mask_vectype;
2484 if (mask_node)
2485 *mask_node = mask_node_1;
2486 return true;
2489 /* Return true if stored value is suitable for vectorizing store
2490 statement STMT_INFO. When returning true, store the scalar stored
2491 in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2492 the type of the vectorized store value in
2493 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2495 static bool
2496 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2497 slp_tree slp_node, tree *rhs, slp_tree *rhs_node,
2498 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2499 vec_load_store_type *vls_type_out)
2501 int op_no = 0;
2502 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2504 if (gimple_call_internal_p (call)
2505 && internal_store_fn_p (gimple_call_internal_fn (call)))
2506 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2508 if (slp_node)
2509 op_no = vect_slp_child_index_for_operand
2510 (stmt_info->stmt, op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
2512 enum vect_def_type rhs_dt;
2513 tree rhs_vectype;
2514 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2515 rhs, rhs_node, &rhs_dt, &rhs_vectype))
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "use not simple.\n");
2520 return false;
2523 /* In the case this is a store from a constant make sure
2524 native_encode_expr can handle it. */
2525 if (CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
2527 if (dump_enabled_p ())
2528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2529 "cannot encode constant as a byte sequence.\n");
2530 return false;
2533 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2534 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2536 if (dump_enabled_p ())
2537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2538 "incompatible vector types.\n");
2539 return false;
2542 *rhs_dt_out = rhs_dt;
2543 *rhs_vectype_out = rhs_vectype;
2544 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2545 *vls_type_out = VLS_STORE_INVARIANT;
2546 else
2547 *vls_type_out = VLS_STORE;
2548 return true;
2551 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2552 Note that we support masks with floating-point type, in which case the
2553 floats are interpreted as a bitmask. */
2555 static tree
2556 vect_build_all_ones_mask (vec_info *vinfo,
2557 stmt_vec_info stmt_info, tree masktype)
2559 if (TREE_CODE (masktype) == INTEGER_TYPE)
2560 return build_int_cst (masktype, -1);
2561 else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2562 || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2564 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2565 mask = build_vector_from_val (masktype, mask);
2566 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2568 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2570 REAL_VALUE_TYPE r;
2571 long tmp[6];
2572 for (int j = 0; j < 6; ++j)
2573 tmp[j] = -1;
2574 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2575 tree mask = build_real (TREE_TYPE (masktype), r);
2576 mask = build_vector_from_val (masktype, mask);
2577 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2579 gcc_unreachable ();
2582 /* Build an all-zero merge value of type VECTYPE while vectorizing
2583 STMT_INFO as a gather load. */
2585 static tree
2586 vect_build_zero_merge_argument (vec_info *vinfo,
2587 stmt_vec_info stmt_info, tree vectype)
2589 tree merge;
2590 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2591 merge = build_int_cst (TREE_TYPE (vectype), 0);
2592 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2594 REAL_VALUE_TYPE r;
2595 long tmp[6];
2596 for (int j = 0; j < 6; ++j)
2597 tmp[j] = 0;
2598 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2599 merge = build_real (TREE_TYPE (vectype), r);
2601 else
2602 gcc_unreachable ();
2603 merge = build_vector_from_val (vectype, merge);
2604 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2607 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2608 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2609 the gather load operation. If the load is conditional, MASK is the
2610 vectorized condition, otherwise MASK is null. PTR is the base
2611 pointer and OFFSET is the vectorized offset. */
2613 static gimple *
2614 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2615 gimple_stmt_iterator *gsi,
2616 gather_scatter_info *gs_info,
2617 tree ptr, tree offset, tree mask)
2619 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2620 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2621 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2622 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2623 /* ptrtype */ arglist = TREE_CHAIN (arglist);
2624 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2625 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2626 tree scaletype = TREE_VALUE (arglist);
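/* Sketch of the expected builtin shape (an assumed example, not taken from
   any particular target): for a decl like
     V4SF gather (V4SF src, const void *ptr, V4SI idx, V4SF mask, int scale)
   the argument-list walk above yields srctype == V4SF, idxtype == V4SI,
   masktype == V4SF and scaletype == int, with rettype == V4SF matching
   srctype as asserted below.  */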
2627 tree var;
2628 gcc_checking_assert (types_compatible_p (srctype, rettype)
2629 && (!mask
2630 || TREE_CODE (masktype) == INTEGER_TYPE
2631 || types_compatible_p (srctype, masktype)));
2633 tree op = offset;
2634 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2636 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2637 TYPE_VECTOR_SUBPARTS (idxtype)));
2638 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2639 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2640 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2641 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2642 op = var;
2645 tree src_op = NULL_TREE;
2646 tree mask_op = NULL_TREE;
2647 if (mask)
2649 if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2651 tree utype, optype = TREE_TYPE (mask);
2652 if (VECTOR_TYPE_P (masktype)
2653 || TYPE_MODE (masktype) == TYPE_MODE (optype))
2654 utype = masktype;
2655 else
2656 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2657 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2658 tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
2659 gassign *new_stmt
2660 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2661 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2662 mask_arg = var;
2663 if (!useless_type_conversion_p (masktype, utype))
2665 gcc_assert (TYPE_PRECISION (utype)
2666 <= TYPE_PRECISION (masktype));
2667 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2668 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2669 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2670 mask_arg = var;
2672 src_op = build_zero_cst (srctype);
2673 mask_op = mask_arg;
2675 else
2677 src_op = mask;
2678 mask_op = mask;
2681 else
2683 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2684 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2687 tree scale = build_int_cst (scaletype, gs_info->scale);
2688 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2689 mask_op, scale);
2691 if (!useless_type_conversion_p (vectype, rettype))
2693 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2694 TYPE_VECTOR_SUBPARTS (rettype)));
2695 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2696 gimple_call_set_lhs (new_stmt, op);
2697 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2698 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2699 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
2702 return new_stmt;
2705 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2706 instructions before GSI. GS_INFO describes the scatter store operation.
2707 PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
2708 vectorized data to store.
2709 If the store is conditional, MASK is the vectorized condition, otherwise
2710 MASK is null. */
2712 static gimple *
2713 vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
2714 gimple_stmt_iterator *gsi,
2715 gather_scatter_info *gs_info,
2716 tree ptr, tree offset, tree oprnd, tree mask)
2718 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2719 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2720 /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
2721 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2722 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2723 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2724 tree scaletype = TREE_VALUE (arglist);
2725 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2726 && TREE_CODE (rettype) == VOID_TYPE);
2728 tree mask_arg = NULL_TREE;
2729 if (mask)
2731 mask_arg = mask;
2732 tree optype = TREE_TYPE (mask_arg);
2733 tree utype;
2734 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
2735 utype = masktype;
2736 else
2737 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2738 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
2739 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
2740 gassign *new_stmt
2741 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2742 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2743 mask_arg = var;
2744 if (!useless_type_conversion_p (masktype, utype))
2746 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
2747 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2748 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2749 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2750 mask_arg = var;
2753 else
2755 mask_arg = build_int_cst (masktype, -1);
2756 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
2759 tree src = oprnd;
2760 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
2762 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
2763 TYPE_VECTOR_SUBPARTS (srctype)));
2764 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
2765 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
2766 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
2767 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2768 src = var;
2771 tree op = offset;
2772 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2774 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2775 TYPE_VECTOR_SUBPARTS (idxtype)));
2776 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2777 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2778 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2779 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2780 op = var;
2783 tree scale = build_int_cst (scaletype, gs_info->scale);
2784 gcall *new_stmt
2785 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
2786 return new_stmt;
2789 /* Prepare the base and offset in GS_INFO for vectorization.
2790 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2791 to the vectorized offset argument for the first copy of STMT_INFO.
2792 STMT_INFO is the statement described by GS_INFO and LOOP is the
2793 containing loop. */
2795 static void
2796 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2797 class loop *loop, stmt_vec_info stmt_info,
2798 slp_tree slp_node, gather_scatter_info *gs_info,
2799 tree *dataref_ptr, vec<tree> *vec_offset)
2801 gimple_seq stmts = NULL;
2802 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2803 if (stmts != NULL)
2805 basic_block new_bb;
2806 edge pe = loop_preheader_edge (loop);
2807 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2808 gcc_assert (!new_bb);
2810 if (slp_node)
2811 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2812 else
2814 unsigned ncopies
2815 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2816 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2817 gs_info->offset, vec_offset,
2818 gs_info->offset_vectype);
2822 /* Prepare to implement a grouped or strided load or store using
2823 the gather load or scatter store operation described by GS_INFO.
2824 STMT_INFO is the load or store statement.
2826 Set *DATAREF_BUMP to the amount that should be added to the base
2827 address after each copy of the vectorized statement. Set *VEC_OFFSET
2828 to an invariant offset vector in which element I has the value
2829 I * DR_STEP / SCALE. */
2831 static void
2832 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2833 loop_vec_info loop_vinfo,
2834 gimple_stmt_iterator *gsi,
2835 gather_scatter_info *gs_info,
2836 tree *dataref_bump, tree *vec_offset,
2837 vec_loop_lens *loop_lens)
2839 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2840 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2842 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2844 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2845 ivtmp_8 = _31 * 16 (step in bytes);
2846 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2847 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2848 tree loop_len
2849 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
2850 tree tmp
2851 = fold_build2 (MULT_EXPR, sizetype,
2852 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2853 loop_len);
2854 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
2855 GSI_SAME_STMT);
2857 else
2859 tree bump
2860 = size_binop (MULT_EXPR,
2861 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2862 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2863 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2866 /* The offset given in GS_INFO can have pointer type, so use the element
2867 type of the vector instead. */
2868 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2870 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2871 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2872 ssize_int (gs_info->scale));
2873 step = fold_convert (offset_type, step);
2875 /* Create {0, X, X*2, X*3, ...}. */
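/* Illustrative example (not from the source): with DR_STEP == 16 bytes and
   SCALE == 4, X is 4 and the series built below is { 0, 4, 8, 12, ... },
   one offset per vector lane.  */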
2876 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2877 build_zero_cst (offset_type), step);
2878 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
2881 /* Prepare the pointer IVs which need to be updated by a variable amount.
2882 That variable amount is the outcome of .SELECT_VL. In this case, each
2883 iteration may process a flexible number of elements, as long as that
2884 number is <= VF elements.
2886 Return the data reference increment according to SELECT_VL.
2887 If new statements are needed, insert them before GSI. */
2889 static tree
2890 vect_get_loop_variant_data_ptr_increment (
2891 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
2892 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
2893 vect_memory_access_type memory_access_type)
2895 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2896 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2899 /* Gather/scatter accesses never reach here. */
2899 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
2901 /* When the SELECT_VL pattern is in use, we dynamically adjust
2902 the memory address by the .SELECT_VL result.
2904 The result of .SELECT_VL is the number of elements to be
2905 processed in each iteration. So the memory address
2906 adjustment operation should be:
2908 addr = addr + .SELECT_VL (ARG..) * step; */
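/* Illustrative numbers (not from the source): if .SELECT_VL returns 4
   elements and the scalar step is 8 bytes, the bump computed below is
   4 * 8 == 32 bytes.  */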
2910 tree loop_len
2911 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
2912 tree len_type = TREE_TYPE (loop_len);
2913 /* Since the outcome of .SELECT_VL is a number of elements, convert
2914 it to a byte size so that it can be used to adjust the
2915 variable-amount address pointer IVs. */
2916 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
2917 wide_int_to_tree (len_type, wi::to_widest (step)));
2918 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
2919 gassign *assign = gimple_build_assign (bump, tmp);
2920 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
2921 return bump;
2924 /* Return the amount that should be added to a vector pointer to move
2925 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2926 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2927 vectorization. */
2929 static tree
2930 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
2931 dr_vec_info *dr_info, tree aggr_type,
2932 vect_memory_access_type memory_access_type,
2933 vec_loop_lens *loop_lens = nullptr)
2935 if (memory_access_type == VMAT_INVARIANT)
2936 return size_zero_node;
2938 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2939 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2940 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
2941 loop_lens, dr_info,
2942 memory_access_type);
2944 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2945 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2946 if (tree_int_cst_sgn (step) == -1)
2947 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2948 return iv_step;
2951 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2953 static bool
2954 vectorizable_bswap (vec_info *vinfo,
2955 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2956 gimple **vec_stmt, slp_tree slp_node,
2957 slp_tree *slp_op,
2958 tree vectype_in, stmt_vector_for_cost *cost_vec)
2960 tree op, vectype;
2961 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2962 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2963 unsigned ncopies;
2965 op = gimple_call_arg (stmt, 0);
2966 vectype = STMT_VINFO_VECTYPE (stmt_info);
2967 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2969 /* Multiple types in SLP are handled by creating the appropriate number of
2970 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2971 case of SLP. */
2972 if (slp_node)
2973 ncopies = 1;
2974 else
2975 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2977 gcc_assert (ncopies >= 1);
2979 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype))
2981 if (dump_enabled_p ())
2982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2983 "mismatched vector sizes %T and %T\n",
2984 vectype_in, vectype);
2985 return false;
2988 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2989 if (! char_vectype)
2990 return false;
2992 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
2993 unsigned word_bytes;
2994 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
2995 return false;
2997 /* The encoding uses one stepped pattern for each byte in the word. */
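/* For example (illustrative): for a bswap32 with VECTYPE V4SI,
   CHAR_VECTYPE is V16QI and word_bytes is 4, so the elements pushed below
   are { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 }, which the stepped encoding
   extends to the byte-reversing selector
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */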
2998 vec_perm_builder elts (num_bytes, word_bytes, 3);
2999 for (unsigned i = 0; i < 3; ++i)
3000 for (unsigned j = 0; j < word_bytes; ++j)
3001 elts.quick_push ((i + 1) * word_bytes - j - 1);
3003 vec_perm_indices indices (elts, 1, num_bytes);
3004 machine_mode vmode = TYPE_MODE (char_vectype);
3005 if (!can_vec_perm_const_p (vmode, vmode, indices))
3006 return false;
3008 if (! vec_stmt)
3010 if (slp_node
3011 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3013 if (dump_enabled_p ())
3014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3015 "incompatible vector types for invariants\n");
3016 return false;
3019 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3020 DUMP_VECT_SCOPE ("vectorizable_bswap");
3021 record_stmt_cost (cost_vec,
3022 1, vector_stmt, stmt_info, 0, vect_prologue);
3023 record_stmt_cost (cost_vec,
3024 slp_node
3025 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3026 vec_perm, stmt_info, 0, vect_body);
3027 return true;
3030 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3032 /* Transform. */
3033 vec<tree> vec_oprnds = vNULL;
3034 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3035 op, &vec_oprnds);
3036 /* Arguments are ready. Create the new vector stmt. */
3037 unsigned i;
3038 tree vop;
3039 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3041 gimple *new_stmt;
3042 tree tem = make_ssa_name (char_vectype);
3043 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3044 char_vectype, vop));
3045 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3046 tree tem2 = make_ssa_name (char_vectype);
3047 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3048 tem, tem, bswap_vconst);
3049 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3050 tem = make_ssa_name (vectype);
3051 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3052 vectype, tem2));
3053 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3054 if (slp_node)
3055 slp_node->push_vec_def (new_stmt);
3056 else
3057 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3060 if (!slp_node)
3061 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3063 vec_oprnds.release ();
3064 return true;
3067 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3068 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3069 in a single step. On success, store the binary pack code in
3070 *CONVERT_CODE. */
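/* For instance (illustrative): narrowing VECTYPE_IN V4SI to VECTYPE_OUT
   V8HI can usually be done in a single step with a vector pack operation,
   and that pack code is what ends up in *CONVERT_CODE; a two-step
   narrowing such as V4SI to V16QI is rejected.  */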
3072 static bool
3073 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3074 code_helper *convert_code)
3076 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3077 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3078 return false;
3080 code_helper code;
3081 int multi_step_cvt = 0;
3082 auto_vec <tree, 8> interm_types;
3083 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3084 &code, &multi_step_cvt, &interm_types)
3085 || multi_step_cvt)
3086 return false;
3088 *convert_code = code;
3089 return true;
3092 /* Function vectorizable_call.
3094 Check if STMT_INFO performs a function call that can be vectorized.
3095 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3096 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3097 Return true if STMT_INFO is vectorizable in this way. */
3099 static bool
3100 vectorizable_call (vec_info *vinfo,
3101 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3102 gimple **vec_stmt, slp_tree slp_node,
3103 stmt_vector_for_cost *cost_vec)
3105 gcall *stmt;
3106 tree vec_dest;
3107 tree scalar_dest;
3108 tree op;
3109 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3110 tree vectype_out, vectype_in;
3111 poly_uint64 nunits_in;
3112 poly_uint64 nunits_out;
3113 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3114 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3115 tree fndecl, new_temp, rhs_type;
3116 enum vect_def_type dt[4]
3117 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3118 vect_unknown_def_type };
3119 tree vectypes[ARRAY_SIZE (dt)] = {};
3120 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3121 int ndts = ARRAY_SIZE (dt);
3122 int ncopies, j;
3123 auto_vec<tree, 8> vargs;
3124 enum { NARROW, NONE, WIDEN } modifier;
3125 size_t i, nargs;
3126 tree lhs;
3127 tree clz_ctz_arg1 = NULL_TREE;
3129 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3130 return false;
3132 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3133 && ! vec_stmt)
3134 return false;
3136 /* Is STMT_INFO a vectorizable call? */
3137 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3138 if (!stmt)
3139 return false;
3141 if (gimple_call_internal_p (stmt)
3142 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3143 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3144 /* Handled by vectorizable_load and vectorizable_store. */
3145 return false;
3147 if (gimple_call_lhs (stmt) == NULL_TREE
3148 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3149 return false;
3151 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3153 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3155 /* Process function arguments. */
3156 rhs_type = NULL_TREE;
3157 vectype_in = NULL_TREE;
3158 nargs = gimple_call_num_args (stmt);
3160 /* Bail out if the function has more than four arguments; we do not have
3161 interesting builtin functions to vectorize with more than two arguments
3162 except for fma. Zero arguments is not supported either. */
3163 if (nargs == 0 || nargs > 4)
3164 return false;
3166 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3167 combined_fn cfn = gimple_call_combined_fn (stmt);
3168 if (cfn == CFN_GOMP_SIMD_LANE)
3170 nargs = 0;
3171 rhs_type = unsigned_type_node;
3173 /* Similarly pretend IFN_CLZ and IFN_CTZ only have one argument; the second
3174 argument just says whether the operation is well-defined at zero and what
3175 value should be returned in that case. */
3176 if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
3178 nargs = 1;
3179 clz_ctz_arg1 = gimple_call_arg (stmt, 1);
3182 int mask_opno = -1;
3183 if (internal_fn_p (cfn))
3184 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3186 for (i = 0; i < nargs; i++)
3188 if ((int) i == mask_opno)
3190 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3191 &op, &slp_op[i], &dt[i], &vectypes[i]))
3192 return false;
3193 continue;
3196 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3197 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3199 if (dump_enabled_p ())
3200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3201 "use not simple.\n");
3202 return false;
3205 /* We can only handle calls with arguments of the same type. */
3206 if (rhs_type
3207 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3209 if (dump_enabled_p ())
3210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3211 "argument types differ.\n");
3212 return false;
3214 if (!rhs_type)
3215 rhs_type = TREE_TYPE (op);
3217 if (!vectype_in)
3218 vectype_in = vectypes[i];
3219 else if (vectypes[i]
3220 && !types_compatible_p (vectypes[i], vectype_in))
3222 if (dump_enabled_p ())
3223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3224 "argument vector types differ.\n");
3225 return false;
3228 /* If all arguments are external or constant defs, infer the vector type
3229 from the scalar type. */
3230 if (!vectype_in)
3231 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3232 if (vec_stmt)
3233 gcc_assert (vectype_in);
3234 if (!vectype_in)
3236 if (dump_enabled_p ())
3237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3238 "no vectype for scalar type %T\n", rhs_type);
3240 return false;
3243 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3244 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3246 if (dump_enabled_p ())
3247 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3248 "mixed mask and nonmask vector types\n");
3249 return false;
3252 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3254 if (dump_enabled_p ())
3255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3256 "use emulated vector type for call\n");
3257 return false;
3260 /* FORNOW */
3261 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3262 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
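/* Illustrative example (not from the source): with VECTYPE_IN V4SI and
   VECTYPE_OUT V8HI, nunits_in * 2 == nunits_out and the checks below pick
   NARROW; equal element counts pick NONE; nunits_out * 2 == nunits_in
   picks WIDEN.  */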
3263 if (known_eq (nunits_in * 2, nunits_out))
3264 modifier = NARROW;
3265 else if (known_eq (nunits_out, nunits_in))
3266 modifier = NONE;
3267 else if (known_eq (nunits_out * 2, nunits_in))
3268 modifier = WIDEN;
3269 else
3270 return false;
3272 /* We only handle functions that do not read or clobber memory. */
3273 if (gimple_vuse (stmt))
3275 if (dump_enabled_p ())
3276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3277 "function reads from or writes to memory.\n");
3278 return false;
3281 /* For now, we only vectorize functions if a target specific builtin
3282 is available. TODO -- in some cases, it might be profitable to
3283 insert the calls for pieces of the vector, in order to be able
3284 to vectorize other operations in the loop. */
3285 fndecl = NULL_TREE;
3286 internal_fn ifn = IFN_LAST;
3287 tree callee = gimple_call_fndecl (stmt);
3289 /* First try using an internal function. */
3290 code_helper convert_code = MAX_TREE_CODES;
3291 if (cfn != CFN_LAST
3292 && (modifier == NONE
3293 || (modifier == NARROW
3294 && simple_integer_narrowing (vectype_out, vectype_in,
3295 &convert_code))))
3296 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3297 vectype_in);
3299 /* If that fails, try asking for a target-specific built-in function. */
3300 if (ifn == IFN_LAST)
3302 if (cfn != CFN_LAST)
3303 fndecl = targetm.vectorize.builtin_vectorized_function
3304 (cfn, vectype_out, vectype_in);
3305 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3306 fndecl = targetm.vectorize.builtin_md_vectorized_function
3307 (callee, vectype_out, vectype_in);
3310 if (ifn == IFN_LAST && !fndecl)
3312 if (cfn == CFN_GOMP_SIMD_LANE
3313 && !slp_node
3314 && loop_vinfo
3315 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3316 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3317 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3318 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3320 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3321 { 0, 1, 2, ... vf - 1 } vector. */
3322 gcc_assert (nargs == 0);
3324 else if (modifier == NONE
3325 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3326 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3327 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3328 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3329 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3330 slp_op, vectype_in, cost_vec);
3331 else
3333 if (dump_enabled_p ())
3334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3335 "function is not vectorizable.\n");
3336 return false;
3340 if (slp_node)
3341 ncopies = 1;
3342 else if (modifier == NARROW && ifn == IFN_LAST)
3343 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3344 else
3345 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3347 /* Sanity check: make sure that at least one copy of the vectorized stmt
3348 needs to be generated. */
3349 gcc_assert (ncopies >= 1);
3351 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3352 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3353 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3354 int len_opno = internal_fn_len_index (cond_len_fn);
3355 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3356 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3357 if (!vec_stmt) /* transformation not required. */
3359 if (slp_node)
3360 for (i = 0; i < nargs; ++i)
3361 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3362 vectypes[i]
3363 ? vectypes[i] : vectype_in))
3365 if (dump_enabled_p ())
3366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3367 "incompatible vector types for invariants\n");
3368 return false;
3370 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3371 DUMP_VECT_SCOPE ("vectorizable_call");
3372 vect_model_simple_cost (vinfo, stmt_info,
3373 ncopies, dt, ndts, slp_node, cost_vec);
3374 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3375 record_stmt_cost (cost_vec, ncopies / 2,
3376 vec_promote_demote, stmt_info, 0, vect_body);
3378 if (loop_vinfo
3379 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3380 && (reduc_idx >= 0 || mask_opno >= 0))
3382 if (reduc_idx >= 0
3383 && (cond_fn == IFN_LAST
3384 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3385 OPTIMIZE_FOR_SPEED))
3386 && (cond_len_fn == IFN_LAST
3387 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3388 OPTIMIZE_FOR_SPEED)))
3390 if (dump_enabled_p ())
3391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3392 "can't use a fully-masked loop because no"
3393 " conditional operation is available.\n");
3394 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3396 else
3398 unsigned int nvectors
3399 = (slp_node
3400 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3401 : ncopies);
3402 tree scalar_mask = NULL_TREE;
3403 if (mask_opno >= 0)
3404 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3405 if (cond_len_fn != IFN_LAST
3406 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3407 OPTIMIZE_FOR_SPEED))
3408 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3410 else
3411 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3412 scalar_mask);
3415 return true;
3418 /* Transform. */
3420 if (dump_enabled_p ())
3421 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3423 /* Handle def. */
3424 scalar_dest = gimple_call_lhs (stmt);
3425 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3427 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3428 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3429 unsigned int vect_nargs = nargs;
3430 if (len_loop_p)
3432 if (len_opno >= 0)
3434 ifn = cond_len_fn;
3435 /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS. */
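/* E.g. (an illustrative sketch, argument order as for the COND_LEN_*
   internal functions), IFN_COND_ADD (MASK, A, B, ELSE) becomes
   IFN_COND_LEN_ADD (MASK, A, B, ELSE, LEN, BIAS).  */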
3436 vect_nargs += 2;
3438 else if (reduc_idx >= 0)
3439 gcc_unreachable ();
3441 else if (masked_loop_p && reduc_idx >= 0)
3443 ifn = cond_fn;
3444 vect_nargs += 2;
3446 if (clz_ctz_arg1)
3447 ++vect_nargs;
3449 if (modifier == NONE || ifn != IFN_LAST)
3451 tree prev_res = NULL_TREE;
3452 vargs.safe_grow (vect_nargs, true);
3453 auto_vec<vec<tree> > vec_defs (nargs);
3454 for (j = 0; j < ncopies; ++j)
3456 /* Build argument list for the vectorized call. */
3457 if (slp_node)
3459 vec<tree> vec_oprnds0;
3461 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3462 vec_oprnds0 = vec_defs[0];
3464 /* Arguments are ready. Create the new vector stmt. */
3465 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3467 int varg = 0;
3468 if (masked_loop_p && reduc_idx >= 0)
3470 unsigned int vec_num = vec_oprnds0.length ();
3471 /* Always true for SLP. */
3472 gcc_assert (ncopies == 1);
3473 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3474 gsi, masks, vec_num,
3475 vectype_out, i);
3477 size_t k;
3478 for (k = 0; k < nargs; k++)
3480 vec<tree> vec_oprndsk = vec_defs[k];
3481 vargs[varg++] = vec_oprndsk[i];
3483 if (masked_loop_p && reduc_idx >= 0)
3484 vargs[varg++] = vargs[reduc_idx + 1];
3485 if (clz_ctz_arg1)
3486 vargs[varg++] = clz_ctz_arg1;
3488 gimple *new_stmt;
3489 if (modifier == NARROW)
3491 /* We don't define any narrowing conditional functions
3492 at present. */
3493 gcc_assert (mask_opno < 0);
3494 tree half_res = make_ssa_name (vectype_in);
3495 gcall *call
3496 = gimple_build_call_internal_vec (ifn, vargs);
3497 gimple_call_set_lhs (call, half_res);
3498 gimple_call_set_nothrow (call, true);
3499 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3500 if ((i & 1) == 0)
3502 prev_res = half_res;
3503 continue;
3505 new_temp = make_ssa_name (vec_dest);
3506 new_stmt = vect_gimple_build (new_temp, convert_code,
3507 prev_res, half_res);
3508 vect_finish_stmt_generation (vinfo, stmt_info,
3509 new_stmt, gsi);
3511 else
3513 if (len_opno >= 0 && len_loop_p)
3515 unsigned int vec_num = vec_oprnds0.length ();
3516 /* Always true for SLP. */
3517 gcc_assert (ncopies == 1);
3518 tree len
3519 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3520 vectype_out, i, 1);
3521 signed char biasval
3522 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3523 tree bias = build_int_cst (intQI_type_node, biasval);
3524 vargs[len_opno] = len;
3525 vargs[len_opno + 1] = bias;
3527 else if (mask_opno >= 0 && masked_loop_p)
3529 unsigned int vec_num = vec_oprnds0.length ();
3530 /* Always true for SLP. */
3531 gcc_assert (ncopies == 1);
3532 tree mask = vect_get_loop_mask (loop_vinfo,
3533 gsi, masks, vec_num,
3534 vectype_out, i);
3535 vargs[mask_opno] = prepare_vec_mask
3536 (loop_vinfo, TREE_TYPE (mask), mask,
3537 vargs[mask_opno], gsi);
3540 gcall *call;
3541 if (ifn != IFN_LAST)
3542 call = gimple_build_call_internal_vec (ifn, vargs);
3543 else
3544 call = gimple_build_call_vec (fndecl, vargs);
3545 new_temp = make_ssa_name (vec_dest, call);
3546 gimple_call_set_lhs (call, new_temp);
3547 gimple_call_set_nothrow (call, true);
3548 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3549 new_stmt = call;
3551 slp_node->push_vec_def (new_stmt);
3553 continue;
3556 int varg = 0;
3557 if (masked_loop_p && reduc_idx >= 0)
3558 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3559 vectype_out, j);
3560 for (i = 0; i < nargs; i++)
3562 op = gimple_call_arg (stmt, i);
3563 if (j == 0)
3565 vec_defs.quick_push (vNULL);
3566 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3567 op, &vec_defs[i],
3568 vectypes[i]);
3570 vargs[varg++] = vec_defs[i][j];
3572 if (masked_loop_p && reduc_idx >= 0)
3573 vargs[varg++] = vargs[reduc_idx + 1];
3574 if (clz_ctz_arg1)
3575 vargs[varg++] = clz_ctz_arg1;
3577 if (len_opno >= 0 && len_loop_p)
3579 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3580 vectype_out, j, 1);
3581 signed char biasval
3582 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3583 tree bias = build_int_cst (intQI_type_node, biasval);
3584 vargs[len_opno] = len;
3585 vargs[len_opno + 1] = bias;
3587 else if (mask_opno >= 0 && masked_loop_p)
3589 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3590 vectype_out, j);
3591 vargs[mask_opno]
3592 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3593 vargs[mask_opno], gsi);
3596 gimple *new_stmt;
3597 if (cfn == CFN_GOMP_SIMD_LANE)
3599 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3600 tree new_var
3601 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3602 gimple *init_stmt = gimple_build_assign (new_var, cst);
3603 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3604 new_temp = make_ssa_name (vec_dest);
3605 new_stmt = gimple_build_assign (new_temp, new_var);
3606 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3608 else if (modifier == NARROW)
3610 /* We don't define any narrowing conditional functions at
3611 present. */
3612 gcc_assert (mask_opno < 0);
3613 tree half_res = make_ssa_name (vectype_in);
3614 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3615 gimple_call_set_lhs (call, half_res);
3616 gimple_call_set_nothrow (call, true);
3617 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3618 if ((j & 1) == 0)
3620 prev_res = half_res;
3621 continue;
3623 new_temp = make_ssa_name (vec_dest);
3624 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3625 half_res);
3626 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3628 else
3630 gcall *call;
3631 if (ifn != IFN_LAST)
3632 call = gimple_build_call_internal_vec (ifn, vargs);
3633 else
3634 call = gimple_build_call_vec (fndecl, vargs);
3635 new_temp = make_ssa_name (vec_dest, call);
3636 gimple_call_set_lhs (call, new_temp);
3637 gimple_call_set_nothrow (call, true);
3638 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3639 new_stmt = call;
3642 if (j == (modifier == NARROW ? 1 : 0))
3643 *vec_stmt = new_stmt;
3644 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3646 for (i = 0; i < nargs; i++)
3648 vec<tree> vec_oprndsi = vec_defs[i];
3649 vec_oprndsi.release ();
3652 else if (modifier == NARROW)
3654 auto_vec<vec<tree> > vec_defs (nargs);
3655 /* We don't define any narrowing conditional functions at present. */
3656 gcc_assert (mask_opno < 0);
3657 for (j = 0; j < ncopies; ++j)
3659 /* Build argument list for the vectorized call. */
3660 if (j == 0)
3661 vargs.create (nargs * 2);
3662 else
3663 vargs.truncate (0);
3665 if (slp_node)
3667 vec<tree> vec_oprnds0;
3669 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3670 vec_oprnds0 = vec_defs[0];
3672 /* Arguments are ready. Create the new vector stmt. */
3673 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3675 size_t k;
3676 vargs.truncate (0);
3677 for (k = 0; k < nargs; k++)
3679 vec<tree> vec_oprndsk = vec_defs[k];
3680 vargs.quick_push (vec_oprndsk[i]);
3681 vargs.quick_push (vec_oprndsk[i + 1]);
3683 gcall *call;
3684 if (ifn != IFN_LAST)
3685 call = gimple_build_call_internal_vec (ifn, vargs);
3686 else
3687 call = gimple_build_call_vec (fndecl, vargs);
3688 new_temp = make_ssa_name (vec_dest, call);
3689 gimple_call_set_lhs (call, new_temp);
3690 gimple_call_set_nothrow (call, true);
3691 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3692 slp_node->push_vec_def (call);
3694 continue;
3697 for (i = 0; i < nargs; i++)
3699 op = gimple_call_arg (stmt, i);
3700 if (j == 0)
3702 vec_defs.quick_push (vNULL);
3703 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3704 op, &vec_defs[i], vectypes[i]);
3706 vec_oprnd0 = vec_defs[i][2*j];
3707 vec_oprnd1 = vec_defs[i][2*j+1];
3709 vargs.quick_push (vec_oprnd0);
3710 vargs.quick_push (vec_oprnd1);
3713 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3714 new_temp = make_ssa_name (vec_dest, new_stmt);
3715 gimple_call_set_lhs (new_stmt, new_temp);
3716 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3718 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3721 if (!slp_node)
3722 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3724 for (i = 0; i < nargs; i++)
3726 vec<tree> vec_oprndsi = vec_defs[i];
3727 vec_oprndsi.release ();
3730 else
3731 /* No current target implements this case. */
3732 return false;
3734 vargs.release ();
3736 /* The call in STMT might prevent it from being removed in dce.
3737 However, we cannot remove it here, because of the way the ssa name
3738 it defines is mapped to the new definition. So just replace the
3739 rhs of the statement with something harmless. */
3741 if (slp_node)
3742 return true;
3744 stmt_info = vect_orig_stmt (stmt_info);
3745 lhs = gimple_get_lhs (stmt_info->stmt);
3747 gassign *new_stmt
3748 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3749 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3751 return true;
3755 struct simd_call_arg_info
3757 tree vectype;
3758 tree op;
3759 HOST_WIDE_INT linear_step;
3760 enum vect_def_type dt;
3761 unsigned int align;
3762 bool simd_lane_linear;
3765 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3766 is linear within simd lane (but not within whole loop), note it in
3767 *ARGINFO. */
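/* For example (an illustrative GIMPLE-level sketch, not a particular
   testcase), given

     _1 = .GOMP_SIMD_LANE (simduid.0_5(D));
     _2 = _1 * 4;
     p_3 = &array + _2;

   the address p_3 is linear within each simd lane with step 4, even
   though it is not a simple affine induction over the whole loop.  */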
3769 static void
3770 vect_simd_lane_linear (tree op, class loop *loop,
3771 struct simd_call_arg_info *arginfo)
3773 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3775 if (!is_gimple_assign (def_stmt)
3776 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3777 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3778 return;
3780 tree base = gimple_assign_rhs1 (def_stmt);
3781 HOST_WIDE_INT linear_step = 0;
3782 tree v = gimple_assign_rhs2 (def_stmt);
3783 while (TREE_CODE (v) == SSA_NAME)
3785 tree t;
3786 def_stmt = SSA_NAME_DEF_STMT (v);
3787 if (is_gimple_assign (def_stmt))
3788 switch (gimple_assign_rhs_code (def_stmt))
3790 case PLUS_EXPR:
3791 t = gimple_assign_rhs2 (def_stmt);
3792 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3793 return;
3794 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3795 v = gimple_assign_rhs1 (def_stmt);
3796 continue;
3797 case MULT_EXPR:
3798 t = gimple_assign_rhs2 (def_stmt);
3799 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3800 return;
3801 linear_step = tree_to_shwi (t);
3802 v = gimple_assign_rhs1 (def_stmt);
3803 continue;
3804 CASE_CONVERT:
3805 t = gimple_assign_rhs1 (def_stmt);
3806 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3807 || (TYPE_PRECISION (TREE_TYPE (v))
3808 < TYPE_PRECISION (TREE_TYPE (t))))
3809 return;
3810 if (!linear_step)
3811 linear_step = 1;
3812 v = t;
3813 continue;
3814 default:
3815 return;
3817 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3818 && loop->simduid
3819 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3820 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3821 == loop->simduid))
3823 if (!linear_step)
3824 linear_step = 1;
3825 arginfo->linear_step = linear_step;
3826 arginfo->op = base;
3827 arginfo->simd_lane_linear = true;
3828 return;
3833 /* Function vectorizable_simd_clone_call.
3835 Check if STMT_INFO performs a function call that can be vectorized
3836 by calling a simd clone of the function.
3837 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3838 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3839 Return true if STMT_INFO is vectorizable in this way. */
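/* For example (an illustrative sketch), a call in the loop body to a
   function declared as

     #pragma omp declare simd simdlen(4) notinbranch
     float f (float x);

   can, when the loop is vectorized with V4SF vectors, be replaced by a
   single call to the corresponding vector clone taking and returning
   V4SF values.  */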
3841 static bool
3842 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3843 gimple_stmt_iterator *gsi,
3844 gimple **vec_stmt, slp_tree slp_node,
3845 stmt_vector_for_cost *)
3847 tree vec_dest;
3848 tree scalar_dest;
3849 tree op, type;
3850 tree vec_oprnd0 = NULL_TREE;
3851 tree vectype;
3852 poly_uint64 nunits;
3853 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3854 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3855 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3856 tree fndecl, new_temp;
3857 int ncopies, j;
3858 auto_vec<simd_call_arg_info> arginfo;
3859 vec<tree> vargs = vNULL;
3860 size_t i, nargs;
3861 tree lhs, rtype, ratype;
3862 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3863 int masked_call_offset = 0;
3865 /* Is STMT a vectorizable call? */
3866 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3867 if (!stmt)
3868 return false;
3870 fndecl = gimple_call_fndecl (stmt);
3871 if (fndecl == NULL_TREE
3872 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
3874 fndecl = gimple_call_arg (stmt, 0);
3875 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
3876 fndecl = TREE_OPERAND (fndecl, 0);
3877 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
3878 masked_call_offset = 1;
3880 if (fndecl == NULL_TREE)
3881 return false;
3883 struct cgraph_node *node = cgraph_node::get (fndecl);
3884 if (node == NULL || node->simd_clones == NULL)
3885 return false;
3887 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3888 return false;
3890 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3891 && ! vec_stmt)
3892 return false;
3894 if (gimple_call_lhs (stmt)
3895 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3896 return false;
3898 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3900 vectype = STMT_VINFO_VECTYPE (stmt_info);
3902 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3903 return false;
3905 /* Process function arguments. */
3906 nargs = gimple_call_num_args (stmt) - masked_call_offset;
3908 /* Bail out if the function has zero arguments. */
3909 if (nargs == 0)
3910 return false;
3912 vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
3913 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
3914 arginfo.reserve (nargs, true);
3915 auto_vec<slp_tree> slp_op;
3916 slp_op.safe_grow_cleared (nargs);
3918 for (i = 0; i < nargs; i++)
3920 simd_call_arg_info thisarginfo;
3921 affine_iv iv;
3923 thisarginfo.linear_step = 0;
3924 thisarginfo.align = 0;
3925 thisarginfo.op = NULL_TREE;
3926 thisarginfo.simd_lane_linear = false;
3928 int op_no = i + masked_call_offset;
3929 if (slp_node)
3930 op_no = vect_slp_child_index_for_operand (stmt, op_no, false);
3931 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3932 op_no, &op, &slp_op[i],
3933 &thisarginfo.dt, &thisarginfo.vectype)
3934 || thisarginfo.dt == vect_uninitialized_def)
3936 if (dump_enabled_p ())
3937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3938 "use not simple.\n");
3939 return false;
3942 if (thisarginfo.dt == vect_constant_def
3943 || thisarginfo.dt == vect_external_def)
3945 /* With SLP we determine the vector type of constants/externals
3946 at analysis time, handling conflicts via
3947 vect_maybe_update_slp_op_vectype. At transform time
3948 we have a vector type recorded for SLP. */
3949 gcc_assert (!vec_stmt
3950 || !slp_node
3951 || thisarginfo.vectype != NULL_TREE);
3952 if (!vec_stmt)
3953 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
3954 TREE_TYPE (op),
3955 slp_node);
3957 else
3958 gcc_assert (thisarginfo.vectype != NULL_TREE);
3960 /* For linear arguments, the analyze phase should have saved
3961 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
3962 if (i * 3 + 4 <= simd_clone_info.length ()
3963 && simd_clone_info[i * 3 + 2])
3965 gcc_assert (vec_stmt);
3966 thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
3967 thisarginfo.op = simd_clone_info[i * 3 + 1];
3968 thisarginfo.simd_lane_linear
3969 = (simd_clone_info[i * 3 + 3] == boolean_true_node);
3970 /* If loop has been peeled for alignment, we need to adjust it. */
3971 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3972 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3973 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3975 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3976 tree step = simd_clone_info[i * 3 + 2];
3977 tree opt = TREE_TYPE (thisarginfo.op);
3978 bias = fold_convert (TREE_TYPE (step), bias);
3979 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3980 thisarginfo.op
3981 = fold_build2 (POINTER_TYPE_P (opt)
3982 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3983 thisarginfo.op, bias);
3986 else if (!vec_stmt
3987 && thisarginfo.dt != vect_constant_def
3988 && thisarginfo.dt != vect_external_def
3989 && loop_vinfo
3990 && TREE_CODE (op) == SSA_NAME
3991 && simple_iv (loop, loop_containing_stmt (stmt), op,
3992 &iv, false)
3993 && tree_fits_shwi_p (iv.step))
3995 thisarginfo.linear_step = tree_to_shwi (iv.step);
3996 thisarginfo.op = iv.base;
3998 else if ((thisarginfo.dt == vect_constant_def
3999 || thisarginfo.dt == vect_external_def)
4000 && POINTER_TYPE_P (TREE_TYPE (op)))
4001 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4002 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4003 linear too. */
4004 if (POINTER_TYPE_P (TREE_TYPE (op))
4005 && !thisarginfo.linear_step
4006 && !vec_stmt
4007 && thisarginfo.dt != vect_constant_def
4008 && thisarginfo.dt != vect_external_def
4009 && loop_vinfo
4010 && TREE_CODE (op) == SSA_NAME)
4011 vect_simd_lane_linear (op, loop, &thisarginfo);
4013 arginfo.quick_push (thisarginfo);
4016 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4017 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
4018 unsigned int badness = 0;
4019 struct cgraph_node *bestn = NULL;
4020 if (simd_clone_info.exists ())
4021 bestn = cgraph_node::get (simd_clone_info[0]);
4022 else
4023 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4024 n = n->simdclone->next_clone)
4026 unsigned int this_badness = 0;
4027 unsigned int num_calls;
4028 /* The number of arguments in the call and the number of parameters in
4029 the simdclone should match. However, when the simdclone is
4030 'inbranch', it could have one more parameter than nargs when using
4031 an inbranch simdclone to call a non-inbranch call, either in a
4032 non-masked loop using an all-true constant mask, or inside a masked
4033 loop using its mask. */
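/* E.g. (illustrative only), the clone emitted for

     #pragma omp declare simd inbranch
     int f (int x);

   carries a trailing mask parameter in addition to the vector of X
   values, so it can still implement an unconditional call to f by
   passing an all-true mask.  */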
4034 size_t simd_nargs = n->simdclone->nargs;
4035 if (!masked_call_offset && n->simdclone->inbranch)
4036 simd_nargs--;
4037 if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
4038 &num_calls)
4039 || (!n->simdclone->inbranch && (masked_call_offset > 0))
4040 || (nargs != simd_nargs))
4041 continue;
4042 if (num_calls != 1)
4043 this_badness += exact_log2 (num_calls) * 4096;
4044 if (n->simdclone->inbranch)
4045 this_badness += 8192;
4046 int target_badness = targetm.simd_clone.usable (n);
4047 if (target_badness < 0)
4048 continue;
4049 this_badness += target_badness * 512;
4050 for (i = 0; i < nargs; i++)
4052 switch (n->simdclone->args[i].arg_type)
4054 case SIMD_CLONE_ARG_TYPE_VECTOR:
4055 if (!useless_type_conversion_p
4056 (n->simdclone->args[i].orig_type,
4057 TREE_TYPE (gimple_call_arg (stmt,
4058 i + masked_call_offset))))
4059 i = -1;
4060 else if (arginfo[i].dt == vect_constant_def
4061 || arginfo[i].dt == vect_external_def
4062 || arginfo[i].linear_step)
4063 this_badness += 64;
4064 break;
4065 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4066 if (arginfo[i].dt != vect_constant_def
4067 && arginfo[i].dt != vect_external_def)
4068 i = -1;
4069 break;
4070 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4071 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4072 if (arginfo[i].dt == vect_constant_def
4073 || arginfo[i].dt == vect_external_def
4074 || (arginfo[i].linear_step
4075 != n->simdclone->args[i].linear_step))
4076 i = -1;
4077 break;
4078 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4079 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4080 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4081 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4082 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4083 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4084 /* FORNOW */
4085 i = -1;
4086 break;
4087 case SIMD_CLONE_ARG_TYPE_MASK:
4088 /* While we can create a traditional data vector from
4089 an incoming integer mode mask, we have no good way to
4090 forcibly generate an integer mode mask from a traditional
4091 boolean vector input. */
4092 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4093 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4094 i = -1;
4095 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4096 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4097 this_badness += 2048;
4098 break;
4100 if (i == (size_t) -1)
4101 break;
4102 if (n->simdclone->args[i].alignment > arginfo[i].align)
4104 i = -1;
4105 break;
4107 if (arginfo[i].align)
4108 this_badness += (exact_log2 (arginfo[i].align)
4109 - exact_log2 (n->simdclone->args[i].alignment));
4111 if (i == (size_t) -1)
4112 continue;
4113 if (masked_call_offset == 0
4114 && n->simdclone->inbranch
4115 && n->simdclone->nargs > nargs)
4117 gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4118 SIMD_CLONE_ARG_TYPE_MASK);
4119 /* Penalize using a masked SIMD clone in a non-masked loop for a call
4120 that is not in a branch, as we'd have to construct an all-true mask. */
4121 if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4122 this_badness += 64;
4124 if (bestn == NULL || this_badness < badness)
4126 bestn = n;
4127 badness = this_badness;
4131 if (bestn == NULL)
4132 return false;
4134 unsigned int num_mask_args = 0;
4135 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4136 for (i = 0; i < nargs; i++)
4137 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4138 num_mask_args++;
4140 for (i = 0; i < nargs; i++)
4142 if ((arginfo[i].dt == vect_constant_def
4143 || arginfo[i].dt == vect_external_def)
4144 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4146 tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
4147 i + masked_call_offset));
4148 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4149 slp_node);
4150 if (arginfo[i].vectype == NULL
4151 || !constant_multiple_p (bestn->simdclone->simdlen,
4152 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4153 return false;
4156 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4157 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4159 if (dump_enabled_p ())
4160 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4161 "vector mask arguments are not supported.\n");
4162 return false;
4165 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4167 tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
4168 if (bestn->simdclone->mask_mode == VOIDmode)
4170 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype),
4171 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4173 /* FORNOW we only have partial support for vector-type masks
4174 that can't hold all of simdlen. */
4175 if (dump_enabled_p ())
4176 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4177 vect_location,
4178 "in-branch vector clones are not yet"
4179 " supported for mismatched vector sizes.\n");
4180 return false;
4183 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4185 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4186 || maybe_ne (exact_div (bestn->simdclone->simdlen,
4187 num_mask_args),
4188 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4190 /* FORNOW we only have partial support for integer-type masks
4191 that represent the same number of lanes as the
4192 vectorized mask inputs. */
4193 if (dump_enabled_p ())
4194 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4195 vect_location,
4196 "in-branch vector clones are not yet "
4197 "supported for mismatched vector sizes.\n");
4198 return false;
4201 else
4203 if (dump_enabled_p ())
4204 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4205 vect_location,
4206 "in-branch vector clones not supported"
4207 " on this target.\n");
4208 return false;
4213 fndecl = bestn->decl;
4214 nunits = bestn->simdclone->simdlen;
4215 if (slp_node)
4216 ncopies = vector_unroll_factor (vf * group_size, nunits);
4217 else
4218 ncopies = vector_unroll_factor (vf, nunits);
4220 /* If the function isn't const, only allow it in simd loops where the
4221 user has asserted that at least nunits consecutive iterations can be
4222 performed using SIMD instructions. */
4223 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4224 && gimple_vuse (stmt))
4225 return false;
4227 /* Sanity check: make sure that at least one copy of the vectorized stmt
4228 needs to be generated. */
4229 gcc_assert (ncopies >= 1);
4231 if (!vec_stmt) /* transformation not required. */
4233 if (slp_node)
4234 for (unsigned i = 0; i < nargs; ++i)
4235 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4237 if (dump_enabled_p ())
4238 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4239 "incompatible vector types for invariants\n");
4240 return false;
4242 /* When the original call is pure or const but the SIMD ABI dictates
4243 an aggregate return we will have to use a virtual definition and
4244 in a loop eventually even need to add a virtual PHI. That's
4245 not straightforward, so allow this to be fixed up via renaming. */
4246 if (gimple_call_lhs (stmt)
4247 && !gimple_vdef (stmt)
4248 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4249 vinfo->any_known_not_updated_vssa = true;
4250 /* ??? For SLP code-gen we end up inserting after the last
4251 vector argument def rather than at the original call position
4252 so automagic virtual operand updating doesn't work. */
4253 if (gimple_vuse (stmt) && slp_node)
4254 vinfo->any_known_not_updated_vssa = true;
4255 simd_clone_info.safe_push (bestn->decl);
4256 for (i = 0; i < bestn->simdclone->nargs; i++)
4258 switch (bestn->simdclone->args[i].arg_type)
4260 default:
4261 continue;
4262 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4263 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4265 simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
4266 simd_clone_info.safe_push (arginfo[i].op);
4267 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4268 ? size_type_node : TREE_TYPE (arginfo[i].op);
4269 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4270 simd_clone_info.safe_push (ls);
4271 tree sll = arginfo[i].simd_lane_linear
4272 ? boolean_true_node : boolean_false_node;
4273 simd_clone_info.safe_push (sll);
4275 break;
4276 case SIMD_CLONE_ARG_TYPE_MASK:
4277 if (loop_vinfo
4278 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4279 vect_record_loop_mask (loop_vinfo,
4280 &LOOP_VINFO_MASKS (loop_vinfo),
4281 ncopies, vectype, op);
4283 break;
4287 if (!bestn->simdclone->inbranch && loop_vinfo)
4289 if (dump_enabled_p ()
4290 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4291 dump_printf_loc (MSG_NOTE, vect_location,
4292 "can't use a fully-masked loop because a"
4293 " non-masked simd clone was selected.\n");
4294 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4297 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4298 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4299 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4300 dt, slp_node, cost_vec); */
4301 return true;
4304 /* Transform. */
4306 if (dump_enabled_p ())
4307 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4309 /* Handle def. */
4310 scalar_dest = gimple_call_lhs (stmt);
4311 vec_dest = NULL_TREE;
4312 rtype = NULL_TREE;
4313 ratype = NULL_TREE;
4314 if (scalar_dest)
4316 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4317 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4318 if (TREE_CODE (rtype) == ARRAY_TYPE)
4320 ratype = rtype;
4321 rtype = TREE_TYPE (ratype);
4325 auto_vec<vec<tree> > vec_oprnds;
4326 auto_vec<unsigned> vec_oprnds_i;
4327 vec_oprnds_i.safe_grow_cleared (nargs, true);
4328 if (slp_node)
4330 vec_oprnds.reserve_exact (nargs);
4331 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4333 else
4334 vec_oprnds.safe_grow_cleared (nargs, true);
4335 for (j = 0; j < ncopies; ++j)
4337 poly_uint64 callee_nelements;
4338 poly_uint64 caller_nelements;
4339 /* Build argument list for the vectorized call. */
4340 if (j == 0)
4341 vargs.create (nargs);
4342 else
4343 vargs.truncate (0);
4345 for (i = 0; i < nargs; i++)
4347 unsigned int k, l, m, o;
4348 tree atype;
4349 op = gimple_call_arg (stmt, i + masked_call_offset);
4350 switch (bestn->simdclone->args[i].arg_type)
4352 case SIMD_CLONE_ARG_TYPE_VECTOR:
4353 atype = bestn->simdclone->args[i].vector_type;
4354 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4355 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4356 o = vector_unroll_factor (nunits, callee_nelements);
4357 for (m = j * o; m < (j + 1) * o; m++)
4359 if (known_lt (callee_nelements, caller_nelements))
4361 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4362 if (!constant_multiple_p (caller_nelements,
4363 callee_nelements, &k))
4364 gcc_unreachable ();
4366 gcc_assert ((k & (k - 1)) == 0);
4367 if (m == 0)
4369 if (!slp_node)
4370 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4371 ncopies * o / k, op,
4372 &vec_oprnds[i]);
4373 vec_oprnds_i[i] = 0;
4374 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4376 else
4378 vec_oprnd0 = arginfo[i].op;
4379 if ((m & (k - 1)) == 0)
4380 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4382 arginfo[i].op = vec_oprnd0;
4383 vec_oprnd0
4384 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4385 bitsize_int (prec),
4386 bitsize_int ((m & (k - 1)) * prec));
4387 gassign *new_stmt
4388 = gimple_build_assign (make_ssa_name (atype),
4389 vec_oprnd0);
4390 vect_finish_stmt_generation (vinfo, stmt_info,
4391 new_stmt, gsi);
4392 vargs.safe_push (gimple_assign_lhs (new_stmt));
4394 else
4396 if (!constant_multiple_p (callee_nelements,
4397 caller_nelements, &k))
4398 gcc_unreachable ();
4399 gcc_assert ((k & (k - 1)) == 0);
4400 vec<constructor_elt, va_gc> *ctor_elts;
4401 if (k != 1)
4402 vec_alloc (ctor_elts, k);
4403 else
4404 ctor_elts = NULL;
4405 for (l = 0; l < k; l++)
4407 if (m == 0 && l == 0)
4409 if (!slp_node)
4410 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4411 k * o * ncopies,
4413 &vec_oprnds[i]);
4414 vec_oprnds_i[i] = 0;
4415 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4417 else
4418 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4419 arginfo[i].op = vec_oprnd0;
4420 if (k == 1)
4421 break;
4422 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4423 vec_oprnd0);
4425 if (k == 1)
4426 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4427 atype))
4429 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4430 vec_oprnd0);
4431 gassign *new_stmt
4432 = gimple_build_assign (make_ssa_name (atype),
4433 vec_oprnd0);
4434 vect_finish_stmt_generation (vinfo, stmt_info,
4435 new_stmt, gsi);
4436 vargs.safe_push (gimple_get_lhs (new_stmt));
4438 else
4439 vargs.safe_push (vec_oprnd0);
4440 else
4442 vec_oprnd0 = build_constructor (atype, ctor_elts);
4443 gassign *new_stmt
4444 = gimple_build_assign (make_ssa_name (atype),
4445 vec_oprnd0);
4446 vect_finish_stmt_generation (vinfo, stmt_info,
4447 new_stmt, gsi);
4448 vargs.safe_push (gimple_assign_lhs (new_stmt));
4452 break;
4453 case SIMD_CLONE_ARG_TYPE_MASK:
4454 if (bestn->simdclone->mask_mode == VOIDmode)
4456 atype = bestn->simdclone->args[i].vector_type;
4457 tree elt_type = TREE_TYPE (atype);
4458 tree one = fold_convert (elt_type, integer_one_node);
4459 tree zero = fold_convert (elt_type, integer_zero_node);
4460 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4461 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4462 o = vector_unroll_factor (nunits, callee_nelements);
4463 for (m = j * o; m < (j + 1) * o; m++)
4465 if (maybe_lt (callee_nelements, caller_nelements))
4467 /* The mask type has fewer elements than simdlen. */
4469 /* FORNOW */
4470 gcc_unreachable ();
4472 else if (known_eq (callee_nelements, caller_nelements))
4474 /* The SIMD clone function has the same number of
4475 elements as the current function. */
4476 if (m == 0)
4478 if (!slp_node)
4479 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4480 o * ncopies,
4482 &vec_oprnds[i]);
4483 vec_oprnds_i[i] = 0;
4485 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4486 if (loop_vinfo
4487 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4489 vec_loop_masks *loop_masks
4490 = &LOOP_VINFO_MASKS (loop_vinfo);
4491 tree loop_mask
4492 = vect_get_loop_mask (loop_vinfo, gsi,
4493 loop_masks, ncopies,
4494 vectype, j);
4495 vec_oprnd0
4496 = prepare_vec_mask (loop_vinfo,
4497 TREE_TYPE (loop_mask),
4498 loop_mask, vec_oprnd0,
4499 gsi);
4500 loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
4501 loop_mask });
4504 vec_oprnd0
4505 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4506 build_vector_from_val (atype, one),
4507 build_vector_from_val (atype, zero));
4508 gassign *new_stmt
4509 = gimple_build_assign (make_ssa_name (atype),
4510 vec_oprnd0);
4511 vect_finish_stmt_generation (vinfo, stmt_info,
4512 new_stmt, gsi);
4513 vargs.safe_push (gimple_assign_lhs (new_stmt));
4515 else
4517 /* The mask type has more elements than simdlen. */
4519 /* FORNOW */
4520 gcc_unreachable ();
4524 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4526 atype = bestn->simdclone->args[i].vector_type;
4527 /* Guess the number of lanes represented by atype. */
4528 poly_uint64 atype_subparts
4529 = exact_div (bestn->simdclone->simdlen,
4530 num_mask_args);
4531 o = vector_unroll_factor (nunits, atype_subparts);
4532 for (m = j * o; m < (j + 1) * o; m++)
4534 if (m == 0)
4536 if (!slp_node)
4537 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4538 o * ncopies,
4540 &vec_oprnds[i]);
4541 vec_oprnds_i[i] = 0;
4543 if (maybe_lt (atype_subparts,
4544 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4546 /* The mask argument has fewer elements than the
4547 input vector. */
4548 /* FORNOW */
4549 gcc_unreachable ();
4551 else if (known_eq (atype_subparts,
4552 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4554 /* The vector mask argument matches the input
4555 in the number of lanes, but not necessarily
4556 in the mode. */
4557 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4558 tree st = lang_hooks.types.type_for_mode
4559 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4560 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4561 vec_oprnd0);
4562 gassign *new_stmt
4563 = gimple_build_assign (make_ssa_name (st),
4564 vec_oprnd0);
4565 vect_finish_stmt_generation (vinfo, stmt_info,
4566 new_stmt, gsi);
4567 if (!types_compatible_p (atype, st))
4569 new_stmt
4570 = gimple_build_assign (make_ssa_name (atype),
4571 NOP_EXPR,
4572 gimple_assign_lhs
4573 (new_stmt));
4574 vect_finish_stmt_generation (vinfo, stmt_info,
4575 new_stmt, gsi);
4577 vargs.safe_push (gimple_assign_lhs (new_stmt));
4579 else
4581 /* The mask argument has more elements than the
4582 input vector. */
4583 /* FORNOW */
4584 gcc_unreachable ();
4588 else
4589 gcc_unreachable ();
4590 break;
4591 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4592 vargs.safe_push (op);
4593 break;
4594 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4595 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4596 if (j == 0)
4598 gimple_seq stmts;
4599 arginfo[i].op
4600 = force_gimple_operand (unshare_expr (arginfo[i].op),
4601 &stmts, true, NULL_TREE);
4602 if (stmts != NULL)
4604 basic_block new_bb;
4605 edge pe = loop_preheader_edge (loop);
4606 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4607 gcc_assert (!new_bb);
4609 if (arginfo[i].simd_lane_linear)
4611 vargs.safe_push (arginfo[i].op);
4612 break;
4614 tree phi_res = copy_ssa_name (op);
4615 gphi *new_phi = create_phi_node (phi_res, loop->header);
4616 add_phi_arg (new_phi, arginfo[i].op,
4617 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4618 enum tree_code code
4619 = POINTER_TYPE_P (TREE_TYPE (op))
4620 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4621 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4622 ? sizetype : TREE_TYPE (op);
4623 poly_widest_int cst
4624 = wi::mul (bestn->simdclone->args[i].linear_step,
4625 ncopies * nunits);
4626 tree tcst = wide_int_to_tree (type, cst);
4627 tree phi_arg = copy_ssa_name (op);
4628 gassign *new_stmt
4629 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4630 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4631 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4632 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4633 UNKNOWN_LOCATION);
4634 arginfo[i].op = phi_res;
4635 vargs.safe_push (phi_res);
4637 else
4639 enum tree_code code
4640 = POINTER_TYPE_P (TREE_TYPE (op))
4641 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4642 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4643 ? sizetype : TREE_TYPE (op);
4644 poly_widest_int cst
4645 = wi::mul (bestn->simdclone->args[i].linear_step,
4646 j * nunits);
4647 tree tcst = wide_int_to_tree (type, cst);
4648 new_temp = make_ssa_name (TREE_TYPE (op));
4649 gassign *new_stmt
4650 = gimple_build_assign (new_temp, code,
4651 arginfo[i].op, tcst);
4652 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4653 vargs.safe_push (new_temp);
4655 break;
4656 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4657 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4658 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4659 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4660 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4661 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4662 default:
4663 gcc_unreachable ();
4667 if (masked_call_offset == 0
4668 && bestn->simdclone->inbranch
4669 && bestn->simdclone->nargs > nargs)
4671 unsigned long m, o;
4672 size_t mask_i = bestn->simdclone->nargs - 1;
4673 tree mask;
4674 gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4675 SIMD_CLONE_ARG_TYPE_MASK);
4677 tree masktype = bestn->simdclone->args[mask_i].vector_type;
4678 callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4679 o = vector_unroll_factor (nunits, callee_nelements);
4680 for (m = j * o; m < (j + 1) * o; m++)
4682 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4684 vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4685 mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4686 ncopies, vectype, j);
4688 else
4689 mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
4691 gassign *new_stmt;
4692 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4694 /* This means we are dealing with integer mask modes.
4695 First convert to an integer type with the same size as
4696 the current vector type. */
4697 unsigned HOST_WIDE_INT intermediate_size
4698 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4699 tree mid_int_type =
4700 build_nonstandard_integer_type (intermediate_size, 1);
4701 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4702 new_stmt
4703 = gimple_build_assign (make_ssa_name (mid_int_type),
4704 mask);
4705 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4706 /* Then zero-extend to the mask mode. */
4707 mask = fold_build1 (NOP_EXPR, masktype,
4708 gimple_get_lhs (new_stmt));
4710 else if (bestn->simdclone->mask_mode == VOIDmode)
4712 tree one = fold_convert (TREE_TYPE (masktype),
4713 integer_one_node);
4714 tree zero = fold_convert (TREE_TYPE (masktype),
4715 integer_zero_node);
4716 mask = build3 (VEC_COND_EXPR, masktype, mask,
4717 build_vector_from_val (masktype, one),
4718 build_vector_from_val (masktype, zero));
4720 else
4721 gcc_unreachable ();
4723 new_stmt = gimple_build_assign (make_ssa_name (masktype), mask);
4724 vect_finish_stmt_generation (vinfo, stmt_info,
4725 new_stmt, gsi);
4726 mask = gimple_assign_lhs (new_stmt);
4727 vargs.safe_push (mask);
4731 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4732 if (vec_dest)
4734 gcc_assert (ratype
4735 || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4736 if (ratype)
4737 new_temp = create_tmp_var (ratype);
4738 else if (useless_type_conversion_p (vectype, rtype))
4739 new_temp = make_ssa_name (vec_dest, new_call);
4740 else
4741 new_temp = make_ssa_name (rtype, new_call);
4742 gimple_call_set_lhs (new_call, new_temp);
4744 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4745 gimple *new_stmt = new_call;
4747 if (vec_dest)
4749 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
4751 unsigned int k, l;
4752 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4753 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4754 k = vector_unroll_factor (nunits,
4755 TYPE_VECTOR_SUBPARTS (vectype));
4756 gcc_assert ((k & (k - 1)) == 0);
4757 for (l = 0; l < k; l++)
4759 tree t;
4760 if (ratype)
4762 t = build_fold_addr_expr (new_temp);
4763 t = build2 (MEM_REF, vectype, t,
4764 build_int_cst (TREE_TYPE (t), l * bytes));
4766 else
4767 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4768 bitsize_int (prec), bitsize_int (l * prec));
4769 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4770 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4772 if (j == 0 && l == 0)
4773 *vec_stmt = new_stmt;
4774 if (slp_node)
4775 SLP_TREE_VEC_DEFS (slp_node)
4776 .quick_push (gimple_assign_lhs (new_stmt));
4777 else
4778 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4781 if (ratype)
4782 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4783 continue;
4785 else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
4787 unsigned int k;
4788 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
4789 TYPE_VECTOR_SUBPARTS (rtype), &k))
4790 gcc_unreachable ();
4791 gcc_assert ((k & (k - 1)) == 0);
4792 if ((j & (k - 1)) == 0)
4793 vec_alloc (ret_ctor_elts, k);
4794 if (ratype)
4796 unsigned int m, o;
4797 o = vector_unroll_factor (nunits,
4798 TYPE_VECTOR_SUBPARTS (rtype));
4799 for (m = 0; m < o; m++)
4801 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4802 size_int (m), NULL_TREE, NULL_TREE);
4803 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4804 tem);
4805 vect_finish_stmt_generation (vinfo, stmt_info,
4806 new_stmt, gsi);
4807 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4808 gimple_assign_lhs (new_stmt));
4810 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4812 else
4813 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4814 if ((j & (k - 1)) != k - 1)
4815 continue;
4816 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4817 new_stmt
4818 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4819 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4821 if ((unsigned) j == k - 1)
4822 *vec_stmt = new_stmt;
4823 if (slp_node)
4824 SLP_TREE_VEC_DEFS (slp_node)
4825 .quick_push (gimple_assign_lhs (new_stmt));
4826 else
4827 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4828 continue;
4830 else if (ratype)
4832 tree t = build_fold_addr_expr (new_temp);
4833 t = build2 (MEM_REF, vectype, t,
4834 build_int_cst (TREE_TYPE (t), 0));
4835 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4836 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4837 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4839 else if (!useless_type_conversion_p (vectype, rtype))
4841 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4842 new_stmt
4843 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4844 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4848 if (j == 0)
4849 *vec_stmt = new_stmt;
4850 if (slp_node)
4851 SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
4852 else
4853 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4856 for (i = 0; i < nargs; ++i)
4858 vec<tree> oprndsi = vec_oprnds[i];
4859 oprndsi.release ();
4861 vargs.release ();
4863 /* Mark the clone as no longer being a candidate for GC. */
4864 bestn->gc_candidate = false;
4866 /* The call in STMT might prevent it from being removed in dce.
4867 However, we cannot remove it here, because of the way the ssa name
4868 it defines is mapped to the new definition. So just replace the
4869 rhs of the statement with something harmless. */
4871 if (slp_node)
4872 return true;
4874 gimple *new_stmt;
4875 if (scalar_dest)
4877 type = TREE_TYPE (scalar_dest);
4878 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4879 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4881 else
4882 new_stmt = gimple_build_nop ();
4883 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4884 unlink_stmt_vdef (stmt);
4886 return true;
4890 /* Function vect_gen_widened_results_half
4892 Create a vector stmt whose code, number of operands, and result
4893 variable are CH, OP_TYPE, and VEC_DEST, and whose arguments are
4894 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4895 When CH names an internal function rather than a tree code, a
4896 vectorized internal-function call is built instead of an assignment.
4897 STMT_INFO is the original scalar stmt that we are vectorizing. */
4899 static gimple *
4900 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
4901 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4902 tree vec_dest, gimple_stmt_iterator *gsi,
4903 stmt_vec_info stmt_info)
4905 gimple *new_stmt;
4906 tree new_temp;
4908 /* Generate half of the widened result: */
4909 if (op_type != binary_op)
4910 vec_oprnd1 = NULL;
4911 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
4912 new_temp = make_ssa_name (vec_dest, new_stmt);
4913 gimple_set_lhs (new_stmt, new_temp);
4914 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4916 return new_stmt;
4920 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4921 For multi-step conversions store the resulting vectors and call the function
4922 recursively. When NARROW_SRC_P is true, there is still a conversion after
4923 the narrowing, so don't store the vectors in the SLP_NODE or in the vector
4924 info of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
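/* E.g. (an illustrative sketch, vector modes chosen arbitrarily), two
   V4SI operands are demoted to a single V8HI result with

     vect_out = VEC_PACK_TRUNC_EXPR <vop0, vop1>;

   and a multi-step demotion repeats this pair-wise packing on the
   intermediate results.  */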
4926 static void
4927 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4928 int multi_step_cvt,
4929 stmt_vec_info stmt_info,
4930 vec<tree> &vec_dsts,
4931 gimple_stmt_iterator *gsi,
4932 slp_tree slp_node, code_helper code,
4933 bool narrow_src_p)
4935 unsigned int i;
4936 tree vop0, vop1, new_tmp, vec_dest;
4938 vec_dest = vec_dsts.pop ();
4940 for (i = 0; i < vec_oprnds->length (); i += 2)
4942 /* Create demotion operation. */
4943 vop0 = (*vec_oprnds)[i];
4944 vop1 = (*vec_oprnds)[i + 1];
4945 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
4946 new_tmp = make_ssa_name (vec_dest, new_stmt);
4947 gimple_set_lhs (new_stmt, new_tmp);
4948 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4949 if (multi_step_cvt || narrow_src_p)
4950 /* Store the resulting vector for next recursive call,
4951 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
4952 (*vec_oprnds)[i/2] = new_tmp;
4953 else
4955 /* This is the last step of the conversion sequence. Store the
4956 vectors in SLP_NODE or in vector info of the scalar statement
4957 (or in STMT_VINFO_RELATED_STMT chain). */
4958 if (slp_node)
4959 slp_node->push_vec_def (new_stmt);
4960 else
4961 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4965 /* For multi-step demotion operations we first generate demotion operations
4966 from the source type to the intermediate types, and then combine the
4967 results (stored in VEC_OPRNDS) in demotion operation to the destination
4968 type. */
4969 if (multi_step_cvt)
4971 /* At each level of recursion we have half of the operands we had at the
4972 previous level. */
4973 vec_oprnds->truncate ((i+1)/2);
4974 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4975 multi_step_cvt - 1,
4976 stmt_info, vec_dsts, gsi,
4977 slp_node, VEC_PACK_TRUNC_EXPR,
4978 narrow_src_p);
4981 vec_dsts.quick_push (vec_dest);
4985 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4986 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4987 STMT_INFO. For multi-step conversions store the resulting vectors and
4988 call the function recursively. */
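/* E.g. (an illustrative sketch, vector modes chosen arbitrarily), a
   widening multiply of two V8HI operands is emitted as

     vect_lo = VEC_WIDEN_MULT_LO_EXPR <vop0, vop1>;
     vect_hi = VEC_WIDEN_MULT_HI_EXPR <vop0, vop1>;

   each half producing a V4SI vector, and both results are stored for
   the next step.  */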
4990 static void
4991 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4992 vec<tree> *vec_oprnds0,
4993 vec<tree> *vec_oprnds1,
4994 stmt_vec_info stmt_info, tree vec_dest,
4995 gimple_stmt_iterator *gsi,
4996 code_helper ch1,
4997 code_helper ch2, int op_type)
4999 int i;
5000 tree vop0, vop1, new_tmp1, new_tmp2;
5001 gimple *new_stmt1, *new_stmt2;
5002 vec<tree> vec_tmp = vNULL;
5004 vec_tmp.create (vec_oprnds0->length () * 2);
5005 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5007 if (op_type == binary_op)
5008 vop1 = (*vec_oprnds1)[i];
5009 else
5010 vop1 = NULL_TREE;
5012 /* Generate the two halves of promotion operation. */
5013 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5014 op_type, vec_dest, gsi,
5015 stmt_info);
5016 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5017 op_type, vec_dest, gsi,
5018 stmt_info);
5019 if (is_gimple_call (new_stmt1))
5021 new_tmp1 = gimple_call_lhs (new_stmt1);
5022 new_tmp2 = gimple_call_lhs (new_stmt2);
5024 else
5026 new_tmp1 = gimple_assign_lhs (new_stmt1);
5027 new_tmp2 = gimple_assign_lhs (new_stmt2);
5030 /* Store the results for the next step. */
5031 vec_tmp.quick_push (new_tmp1);
5032 vec_tmp.quick_push (new_tmp2);
5035 vec_oprnds0->release ();
5036 *vec_oprnds0 = vec_tmp;
5039 /* Create vectorized promotion stmts for widening stmts using only half the
5040 potential vector size for input. */
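/* E.g. (an illustrative sketch), for a V4HI + V4HI addition producing a
   V4SI result, both inputs are first widened with a NOP_EXPR conversion
   to V4SI and the addition is then carried out on the widened
   vectors.  */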
5041 static void
5042 vect_create_half_widening_stmts (vec_info *vinfo,
5043 vec<tree> *vec_oprnds0,
5044 vec<tree> *vec_oprnds1,
5045 stmt_vec_info stmt_info, tree vec_dest,
5046 gimple_stmt_iterator *gsi,
5047 code_helper code1,
5048 int op_type)
5050 int i;
5051 tree vop0, vop1;
5052 gimple *new_stmt1;
5053 gimple *new_stmt2;
5054 gimple *new_stmt3;
5055 vec<tree> vec_tmp = vNULL;
5057 vec_tmp.create (vec_oprnds0->length ());
5058 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5060 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5062 gcc_assert (op_type == binary_op);
5063 vop1 = (*vec_oprnds1)[i];
5065 /* Widen the first vector input. */
5066 out_type = TREE_TYPE (vec_dest);
5067 new_tmp1 = make_ssa_name (out_type);
5068 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5069 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5070 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5072 /* Widen the second vector input. */
5073 new_tmp2 = make_ssa_name (out_type);
5074 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5075 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5076 /* Perform the operation. With both vector inputs widened. */
5077 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5079 else
5081 /* Perform the operation. With the single vector input widened. */
5082 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5085 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5086 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5087 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5089 /* Store the results for the next step. */
5090 vec_tmp.quick_push (new_tmp3);
5093 vec_oprnds0->release ();
5094 *vec_oprnds0 = vec_tmp;
5098 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5099 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5100 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5101 Return true if STMT_INFO is vectorizable in this way. */
5103 static bool
5104 vectorizable_conversion (vec_info *vinfo,
5105 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5106 gimple **vec_stmt, slp_tree slp_node,
5107 stmt_vector_for_cost *cost_vec)
5109 tree vec_dest, cvt_op = NULL_TREE;
5110 tree scalar_dest;
5111 tree op0, op1 = NULL_TREE;
5112 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5113 tree_code tc1, tc2;
5114 code_helper code, code1, code2;
5115 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5116 tree new_temp;
5117 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5118 int ndts = 2;
5119 poly_uint64 nunits_in;
5120 poly_uint64 nunits_out;
5121 tree vectype_out, vectype_in;
5122 int ncopies, i;
5123 tree lhs_type, rhs_type;
5124 /* For conversions between floating point and integer, there are two NARROW
5125 cases. NARROW_SRC is for FLOAT_EXPR and means
5126 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5127 This is safe when the range of the source integer fits into the lower
5128 precision. NARROW_DST is for FIX_TRUNC_EXPR and means
5129 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5130 For other narrowing conversions, NARROW_DST is used by
5131 default. */
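/* E.g. (an illustrative sketch, types chosen arbitrarily):
   NARROW_SRC: float _2 = (float) _1 with a 64-bit integer _1 whose value
   range fits in 32 bits is done as a 64->32 bit integer demotion followed
   by a 32-bit int-to-float conversion.
   NARROW_DST: short _2 = (short) _1 with a double _1 is done as a
   double-to-int FIX_TRUNC_EXPR followed by an int-to-short demotion.  */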
5132 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5133 vec<tree> vec_oprnds0 = vNULL;
5134 vec<tree> vec_oprnds1 = vNULL;
5135 tree vop0;
5136 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5137 int multi_step_cvt = 0;
5138 vec<tree> interm_types = vNULL;
5139 tree intermediate_type, cvt_type = NULL_TREE;
5140 int op_type;
5141 unsigned short fltsz;
5143 /* Is STMT a vectorizable conversion? */
5145 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5146 return false;
5148 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5149 && ! vec_stmt)
5150 return false;
5152 gimple* stmt = stmt_info->stmt;
5153 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5154 return false;
5156 if (gimple_get_lhs (stmt) == NULL_TREE
5157 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5158 return false;
5160 if (TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5161 return false;
5163 if (is_gimple_assign (stmt))
5165 code = gimple_assign_rhs_code (stmt);
5166 op_type = TREE_CODE_LENGTH ((tree_code) code);
5168 else if (gimple_call_internal_p (stmt))
5170 code = gimple_call_internal_fn (stmt);
5171 op_type = gimple_call_num_args (stmt);
5173 else
5174 return false;
5176 bool widen_arith = (code == WIDEN_MULT_EXPR
5177 || code == WIDEN_LSHIFT_EXPR
5178 || widening_fn_p (code));
5180 if (!widen_arith
5181 && !CONVERT_EXPR_CODE_P (code)
5182 && code != FIX_TRUNC_EXPR
5183 && code != FLOAT_EXPR)
5184 return false;
5186 /* Check types of lhs and rhs. */
5187 scalar_dest = gimple_get_lhs (stmt);
5188 lhs_type = TREE_TYPE (scalar_dest);
5189 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5191 /* Check the operands of the operation. */
5192 slp_tree slp_op0, slp_op1 = NULL;
5193 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5194 0, &op0, &slp_op0, &dt[0], &vectype_in))
5196 if (dump_enabled_p ())
5197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5198 "use not simple.\n");
5199 return false;
5202 rhs_type = TREE_TYPE (op0);
5203 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5204 && !((INTEGRAL_TYPE_P (lhs_type)
5205 && INTEGRAL_TYPE_P (rhs_type))
5206 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5207 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5208 return false;
5210 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5211 && ((INTEGRAL_TYPE_P (lhs_type)
5212 && !type_has_mode_precision_p (lhs_type))
5213 || (INTEGRAL_TYPE_P (rhs_type)
5214 && !type_has_mode_precision_p (rhs_type))))
5216 if (dump_enabled_p ())
5217 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5218 "type conversion to/from bit-precision unsupported."
5219 "\n");
5220 return false;
5223 if (op_type == binary_op)
5225 gcc_assert (code == WIDEN_MULT_EXPR
5226 || code == WIDEN_LSHIFT_EXPR
5227 || widening_fn_p (code));
5229 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5230 gimple_call_arg (stmt, 0);
5231 tree vectype1_in;
5232 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5233 &op1, &slp_op1, &dt[1], &vectype1_in))
5235 if (dump_enabled_p ())
5236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5237 "use not simple.\n");
5238 return false;
5240 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5241 OP1. */
5242 if (!vectype_in)
5243 vectype_in = vectype1_in;
5246 /* If op0 is an external or constant def, infer the vector type
5247 from the scalar type. */
5248 if (!vectype_in)
5249 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5250 if (vec_stmt)
5251 gcc_assert (vectype_in);
5252 if (!vectype_in)
5254 if (dump_enabled_p ())
5255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5256 "no vectype for scalar type %T\n", rhs_type);
5258 return false;
5261 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5262 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5264 if (dump_enabled_p ())
5265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5266 "can't convert between boolean and non "
5267 "boolean vectors %T\n", rhs_type);
5269 return false;
5272 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5273 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5274 if (known_eq (nunits_out, nunits_in))
5275 if (widen_arith)
5276 modifier = WIDEN;
5277 else
5278 modifier = NONE;
5279 else if (multiple_p (nunits_out, nunits_in))
5280 modifier = NARROW_DST;
5281 else
5283 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5284 modifier = WIDEN;
5287 /* Multiple types in SLP are handled by creating the appropriate number of
5288 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5289 case of SLP. */
5290 if (slp_node)
5291 ncopies = 1;
5292 else if (modifier == NARROW_DST)
5293 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5294 else
5295 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5297 /* Sanity check: make sure that at least one copy of the vectorized stmt
5298 needs to be generated. */
5299 gcc_assert (ncopies >= 1);
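/* Illustrative sketch (mine, not from the sources; 128-bit vectors
   assumed) of how MODIFIER and NCOPIES fall out of the lane counts:

     void widen (long long *d, int *s, int n)
     {
       for (int i = 0; i < n; i++)
         d[i] = s[i];        // V4SI in, V2DI out: nunits_in (4) is a
                             // multiple of nunits_out (2) -> WIDEN.
     }

     void narrow (int *d, long long *s, int n)
     {
       for (int i = 0; i < n; i++)
         d[i] = (int) s[i];  // V2DI in, V4SI out: nunits_out (4) is a
                             // multiple of nunits_in (2) -> NARROW_DST.
     }

   For WIDEN and NONE the number of copies is derived from VECTYPE_IN,
   for NARROW_DST from VECTYPE_OUT, i.e. in each case from the type
   with the larger number of lanes.  */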
5301 bool found_mode = false;
5302 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5303 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5304 opt_scalar_mode rhs_mode_iter;
5306 /* Supportable by target? */
5307 switch (modifier)
5309 case NONE:
5310 if (code != FIX_TRUNC_EXPR
5311 && code != FLOAT_EXPR
5312 && !CONVERT_EXPR_CODE_P (code))
5313 return false;
5314 gcc_assert (code.is_tree_code ());
5315 if (supportable_convert_operation ((tree_code) code, vectype_out,
5316 vectype_in, &tc1))
5318 code1 = tc1;
5319 break;
5322 /* For conversions between float and integer types try whether
5323 we can use intermediate signed integer types to support the
5324 conversion. */
5325 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
5326 && (code == FLOAT_EXPR ||
5327 (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5329 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5330 bool float_expr_p = code == FLOAT_EXPR;
5331 unsigned short target_size;
5332 scalar_mode intermediate_mode;
5333 if (demotion)
5335 intermediate_mode = lhs_mode;
5336 target_size = GET_MODE_SIZE (rhs_mode);
5338 else
5340 target_size = GET_MODE_SIZE (lhs_mode);
5341 if (!int_mode_for_size
5342 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5343 goto unsupported;
5345 code1 = float_expr_p ? code : NOP_EXPR;
5346 codecvt1 = float_expr_p ? NOP_EXPR : code;
5347 opt_scalar_mode mode_iter;
5348 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5350 intermediate_mode = mode_iter.require ();
5352 if (GET_MODE_SIZE (intermediate_mode) > target_size)
5353 break;
5355 scalar_mode cvt_mode;
5356 if (!int_mode_for_size
5357 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
5358 break;
5360 cvt_type = build_nonstandard_integer_type
5361 (GET_MODE_BITSIZE (cvt_mode), 0);
5363 /* Check if the intermediate type can hold OP0's range.
5364 When converting from float to integer this is not necessary
5365 because values that do not fit the (smaller) target type are
5366 unspecified anyway. */
5367 if (demotion && float_expr_p)
5369 wide_int op_min_value, op_max_value;
5370 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5371 break;
5373 if (cvt_type == NULL_TREE
5374 || (wi::min_precision (op_max_value, SIGNED)
5375 > TYPE_PRECISION (cvt_type))
5376 || (wi::min_precision (op_min_value, SIGNED)
5377 > TYPE_PRECISION (cvt_type)))
5378 continue;
5381 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5382 /* This should only happen for SLP as long as the loop vectorizer
5383 only supports same-sized vectors. */
5384 if (cvt_type == NULL_TREE
5385 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5386 || !supportable_convert_operation ((tree_code) code1,
5387 vectype_out,
5388 cvt_type, &tc1)
5389 || !supportable_convert_operation ((tree_code) codecvt1,
5390 cvt_type,
5391 vectype_in, &tc2))
5392 continue;
5394 found_mode = true;
5395 break;
5398 if (found_mode)
5400 multi_step_cvt++;
5401 interm_types.safe_push (cvt_type);
5402 cvt_type = NULL_TREE;
5403 code1 = tc1;
5404 codecvt1 = tc2;
5405 break;
5408 /* FALLTHRU */
5409 unsupported:
5410 if (dump_enabled_p ())
5411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5412 "conversion not supported by target.\n");
5413 return false;
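/* A hedged illustration (mine, not from the sources) of the
   intermediate-type fallback above: with basic-block SLP a group like

     void f (double *d, short *s)
     {
       d[0] = s[0];  d[1] = s[1];  d[2] = s[2];  d[3] = s[3];
     }

   asks for a V4HI -> V4DF FLOAT_EXPR, which few targets provide as a
   single operation.  The search above instead picks a same-lane-count
   signed integer type, say V4SI or V4DI, and splits the conversion
   into V4HI --NOP_EXPR--> cvt_type --FLOAT_EXPR--> V4DF, pushing the
   intermediate vector type onto INTERM_TYPES and bumping
   MULTI_STEP_CVT.  */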
5415 case WIDEN:
5416 if (known_eq (nunits_in, nunits_out))
5418 if (!(code.is_tree_code ()
5419 && supportable_half_widening_operation ((tree_code) code,
5420 vectype_out, vectype_in,
5421 &tc1)))
5422 goto unsupported;
5423 code1 = tc1;
5424 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5425 break;
5427 if (supportable_widening_operation (vinfo, code, stmt_info,
5428 vectype_out, vectype_in, &code1,
5429 &code2, &multi_step_cvt,
5430 &interm_types))
5432 /* Binary widening operation can only be supported directly by the
5433 architecture. */
5434 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5435 break;
5438 if (code != FLOAT_EXPR
5439 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5440 goto unsupported;
5442 fltsz = GET_MODE_SIZE (lhs_mode);
5443 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5445 rhs_mode = rhs_mode_iter.require ();
5446 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5447 break;
5449 cvt_type
5450 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5451 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5452 if (cvt_type == NULL_TREE)
5453 goto unsupported;
5455 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5457 tc1 = ERROR_MARK;
5458 gcc_assert (code.is_tree_code ());
5459 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5460 cvt_type, &tc1))
5461 goto unsupported;
5462 codecvt1 = tc1;
5464 else if (!supportable_widening_operation (vinfo, code,
5465 stmt_info, vectype_out,
5466 cvt_type, &codecvt1,
5467 &codecvt2, &multi_step_cvt,
5468 &interm_types))
5469 continue;
5470 else
5471 gcc_assert (multi_step_cvt == 0);
5473 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5474 cvt_type,
5475 vectype_in, &code1,
5476 &code2, &multi_step_cvt,
5477 &interm_types))
5479 found_mode = true;
5480 break;
5484 if (!found_mode)
5485 goto unsupported;
5487 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5488 codecvt2 = ERROR_MARK;
5489 else
5491 multi_step_cvt++;
5492 interm_types.safe_push (cvt_type);
5493 cvt_type = NULL_TREE;
5495 break;
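/* Illustration (mine; 128-bit vectors assumed) of the multi-step
   WIDEN fallback above: for

     void f (double *d, short *s, int n)
     {
       for (int i = 0; i < n; i++)
         d[i] = s[i];
     }

   there is normally no single V8HI -> V2DF operation.  The loop over
   ever wider RHS_MODEs then tries to widen the shorts to a same-sized
   integer vector first (V8HI -> two V4SI via a NOP widening) and to
   convert that to double with a widening FLOAT_EXPR (V4SI -> two
   V2DF), recording the intermediate integer vector type in
   INTERM_TYPES.  */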
5497 case NARROW_DST:
5498 gcc_assert (op_type == unary_op);
5499 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5500 &code1, &multi_step_cvt,
5501 &interm_types))
5502 break;
5504 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5505 goto unsupported;
5507 if (code == FIX_TRUNC_EXPR)
5509 cvt_type
5510 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5511 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5512 if (cvt_type == NULL_TREE)
5513 goto unsupported;
5514 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5515 &tc1))
5516 codecvt1 = tc1;
5517 else
5518 goto unsupported;
5519 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5520 &code1, &multi_step_cvt,
5521 &interm_types))
5522 break;
5524 /* If op0 can be represented with a low-precision integer,
5525 truncate it to cvt_type and then do the FLOAT_EXPR. */
5526 else if (code == FLOAT_EXPR)
5528 wide_int op_min_value, op_max_value;
5529 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5530 goto unsupported;
5532 cvt_type
5533 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5534 if (cvt_type == NULL_TREE
5535 || (wi::min_precision (op_max_value, SIGNED)
5536 > TYPE_PRECISION (cvt_type))
5537 || (wi::min_precision (op_min_value, SIGNED)
5538 > TYPE_PRECISION (cvt_type)))
5539 goto unsupported;
5541 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5542 if (cvt_type == NULL_TREE)
5543 goto unsupported;
5544 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5545 &code1, &multi_step_cvt,
5546 &interm_types))
5547 goto unsupported;
5548 if (supportable_convert_operation ((tree_code) code, vectype_out,
5549 cvt_type, &tc1))
5551 codecvt1 = tc1;
5552 modifier = NARROW_SRC;
5553 break;
5557 goto unsupported;
5559 default:
5560 gcc_unreachable ();
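/* Two hedged sketches (mine) of the narrowing fallbacks handled above:

     signed char c = (signed char) d;   // double -> char, FIX_TRUNC_EXPR
         V2DF --FIX_TRUNC--> V2DI, then pack the integer vectors down
         to V16QI with NOP narrowing steps (NARROW_DST);

     float f = (float) l;               // long long -> float, FLOAT_EXPR
         if range information proves the value fits in 32 bits, first
         pack V2DI down to V4SI with a NOP narrowing and only then
         apply FLOAT_EXPR - the NARROW_SRC flavour, where the source
         rather than the result is what gets narrowed.  */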
5563 if (!vec_stmt) /* transformation not required. */
5565 if (slp_node
5566 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5567 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5569 if (dump_enabled_p ())
5570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5571 "incompatible vector types for invariants\n");
5572 return false;
5574 DUMP_VECT_SCOPE ("vectorizable_conversion");
5575 if (modifier == NONE)
5577 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5578 vect_model_simple_cost (vinfo, stmt_info,
5579 ncopies * (1 + multi_step_cvt),
5580 dt, ndts, slp_node, cost_vec);
5582 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5584 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5585 /* The final packing step produces one vector result per copy. */
5586 unsigned int nvectors
5587 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5588 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5589 multi_step_cvt, cost_vec,
5590 widen_arith);
5592 else
5594 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5595 /* The initial unpacking step produces two vector results
5596 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5597 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5598 unsigned int nvectors
5599 = (slp_node
5600 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5601 : ncopies * 2);
5602 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5603 multi_step_cvt, cost_vec,
5604 widen_arith);
5606 interm_types.release ();
5607 return true;
5610 /* Transform. */
5611 if (dump_enabled_p ())
5612 dump_printf_loc (MSG_NOTE, vect_location,
5613 "transform conversion. ncopies = %d.\n", ncopies);
5615 if (op_type == binary_op)
5617 if (CONSTANT_CLASS_P (op0))
5618 op0 = fold_convert (TREE_TYPE (op1), op0);
5619 else if (CONSTANT_CLASS_P (op1))
5620 op1 = fold_convert (TREE_TYPE (op0), op1);
5623 /* In case of multi-step conversion, we first generate conversion operations
5624 to the intermediate types, and then from those types to the final one.
5625 We create vector destinations for the intermediate type (TYPES) received
5626 from supportable_*_operation, and store them in the correct order
5627 for future use in vect_create_vectorized_*_stmts (). */
5628 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5629 bool widen_or_narrow_float_p
5630 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5631 vec_dest = vect_create_destination_var (scalar_dest,
5632 widen_or_narrow_float_p
5633 ? cvt_type : vectype_out);
5634 vec_dsts.quick_push (vec_dest);
5636 if (multi_step_cvt)
5638 for (i = interm_types.length () - 1;
5639 interm_types.iterate (i, &intermediate_type); i--)
5641 vec_dest = vect_create_destination_var (scalar_dest,
5642 intermediate_type);
5643 vec_dsts.quick_push (vec_dest);
5647 if (cvt_type)
5648 vec_dest = vect_create_destination_var (scalar_dest,
5649 widen_or_narrow_float_p
5650 ? vectype_out : cvt_type);
5652 int ninputs = 1;
5653 if (!slp_node)
5655 if (modifier == WIDEN)
5657 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5659 if (multi_step_cvt)
5660 ninputs = vect_pow2 (multi_step_cvt);
5661 ninputs *= 2;
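/* Worked example (illustrative): for a non-SLP narrowing conversion
   with MULTI_STEP_CVT == 2 this computes NINPUTS = vect_pow2 (2) * 2
   == 8, i.e. eight defs of OP0 are fetched per copy so that the final
   packing step can produce one full output vector; a single-step
   narrowing (MULTI_STEP_CVT == 0) only needs NINPUTS == 2.  */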
5665 switch (modifier)
5667 case NONE:
5668 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5669 op0, vectype_in, &vec_oprnds0);
5670 /* vec_dest is intermediate type operand when multi_step_cvt. */
5671 if (multi_step_cvt)
5673 cvt_op = vec_dest;
5674 vec_dest = vec_dsts[0];
5677 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5679 /* Arguments are ready, create the new vector stmt. */
5680 gimple* new_stmt;
5681 if (multi_step_cvt)
5683 gcc_assert (multi_step_cvt == 1);
5684 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5685 new_temp = make_ssa_name (cvt_op, new_stmt);
5686 gimple_assign_set_lhs (new_stmt, new_temp);
5687 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5688 vop0 = new_temp;
5690 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5691 new_temp = make_ssa_name (vec_dest, new_stmt);
5692 gimple_set_lhs (new_stmt, new_temp);
5693 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5695 if (slp_node)
5696 slp_node->push_vec_def (new_stmt);
5697 else
5698 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5700 break;
5702 case WIDEN:
5703 /* In case the vectorization factor (VF) is bigger than the number
5704 of elements that we can fit in a vectype (nunits), we have to
5705 generate more than one vector stmt - i.e. we need to "unroll"
5706 the vector stmt by a factor VF/nunits. */
5707 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5708 op0, vectype_in, &vec_oprnds0,
5709 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5710 vectype_in, &vec_oprnds1);
5711 if (code == WIDEN_LSHIFT_EXPR)
5713 int oprnds_size = vec_oprnds0.length ();
5714 vec_oprnds1.create (oprnds_size);
5715 for (i = 0; i < oprnds_size; ++i)
5716 vec_oprnds1.quick_push (op1);
5718 /* Arguments are ready. Create the new vector stmts. */
5719 for (i = multi_step_cvt; i >= 0; i--)
5721 tree this_dest = vec_dsts[i];
5722 code_helper c1 = code1, c2 = code2;
5723 if (i == 0 && codecvt2 != ERROR_MARK)
5725 c1 = codecvt1;
5726 c2 = codecvt2;
5728 if (known_eq (nunits_out, nunits_in))
5729 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5730 stmt_info, this_dest, gsi, c1,
5731 op_type);
5732 else
5733 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5734 &vec_oprnds1, stmt_info,
5735 this_dest, gsi,
5736 c1, c2, op_type);
5739 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5741 gimple *new_stmt;
5742 if (cvt_type)
5744 new_temp = make_ssa_name (vec_dest);
5745 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5746 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5748 else
5749 new_stmt = SSA_NAME_DEF_STMT (vop0);
5751 if (slp_node)
5752 slp_node->push_vec_def (new_stmt);
5753 else
5754 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5756 break;
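/* Hedged sketch (names invented, dump-style) of what the WIDEN
   transform above emits for a plain int -> long long conversion:

     vect_lo_1 = [vec_unpack_lo_expr] vect_x_1;
     vect_hi_1 = [vec_unpack_hi_expr] vect_x_1;

   Each input vector yields two wider result vectors; a multi-step
   conversion applies the same unpacking again to the intermediate
   vectors, and when CVT_TYPE is set the loop just above adds one
   final CODECVT1 statement (e.g. the closing integer -> double step)
   per result vector.  */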
5758 case NARROW_SRC:
5759 case NARROW_DST:
5760 /* In case the vectorization factor (VF) is bigger than the number
5761 of elements that we can fit in a vectype (nunits), we have to
5762 generate more than one vector stmt - i.e. we need to "unroll"
5763 the vector stmt by a factor VF/nunits. */
5764 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5765 op0, vectype_in, &vec_oprnds0);
5766 /* Arguments are ready. Create the new vector stmts. */
5767 if (cvt_type && modifier == NARROW_DST)
5768 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5770 new_temp = make_ssa_name (vec_dest);
5771 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5772 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5773 vec_oprnds0[i] = new_temp;
5776 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5777 multi_step_cvt,
5778 stmt_info, vec_dsts, gsi,
5779 slp_node, code1,
5780 modifier == NARROW_SRC);
5781 /* After demoting op0 to cvt_type, convert it to dest. */
5782 if (cvt_type && code == FLOAT_EXPR)
5784 for (unsigned int i = 0; i != vec_oprnds0.length () / 2; i++)
5786 /* Arguments are ready, create the new vector stmt. */
5787 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5788 gimple *new_stmt
5789 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5790 new_temp = make_ssa_name (vec_dest, new_stmt);
5791 gimple_set_lhs (new_stmt, new_temp);
5792 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5794 /* This is the last step of the conversion sequence. Store the
5795 vectors in SLP_NODE or in vector info of the scalar statement
5796 (or in STMT_VINFO_RELATED_STMT chain). */
5797 if (slp_node)
5798 slp_node->push_vec_def (new_stmt);
5799 else
5800 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5803 break;
5805 if (!slp_node)
5806 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5808 vec_oprnds0.release ();
5809 vec_oprnds1.release ();
5810 interm_types.release ();
5812 return true;
5815 /* Return true if we can assume from the scalar form of STMT_INFO that
5816 neither the scalar nor the vector forms will generate code. STMT_INFO
5817 is known not to involve a data reference. */
5819 bool
5820 vect_nop_conversion_p (stmt_vec_info stmt_info)
5822 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5823 if (!stmt)
5824 return false;
5826 tree lhs = gimple_assign_lhs (stmt);
5827 tree_code code = gimple_assign_rhs_code (stmt);
5828 tree rhs = gimple_assign_rhs1 (stmt);
5830 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5831 return true;
5833 if (CONVERT_EXPR_CODE_P (code))
5834 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5836 return false;
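/* For instance (illustrative), with  int i;  the statements

     unsigned int u = (unsigned int) i;   // same precision: a nop
     short s = (short) i;                 // truncation: not a nop

   classify as shown; plain SSA copies and VIEW_CONVERT_EXPRs are
   always considered nops here.  */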
5839 /* Function vectorizable_assignment.
5841 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5842 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5843 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5844 Return true if STMT_INFO is vectorizable in this way. */
5846 static bool
5847 vectorizable_assignment (vec_info *vinfo,
5848 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5849 gimple **vec_stmt, slp_tree slp_node,
5850 stmt_vector_for_cost *cost_vec)
5852 tree vec_dest;
5853 tree scalar_dest;
5854 tree op;
5855 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5856 tree new_temp;
5857 enum vect_def_type dt[1] = {vect_unknown_def_type};
5858 int ndts = 1;
5859 int ncopies;
5860 int i;
5861 vec<tree> vec_oprnds = vNULL;
5862 tree vop;
5863 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5864 enum tree_code code;
5865 tree vectype_in;
5867 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5868 return false;
5870 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5871 && ! vec_stmt)
5872 return false;
5874 /* Is vectorizable assignment? */
5875 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5876 if (!stmt)
5877 return false;
5879 scalar_dest = gimple_assign_lhs (stmt);
5880 if (TREE_CODE (scalar_dest) != SSA_NAME)
5881 return false;
5883 if (STMT_VINFO_DATA_REF (stmt_info))
5884 return false;
5886 code = gimple_assign_rhs_code (stmt);
5887 if (!(gimple_assign_single_p (stmt)
5888 || code == PAREN_EXPR
5889 || CONVERT_EXPR_CODE_P (code)))
5890 return false;
5892 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5893 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5895 /* Multiple types in SLP are handled by creating the appropriate number of
5896 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5897 case of SLP. */
5898 if (slp_node)
5899 ncopies = 1;
5900 else
5901 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5903 gcc_assert (ncopies >= 1);
5905 slp_tree slp_op;
5906 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5907 &dt[0], &vectype_in))
5909 if (dump_enabled_p ())
5910 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5911 "use not simple.\n");
5912 return false;
5914 if (!vectype_in)
5915 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5917 /* We can handle NOP_EXPR conversions that do not change the number
5918 of elements or the vector size. */
5919 if ((CONVERT_EXPR_CODE_P (code)
5920 || code == VIEW_CONVERT_EXPR)
5921 && (!vectype_in
5922 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5923 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5924 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5925 return false;
5927 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5929 if (dump_enabled_p ())
5930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5931 "can't convert between boolean and non "
5932 "boolean vectors %T\n", TREE_TYPE (op));
5934 return false;
5937 /* We do not handle bit-precision changes. */
5938 if ((CONVERT_EXPR_CODE_P (code)
5939 || code == VIEW_CONVERT_EXPR)
5940 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5941 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5942 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
5943 && !type_has_mode_precision_p (TREE_TYPE (op))))
5944 /* But a conversion that does not change the bit-pattern is ok. */
5945 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5946 && INTEGRAL_TYPE_P (TREE_TYPE (op))
5947 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5948 > TYPE_PRECISION (TREE_TYPE (op)))
5949 && TYPE_UNSIGNED (TREE_TYPE (op)))
5950 || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5951 == TYPE_PRECISION (TREE_TYPE (op))))))
5953 if (dump_enabled_p ())
5954 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5955 "type conversion to/from bit-precision "
5956 "unsupported.\n");
5957 return false;
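/* Hedged illustration of the rule above: with
   struct S { unsigned int f : 24; }  a widening  int t = x.f;  only
   "extends" bits that are already zero in the SImode container, so it
   keeps the bit-pattern and is let through (likewise when source and
   destination precision are equal), while converting a 32-bit int
   down to the 24-bit bit-field type would need a real truncation and
   is rejected by this check.  */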
5960 if (!vec_stmt) /* transformation not required. */
5962 if (slp_node
5963 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5965 if (dump_enabled_p ())
5966 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5967 "incompatible vector types for invariants\n");
5968 return false;
5970 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5971 DUMP_VECT_SCOPE ("vectorizable_assignment");
5972 if (!vect_nop_conversion_p (stmt_info))
5973 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5974 cost_vec);
5975 return true;
5978 /* Transform. */
5979 if (dump_enabled_p ())
5980 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5982 /* Handle def. */
5983 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5985 /* Handle use. */
5986 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5988 /* Arguments are ready. Create the new vector stmt. */
5989 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5991 if (CONVERT_EXPR_CODE_P (code)
5992 || code == VIEW_CONVERT_EXPR)
5993 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5994 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5995 new_temp = make_ssa_name (vec_dest, new_stmt);
5996 gimple_assign_set_lhs (new_stmt, new_temp);
5997 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5998 if (slp_node)
5999 slp_node->push_vec_def (new_stmt);
6000 else
6001 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6003 if (!slp_node)
6004 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6006 vec_oprnds.release ();
6007 return true;
6011 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6012 either as shift by a scalar or by a vector. */
6014 bool
6015 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6018 machine_mode vec_mode;
6019 optab optab;
6020 int icode;
6021 tree vectype;
6023 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6024 if (!vectype)
6025 return false;
6027 optab = optab_for_tree_code (code, vectype, optab_scalar);
6028 if (!optab
6029 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6031 optab = optab_for_tree_code (code, vectype, optab_vector);
6032 if (!optab
6033 || (optab_handler (optab, TYPE_MODE (vectype))
6034 == CODE_FOR_nothing))
6035 return false;
6038 vec_mode = TYPE_MODE (vectype);
6039 icode = (int) optab_handler (optab, vec_mode);
6040 if (icode == CODE_FOR_nothing)
6041 return false;
6043 return true;
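/* Hedged usage sketch (caller invented for illustration):

     if (vect_supportable_shift (vinfo, RSHIFT_EXPR, TREE_TYPE (oprnd)))
       ... a pattern may safely synthesize the shift ...

   Note the deliberately loose contract: a target that only implements
   vector-by-scalar shifts and one that only implements
   vector-by-vector shifts both answer true; vectorizable_shift below
   re-checks which of the two forms is actually usable.  */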
6047 /* Function vectorizable_shift.
6049 Check if STMT_INFO performs a shift operation that can be vectorized.
6050 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6051 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6052 Return true if STMT_INFO is vectorizable in this way. */
6054 static bool
6055 vectorizable_shift (vec_info *vinfo,
6056 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6057 gimple **vec_stmt, slp_tree slp_node,
6058 stmt_vector_for_cost *cost_vec)
6060 tree vec_dest;
6061 tree scalar_dest;
6062 tree op0, op1 = NULL;
6063 tree vec_oprnd1 = NULL_TREE;
6064 tree vectype;
6065 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6066 enum tree_code code;
6067 machine_mode vec_mode;
6068 tree new_temp;
6069 optab optab;
6070 int icode;
6071 machine_mode optab_op2_mode;
6072 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6073 int ndts = 2;
6074 poly_uint64 nunits_in;
6075 poly_uint64 nunits_out;
6076 tree vectype_out;
6077 tree op1_vectype;
6078 int ncopies;
6079 int i;
6080 vec<tree> vec_oprnds0 = vNULL;
6081 vec<tree> vec_oprnds1 = vNULL;
6082 tree vop0, vop1;
6083 unsigned int k;
6084 bool scalar_shift_arg = true;
6085 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6086 bool incompatible_op1_vectype_p = false;
6088 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6089 return false;
6091 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6092 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6093 && ! vec_stmt)
6094 return false;
6096 /* Is STMT a vectorizable binary/unary operation? */
6097 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6098 if (!stmt)
6099 return false;
6101 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6102 return false;
6104 code = gimple_assign_rhs_code (stmt);
6106 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6107 || code == RROTATE_EXPR))
6108 return false;
6110 scalar_dest = gimple_assign_lhs (stmt);
6111 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6112 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6114 if (dump_enabled_p ())
6115 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6116 "bit-precision shifts not supported.\n");
6117 return false;
6120 slp_tree slp_op0;
6121 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6122 0, &op0, &slp_op0, &dt[0], &vectype))
6124 if (dump_enabled_p ())
6125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6126 "use not simple.\n");
6127 return false;
6129 /* If op0 is an external or constant def, infer the vector type
6130 from the scalar type. */
6131 if (!vectype)
6132 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6133 if (vec_stmt)
6134 gcc_assert (vectype);
6135 if (!vectype)
6137 if (dump_enabled_p ())
6138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6139 "no vectype for scalar type\n");
6140 return false;
6143 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6144 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6145 if (maybe_ne (nunits_out, nunits_in))
6146 return false;
6148 stmt_vec_info op1_def_stmt_info;
6149 slp_tree slp_op1;
6150 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6151 &dt[1], &op1_vectype, &op1_def_stmt_info))
6153 if (dump_enabled_p ())
6154 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6155 "use not simple.\n");
6156 return false;
6159 /* Multiple types in SLP are handled by creating the appropriate number of
6160 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6161 case of SLP. */
6162 if (slp_node)
6163 ncopies = 1;
6164 else
6165 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6167 gcc_assert (ncopies >= 1);
6169 /* Determine whether the shift amount is a vector, or scalar. If the
6170 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6172 if ((dt[1] == vect_internal_def
6173 || dt[1] == vect_induction_def
6174 || dt[1] == vect_nested_cycle)
6175 && !slp_node)
6176 scalar_shift_arg = false;
6177 else if (dt[1] == vect_constant_def
6178 || dt[1] == vect_external_def
6179 || dt[1] == vect_internal_def)
6181 /* In SLP we need to check whether the shift count is the same;
6182 in loops, if it is a constant or invariant, it is always
6183 a scalar shift. */
6184 if (slp_node)
6186 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6187 stmt_vec_info slpstmt_info;
6189 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6191 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6192 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6193 scalar_shift_arg = false;
6196 /* For internal SLP defs we have to make sure we see scalar stmts
6197 for all vector elements.
6198 ??? For different vectors we could resort to a different
6199 scalar shift operand but code-generation below simply always
6200 takes the first. */
6201 if (dt[1] == vect_internal_def
6202 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6203 stmts.length ()))
6204 scalar_shift_arg = false;
6207 /* If the shift amount is computed by a pattern stmt we cannot
6208 use the scalar amount directly thus give up and use a vector
6209 shift. */
6210 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6211 scalar_shift_arg = false;
6213 else
6215 if (dump_enabled_p ())
6216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6217 "operand mode requires invariant argument.\n");
6218 return false;
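/* Illustration (mine):

     for (i = 0; i < n; i++) a[i] = b[i] << 3;      // invariant amount
     for (i = 0; i < n; i++) a[i] = b[i] << c[i];   // per-element amount

   The first loop keeps SCALAR_SHIFT_ARG set and may use the
   vector-by-scalar optab; in the second, dt[1] is vect_internal_def,
   SCALAR_SHIFT_ARG is cleared and the vector-by-vector optab is
   required.  */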
6221 /* Vector shifted by vector. */
6222 bool was_scalar_shift_arg = scalar_shift_arg;
6223 if (!scalar_shift_arg)
6225 optab = optab_for_tree_code (code, vectype, optab_vector);
6226 if (dump_enabled_p ())
6227 dump_printf_loc (MSG_NOTE, vect_location,
6228 "vector/vector shift/rotate found.\n");
6230 if (!op1_vectype)
6231 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6232 slp_op1);
6233 incompatible_op1_vectype_p
6234 = (op1_vectype == NULL_TREE
6235 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6236 TYPE_VECTOR_SUBPARTS (vectype))
6237 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6238 if (incompatible_op1_vectype_p
6239 && (!slp_node
6240 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6241 || slp_op1->refcnt != 1))
6243 if (dump_enabled_p ())
6244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6245 "unusable type for last operand in"
6246 " vector/vector shift/rotate.\n");
6247 return false;
6250 /* See if the machine has a vector shifted by scalar insn and if not
6251 then see if it has a vector shifted by vector insn. */
6252 else
6254 optab = optab_for_tree_code (code, vectype, optab_scalar);
6255 if (optab
6256 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6258 if (dump_enabled_p ())
6259 dump_printf_loc (MSG_NOTE, vect_location,
6260 "vector/scalar shift/rotate found.\n");
6262 else
6264 optab = optab_for_tree_code (code, vectype, optab_vector);
6265 if (optab
6266 && (optab_handler (optab, TYPE_MODE (vectype))
6267 != CODE_FOR_nothing))
6269 scalar_shift_arg = false;
6271 if (dump_enabled_p ())
6272 dump_printf_loc (MSG_NOTE, vect_location,
6273 "vector/vector shift/rotate found.\n");
6275 if (!op1_vectype)
6276 op1_vectype = get_vectype_for_scalar_type (vinfo,
6277 TREE_TYPE (op1),
6278 slp_op1);
6280 /* Unlike the other binary operators, shifts/rotates have
6281 the rhs being int, instead of the same type as the lhs,
6282 so make sure the scalar is the right type if we are
6283 dealing with vectors of long long/long/short/char. */
6284 incompatible_op1_vectype_p
6285 = (!op1_vectype
6286 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6287 TREE_TYPE (op1)));
6288 if (incompatible_op1_vectype_p
6289 && dt[1] == vect_internal_def)
6291 if (dump_enabled_p ())
6292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6293 "unusable type for last operand in"
6294 " vector/vector shift/rotate.\n");
6295 return false;
6301 /* Supportable by target? */
6302 if (!optab)
6304 if (dump_enabled_p ())
6305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6306 "no optab.\n");
6307 return false;
6309 vec_mode = TYPE_MODE (vectype);
6310 icode = (int) optab_handler (optab, vec_mode);
6311 if (icode == CODE_FOR_nothing)
6313 if (dump_enabled_p ())
6314 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6315 "op not supported by target.\n");
6316 return false;
6318 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6319 if (vect_emulated_vector_p (vectype))
6320 return false;
6322 if (!vec_stmt) /* transformation not required. */
6324 if (slp_node
6325 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6326 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6327 && (!incompatible_op1_vectype_p
6328 || dt[1] == vect_constant_def)
6329 && !vect_maybe_update_slp_op_vectype
6330 (slp_op1,
6331 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6333 if (dump_enabled_p ())
6334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6335 "incompatible vector types for invariants\n");
6336 return false;
6338 /* Now adjust the constant shift amount in place. */
6339 if (slp_node
6340 && incompatible_op1_vectype_p
6341 && dt[1] == vect_constant_def)
6343 for (unsigned i = 0;
6344 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6346 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6347 = fold_convert (TREE_TYPE (vectype),
6348 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6349 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6350 == INTEGER_CST));
6353 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6354 DUMP_VECT_SCOPE ("vectorizable_shift");
6355 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6356 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6357 return true;
6360 /* Transform. */
6362 if (dump_enabled_p ())
6363 dump_printf_loc (MSG_NOTE, vect_location,
6364 "transform binary/unary operation.\n");
6366 if (incompatible_op1_vectype_p && !slp_node)
6368 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6369 op1 = fold_convert (TREE_TYPE (vectype), op1);
6370 if (dt[1] != vect_constant_def)
6371 op1 = vect_init_vector (vinfo, stmt_info, op1,
6372 TREE_TYPE (vectype), NULL);
6375 /* Handle def. */
6376 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6378 if (scalar_shift_arg && dt[1] != vect_internal_def)
6380 /* Vector shl and shr insn patterns can be defined with scalar
6381 operand 2 (shift operand). In this case, use constant or loop
6382 invariant op1 directly, without extending it to vector mode
6383 first. */
6384 optab_op2_mode = insn_data[icode].operand[2].mode;
6385 if (!VECTOR_MODE_P (optab_op2_mode))
6387 if (dump_enabled_p ())
6388 dump_printf_loc (MSG_NOTE, vect_location,
6389 "operand 1 using scalar mode.\n");
6390 vec_oprnd1 = op1;
6391 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6392 vec_oprnds1.quick_push (vec_oprnd1);
6393 /* Store vec_oprnd1 for every vector stmt to be created.
6394 We check during the analysis that all the shift arguments
6395 are the same.
6396 TODO: Allow different constants for different vector
6397 stmts generated for an SLP instance. */
6398 for (k = 0;
6399 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6400 vec_oprnds1.quick_push (vec_oprnd1);
6403 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6405 if (was_scalar_shift_arg)
6407 /* If the argument was the same in all lanes create
6408 the correctly typed vector shift amount directly. */
6409 op1 = fold_convert (TREE_TYPE (vectype), op1);
6410 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6411 !loop_vinfo ? gsi : NULL);
6412 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6413 !loop_vinfo ? gsi : NULL);
6414 vec_oprnds1.create (slp_node->vec_stmts_size);
6415 for (k = 0; k < slp_node->vec_stmts_size; k++)
6416 vec_oprnds1.quick_push (vec_oprnd1);
6418 else if (dt[1] == vect_constant_def)
6419 /* The constant shift amount has been adjusted in place. */
6421 else
6422 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6425 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6426 (a special case for certain kinds of vector shifts); otherwise,
6427 operand 1 should be of a vector type (the usual case). */
6428 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6429 op0, &vec_oprnds0,
6430 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6432 /* Arguments are ready. Create the new vector stmt. */
6433 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6435 /* For internal defs where we need to use a scalar shift arg
6436 extract the first lane. */
6437 if (scalar_shift_arg && dt[1] == vect_internal_def)
6439 vop1 = vec_oprnds1[0];
6440 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6441 gassign *new_stmt
6442 = gimple_build_assign (new_temp,
6443 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6444 vop1,
6445 TYPE_SIZE (TREE_TYPE (new_temp)),
6446 bitsize_zero_node));
6447 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6448 vop1 = new_temp;
6450 else
6451 vop1 = vec_oprnds1[i];
6452 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6453 new_temp = make_ssa_name (vec_dest, new_stmt);
6454 gimple_assign_set_lhs (new_stmt, new_temp);
6455 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6456 if (slp_node)
6457 slp_node->push_vec_def (new_stmt);
6458 else
6459 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6462 if (!slp_node)
6463 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6465 vec_oprnds0.release ();
6466 vec_oprnds1.release ();
6468 return true;
6471 /* Function vectorizable_operation.
6473 Check if STMT_INFO performs a binary, unary or ternary operation that can
6474 be vectorized.
6475 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6476 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6477 Return true if STMT_INFO is vectorizable in this way. */
6479 static bool
6480 vectorizable_operation (vec_info *vinfo,
6481 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6482 gimple **vec_stmt, slp_tree slp_node,
6483 stmt_vector_for_cost *cost_vec)
6485 tree vec_dest;
6486 tree scalar_dest;
6487 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6488 tree vectype;
6489 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6490 enum tree_code code, orig_code;
6491 machine_mode vec_mode;
6492 tree new_temp;
6493 int op_type;
6494 optab optab;
6495 bool target_support_p;
6496 enum vect_def_type dt[3]
6497 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6498 int ndts = 3;
6499 poly_uint64 nunits_in;
6500 poly_uint64 nunits_out;
6501 tree vectype_out;
6502 int ncopies, vec_num;
6503 int i;
6504 vec<tree> vec_oprnds0 = vNULL;
6505 vec<tree> vec_oprnds1 = vNULL;
6506 vec<tree> vec_oprnds2 = vNULL;
6507 tree vop0, vop1, vop2;
6508 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6510 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6511 return false;
6513 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6514 && ! vec_stmt)
6515 return false;
6517 /* Is STMT a vectorizable binary/unary operation? */
6518 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6519 if (!stmt)
6520 return false;
6522 /* Loads and stores are handled in vectorizable_{load,store}. */
6523 if (STMT_VINFO_DATA_REF (stmt_info))
6524 return false;
6526 orig_code = code = gimple_assign_rhs_code (stmt);
6528 /* Shifts are handled in vectorizable_shift. */
6529 if (code == LSHIFT_EXPR
6530 || code == RSHIFT_EXPR
6531 || code == LROTATE_EXPR
6532 || code == RROTATE_EXPR)
6533 return false;
6535 /* Comparisons are handled in vectorizable_comparison. */
6536 if (TREE_CODE_CLASS (code) == tcc_comparison)
6537 return false;
6539 /* Conditions are handled in vectorizable_condition. */
6540 if (code == COND_EXPR)
6541 return false;
6543 /* For pointer addition and subtraction, we should use the normal
6544 plus and minus for the vector operation. */
6545 if (code == POINTER_PLUS_EXPR)
6546 code = PLUS_EXPR;
6547 if (code == POINTER_DIFF_EXPR)
6548 code = MINUS_EXPR;
6550 /* Support only unary or binary operations. */
6551 op_type = TREE_CODE_LENGTH (code);
6552 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6554 if (dump_enabled_p ())
6555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6556 "num. args = %d (not unary/binary/ternary op).\n",
6557 op_type);
6558 return false;
6561 scalar_dest = gimple_assign_lhs (stmt);
6562 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6564 /* Most operations cannot handle bit-precision types without extra
6565 truncations. */
6566 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6567 if (!mask_op_p
6568 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6569 /* Exception are bitwise binary operations. */
6570 && code != BIT_IOR_EXPR
6571 && code != BIT_XOR_EXPR
6572 && code != BIT_AND_EXPR)
6574 if (dump_enabled_p ())
6575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6576 "bit-precision arithmetic not supported.\n");
6577 return false;
6580 slp_tree slp_op0;
6581 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6582 0, &op0, &slp_op0, &dt[0], &vectype))
6584 if (dump_enabled_p ())
6585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6586 "use not simple.\n");
6587 return false;
6589 bool is_invariant = (dt[0] == vect_external_def
6590 || dt[0] == vect_constant_def);
6591 /* If op0 is an external or constant def, infer the vector type
6592 from the scalar type. */
6593 if (!vectype)
6595 /* For boolean type we cannot determine vectype by
6596 invariant value (don't know whether it is a vector
6597 of booleans or vector of integers). We use output
6598 vectype because operations on boolean don't change
6599 type. */
6600 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6602 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6604 if (dump_enabled_p ())
6605 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6606 "not supported operation on bool value.\n");
6607 return false;
6609 vectype = vectype_out;
6611 else
6612 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6613 slp_node);
6615 if (vec_stmt)
6616 gcc_assert (vectype);
6617 if (!vectype)
6619 if (dump_enabled_p ())
6620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6621 "no vectype for scalar type %T\n",
6622 TREE_TYPE (op0));
6624 return false;
6627 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6628 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6629 if (maybe_ne (nunits_out, nunits_in))
6630 return false;
6632 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6633 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6634 if (op_type == binary_op || op_type == ternary_op)
6636 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6637 1, &op1, &slp_op1, &dt[1], &vectype2))
6639 if (dump_enabled_p ())
6640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6641 "use not simple.\n");
6642 return false;
6644 is_invariant &= (dt[1] == vect_external_def
6645 || dt[1] == vect_constant_def);
6646 if (vectype2
6647 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6648 return false;
6650 if (op_type == ternary_op)
6652 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6653 2, &op2, &slp_op2, &dt[2], &vectype3))
6655 if (dump_enabled_p ())
6656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6657 "use not simple.\n");
6658 return false;
6660 is_invariant &= (dt[2] == vect_external_def
6661 || dt[2] == vect_constant_def);
6662 if (vectype3
6663 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6664 return false;
6667 /* Multiple types in SLP are handled by creating the appropriate number of
6668 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6669 case of SLP. */
6670 if (slp_node)
6672 ncopies = 1;
6673 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6675 else
6677 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6678 vec_num = 1;
6681 gcc_assert (ncopies >= 1);
6683 /* Reject attempts to combine mask types with nonmask types, e.g. if
6684 we have an AND between a (nonmask) boolean loaded from memory and
6685 a (mask) boolean result of a comparison.
6687 TODO: We could easily fix these cases up using pattern statements. */
6688 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6689 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6690 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6692 if (dump_enabled_p ())
6693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6694 "mixed mask and nonmask vector types\n");
6695 return false;
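/* Hedged example of the mixed mask/non-mask rejection above:

     _Bool *p;  ...  res[i] = p[i] & (x[i] < y[i]);

   p[i] is a non-mask boolean loaded from memory (one byte per lane)
   while the comparison yields a mask vector, so unless a pattern has
   rewritten one of the operands the AND is not handled here.  */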
6698 /* Supportable by target? */
6700 vec_mode = TYPE_MODE (vectype);
6701 if (code == MULT_HIGHPART_EXPR)
6702 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6703 else
6705 optab = optab_for_tree_code (code, vectype, optab_default);
6706 if (!optab)
6708 if (dump_enabled_p ())
6709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6710 "no optab.\n");
6711 return false;
6713 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6714 || optab_libfunc (optab, vec_mode));
6717 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6718 if (!target_support_p || using_emulated_vectors_p)
6720 if (dump_enabled_p ())
6721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6722 "op not supported by target.\n");
6723 /* When vec_mode is not a vector mode and we verified that ops we
6724 do not have to lower (like AND) are natively supported, let
6725 those through even when the mode isn't word_mode. For
6726 ops we do have to lower, the lowering code assumes we are
6727 dealing with word_mode. */
6728 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6729 || !target_support_p)
6730 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6731 /* Check only during analysis. */
6732 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6734 if (dump_enabled_p ())
6735 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6736 return false;
6738 if (dump_enabled_p ())
6739 dump_printf_loc (MSG_NOTE, vect_location,
6740 "proceeding using word mode.\n");
6741 using_emulated_vectors_p = true;
6744 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6745 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6746 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6747 internal_fn cond_fn = get_conditional_internal_fn (code);
6748 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6750 /* If operating on inactive elements could generate spurious traps,
6751 we need to restrict the operation to active lanes. Note that this
6752 specifically doesn't apply to unhoisted invariants, since they
6753 operate on the same value for every lane.
6755 Similarly, if this operation is part of a reduction, a fully-masked
6756 loop should only change the active lanes of the reduction chain,
6757 keeping the inactive lanes as-is. */
6758 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6759 || reduc_idx >= 0);
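/* Illustration (mine): in a fully-masked loop for

     for (i = 0; i < n; i++) q[i] = a[i] / b[i];

   the division may trap on inactive lanes (e.g. a stale zero
   divisor), so MASK_OUT_INACTIVE is set and the analysis below looks
   for a conditional internal function such as IFN_COND_DIV or its
   _LEN variant.  A plain addition of two loaded values normally
   cannot trap and is emitted unconditionally.  */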
6761 if (!vec_stmt) /* transformation not required. */
6763 if (loop_vinfo
6764 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6765 && mask_out_inactive)
6767 if (cond_len_fn != IFN_LAST
6768 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6769 OPTIMIZE_FOR_SPEED))
6770 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6772 else if (cond_fn != IFN_LAST
6773 && direct_internal_fn_supported_p (cond_fn, vectype,
6774 OPTIMIZE_FOR_SPEED))
6775 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6776 vectype, NULL);
6777 else
6779 if (dump_enabled_p ())
6780 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6781 "can't use a fully-masked loop because no"
6782 " conditional operation is available.\n");
6783 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6787 /* Put types on constant and invariant SLP children. */
6788 if (slp_node
6789 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6790 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6791 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6793 if (dump_enabled_p ())
6794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6795 "incompatible vector types for invariants\n");
6796 return false;
6799 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6800 DUMP_VECT_SCOPE ("vectorizable_operation");
6801 vect_model_simple_cost (vinfo, stmt_info,
6802 ncopies, dt, ndts, slp_node, cost_vec);
6803 if (using_emulated_vectors_p)
6805 /* The above vect_model_simple_cost call handles constants
6806 in the prologue and (mis-)costs one of the stmts as
6807 vector stmt. See below for the actual lowering that will
6808 be applied. */
6809 unsigned n
6810 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6811 switch (code)
6813 case PLUS_EXPR:
6814 n *= 5;
6815 break;
6816 case MINUS_EXPR:
6817 n *= 6;
6818 break;
6819 case NEGATE_EXPR:
6820 n *= 4;
6821 break;
6822 default:
6823 /* Bit operations do not have extra cost and are accounted
6824 as vector stmt by vect_model_simple_cost. */
6825 n = 0;
6826 break;
6828 if (n != 0)
6830 /* We also need to materialize two large constants. */
6831 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6832 0, vect_prologue);
6833 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6834 0, vect_body);
6837 return true;
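/* Worked cost example (illustrative): an emulated V8QI PLUS_EXPR with
   NCOPIES == 1 is costed as n = 1 * 5 scalar stmts in the body (the
   word-mode mask/add/fix-up sequence generated further down) plus two
   scalar stmts in the prologue for the LOW_BITS and HIGH_BITS
   constants; MINUS_EXPR costs 6 and NEGATE_EXPR 4 per copy, while the
   bitwise ops add nothing on top of the plain vector-stmt cost.  */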
6840 /* Transform. */
6842 if (dump_enabled_p ())
6843 dump_printf_loc (MSG_NOTE, vect_location,
6844 "transform binary/unary operation.\n");
6846 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6847 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6849 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6850 vectors with unsigned elements, but the result is signed. So, we
6851 need to compute the MINUS_EXPR into vectype temporary and
6852 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6853 tree vec_cvt_dest = NULL_TREE;
6854 if (orig_code == POINTER_DIFF_EXPR)
6856 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6857 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6859 /* Handle def. */
6860 else
6861 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6863 /* In case the vectorization factor (VF) is bigger than the number
6864 of elements that we can fit in a vectype (nunits), we have to generate
6865 more than one vector stmt - i.e. we need to "unroll" the
6866 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6867 from one copy of the vector stmt to the next, in the field
6868 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6869 stages to find the correct vector defs to be used when vectorizing
6870 stmts that use the defs of the current stmt. The example below
6871 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6872 we need to create 4 vectorized stmts):
6874 before vectorization:
6875 RELATED_STMT VEC_STMT
6876 S1: x = memref - -
6877 S2: z = x + 1 - -
6879 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6880 there):
6881 RELATED_STMT VEC_STMT
6882 VS1_0: vx0 = memref0 VS1_1 -
6883 VS1_1: vx1 = memref1 VS1_2 -
6884 VS1_2: vx2 = memref2 VS1_3 -
6885 VS1_3: vx3 = memref3 - -
6886 S1: x = load - VS1_0
6887 S2: z = x + 1 - -
6889 step2: vectorize stmt S2 (done here):
6890 To vectorize stmt S2 we first need to find the relevant vector
6891 def for the first operand 'x'. This is, as usual, obtained from
6892 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6893 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6894 relevant vector def 'vx0'. Having found 'vx0' we can generate
6895 the vector stmt VS2_0, and as usual, record it in the
6896 STMT_VINFO_VEC_STMT of stmt S2.
6897 When creating the second copy (VS2_1), we obtain the relevant vector
6898 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6899 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6900 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6901 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6902 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6903 chain of stmts and pointers:
6904 RELATED_STMT VEC_STMT
6905 VS1_0: vx0 = memref0 VS1_1 -
6906 VS1_1: vx1 = memref1 VS1_2 -
6907 VS1_2: vx2 = memref2 VS1_3 -
6908 VS1_3: vx3 = memref3 - -
6909 S1: x = load - VS1_0
6910 VS2_0: vz0 = vx0 + v1 VS2_1 -
6911 VS2_1: vz1 = vx1 + v1 VS2_2 -
6912 VS2_2: vz2 = vx2 + v1 VS2_3 -
6913 VS2_3: vz3 = vx3 + v1 - -
6914 S2: z = x + 1 - VS2_0 */
6916 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6917 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6918 /* Arguments are ready. Create the new vector stmt. */
6919 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6921 gimple *new_stmt = NULL;
6922 vop1 = ((op_type == binary_op || op_type == ternary_op)
6923 ? vec_oprnds1[i] : NULL_TREE);
6924 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6925 if (using_emulated_vectors_p
6926 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6928 /* Lower the operation. This follows vector lowering. */
6929 unsigned int width = vector_element_bits (vectype);
6930 tree inner_type = TREE_TYPE (vectype);
6931 tree word_type
6932 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
6933 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6934 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
6935 tree high_bits
6936 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
6937 tree wvop0 = make_ssa_name (word_type);
6938 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6939 build1 (VIEW_CONVERT_EXPR,
6940 word_type, vop0));
6941 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6942 tree result_low, signs;
6943 if (code == PLUS_EXPR || code == MINUS_EXPR)
6945 tree wvop1 = make_ssa_name (word_type);
6946 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6947 build1 (VIEW_CONVERT_EXPR,
6948 word_type, vop1));
6949 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6950 signs = make_ssa_name (word_type);
6951 new_stmt = gimple_build_assign (signs,
6952 BIT_XOR_EXPR, wvop0, wvop1);
6953 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6954 tree b_low = make_ssa_name (word_type);
6955 new_stmt = gimple_build_assign (b_low,
6956 BIT_AND_EXPR, wvop1, low_bits);
6957 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6958 tree a_low = make_ssa_name (word_type);
6959 if (code == PLUS_EXPR)
6960 new_stmt = gimple_build_assign (a_low,
6961 BIT_AND_EXPR, wvop0, low_bits);
6962 else
6963 new_stmt = gimple_build_assign (a_low,
6964 BIT_IOR_EXPR, wvop0, high_bits);
6965 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6966 if (code == MINUS_EXPR)
6968 new_stmt = gimple_build_assign (NULL_TREE,
6969 BIT_NOT_EXPR, signs);
6970 signs = make_ssa_name (word_type);
6971 gimple_assign_set_lhs (new_stmt, signs);
6972 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6974 new_stmt = gimple_build_assign (NULL_TREE,
6975 BIT_AND_EXPR, signs, high_bits);
6976 signs = make_ssa_name (word_type);
6977 gimple_assign_set_lhs (new_stmt, signs);
6978 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6979 result_low = make_ssa_name (word_type);
6980 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
6981 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6983 else
6985 tree a_low = make_ssa_name (word_type);
6986 new_stmt = gimple_build_assign (a_low,
6987 BIT_AND_EXPR, wvop0, low_bits);
6988 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6989 signs = make_ssa_name (word_type);
6990 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
6991 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6992 new_stmt = gimple_build_assign (NULL_TREE,
6993 BIT_AND_EXPR, signs, high_bits);
6994 signs = make_ssa_name (word_type);
6995 gimple_assign_set_lhs (new_stmt, signs);
6996 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6997 result_low = make_ssa_name (word_type);
6998 new_stmt = gimple_build_assign (result_low,
6999 MINUS_EXPR, high_bits, a_low);
7000 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7002 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
7003 signs);
7004 result_low = make_ssa_name (word_type);
7005 gimple_assign_set_lhs (new_stmt, result_low);
7006 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7007 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7008 build1 (VIEW_CONVERT_EXPR,
7009 vectype, result_low));
7010 new_temp = make_ssa_name (vectype);
7011 gimple_assign_set_lhs (new_stmt, new_temp);
7012 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7014 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7016 tree mask;
7017 if (masked_loop_p)
7018 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7019 vec_num * ncopies, vectype, i);
7020 else
7021 /* Dummy mask. */
7022 mask = build_minus_one_cst (truth_type_for (vectype));
7023 auto_vec<tree> vops (6);
7024 vops.quick_push (mask);
7025 vops.quick_push (vop0);
7026 if (vop1)
7027 vops.quick_push (vop1);
7028 if (vop2)
7029 vops.quick_push (vop2);
7030 if (reduc_idx >= 0)
7032 /* Perform the operation on active elements only and take
7033 inactive elements from the reduction chain input. */
7034 gcc_assert (!vop2);
7035 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7037 else
7039 auto else_value = targetm.preferred_else_value
7040 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7041 vops.quick_push (else_value);
7043 if (len_loop_p)
7045 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7046 vec_num * ncopies, vectype, i, 1);
7047 signed char biasval
7048 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7049 tree bias = build_int_cst (intQI_type_node, biasval);
7050 vops.quick_push (len);
7051 vops.quick_push (bias);
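/* For illustration, for a binary operation such as addition the call
   built below has the shape
     lhs = .COND_ADD (MASK, VOP0, VOP1, ELSE);
   in the masked case, or
     lhs = .COND_LEN_ADD (MASK, VOP0, VOP1, ELSE, LEN, BIAS);
   in the length-based case, where ELSE is either the reduction chain
   input or the target's preferred else value.  */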
7053 gcall *call
7054 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7055 : cond_len_fn,
7056 vops);
7057 new_temp = make_ssa_name (vec_dest, call);
7058 gimple_call_set_lhs (call, new_temp);
7059 gimple_call_set_nothrow (call, true);
7060 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7061 new_stmt = call;
7063 else
7065 tree mask = NULL_TREE;
7066 /* When combining two masks, check whether either of them is elsewhere
7067 combined with a loop mask; if so, we can record that the new combined
7068 mask does not need to be combined with a loop mask again. */
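/* For example, if OP0 is also used elsewhere as (OP0 & LOOP_MASK), we AND
   the loop mask into VOP0 here; the combined value is then entered into
   vec_cond_masked_set below so later users do not AND it with the loop
   mask a second time.  */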
7069 if (masked_loop_p
7070 && code == BIT_AND_EXPR
7071 && VECTOR_BOOLEAN_TYPE_P (vectype))
7073 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7074 ncopies}))
7076 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7077 vec_num * ncopies, vectype, i);
7079 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7080 vop0, gsi);
7083 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7084 ncopies }))
7086 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7087 vec_num * ncopies, vectype, i);
7089 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7090 vop1, gsi);
7094 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7095 new_temp = make_ssa_name (vec_dest, new_stmt);
7096 gimple_assign_set_lhs (new_stmt, new_temp);
7097 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7098 if (using_emulated_vectors_p)
7099 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7101 /* Enter the combined value into the vector cond hash so we don't
7102 AND it with a loop mask again. */
7103 if (mask)
7104 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7107 if (vec_cvt_dest)
7109 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7110 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7111 new_temp);
7112 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7113 gimple_assign_set_lhs (new_stmt, new_temp);
7114 vect_finish_stmt_generation (vinfo, stmt_info,
7115 new_stmt, gsi);
7118 if (slp_node)
7119 slp_node->push_vec_def (new_stmt);
7120 else
7121 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7124 if (!slp_node)
7125 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7127 vec_oprnds0.release ();
7128 vec_oprnds1.release ();
7129 vec_oprnds2.release ();
7131 return true;
7134 /* A helper function to ensure data reference DR_INFO's base alignment. */
7136 static void
7137 ensure_base_align (dr_vec_info *dr_info)
7139 /* Alignment is only analyzed for the first element of a DR group;
7140 use that element to determine the base alignment we need to enforce. */
7141 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7142 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7144 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7146 if (dr_info->base_misaligned)
7148 tree base_decl = dr_info->base_decl;
7150 // We should only be able to increase the alignment of a base object if
7151 // we know what its new alignment should be at compile time.
7152 unsigned HOST_WIDE_INT align_base_to =
7153 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7155 if (decl_in_symtab_p (base_decl))
7156 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7157 else if (DECL_ALIGN (base_decl) < align_base_to)
7159 SET_DECL_ALIGN (base_decl, align_base_to);
7160 DECL_USER_ALIGN (base_decl) = 1;
7162 dr_info->base_misaligned = false;
7167 /* Function get_group_alias_ptr_type.
7169 Return the alias type for the group starting at FIRST_STMT_INFO. */
7171 static tree
7172 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7174 struct data_reference *first_dr, *next_dr;
7176 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7177 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7178 while (next_stmt_info)
7180 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7181 if (get_alias_set (DR_REF (first_dr))
7182 != get_alias_set (DR_REF (next_dr)))
7184 if (dump_enabled_p ())
7185 dump_printf_loc (MSG_NOTE, vect_location,
7186 "conflicting alias set types.\n");
7187 return ptr_type_node;
7189 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7191 return reference_alias_ptr_type (DR_REF (first_dr));
7195 /* Function scan_operand_equal_p.
7197 Helper function for check_scan_store. Compare two references
7198 with .GOMP_SIMD_LANE bases. */
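/* For illustration, both references are typically "omp simd array" accesses
   of the form D.2042[_20] with _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0)
   (see the example in check_scan_store below), possibly with the index fed
   through a conversion or a multiplication by a constant step, which is
   why the offset comparison also strips casts and MULT_EXPRs.  */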
7200 static bool
7201 scan_operand_equal_p (tree ref1, tree ref2)
7203 tree ref[2] = { ref1, ref2 };
7204 poly_int64 bitsize[2], bitpos[2];
7205 tree offset[2], base[2];
7206 for (int i = 0; i < 2; ++i)
7208 machine_mode mode;
7209 int unsignedp, reversep, volatilep = 0;
7210 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7211 &offset[i], &mode, &unsignedp,
7212 &reversep, &volatilep);
7213 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7214 return false;
7215 if (TREE_CODE (base[i]) == MEM_REF
7216 && offset[i] == NULL_TREE
7217 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7219 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7220 if (is_gimple_assign (def_stmt)
7221 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7222 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7223 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7225 if (maybe_ne (mem_ref_offset (base[i]), 0))
7226 return false;
7227 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7228 offset[i] = gimple_assign_rhs2 (def_stmt);
7233 if (!operand_equal_p (base[0], base[1], 0))
7234 return false;
7235 if (maybe_ne (bitsize[0], bitsize[1]))
7236 return false;
7237 if (offset[0] != offset[1])
7239 if (!offset[0] || !offset[1])
7240 return false;
7241 if (!operand_equal_p (offset[0], offset[1], 0))
7243 tree step[2];
7244 for (int i = 0; i < 2; ++i)
7246 step[i] = integer_one_node;
7247 if (TREE_CODE (offset[i]) == SSA_NAME)
7249 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7250 if (is_gimple_assign (def_stmt)
7251 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7252 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7253 == INTEGER_CST))
7255 step[i] = gimple_assign_rhs2 (def_stmt);
7256 offset[i] = gimple_assign_rhs1 (def_stmt);
7259 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7261 step[i] = TREE_OPERAND (offset[i], 1);
7262 offset[i] = TREE_OPERAND (offset[i], 0);
7264 tree rhs1 = NULL_TREE;
7265 if (TREE_CODE (offset[i]) == SSA_NAME)
7267 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7268 if (gimple_assign_cast_p (def_stmt))
7269 rhs1 = gimple_assign_rhs1 (def_stmt);
7271 else if (CONVERT_EXPR_P (offset[i]))
7272 rhs1 = TREE_OPERAND (offset[i], 0);
7273 if (rhs1
7274 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7275 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7276 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7277 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7278 offset[i] = rhs1;
7280 if (!operand_equal_p (offset[0], offset[1], 0)
7281 || !operand_equal_p (step[0], step[1], 0))
7282 return false;
7285 return true;
7289 enum scan_store_kind {
7290 /* Normal permutation. */
7291 scan_store_kind_perm,
7293 /* Whole vector left shift permutation with zero init. */
7294 scan_store_kind_lshift_zero,
7296 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7297 scan_store_kind_lshift_cond
7300 /* Function scan_store_can_perm_p.
7302 Verify whether we can perform the needed permutations or whole vector shifts.
7303 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
7304 USE_WHOLE_VECTOR is filled with the enum scan_store_kind operation
7305 to perform at each step. */
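/* For illustration: with an 8-lane vector the loop below tries the
   selectors
     i == 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }
     i == 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }
     i == 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }
     i == 3:  { 7, 7, 7, 7, 7, 7, 7, 7 }   (broadcast of the last lane)
   matching the scan expansion documented in check_scan_store, and falls
   back to whole-vector left shifts (plus a VEC_COND_EXPR when the
   initializer is not an all-zero constant) when a permutation is not
   supported.  */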
7307 static int
7308 scan_store_can_perm_p (tree vectype, tree init,
7309 vec<enum scan_store_kind> *use_whole_vector = NULL)
7311 enum machine_mode vec_mode = TYPE_MODE (vectype);
7312 unsigned HOST_WIDE_INT nunits;
7313 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7314 return -1;
7315 int units_log2 = exact_log2 (nunits);
7316 if (units_log2 <= 0)
7317 return -1;
7319 int i;
7320 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7321 for (i = 0; i <= units_log2; ++i)
7323 unsigned HOST_WIDE_INT j, k;
7324 enum scan_store_kind kind = scan_store_kind_perm;
7325 vec_perm_builder sel (nunits, nunits, 1);
7326 sel.quick_grow (nunits);
7327 if (i == units_log2)
7329 for (j = 0; j < nunits; ++j)
7330 sel[j] = nunits - 1;
7332 else
7334 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7335 sel[j] = j;
7336 for (k = 0; j < nunits; ++j, ++k)
7337 sel[j] = nunits + k;
7339 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7340 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7342 if (i == units_log2)
7343 return -1;
7345 if (whole_vector_shift_kind == scan_store_kind_perm)
7347 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7348 return -1;
7349 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7350 /* Whole vector shifts shift in zeros, so if init is an all-zero
7351 constant, there is no need to do anything further. */
7352 if ((TREE_CODE (init) != INTEGER_CST
7353 && TREE_CODE (init) != REAL_CST)
7354 || !initializer_zerop (init))
7356 tree masktype = truth_type_for (vectype);
7357 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7358 return -1;
7359 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7362 kind = whole_vector_shift_kind;
7364 if (use_whole_vector)
7366 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7367 use_whole_vector->safe_grow_cleared (i, true);
7368 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7369 use_whole_vector->safe_push (kind);
7373 return units_log2;
7377 /* Function check_scan_store.
7379 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7381 static bool
7382 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7383 enum vect_def_type rhs_dt, bool slp, tree mask,
7384 vect_memory_access_type memory_access_type)
7386 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7387 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7388 tree ref_type;
7390 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7391 if (slp
7392 || mask
7393 || memory_access_type != VMAT_CONTIGUOUS
7394 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7395 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7396 || loop_vinfo == NULL
7397 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7398 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7399 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7400 || !integer_zerop (DR_INIT (dr_info->dr))
7401 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7402 || !alias_sets_conflict_p (get_alias_set (vectype),
7403 get_alias_set (TREE_TYPE (ref_type))))
7405 if (dump_enabled_p ())
7406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7407 "unsupported OpenMP scan store.\n");
7408 return false;
7411 /* We need to pattern match code built by OpenMP lowering and simplified
7412 by subsequent optimizations into something we can handle.
7413 #pragma omp simd reduction(inscan,+:r)
7414 for (...)
7416 r += something ();
7417 #pragma omp scan inclusive (r)
7418 use (r);
7420 shall have body with:
7421 // Initialization for input phase, store the reduction initializer:
7422 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7423 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7424 D.2042[_21] = 0;
7425 // Actual input phase:
7427 r.0_5 = D.2042[_20];
7428 _6 = _4 + r.0_5;
7429 D.2042[_20] = _6;
7430 // Initialization for scan phase:
7431 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7432 _26 = D.2043[_25];
7433 _27 = D.2042[_25];
7434 _28 = _26 + _27;
7435 D.2043[_25] = _28;
7436 D.2042[_25] = _28;
7437 // Actual scan phase:
7439 r.1_8 = D.2042[_20];
7441 The "omp simd array" variable D.2042 holds the privatized copy used
7442 inside the loop, and D.2043 is another one that holds copies of
7443 the current original list item. The separate GOMP_SIMD_LANE ifn
7444 kinds are there to allow optimizing the initializer store
7445 and combiner sequence, e.g. if it is originally some C++-ish user
7446 defined reduction, while still allowing the vectorizer to pattern
7447 recognize it and turn it into the appropriate vectorized scan.
7449 For exclusive scan, this is slightly different:
7450 #pragma omp simd reduction(inscan,+:r)
7451 for (...)
7453 use (r);
7454 #pragma omp scan exclusive (r)
7455 r += something ();
7457 shall have body with:
7458 // Initialization for input phase, store the reduction initializer:
7459 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7460 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7461 D.2042[_21] = 0;
7462 // Actual input phase:
7464 r.0_5 = D.2042[_20];
7465 _6 = _4 + r.0_5;
7466 D.2042[_20] = _6;
7467 // Initialization for scan phase:
7468 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7469 _26 = D.2043[_25];
7470 D.2044[_25] = _26;
7471 _27 = D.2042[_25];
7472 _28 = _26 + _27;
7473 D.2043[_25] = _28;
7474 // Actual scan phase:
7476 r.1_8 = D.2044[_20];
7477 ... */
7479 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7481 /* Match the D.2042[_21] = 0; store above. Just require that
7482 it is a constant or external definition store. */
7483 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7485 fail_init:
7486 if (dump_enabled_p ())
7487 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7488 "unsupported OpenMP scan initializer store.\n");
7489 return false;
7492 if (! loop_vinfo->scan_map)
7493 loop_vinfo->scan_map = new hash_map<tree, tree>;
7494 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7495 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7496 if (cached)
7497 goto fail_init;
7498 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7500 /* These stores can be vectorized normally. */
7501 return true;
7504 if (rhs_dt != vect_internal_def)
7506 fail:
7507 if (dump_enabled_p ())
7508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7509 "unsupported OpenMP scan combiner pattern.\n");
7510 return false;
7513 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7514 tree rhs = gimple_assign_rhs1 (stmt);
7515 if (TREE_CODE (rhs) != SSA_NAME)
7516 goto fail;
7518 gimple *other_store_stmt = NULL;
7519 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7520 bool inscan_var_store
7521 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7523 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7525 if (!inscan_var_store)
7527 use_operand_p use_p;
7528 imm_use_iterator iter;
7529 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7531 gimple *use_stmt = USE_STMT (use_p);
7532 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7533 continue;
7534 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7535 || !is_gimple_assign (use_stmt)
7536 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7537 || other_store_stmt
7538 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7539 goto fail;
7540 other_store_stmt = use_stmt;
7542 if (other_store_stmt == NULL)
7543 goto fail;
7544 rhs = gimple_assign_lhs (other_store_stmt);
7545 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7546 goto fail;
7549 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7551 use_operand_p use_p;
7552 imm_use_iterator iter;
7553 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7555 gimple *use_stmt = USE_STMT (use_p);
7556 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7557 continue;
7558 if (other_store_stmt)
7559 goto fail;
7560 other_store_stmt = use_stmt;
7563 else
7564 goto fail;
7566 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7567 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7568 || !is_gimple_assign (def_stmt)
7569 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7570 goto fail;
7572 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7573 /* For pointer addition, we should use the normal plus for the vector
7574 operation. */
7575 switch (code)
7577 case POINTER_PLUS_EXPR:
7578 code = PLUS_EXPR;
7579 break;
7580 case MULT_HIGHPART_EXPR:
7581 goto fail;
7582 default:
7583 break;
7585 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7586 goto fail;
7588 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7589 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7590 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7591 goto fail;
7593 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7594 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7595 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7596 || !gimple_assign_load_p (load1_stmt)
7597 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7598 || !gimple_assign_load_p (load2_stmt))
7599 goto fail;
7601 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7602 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7603 if (load1_stmt_info == NULL
7604 || load2_stmt_info == NULL
7605 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7606 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7607 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7608 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7609 goto fail;
7611 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7613 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7614 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7615 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7616 goto fail;
7617 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7618 tree lrhs;
7619 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7620 lrhs = rhs1;
7621 else
7622 lrhs = rhs2;
7623 use_operand_p use_p;
7624 imm_use_iterator iter;
7625 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7627 gimple *use_stmt = USE_STMT (use_p);
7628 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7629 continue;
7630 if (other_store_stmt)
7631 goto fail;
7632 other_store_stmt = use_stmt;
7636 if (other_store_stmt == NULL)
7637 goto fail;
7638 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7639 || !gimple_store_p (other_store_stmt))
7640 goto fail;
7642 stmt_vec_info other_store_stmt_info
7643 = loop_vinfo->lookup_stmt (other_store_stmt);
7644 if (other_store_stmt_info == NULL
7645 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7646 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7647 goto fail;
7649 gimple *stmt1 = stmt;
7650 gimple *stmt2 = other_store_stmt;
7651 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7652 std::swap (stmt1, stmt2);
7653 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7654 gimple_assign_rhs1 (load2_stmt)))
7656 std::swap (rhs1, rhs2);
7657 std::swap (load1_stmt, load2_stmt);
7658 std::swap (load1_stmt_info, load2_stmt_info);
7660 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7661 gimple_assign_rhs1 (load1_stmt)))
7662 goto fail;
7664 tree var3 = NULL_TREE;
7665 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7666 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7667 gimple_assign_rhs1 (load2_stmt)))
7668 goto fail;
7669 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7671 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7672 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7673 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7674 goto fail;
7675 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7676 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7677 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7678 || lookup_attribute ("omp simd inscan exclusive",
7679 DECL_ATTRIBUTES (var3)))
7680 goto fail;
7683 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7684 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7685 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7686 goto fail;
7688 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7689 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7690 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7691 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7692 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7693 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7694 goto fail;
7696 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7697 std::swap (var1, var2);
7699 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7701 if (!lookup_attribute ("omp simd inscan exclusive",
7702 DECL_ATTRIBUTES (var1)))
7703 goto fail;
7704 var1 = var3;
7707 if (loop_vinfo->scan_map == NULL)
7708 goto fail;
7709 tree *init = loop_vinfo->scan_map->get (var1);
7710 if (init == NULL)
7711 goto fail;
7713 /* The IL is as expected; now check whether we can actually vectorize it.
7714 Inclusive scan:
7715 _26 = D.2043[_25];
7716 _27 = D.2042[_25];
7717 _28 = _26 + _27;
7718 D.2043[_25] = _28;
7719 D.2042[_25] = _28;
7720 should be vectorized as (where _40 is the vectorized rhs
7721 from the D.2042[_21] = 0; store):
7722 _30 = MEM <vector(8) int> [(int *)&D.2043];
7723 _31 = MEM <vector(8) int> [(int *)&D.2042];
7724 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7725 _33 = _31 + _32;
7726 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7727 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7728 _35 = _33 + _34;
7729 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7730 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7731 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7732 _37 = _35 + _36;
7733 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7734 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7735 _38 = _30 + _37;
7736 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7737 MEM <vector(8) int> [(int *)&D.2043] = _39;
7738 MEM <vector(8) int> [(int *)&D.2042] = _38;
7739 Exclusive scan:
7740 _26 = D.2043[_25];
7741 D.2044[_25] = _26;
7742 _27 = D.2042[_25];
7743 _28 = _26 + _27;
7744 D.2043[_25] = _28;
7745 should be vectorized as (where _40 is the vectorized rhs
7746 from the D.2042[_21] = 0; store):
7747 _30 = MEM <vector(8) int> [(int *)&D.2043];
7748 _31 = MEM <vector(8) int> [(int *)&D.2042];
7749 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7750 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7751 _34 = _32 + _33;
7752 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7753 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7754 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7755 _36 = _34 + _35;
7756 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7757 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7758 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7759 _38 = _36 + _37;
7760 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7761 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7762 _39 = _30 + _38;
7763 _50 = _31 + _39;
7764 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7765 MEM <vector(8) int> [(int *)&D.2044] = _39;
7766 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7767 enum machine_mode vec_mode = TYPE_MODE (vectype);
7768 optab optab = optab_for_tree_code (code, vectype, optab_default);
7769 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7770 goto fail;
7772 int units_log2 = scan_store_can_perm_p (vectype, *init);
7773 if (units_log2 == -1)
7774 goto fail;
7776 return true;
7780 /* Function vectorizable_scan_store.
7782 Helper of vectorizable_store; arguments are as for vectorizable_store.
7783 Handle only the transformation; the checking is done in check_scan_store. */
7785 static bool
7786 vectorizable_scan_store (vec_info *vinfo,
7787 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7788 gimple **vec_stmt, int ncopies)
7790 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7791 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7792 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7793 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7795 if (dump_enabled_p ())
7796 dump_printf_loc (MSG_NOTE, vect_location,
7797 "transform scan store. ncopies = %d\n", ncopies);
7799 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7800 tree rhs = gimple_assign_rhs1 (stmt);
7801 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7803 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7804 bool inscan_var_store
7805 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7807 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7809 use_operand_p use_p;
7810 imm_use_iterator iter;
7811 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7813 gimple *use_stmt = USE_STMT (use_p);
7814 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7815 continue;
7816 rhs = gimple_assign_lhs (use_stmt);
7817 break;
7821 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7822 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7823 if (code == POINTER_PLUS_EXPR)
7824 code = PLUS_EXPR;
7825 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7826 && commutative_tree_code (code));
7827 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7828 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7829 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7830 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7831 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7832 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7833 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7834 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7835 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7836 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7837 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7839 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7841 std::swap (rhs1, rhs2);
7842 std::swap (var1, var2);
7843 std::swap (load1_dr_info, load2_dr_info);
7846 tree *init = loop_vinfo->scan_map->get (var1);
7847 gcc_assert (init);
7849 unsigned HOST_WIDE_INT nunits;
7850 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7851 gcc_unreachable ();
7852 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7853 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7854 gcc_assert (units_log2 > 0);
7855 auto_vec<tree, 16> perms;
7856 perms.quick_grow (units_log2 + 1);
7857 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7858 for (int i = 0; i <= units_log2; ++i)
7860 unsigned HOST_WIDE_INT j, k;
7861 vec_perm_builder sel (nunits, nunits, 1);
7862 sel.quick_grow (nunits);
7863 if (i == units_log2)
7864 for (j = 0; j < nunits; ++j)
7865 sel[j] = nunits - 1;
7866 else
7868 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7869 sel[j] = j;
7870 for (k = 0; j < nunits; ++j, ++k)
7871 sel[j] = nunits + k;
7873 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7874 if (!use_whole_vector.is_empty ()
7875 && use_whole_vector[i] != scan_store_kind_perm)
7877 if (zero_vec == NULL_TREE)
7878 zero_vec = build_zero_cst (vectype);
7879 if (masktype == NULL_TREE
7880 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7881 masktype = truth_type_for (vectype);
7882 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7884 else
7885 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7888 tree vec_oprnd1 = NULL_TREE;
7889 tree vec_oprnd2 = NULL_TREE;
7890 tree vec_oprnd3 = NULL_TREE;
7891 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7892 tree dataref_offset = build_int_cst (ref_type, 0);
7893 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7894 vectype, VMAT_CONTIGUOUS);
7895 tree ldataref_ptr = NULL_TREE;
7896 tree orig = NULL_TREE;
7897 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7898 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7899 auto_vec<tree> vec_oprnds1;
7900 auto_vec<tree> vec_oprnds2;
7901 auto_vec<tree> vec_oprnds3;
7902 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7903 *init, &vec_oprnds1,
7904 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7905 rhs2, &vec_oprnds3);
7906 for (int j = 0; j < ncopies; j++)
7908 vec_oprnd1 = vec_oprnds1[j];
7909 if (ldataref_ptr == NULL)
7910 vec_oprnd2 = vec_oprnds2[j];
7911 vec_oprnd3 = vec_oprnds3[j];
7912 if (j == 0)
7913 orig = vec_oprnd3;
7914 else if (!inscan_var_store)
7915 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7917 if (ldataref_ptr)
7919 vec_oprnd2 = make_ssa_name (vectype);
7920 tree data_ref = fold_build2 (MEM_REF, vectype,
7921 unshare_expr (ldataref_ptr),
7922 dataref_offset);
7923 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7924 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7925 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7926 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7927 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7930 tree v = vec_oprnd2;
7931 for (int i = 0; i < units_log2; ++i)
7933 tree new_temp = make_ssa_name (vectype);
7934 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7935 (zero_vec
7936 && (use_whole_vector[i]
7937 != scan_store_kind_perm))
7938 ? zero_vec : vec_oprnd1, v,
7939 perms[i]);
7940 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7941 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7942 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7944 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7946 /* The whole-vector shift shifted in zero bits, but if *init
7947 is not an all-zero initializer, we need to replace those elements
7948 with elements from vec_oprnd1. */
7949 tree_vector_builder vb (masktype, nunits, 1);
7950 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7951 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7952 ? boolean_false_node : boolean_true_node);
7954 tree new_temp2 = make_ssa_name (vectype);
7955 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7956 new_temp, vec_oprnd1);
7957 vect_finish_stmt_generation (vinfo, stmt_info,
7958 g, gsi);
7959 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7960 new_temp = new_temp2;
7963 /* For exclusive scan, perform the perms[i] permutation once
7964 more. */
7965 if (i == 0
7966 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7967 && v == vec_oprnd2)
7969 v = new_temp;
7970 --i;
7971 continue;
7974 tree new_temp2 = make_ssa_name (vectype);
7975 g = gimple_build_assign (new_temp2, code, v, new_temp);
7976 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7977 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7979 v = new_temp2;
7982 tree new_temp = make_ssa_name (vectype);
7983 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7984 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7985 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7987 tree last_perm_arg = new_temp;
7988 /* For exclusive scan, new_temp computed above is the exclusive scan
7989 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7990 of the last element into orig. */
7991 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7993 last_perm_arg = make_ssa_name (vectype);
7994 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7995 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7996 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
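/* Illustration (lane values hypothetical): ignoring the carried-in ORIG,
   for input lanes { a0, a1, a2, a3 } NEW_TEMP holds the exclusive prefix
   sum { 0, a0, a0+a1, a0+a1+a2 }; adding VEC_OPRND2 lane-wise gives the
   inclusive sum { a0, a0+a1, a0+a1+a2, a0+a1+a2+a3 }, whose last lane is
   broadcast below into ORIG to be carried into the next copy and the
   final store of the original list item.  */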
7999 orig = make_ssa_name (vectype);
8000 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8001 last_perm_arg, perms[units_log2]);
8002 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8003 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8005 if (!inscan_var_store)
8007 tree data_ref = fold_build2 (MEM_REF, vectype,
8008 unshare_expr (dataref_ptr),
8009 dataref_offset);
8010 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8011 g = gimple_build_assign (data_ref, new_temp);
8012 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8013 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8017 if (inscan_var_store)
8018 for (int j = 0; j < ncopies; j++)
8020 if (j != 0)
8021 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8023 tree data_ref = fold_build2 (MEM_REF, vectype,
8024 unshare_expr (dataref_ptr),
8025 dataref_offset);
8026 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8027 gimple *g = gimple_build_assign (data_ref, orig);
8028 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8029 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8031 return true;
8035 /* Function vectorizable_store.
8037 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
8038 that can be vectorized.
8039 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8040 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8041 Return true if STMT_INFO is vectorizable in this way. */
8043 static bool
8044 vectorizable_store (vec_info *vinfo,
8045 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8046 gimple **vec_stmt, slp_tree slp_node,
8047 stmt_vector_for_cost *cost_vec)
8049 tree data_ref;
8050 tree vec_oprnd = NULL_TREE;
8051 tree elem_type;
8052 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8053 class loop *loop = NULL;
8054 machine_mode vec_mode;
8055 tree dummy;
8056 enum vect_def_type rhs_dt = vect_unknown_def_type;
8057 enum vect_def_type mask_dt = vect_unknown_def_type;
8058 tree dataref_ptr = NULL_TREE;
8059 tree dataref_offset = NULL_TREE;
8060 gimple *ptr_incr = NULL;
8061 int ncopies;
8062 int j;
8063 stmt_vec_info first_stmt_info;
8064 bool grouped_store;
8065 unsigned int group_size, i;
8066 bool slp = (slp_node != NULL);
8067 unsigned int vec_num;
8068 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8069 tree aggr_type;
8070 gather_scatter_info gs_info;
8071 poly_uint64 vf;
8072 vec_load_store_type vls_type;
8073 tree ref_type;
8075 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8076 return false;
8078 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8079 && ! vec_stmt)
8080 return false;
8082 /* Is vectorizable store? */
8084 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8085 slp_tree mask_node = NULL;
8086 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8088 tree scalar_dest = gimple_assign_lhs (assign);
8089 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8090 && is_pattern_stmt_p (stmt_info))
8091 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8092 if (TREE_CODE (scalar_dest) != ARRAY_REF
8093 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8094 && TREE_CODE (scalar_dest) != INDIRECT_REF
8095 && TREE_CODE (scalar_dest) != COMPONENT_REF
8096 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8097 && TREE_CODE (scalar_dest) != REALPART_EXPR
8098 && TREE_CODE (scalar_dest) != MEM_REF)
8099 return false;
8101 else
8103 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8104 if (!call || !gimple_call_internal_p (call))
8105 return false;
8107 internal_fn ifn = gimple_call_internal_fn (call);
8108 if (!internal_store_fn_p (ifn))
8109 return false;
8111 int mask_index = internal_fn_mask_index (ifn);
8112 if (mask_index >= 0 && slp_node)
8113 mask_index = vect_slp_child_index_for_operand
8114 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8115 if (mask_index >= 0
8116 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8117 &mask, &mask_node, &mask_dt,
8118 &mask_vectype))
8119 return false;
8122 /* Cannot have hybrid store SLP -- that would mean storing to the
8123 same location twice. */
8124 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8126 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8127 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8129 if (loop_vinfo)
8131 loop = LOOP_VINFO_LOOP (loop_vinfo);
8132 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8134 else
8135 vf = 1;
8137 /* Multiple types in SLP are handled by creating the appropriate number of
8138 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8139 case of SLP. */
8140 if (slp)
8141 ncopies = 1;
8142 else
8143 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8145 gcc_assert (ncopies >= 1);
8147 /* FORNOW. This restriction should be relaxed. */
8148 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8150 if (dump_enabled_p ())
8151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8152 "multiple types in nested loop.\n");
8153 return false;
8156 tree op;
8157 slp_tree op_node;
8158 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8159 &op, &op_node, &rhs_dt, &rhs_vectype, &vls_type))
8160 return false;
8162 elem_type = TREE_TYPE (vectype);
8163 vec_mode = TYPE_MODE (vectype);
8165 if (!STMT_VINFO_DATA_REF (stmt_info))
8166 return false;
8168 vect_memory_access_type memory_access_type;
8169 enum dr_alignment_support alignment_support_scheme;
8170 int misalignment;
8171 poly_int64 poffset;
8172 internal_fn lanes_ifn;
8173 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8174 ncopies, &memory_access_type, &poffset,
8175 &alignment_support_scheme, &misalignment, &gs_info,
8176 &lanes_ifn))
8177 return false;
8179 if (mask)
8181 if (memory_access_type == VMAT_CONTIGUOUS)
8183 if (!VECTOR_MODE_P (vec_mode)
8184 || !can_vec_mask_load_store_p (vec_mode,
8185 TYPE_MODE (mask_vectype), false))
8186 return false;
8188 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8189 && (memory_access_type != VMAT_GATHER_SCATTER
8190 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8192 if (dump_enabled_p ())
8193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8194 "unsupported access type for masked store.\n");
8195 return false;
8197 else if (memory_access_type == VMAT_GATHER_SCATTER
8198 && gs_info.ifn == IFN_LAST
8199 && !gs_info.decl)
8201 if (dump_enabled_p ())
8202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8203 "unsupported masked emulated scatter.\n");
8204 return false;
8207 else
8209 /* FORNOW. In some cases we can vectorize even if the data type is not
8210 supported (e.g. array initialization with 0). */
8211 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8212 return false;
8215 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8216 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8217 && memory_access_type != VMAT_GATHER_SCATTER
8218 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8219 if (grouped_store)
8221 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8222 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8223 group_size = DR_GROUP_SIZE (first_stmt_info);
8225 else
8227 first_stmt_info = stmt_info;
8228 first_dr_info = dr_info;
8229 group_size = vec_num = 1;
8232 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8234 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8235 memory_access_type))
8236 return false;
8239 bool costing_p = !vec_stmt;
8240 if (costing_p) /* transformation not required. */
8242 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8244 if (loop_vinfo
8245 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8246 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8247 vls_type, group_size,
8248 memory_access_type, &gs_info,
8249 mask);
8251 if (slp_node
8252 && (!vect_maybe_update_slp_op_vectype (op_node, vectype)
8253 || (mask
8254 && !vect_maybe_update_slp_op_vectype (mask_node,
8255 mask_vectype))))
8257 if (dump_enabled_p ())
8258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8259 "incompatible vector types for invariants\n");
8260 return false;
8263 if (dump_enabled_p ()
8264 && memory_access_type != VMAT_ELEMENTWISE
8265 && memory_access_type != VMAT_GATHER_SCATTER
8266 && alignment_support_scheme != dr_aligned)
8267 dump_printf_loc (MSG_NOTE, vect_location,
8268 "Vectorizing an unaligned access.\n");
8270 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8272 /* As function vect_transform_stmt shows, for interleaving stores
8273 the whole chain is vectorized when the last store in the chain
8274 is reached; the other stores in the group are skipped. So we
8275 only want to cost the last one here, but it is not trivial to
8276 get at the last one, and costing the first one is equivalent,
8277 so use the first one instead. */
8278 if (grouped_store
8279 && !slp
8280 && first_stmt_info != stmt_info)
8281 return true;
8283 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8285 /* Transform. */
8287 ensure_base_align (dr_info);
8289 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8291 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8292 gcc_assert (!slp);
8293 if (costing_p)
8295 unsigned int inside_cost = 0, prologue_cost = 0;
8296 if (vls_type == VLS_STORE_INVARIANT)
8297 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8298 stmt_info, 0, vect_prologue);
8299 vect_get_store_cost (vinfo, stmt_info, ncopies,
8300 alignment_support_scheme, misalignment,
8301 &inside_cost, cost_vec);
8303 if (dump_enabled_p ())
8304 dump_printf_loc (MSG_NOTE, vect_location,
8305 "vect_model_store_cost: inside_cost = %d, "
8306 "prologue_cost = %d .\n",
8307 inside_cost, prologue_cost);
8309 return true;
8311 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8314 if (grouped_store)
8316 /* FORNOW */
8317 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8319 if (slp)
8321 grouped_store = false;
8322 /* VEC_NUM is the number of vect stmts to be created for this
8323 group. */
8324 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8325 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8326 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8327 == first_stmt_info);
8328 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8329 op = vect_get_store_rhs (first_stmt_info);
8331 else
8332 /* VEC_NUM is the number of vect stmts to be created for this
8333 group. */
8334 vec_num = group_size;
8336 ref_type = get_group_alias_ptr_type (first_stmt_info);
8338 else
8339 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8341 if (!costing_p && dump_enabled_p ())
8342 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8343 ncopies);
8345 /* Check if we need to update the prologue cost for an invariant,
8346 and update it accordingly if so. If it's not an
8347 interleaving store, we can just check vls_type; but if
8348 it is an interleaving store, we need to check the def_type
8349 of the stored value since the current vls_type is just
8350 for first_stmt_info. */
8351 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8353 gcc_assert (costing_p);
8354 if (slp)
8355 return;
8356 if (grouped_store)
8358 gcc_assert (store_rhs);
8359 enum vect_def_type cdt;
8360 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8361 if (cdt != vect_constant_def && cdt != vect_external_def)
8362 return;
8364 else if (vls_type != VLS_STORE_INVARIANT)
8365 return;
8366 *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
8367 0, vect_prologue);
8370 if (memory_access_type == VMAT_ELEMENTWISE
8371 || memory_access_type == VMAT_STRIDED_SLP)
8373 unsigned inside_cost = 0, prologue_cost = 0;
8374 gimple_stmt_iterator incr_gsi;
8375 bool insert_after;
8376 gimple *incr;
8377 tree offvar;
8378 tree ivstep;
8379 tree running_off;
8380 tree stride_base, stride_step, alias_off;
8381 tree vec_oprnd = NULL_TREE;
8382 tree dr_offset;
8383 unsigned int g;
8384 /* Checked by get_load_store_type. */
8385 unsigned int const_nunits = nunits.to_constant ();
8387 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8388 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8390 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8391 stride_base
8392 = fold_build_pointer_plus
8393 (DR_BASE_ADDRESS (first_dr_info->dr),
8394 size_binop (PLUS_EXPR,
8395 convert_to_ptrofftype (dr_offset),
8396 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8397 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8399 /* For a store with loop-invariant (but other than power-of-2)
8400 stride (i.e. not a grouped access) like so:
8402 for (i = 0; i < n; i += stride)
8403 array[i] = ...;
8405 we generate a new induction variable and new stores from
8406 the components of the (vectorized) rhs:
8408 for (j = 0; ; j += VF*stride)
8409 vectemp = ...;
8410 tmp1 = vectemp[0];
8411 array[j] = tmp1;
8412 tmp2 = vectemp[1];
8413 array[j + stride] = tmp2;
8417 unsigned nstores = const_nunits;
8418 unsigned lnel = 1;
8419 tree ltype = elem_type;
8420 tree lvectype = vectype;
8421 if (slp)
8423 if (group_size < const_nunits
8424 && const_nunits % group_size == 0)
8426 nstores = const_nunits / group_size;
8427 lnel = group_size;
8428 ltype = build_vector_type (elem_type, group_size);
8429 lvectype = vectype;
8431 /* First check whether the vec_extract optab lacks support for
8432 extracting the vector elements directly. */
8433 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8434 machine_mode vmode;
8435 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8436 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8437 group_size).exists (&vmode)
8438 || (convert_optab_handler (vec_extract_optab,
8439 TYPE_MODE (vectype), vmode)
8440 == CODE_FOR_nothing))
8442 /* Try to avoid emitting an extract of vector elements
8443 by performing the extracts using an integer type of the
8444 same size, extracting from a vector of those and then
8445 re-interpreting it as the original vector type if
8446 supported. */
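/* For example (assuming the target provides the modes involved): for a
   group of two 32-bit floats stored out of an 8-element float vector,
   lsize is 64, so we extract four 64-bit integers from the vector viewed
   as a 4 x 64-bit integer vector and store those, halving the number of
   stores compared to extracting each float individually.  */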
8447 unsigned lsize
8448 = group_size * GET_MODE_BITSIZE (elmode);
8449 unsigned int lnunits = const_nunits / group_size;
8450 /* If we can't construct such a vector, fall back to
8451 element extracts from the original vector type and
8452 element-size stores. */
8453 if (int_mode_for_size (lsize, 0).exists (&elmode)
8454 && VECTOR_MODE_P (TYPE_MODE (vectype))
8455 && related_vector_mode (TYPE_MODE (vectype), elmode,
8456 lnunits).exists (&vmode)
8457 && (convert_optab_handler (vec_extract_optab,
8458 vmode, elmode)
8459 != CODE_FOR_nothing))
8461 nstores = lnunits;
8462 lnel = group_size;
8463 ltype = build_nonstandard_integer_type (lsize, 1);
8464 lvectype = build_vector_type (ltype, nstores);
8466 /* Else fall back to vector extraction anyway.
8467 Fewer stores are more important than avoiding spilling
8468 of the vector we extract from. Compared to the
8469 construction case in vectorizable_load, no store-forwarding
8470 issue exists here for reasonable archs. */
8473 else if (group_size >= const_nunits
8474 && group_size % const_nunits == 0)
8476 int mis_align = dr_misalignment (first_dr_info, vectype);
8477 dr_alignment_support dr_align
8478 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8479 mis_align);
8480 if (dr_align == dr_aligned
8481 || dr_align == dr_unaligned_supported)
8483 nstores = 1;
8484 lnel = const_nunits;
8485 ltype = vectype;
8486 lvectype = vectype;
8487 alignment_support_scheme = dr_align;
8488 misalignment = mis_align;
8491 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8492 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8495 if (!costing_p)
8497 ivstep = stride_step;
8498 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8499 build_int_cst (TREE_TYPE (ivstep), vf));
8501 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8503 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8504 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8505 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8506 insert_after, &offvar, NULL);
8507 incr = gsi_stmt (incr_gsi);
8509 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8512 alias_off = build_int_cst (ref_type, 0);
8513 stmt_vec_info next_stmt_info = first_stmt_info;
8514 auto_vec<tree> vec_oprnds (ncopies);
8515 /* When costing adjacent vector stores, we'd like to cost them once
8516 using their total number instead of costing each one individually. */
8517 unsigned int n_adjacent_stores = 0;
8518 for (g = 0; g < group_size; g++)
8520 running_off = offvar;
8521 if (!costing_p)
8523 if (g)
8525 tree size = TYPE_SIZE_UNIT (ltype);
8526 tree pos
8527 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8528 tree newoff = copy_ssa_name (running_off, NULL);
8529 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8530 running_off, pos);
8531 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8532 running_off = newoff;
8535 if (!slp)
8536 op = vect_get_store_rhs (next_stmt_info);
8537 if (!costing_p)
8538 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
8539 &vec_oprnds);
8540 else
8541 update_prologue_cost (&prologue_cost, op);
8542 unsigned int group_el = 0;
8543 unsigned HOST_WIDE_INT
8544 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8545 for (j = 0; j < ncopies; j++)
8547 if (!costing_p)
8549 vec_oprnd = vec_oprnds[j];
8550 /* Pun the vector to extract from if necessary. */
8551 if (lvectype != vectype)
8553 tree tem = make_ssa_name (lvectype);
8554 tree cvt
8555 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8556 gimple *pun = gimple_build_assign (tem, cvt);
8557 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8558 vec_oprnd = tem;
8561 for (i = 0; i < nstores; i++)
8563 if (costing_p)
8565 /* We only need a vector extraction when there is more
8566 than one store. */
8567 if (nstores > 1)
8568 inside_cost
8569 += record_stmt_cost (cost_vec, 1, vec_to_scalar,
8570 stmt_info, 0, vect_body);
8571 /* Treat a single-lane vector type store as a scalar
8572 store to avoid ICEs like PR110776. */
8573 if (VECTOR_TYPE_P (ltype)
8574 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8575 n_adjacent_stores++;
8576 else
8577 inside_cost
8578 += record_stmt_cost (cost_vec, 1, scalar_store,
8579 stmt_info, 0, vect_body);
8580 continue;
8582 tree newref, newoff;
8583 gimple *incr, *assign;
8584 tree size = TYPE_SIZE (ltype);
8585 /* Extract the i'th component. */
8586 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8587 bitsize_int (i), size);
8588 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8589 size, pos);
8591 elem = force_gimple_operand_gsi (gsi, elem, true,
8592 NULL_TREE, true,
8593 GSI_SAME_STMT);
8595 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8596 group_el * elsz);
8597 newref = build2 (MEM_REF, ltype,
8598 running_off, this_off);
8599 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8601 /* And store it to *running_off. */
8602 assign = gimple_build_assign (newref, elem);
8603 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8605 group_el += lnel;
8606 if (! slp
8607 || group_el == group_size)
8609 newoff = copy_ssa_name (running_off, NULL);
8610 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8611 running_off, stride_step);
8612 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8614 running_off = newoff;
8615 group_el = 0;
8617 if (g == group_size - 1
8618 && !slp)
8620 if (j == 0 && i == 0)
8621 *vec_stmt = assign;
8622 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8626 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8627 vec_oprnds.truncate(0);
8628 if (slp)
8629 break;
8632 if (costing_p)
8634 if (n_adjacent_stores > 0)
8635 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8636 alignment_support_scheme, misalignment,
8637 &inside_cost, cost_vec);
8638 if (dump_enabled_p ())
8639 dump_printf_loc (MSG_NOTE, vect_location,
8640 "vect_model_store_cost: inside_cost = %d, "
8641 "prologue_cost = %d .\n",
8642 inside_cost, prologue_cost);
8645 return true;
8648 gcc_assert (alignment_support_scheme);
8649 vec_loop_masks *loop_masks
8650 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8651 ? &LOOP_VINFO_MASKS (loop_vinfo)
8652 : NULL);
8653 vec_loop_lens *loop_lens
8654 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8655 ? &LOOP_VINFO_LENS (loop_vinfo)
8656 : NULL);
8658 /* We shouldn't use the length-based approach if the loop is fully masked. */
8659 gcc_assert (!loop_lens || !loop_masks);
8661 /* Targets with store-lane instructions must not require explicit
8662 realignment. vect_supportable_dr_alignment always returns either
8663 dr_aligned or dr_unaligned_supported for masked operations. */
8664 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8665 && !mask
8666 && !loop_masks)
8667 || alignment_support_scheme == dr_aligned
8668 || alignment_support_scheme == dr_unaligned_supported);
8670 tree offset = NULL_TREE;
8671 if (!known_eq (poffset, 0))
8672 offset = size_int (poffset);
8674 tree bump;
8675 tree vec_offset = NULL_TREE;
8676 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8678 aggr_type = NULL_TREE;
8679 bump = NULL_TREE;
8681 else if (memory_access_type == VMAT_GATHER_SCATTER)
8683 aggr_type = elem_type;
8684 if (!costing_p)
8685 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8686 &bump, &vec_offset, loop_lens);
8688 else
8690 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8691 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8692 else
8693 aggr_type = vectype;
8694 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8695 memory_access_type, loop_lens);
8698 if (mask && !costing_p)
8699 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8701 /* In case the vectorization factor (VF) is bigger than the number
8702 of elements that we can fit in a vectype (nunits), we have to generate
8703 more than one vector stmt - i.e. we need to "unroll" the
8704 vector stmt by a factor of VF/nunits. */
8706 /* In case of interleaving (non-unit grouped access):
8708 S1: &base + 2 = x2
8709 S2: &base = x0
8710 S3: &base + 1 = x1
8711 S4: &base + 3 = x3
8713 We create vectorized stores starting from base address (the access of the
8714 first stmt in the chain (S2 in the above example), when the last store stmt
8715 of the chain (S4) is reached:
8717 VS1: &base = vx2
8718 VS2: &base + vec_size*1 = vx0
8719 VS3: &base + vec_size*2 = vx1
8720 VS4: &base + vec_size*3 = vx3
8722 Then permutation statements are generated:
8724 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8725 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8728 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8729 (the order of the data-refs in the output of vect_permute_store_chain
8730 corresponds to the order of scalar stmts in the interleaving chain - see
8731 the documentation of vect_permute_store_chain()).
8733 In case of both multiple types and interleaving, above vector stores and
8734 permutation stmts are created for every copy. The result vector stmts are
8735 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8736 STMT_VINFO_RELATED_STMT for the next copies.
8739 auto_vec<tree> dr_chain (group_size);
8740 auto_vec<tree> vec_masks;
8741 tree vec_mask = NULL;
8742 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8743 for (i = 0; i < group_size; i++)
8744 gvec_oprnds.quick_push (new auto_vec<tree> (ncopies));
8746 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8748 gcc_assert (!slp && grouped_store);
8749 unsigned inside_cost = 0, prologue_cost = 0;
8750 /* When costing adjacent vector stores, we'd like to cost them once
8751 using their total number instead of costing each one individually. */
8752 unsigned int n_adjacent_stores = 0;
8753 for (j = 0; j < ncopies; j++)
8755 gimple *new_stmt;
8756 if (j == 0)
8758 /* For interleaved stores we collect vectorized defs for all
8759 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8760 as an input to vect_permute_store_chain(). */
8761 stmt_vec_info next_stmt_info = first_stmt_info;
8762 for (i = 0; i < group_size; i++)
8764 /* Since gaps are not supported for interleaved stores,
8765 DR_GROUP_SIZE is the exact number of stmts in the
8766 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8767 op = vect_get_store_rhs (next_stmt_info);
8768 if (costing_p)
8769 update_prologue_cost (&prologue_cost, op);
8770 else
8772 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8773 ncopies, op,
8774 gvec_oprnds[i]);
8775 vec_oprnd = (*gvec_oprnds[i])[0];
8776 dr_chain.quick_push (vec_oprnd);
8778 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8781 if (!costing_p)
8783 if (mask)
8785 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8786 mask, &vec_masks,
8787 mask_vectype);
8788 vec_mask = vec_masks[0];
8791 /* We should have caught mismatched types earlier. */
8792 gcc_assert (
8793 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8794 dataref_ptr
8795 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8796 aggr_type, NULL, offset, &dummy,
8797 gsi, &ptr_incr, false, bump);
8800 else if (!costing_p)
8802 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8803 /* DR_CHAIN is then used as an input to
8804 vect_permute_store_chain(). */
8805 for (i = 0; i < group_size; i++)
8807 vec_oprnd = (*gvec_oprnds[i])[j];
8808 dr_chain[i] = vec_oprnd;
8810 if (mask)
8811 vec_mask = vec_masks[j];
8812 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8813 stmt_info, bump);
8816 if (costing_p)
8818 n_adjacent_stores += vec_num;
8819 continue;
8822 /* Get an array into which we can store the individual vectors. */
8823 tree vec_array = create_vector_array (vectype, vec_num);
8825 /* Invalidate the current contents of VEC_ARRAY. This should
8826 become an RTL clobber too, which prevents the vector registers
8827 from being upward-exposed. */
8828 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8830 /* Store the individual vectors into the array. */
8831 for (i = 0; i < vec_num; i++)
8833 vec_oprnd = dr_chain[i];
8834 write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
8838 tree final_mask = NULL;
8839 tree final_len = NULL;
8840 tree bias = NULL;
8841 if (loop_masks)
8842 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8843 ncopies, vectype, j);
8844 if (vec_mask)
8845 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8846 vec_mask, gsi);
8848 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8850 if (loop_lens)
8851 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8852 ncopies, vectype, j, 1);
8853 else
8854 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8855 signed char biasval
8856 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8857 bias = build_int_cst (intQI_type_node, biasval);
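/* MASK_LEN_STORE_LANES always takes a mask operand; when only a length
   controls the partial store, supply an all-ones mask below.  */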
8858 if (!final_mask)
8860 mask_vectype = truth_type_for (vectype);
8861 final_mask = build_minus_one_cst (mask_vectype);
8865 gcall *call;
8866 if (final_len && final_mask)
8868 /* Emit:
8869 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8870 LEN, BIAS, VEC_ARRAY). */
8871 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8872 tree alias_ptr = build_int_cst (ref_type, align);
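/* The alias pointer operand carries the aliasing information through its
   type (REF_TYPE) and the access alignment in bits through its value.  */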
8873 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8874 dataref_ptr, alias_ptr,
8875 final_mask, final_len, bias,
8876 vec_array);
8878 else if (final_mask)
8880 /* Emit:
8881 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8882 VEC_ARRAY). */
8883 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8884 tree alias_ptr = build_int_cst (ref_type, align);
8885 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8886 dataref_ptr, alias_ptr,
8887 final_mask, vec_array);
8889 else
8891 /* Emit:
8892 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8893 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8894 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
8895 gimple_call_set_lhs (call, data_ref);
8897 gimple_call_set_nothrow (call, true);
8898 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8899 new_stmt = call;
8901 /* Record that VEC_ARRAY is now dead. */
8902 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8903 if (j == 0)
8904 *vec_stmt = new_stmt;
8905 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8908 if (costing_p)
8910 if (n_adjacent_stores > 0)
8911 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8912 alignment_support_scheme, misalignment,
8913 &inside_cost, cost_vec);
8914 if (dump_enabled_p ())
8915 dump_printf_loc (MSG_NOTE, vect_location,
8916 "vect_model_store_cost: inside_cost = %d, "
8917 "prologue_cost = %d .\n",
8918 inside_cost, prologue_cost);
8921 return true;
8924 if (memory_access_type == VMAT_GATHER_SCATTER)
8926 gcc_assert (!grouped_store);
8927 auto_vec<tree> vec_offsets;
8928 unsigned int inside_cost = 0, prologue_cost = 0;
8929 for (j = 0; j < ncopies; j++)
8931 gimple *new_stmt;
8932 if (j == 0)
8934 if (costing_p && vls_type == VLS_STORE_INVARIANT)
8935 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8936 stmt_info, 0, vect_prologue);
8937 else if (!costing_p)
8939 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8940 DR_CHAIN is of size 1. */
8941 gcc_assert (group_size == 1);
8942 if (slp_node)
8943 vect_get_slp_defs (op_node, gvec_oprnds[0]);
8944 else
8945 vect_get_vec_defs_for_operand (vinfo, first_stmt_info,
8946 ncopies, op, gvec_oprnds[0]);
8947 if (mask)
8949 if (slp_node)
8950 vect_get_slp_defs (mask_node, &vec_masks);
8951 else
8952 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8953 ncopies,
8954 mask, &vec_masks,
8955 mask_vectype);
8958 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8959 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8960 slp_node, &gs_info,
8961 &dataref_ptr, &vec_offsets);
8962 else
8963 dataref_ptr
8964 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8965 aggr_type, NULL, offset,
8966 &dummy, gsi, &ptr_incr, false,
8967 bump);
8970 else if (!costing_p)
8972 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8973 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8974 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8975 gsi, stmt_info, bump);
8978 new_stmt = NULL;
8979 for (i = 0; i < vec_num; ++i)
8981 if (!costing_p)
8983 vec_oprnd = (*gvec_oprnds[0])[vec_num * j + i];
8984 if (mask)
8985 vec_mask = vec_masks[vec_num * j + i];
8986 /* We should have caught mismatched types earlier. */
8987 gcc_assert (useless_type_conversion_p (vectype,
8988 TREE_TYPE (vec_oprnd)));
8990 unsigned HOST_WIDE_INT align;
8991 tree final_mask = NULL_TREE;
8992 tree final_len = NULL_TREE;
8993 tree bias = NULL_TREE;
8994 if (!costing_p)
8996 if (loop_masks)
8997 final_mask = vect_get_loop_mask (loop_vinfo, gsi,
8998 loop_masks, ncopies,
8999 vectype, j);
9000 if (vec_mask)
9001 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9002 final_mask, vec_mask, gsi);
9005 if (gs_info.ifn != IFN_LAST)
9007 if (costing_p)
9009 unsigned int cnunits = vect_nunits_for_cost (vectype);
9010 inside_cost
9011 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9012 stmt_info, 0, vect_body);
9013 continue;
9016 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9017 vec_offset = vec_offsets[vec_num * j + i];
9018 tree scale = size_int (gs_info.scale);
9020 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9022 if (loop_lens)
9023 final_len = vect_get_loop_len (loop_vinfo, gsi,
9024 loop_lens, ncopies,
9025 vectype, j, 1);
9026 else
9027 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9028 signed char biasval
9029 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9030 bias = build_int_cst (intQI_type_node, biasval);
9031 if (!final_mask)
9033 mask_vectype = truth_type_for (vectype);
9034 final_mask = build_minus_one_cst (mask_vectype);
9038 gcall *call;
9039 if (final_len && final_mask)
9040 call = gimple_build_call_internal
9041 (IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
9042 vec_offset, scale, vec_oprnd, final_mask,
9043 final_len, bias);
9044 else if (final_mask)
9045 call = gimple_build_call_internal
9046 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
9047 vec_offset, scale, vec_oprnd, final_mask);
9048 else
9049 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9050 dataref_ptr, vec_offset,
9051 scale, vec_oprnd);
9052 gimple_call_set_nothrow (call, true);
9053 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9054 new_stmt = call;
9056 else if (gs_info.decl)
9058 /* The builtin decls path for scatter is legacy, x86 only. */
9059 gcc_assert (nunits.is_constant ()
9060 && (!final_mask
9061 || SCALAR_INT_MODE_P
9062 (TYPE_MODE (TREE_TYPE (final_mask)))));
9063 if (costing_p)
9065 unsigned int cnunits = vect_nunits_for_cost (vectype);
9066 inside_cost
9067 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9068 stmt_info, 0, vect_body);
9069 continue;
9071 poly_uint64 offset_nunits
9072 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
9073 if (known_eq (nunits, offset_nunits))
9075 new_stmt = vect_build_one_scatter_store_call
9076 (vinfo, stmt_info, gsi, &gs_info,
9077 dataref_ptr, vec_offsets[vec_num * j + i],
9078 vec_oprnd, final_mask);
9079 vect_finish_stmt_generation (vinfo, stmt_info,
9080 new_stmt, gsi);
9082 else if (known_eq (nunits, offset_nunits * 2))
9084 /* We have an offset vector with half the number of
9085 lanes, but the builtins will store full vectype
9086 data from the lower lanes. */
9087 new_stmt = vect_build_one_scatter_store_call
9088 (vinfo, stmt_info, gsi, &gs_info,
9089 dataref_ptr,
9090 vec_offsets[2 * vec_num * j + 2 * i],
9091 vec_oprnd, final_mask);
9092 vect_finish_stmt_generation (vinfo, stmt_info,
9093 new_stmt, gsi);
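/* The call above stored the low half of VEC_OPRND.  Build a permutation
   that moves the high lanes into the low positions for the second call;
   e.g. for COUNT == 8 the selector is { 4, 5, 6, 7, 4, 5, 6, 7 }.  */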
9094 int count = nunits.to_constant ();
9095 vec_perm_builder sel (count, count, 1);
9096 sel.quick_grow (count);
9097 for (int i = 0; i < count; ++i)
9098 sel[i] = i | (count / 2);
9099 vec_perm_indices indices (sel, 2, count);
9100 tree perm_mask
9101 = vect_gen_perm_mask_checked (vectype, indices);
9102 new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
9103 vec_oprnd, vec_oprnd,
9104 perm_mask);
9105 vec_oprnd = make_ssa_name (vectype);
9106 gimple_set_lhs (new_stmt, vec_oprnd);
9107 vect_finish_stmt_generation (vinfo, stmt_info,
9108 new_stmt, gsi);
9109 if (final_mask)
9111 new_stmt = gimple_build_assign (NULL_TREE,
9112 VEC_UNPACK_HI_EXPR,
9113 final_mask);
9114 final_mask = make_ssa_name
9115 (truth_type_for (gs_info.offset_vectype));
9116 gimple_set_lhs (new_stmt, final_mask);
9117 vect_finish_stmt_generation (vinfo, stmt_info,
9118 new_stmt, gsi);
9120 new_stmt = vect_build_one_scatter_store_call
9121 (vinfo, stmt_info, gsi, &gs_info,
9122 dataref_ptr,
9123 vec_offsets[2 * vec_num * j + 2 * i + 1],
9124 vec_oprnd, final_mask);
9125 vect_finish_stmt_generation (vinfo, stmt_info,
9126 new_stmt, gsi);
9128 else if (known_eq (nunits * 2, offset_nunits))
9130 /* We have an offset vector with double the number of
9131 lanes. Select the low/high part accordingly. */
9132 vec_offset = vec_offsets[(vec_num * j + i) / 2];
9133 if ((vec_num * j + i) & 1)
9135 int count = offset_nunits.to_constant ();
9136 vec_perm_builder sel (count, count, 1);
9137 sel.quick_grow (count);
9138 for (int i = 0; i < count; ++i)
9139 sel[i] = i | (count / 2);
9140 vec_perm_indices indices (sel, 2, count);
9141 tree perm_mask = vect_gen_perm_mask_checked
9142 (TREE_TYPE (vec_offset), indices);
9143 new_stmt = gimple_build_assign (NULL_TREE,
9144 VEC_PERM_EXPR,
9145 vec_offset,
9146 vec_offset,
9147 perm_mask);
9148 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
9149 gimple_set_lhs (new_stmt, vec_offset);
9150 vect_finish_stmt_generation (vinfo, stmt_info,
9151 new_stmt, gsi);
9153 new_stmt = vect_build_one_scatter_store_call
9154 (vinfo, stmt_info, gsi, &gs_info,
9155 dataref_ptr, vec_offset,
9156 vec_oprnd, final_mask);
9157 vect_finish_stmt_generation (vinfo, stmt_info,
9158 new_stmt, gsi);
9160 else
9161 gcc_unreachable ();
9163 else
9165 /* Emulated scatter. */
9166 gcc_assert (!final_mask);
9167 if (costing_p)
9169 unsigned int cnunits = vect_nunits_for_cost (vectype);
9170 /* For emulated scatter N offset vector element extracts
9171 (we assume the scalar scaling and ptr + offset add are
9172 consumed by the store). */
9173 inside_cost
9174 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9175 stmt_info, 0, vect_body);
9176 /* N scalar stores plus extracting the elements. */
9177 inside_cost
9178 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9179 stmt_info, 0, vect_body);
9180 inside_cost
9181 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9182 stmt_info, 0, vect_body);
9183 continue;
9186 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9187 unsigned HOST_WIDE_INT const_offset_nunits
9188 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
9189 vec<constructor_elt, va_gc> *ctor_elts;
9190 vec_alloc (ctor_elts, const_nunits);
9191 gimple_seq stmts = NULL;
9192 tree elt_type = TREE_TYPE (vectype);
9193 unsigned HOST_WIDE_INT elt_size
9194 = tree_to_uhwi (TYPE_SIZE (elt_type));
9195 /* For now we only support offset vectors with at least
9196 as many elements as the data vector. */
9197 unsigned HOST_WIDE_INT factor
9198 = const_offset_nunits / const_nunits;
9199 vec_offset = vec_offsets[(vec_num * j + i) / factor];
9200 unsigned elt_offset
9201 = ((vec_num * j + i) % factor) * const_nunits;
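/* For example, with CONST_OFFSET_NUNITS == 8 and CONST_NUNITS == 4, FACTOR
   is 2: the first data vector uses offset elements 0-3 and the next one
   uses elements 4-7 of the same offset vector.  */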
9202 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9203 tree scale = size_int (gs_info.scale);
9204 align = get_object_alignment (DR_REF (first_dr_info->dr));
9205 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
9206 for (unsigned k = 0; k < const_nunits; ++k)
9208 /* Compute the offsetted pointer. */
9209 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9210 bitsize_int (k + elt_offset));
9211 tree idx
9212 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
9213 vec_offset, TYPE_SIZE (idx_type), boff);
9214 idx = gimple_convert (&stmts, sizetype, idx);
9215 idx = gimple_build (&stmts, MULT_EXPR, sizetype,
9216 idx, scale);
9217 tree ptr
9218 = gimple_build (&stmts, PLUS_EXPR,
9219 TREE_TYPE (dataref_ptr),
9220 dataref_ptr, idx);
9221 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9222 /* Extract the element to be stored. */
9223 tree elt
9224 = gimple_build (&stmts, BIT_FIELD_REF,
9225 TREE_TYPE (vectype),
9226 vec_oprnd, TYPE_SIZE (elt_type),
9227 bitsize_int (k * elt_size));
9228 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9229 stmts = NULL;
9230 tree ref
9231 = build2 (MEM_REF, ltype, ptr,
9232 build_int_cst (ref_type, 0));
9233 new_stmt = gimple_build_assign (ref, elt);
9234 vect_finish_stmt_generation (vinfo, stmt_info,
9235 new_stmt, gsi);
9237 if (slp)
9238 slp_node->push_vec_def (new_stmt);
9241 if (!slp && !costing_p)
9242 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9245 if (!slp && !costing_p)
9246 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9248 if (costing_p && dump_enabled_p ())
9249 dump_printf_loc (MSG_NOTE, vect_location,
9250 "vect_model_store_cost: inside_cost = %d, "
9251 "prologue_cost = %d .\n",
9252 inside_cost, prologue_cost);
9254 return true;
9257 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9258 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9259 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9260 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9262 unsigned inside_cost = 0, prologue_cost = 0;
9263 /* For costing some adjacent vector stores, we'd like to cost them
9264 once with their total number instead of costing each one by one. */
9265 unsigned int n_adjacent_stores = 0;
9266 auto_vec<tree> result_chain (group_size);
9267 auto_vec<tree, 1> vec_oprnds;
9268 for (j = 0; j < ncopies; j++)
9270 gimple *new_stmt;
9271 if (j == 0)
9273 if (slp && !costing_p)
9275 /* Get vectorized arguments for SLP_NODE. */
9276 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
9277 &vec_oprnds, mask, &vec_masks);
9278 vec_oprnd = vec_oprnds[0];
9279 if (mask)
9280 vec_mask = vec_masks[0];
9282 else
9284 /* For interleaved stores we collect vectorized defs for all the
9285 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9286 input to vect_permute_store_chain().
9288 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9289 is of size 1. */
9290 stmt_vec_info next_stmt_info = first_stmt_info;
9291 for (i = 0; i < group_size; i++)
9293 /* Since gaps are not supported for interleaved stores,
9294 DR_GROUP_SIZE is the exact number of stmts in the chain.
9295 Therefore, NEXT_STMT_INFO can't be NULL. In case
9296 there is no interleaving, DR_GROUP_SIZE is 1,
9297 and only one iteration of the loop will be executed. */
9298 op = vect_get_store_rhs (next_stmt_info);
9299 if (costing_p)
9300 update_prologue_cost (&prologue_cost, op);
9301 else
9303 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
9304 ncopies, op,
9305 gvec_oprnds[i]);
9306 vec_oprnd = (*gvec_oprnds[i])[0];
9307 dr_chain.quick_push (vec_oprnd);
9309 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9311 if (mask && !costing_p)
9313 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9314 mask, &vec_masks,
9315 mask_vectype);
9316 vec_mask = vec_masks[0];
9320 /* We should have caught mismatched types earlier. */
9321 gcc_assert (costing_p
9322 || useless_type_conversion_p (vectype,
9323 TREE_TYPE (vec_oprnd)));
9324 bool simd_lane_access_p
9325 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9326 if (!costing_p
9327 && simd_lane_access_p
9328 && !loop_masks
9329 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9330 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9331 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9332 && integer_zerop (DR_INIT (first_dr_info->dr))
9333 && alias_sets_conflict_p (get_alias_set (aggr_type),
9334 get_alias_set (TREE_TYPE (ref_type))))
9336 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9337 dataref_offset = build_int_cst (ref_type, 0);
9339 else if (!costing_p)
9340 dataref_ptr
9341 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9342 simd_lane_access_p ? loop : NULL,
9343 offset, &dummy, gsi, &ptr_incr,
9344 simd_lane_access_p, bump);
9346 else if (!costing_p)
9348 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9349 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9350 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9351 of size 1. */
9352 for (i = 0; i < group_size; i++)
9354 vec_oprnd = (*gvec_oprnds[i])[j];
9355 dr_chain[i] = vec_oprnd;
9357 if (mask)
9358 vec_mask = vec_masks[j];
9359 if (dataref_offset)
9360 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9361 else
9362 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9363 stmt_info, bump);
9366 new_stmt = NULL;
9367 if (grouped_store)
9369 /* Permute. */
9370 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9371 if (costing_p)
9373 int group_size = DR_GROUP_SIZE (first_stmt_info);
9374 int nstmts = ceil_log2 (group_size) * group_size;
9375 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
9376 stmt_info, 0, vect_body);
9377 if (dump_enabled_p ())
9378 dump_printf_loc (MSG_NOTE, vect_location,
9379 "vect_model_store_cost: "
9380 "strided group_size = %d .\n",
9381 group_size);
9383 else
9384 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9385 gsi, &result_chain);
9388 stmt_vec_info next_stmt_info = first_stmt_info;
9389 for (i = 0; i < vec_num; i++)
9391 if (!costing_p)
9393 if (slp)
9394 vec_oprnd = vec_oprnds[i];
9395 else if (grouped_store)
9396 /* For grouped stores vectorized defs are interleaved in
9397 vect_permute_store_chain(). */
9398 vec_oprnd = result_chain[i];
9401 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9403 if (costing_p)
9404 inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9405 stmt_info, 0, vect_body);
9406 else
9408 tree perm_mask = perm_mask_for_reverse (vectype);
9409 tree perm_dest = vect_create_destination_var (
9410 vect_get_store_rhs (stmt_info), vectype);
9411 tree new_temp = make_ssa_name (perm_dest);
9413 /* Generate the permute statement. */
9414 gimple *perm_stmt
9415 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9416 vec_oprnd, perm_mask);
9417 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
9418 gsi);
9420 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9421 vec_oprnd = new_temp;
9425 if (costing_p)
9427 n_adjacent_stores++;
9429 if (!slp)
9431 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9432 if (!next_stmt_info)
9433 break;
9436 continue;
9439 tree final_mask = NULL_TREE;
9440 tree final_len = NULL_TREE;
9441 tree bias = NULL_TREE;
9442 if (loop_masks)
9443 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9444 vec_num * ncopies, vectype,
9445 vec_num * j + i);
9446 if (slp && vec_mask)
9447 vec_mask = vec_masks[i];
9448 if (vec_mask)
9449 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9450 vec_mask, gsi);
9452 if (i > 0)
9453 /* Bump the vector pointer. */
9454 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9455 stmt_info, bump);
9457 unsigned misalign;
9458 unsigned HOST_WIDE_INT align;
9459 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9460 if (alignment_support_scheme == dr_aligned)
9461 misalign = 0;
9462 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9464 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9465 misalign = 0;
9467 else
9468 misalign = misalignment;
9469 if (dataref_offset == NULL_TREE
9470 && TREE_CODE (dataref_ptr) == SSA_NAME)
9471 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9472 misalign);
9473 align = least_bit_hwi (misalign | align);
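/* The access is known to be aligned to the least significant bit set in
   MISALIGN | ALIGN, i.e. to the largest power of two dividing both the
   target alignment and the misalignment.  */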
9475 /* Compute the partial-vector IFN to use when LOOP_LENS or FINAL_MASK is valid. */
9476 machine_mode vmode = TYPE_MODE (vectype);
9477 machine_mode new_vmode = vmode;
9478 internal_fn partial_ifn = IFN_LAST;
9479 if (loop_lens)
9481 opt_machine_mode new_ovmode
9482 = get_len_load_store_mode (vmode, false, &partial_ifn);
9483 new_vmode = new_ovmode.require ();
9484 unsigned factor
9485 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
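/* When the target performs the partial store through a VnQImode view,
   FACTOR scales the length operand from elements to bytes.  */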
9486 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9487 vec_num * ncopies, vectype,
9488 vec_num * j + i, factor);
9490 else if (final_mask)
9492 if (!can_vec_mask_load_store_p (
9493 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9494 &partial_ifn))
9495 gcc_unreachable ();
9498 if (partial_ifn == IFN_MASK_LEN_STORE)
9500 if (!final_len)
9502 /* Pass VF value to 'len' argument of
9503 MASK_LEN_STORE if LOOP_LENS is invalid. */
9504 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9506 if (!final_mask)
9508 /* Pass all ones value to 'mask' argument of
9509 MASK_LEN_STORE if final_mask is invalid. */
9510 mask_vectype = truth_type_for (vectype);
9511 final_mask = build_minus_one_cst (mask_vectype);
9514 if (final_len)
9516 signed char biasval
9517 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9519 bias = build_int_cst (intQI_type_node, biasval);
9522 /* Arguments are ready. Create the new vector stmt. */
9523 if (final_len)
9525 gcall *call;
9526 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9527 /* Need conversion if it's wrapped with VnQI. */
9528 if (vmode != new_vmode)
9530 tree new_vtype
9531 = build_vector_type_for_mode (unsigned_intQI_type_node,
9532 new_vmode);
9533 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9534 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9535 gassign *new_stmt
9536 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9537 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9538 vec_oprnd = var;
9541 if (partial_ifn == IFN_MASK_LEN_STORE)
9542 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9543 dataref_ptr, ptr, final_mask,
9544 final_len, bias, vec_oprnd);
9545 else
9546 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9547 dataref_ptr, ptr, final_len,
9548 bias, vec_oprnd);
9549 gimple_call_set_nothrow (call, true);
9550 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9551 new_stmt = call;
9553 else if (final_mask)
9555 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9556 gcall *call
9557 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9558 ptr, final_mask, vec_oprnd);
9559 gimple_call_set_nothrow (call, true);
9560 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9561 new_stmt = call;
9563 else
9565 data_ref
9566 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9567 dataref_offset ? dataref_offset
9568 : build_int_cst (ref_type, 0));
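/* For dr_aligned accesses keep the vectype's natural alignment on the
   MEM_REF; otherwise record the weaker alignment we can rely on.  */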
9569 if (alignment_support_scheme == dr_aligned)
9571 else
9572 TREE_TYPE (data_ref)
9573 = build_aligned_type (TREE_TYPE (data_ref),
9574 align * BITS_PER_UNIT);
9575 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9576 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9577 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9580 if (slp)
9581 continue;
9583 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9584 if (!next_stmt_info)
9585 break;
9587 if (!slp && !costing_p)
9589 if (j == 0)
9590 *vec_stmt = new_stmt;
9591 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9595 if (costing_p)
9597 if (n_adjacent_stores > 0)
9598 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
9599 alignment_support_scheme, misalignment,
9600 &inside_cost, cost_vec);
9602 /* When vectorizing a store into the function result, assign
9603 a penalty if the function returns in a multi-register location.
9604 In this case we assume we'll end up with having to spill the
9605 vector result and do piecewise loads as a conservative estimate. */
9606 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9607 if (base
9608 && (TREE_CODE (base) == RESULT_DECL
9609 || (DECL_P (base) && cfun_returns (base)))
9610 && !aggregate_value_p (base, cfun->decl))
9612 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9613 /* ??? Handle PARALLEL in some way. */
9614 if (REG_P (reg))
9616 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9617 /* Assume that a single reg-reg move is possible and cheap;
9618 do not account for the vector to gp register move cost. */
9619 if (nregs > 1)
9621 /* Spill. */
9622 prologue_cost
9623 += record_stmt_cost (cost_vec, ncopies, vector_store,
9624 stmt_info, 0, vect_epilogue);
9625 /* Loads. */
9626 prologue_cost
9627 += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
9628 stmt_info, 0, vect_epilogue);
9632 if (dump_enabled_p ())
9633 dump_printf_loc (MSG_NOTE, vect_location,
9634 "vect_model_store_cost: inside_cost = %d, "
9635 "prologue_cost = %d .\n",
9636 inside_cost, prologue_cost);
9639 return true;
9642 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9643 VECTOR_CST mask. No checks are made that the target platform supports the
9644 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9645 vect_gen_perm_mask_checked. */
9647 tree
9648 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9650 tree mask_type;
9652 poly_uint64 nunits = sel.length ();
9653 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9655 mask_type = build_vector_type (ssizetype, nunits);
9656 return vec_perm_indices_to_tree (mask_type, sel);
9659 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9660 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9662 tree
9663 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9665 machine_mode vmode = TYPE_MODE (vectype);
9666 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9667 return vect_gen_perm_mask_any (vectype, sel);
9670 /* Given vector variables X and Y that were generated for the scalar
9671 STMT_INFO, generate instructions to permute the vector elements of X and Y
9672 using permutation mask MASK_VEC, insert them at *GSI and return the
9673 permuted vector variable. */
9675 static tree
9676 permute_vec_elements (vec_info *vinfo,
9677 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9678 gimple_stmt_iterator *gsi)
9680 tree vectype = TREE_TYPE (x);
9681 tree perm_dest, data_ref;
9682 gimple *perm_stmt;
9684 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9685 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9686 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9687 else
9688 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9689 data_ref = make_ssa_name (perm_dest);
9691 /* Generate the permute statement. */
9692 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9693 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9695 return data_ref;
9698 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9699 inserting them on the loop's preheader edge. Returns true if we
9700 were successful in doing so (and thus STMT_INFO can then be moved),
9701 otherwise returns false. HOIST_P indicates whether we want to hoist the
9702 definitions of all SSA uses; it is false when we are costing. */
9704 static bool
9705 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9707 ssa_op_iter i;
9708 tree op;
9709 bool any = false;
9711 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9713 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9714 if (!gimple_nop_p (def_stmt)
9715 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9717 /* Make sure we don't need to recurse. While we could do
9718 so in simple cases, for more complex use webs we don't
9719 have an easy way to preserve stmt order to fulfil
9720 dependencies within them. */
9721 tree op2;
9722 ssa_op_iter i2;
9723 if (gimple_code (def_stmt) == GIMPLE_PHI)
9724 return false;
9725 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9727 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9728 if (!gimple_nop_p (def_stmt2)
9729 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9730 return false;
9732 any = true;
9736 if (!any)
9737 return true;
9739 if (!hoist_p)
9740 return true;
9742 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9744 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9745 if (!gimple_nop_p (def_stmt)
9746 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9748 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9749 gsi_remove (&gsi, false);
9750 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9754 return true;
9757 /* vectorizable_load.
9759 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9760 that can be vectorized.
9761 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9762 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9763 Return true if STMT_INFO is vectorizable in this way. */
9765 static bool
9766 vectorizable_load (vec_info *vinfo,
9767 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9768 gimple **vec_stmt, slp_tree slp_node,
9769 stmt_vector_for_cost *cost_vec)
9771 tree scalar_dest;
9772 tree vec_dest = NULL;
9773 tree data_ref = NULL;
9774 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9775 class loop *loop = NULL;
9776 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9777 bool nested_in_vect_loop = false;
9778 tree elem_type;
9779 /* Avoid a false positive uninitialized warning; see PR110652. */
9780 tree new_temp = NULL_TREE;
9781 machine_mode mode;
9782 tree dummy;
9783 tree dataref_ptr = NULL_TREE;
9784 tree dataref_offset = NULL_TREE;
9785 gimple *ptr_incr = NULL;
9786 int ncopies;
9787 int i, j;
9788 unsigned int group_size;
9789 poly_uint64 group_gap_adj;
9790 tree msq = NULL_TREE, lsq;
9791 tree realignment_token = NULL_TREE;
9792 gphi *phi = NULL;
9793 vec<tree> dr_chain = vNULL;
9794 bool grouped_load = false;
9795 stmt_vec_info first_stmt_info;
9796 stmt_vec_info first_stmt_info_for_drptr = NULL;
9797 bool compute_in_loop = false;
9798 class loop *at_loop;
9799 int vec_num;
9800 bool slp = (slp_node != NULL);
9801 bool slp_perm = false;
9802 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9803 poly_uint64 vf;
9804 tree aggr_type;
9805 gather_scatter_info gs_info;
9806 tree ref_type;
9807 enum vect_def_type mask_dt = vect_unknown_def_type;
9809 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9810 return false;
9812 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9813 && ! vec_stmt)
9814 return false;
9816 if (!STMT_VINFO_DATA_REF (stmt_info))
9817 return false;
9819 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9820 int mask_index = -1;
9821 slp_tree slp_op = NULL;
9822 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9824 scalar_dest = gimple_assign_lhs (assign);
9825 if (TREE_CODE (scalar_dest) != SSA_NAME)
9826 return false;
9828 tree_code code = gimple_assign_rhs_code (assign);
9829 if (code != ARRAY_REF
9830 && code != BIT_FIELD_REF
9831 && code != INDIRECT_REF
9832 && code != COMPONENT_REF
9833 && code != IMAGPART_EXPR
9834 && code != REALPART_EXPR
9835 && code != MEM_REF
9836 && TREE_CODE_CLASS (code) != tcc_declaration)
9837 return false;
9839 else
9841 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9842 if (!call || !gimple_call_internal_p (call))
9843 return false;
9845 internal_fn ifn = gimple_call_internal_fn (call);
9846 if (!internal_load_fn_p (ifn))
9847 return false;
9849 scalar_dest = gimple_call_lhs (call);
9850 if (!scalar_dest)
9851 return false;
9853 mask_index = internal_fn_mask_index (ifn);
9854 if (mask_index >= 0 && slp_node)
9855 mask_index = vect_slp_child_index_for_operand
9856 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9857 if (mask_index >= 0
9858 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9859 &mask, &slp_op, &mask_dt, &mask_vectype))
9860 return false;
9863 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9864 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9866 if (loop_vinfo)
9868 loop = LOOP_VINFO_LOOP (loop_vinfo);
9869 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9870 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9872 else
9873 vf = 1;
9875 /* Multiple types in SLP are handled by creating the appropriate number of
9876 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9877 case of SLP. */
9878 if (slp)
9879 ncopies = 1;
9880 else
9881 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9883 gcc_assert (ncopies >= 1);
9885 /* FORNOW. This restriction should be relaxed. */
9886 if (nested_in_vect_loop && ncopies > 1)
9888 if (dump_enabled_p ())
9889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9890 "multiple types in nested loop.\n");
9891 return false;
9894 /* Invalidate assumptions made by dependence analysis when vectorization
9895 on the unrolled body effectively re-orders stmts. */
9896 if (ncopies > 1
9897 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9898 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9899 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9901 if (dump_enabled_p ())
9902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9903 "cannot perform implicit CSE when unrolling "
9904 "with negative dependence distance\n");
9905 return false;
9908 elem_type = TREE_TYPE (vectype);
9909 mode = TYPE_MODE (vectype);
9911 /* FORNOW. In some cases we can vectorize even if the data type is not
9912 supported (e.g. data copies). */
9913 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9915 if (dump_enabled_p ())
9916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9917 "Aligned load, but unsupported type.\n");
9918 return false;
9921 /* Check if the load is a part of an interleaving chain. */
9922 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9924 grouped_load = true;
9925 /* FORNOW */
9926 gcc_assert (!nested_in_vect_loop);
9927 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9929 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9930 group_size = DR_GROUP_SIZE (first_stmt_info);
9932 /* Refuse non-SLP vectorization of SLP-only groups. */
9933 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9935 if (dump_enabled_p ())
9936 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9937 "cannot vectorize load in non-SLP mode.\n");
9938 return false;
9941 /* Invalidate assumptions made by dependence analysis when vectorization
9942 on the unrolled body effectively re-orders stmts. */
9943 if (!PURE_SLP_STMT (stmt_info)
9944 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9945 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9946 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9948 if (dump_enabled_p ())
9949 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9950 "cannot perform implicit CSE when performing "
9951 "group loads with negative dependence distance\n");
9952 return false;
9955 else
9956 group_size = 1;
9958 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9960 slp_perm = true;
9962 if (!loop_vinfo)
9964 /* In BB vectorization we may not actually use a loaded vector
9965 accessing elements in excess of DR_GROUP_SIZE. */
9966 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9967 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
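/* For example, with DR_GROUP_SIZE == 6 and a four-element vectype only
   the first four group elements are covered by a vector that can be
   loaded without reading past the group, so a permutation index of 4
   or more is rejected.  */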
9968 unsigned HOST_WIDE_INT nunits;
9969 unsigned j, k, maxk = 0;
9970 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9971 if (k > maxk)
9972 maxk = k;
9973 tree vectype = SLP_TREE_VECTYPE (slp_node);
9974 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9975 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9977 if (dump_enabled_p ())
9978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9979 "BB vectorization with gaps at the end of "
9980 "a load is not supported\n");
9981 return false;
9985 auto_vec<tree> tem;
9986 unsigned n_perms;
9987 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9988 true, &n_perms))
9990 if (dump_enabled_p ())
9991 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9992 vect_location,
9993 "unsupported load permutation\n");
9994 return false;
9998 vect_memory_access_type memory_access_type;
9999 enum dr_alignment_support alignment_support_scheme;
10000 int misalignment;
10001 poly_int64 poffset;
10002 internal_fn lanes_ifn;
10003 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
10004 ncopies, &memory_access_type, &poffset,
10005 &alignment_support_scheme, &misalignment, &gs_info,
10006 &lanes_ifn))
10007 return false;
10009 if (mask)
10011 if (memory_access_type == VMAT_CONTIGUOUS)
10013 machine_mode vec_mode = TYPE_MODE (vectype);
10014 if (!VECTOR_MODE_P (vec_mode)
10015 || !can_vec_mask_load_store_p (vec_mode,
10016 TYPE_MODE (mask_vectype), true))
10017 return false;
10019 else if (memory_access_type != VMAT_LOAD_STORE_LANES
10020 && memory_access_type != VMAT_GATHER_SCATTER)
10022 if (dump_enabled_p ())
10023 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10024 "unsupported access type for masked load.\n");
10025 return false;
10027 else if (memory_access_type == VMAT_GATHER_SCATTER
10028 && gs_info.ifn == IFN_LAST
10029 && !gs_info.decl)
10031 if (dump_enabled_p ())
10032 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10033 "unsupported masked emulated gather.\n");
10034 return false;
10038 bool costing_p = !vec_stmt;
10040 if (costing_p) /* transformation not required. */
10042 if (slp_node
10043 && mask
10044 && !vect_maybe_update_slp_op_vectype (slp_op,
10045 mask_vectype))
10047 if (dump_enabled_p ())
10048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10049 "incompatible vector types for invariants\n");
10050 return false;
10053 if (!slp)
10054 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10056 if (loop_vinfo
10057 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10058 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10059 VLS_LOAD, group_size,
10060 memory_access_type, &gs_info,
10061 mask);
10063 if (dump_enabled_p ()
10064 && memory_access_type != VMAT_ELEMENTWISE
10065 && memory_access_type != VMAT_GATHER_SCATTER
10066 && alignment_support_scheme != dr_aligned)
10067 dump_printf_loc (MSG_NOTE, vect_location,
10068 "Vectorizing an unaligned access.\n");
10070 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10071 vinfo->any_known_not_updated_vssa = true;
10073 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10076 if (!slp)
10077 gcc_assert (memory_access_type
10078 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10080 if (dump_enabled_p () && !costing_p)
10081 dump_printf_loc (MSG_NOTE, vect_location,
10082 "transform load. ncopies = %d\n", ncopies);
10084 /* Transform. */
10086 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10087 ensure_base_align (dr_info);
10089 if (memory_access_type == VMAT_INVARIANT)
10091 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10092 /* If we have versioned for aliasing or the loop doesn't
10093 have any data dependencies that would preclude this,
10094 then we are sure this is a loop invariant load and
10095 thus we can insert it on the preheader edge.
10096 TODO: hoist_defs_of_uses should ideally be computed
10097 once at analysis time, remembered and used at
10098 transform time. */
10099 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10100 && !nested_in_vect_loop
10101 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
10102 if (costing_p)
10104 enum vect_cost_model_location cost_loc
10105 = hoist_p ? vect_prologue : vect_body;
10106 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10107 stmt_info, 0, cost_loc);
10108 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
10109 cost_loc);
10110 unsigned int prologue_cost = hoist_p ? cost : 0;
10111 unsigned int inside_cost = hoist_p ? 0 : cost;
10112 if (dump_enabled_p ())
10113 dump_printf_loc (MSG_NOTE, vect_location,
10114 "vect_model_load_cost: inside_cost = %d, "
10115 "prologue_cost = %d .\n",
10116 inside_cost, prologue_cost);
10117 return true;
10119 if (hoist_p)
10121 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
10122 if (dump_enabled_p ())
10123 dump_printf_loc (MSG_NOTE, vect_location,
10124 "hoisting out of the vectorized loop: %G",
10125 (gimple *) stmt);
10126 scalar_dest = copy_ssa_name (scalar_dest);
10127 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10128 edge pe = loop_preheader_edge (loop);
10129 gphi *vphi = get_virtual_phi (loop->header);
10130 tree vuse;
10131 if (vphi)
10132 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10133 else
10134 vuse = gimple_vuse (gsi_stmt (*gsi));
10135 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10136 gimple_set_vuse (new_stmt, vuse);
10137 gsi_insert_on_edge_immediate (pe, new_stmt);
10139 /* These copies are all equivalent. */
10140 if (hoist_p)
10141 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10142 vectype, NULL);
10143 else
10145 gimple_stmt_iterator gsi2 = *gsi;
10146 gsi_next (&gsi2);
10147 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10148 vectype, &gsi2);
10150 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10151 if (slp)
10152 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10153 slp_node->push_vec_def (new_stmt);
10154 else
10156 for (j = 0; j < ncopies; ++j)
10157 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10158 *vec_stmt = new_stmt;
10160 return true;
10163 if (memory_access_type == VMAT_ELEMENTWISE
10164 || memory_access_type == VMAT_STRIDED_SLP)
10166 gimple_stmt_iterator incr_gsi;
10167 bool insert_after;
10168 tree offvar;
10169 tree ivstep;
10170 tree running_off;
10171 vec<constructor_elt, va_gc> *v = NULL;
10172 tree stride_base, stride_step, alias_off;
10173 /* Checked by get_load_store_type. */
10174 unsigned int const_nunits = nunits.to_constant ();
10175 unsigned HOST_WIDE_INT cst_offset = 0;
10176 tree dr_offset;
10177 unsigned int inside_cost = 0;
10179 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10180 gcc_assert (!nested_in_vect_loop);
10182 if (grouped_load)
10184 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10185 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10187 else
10189 first_stmt_info = stmt_info;
10190 first_dr_info = dr_info;
10193 if (slp && grouped_load)
10195 group_size = DR_GROUP_SIZE (first_stmt_info);
10196 ref_type = get_group_alias_ptr_type (first_stmt_info);
10198 else
10200 if (grouped_load)
10201 cst_offset
10202 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10203 * vect_get_place_in_interleaving_chain (stmt_info,
10204 first_stmt_info));
10205 group_size = 1;
10206 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10209 if (!costing_p)
10211 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10212 stride_base = fold_build_pointer_plus (
10213 DR_BASE_ADDRESS (first_dr_info->dr),
10214 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10215 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10216 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10218 /* For a load with loop-invariant (but other than power-of-2)
10219 stride (i.e. not a grouped access) like so:
10221 for (i = 0; i < n; i += stride)
10222 ... = array[i];
10224 we generate a new induction variable and new accesses to
10225 form a new vector (or vectors, depending on ncopies):
10227 for (j = 0; ; j += VF*stride)
10228 tmp1 = array[j];
10229 tmp2 = array[j + stride];
10231 vectemp = {tmp1, tmp2, ...}
10234 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10235 build_int_cst (TREE_TYPE (stride_step), vf));
10237 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10239 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10240 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10241 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10242 loop, &incr_gsi, insert_after,
10243 &offvar, NULL);
10245 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10248 running_off = offvar;
10249 alias_off = build_int_cst (ref_type, 0);
10250 int nloads = const_nunits;
10251 int lnel = 1;
10252 tree ltype = TREE_TYPE (vectype);
10253 tree lvectype = vectype;
10254 auto_vec<tree> dr_chain;
10255 if (memory_access_type == VMAT_STRIDED_SLP)
10257 if (group_size < const_nunits)
10259 /* First check if vec_init optab supports construction from vector
10260 elts directly. Otherwise avoid emitting a constructor of
10261 vector elements by performing the loads using an integer type
10262 of the same size, constructing a vector of those and then
10263 re-interpreting it as the original vector type. This avoids a
10264 huge runtime penalty due to the general inability to perform
10265 store forwarding from smaller stores to a larger load. */
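/* For example, with a V8SI vectype and GROUP_SIZE == 2 this performs four
   two-element loads (as V2SI or DImode pieces) and composes them into the
   V8SI vector.  */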
10266 tree ptype;
10267 tree vtype
10268 = vector_vector_composition_type (vectype,
10269 const_nunits / group_size,
10270 &ptype);
10271 if (vtype != NULL_TREE)
10273 nloads = const_nunits / group_size;
10274 lnel = group_size;
10275 lvectype = vtype;
10276 ltype = ptype;
10279 else
10281 nloads = 1;
10282 lnel = const_nunits;
10283 ltype = vectype;
10285 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10287 /* Load vector(1) scalar_type directly if the vectype has just one element. */
10288 else if (nloads == 1)
10289 ltype = vectype;
10291 if (slp)
10293 /* For SLP permutation support we need to load the whole group,
10294 not only the number of vector stmts the permutation result
10295 fits in. */
10296 if (slp_perm)
10298 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10299 variable VF. */
10300 unsigned int const_vf = vf.to_constant ();
10301 ncopies = CEIL (group_size * const_vf, const_nunits);
10302 dr_chain.create (ncopies);
10304 else
10305 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10307 unsigned int group_el = 0;
10308 unsigned HOST_WIDE_INT
10309 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10310 unsigned int n_groups = 0;
10311 /* For costing some adjacent vector loads, we'd like to cost them
10312 once with their total number instead of costing each one by one. */
10313 unsigned int n_adjacent_loads = 0;
10314 for (j = 0; j < ncopies; j++)
10316 if (nloads > 1 && !costing_p)
10317 vec_alloc (v, nloads);
10318 gimple *new_stmt = NULL;
10319 for (i = 0; i < nloads; i++)
10321 if (costing_p)
10323 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10324 avoid an ICE; see PR110776. */
10325 if (VECTOR_TYPE_P (ltype)
10326 && memory_access_type != VMAT_ELEMENTWISE)
10327 n_adjacent_loads++;
10328 else
10329 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10330 stmt_info, 0, vect_body);
10331 continue;
10333 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10334 group_el * elsz + cst_offset);
10335 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10336 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10337 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
10338 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10339 if (nloads > 1)
10340 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10341 gimple_assign_lhs (new_stmt));
10343 group_el += lnel;
10344 if (! slp
10345 || group_el == group_size)
10347 n_groups++;
10348 /* When doing SLP make sure not to load elements from
10349 the next vector iteration; those will not be accessed,
10350 so just use the last element again. See PR107451. */
10351 if (!slp || known_lt (n_groups, vf))
10353 tree newoff = copy_ssa_name (running_off);
10354 gimple *incr
10355 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10356 running_off, stride_step);
10357 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10358 running_off = newoff;
10360 group_el = 0;
10364 if (nloads > 1)
10366 if (costing_p)
10367 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10368 stmt_info, 0, vect_body);
10369 else
10371 tree vec_inv = build_constructor (lvectype, v);
10372 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10373 lvectype, gsi);
10374 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10375 if (lvectype != vectype)
10377 new_stmt
10378 = gimple_build_assign (make_ssa_name (vectype),
10379 VIEW_CONVERT_EXPR,
10380 build1 (VIEW_CONVERT_EXPR,
10381 vectype, new_temp));
10382 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10383 gsi);
10388 if (!costing_p)
10390 if (slp)
10392 if (slp_perm)
10393 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10394 else
10395 slp_node->push_vec_def (new_stmt);
10397 else
10399 if (j == 0)
10400 *vec_stmt = new_stmt;
10401 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10405 if (slp_perm)
10407 unsigned n_perms;
10408 if (costing_p)
10410 unsigned n_loads;
10411 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10412 true, &n_perms, &n_loads);
10413 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10414 first_stmt_info, 0, vect_body);
10416 else
10417 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10418 false, &n_perms);
10421 if (costing_p)
10423 if (n_adjacent_loads > 0)
10424 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10425 alignment_support_scheme, misalignment, false,
10426 &inside_cost, nullptr, cost_vec, cost_vec,
10427 true);
10428 if (dump_enabled_p ())
10429 dump_printf_loc (MSG_NOTE, vect_location,
10430 "vect_model_load_cost: inside_cost = %u, "
10431 "prologue_cost = 0 .\n",
10432 inside_cost);
10435 return true;
10438 if (memory_access_type == VMAT_GATHER_SCATTER
10439 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10440 grouped_load = false;
10442 if (grouped_load
10443 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10445 if (grouped_load)
10447 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10448 group_size = DR_GROUP_SIZE (first_stmt_info);
10450 else
10452 first_stmt_info = stmt_info;
10453 group_size = 1;
10455 /* For SLP vectorization we directly vectorize a subchain
10456 without permutation. */
10457 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10458 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10459 /* For BB vectorization always use the first stmt to base
10460 the data ref pointer on. */
10461 if (bb_vinfo)
10462 first_stmt_info_for_drptr
10463 = vect_find_first_scalar_stmt_in_slp (slp_node);
10465 /* Check if the chain of loads is already vectorized. */
10466 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10467 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10468 ??? But we can only do so if there is exactly one,
10469 as we have no way to get at the rest. Leave the CSE
10470 opportunity alone.
10471 ??? With the group load eventually participating
10472 in multiple different permutations (having multiple
10473 SLP nodes which refer to the same group) the CSE
10474 would even produce wrong code. See PR56270. */
10475 && !slp)
10477 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10478 return true;
10480 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10481 group_gap_adj = 0;
10483 /* VEC_NUM is the number of vect stmts to be created for this group. */
10484 if (slp)
10486 grouped_load = false;
10487 /* If an SLP permutation is from N elements to N elements,
10488 and if one vector holds a whole number of N-element groups, we can load
10489 the inputs to the permutation in the same way as an
10490 unpermuted sequence. In other cases we need to load the
10491 whole group, not only the number of vector stmts the
10492 permutation result fits in. */
10493 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10494 if (slp_perm
10495 && (group_size != scalar_lanes
10496 || !multiple_p (nunits, group_size)))
10498 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10499 variable VF; see vect_transform_slp_perm_load. */
10500 unsigned int const_vf = vf.to_constant ();
10501 unsigned int const_nunits = nunits.to_constant ();
10502 vec_num = CEIL (group_size * const_vf, const_nunits);
10503 group_gap_adj = vf * group_size - nunits * vec_num;
10505 else
10507 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10508 group_gap_adj
10509 = group_size - scalar_lanes;
10512 else
10513 vec_num = group_size;
10515 ref_type = get_group_alias_ptr_type (first_stmt_info);
10517 else
10519 first_stmt_info = stmt_info;
10520 first_dr_info = dr_info;
10521 group_size = vec_num = 1;
10522 group_gap_adj = 0;
10523 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10524 if (slp)
10525 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10528 gcc_assert (alignment_support_scheme);
10529 vec_loop_masks *loop_masks
10530 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10531 ? &LOOP_VINFO_MASKS (loop_vinfo)
10532 : NULL);
10533 vec_loop_lens *loop_lens
10534 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10535 ? &LOOP_VINFO_LENS (loop_vinfo)
10536 : NULL);
10538 /* Shouldn't use the length-based approach if fully masked. */
10539 gcc_assert (!loop_lens || !loop_masks);
10541 /* Targets with load-lane instructions must not require explicit
10542 realignment. vect_supportable_dr_alignment always returns either
10543 dr_aligned or dr_unaligned_supported for masked operations. */
10544 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10545 && !mask
10546 && !loop_masks)
10547 || alignment_support_scheme == dr_aligned
10548 || alignment_support_scheme == dr_unaligned_supported);
10550 /* In case the vectorization factor (VF) is bigger than the number
10551 of elements that we can fit in a vectype (nunits), we have to generate
10552 more than one vector stmt, i.e. we need to "unroll" the
10553 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
10554 from one copy of the vector stmt to the next, in the field
10555 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10556 stages to find the correct vector defs to be used when vectorizing
10557 stmts that use the defs of the current stmt. The example below
10558 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10559 need to create 4 vectorized stmts):
10561 before vectorization:
10562 RELATED_STMT VEC_STMT
10563 S1: x = memref - -
10564 S2: z = x + 1 - -
10566 step 1: vectorize stmt S1:
10567 We first create the vector stmt VS1_0, and, as usual, record a
10568 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10569 Next, we create the vector stmt VS1_1, and record a pointer to
10570 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10571 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10572 stmts and pointers:
10573 RELATED_STMT VEC_STMT
10574 VS1_0: vx0 = memref0 VS1_1 -
10575 VS1_1: vx1 = memref1 VS1_2 -
10576 VS1_2: vx2 = memref2 VS1_3 -
10577 VS1_3: vx3 = memref3 - -
10578 S1: x = load - VS1_0
10579 S2: z = x + 1 - -
10582 /* In case of interleaving (non-unit grouped access):
10584 S1: x2 = &base + 2
10585 S2: x0 = &base
10586 S3: x1 = &base + 1
10587 S4: x3 = &base + 3
10589 Vectorized loads are created in the order of memory accesses
10590 starting from the access of the first stmt of the chain:
10592 VS1: vx0 = &base
10593 VS2: vx1 = &base + vec_size*1
10594 VS3: vx2 = &base + vec_size*2
10595 VS4: vx3 = &base + vec_size*3
10597 Then permutation statements are generated:
10599 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10600 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10603 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10604 (the order of the data-refs in the output of vect_permute_load_chain
10605 corresponds to the order of scalar stmts in the interleaving chain - see
10606 the documentation of vect_permute_load_chain()).
10607 The generation of permutation stmts and recording them in
10608 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10610 In case of both multiple types and interleaving, the vector loads and
10611 permutation stmts above are created for every copy. The result vector
10612 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10613 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
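/* Illustrative sketch only (hypothetical operands, not taken from any
   particular testcase): with V4SI vectors (nunits = 4) and an interleaved
   group of two scalar loads a[i] and b[i] laid out as { a0, b0, a1, b1, ... },
   the grouped vector loads and even/odd permutes described above would be:

     vx0 = { a0, b0, a1, b1 }                            (load at &base)
     vx1 = { a2, b2, a3, b3 }                            (load at &base + vec_size)
     vx4 = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>  ==  { a0, a1, a2, a3 }
     vx5 = VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>  ==  { b0, b1, b2, b3 }  */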
10615 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10616 on a target that supports unaligned accesses (dr_unaligned_supported)
10617 we generate the following code:
10618 p = initial_addr;
10619 indx = 0;
10620 loop {
10621 p = p + indx * vectype_size;
10622 vec_dest = *(p);
10623 indx = indx + 1;
10626 Otherwise, the data reference is potentially unaligned on a target that
10627 does not support unaligned accesses (dr_explicit_realign_optimized) -
10628 then generate the following code, in which the data in each iteration is
10629 obtained by two vector loads, one from the previous iteration, and one
10630 from the current iteration:
10631 p1 = initial_addr;
10632 msq_init = *(floor(p1))
10633 p2 = initial_addr + VS - 1;
10634 realignment_token = call target_builtin;
10635 indx = 0;
10636 loop {
10637 p2 = p2 + indx * vectype_size
10638 lsq = *(floor(p2))
10639 vec_dest = realign_load (msq, lsq, realignment_token)
10640 indx = indx + 1;
10641 msq = lsq;
10642 } */
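/* A rough illustration of the realignment scheme above (hypothetical V4SI
   access whose address p is 4 bytes past a 16-byte boundary; not a literal
   dump):
     msq = *(floor(p))          == { w,  x0, x1, x2 }   (aligned load below p)
     lsq = *(floor(p + VS - 1)) == { x3, x4, x5, x6 }   (next aligned vector)
     vec_dest = realign_load (msq, lsq, realignment_token)
              == { x0, x1, x2, x3 }                     (the desired unaligned data)  */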
10644 /* If the misalignment remains the same throughout the execution of the
10645 loop, we can create the init_addr and permutation mask at the loop
10646 preheader. Otherwise, it needs to be created inside the loop.
10647 This can only occur when vectorizing memory accesses in the inner-loop
10648 nested within an outer-loop that is being vectorized. */
10650 if (nested_in_vect_loop
10651 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10652 GET_MODE_SIZE (TYPE_MODE (vectype))))
10654 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10655 compute_in_loop = true;
10658 bool diff_first_stmt_info
10659 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10661 tree offset = NULL_TREE;
10662 if ((alignment_support_scheme == dr_explicit_realign_optimized
10663 || alignment_support_scheme == dr_explicit_realign)
10664 && !compute_in_loop)
10666 /* If we have a different first_stmt_info, we can't set up realignment
10667 here, since we can't guarantee that first_stmt_info's DR has been
10668 initialized yet; instead use first_stmt_info_for_drptr's DR, bumping by
10669 the distance from first_stmt_info's DR as below. */
10670 if (!costing_p)
10672 if (!diff_first_stmt_info)
10673 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10674 &realignment_token,
10675 alignment_support_scheme, NULL_TREE,
10676 &at_loop);
10677 if (alignment_support_scheme == dr_explicit_realign_optimized)
10679 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10680 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10681 size_one_node);
10682 gcc_assert (!first_stmt_info_for_drptr);
10686 else
10687 at_loop = loop;
10689 if (!known_eq (poffset, 0))
10690 offset = (offset
10691 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10692 : size_int (poffset));
10694 tree bump;
10695 tree vec_offset = NULL_TREE;
10696 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10698 aggr_type = NULL_TREE;
10699 bump = NULL_TREE;
10701 else if (memory_access_type == VMAT_GATHER_SCATTER)
10703 aggr_type = elem_type;
10704 if (!costing_p)
10705 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10706 &bump, &vec_offset, loop_lens);
10708 else
10710 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10711 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10712 else
10713 aggr_type = vectype;
10714 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10715 memory_access_type, loop_lens);
10718 auto_vec<tree> vec_offsets;
10719 auto_vec<tree> vec_masks;
10720 if (mask && !costing_p)
10722 if (slp_node)
10723 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10724 &vec_masks);
10725 else
10726 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10727 &vec_masks, mask_vectype);
10730 tree vec_mask = NULL_TREE;
10731 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10733 gcc_assert (alignment_support_scheme == dr_aligned
10734 || alignment_support_scheme == dr_unaligned_supported);
10735 gcc_assert (grouped_load && !slp);
10737 unsigned int inside_cost = 0, prologue_cost = 0;
10738 /* For costing some adjacent vector loads, we'd like to cost with
10739 the total number of them once instead of costing each one by one. */
10740 unsigned int n_adjacent_loads = 0;
10741 for (j = 0; j < ncopies; j++)
10743 if (costing_p)
10745 /* An IFN_LOAD_LANES will load all its vector results,
10746 regardless of which ones we actually need. Account
10747 for the cost of unused results. */
10748 if (first_stmt_info == stmt_info)
10750 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10751 stmt_vec_info next_stmt_info = first_stmt_info;
10754 gaps -= 1;
10755 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10757 while (next_stmt_info);
10758 if (gaps)
10760 if (dump_enabled_p ())
10761 dump_printf_loc (MSG_NOTE, vect_location,
10762 "vect_model_load_cost: %d "
10763 "unused vectors.\n",
10764 gaps);
10765 vect_get_load_cost (vinfo, stmt_info, gaps,
10766 alignment_support_scheme,
10767 misalignment, false, &inside_cost,
10768 &prologue_cost, cost_vec, cost_vec,
10769 true);
10772 n_adjacent_loads++;
10773 continue;
10776 /* 1. Create the vector or array pointer update chain. */
10777 if (j == 0)
10778 dataref_ptr
10779 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10780 at_loop, offset, &dummy, gsi,
10781 &ptr_incr, false, bump);
10782 else
10784 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10785 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10786 stmt_info, bump);
10788 if (mask)
10789 vec_mask = vec_masks[j];
10791 tree vec_array = create_vector_array (vectype, vec_num);
10793 tree final_mask = NULL_TREE;
10794 tree final_len = NULL_TREE;
10795 tree bias = NULL_TREE;
10796 if (loop_masks)
10797 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10798 ncopies, vectype, j);
10799 if (vec_mask)
10800 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10801 vec_mask, gsi);
10803 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10805 if (loop_lens)
10806 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10807 ncopies, vectype, j, 1);
10808 else
10809 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10810 signed char biasval
10811 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10812 bias = build_int_cst (intQI_type_node, biasval);
10813 if (!final_mask)
10815 mask_vectype = truth_type_for (vectype);
10816 final_mask = build_minus_one_cst (mask_vectype);
10820 gcall *call;
10821 if (final_len && final_mask)
10823 /* Emit:
10824 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10825 VEC_MASK, LEN, BIAS). */
10826 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10827 tree alias_ptr = build_int_cst (ref_type, align);
10828 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10829 dataref_ptr, alias_ptr,
10830 final_mask, final_len, bias);
10832 else if (final_mask)
10834 /* Emit:
10835 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10836 VEC_MASK). */
10837 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10838 tree alias_ptr = build_int_cst (ref_type, align);
10839 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10840 dataref_ptr, alias_ptr,
10841 final_mask);
10843 else
10845 /* Emit:
10846 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10847 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10848 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10850 gimple_call_set_lhs (call, vec_array);
10851 gimple_call_set_nothrow (call, true);
10852 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10854 dr_chain.create (vec_num);
10855 /* Extract each vector into an SSA_NAME. */
10856 for (i = 0; i < vec_num; i++)
10858 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10859 vec_array, i);
10860 dr_chain.quick_push (new_temp);
10863 /* Record the mapping between SSA_NAMEs and statements. */
10864 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10866 /* Record that VEC_ARRAY is now dead. */
10867 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
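/* Putting the pieces above together, one copy of a two-vector load-lanes
   group roughly expands to the following (illustrative GIMPLE only, the
   SSA/variable names are invented here):
     vect_array.7 = .LOAD_LANES (MEM_REF[...]);   (or a masked/len variant)
     vect__1 = vect_array.7[0];
     vect__2 = vect_array.7[1];
     vect_array.7 ={v} {CLOBBER};  */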
10869 dr_chain.release ();
10871 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10874 if (costing_p)
10876 if (n_adjacent_loads > 0)
10877 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10878 alignment_support_scheme, misalignment, false,
10879 &inside_cost, &prologue_cost, cost_vec,
10880 cost_vec, true);
10881 if (dump_enabled_p ())
10882 dump_printf_loc (MSG_NOTE, vect_location,
10883 "vect_model_load_cost: inside_cost = %u, "
10884 "prologue_cost = %u .\n",
10885 inside_cost, prologue_cost);
10888 return true;
10891 if (memory_access_type == VMAT_GATHER_SCATTER)
10893 gcc_assert (alignment_support_scheme == dr_aligned
10894 || alignment_support_scheme == dr_unaligned_supported);
10895 gcc_assert (!grouped_load && !slp_perm);
10897 unsigned int inside_cost = 0, prologue_cost = 0;
10898 for (j = 0; j < ncopies; j++)
10900 /* 1. Create the vector or array pointer update chain. */
10901 if (j == 0 && !costing_p)
10903 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10904 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10905 slp_node, &gs_info, &dataref_ptr,
10906 &vec_offsets);
10907 else
10908 dataref_ptr
10909 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10910 at_loop, offset, &dummy, gsi,
10911 &ptr_incr, false, bump);
10913 else if (!costing_p)
10915 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10916 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10917 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10918 gsi, stmt_info, bump);
10921 gimple *new_stmt = NULL;
10922 for (i = 0; i < vec_num; i++)
10924 tree final_mask = NULL_TREE;
10925 tree final_len = NULL_TREE;
10926 tree bias = NULL_TREE;
10927 if (!costing_p)
10929 if (mask)
10930 vec_mask = vec_masks[vec_num * j + i];
10931 if (loop_masks)
10932 final_mask
10933 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10934 vec_num * ncopies, vectype,
10935 vec_num * j + i);
10936 if (vec_mask)
10937 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10938 final_mask, vec_mask, gsi);
10940 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10941 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10942 gsi, stmt_info, bump);
10945 /* 2. Create the vector-load in the loop. */
10946 unsigned HOST_WIDE_INT align;
10947 if (gs_info.ifn != IFN_LAST)
10949 if (costing_p)
10951 unsigned int cnunits = vect_nunits_for_cost (vectype);
10952 inside_cost
10953 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10954 stmt_info, 0, vect_body);
10955 continue;
10957 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10958 vec_offset = vec_offsets[vec_num * j + i];
10959 tree zero = build_zero_cst (vectype);
10960 tree scale = size_int (gs_info.scale);
10962 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10964 if (loop_lens)
10965 final_len
10966 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10967 vec_num * ncopies, vectype,
10968 vec_num * j + i, 1);
10969 else
10970 final_len
10971 = build_int_cst (sizetype,
10972 TYPE_VECTOR_SUBPARTS (vectype));
10973 signed char biasval
10974 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10975 bias = build_int_cst (intQI_type_node, biasval);
10976 if (!final_mask)
10978 mask_vectype = truth_type_for (vectype);
10979 final_mask = build_minus_one_cst (mask_vectype);
10983 gcall *call;
10984 if (final_len && final_mask)
10985 call
10986 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
10987 dataref_ptr, vec_offset,
10988 scale, zero, final_mask,
10989 final_len, bias);
10990 else if (final_mask)
10991 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
10992 dataref_ptr, vec_offset,
10993 scale, zero, final_mask);
10994 else
10995 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
10996 dataref_ptr, vec_offset,
10997 scale, zero);
10998 gimple_call_set_nothrow (call, true);
10999 new_stmt = call;
11000 data_ref = NULL_TREE;
11002 else if (gs_info.decl)
11004 /* The builtin decls path for gather is legacy, x86 only. */
11005 gcc_assert (!final_len && nunits.is_constant ());
11006 if (costing_p)
11008 unsigned int cnunits = vect_nunits_for_cost (vectype);
11009 inside_cost
11010 = record_stmt_cost (cost_vec, cnunits, scalar_load,
11011 stmt_info, 0, vect_body);
11012 continue;
11014 poly_uint64 offset_nunits
11015 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
11016 if (known_eq (nunits, offset_nunits))
11018 new_stmt = vect_build_one_gather_load_call
11019 (vinfo, stmt_info, gsi, &gs_info,
11020 dataref_ptr, vec_offsets[vec_num * j + i],
11021 final_mask);
11022 data_ref = NULL_TREE;
11024 else if (known_eq (nunits, offset_nunits * 2))
11026 /* We have an offset vector with half the number of
11027 lanes but the builtins will produce full vectype
11028 data with just the lower lanes filled. */
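/* For instance (hypothetical modes, just to illustrate the shape of the
   code below): gathering V8SF data with V4DI offsets needs two builtin
   calls, each consuming four offsets and defining only lanes 0-3 of a
   V8SF result; the final VEC_PERM_EXPR with selector
   { 0, 1, 2, 3, 8, 9, 10, 11 } then composes the two low halves into the
   full V8SF vector.  */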
11029 new_stmt = vect_build_one_gather_load_call
11030 (vinfo, stmt_info, gsi, &gs_info,
11031 dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
11032 final_mask);
11033 tree low = make_ssa_name (vectype);
11034 gimple_set_lhs (new_stmt, low);
11035 vect_finish_stmt_generation (vinfo, stmt_info,
11036 new_stmt, gsi);
11038 /* Now put the upper half of FINAL_MASK into its lower half. */
11039 if (final_mask
11040 && !SCALAR_INT_MODE_P
11041 (TYPE_MODE (TREE_TYPE (final_mask))))
11043 int count = nunits.to_constant ();
11044 vec_perm_builder sel (count, count, 1);
11045 sel.quick_grow (count);
11046 for (int i = 0; i < count; ++i)
11047 sel[i] = i | (count / 2);
11048 vec_perm_indices indices (sel, 2, count);
11049 tree perm_mask = vect_gen_perm_mask_checked
11050 (TREE_TYPE (final_mask), indices);
11051 new_stmt = gimple_build_assign (NULL_TREE,
11052 VEC_PERM_EXPR,
11053 final_mask,
11054 final_mask,
11055 perm_mask);
11056 final_mask = make_ssa_name (TREE_TYPE (final_mask));
11057 gimple_set_lhs (new_stmt, final_mask);
11058 vect_finish_stmt_generation (vinfo, stmt_info,
11059 new_stmt, gsi);
11061 else if (final_mask)
11063 new_stmt = gimple_build_assign (NULL_TREE,
11064 VEC_UNPACK_HI_EXPR,
11065 final_mask);
11066 final_mask = make_ssa_name
11067 (truth_type_for (gs_info.offset_vectype));
11068 gimple_set_lhs (new_stmt, final_mask);
11069 vect_finish_stmt_generation (vinfo, stmt_info,
11070 new_stmt, gsi);
11073 new_stmt = vect_build_one_gather_load_call
11074 (vinfo, stmt_info, gsi, &gs_info,
11075 dataref_ptr,
11076 vec_offsets[2 * vec_num * j + 2 * i + 1],
11077 final_mask);
11078 tree high = make_ssa_name (vectype);
11079 gimple_set_lhs (new_stmt, high);
11080 vect_finish_stmt_generation (vinfo, stmt_info,
11081 new_stmt, gsi);
11083 /* Compose the low and high halves. */
11084 int count = nunits.to_constant ();
11085 vec_perm_builder sel (count, count, 1);
11086 sel.quick_grow (count);
11087 for (int i = 0; i < count; ++i)
11088 sel[i] = i < count / 2 ? i : i + count / 2;
11089 vec_perm_indices indices (sel, 2, count);
11090 tree perm_mask
11091 = vect_gen_perm_mask_checked (vectype, indices);
11092 new_stmt = gimple_build_assign (NULL_TREE,
11093 VEC_PERM_EXPR,
11094 low, high, perm_mask);
11095 data_ref = NULL_TREE;
11097 else if (known_eq (nunits * 2, offset_nunits))
11099 /* We have an offset vector with double the number of
11100 lanes. Select the low/high part accordingly. */
11101 vec_offset = vec_offsets[(vec_num * j + i) / 2];
11102 if ((vec_num * j + i) & 1)
11104 int count = offset_nunits.to_constant ();
11105 vec_perm_builder sel (count, count, 1);
11106 sel.quick_grow (count);
11107 for (int i = 0; i < count; ++i)
11108 sel[i] = i | (count / 2);
11109 vec_perm_indices indices (sel, 2, count);
11110 tree perm_mask = vect_gen_perm_mask_checked
11111 (TREE_TYPE (vec_offset), indices);
11112 new_stmt = gimple_build_assign (NULL_TREE,
11113 VEC_PERM_EXPR,
11114 vec_offset,
11115 vec_offset,
11116 perm_mask);
11117 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11118 gimple_set_lhs (new_stmt, vec_offset);
11119 vect_finish_stmt_generation (vinfo, stmt_info,
11120 new_stmt, gsi);
11122 new_stmt = vect_build_one_gather_load_call
11123 (vinfo, stmt_info, gsi, &gs_info,
11124 dataref_ptr, vec_offset, final_mask);
11125 data_ref = NULL_TREE;
11127 else
11128 gcc_unreachable ();
11130 else
11132 /* Emulated gather-scatter. */
11133 gcc_assert (!final_mask);
11134 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11135 if (costing_p)
11137 /* For emulated gathers, cost N offset vector element extracts (the
11138 scalar scaling and pointer-plus-offset add are consumed by the load). */
11139 inside_cost = record_stmt_cost (cost_vec, const_nunits,
11140 vec_to_scalar, stmt_info,
11141 0, vect_body);
11142 /* N scalar loads plus gathering them into a
11143 vector. */
11144 inside_cost
11145 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11146 stmt_info, 0, vect_body);
11147 inside_cost
11148 = record_stmt_cost (cost_vec, 1, vec_construct,
11149 stmt_info, 0, vect_body);
11150 continue;
11152 unsigned HOST_WIDE_INT const_offset_nunits
11153 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11154 .to_constant ();
11155 vec<constructor_elt, va_gc> *ctor_elts;
11156 vec_alloc (ctor_elts, const_nunits);
11157 gimple_seq stmts = NULL;
11158 /* We support offset vectors with more elements
11159 than the data vector for now. */
11160 unsigned HOST_WIDE_INT factor
11161 = const_offset_nunits / const_nunits;
11162 vec_offset = vec_offsets[(vec_num * j + i) / factor];
11163 unsigned elt_offset
11164 = ((vec_num * j + i) % factor) * const_nunits;
11165 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11166 tree scale = size_int (gs_info.scale);
11167 align = get_object_alignment (DR_REF (first_dr_info->dr));
11168 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
11169 for (unsigned k = 0; k < const_nunits; ++k)
11171 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11172 bitsize_int (k + elt_offset));
11173 tree idx
11174 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11175 vec_offset, TYPE_SIZE (idx_type), boff);
11176 idx = gimple_convert (&stmts, sizetype, idx);
11177 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
11178 scale);
11179 tree ptr = gimple_build (&stmts, PLUS_EXPR,
11180 TREE_TYPE (dataref_ptr),
11181 dataref_ptr, idx);
11182 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11183 tree elt = make_ssa_name (TREE_TYPE (vectype));
11184 tree ref = build2 (MEM_REF, ltype, ptr,
11185 build_int_cst (ref_type, 0));
11186 new_stmt = gimple_build_assign (elt, ref);
11187 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11188 gimple_seq_add_stmt (&stmts, new_stmt);
11189 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11191 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11192 new_stmt = gimple_build_assign (
11193 NULL_TREE, build_constructor (vectype, ctor_elts));
11194 data_ref = NULL_TREE;
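/* Illustrative sketch of the emulated gather built above, for a
   hypothetical const_nunits of 4 with 32-bit offsets (not a literal dump):
     idx0 = BIT_FIELD_REF <vec_offset, 32, 0>;
     off0 = (sizetype) idx0 * scale;
     elt0 = *(elem_type *) (dataref_ptr + off0);
     ... likewise for lanes 1, 2 and 3 ...
     vect_dest = { elt0, elt1, elt2, elt3 };   (the CONSTRUCTOR built above)  */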
11197 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11198 /* DATA_REF is null if we've already built the statement. */
11199 if (data_ref)
11201 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11202 new_stmt = gimple_build_assign (vec_dest, data_ref);
11204 new_temp = make_ssa_name (vec_dest, new_stmt);
11205 gimple_set_lhs (new_stmt, new_temp);
11206 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11208 /* Store vector loads in the corresponding SLP_NODE. */
11209 if (slp)
11210 slp_node->push_vec_def (new_stmt);
11213 if (!slp && !costing_p)
11214 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11217 if (!slp && !costing_p)
11218 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11220 if (costing_p && dump_enabled_p ())
11221 dump_printf_loc (MSG_NOTE, vect_location,
11222 "vect_model_load_cost: inside_cost = %u, "
11223 "prologue_cost = %u .\n",
11224 inside_cost, prologue_cost);
11225 return true;
11228 poly_uint64 group_elt = 0;
11229 unsigned int inside_cost = 0, prologue_cost = 0;
11230 /* For costing some adjacent vector loads, we'd like to cost with
11231 the total number of them once instead of costing each one by one. */
11232 unsigned int n_adjacent_loads = 0;
11233 for (j = 0; j < ncopies; j++)
11235 /* 1. Create the vector or array pointer update chain. */
11236 if (j == 0 && !costing_p)
11238 bool simd_lane_access_p
11239 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11240 if (simd_lane_access_p
11241 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11242 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11243 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11244 && integer_zerop (DR_INIT (first_dr_info->dr))
11245 && alias_sets_conflict_p (get_alias_set (aggr_type),
11246 get_alias_set (TREE_TYPE (ref_type)))
11247 && (alignment_support_scheme == dr_aligned
11248 || alignment_support_scheme == dr_unaligned_supported))
11250 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11251 dataref_offset = build_int_cst (ref_type, 0);
11253 else if (diff_first_stmt_info)
11255 dataref_ptr
11256 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11257 aggr_type, at_loop, offset, &dummy,
11258 gsi, &ptr_incr, simd_lane_access_p,
11259 bump);
11260 /* Adjust the pointer by the difference to first_stmt. */
11261 data_reference_p ptrdr
11262 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11263 tree diff
11264 = fold_convert (sizetype,
11265 size_binop (MINUS_EXPR,
11266 DR_INIT (first_dr_info->dr),
11267 DR_INIT (ptrdr)));
11268 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11269 stmt_info, diff);
11270 if (alignment_support_scheme == dr_explicit_realign)
11272 msq = vect_setup_realignment (vinfo,
11273 first_stmt_info_for_drptr, gsi,
11274 &realignment_token,
11275 alignment_support_scheme,
11276 dataref_ptr, &at_loop);
11277 gcc_assert (!compute_in_loop);
11280 else
11281 dataref_ptr
11282 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11283 at_loop,
11284 offset, &dummy, gsi, &ptr_incr,
11285 simd_lane_access_p, bump);
11287 else if (!costing_p)
11289 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11290 if (dataref_offset)
11291 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11292 bump);
11293 else
11294 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11295 stmt_info, bump);
11298 if (grouped_load || slp_perm)
11299 dr_chain.create (vec_num);
11301 gimple *new_stmt = NULL;
11302 for (i = 0; i < vec_num; i++)
11304 tree final_mask = NULL_TREE;
11305 tree final_len = NULL_TREE;
11306 tree bias = NULL_TREE;
11307 if (!costing_p)
11309 if (mask)
11310 vec_mask = vec_masks[vec_num * j + i];
11311 if (loop_masks)
11312 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11313 vec_num * ncopies, vectype,
11314 vec_num * j + i);
11315 if (vec_mask)
11316 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11317 final_mask, vec_mask, gsi);
11319 if (i > 0)
11320 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11321 gsi, stmt_info, bump);
11324 /* 2. Create the vector-load in the loop. */
11325 switch (alignment_support_scheme)
11327 case dr_aligned:
11328 case dr_unaligned_supported:
11330 if (costing_p)
11331 break;
11333 unsigned int misalign;
11334 unsigned HOST_WIDE_INT align;
11335 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11336 if (alignment_support_scheme == dr_aligned)
11337 misalign = 0;
11338 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11340 align
11341 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11342 misalign = 0;
11344 else
11345 misalign = misalignment;
11346 if (dataref_offset == NULL_TREE
11347 && TREE_CODE (dataref_ptr) == SSA_NAME)
11348 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11349 misalign);
11350 align = least_bit_hwi (misalign | align);
11352 /* Compute IFN when LOOP_LENS or final_mask valid. */
11353 machine_mode vmode = TYPE_MODE (vectype);
11354 machine_mode new_vmode = vmode;
11355 internal_fn partial_ifn = IFN_LAST;
11356 if (loop_lens)
11358 opt_machine_mode new_ovmode
11359 = get_len_load_store_mode (vmode, true, &partial_ifn);
11360 new_vmode = new_ovmode.require ();
11361 unsigned factor
11362 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
11363 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11364 vec_num * ncopies, vectype,
11365 vec_num * j + i, factor);
11367 else if (final_mask)
11369 if (!can_vec_mask_load_store_p (
11370 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11371 &partial_ifn))
11372 gcc_unreachable ();
11375 if (partial_ifn == IFN_MASK_LEN_LOAD)
11377 if (!final_len)
11379 /* Pass VF value to 'len' argument of
11380 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11381 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11383 if (!final_mask)
11385 /* Pass all ones value to 'mask' argument of
11386 MASK_LEN_LOAD if final_mask is invalid. */
11387 mask_vectype = truth_type_for (vectype);
11388 final_mask = build_minus_one_cst (mask_vectype);
11391 if (final_len)
11393 signed char biasval
11394 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11396 bias = build_int_cst (intQI_type_node, biasval);
11399 if (final_len)
11401 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11402 gcall *call;
11403 if (partial_ifn == IFN_MASK_LEN_LOAD)
11404 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11405 dataref_ptr, ptr,
11406 final_mask, final_len,
11407 bias);
11408 else
11409 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11410 dataref_ptr, ptr,
11411 final_len, bias);
11412 gimple_call_set_nothrow (call, true);
11413 new_stmt = call;
11414 data_ref = NULL_TREE;
11416 /* Need conversion if it's wrapped with VnQI. */
11417 if (vmode != new_vmode)
11419 tree new_vtype = build_vector_type_for_mode (
11420 unsigned_intQI_type_node, new_vmode);
11421 tree var
11422 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11423 gimple_set_lhs (call, var);
11424 vect_finish_stmt_generation (vinfo, stmt_info, call,
11425 gsi);
11426 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11427 new_stmt = gimple_build_assign (vec_dest,
11428 VIEW_CONVERT_EXPR, op);
11431 else if (final_mask)
11433 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11434 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11435 dataref_ptr, ptr,
11436 final_mask);
11437 gimple_call_set_nothrow (call, true);
11438 new_stmt = call;
11439 data_ref = NULL_TREE;
11441 else
11443 tree ltype = vectype;
11444 tree new_vtype = NULL_TREE;
11445 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11446 unsigned int vect_align
11447 = vect_known_alignment_in_bytes (first_dr_info, vectype);
11448 unsigned int scalar_dr_size
11449 = vect_get_scalar_dr_size (first_dr_info);
11450 /* If there's no peeling for gaps but we have a gap
11451 with SLP loads, then load only the lower half of the
11452 vector. See get_group_load_store_type for
11453 when we apply this optimization. */
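/* E.g. (purely illustrative): a V4SI group with group_size 4 and a
   trailing gap of 2 only needs its first two elements, so we load a
   V2SI half vector here and widen it with zeros via the CONSTRUCTOR
   built below.  */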
11454 if (slp
11455 && loop_vinfo
11456 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
11457 && known_eq (nunits, (group_size - gap) * 2)
11458 && known_eq (nunits, group_size)
11459 && gap >= (vect_align / scalar_dr_size))
11461 tree half_vtype;
11462 new_vtype
11463 = vector_vector_composition_type (vectype, 2,
11464 &half_vtype);
11465 if (new_vtype != NULL_TREE)
11466 ltype = half_vtype;
11468 tree offset
11469 = (dataref_offset ? dataref_offset
11470 : build_int_cst (ref_type, 0));
11471 if (ltype != vectype
11472 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11474 unsigned HOST_WIDE_INT gap_offset
11475 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
11476 tree gapcst = build_int_cst (ref_type, gap_offset);
11477 offset = size_binop (PLUS_EXPR, offset, gapcst);
11479 data_ref
11480 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11481 if (alignment_support_scheme == dr_aligned)
11483 else
11484 TREE_TYPE (data_ref)
11485 = build_aligned_type (TREE_TYPE (data_ref),
11486 align * BITS_PER_UNIT);
11487 if (ltype != vectype)
11489 vect_copy_ref_info (data_ref,
11490 DR_REF (first_dr_info->dr));
11491 tree tem = make_ssa_name (ltype);
11492 new_stmt = gimple_build_assign (tem, data_ref);
11493 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11494 gsi);
11495 data_ref = NULL;
11496 vec<constructor_elt, va_gc> *v;
11497 vec_alloc (v, 2);
11498 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11500 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11501 build_zero_cst (ltype));
11502 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11504 else
11506 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11507 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11508 build_zero_cst (ltype));
11510 gcc_assert (new_vtype != NULL_TREE);
11511 if (new_vtype == vectype)
11512 new_stmt = gimple_build_assign (
11513 vec_dest, build_constructor (vectype, v));
11514 else
11516 tree new_vname = make_ssa_name (new_vtype);
11517 new_stmt = gimple_build_assign (
11518 new_vname, build_constructor (new_vtype, v));
11519 vect_finish_stmt_generation (vinfo, stmt_info,
11520 new_stmt, gsi);
11521 new_stmt = gimple_build_assign (
11522 vec_dest,
11523 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11527 break;
11529 case dr_explicit_realign:
11531 if (costing_p)
11532 break;
11533 tree ptr, bump;
11535 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11537 if (compute_in_loop)
11538 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11539 &realignment_token,
11540 dr_explicit_realign,
11541 dataref_ptr, NULL);
11543 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11544 ptr = copy_ssa_name (dataref_ptr);
11545 else
11546 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11547 // For explicit realign the target alignment should be
11548 // known at compile time.
11549 unsigned HOST_WIDE_INT align
11550 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11551 new_stmt = gimple_build_assign (
11552 ptr, BIT_AND_EXPR, dataref_ptr,
11553 build_int_cst (TREE_TYPE (dataref_ptr),
11554 -(HOST_WIDE_INT) align));
11555 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11556 data_ref
11557 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11558 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11559 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11560 new_stmt = gimple_build_assign (vec_dest, data_ref);
11561 new_temp = make_ssa_name (vec_dest, new_stmt);
11562 gimple_assign_set_lhs (new_stmt, new_temp);
11563 gimple_move_vops (new_stmt, stmt_info->stmt);
11564 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11565 msq = new_temp;
11567 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11568 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11569 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11570 bump);
11571 new_stmt = gimple_build_assign (
11572 NULL_TREE, BIT_AND_EXPR, ptr,
11573 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11574 if (TREE_CODE (ptr) == SSA_NAME)
11575 ptr = copy_ssa_name (ptr, new_stmt);
11576 else
11577 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11578 gimple_assign_set_lhs (new_stmt, ptr);
11579 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11580 data_ref
11581 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11582 break;
11584 case dr_explicit_realign_optimized:
11586 if (costing_p)
11587 break;
11588 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11589 new_temp = copy_ssa_name (dataref_ptr);
11590 else
11591 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11592 // We should only be doing this if we know the target
11593 // alignment at compile time.
11594 unsigned HOST_WIDE_INT align
11595 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11596 new_stmt = gimple_build_assign (
11597 new_temp, BIT_AND_EXPR, dataref_ptr,
11598 build_int_cst (TREE_TYPE (dataref_ptr),
11599 -(HOST_WIDE_INT) align));
11600 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11601 data_ref = build2 (MEM_REF, vectype, new_temp,
11602 build_int_cst (ref_type, 0));
11603 break;
11605 default:
11606 gcc_unreachable ();
11609 /* One common place to cost the above vect load for different
11610 alignment support schemes. */
11611 if (costing_p)
11613 /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load, we only
11614 need to take care of the first stmt, whose stmt_info is
11615 first_stmt_info; iterating vec_num times on it covers the
11616 cost of the remaining stmts, consistent with the transform
11617 phase. The prologue cost for realignment only needs to be
11618 counted once for the whole group. */
11619 bool first_stmt_info_p = first_stmt_info == stmt_info;
11620 bool add_realign_cost = first_stmt_info_p && i == 0;
11621 if (memory_access_type == VMAT_CONTIGUOUS
11622 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11623 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11624 && (!grouped_load || first_stmt_info_p)))
11626 /* Leave realign cases alone to keep them simple. */
11627 if (alignment_support_scheme == dr_explicit_realign_optimized
11628 || alignment_support_scheme == dr_explicit_realign)
11629 vect_get_load_cost (vinfo, stmt_info, 1,
11630 alignment_support_scheme, misalignment,
11631 add_realign_cost, &inside_cost,
11632 &prologue_cost, cost_vec, cost_vec,
11633 true);
11634 else
11635 n_adjacent_loads++;
11638 else
11640 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11641 /* DATA_REF is null if we've already built the statement. */
11642 if (data_ref)
11644 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11645 new_stmt = gimple_build_assign (vec_dest, data_ref);
11647 new_temp = make_ssa_name (vec_dest, new_stmt);
11648 gimple_set_lhs (new_stmt, new_temp);
11649 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11652 /* 3. Handle explicit realignment if necessary/supported.
11653 Create in loop:
11654 vec_dest = realign_load (msq, lsq, realignment_token) */
11655 if (!costing_p
11656 && (alignment_support_scheme == dr_explicit_realign_optimized
11657 || alignment_support_scheme == dr_explicit_realign))
11659 lsq = gimple_assign_lhs (new_stmt);
11660 if (!realignment_token)
11661 realignment_token = dataref_ptr;
11662 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11663 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11664 lsq, realignment_token);
11665 new_temp = make_ssa_name (vec_dest, new_stmt);
11666 gimple_assign_set_lhs (new_stmt, new_temp);
11667 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11669 if (alignment_support_scheme == dr_explicit_realign_optimized)
11671 gcc_assert (phi);
11672 if (i == vec_num - 1 && j == ncopies - 1)
11673 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11674 UNKNOWN_LOCATION);
11675 msq = lsq;
11679 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11681 if (costing_p)
11682 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11683 stmt_info, 0, vect_body);
11684 else
11686 tree perm_mask = perm_mask_for_reverse (vectype);
11687 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11688 perm_mask, stmt_info, gsi);
11689 new_stmt = SSA_NAME_DEF_STMT (new_temp);
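/* For example, with V4SI perm_mask_for_reverse yields the selector
   { 3, 2, 1, 0 }, turning a loaded { x0, x1, x2, x3 } into
   { x3, x2, x1, x0 } for the reversed access.  */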
11693 /* Collect vector loads and later create their permutation in
11694 vect_transform_grouped_load (). */
11695 if (!costing_p && (grouped_load || slp_perm))
11696 dr_chain.quick_push (new_temp);
11698 /* Store vector loads in the corresponding SLP_NODE. */
11699 if (!costing_p && slp && !slp_perm)
11700 slp_node->push_vec_def (new_stmt);
11702 /* With an SLP permutation we load the gaps as well; without
11703 one we need to skip the gaps once we have fully loaded
11704 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11705 group_elt += nunits;
11706 if (!costing_p
11707 && maybe_ne (group_gap_adj, 0U)
11708 && !slp_perm
11709 && known_eq (group_elt, group_size - group_gap_adj))
11711 poly_wide_int bump_val
11712 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11713 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11714 == -1)
11715 bump_val = -bump_val;
11716 tree bump = wide_int_to_tree (sizetype, bump_val);
11717 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11718 stmt_info, bump);
11719 group_elt = 0;
11722 /* Bump the vector pointer to account for a gap or for excess
11723 elements loaded for a permuted SLP load. */
11724 if (!costing_p
11725 && maybe_ne (group_gap_adj, 0U)
11726 && slp_perm)
11728 poly_wide_int bump_val
11729 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11730 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11731 bump_val = -bump_val;
11732 tree bump = wide_int_to_tree (sizetype, bump_val);
11733 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11734 stmt_info, bump);
11737 if (slp && !slp_perm)
11738 continue;
11740 if (slp_perm)
11742 unsigned n_perms;
11743 /* For SLP we know we've seen all possible uses of dr_chain so
11744 direct vect_transform_slp_perm_load to DCE the unused parts.
11745 ??? This is a hack to prevent compile-time issues as seen
11746 in PR101120 and friends. */
11747 if (costing_p)
11749 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11750 true, &n_perms, nullptr);
11751 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11752 stmt_info, 0, vect_body);
11754 else
11756 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11757 gsi, vf, false, &n_perms,
11758 nullptr, true);
11759 gcc_assert (ok);
11762 else
11764 if (grouped_load)
11766 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11767 /* We assume that the cost of a single load-lanes instruction
11768 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11769 If a grouped access is instead being provided by a
11770 load-and-permute operation, include the cost of the
11771 permutes. */
11772 if (costing_p && first_stmt_info == stmt_info)
11774 /* Uses even/odd extract operations or shuffle
11775 operations for each needed permute. */
11776 int group_size = DR_GROUP_SIZE (first_stmt_info);
11777 int nstmts = ceil_log2 (group_size) * group_size;
11778 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11779 stmt_info, 0, vect_body);
11781 if (dump_enabled_p ())
11782 dump_printf_loc (MSG_NOTE, vect_location,
11783 "vect_model_load_cost:"
11784 "strided group_size = %d .\n",
11785 group_size);
11787 else if (!costing_p)
11789 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11790 group_size, gsi);
11791 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11794 else if (!costing_p)
11795 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11797 dr_chain.release ();
11799 if (!slp && !costing_p)
11800 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11802 if (costing_p)
11804 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11805 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11806 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11807 if (n_adjacent_loads > 0)
11808 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
11809 alignment_support_scheme, misalignment, false,
11810 &inside_cost, &prologue_cost, cost_vec, cost_vec,
11811 true);
11812 if (dump_enabled_p ())
11813 dump_printf_loc (MSG_NOTE, vect_location,
11814 "vect_model_load_cost: inside_cost = %u, "
11815 "prologue_cost = %u .\n",
11816 inside_cost, prologue_cost);
11819 return true;
11822 /* Function vect_is_simple_cond.
11824 Input:
11825 LOOP - the loop that is being vectorized.
11826 COND - Condition that is checked for simple use.
11828 Output:
11829 *COMP_VECTYPE - the vector type for the comparison.
11830 *DTS - The def types for the arguments of the comparison
11832 Returns whether a COND can be vectorized. Checks whether
11833 condition operands are supportable using vect_is_simple_use. */
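/* For example (illustrative): for a COND like a_1 < b_2 with int operands
   in a loop using V4SI vectors, *COMP_VECTYPE is set to the vector type of
   the comparison operands (here V4SI) and DTS[0]/DTS[1] describe the
   definitions of a_1 and b_2.  For a boolean SSA_NAME COND, *COMP_VECTYPE
   is the corresponding vector boolean type.  */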
11835 static bool
11836 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11837 slp_tree slp_node, tree *comp_vectype,
11838 enum vect_def_type *dts, tree vectype)
11840 tree lhs, rhs;
11841 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11842 slp_tree slp_op;
11844 /* Mask case. */
11845 if (TREE_CODE (cond) == SSA_NAME
11846 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11848 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11849 &slp_op, &dts[0], comp_vectype)
11850 || !*comp_vectype
11851 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11852 return false;
11853 return true;
11856 if (!COMPARISON_CLASS_P (cond))
11857 return false;
11859 lhs = TREE_OPERAND (cond, 0);
11860 rhs = TREE_OPERAND (cond, 1);
11862 if (TREE_CODE (lhs) == SSA_NAME)
11864 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11865 &lhs, &slp_op, &dts[0], &vectype1))
11866 return false;
11868 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11869 || TREE_CODE (lhs) == FIXED_CST)
11870 dts[0] = vect_constant_def;
11871 else
11872 return false;
11874 if (TREE_CODE (rhs) == SSA_NAME)
11876 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11877 &rhs, &slp_op, &dts[1], &vectype2))
11878 return false;
11880 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11881 || TREE_CODE (rhs) == FIXED_CST)
11882 dts[1] = vect_constant_def;
11883 else
11884 return false;
11886 if (vectype1 && vectype2
11887 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11888 TYPE_VECTOR_SUBPARTS (vectype2)))
11889 return false;
11891 *comp_vectype = vectype1 ? vectype1 : vectype2;
11892 /* Invariant comparison. */
11893 if (! *comp_vectype)
11895 tree scalar_type = TREE_TYPE (lhs);
11896 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11897 *comp_vectype = truth_type_for (vectype);
11898 else
11900 /* If we can widen the comparison to match vectype do so. */
11901 if (INTEGRAL_TYPE_P (scalar_type)
11902 && !slp_node
11903 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11904 TYPE_SIZE (TREE_TYPE (vectype))))
11905 scalar_type = build_nonstandard_integer_type
11906 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
11907 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11908 slp_node);
11912 return true;
11915 /* vectorizable_condition.
11917 Check if STMT_INFO is conditional modify expression that can be vectorized.
11918 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11919 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11920 at GSI.
11922 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11924 Return true if STMT_INFO is vectorizable in this way. */
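/* For instance (illustrative, made-up SSA names): a scalar statement
     x_1 = a_2 < b_3 ? c_4 : d_5;
   is, in the non-reduction case, vectorized into something like
     mask_6 = vect_a_7 < vect_b_8;
     vect_x_9 = VEC_COND_EXPR <mask_6, vect_c_10, vect_d_11>;  */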
11926 static bool
11927 vectorizable_condition (vec_info *vinfo,
11928 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11929 gimple **vec_stmt,
11930 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11932 tree scalar_dest = NULL_TREE;
11933 tree vec_dest = NULL_TREE;
11934 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
11935 tree then_clause, else_clause;
11936 tree comp_vectype = NULL_TREE;
11937 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
11938 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
11939 tree vec_compare;
11940 tree new_temp;
11941 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11942 enum vect_def_type dts[4]
11943 = {vect_unknown_def_type, vect_unknown_def_type,
11944 vect_unknown_def_type, vect_unknown_def_type};
11945 int ndts = 4;
11946 int ncopies;
11947 int vec_num;
11948 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11949 int i;
11950 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11951 vec<tree> vec_oprnds0 = vNULL;
11952 vec<tree> vec_oprnds1 = vNULL;
11953 vec<tree> vec_oprnds2 = vNULL;
11954 vec<tree> vec_oprnds3 = vNULL;
11955 tree vec_cmp_type;
11956 bool masked = false;
11958 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11959 return false;
11961 /* Is vectorizable conditional operation? */
11962 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11963 if (!stmt)
11964 return false;
11966 code = gimple_assign_rhs_code (stmt);
11967 if (code != COND_EXPR)
11968 return false;
11970 stmt_vec_info reduc_info = NULL;
11971 int reduc_index = -1;
11972 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
11973 bool for_reduction
11974 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
11975 if (for_reduction)
11977 if (slp_node)
11978 return false;
11979 reduc_info = info_for_reduction (vinfo, stmt_info);
11980 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
11981 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
11982 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
11983 || reduc_index != -1);
11985 else
11987 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11988 return false;
11991 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11992 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11994 if (slp_node)
11996 ncopies = 1;
11997 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
11999 else
12001 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12002 vec_num = 1;
12005 gcc_assert (ncopies >= 1);
12006 if (for_reduction && ncopies > 1)
12007 return false; /* FORNOW */
12009 cond_expr = gimple_assign_rhs1 (stmt);
12011 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
12012 &comp_vectype, &dts[0], vectype)
12013 || !comp_vectype)
12014 return false;
12016 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
12017 slp_tree then_slp_node, else_slp_node;
12018 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
12019 &then_clause, &then_slp_node, &dts[2], &vectype1))
12020 return false;
12021 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
12022 &else_clause, &else_slp_node, &dts[3], &vectype2))
12023 return false;
12025 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
12026 return false;
12028 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
12029 return false;
12031 masked = !COMPARISON_CLASS_P (cond_expr);
12032 vec_cmp_type = truth_type_for (comp_vectype);
12034 if (vec_cmp_type == NULL_TREE)
12035 return false;
12037 cond_code = TREE_CODE (cond_expr);
12038 if (!masked)
12040 cond_expr0 = TREE_OPERAND (cond_expr, 0);
12041 cond_expr1 = TREE_OPERAND (cond_expr, 1);
12044 /* For conditional reductions, the "then" value needs to be the candidate
12045 value calculated by this iteration while the "else" value needs to be
12046 the result carried over from previous iterations. If the COND_EXPR
12047 is the other way around, we need to swap it. */
12048 bool must_invert_cmp_result = false;
12049 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
12051 if (masked)
12052 must_invert_cmp_result = true;
12053 else
12055 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
12056 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
12057 if (new_code == ERROR_MARK)
12058 must_invert_cmp_result = true;
12059 else
12061 cond_code = new_code;
12062 /* Make sure we don't accidentally use the old condition. */
12063 cond_expr = NULL_TREE;
12066 std::swap (then_clause, else_clause);
12069 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12071 /* Boolean values may have another representation in vectors
12072 and therefore we prefer bit operations over comparison for
12073 them (which also works for scalar masks). We store opcodes
12074 to use in bitop1 and bitop2. The statement is vectorized as
12075 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12076 depending on bitop1 and bitop2 arity. */
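/* For example (illustrative): for a boolean GE_EXPR the switch below picks
   bitop1 = BIT_NOT_EXPR and bitop2 = BIT_IOR_EXPR, so rhs1 >= rhs2 is
   emitted as rhs1 | ~rhs2; for GT_EXPR it becomes rhs1 & ~rhs2.  */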
12077 switch (cond_code)
12079 case GT_EXPR:
12080 bitop1 = BIT_NOT_EXPR;
12081 bitop2 = BIT_AND_EXPR;
12082 break;
12083 case GE_EXPR:
12084 bitop1 = BIT_NOT_EXPR;
12085 bitop2 = BIT_IOR_EXPR;
12086 break;
12087 case LT_EXPR:
12088 bitop1 = BIT_NOT_EXPR;
12089 bitop2 = BIT_AND_EXPR;
12090 std::swap (cond_expr0, cond_expr1);
12091 break;
12092 case LE_EXPR:
12093 bitop1 = BIT_NOT_EXPR;
12094 bitop2 = BIT_IOR_EXPR;
12095 std::swap (cond_expr0, cond_expr1);
12096 break;
12097 case NE_EXPR:
12098 bitop1 = BIT_XOR_EXPR;
12099 break;
12100 case EQ_EXPR:
12101 bitop1 = BIT_XOR_EXPR;
12102 bitop2 = BIT_NOT_EXPR;
12103 break;
12104 default:
12105 return false;
12107 cond_code = SSA_NAME;
12110 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12111 && reduction_type == EXTRACT_LAST_REDUCTION
12112 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12114 if (dump_enabled_p ())
12115 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12116 "reduction comparison operation not supported.\n");
12117 return false;
12120 if (!vec_stmt)
12122 if (bitop1 != NOP_EXPR)
12124 machine_mode mode = TYPE_MODE (comp_vectype);
12125 optab optab;
12127 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12128 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12129 return false;
12131 if (bitop2 != NOP_EXPR)
12133 optab = optab_for_tree_code (bitop2, comp_vectype,
12134 optab_default);
12135 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12136 return false;
12140 vect_cost_for_stmt kind = vector_stmt;
12141 if (reduction_type == EXTRACT_LAST_REDUCTION)
12142 /* Count one reduction-like operation per vector. */
12143 kind = vec_to_scalar;
12144 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12145 && (masked
12146 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12147 cond_code)
12148 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12149 ERROR_MARK))))
12150 return false;
12152 if (slp_node
12153 && (!vect_maybe_update_slp_op_vectype
12154 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12155 || (op_adjust == 1
12156 && !vect_maybe_update_slp_op_vectype
12157 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12158 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12159 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12161 if (dump_enabled_p ())
12162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12163 "incompatible vector types for invariants\n");
12164 return false;
12167 if (loop_vinfo && for_reduction
12168 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12170 if (reduction_type == EXTRACT_LAST_REDUCTION)
12172 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12173 vectype, OPTIMIZE_FOR_SPEED))
12174 vect_record_loop_len (loop_vinfo,
12175 &LOOP_VINFO_LENS (loop_vinfo),
12176 ncopies * vec_num, vectype, 1);
12177 else
12178 vect_record_loop_mask (loop_vinfo,
12179 &LOOP_VINFO_MASKS (loop_vinfo),
12180 ncopies * vec_num, vectype, NULL);
12182 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12183 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12185 if (dump_enabled_p ())
12186 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12187 "conditional reduction prevents the use"
12188 " of partial vectors.\n");
12189 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12193 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12194 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
12195 cost_vec, kind);
12196 return true;
12199 /* Transform. */
12201 /* Handle def. */
12202 scalar_dest = gimple_assign_lhs (stmt);
12203 if (reduction_type != EXTRACT_LAST_REDUCTION)
12204 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12206 bool swap_cond_operands = false;
12208 /* See whether another part of the vectorized code applies a loop
12209 mask to the condition, or to its inverse. */
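/* For instance (illustrative): if some masked load in the loop already
   uses the scalar condition a_1 > b_2 as its mask and this COND_EXPR
   tests a_1 <= b_2, we reuse the existing mask and simply swap the
   then/else operands (or invert the compare result) instead of
   materialising a second, redundant mask.  */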
12211 vec_loop_masks *masks = NULL;
12212 vec_loop_lens *lens = NULL;
12213 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12215 if (reduction_type == EXTRACT_LAST_REDUCTION)
12216 lens = &LOOP_VINFO_LENS (loop_vinfo);
12218 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12220 if (reduction_type == EXTRACT_LAST_REDUCTION)
12221 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12222 else
12224 scalar_cond_masked_key cond (cond_expr, ncopies);
12225 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12226 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12227 else
12229 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12230 tree_code orig_code = cond.code;
12231 cond.code = invert_tree_comparison (cond.code, honor_nans);
12232 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12234 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12235 cond_code = cond.code;
12236 swap_cond_operands = true;
12238 else
12240 /* Try the inverse of the current mask. We check if the
12241 inverse mask is live and, if so, we generate a negation of
12242 the current mask such that we still honor NaNs. */
12243 cond.inverted_p = true;
12244 cond.code = orig_code;
12245 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12247 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12248 cond_code = cond.code;
12249 swap_cond_operands = true;
12250 must_invert_cmp_result = true;
12257 /* Handle cond expr. */
12258 if (masked)
12259 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12260 cond_expr, comp_vectype, &vec_oprnds0,
12261 then_clause, vectype, &vec_oprnds2,
12262 reduction_type != EXTRACT_LAST_REDUCTION
12263 ? else_clause : NULL, vectype, &vec_oprnds3);
12264 else
12265 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12266 cond_expr0, comp_vectype, &vec_oprnds0,
12267 cond_expr1, comp_vectype, &vec_oprnds1,
12268 then_clause, vectype, &vec_oprnds2,
12269 reduction_type != EXTRACT_LAST_REDUCTION
12270 ? else_clause : NULL, vectype, &vec_oprnds3);
12272 /* Arguments are ready. Create the new vector stmt. */
12273 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12275 vec_then_clause = vec_oprnds2[i];
12276 if (reduction_type != EXTRACT_LAST_REDUCTION)
12277 vec_else_clause = vec_oprnds3[i];
12279 if (swap_cond_operands)
12280 std::swap (vec_then_clause, vec_else_clause);
12282 if (masked)
12283 vec_compare = vec_cond_lhs;
12284 else
12286 vec_cond_rhs = vec_oprnds1[i];
12287 if (bitop1 == NOP_EXPR)
12289 gimple_seq stmts = NULL;
12290 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12291 vec_cond_lhs, vec_cond_rhs);
12292 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12294 else
12296 new_temp = make_ssa_name (vec_cmp_type);
12297 gassign *new_stmt;
12298 if (bitop1 == BIT_NOT_EXPR)
12299 new_stmt = gimple_build_assign (new_temp, bitop1,
12300 vec_cond_rhs);
12301 else
12302 new_stmt
12303 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12304 vec_cond_rhs);
12305 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12306 if (bitop2 == NOP_EXPR)
12307 vec_compare = new_temp;
12308 else if (bitop2 == BIT_NOT_EXPR
12309 && reduction_type != EXTRACT_LAST_REDUCTION)
12311 /* Instead of doing ~x ? y : z do x ? z : y. */
12312 vec_compare = new_temp;
12313 std::swap (vec_then_clause, vec_else_clause);
12315 else
12317 vec_compare = make_ssa_name (vec_cmp_type);
12318 if (bitop2 == BIT_NOT_EXPR)
12319 new_stmt
12320 = gimple_build_assign (vec_compare, bitop2, new_temp);
12321 else
12322 new_stmt
12323 = gimple_build_assign (vec_compare, bitop2,
12324 vec_cond_lhs, new_temp);
12325 vect_finish_stmt_generation (vinfo, stmt_info,
12326 new_stmt, gsi);
12331 /* If we decided to apply a loop mask to the result of the vector
12332 comparison, AND the comparison with the mask now. Later passes
12333 should then be able to reuse the AND results between multiple
12334 vector statements.
12336 For example:
12337 for (int i = 0; i < 100; ++i)
12338 x[i] = y[i] ? z[i] : 10;
12340 results in following optimized GIMPLE:
12342 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12343 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12344 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12345 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12346 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12347 vect_iftmp.11_47, { 10, ... }>;
12349 instead of using masked and unmasked forms of
12350 vec != { 0, ... } (masked in the MASK_LOAD,
12351 unmasked in the VEC_COND_EXPR). */
12353 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12354 in cases where that's necessary. */
12356 tree len = NULL_TREE, bias = NULL_TREE;
12357 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12359 if (!is_gimple_val (vec_compare))
12361 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12362 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12363 vec_compare);
12364 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12365 vec_compare = vec_compare_name;
12368 if (must_invert_cmp_result)
12370 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12371 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12372 BIT_NOT_EXPR,
12373 vec_compare);
12374 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12375 vec_compare = vec_compare_name;
12378 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12379 vectype, OPTIMIZE_FOR_SPEED))
12381 if (lens)
12383 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12384 vec_num * ncopies, vectype, i, 1);
12385 signed char biasval
12386 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12387 bias = build_int_cst (intQI_type_node, biasval);
12389 else
12391 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12392 bias = build_int_cst (intQI_type_node, 0);
12395 if (masks)
12397 tree loop_mask
12398 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12399 vectype, i);
12400 tree tmp2 = make_ssa_name (vec_cmp_type);
12401 gassign *g
12402 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12403 loop_mask);
12404 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12405 vec_compare = tmp2;
12409 gimple *new_stmt;
12410 if (reduction_type == EXTRACT_LAST_REDUCTION)
12412 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12413 tree lhs = gimple_get_lhs (old_stmt);
12414 if (len)
12415 new_stmt = gimple_build_call_internal
12416 (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
12417 vec_then_clause, len, bias);
12418 else
12419 new_stmt = gimple_build_call_internal
12420 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
12421 vec_then_clause);
12422 gimple_call_set_lhs (new_stmt, lhs);
12423 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12424 if (old_stmt == gsi_stmt (*gsi))
12425 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12426 else
12428 /* In this case we're moving the definition to later in the
12429 block. That doesn't matter because the only uses of the
12430 lhs are in phi statements. */
12431 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12432 gsi_remove (&old_gsi, true);
12433 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12436 else
12438 new_temp = make_ssa_name (vec_dest);
12439 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12440 vec_then_clause, vec_else_clause);
12441 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12443 if (slp_node)
12444 slp_node->push_vec_def (new_stmt);
12445 else
12446 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12449 if (!slp_node)
12450 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12452 vec_oprnds0.release ();
12453 vec_oprnds1.release ();
12454 vec_oprnds2.release ();
12455 vec_oprnds3.release ();
12457 return true;
12460 /* Helper of vectorizable_comparison.
12462 Check if STMT_INFO is a comparison expression with code CODE that can be vectorized.
12463 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12464 comparison, put it in VEC_STMT, and insert it at GSI.
12466 Return true if STMT_INFO is vectorizable in this way. */
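/* Note: as with the other vectorizable_* routines, this is used in two
   phases: when VEC_STMT is null it only checks target support and records
   costs in COST_VEC; when VEC_STMT is non-null it performs the transform
   and emits the vector statements at GSI.  */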
12468 static bool
12469 vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12470 stmt_vec_info stmt_info, tree_code code,
12471 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12472 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12474 tree lhs, rhs1, rhs2;
12475 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12476 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12477 tree new_temp;
12478 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12479 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12480 int ndts = 2;
12481 poly_uint64 nunits;
12482 int ncopies;
12483 enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12484 int i;
12485 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12486 vec<tree> vec_oprnds0 = vNULL;
12487 vec<tree> vec_oprnds1 = vNULL;
12488 tree mask_type;
12489 tree mask;
12491 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12492 return false;
12494 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12495 return false;
12497 mask_type = vectype;
12498 nunits = TYPE_VECTOR_SUBPARTS (vectype);
12500 if (slp_node)
12501 ncopies = 1;
12502 else
12503 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12505 gcc_assert (ncopies >= 1);
12507 if (TREE_CODE_CLASS (code) != tcc_comparison)
12508 return false;
12510 slp_tree slp_rhs1, slp_rhs2;
12511 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12512 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12513 return false;
12515 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12516 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12517 return false;
12519 if (vectype1 && vectype2
12520 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12521 TYPE_VECTOR_SUBPARTS (vectype2)))
12522 return false;
12524 vectype = vectype1 ? vectype1 : vectype2;
12526 /* Invariant comparison. */
12527 if (!vectype)
12529 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
12530 vectype = mask_type;
12531 else
12532 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
12533 slp_node);
12534 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12535 return false;
12537 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12538 return false;
12540 /* Can't compare mask and non-mask types. */
12541 if (vectype1 && vectype2
12542 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12543 return false;
12545 /* Boolean values may have another representation in vectors
12546 and therefore we prefer bit operations over comparison for
12547 them (which also works for scalar masks). We store opcodes
12548 to use in bitop1 and bitop2. Statement is vectorized as
12549 BITOP2 (rhs1 BITOP1 rhs2) or
12550 rhs1 BITOP2 (BITOP1 rhs2)
12551 depending on bitop1 and bitop2 arity. */
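/* For example, with the opcode choices below a mask comparison a > b is
   emitted as a & ~b, a >= b as a | ~b, the LT/LE cases use the same
   sequences with the operands swapped (swap_p), a == b becomes ~(a ^ b)
   and a != b is simply a ^ b.  */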
12552 bool swap_p = false;
12553 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12555 if (code == GT_EXPR)
12557 bitop1 = BIT_NOT_EXPR;
12558 bitop2 = BIT_AND_EXPR;
12560 else if (code == GE_EXPR)
12562 bitop1 = BIT_NOT_EXPR;
12563 bitop2 = BIT_IOR_EXPR;
12565 else if (code == LT_EXPR)
12567 bitop1 = BIT_NOT_EXPR;
12568 bitop2 = BIT_AND_EXPR;
12569 swap_p = true;
12571 else if (code == LE_EXPR)
12573 bitop1 = BIT_NOT_EXPR;
12574 bitop2 = BIT_IOR_EXPR;
12575 swap_p = true;
12577 else
12579 bitop1 = BIT_XOR_EXPR;
12580 if (code == EQ_EXPR)
12581 bitop2 = BIT_NOT_EXPR;
12585 if (!vec_stmt)
12587 if (bitop1 == NOP_EXPR)
12589 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12590 return false;
12592 else
12594 machine_mode mode = TYPE_MODE (vectype);
12595 optab optab;
12597 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12598 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12599 return false;
12601 if (bitop2 != NOP_EXPR)
12603 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12604 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12605 return false;
12609 /* Put types on constant and invariant SLP children. */
12610 if (slp_node
12611 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12612 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12614 if (dump_enabled_p ())
12615 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12616 "incompatible vector types for invariants\n");
12617 return false;
12620 vect_model_simple_cost (vinfo, stmt_info,
12621 ncopies * (1 + (bitop2 != NOP_EXPR)),
12622 dts, ndts, slp_node, cost_vec);
12623 return true;
12626 /* Transform. */
12628 /* Handle def. */
12629 lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info));
12630 mask = vect_create_destination_var (lhs, mask_type);
12632 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12633 rhs1, vectype, &vec_oprnds0,
12634 rhs2, vectype, &vec_oprnds1);
12635 if (swap_p)
12636 std::swap (vec_oprnds0, vec_oprnds1);
12638 /* Arguments are ready. Create the new vector stmt. */
12639 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12641 gimple *new_stmt;
12642 vec_rhs2 = vec_oprnds1[i];
12644 new_temp = make_ssa_name (mask);
12645 if (bitop1 == NOP_EXPR)
12647 new_stmt = gimple_build_assign (new_temp, code,
12648 vec_rhs1, vec_rhs2);
12649 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12651 else
12653 if (bitop1 == BIT_NOT_EXPR)
12654 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12655 else
12656 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12657 vec_rhs2);
12658 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12659 if (bitop2 != NOP_EXPR)
12661 tree res = make_ssa_name (mask);
12662 if (bitop2 == BIT_NOT_EXPR)
12663 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12664 else
12665 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12666 new_temp);
12667 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12670 if (slp_node)
12671 slp_node->push_vec_def (new_stmt);
12672 else
12673 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12676 if (!slp_node)
12677 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12679 vec_oprnds0.release ();
12680 vec_oprnds1.release ();
12682 return true;
12685 /* vectorizable_comparison.
12687 Check if STMT_INFO is a comparison expression that can be vectorized.
12688 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12689 comparison, put it in VEC_STMT, and insert it at GSI.
12691 Return true if STMT_INFO is vectorizable in this way. */
12693 static bool
12694 vectorizable_comparison (vec_info *vinfo,
12695 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12696 gimple **vec_stmt,
12697 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12699 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12701 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12702 return false;
12704 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12705 return false;
12707 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12708 if (!stmt)
12709 return false;
12711 enum tree_code code = gimple_assign_rhs_code (stmt);
12712 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12713 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12714 vec_stmt, slp_node, cost_vec))
12715 return false;
12717 if (!vec_stmt)
12718 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12720 return true;
12723 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12724 can handle all live statements in the node. Otherwise return true
12725 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12726 VEC_STMT_P is as for vectorizable_live_operation. */
12728 static bool
12729 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
12730 slp_tree slp_node, slp_instance slp_node_instance,
12731 bool vec_stmt_p,
12732 stmt_vector_for_cost *cost_vec)
12734 if (slp_node)
12736 stmt_vec_info slp_stmt_info;
12737 unsigned int i;
12738 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
12740 if (STMT_VINFO_LIVE_P (slp_stmt_info)
12741 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
12742 slp_node_instance, i,
12743 vec_stmt_p, cost_vec))
12744 return false;
12747 else if (STMT_VINFO_LIVE_P (stmt_info)
12748 && !vectorizable_live_operation (vinfo, stmt_info,
12749 slp_node, slp_node_instance, -1,
12750 vec_stmt_p, cost_vec))
12751 return false;
12753 return true;
12756 /* Make sure the statement is vectorizable. */
12758 opt_result
12759 vect_analyze_stmt (vec_info *vinfo,
12760 stmt_vec_info stmt_info, bool *need_to_vectorize,
12761 slp_tree node, slp_instance node_instance,
12762 stmt_vector_for_cost *cost_vec)
12764 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12765 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
12766 bool ok;
12767 gimple_seq pattern_def_seq;
12769 if (dump_enabled_p ())
12770 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
12771 stmt_info->stmt);
12773 if (gimple_has_volatile_ops (stmt_info->stmt))
12774 return opt_result::failure_at (stmt_info->stmt,
12775 "not vectorized:"
12776 " stmt has volatile operands: %G\n",
12777 stmt_info->stmt);
12779 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12780 && node == NULL
12781 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
12783 gimple_stmt_iterator si;
12785 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
12787 stmt_vec_info pattern_def_stmt_info
12788 = vinfo->lookup_stmt (gsi_stmt (si));
12789 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
12790 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
12792 /* Analyze def stmt of STMT if it's a pattern stmt. */
12793 if (dump_enabled_p ())
12794 dump_printf_loc (MSG_NOTE, vect_location,
12795 "==> examining pattern def statement: %G",
12796 pattern_def_stmt_info->stmt);
12798 opt_result res
12799 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
12800 need_to_vectorize, node, node_instance,
12801 cost_vec);
12802 if (!res)
12803 return res;
12808 /* Skip stmts that do not need to be vectorized. In loops this is expected
12809 to include:
12810 - the COND_EXPR which is the loop exit condition
12811 - any LABEL_EXPRs in the loop
12812 - computations that are used only for array indexing or loop control.
12813 In basic blocks we only analyze statements that are a part of some SLP
12814 instance, therefore, all the statements are relevant.
12816 A pattern statement needs to be analyzed instead of the original statement
12817 if the original statement is not relevant.  Otherwise, we analyze both
12818 statements.  In basic blocks we are called from some SLP instance
12819 traversal, so we don't analyze pattern stmts instead; the pattern stmts
12820 will already be part of an SLP instance. */
12822 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
12823 if (!STMT_VINFO_RELEVANT_P (stmt_info)
12824 && !STMT_VINFO_LIVE_P (stmt_info))
12826 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12827 && pattern_stmt_info
12828 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12829 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12831 /* Analyze PATTERN_STMT instead of the original stmt. */
12832 stmt_info = pattern_stmt_info;
12833 if (dump_enabled_p ())
12834 dump_printf_loc (MSG_NOTE, vect_location,
12835 "==> examining pattern statement: %G",
12836 stmt_info->stmt);
12838 else
12840 if (dump_enabled_p ())
12841 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
12843 return opt_result::success ();
12846 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12847 && node == NULL
12848 && pattern_stmt_info
12849 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12850 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12852 /* Analyze PATTERN_STMT too. */
12853 if (dump_enabled_p ())
12854 dump_printf_loc (MSG_NOTE, vect_location,
12855 "==> examining pattern statement: %G",
12856 pattern_stmt_info->stmt);
12858 opt_result res
12859 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
12860 node_instance, cost_vec);
12861 if (!res)
12862 return res;
12865 switch (STMT_VINFO_DEF_TYPE (stmt_info))
12867 case vect_internal_def:
12868 break;
12870 case vect_reduction_def:
12871 case vect_nested_cycle:
12872 gcc_assert (!bb_vinfo
12873 && (relevance == vect_used_in_outer
12874 || relevance == vect_used_in_outer_by_reduction
12875 || relevance == vect_used_by_reduction
12876 || relevance == vect_unused_in_scope
12877 || relevance == vect_used_only_live));
12878 break;
12880 case vect_induction_def:
12881 case vect_first_order_recurrence:
12882 gcc_assert (!bb_vinfo);
12883 break;
12885 case vect_constant_def:
12886 case vect_external_def:
12887 case vect_unknown_def_type:
12888 default:
12889 gcc_unreachable ();
12892 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12893 if (node)
12894 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
12896 if (STMT_VINFO_RELEVANT_P (stmt_info))
12898 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
12899 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12900 || (call && gimple_call_lhs (call) == NULL_TREE));
12901 *need_to_vectorize = true;
12904 if (PURE_SLP_STMT (stmt_info) && !node)
12906 if (dump_enabled_p ())
12907 dump_printf_loc (MSG_NOTE, vect_location,
12908 "handled only by SLP analysis\n");
12909 return opt_result::success ();
12912 ok = true;
12913 if (!bb_vinfo
12914 && (STMT_VINFO_RELEVANT_P (stmt_info)
12915 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
12916 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12917 -mveclibabi= takes preference over library functions with
12918 the simd attribute. */
12919 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12920 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
12921 cost_vec)
12922 || vectorizable_conversion (vinfo, stmt_info,
12923 NULL, NULL, node, cost_vec)
12924 || vectorizable_operation (vinfo, stmt_info,
12925 NULL, NULL, node, cost_vec)
12926 || vectorizable_assignment (vinfo, stmt_info,
12927 NULL, NULL, node, cost_vec)
12928 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12929 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12930 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12931 node, node_instance, cost_vec)
12932 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
12933 NULL, node, cost_vec)
12934 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12935 || vectorizable_condition (vinfo, stmt_info,
12936 NULL, NULL, node, cost_vec)
12937 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12938 cost_vec)
12939 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12940 stmt_info, NULL, node)
12941 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12942 stmt_info, NULL, node, cost_vec));
12943 else
12945 if (bb_vinfo)
12946 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12947 || vectorizable_simd_clone_call (vinfo, stmt_info,
12948 NULL, NULL, node, cost_vec)
12949 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
12950 cost_vec)
12951 || vectorizable_shift (vinfo, stmt_info,
12952 NULL, NULL, node, cost_vec)
12953 || vectorizable_operation (vinfo, stmt_info,
12954 NULL, NULL, node, cost_vec)
12955 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
12956 cost_vec)
12957 || vectorizable_load (vinfo, stmt_info,
12958 NULL, NULL, node, cost_vec)
12959 || vectorizable_store (vinfo, stmt_info,
12960 NULL, NULL, node, cost_vec)
12961 || vectorizable_condition (vinfo, stmt_info,
12962 NULL, NULL, node, cost_vec)
12963 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12964 cost_vec)
12965 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12968 if (node)
12969 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12971 if (!ok)
12972 return opt_result::failure_at (stmt_info->stmt,
12973 "not vectorized:"
12974 " relevant stmt not supported: %G",
12975 stmt_info->stmt);
12977 /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
12978 need extra handling, except for vectorizable reductions. */
12979 if (!bb_vinfo
12980 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
12981 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
12982 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
12983 stmt_info, node, node_instance,
12984 false, cost_vec))
12985 return opt_result::failure_at (stmt_info->stmt,
12986 "not vectorized:"
12987 " live stmt not supported: %G",
12988 stmt_info->stmt);
12990 return opt_result::success ();
12994 /* Function vect_transform_stmt.
12996 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
12998 bool
12999 vect_transform_stmt (vec_info *vinfo,
13000 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
13001 slp_tree slp_node, slp_instance slp_node_instance)
13003 bool is_store = false;
13004 gimple *vec_stmt = NULL;
13005 bool done;
13007 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
13009 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13010 if (slp_node)
13011 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
13013 switch (STMT_VINFO_TYPE (stmt_info))
13015 case type_demotion_vec_info_type:
13016 case type_promotion_vec_info_type:
13017 case type_conversion_vec_info_type:
13018 done = vectorizable_conversion (vinfo, stmt_info,
13019 gsi, &vec_stmt, slp_node, NULL);
13020 gcc_assert (done);
13021 break;
13023 case induc_vec_info_type:
13024 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
13025 stmt_info, &vec_stmt, slp_node,
13026 NULL);
13027 gcc_assert (done);
13028 break;
13030 case shift_vec_info_type:
13031 done = vectorizable_shift (vinfo, stmt_info,
13032 gsi, &vec_stmt, slp_node, NULL);
13033 gcc_assert (done);
13034 break;
13036 case op_vec_info_type:
13037 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13038 NULL);
13039 gcc_assert (done);
13040 break;
13042 case assignment_vec_info_type:
13043 done = vectorizable_assignment (vinfo, stmt_info,
13044 gsi, &vec_stmt, slp_node, NULL);
13045 gcc_assert (done);
13046 break;
13048 case load_vec_info_type:
13049 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13050 NULL);
13051 gcc_assert (done);
13052 break;
13054 case store_vec_info_type:
13055 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
13056 && !slp_node
13057 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
13058 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
13059 /* In case of interleaving, the whole chain is vectorized when the
13060 last store in the chain is reached. Store stmts before the last
13061 one are skipped, and their vec_stmt_info shouldn't be freed
13062 meanwhile. */
13064 else
13066 done = vectorizable_store (vinfo, stmt_info,
13067 gsi, &vec_stmt, slp_node, NULL);
13068 gcc_assert (done);
13069 is_store = true;
13071 break;
13073 case condition_vec_info_type:
13074 done = vectorizable_condition (vinfo, stmt_info,
13075 gsi, &vec_stmt, slp_node, NULL);
13076 gcc_assert (done);
13077 break;
13079 case comparison_vec_info_type:
13080 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
13081 slp_node, NULL);
13082 gcc_assert (done);
13083 break;
13085 case call_vec_info_type:
13086 done = vectorizable_call (vinfo, stmt_info,
13087 gsi, &vec_stmt, slp_node, NULL);
13088 break;
13090 case call_simd_clone_vec_info_type:
13091 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
13092 slp_node, NULL);
13093 break;
13095 case reduc_vec_info_type:
13096 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13097 gsi, &vec_stmt, slp_node);
13098 gcc_assert (done);
13099 break;
13101 case cycle_phi_info_type:
13102 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
13103 &vec_stmt, slp_node, slp_node_instance);
13104 gcc_assert (done);
13105 break;
13107 case lc_phi_info_type:
13108 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13109 stmt_info, &vec_stmt, slp_node);
13110 gcc_assert (done);
13111 break;
13113 case recurr_info_type:
13114 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13115 stmt_info, &vec_stmt, slp_node, NULL);
13116 gcc_assert (done);
13117 break;
13119 case phi_info_type:
13120 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
13121 gcc_assert (done);
13122 break;
13124 default:
13125 if (!STMT_VINFO_LIVE_P (stmt_info))
13127 if (dump_enabled_p ())
13128 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13129 "stmt not supported.\n");
13130 gcc_unreachable ();
13132 done = true;
13135 if (!slp_node && vec_stmt)
13136 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
13138 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
13140 /* Handle stmts whose DEF is used outside the loop-nest that is
13141 being vectorized. */
13142 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13143 slp_node_instance, true, NULL);
13144 gcc_assert (done);
13147 if (slp_node)
13148 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13150 return is_store;
13154 /* Remove a group of stores (for SLP or interleaving), free their
13155 stmt_vec_info. */
13157 void
13158 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13160 stmt_vec_info next_stmt_info = first_stmt_info;
13162 while (next_stmt_info)
13164 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13165 next_stmt_info = vect_orig_stmt (next_stmt_info);
13166 /* Free the attached stmt_vec_info and remove the stmt. */
13167 vinfo->remove_stmt (next_stmt_info);
13168 next_stmt_info = tmp;
13172 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13173 elements of type SCALAR_TYPE, or null if the target doesn't support
13174 such a type.
13176 If NUNITS is zero, return a vector type that contains elements of
13177 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13179 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13180 for this vectorization region and want to "autodetect" the best choice.
13181 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13182 and we want the new type to be interoperable with it. PREVAILING_MODE
13183 in this case can be a scalar integer mode or a vector mode; when it
13184 is a vector mode, the function acts like a tree-level version of
13185 related_vector_mode. */
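/* Illustrative example (target-dependent): with a 128-bit V4SImode as the
   prevailing mode, a request for 8 units of 'short int' would normally
   come back as a vector(8) short int type via related_vector_mode, while
   NUNITS of 0 together with a VOIDmode PREVAILING_MODE lets
   targetm.vectorize.preferred_simd_mode choose the vector size.  */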
13187 tree
13188 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13189 tree scalar_type, poly_uint64 nunits)
13191 tree orig_scalar_type = scalar_type;
13192 scalar_mode inner_mode;
13193 machine_mode simd_mode;
13194 tree vectype;
13196 if ((!INTEGRAL_TYPE_P (scalar_type)
13197 && !POINTER_TYPE_P (scalar_type)
13198 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13199 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13200 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13201 return NULL_TREE;
13203 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13205 /* Interoperability between modes requires one to be a constant multiple
13206 of the other, so that the number of vectors required for each operation
13207 is a compile-time constant. */
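/* For instance, with a 16-byte prevailing mode, requests for 8-, 16- or
   32-byte vectors pass this check, whereas a 12-byte request is rejected
   because neither size is a multiple of the other.  */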
13208 if (prevailing_mode != VOIDmode
13209 && !constant_multiple_p (nunits * nbytes,
13210 GET_MODE_SIZE (prevailing_mode))
13211 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13212 nunits * nbytes))
13213 return NULL_TREE;
13215 /* For vector types of elements whose mode precision doesn't
13216 match their type's precision we use an element type of mode
13217 precision. The vectorization routines will have to make sure
13218 they support the proper result truncation/extension.
13219 We also make sure to build vector types with INTEGER_TYPE
13220 component type only. */
13221 if (INTEGRAL_TYPE_P (scalar_type)
13222 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13223 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13224 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13225 TYPE_UNSIGNED (scalar_type));
13227 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13228 When the component mode passes the above test simply use a type
13229 corresponding to that mode. The theory is that any use that
13230 would cause problems with this will disable vectorization anyway. */
13231 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13232 && !INTEGRAL_TYPE_P (scalar_type))
13233 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13235 /* We can't build a vector type of elements with alignment bigger than
13236 their size. */
13237 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13238 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13239 TYPE_UNSIGNED (scalar_type));
13241 /* If we fell back to using the mode, fail if there was
13242 no scalar type for it. */
13243 if (scalar_type == NULL_TREE)
13244 return NULL_TREE;
13246 /* If no prevailing mode was supplied, use the mode the target prefers.
13247 Otherwise lookup a vector mode based on the prevailing mode. */
13248 if (prevailing_mode == VOIDmode)
13250 gcc_assert (known_eq (nunits, 0U));
13251 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13252 if (SCALAR_INT_MODE_P (simd_mode))
13254 /* Traditional behavior is not to take the integer mode
13255 literally, but simply to use it as a way of determining
13256 the vector size. It is up to mode_for_vector to decide
13257 what the TYPE_MODE should be.
13259 Note that nunits == 1 is allowed in order to support single
13260 element vector types. */
13261 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
13262 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13263 return NULL_TREE;
13266 else if (SCALAR_INT_MODE_P (prevailing_mode)
13267 || !related_vector_mode (prevailing_mode,
13268 inner_mode, nunits).exists (&simd_mode))
13270 /* Fall back to using mode_for_vector, mostly in the hope of being
13271 able to use an integer mode. */
13272 if (known_eq (nunits, 0U)
13273 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13274 return NULL_TREE;
13276 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13277 return NULL_TREE;
13280 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13282 /* In cases where the mode was chosen by mode_for_vector, check that
13283 the target actually supports the chosen mode, or that it at least
13284 allows the vector mode to be replaced by a like-sized integer. */
13285 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13286 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13287 return NULL_TREE;
13289 /* Re-attach the address-space qualifier if we canonicalized the scalar
13290 type. */
13291 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13292 return build_qualified_type
13293 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13295 return vectype;
13298 /* Function get_vectype_for_scalar_type.
13300 Returns the vector type corresponding to SCALAR_TYPE as supported
13301 by the target. If GROUP_SIZE is nonzero and we're performing BB
13302 vectorization, make sure that the number of elements in the vector
13303 is no bigger than GROUP_SIZE. */
13305 tree
13306 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13307 unsigned int group_size)
13309 /* For BB vectorization, we should always have a group size once we've
13310 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13311 are tentative requests during things like early data reference
13312 analysis and pattern recognition. */
13313 if (is_a <bb_vec_info> (vinfo))
13314 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13315 else
13316 group_size = 0;
13318 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13319 scalar_type);
13320 if (vectype && vinfo->vector_mode == VOIDmode)
13321 vinfo->vector_mode = TYPE_MODE (vectype);
13323 /* Register the natural choice of vector type, before the group size
13324 has been applied. */
13325 if (vectype)
13326 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13328 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13329 try again with an explicit number of elements. */
13330 if (vectype
13331 && group_size
13332 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13334 /* Start with the biggest number of units that fits within
13335 GROUP_SIZE and halve it until we find a valid vector type.
13336 Usually either the first attempt will succeed or all will
13337 fail (in the latter case because GROUP_SIZE is too small
13338 for the target), but it's possible that a target could have
13339 a hole between supported vector types.
13341 If GROUP_SIZE is not a power of 2, this has the effect of
13342 trying the largest power of 2 that fits within the group,
13343 even though the group is not a multiple of that vector size.
13344 The BB vectorizer will then try to carve up the group into
13345 smaller pieces. */
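/* E.g. a GROUP_SIZE of 6 tries 4 units first and then 2, stopping at the
   first count for which the target provides a vector type.  */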
13346 unsigned int nunits = 1 << floor_log2 (group_size);
13349 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13350 scalar_type, nunits);
13351 nunits /= 2;
13353 while (nunits > 1 && !vectype);
13356 return vectype;
13359 /* Return the vector type corresponding to SCALAR_TYPE as supported
13360 by the target. NODE, if nonnull, is the SLP tree node that will
13361 use the returned vector type. */
13363 tree
13364 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13366 unsigned int group_size = 0;
13367 if (node)
13368 group_size = SLP_TREE_LANES (node);
13369 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13372 /* Function get_mask_type_for_scalar_type.
13374 Returns the mask type corresponding to a result of comparison
13375 of vectors of specified SCALAR_TYPE as supported by target.
13376 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13377 make sure that the number of elements in the vector is no bigger
13378 than GROUP_SIZE. */
13380 tree
13381 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13382 unsigned int group_size)
13384 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13386 if (!vectype)
13387 return NULL;
13389 return truth_type_for (vectype);
13392 /* Function get_mask_type_for_scalar_type.
13394 Returns the mask type corresponding to a result of comparison
13395 of vectors of specified SCALAR_TYPE as supported by target.
13396 NODE, if nonnull, is the SLP tree node that will use the returned
13397 vector type. */
13399 tree
13400 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13401 slp_tree node)
13403 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
13405 if (!vectype)
13406 return NULL;
13408 return truth_type_for (vectype);
13411 /* Function get_same_sized_vectype
13413 Returns a vector type with elements of SCALAR_TYPE and the same
13414 overall size as VECTOR_TYPE, if supported by the target. */
13416 tree
13417 get_same_sized_vectype (tree scalar_type, tree vector_type)
13419 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13420 return truth_type_for (vector_type);
13422 poly_uint64 nunits;
13423 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
13424 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13425 return NULL_TREE;
13427 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13428 scalar_type, nunits);
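/* Usage sketch (illustrative sizes): for a 16-byte V8HImode VECTOR_TYPE
   and a 4-byte SCALAR_TYPE, NUNITS computes to 4, so the result is the
   corresponding 4-element vector type if the target supports one.  */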
13431 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13432 would not change the chosen vector modes. */
13434 bool
13435 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13437 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13438 i != vinfo->used_vector_modes.end (); ++i)
13439 if (!VECTOR_MODE_P (*i)
13440 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13441 return false;
13442 return true;
13445 /* Function vect_is_simple_use.
13447 Input:
13448 VINFO - the vect info of the loop or basic block that is being vectorized.
13449 OPERAND - operand in the loop or bb.
13450 Output:
13451 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13452 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13453 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13454 the definition could be anywhere in the function
13455 DT - the type of definition
13457 Returns whether a stmt with OPERAND can be vectorized.
13458 For loops, supportable operands are constants, loop invariants, and operands
13459 that are defined by the current iteration of the loop. Unsupportable
13460 operands are those that are defined by a previous iteration of the loop (as
13461 is the case in reduction/induction computations).
13462 For basic blocks, supportable operands are constants and bb invariants.
13463 For now, operands defined outside the basic block are not supported. */
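/* As the classification below shows: constants yield vect_constant_def;
   other invariants and default definitions yield vect_external_def; SSA
   names defined outside the vectorized region are also treated as
   external; and names defined inside it take the def type recorded in
   their stmt_vec_info.  */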
13465 bool
13466 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13467 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
13469 if (def_stmt_info_out)
13470 *def_stmt_info_out = NULL;
13471 if (def_stmt_out)
13472 *def_stmt_out = NULL;
13473 *dt = vect_unknown_def_type;
13475 if (dump_enabled_p ())
13477 dump_printf_loc (MSG_NOTE, vect_location,
13478 "vect_is_simple_use: operand ");
13479 if (TREE_CODE (operand) == SSA_NAME
13480 && !SSA_NAME_IS_DEFAULT_DEF (operand))
13481 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13482 else
13483 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13486 if (CONSTANT_CLASS_P (operand))
13487 *dt = vect_constant_def;
13488 else if (is_gimple_min_invariant (operand))
13489 *dt = vect_external_def;
13490 else if (TREE_CODE (operand) != SSA_NAME)
13491 *dt = vect_unknown_def_type;
13492 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13493 *dt = vect_external_def;
13494 else
13496 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13497 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13498 if (!stmt_vinfo)
13499 *dt = vect_external_def;
13500 else
13502 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
13503 def_stmt = stmt_vinfo->stmt;
13504 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13505 if (def_stmt_info_out)
13506 *def_stmt_info_out = stmt_vinfo;
13508 if (def_stmt_out)
13509 *def_stmt_out = def_stmt;
13512 if (dump_enabled_p ())
13514 dump_printf (MSG_NOTE, ", type of def: ");
13515 switch (*dt)
13517 case vect_uninitialized_def:
13518 dump_printf (MSG_NOTE, "uninitialized\n");
13519 break;
13520 case vect_constant_def:
13521 dump_printf (MSG_NOTE, "constant\n");
13522 break;
13523 case vect_external_def:
13524 dump_printf (MSG_NOTE, "external\n");
13525 break;
13526 case vect_internal_def:
13527 dump_printf (MSG_NOTE, "internal\n");
13528 break;
13529 case vect_induction_def:
13530 dump_printf (MSG_NOTE, "induction\n");
13531 break;
13532 case vect_reduction_def:
13533 dump_printf (MSG_NOTE, "reduction\n");
13534 break;
13535 case vect_double_reduction_def:
13536 dump_printf (MSG_NOTE, "double reduction\n");
13537 break;
13538 case vect_nested_cycle:
13539 dump_printf (MSG_NOTE, "nested cycle\n");
13540 break;
13541 case vect_first_order_recurrence:
13542 dump_printf (MSG_NOTE, "first order recurrence\n");
13543 break;
13544 case vect_unknown_def_type:
13545 dump_printf (MSG_NOTE, "unknown\n");
13546 break;
13550 if (*dt == vect_unknown_def_type)
13552 if (dump_enabled_p ())
13553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13554 "Unsupported pattern.\n");
13555 return false;
13558 return true;
13561 /* Function vect_is_simple_use.
13563 Same as vect_is_simple_use but also determines the vector operand
13564 type of OPERAND and stores it to *VECTYPE. If the definition of
13565 OPERAND is vect_uninitialized_def, vect_constant_def or
13566 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
13567 is responsible for computing the best suited vector type for the
13568 scalar operand. */
13570 bool
13571 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13572 tree *vectype, stmt_vec_info *def_stmt_info_out,
13573 gimple **def_stmt_out)
13575 stmt_vec_info def_stmt_info;
13576 gimple *def_stmt;
13577 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
13578 return false;
13580 if (def_stmt_out)
13581 *def_stmt_out = def_stmt;
13582 if (def_stmt_info_out)
13583 *def_stmt_info_out = def_stmt_info;
13585 /* Now get a vector type if the def is internal, otherwise supply
13586 NULL_TREE and leave it up to the caller to figure out a proper
13587 type for the use stmt. */
13588 if (*dt == vect_internal_def
13589 || *dt == vect_induction_def
13590 || *dt == vect_reduction_def
13591 || *dt == vect_double_reduction_def
13592 || *dt == vect_nested_cycle
13593 || *dt == vect_first_order_recurrence)
13595 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
13596 gcc_assert (*vectype != NULL_TREE);
13597 if (dump_enabled_p ())
13598 dump_printf_loc (MSG_NOTE, vect_location,
13599 "vect_is_simple_use: vectype %T\n", *vectype);
13601 else if (*dt == vect_uninitialized_def
13602 || *dt == vect_constant_def
13603 || *dt == vect_external_def)
13604 *vectype = NULL_TREE;
13605 else
13606 gcc_unreachable ();
13608 return true;
13611 /* Function vect_is_simple_use.
13613 Same as vect_is_simple_use but determines the operand by operand
13614 position OPERAND from either STMT or SLP_NODE, filling in *OP
13615 and *SLP_DEF (when SLP_NODE is not NULL). */
13617 bool
13618 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13619 unsigned operand, tree *op, slp_tree *slp_def,
13620 enum vect_def_type *dt,
13621 tree *vectype, stmt_vec_info *def_stmt_info_out)
13623 if (slp_node)
13625 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13626 *slp_def = child;
13627 *vectype = SLP_TREE_VECTYPE (child);
13628 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13630 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13631 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
13633 else
13635 if (def_stmt_info_out)
13636 *def_stmt_info_out = NULL;
13637 *op = SLP_TREE_SCALAR_OPS (child)[0];
13638 *dt = SLP_TREE_DEF_TYPE (child);
13639 return true;
13642 else
13644 *slp_def = NULL;
13645 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
13647 if (gimple_assign_rhs_code (ass) == COND_EXPR
13648 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
13650 if (operand < 2)
13651 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
13652 else
13653 *op = gimple_op (ass, operand);
13655 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
13656 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
13657 else
13658 *op = gimple_op (ass, operand + 1);
13660 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
13661 *op = gimple_call_arg (call, operand);
13662 else
13663 gcc_unreachable ();
13664 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
13668 /* If OP is not NULL and is external or constant update its vector
13669 type with VECTYPE. Returns true if successful or false if not,
13670 for example when conflicting vector types are present. */
13672 bool
13673 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13675 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13676 return true;
13677 if (SLP_TREE_VECTYPE (op))
13678 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
13679 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13680 should be handled by patterns.  Allow vect_constant_def for now. */
13681 if (VECTOR_BOOLEAN_TYPE_P (vectype)
13682 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
13683 return false;
13684 SLP_TREE_VECTYPE (op) = vectype;
13685 return true;
13688 /* Function supportable_widening_operation
13690 Check whether an operation represented by the code CODE is a
13691 widening operation that is supported by the target platform in
13692 vector form (i.e., when operating on arguments of type VECTYPE_IN
13693 producing a result of type VECTYPE_OUT).
13695 Widening operations we currently support are NOP (CONVERT), FLOAT,
13696 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13697 are supported by the target platform either directly (via vector
13698 tree-codes), or via target builtins.
13700 Output:
13701 - CODE1 and CODE2 are codes of vector operations to be used when
13702 vectorizing the operation, if available.
13703 - MULTI_STEP_CVT determines the number of required intermediate steps in
13704 case of multi-step conversion (like char->short->int - in that case
13705 MULTI_STEP_CVT will be 1).
13706 - INTERM_TYPES contains the intermediate type required to perform the
13707 widening operation (short in the above example). */
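/* Worked example: for a widening multiplication of chars producing shorts,
   CODE is WIDEN_MULT_EXPR and, when the target supports it directly,
   *CODE1/*CODE2 are typically VEC_WIDEN_MULT_LO_EXPR/VEC_WIDEN_MULT_HI_EXPR
   (or the EVEN/ODD variants for reduction-only uses), with *MULTI_STEP_CVT
   left at 0; only conversions take the multi-step path below.  */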
13709 bool
13710 supportable_widening_operation (vec_info *vinfo,
13711 code_helper code,
13712 stmt_vec_info stmt_info,
13713 tree vectype_out, tree vectype_in,
13714 code_helper *code1,
13715 code_helper *code2,
13716 int *multi_step_cvt,
13717 vec<tree> *interm_types)
13719 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
13720 class loop *vect_loop = NULL;
13721 machine_mode vec_mode;
13722 enum insn_code icode1, icode2;
13723 optab optab1 = unknown_optab, optab2 = unknown_optab;
13724 tree vectype = vectype_in;
13725 tree wide_vectype = vectype_out;
13726 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
13727 int i;
13728 tree prev_type, intermediate_type;
13729 machine_mode intermediate_mode, prev_mode;
13730 optab optab3, optab4;
13732 *multi_step_cvt = 0;
13733 if (loop_info)
13734 vect_loop = LOOP_VINFO_LOOP (loop_info);
13736 switch (code.safe_as_tree_code ())
13738 case MAX_TREE_CODES:
13739 /* Don't set c1 and c2 if code is not a tree_code. */
13740 break;
13742 case WIDEN_MULT_EXPR:
13743 /* The result of a vectorized widening operation usually requires
13744 two vectors (because the widened results do not fit into one vector).
13745 The generated vector results would normally be expected to be
13746 generated in the same order as in the original scalar computation,
13747 i.e. if 8 results are generated in each vector iteration, they are
13748 to be organized as follows:
13749 vect1: [res1,res2,res3,res4],
13750 vect2: [res5,res6,res7,res8].
13752 However, in the special case that the result of the widening
13753 operation is used in a reduction computation only, the order doesn't
13754 matter (because when vectorizing a reduction we change the order of
13755 the computation). Some targets can take advantage of this and
13756 generate more efficient code. For example, targets like Altivec,
13757 that support widen_mult using a sequence of {mult_even,mult_odd}
13758 generate the following vectors:
13759 vect1: [res1,res3,res5,res7],
13760 vect2: [res2,res4,res6,res8].
13762 When vectorizing outer-loops, we execute the inner-loop sequentially
13763 (each vectorized inner-loop iteration contributes to VF outer-loop
13764 iterations in parallel).  We therefore don't allow changing the
13765 order of the computation in the inner-loop during outer-loop
13766 vectorization. */
13767 /* TODO: Another case in which order doesn't *really* matter is when we
13768 widen and then contract again, e.g. (short)((int)x * y >> 8).
13769 Normally, pack_trunc performs an even/odd permute, whereas the
13770 repack from an even/odd expansion would be an interleave, which
13771 would be significantly simpler for e.g. AVX2. */
13772 /* In any case, in order to avoid duplicating the code below, recurse
13773 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13774 are properly set up for the caller. If we fail, we'll continue with
13775 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13776 if (vect_loop
13777 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
13778 && !nested_in_vect_loop_p (vect_loop, stmt_info)
13779 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
13780 stmt_info, vectype_out,
13781 vectype_in, code1,
13782 code2, multi_step_cvt,
13783 interm_types))
13785 /* Elements in a vector with vect_used_by_reduction property cannot
13786 be reordered if the use chain with this property does not have the
13787 same operation.  One such example is s += a * b, where elements
13788 in a and b cannot be reordered.  Here we check if the vector defined
13789 by STMT_INFO is only directly used in the reduction statement. */
13790 tree lhs = gimple_assign_lhs (stmt_info->stmt);
13791 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
13792 if (use_stmt_info
13793 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
13794 return true;
13796 c1 = VEC_WIDEN_MULT_LO_EXPR;
13797 c2 = VEC_WIDEN_MULT_HI_EXPR;
13798 break;
13800 case DOT_PROD_EXPR:
13801 c1 = DOT_PROD_EXPR;
13802 c2 = DOT_PROD_EXPR;
13803 break;
13805 case SAD_EXPR:
13806 c1 = SAD_EXPR;
13807 c2 = SAD_EXPR;
13808 break;
13810 case VEC_WIDEN_MULT_EVEN_EXPR:
13811 /* Support the recursion induced just above. */
13812 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
13813 c2 = VEC_WIDEN_MULT_ODD_EXPR;
13814 break;
13816 case WIDEN_LSHIFT_EXPR:
13817 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
13818 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
13819 break;
13821 CASE_CONVERT:
13822 c1 = VEC_UNPACK_LO_EXPR;
13823 c2 = VEC_UNPACK_HI_EXPR;
13824 break;
13826 case FLOAT_EXPR:
13827 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
13828 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
13829 break;
13831 case FIX_TRUNC_EXPR:
13832 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
13833 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
13834 break;
13836 default:
13837 gcc_unreachable ();
13840 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
13841 std::swap (c1, c2);
13843 if (code == FIX_TRUNC_EXPR)
13845 /* The signedness is determined from output operand. */
13846 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13847 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13849 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
13850 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13851 && VECTOR_BOOLEAN_TYPE_P (vectype)
13852 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13853 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13855 /* If the input and result modes are the same, a different optab
13856 is needed where we pass in the number of units in vectype. */
13857 optab1 = vec_unpacks_sbool_lo_optab;
13858 optab2 = vec_unpacks_sbool_hi_optab;
13861 vec_mode = TYPE_MODE (vectype);
13862 if (widening_fn_p (code))
13864 /* If this is an internal fn then we must check whether the target
13865 supports either a low-high split or an even-odd split. */
13866 internal_fn ifn = as_internal_fn ((combined_fn) code);
13868 internal_fn lo, hi, even, odd;
13869 lookup_hilo_internal_fn (ifn, &lo, &hi);
13870 *code1 = as_combined_fn (lo);
13871 *code2 = as_combined_fn (hi);
13872 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
13873 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
13875 /* If we don't support low-high, then check for even-odd. */
13876 if (!optab1
13877 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13878 || !optab2
13879 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13881 lookup_evenodd_internal_fn (ifn, &even, &odd);
13882 *code1 = as_combined_fn (even);
13883 *code2 = as_combined_fn (odd);
13884 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
13885 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
13888 else if (code.is_tree_code ())
13890 if (code == FIX_TRUNC_EXPR)
13892 /* The signedness is determined from output operand. */
13893 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13894 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13896 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
13897 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13898 && VECTOR_BOOLEAN_TYPE_P (vectype)
13899 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13900 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13902 /* If the input and result modes are the same, a different optab
13903 is needed where we pass in the number of units in vectype. */
13904 optab1 = vec_unpacks_sbool_lo_optab;
13905 optab2 = vec_unpacks_sbool_hi_optab;
13907 else
13909 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13910 optab2 = optab_for_tree_code (c2, vectype, optab_default);
13912 *code1 = c1;
13913 *code2 = c2;
13916 if (!optab1 || !optab2)
13917 return false;
13919 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13920 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13921 return false;
13924 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13925 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13927 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13928 return true;
13929 /* For scalar masks we may have different boolean
13930 vector types having the same QImode. Thus we
13931 add an additional check for the number of elements. */
13932 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
13933 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13934 return true;
13937 /* Check if it's a multi-step conversion that can be done using intermediate
13938 types. */
13940 prev_type = vectype;
13941 prev_mode = vec_mode;
13943 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
13944 return false;
13946 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13947 intermediate steps in the promotion sequence.  We try
13948 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
13949 not. */
13950 interm_types->create (MAX_INTERM_CVT_STEPS);
13951 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13953 intermediate_mode = insn_data[icode1].operand[0].mode;
13954 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13955 intermediate_type
13956 = vect_halve_mask_nunits (prev_type, intermediate_mode);
13957 else if (VECTOR_MODE_P (intermediate_mode))
13959 tree intermediate_element_type
13960 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
13961 TYPE_UNSIGNED (prev_type));
13962 intermediate_type
13963 = build_vector_type_for_mode (intermediate_element_type,
13964 intermediate_mode);
13966 else
13967 intermediate_type
13968 = lang_hooks.types.type_for_mode (intermediate_mode,
13969 TYPE_UNSIGNED (prev_type));
13971 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13972 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13973 && intermediate_mode == prev_mode
13974 && SCALAR_INT_MODE_P (prev_mode))
13976 /* If the input and result modes are the same, a different optab
13977 is needed where we pass in the number of units in vectype. */
13978 optab3 = vec_unpacks_sbool_lo_optab;
13979 optab4 = vec_unpacks_sbool_hi_optab;
13981 else
13983 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
13984 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
13987 if (!optab3 || !optab4
13988 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
13989 || insn_data[icode1].operand[0].mode != intermediate_mode
13990 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
13991 || insn_data[icode2].operand[0].mode != intermediate_mode
13992 || ((icode1 = optab_handler (optab3, intermediate_mode))
13993 == CODE_FOR_nothing)
13994 || ((icode2 = optab_handler (optab4, intermediate_mode))
13995 == CODE_FOR_nothing))
13996 break;
13998 interm_types->quick_push (intermediate_type);
13999 (*multi_step_cvt)++;
14001 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14002 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14004 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14005 return true;
14006 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
14007 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14008 return true;
14011 prev_type = intermediate_type;
14012 prev_mode = intermediate_mode;
14015 interm_types->release ();
14016 return false;
14020 /* Function supportable_narrowing_operation
14022 Check whether an operation represented by the code CODE is a
14023 narrowing operation that is supported by the target platform in
14024 vector form (i.e., when operating on arguments of type VECTYPE_IN
14025 and producing a result of type VECTYPE_OUT).
14027 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14028 and FLOAT. This function checks if these operations are supported by
14029 the target platform directly via vector tree-codes.
14031 Output:
14032 - CODE1 is the code of a vector operation to be used when
14033 vectorizing the operation, if available.
14034 - MULTI_STEP_CVT determines the number of required intermediate steps in
14035 case of multi-step conversion (like int->short->char - in that case
14036 MULTI_STEP_CVT will be 1).
14037 - INTERM_TYPES contains the intermediate type required to perform the
14038 narrowing operation (short in the above example). */
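/* Worked example: narrowing int elements to char via NOP conversions maps
   to VEC_PACK_TRUNC_EXPR; if the target cannot pack the int vectors
   straight to char vectors, the loop below looks for an intermediate short
   vector type, giving *MULTI_STEP_CVT == 1 and recording that type in
   *INTERM_TYPES.  */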
14040 bool
14041 supportable_narrowing_operation (code_helper code,
14042 tree vectype_out, tree vectype_in,
14043 code_helper *code1, int *multi_step_cvt,
14044 vec<tree> *interm_types)
14046 machine_mode vec_mode;
14047 enum insn_code icode1;
14048 optab optab1, interm_optab;
14049 tree vectype = vectype_in;
14050 tree narrow_vectype = vectype_out;
14051 enum tree_code c1;
14052 tree intermediate_type, prev_type;
14053 machine_mode intermediate_mode, prev_mode;
14054 int i;
14055 unsigned HOST_WIDE_INT n_elts;
14056 bool uns;
14058 if (!code.is_tree_code ())
14059 return false;
14061 *multi_step_cvt = 0;
14062 switch ((tree_code) code)
14064 CASE_CONVERT:
14065 c1 = VEC_PACK_TRUNC_EXPR;
14066 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
14067 && VECTOR_BOOLEAN_TYPE_P (vectype)
14068 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
14069 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
14070 && n_elts < BITS_PER_UNIT)
14071 optab1 = vec_pack_sbool_trunc_optab;
14072 else
14073 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14074 break;
14076 case FIX_TRUNC_EXPR:
14077 c1 = VEC_PACK_FIX_TRUNC_EXPR;
14078 /* The signedness is determined from the output operand. */
14079 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14080 break;
14082 case FLOAT_EXPR:
14083 c1 = VEC_PACK_FLOAT_EXPR;
14084 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14085 break;
14087 default:
14088 gcc_unreachable ();
14091 if (!optab1)
14092 return false;
14094 vec_mode = TYPE_MODE (vectype);
14095 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
14096 return false;
14098 *code1 = c1;
14100 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14102 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14103 return true;
14104 /* For scalar masks we may have different boolean
14105 vector types that share the same QImode. Thus we
14106 add an additional check on the number of elements. */
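/* Editorial note: as an illustration, with AVX-512-style integer masks
   the 2-, 4- and 8-element boolean vector types can all use QImode.  */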
14107 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14108 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14109 return true;
14112 if (code == FLOAT_EXPR)
14113 return false;
14115 /* Check if it's a multi-step conversion that can be done using intermediate
14116 types. */
14117 prev_mode = vec_mode;
14118 prev_type = vectype;
14119 if (code == FIX_TRUNC_EXPR)
14120 uns = TYPE_UNSIGNED (vectype_out);
14121 else
14122 uns = TYPE_UNSIGNED (vectype);
14124 /* For a multi-step FIX_TRUNC_EXPR prefer a signed floating-point to
14125 integer conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR
14126 is often more costly than signed. */
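/* Editorial illustration of the heuristic above, not taken from the
   original sources: when narrowing, say, double to unsigned short in
   several steps, every value that survives the final truncation also fits
   in the wider signed integer type, so the typically cheaper signed
   conversion produces the same packed result.  */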
14127 if (code == FIX_TRUNC_EXPR && uns)
14129 enum insn_code icode2;
14131 intermediate_type
14132 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14133 interm_optab
14134 = optab_for_tree_code (c1, intermediate_type, optab_default);
14135 if (interm_optab != unknown_optab
14136 && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
14137 && insn_data[icode1].operand[0].mode
14138 == insn_data[icode2].operand[0].mode)
14140 uns = false;
14141 optab1 = interm_optab;
14142 icode1 = icode2;
14146 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14147 intermediate steps in the narrowing sequence. We try
14148 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14149 interm_types->create (MAX_INTERM_CVT_STEPS);
14150 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14152 intermediate_mode = insn_data[icode1].operand[0].mode;
14153 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14154 intermediate_type
14155 = vect_double_mask_nunits (prev_type, intermediate_mode);
14156 else
14157 intermediate_type
14158 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14159 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14160 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14161 && SCALAR_INT_MODE_P (prev_mode)
14162 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
14163 && n_elts < BITS_PER_UNIT)
14164 interm_optab = vec_pack_sbool_trunc_optab;
14165 else
14166 interm_optab
14167 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14168 optab_default);
14169 if (!interm_optab
14170 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
14171 || insn_data[icode1].operand[0].mode != intermediate_mode
14172 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
14173 == CODE_FOR_nothing))
14174 break;
14176 interm_types->quick_push (intermediate_type);
14177 (*multi_step_cvt)++;
14179 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14181 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14182 return true;
14183 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14184 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14185 return true;
14188 prev_mode = intermediate_mode;
14189 prev_type = intermediate_type;
14190 optab1 = interm_optab;
14193 interm_types->release ();
14194 return false;
14197 /* Generate and return a vector mask of MASK_TYPE such that
14198 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14199 Add the statements to SEQ. */
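/* Editorial illustration with hypothetical values: for an 8-lane MASK_TYPE,
   START_INDEX == 2 and END_INDEX == 5, the generated IFN_WHILE_ULT call
   yields the mask { 1, 1, 1, 0, 0, 0, 0, 0 }, i.e. exactly the first
   END_INDEX - START_INDEX lanes are active.  */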
14201 tree
14202 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14203 tree end_index, const char *name)
14205 tree cmp_type = TREE_TYPE (start_index);
14206 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14207 cmp_type, mask_type,
14208 OPTIMIZE_FOR_SPEED));
14209 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14210 start_index, end_index,
14211 build_zero_cst (mask_type));
14212 tree tmp;
14213 if (name)
14214 tmp = make_temp_ssa_name (mask_type, NULL, name);
14215 else
14216 tmp = make_ssa_name (mask_type);
14217 gimple_call_set_lhs (call, tmp);
14218 gimple_seq_add_stmt (seq, call);
14219 return tmp;
14222 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14223 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
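/* Editorial illustration: for the same hypothetical inputs as above
   (8 lanes, START_INDEX == 2, END_INDEX == 5) the result is the inverted
   mask { 0, 0, 0, 1, 1, 1, 1, 1 }.  */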
14225 tree
14226 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14227 tree end_index)
14229 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14230 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
14233 /* Try to compute the vector types required to vectorize STMT_INFO,
14234 returning true on success and false if vectorization isn't possible.
14235 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14236 make sure that the number of elements in the vectors is no bigger
14237 than GROUP_SIZE.
14239 On success:
14241 - Set *STMT_VECTYPE_OUT to:
14242 - NULL_TREE if the statement doesn't need to be vectorized;
14243 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14245 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14246 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14247 statement does not help to determine the overall number of units. */
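/* Editorial illustration, not part of the original sources: a hypothetical
   caller would typically propagate the opt_result and inspect both outputs:

     tree stmt_vectype, nunits_vectype;
     opt_result res
       = vect_get_vector_types_for_stmt (vinfo, stmt_info, &stmt_vectype,
                                         &nunits_vectype, 0);
     if (!res)
       return res;
     // stmt_vectype may still be NULL_TREE if the statement needs no
     // vectorization; nunits_vectype contributes to the vectorization factor.

   vinfo and stmt_info stand for whatever the caller is analyzing.  */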
14249 opt_result
14250 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14251 tree *stmt_vectype_out,
14252 tree *nunits_vectype_out,
14253 unsigned int group_size)
14255 gimple *stmt = stmt_info->stmt;
14257 /* For BB vectorization, we should always have a group size once we've
14258 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14259 are tentative requests during things like early data reference
14260 analysis and pattern recognition. */
14261 if (is_a <bb_vec_info> (vinfo))
14262 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
14263 else
14264 group_size = 0;
14266 *stmt_vectype_out = NULL_TREE;
14267 *nunits_vectype_out = NULL_TREE;
14269 if (gimple_get_lhs (stmt) == NULL_TREE
14270 /* MASK_STORE has no lhs, but is ok. */
14271 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
14273 if (is_a <gcall *> (stmt))
14275 /* Ignore calls with no lhs. These must be calls to
14276 #pragma omp simd functions, and the vectorization factor
14277 they really need can't be determined until
14278 vectorizable_simd_clone_call. */
14279 if (dump_enabled_p ())
14280 dump_printf_loc (MSG_NOTE, vect_location,
14281 "defer to SIMD clone analysis.\n");
14282 return opt_result::success ();
14285 return opt_result::failure_at (stmt,
14286 "not vectorized: irregular stmt.%G", stmt);
14289 tree vectype;
14290 tree scalar_type = NULL_TREE;
14291 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
14293 vectype = STMT_VINFO_VECTYPE (stmt_info);
14294 if (dump_enabled_p ())
14295 dump_printf_loc (MSG_NOTE, vect_location,
14296 "precomputed vectype: %T\n", vectype);
14298 else if (vect_use_mask_type_p (stmt_info))
14300 unsigned int precision = stmt_info->mask_precision;
14301 scalar_type = build_nonstandard_integer_type (precision, 1);
14302 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
14303 if (!vectype)
14304 return opt_result::failure_at (stmt, "not vectorized: unsupported"
14305 " data-type %T\n", scalar_type);
14306 if (dump_enabled_p ())
14307 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14309 else
14311 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
14312 scalar_type = TREE_TYPE (DR_REF (dr));
14313 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
14314 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
14315 else
14316 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
14318 if (dump_enabled_p ())
14320 if (group_size)
14321 dump_printf_loc (MSG_NOTE, vect_location,
14322 "get vectype for scalar type (group size %d):"
14323 " %T\n", group_size, scalar_type);
14324 else
14325 dump_printf_loc (MSG_NOTE, vect_location,
14326 "get vectype for scalar type: %T\n", scalar_type);
14328 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
14329 if (!vectype)
14330 return opt_result::failure_at (stmt,
14331 "not vectorized:"
14332 " unsupported data-type %T\n",
14333 scalar_type);
14335 if (dump_enabled_p ())
14336 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14339 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
14340 return opt_result::failure_at (stmt,
14341 "not vectorized: vector stmt in loop:%G",
14342 stmt);
14344 *stmt_vectype_out = vectype;
14346 /* Don't try to compute scalar types if the stmt produces a boolean
14347 vector; use the existing vector type instead. */
14348 tree nunits_vectype = vectype;
14349 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14351 /* The number of units is set according to the smallest scalar
14352 type (or the largest vector size, but we only support one
14353 vector size per vectorization). */
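/* Editorial illustration, assuming 128-bit vectors: for
   "int_dst = (int) char_src" the smallest scalar type is char, so
   nunits_vectype becomes a 16-unit char vector even though
   *STMT_VECTYPE_OUT is a 4-unit int vector.  */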
14354 scalar_type = vect_get_smallest_scalar_type (stmt_info,
14355 TREE_TYPE (vectype));
14356 if (scalar_type != TREE_TYPE (vectype))
14358 if (dump_enabled_p ())
14359 dump_printf_loc (MSG_NOTE, vect_location,
14360 "get vectype for smallest scalar type: %T\n",
14361 scalar_type);
14362 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
14363 group_size);
14364 if (!nunits_vectype)
14365 return opt_result::failure_at
14366 (stmt, "not vectorized: unsupported data-type %T\n",
14367 scalar_type);
14368 if (dump_enabled_p ())
14369 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
14370 nunits_vectype);
14374 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
14375 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
14376 return opt_result::failure_at (stmt,
14377 "Not vectorized: Incompatible number "
14378 "of vector subparts between %T and %T\n",
14379 nunits_vectype, *stmt_vectype_out);
14381 if (dump_enabled_p ())
14383 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
14384 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
14385 dump_printf (MSG_NOTE, "\n");
14388 *nunits_vectype_out = nunits_vectype;
14389 return opt_result::success ();
14392 /* Generate and return statement sequence that sets vector length LEN that is:
14394 min_of_start_and_end = min (START_INDEX, END_INDEX);
14395 left_len = END_INDEX - min_of_start_and_end;
14396 rhs = min (left_len, LEN_LIMIT);
14397 LEN = rhs;
14399 Note: the cost of the code generated by this function is modeled
14400 by vect_estimate_min_profitable_iters, so changes here may need
14401 corresponding changes there. */
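/* Editorial illustration with hypothetical values: for START_INDEX == 8,
   END_INDEX == 10 and LEN_LIMIT == 4 the sequence computes
   min (8, 10) == 8, then 10 - 8 == 2, then min (2, 4) == 2, so LEN becomes 2;
   with START_INDEX == 0 it would instead saturate at LEN_LIMIT == 4.  */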
14403 gimple_seq
14404 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
14406 gimple_seq stmts = NULL;
14407 tree len_type = TREE_TYPE (len);
14408 gcc_assert (TREE_TYPE (start_index) == len_type);
14410 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
14411 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
14412 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
14413 gimple* stmt = gimple_build_assign (len, rhs);
14414 gimple_seq_add_stmt (&stmts, stmt);
14416 return stmts;