gcc/tree-vect-stmts.cc
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
57 #include "regs.h"
58 #include "attribs.h"
59 #include "optabs-libfuncs.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
64 /* Return the vectorized type for the given statement. */
66 tree
67 stmt_vectype (class _stmt_vec_info *stmt_info)
69 return STMT_VINFO_VECTYPE (stmt_info);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74 bool
75 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
80 class loop* loop;
82 if (!loop_vinfo)
83 return false;
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
87 return (bb->loop_father == loop->inner);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
94 static unsigned
95 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
108 stmt_info_for_cost si
109 = { count, kind, where, stmt_info, node, vectype, misalign };
110 body_cost_vec->safe_push (si);
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
116 unsigned
117 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
126 unsigned
127 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
136 unsigned
137 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, 0, where);
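/* Purely as an illustrative sketch (the variables below are hypothetical,
   not taken from an existing caller): an analysis routine might accumulate
   the cost of NCOPIES vector statements plus a taken branch with

     stmt_vector_for_cost cost_vec = vNULL;
     unsigned inside_cost = 0;
     inside_cost += record_stmt_cost (&cost_vec, ncopies, vector_stmt,
                                      stmt_info, vectype, 0, vect_body);
     inside_cost += record_stmt_cost (&cost_vec, 1, cond_branch_taken,
                                      vect_body);

   The return value is only a preliminary estimate; the pushed entries are
   what the target cost model later processes.  */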
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
149 static tree
150 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
161 static tree
162 read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (vect, new_stmt);
178 gimple_assign_set_lhs (new_stmt, vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
181 return vect_name;
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
188 static void
189 write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
193 tree array_ref;
194 gimple *new_stmt;
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
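/* A minimal sketch of how the helpers above pair up (hypothetical variables,
   not copied from a real caller): a load-lanes style transform creates one
   array, lets the target instruction fill it, and then pulls the individual
   vectors out of it:

     tree array = create_vector_array (vectype, nvectors);
     ... emit the load-lanes call whose lhs is ARRAY ...
     for (unsigned i = 0; i < nvectors; i++)
       {
         tree vec = read_vector_array (vinfo, stmt_info, gsi,
                                       scalar_dest, array, i);
         ... record VEC as the i-th vectorized def ...
       }
     vect_clobber_variable (vinfo, stmt_info, gsi, array);

   Stores go the other way round, using write_vector_array to fill the
   array before the store-lanes call.  */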
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
208 static tree
209 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
211 tree mem_ref;
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
222 static void
223 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
233 /* Function vect_mark_relevant.
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
237 static void
238 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
 251      may have their own uses that are not in any pattern; in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
271 if (live_p && relevant == vect_unused_in_scope)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
299 worklist->safe_push (stmt_info);
303 /* Function is_simple_and_all_uses_invariant
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
307 bool
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
311 tree op;
312 ssa_op_iter iter;
314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
315 if (!stmt)
316 return false;
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
320 enum vect_def_type dt = vect_uninitialized_def;
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
333 return true;
336 /* Function vect_stmt_relevant_p.
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - control stmts in the loop (except for the exit condition).
345 - it is an induction and we have multiple exits.
347 CHECKME: what other side effects would the vectorizer allow? */
349 static bool
350 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
351 enum vect_relevant *relevant, bool *live_p)
353 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
354 ssa_op_iter op_iter;
355 imm_use_iterator imm_iter;
356 use_operand_p use_p;
357 def_operand_p def_p;
359 *relevant = vect_unused_in_scope;
360 *live_p = false;
362 /* cond stmt other than loop exit cond. */
363 gimple *stmt = STMT_VINFO_STMT (stmt_info);
364 if (is_ctrl_stmt (stmt)
365 && LOOP_VINFO_LOOP_IV_COND (loop_vinfo) != stmt
366 && (!loop->inner || gimple_bb (stmt)->loop_father == loop))
367 *relevant = vect_used_in_scope;
369 /* changing memory. */
370 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
371 if (gimple_vdef (stmt_info->stmt)
372 && !gimple_clobber_p (stmt_info->stmt))
374 if (dump_enabled_p ())
375 dump_printf_loc (MSG_NOTE, vect_location,
376 "vec_stmt_relevant_p: stmt has vdefs.\n");
377 *relevant = vect_used_in_scope;
380 /* uses outside the loop. */
381 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
383 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
385 basic_block bb = gimple_bb (USE_STMT (use_p));
386 if (!flow_bb_inside_loop_p (loop, bb))
388 if (is_gimple_debug (USE_STMT (use_p)))
389 continue;
391 if (dump_enabled_p ())
392 dump_printf_loc (MSG_NOTE, vect_location,
393 "vec_stmt_relevant_p: used out of loop.\n");
395 /* We expect all such uses to be in the loop exit phis
396 (because of loop closed form) */
397 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
399 *live_p = true;
 404   /* Check if it's an induction and the loop has multiple exits.  In this case
 405      there will be a use after peeling, which is needed for the alternate exit.  */
406 if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
407 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
409 if (dump_enabled_p ())
410 dump_printf_loc (MSG_NOTE, vect_location,
411 "vec_stmt_relevant_p: induction forced for "
412 "early break.\n");
413 *live_p = true;
417 if (*live_p && *relevant == vect_unused_in_scope
418 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
420 if (dump_enabled_p ())
421 dump_printf_loc (MSG_NOTE, vect_location,
422 "vec_stmt_relevant_p: stmt live but not relevant.\n");
423 *relevant = vect_used_only_live;
426 return (*live_p || *relevant);
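/* A small example (sketch only, not taken from the testsuite):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;     <-- has a vdef, so *relevant is set
         s = s + b[i];        <-- s is used after the loop, so *live_p is set
       }

   Statements that merely feed the exit condition or the array addressing
   stay vect_unused_in_scope here; whether their definitions become relevant
   is decided later by the use-propagation in
   vect_mark_stmts_to_be_vectorized.  */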
430 /* Function exist_non_indexing_operands_for_use_p
432 USE is one of the uses attached to STMT_INFO. Check if USE is
433 used in STMT_INFO for anything other than indexing an array. */
435 static bool
436 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
438 tree operand;
440 /* USE corresponds to some operand in STMT. If there is no data
441 reference in STMT, then any operand that corresponds to USE
442 is not indexing an array. */
443 if (!STMT_VINFO_DATA_REF (stmt_info))
444 return true;
 446   /* STMT has a data_ref.  FORNOW this means that it's of one of
447 the following forms:
448 -1- ARRAY_REF = var
449 -2- var = ARRAY_REF
450 (This should have been verified in analyze_data_refs).
452 'var' in the second case corresponds to a def, not a use,
453 so USE cannot correspond to any operands that are not used
454 for array indexing.
456 Therefore, all we need to check is if STMT falls into the
457 first case, and whether var corresponds to USE. */
459 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
460 if (!assign || !gimple_assign_copy_p (assign))
462 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
463 if (call && gimple_call_internal_p (call))
465 internal_fn ifn = gimple_call_internal_fn (call);
466 int mask_index = internal_fn_mask_index (ifn);
467 if (mask_index >= 0
468 && use == gimple_call_arg (call, mask_index))
469 return true;
470 int stored_value_index = internal_fn_stored_value_index (ifn);
471 if (stored_value_index >= 0
472 && use == gimple_call_arg (call, stored_value_index))
473 return true;
474 if (internal_gather_scatter_fn_p (ifn)
475 && use == gimple_call_arg (call, 1))
476 return true;
478 return false;
481 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
482 return false;
483 operand = gimple_assign_rhs1 (assign);
484 if (TREE_CODE (operand) != SSA_NAME)
485 return false;
487 if (operand == use)
488 return true;
490 return false;
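/* For instance, for the scalar store a[i_1] = x_2 the use of i_1 only feeds
   the array address, so this function returns false for it, whereas the
   stored value x_2 (the copy source) makes it return true.  */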
 495   /* Function process_use.
497 Inputs:
498 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
499 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
500 that defined USE. This is done by calling mark_relevant and passing it
501 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
502 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
503 be performed.
505 Outputs:
506 Generally, LIVE_P and RELEVANT are used to define the liveness and
507 relevance info of the DEF_STMT of this USE:
508 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
509 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
510 Exceptions:
511 - case 1: If USE is used only for address computations (e.g. array indexing),
512 which does not need to be directly vectorized, then the liveness/relevance
513 of the respective DEF_STMT is left unchanged.
514 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
 515      we skip DEF_STMT because it has already been processed.
516 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
517 "relevant" will be modified accordingly.
519 Return true if everything is as expected. Return false otherwise. */
521 static opt_result
522 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
523 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
524 bool force)
526 stmt_vec_info dstmt_vinfo;
527 enum vect_def_type dt;
529 /* case 1: we are only interested in uses that need to be vectorized. Uses
530 that are used for address computation are not considered relevant. */
531 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
532 return opt_result::success ();
534 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
535 return opt_result::failure_at (stmt_vinfo->stmt,
536 "not vectorized:"
537 " unsupported use in stmt.\n");
539 if (!dstmt_vinfo)
540 return opt_result::success ();
542 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
543 basic_block bb = gimple_bb (stmt_vinfo->stmt);
545 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
546 We have to force the stmt live since the epilogue loop needs it to
547 continue computing the reduction. */
548 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
549 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
550 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
551 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
552 && bb->loop_father == def_bb->loop_father)
554 if (dump_enabled_p ())
555 dump_printf_loc (MSG_NOTE, vect_location,
556 "reduc-stmt defining reduc-phi in the same nest.\n");
557 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
558 return opt_result::success ();
561 /* case 3a: outer-loop stmt defining an inner-loop stmt:
562 outer-loop-header-bb:
563 d = dstmt_vinfo
564 inner-loop:
565 stmt # use (d)
566 outer-loop-tail-bb:
567 ... */
568 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
570 if (dump_enabled_p ())
571 dump_printf_loc (MSG_NOTE, vect_location,
572 "outer-loop def-stmt defining inner-loop stmt.\n");
574 switch (relevant)
576 case vect_unused_in_scope:
577 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
578 vect_used_in_scope : vect_unused_in_scope;
579 break;
581 case vect_used_in_outer_by_reduction:
582 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
583 relevant = vect_used_by_reduction;
584 break;
586 case vect_used_in_outer:
587 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
588 relevant = vect_used_in_scope;
589 break;
591 case vect_used_in_scope:
592 break;
594 default:
595 gcc_unreachable ();
599 /* case 3b: inner-loop stmt defining an outer-loop stmt:
600 outer-loop-header-bb:
602 inner-loop:
603 d = dstmt_vinfo
604 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
605 stmt # use (d) */
606 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
608 if (dump_enabled_p ())
609 dump_printf_loc (MSG_NOTE, vect_location,
610 "inner-loop def-stmt defining outer-loop stmt.\n");
612 switch (relevant)
614 case vect_unused_in_scope:
615 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
616 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
617 vect_used_in_outer_by_reduction : vect_unused_in_scope;
618 break;
620 case vect_used_by_reduction:
621 case vect_used_only_live:
622 relevant = vect_used_in_outer_by_reduction;
623 break;
625 case vect_used_in_scope:
626 relevant = vect_used_in_outer;
627 break;
629 default:
630 gcc_unreachable ();
633 /* We are also not interested in uses on loop PHI backedges that are
634 inductions. Otherwise we'll needlessly vectorize the IV increment
635 and cause hybrid SLP for SLP inductions. Unless the PHI is live
636 of course. */
637 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
638 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
639 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
640 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
641 loop_latch_edge (bb->loop_father))
642 == use))
644 if (dump_enabled_p ())
645 dump_printf_loc (MSG_NOTE, vect_location,
646 "induction value on backedge.\n");
647 return opt_result::success ();
651 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
652 return opt_result::success ();
656 /* Function vect_mark_stmts_to_be_vectorized.
658 Not all stmts in the loop need to be vectorized. For example:
660 for i...
661 for j...
662 1. T0 = i + j
663 2. T1 = a[T0]
665 3. j = j + 1
 667   Stmts 1 and 3 do not need to be vectorized, because loop control and
668 addressing of vectorized data-refs are handled differently.
670 This pass detects such stmts. */
672 opt_result
673 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
675 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
676 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
677 unsigned int nbbs = loop->num_nodes;
678 gimple_stmt_iterator si;
679 unsigned int i;
680 basic_block bb;
681 bool live_p;
682 enum vect_relevant relevant;
684 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
686 auto_vec<stmt_vec_info, 64> worklist;
688 /* 1. Init worklist. */
689 for (i = 0; i < nbbs; i++)
691 bb = bbs[i];
692 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
694 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
695 if (dump_enabled_p ())
696 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
697 phi_info->stmt);
699 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
700 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
702 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
704 if (is_gimple_debug (gsi_stmt (si)))
705 continue;
706 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
707 if (dump_enabled_p ())
708 dump_printf_loc (MSG_NOTE, vect_location,
709 "init: stmt relevant? %G", stmt_info->stmt);
711 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
712 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
716 /* 2. Process_worklist */
717 while (worklist.length () > 0)
719 use_operand_p use_p;
720 ssa_op_iter iter;
722 stmt_vec_info stmt_vinfo = worklist.pop ();
723 if (dump_enabled_p ())
724 dump_printf_loc (MSG_NOTE, vect_location,
725 "worklist: examine stmt: %G", stmt_vinfo->stmt);
727 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
728 (DEF_STMT) as relevant/irrelevant according to the relevance property
729 of STMT. */
730 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
732 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
733 propagated as is to the DEF_STMTs of its USEs.
735 One exception is when STMT has been identified as defining a reduction
736 variable; in this case we set the relevance to vect_used_by_reduction.
737 This is because we distinguish between two kinds of relevant stmts -
738 those that are used by a reduction computation, and those that are
739 (also) used by a regular computation. This allows us later on to
740 identify stmts that are used solely by a reduction, and therefore the
741 order of the results that they produce does not have to be kept. */
743 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
745 case vect_reduction_def:
746 gcc_assert (relevant != vect_unused_in_scope);
747 if (relevant != vect_unused_in_scope
748 && relevant != vect_used_in_scope
749 && relevant != vect_used_by_reduction
750 && relevant != vect_used_only_live)
751 return opt_result::failure_at
752 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
753 break;
755 case vect_nested_cycle:
756 if (relevant != vect_unused_in_scope
757 && relevant != vect_used_in_outer_by_reduction
758 && relevant != vect_used_in_outer)
759 return opt_result::failure_at
760 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
761 break;
763 case vect_double_reduction_def:
764 if (relevant != vect_unused_in_scope
765 && relevant != vect_used_by_reduction
766 && relevant != vect_used_only_live)
767 return opt_result::failure_at
768 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
769 break;
771 default:
772 break;
775 if (is_pattern_stmt_p (stmt_vinfo))
777 /* Pattern statements are not inserted into the code, so
778 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
779 have to scan the RHS or function arguments instead. */
780 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
782 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
783 tree op = gimple_assign_rhs1 (assign);
785 i = 1;
786 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
788 opt_result res
789 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
790 loop_vinfo, relevant, &worklist, false);
791 if (!res)
792 return res;
793 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
794 loop_vinfo, relevant, &worklist, false);
795 if (!res)
796 return res;
797 i = 2;
799 for (; i < gimple_num_ops (assign); i++)
801 op = gimple_op (assign, i);
802 if (TREE_CODE (op) == SSA_NAME)
804 opt_result res
805 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
806 &worklist, false);
807 if (!res)
808 return res;
812 else if (gcond *cond = dyn_cast <gcond *> (stmt_vinfo->stmt))
814 tree_code rhs_code = gimple_cond_code (cond);
815 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
816 opt_result res
817 = process_use (stmt_vinfo, gimple_cond_lhs (cond),
818 loop_vinfo, relevant, &worklist, false);
819 if (!res)
820 return res;
821 res = process_use (stmt_vinfo, gimple_cond_rhs (cond),
822 loop_vinfo, relevant, &worklist, false);
823 if (!res)
824 return res;
826 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
828 for (i = 0; i < gimple_call_num_args (call); i++)
830 tree arg = gimple_call_arg (call, i);
831 opt_result res
832 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
833 &worklist, false);
834 if (!res)
835 return res;
838 else
839 gcc_unreachable ();
841 else
842 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
844 tree op = USE_FROM_PTR (use_p);
845 opt_result res
846 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
847 &worklist, false);
848 if (!res)
849 return res;
852 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
854 gather_scatter_info gs_info;
855 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
856 gcc_unreachable ();
857 opt_result res
858 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
859 &worklist, true);
860 if (!res)
862 if (fatal)
863 *fatal = false;
864 return res;
867 } /* while worklist */
869 return opt_result::success ();
872 /* Function vect_model_simple_cost.
874 Models cost for simple operations, i.e. those that only emit ncopies of a
875 single op. Right now, this does not account for multiple insns that could
876 be generated for the single vector op. We will handle that shortly. */
878 static void
879 vect_model_simple_cost (vec_info *,
880 stmt_vec_info stmt_info, int ncopies,
881 enum vect_def_type *dt,
882 int ndts,
883 slp_tree node,
884 stmt_vector_for_cost *cost_vec,
885 vect_cost_for_stmt kind = vector_stmt)
887 int inside_cost = 0, prologue_cost = 0;
889 gcc_assert (cost_vec != NULL);
891 /* ??? Somehow we need to fix this at the callers. */
892 if (node)
893 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
895 if (!node)
896 /* Cost the "broadcast" of a scalar operand in to a vector operand.
897 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
898 cost model. */
899 for (int i = 0; i < ndts; i++)
900 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
901 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
902 stmt_info, 0, vect_prologue);
904 /* Pass the inside-of-loop statements to the target-specific cost model. */
905 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
906 stmt_info, 0, vect_body);
908 if (dump_enabled_p ())
909 dump_printf_loc (MSG_NOTE, vect_location,
910 "vect_model_simple_cost: inside_cost = %d, "
911 "prologue_cost = %d .\n", inside_cost, prologue_cost);
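/* Illustrative numbers: with NCOPIES == 2, no SLP node and both operands
   constant or external (NDTS == 2), the code above pushes two scalar_to_vec
   prologue entries for the broadcasts and one count-2 body entry of KIND,
   which is what the inside_cost/prologue_cost dump then reports.  */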
915 /* Model cost for type demotion and promotion operations. PWR is
916 normally zero for single-step promotions and demotions. It will be
917 one if two-step promotion/demotion is required, and so on. NCOPIES
918 is the number of vector results (and thus number of instructions)
919 for the narrowest end of the operation chain. Each additional
920 step doubles the number of instructions required. If WIDEN_ARITH
921 is true the stmt is doing widening arithmetic. */
923 static void
924 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
925 enum vect_def_type *dt,
926 unsigned int ncopies, int pwr,
927 stmt_vector_for_cost *cost_vec,
928 bool widen_arith)
930 int i;
931 int inside_cost = 0, prologue_cost = 0;
933 for (i = 0; i < pwr + 1; i++)
935 inside_cost += record_stmt_cost (cost_vec, ncopies,
936 widen_arith
937 ? vector_stmt : vec_promote_demote,
938 stmt_info, 0, vect_body);
939 ncopies *= 2;
942 /* FORNOW: Assuming maximum 2 args per stmts. */
943 for (i = 0; i < 2; i++)
944 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
945 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
946 stmt_info, 0, vect_prologue);
948 if (dump_enabled_p ())
949 dump_printf_loc (MSG_NOTE, vect_location,
950 "vect_model_promotion_demotion_cost: inside_cost = %d, "
951 "prologue_cost = %d .\n", inside_cost, prologue_cost);
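/* Worked example (numbers purely illustrative): for a two-step promotion
   (PWR == 1) starting from NCOPIES == 2, the loop above records 2 statements
   for the first step and 4 for the second, i.e. 6 vec_promote_demote (or
   vector_stmt) operations in the loop body, plus one prologue vector_stmt
   per constant/external operand.  */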
954 /* Returns true if the current function returns DECL. */
956 static bool
957 cfun_returns (tree decl)
959 edge_iterator ei;
960 edge e;
961 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
963 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
964 if (!ret)
965 continue;
966 if (gimple_return_retval (ret) == decl)
967 return true;
 968      /* We often end up with an aggregate copy to the result decl;
969 handle that case as well. First skip intermediate clobbers
970 though. */
971 gimple *def = ret;
974 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
976 while (gimple_clobber_p (def));
977 if (is_a <gassign *> (def)
978 && gimple_assign_lhs (def) == gimple_return_retval (ret)
979 && gimple_assign_rhs1 (def) == decl)
980 return true;
982 return false;
985 /* Calculate cost of DR's memory access. */
986 void
987 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
988 dr_alignment_support alignment_support_scheme,
989 int misalignment,
990 unsigned int *inside_cost,
991 stmt_vector_for_cost *body_cost_vec)
993 switch (alignment_support_scheme)
995 case dr_aligned:
997 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
998 vector_store, stmt_info, 0,
999 vect_body);
1001 if (dump_enabled_p ())
1002 dump_printf_loc (MSG_NOTE, vect_location,
1003 "vect_model_store_cost: aligned.\n");
1004 break;
1007 case dr_unaligned_supported:
1009 /* Here, we assign an additional cost for the unaligned store. */
1010 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1011 unaligned_store, stmt_info,
1012 misalignment, vect_body);
1013 if (dump_enabled_p ())
1014 dump_printf_loc (MSG_NOTE, vect_location,
1015 "vect_model_store_cost: unaligned supported by "
1016 "hardware.\n");
1017 break;
1020 case dr_unaligned_unsupported:
1022 *inside_cost = VECT_MAX_COST;
1024 if (dump_enabled_p ())
1025 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1026 "vect_model_store_cost: unsupported access.\n");
1027 break;
1030 default:
1031 gcc_unreachable ();
1035 /* Calculate cost of DR's memory access. */
1036 void
1037 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1038 dr_alignment_support alignment_support_scheme,
1039 int misalignment,
1040 bool add_realign_cost, unsigned int *inside_cost,
1041 unsigned int *prologue_cost,
1042 stmt_vector_for_cost *prologue_cost_vec,
1043 stmt_vector_for_cost *body_cost_vec,
1044 bool record_prologue_costs)
1046 switch (alignment_support_scheme)
1048 case dr_aligned:
1050 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1051 stmt_info, 0, vect_body);
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE, vect_location,
1055 "vect_model_load_cost: aligned.\n");
1057 break;
1059 case dr_unaligned_supported:
1061 /* Here, we assign an additional cost for the unaligned load. */
1062 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1063 unaligned_load, stmt_info,
1064 misalignment, vect_body);
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_NOTE, vect_location,
1068 "vect_model_load_cost: unaligned supported by "
1069 "hardware.\n");
1071 break;
1073 case dr_explicit_realign:
1075 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1076 vector_load, stmt_info, 0, vect_body);
1077 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1078 vec_perm, stmt_info, 0, vect_body);
1080 /* FIXME: If the misalignment remains fixed across the iterations of
1081 the containing loop, the following cost should be added to the
1082 prologue costs. */
1083 if (targetm.vectorize.builtin_mask_for_load)
1084 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1085 stmt_info, 0, vect_body);
1087 if (dump_enabled_p ())
1088 dump_printf_loc (MSG_NOTE, vect_location,
1089 "vect_model_load_cost: explicit realign\n");
1091 break;
1093 case dr_explicit_realign_optimized:
1095 if (dump_enabled_p ())
1096 dump_printf_loc (MSG_NOTE, vect_location,
1097 "vect_model_load_cost: unaligned software "
1098 "pipelined.\n");
1100 /* Unaligned software pipeline has a load of an address, an initial
1101 load, and possibly a mask operation to "prime" the loop. However,
1102 if this is an access in a group of loads, which provide grouped
1103 access, then the above cost should only be considered for one
1104 access in the group. Inside the loop, there is a load op
1105 and a realignment op. */
1107 if (add_realign_cost && record_prologue_costs)
1109 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1110 vector_stmt, stmt_info,
1111 0, vect_prologue);
1112 if (targetm.vectorize.builtin_mask_for_load)
1113 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1114 vector_stmt, stmt_info,
1115 0, vect_prologue);
1118 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1119 stmt_info, 0, vect_body);
1120 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1121 stmt_info, 0, vect_body);
1123 if (dump_enabled_p ())
1124 dump_printf_loc (MSG_NOTE, vect_location,
1125 "vect_model_load_cost: explicit realign optimized"
1126 "\n");
1128 break;
1131 case dr_unaligned_unsupported:
1133 *inside_cost = VECT_MAX_COST;
1135 if (dump_enabled_p ())
1136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1137 "vect_model_load_cost: unsupported access.\n");
1138 break;
1141 default:
1142 gcc_unreachable ();
1146 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1147 the loop preheader for the vectorized stmt STMT_VINFO. */
1149 static void
1150 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1151 gimple_stmt_iterator *gsi)
1153 if (gsi)
1154 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1155 else
1156 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1158 if (dump_enabled_p ())
1159 dump_printf_loc (MSG_NOTE, vect_location,
1160 "created new init_stmt: %G", new_stmt);
1163 /* Function vect_init_vector.
1165 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1166 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1167 vector type a vector with all elements equal to VAL is created first.
1168 Place the initialization at GSI if it is not NULL. Otherwise, place the
1169 initialization at the loop preheader.
1170 Return the DEF of INIT_STMT.
1171 It will be used in the vectorization of STMT_INFO. */
1173 tree
1174 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1175 gimple_stmt_iterator *gsi)
1177 gimple *init_stmt;
1178 tree new_temp;
 1180   /* We abuse this function to initialize an SSA name with the initial
          value 'val' even when TYPE is not a vector type.  */
1181 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1183 gcc_assert (VECTOR_TYPE_P (type));
1184 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1186 /* Scalar boolean value should be transformed into
1187 all zeros or all ones value before building a vector. */
1188 if (VECTOR_BOOLEAN_TYPE_P (type))
1190 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1191 tree false_val = build_zero_cst (TREE_TYPE (type));
1193 if (CONSTANT_CLASS_P (val))
1194 val = integer_zerop (val) ? false_val : true_val;
1195 else
1197 new_temp = make_ssa_name (TREE_TYPE (type));
1198 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1199 val, true_val, false_val);
1200 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1201 val = new_temp;
1204 else
1206 gimple_seq stmts = NULL;
1207 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1208 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1209 TREE_TYPE (type), val);
1210 else
1211 /* ??? Condition vectorization expects us to do
1212 promotion of invariant/external defs. */
1213 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1214 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1215 !gsi_end_p (gsi2); )
1217 init_stmt = gsi_stmt (gsi2);
1218 gsi_remove (&gsi2, false);
1219 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1223 val = build_vector_from_val (type, val);
1226 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1227 init_stmt = gimple_build_assign (new_temp, val);
1228 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1229 return new_temp;
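/* A minimal usage sketch (hypothetical caller, assuming VECTYPE is the
   vector type chosen for STMT_INFO):

     tree vec_one = vect_init_vector (vinfo, stmt_info,
                                      build_one_cst (TREE_TYPE (vectype)),
                                      vectype, NULL);

   splats the scalar constant 1 into a vector, places the init statement in
   the loop preheader (because GSI is null) and returns the SSA name holding
   the vector.  */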
1233 /* Function vect_get_vec_defs_for_operand.
1235 OP is an operand in STMT_VINFO. This function returns a vector of
1236 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1238 In the case that OP is an SSA_NAME which is defined in the loop, then
1239 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1241 In case OP is an invariant or constant, a new stmt that creates a vector def
1242 needs to be introduced. VECTYPE may be used to specify a required type for
1243 vector invariant. */
1245 void
1246 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1247 unsigned ncopies,
1248 tree op, vec<tree> *vec_oprnds, tree vectype)
1250 gimple *def_stmt;
1251 enum vect_def_type dt;
1252 bool is_simple_use;
1253 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE, vect_location,
1257 "vect_get_vec_defs_for_operand: %T\n", op);
1259 stmt_vec_info def_stmt_info;
1260 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1261 &def_stmt_info, &def_stmt);
1262 gcc_assert (is_simple_use);
1263 if (def_stmt && dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1266 vec_oprnds->create (ncopies);
1267 if (dt == vect_constant_def || dt == vect_external_def)
1269 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1270 tree vector_type;
1272 if (vectype)
1273 vector_type = vectype;
1274 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1275 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1276 vector_type = truth_type_for (stmt_vectype);
1277 else
1278 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1280 gcc_assert (vector_type);
1281 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1282 while (ncopies--)
1283 vec_oprnds->quick_push (vop);
1285 else
1287 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1288 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1289 for (unsigned i = 0; i < ncopies; ++i)
1290 vec_oprnds->quick_push (gimple_get_lhs
1291 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1296 /* Get vectorized definitions for OP0 and OP1. */
1298 void
1299 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1300 unsigned ncopies,
1301 tree op0, tree vectype0, vec<tree> *vec_oprnds0,
1302 tree op1, tree vectype1, vec<tree> *vec_oprnds1,
1303 tree op2, tree vectype2, vec<tree> *vec_oprnds2,
1304 tree op3, tree vectype3, vec<tree> *vec_oprnds3)
1306 if (slp_node)
1308 if (op0)
1309 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1310 if (op1)
1311 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1312 if (op2)
1313 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1314 if (op3)
1315 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1317 else
1319 if (op0)
1320 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1321 op0, vec_oprnds0, vectype0);
1322 if (op1)
1323 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1324 op1, vec_oprnds1, vectype1);
1325 if (op2)
1326 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1327 op2, vec_oprnds2, vectype2);
1328 if (op3)
1329 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1330 op3, vec_oprnds3, vectype3);
1334 void
1335 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1336 unsigned ncopies,
1337 tree op0, vec<tree> *vec_oprnds0,
1338 tree op1, vec<tree> *vec_oprnds1,
1339 tree op2, vec<tree> *vec_oprnds2,
1340 tree op3, vec<tree> *vec_oprnds3)
1342 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1343 op0, NULL_TREE, vec_oprnds0,
1344 op1, NULL_TREE, vec_oprnds1,
1345 op2, NULL_TREE, vec_oprnds2,
1346 op3, NULL_TREE, vec_oprnds3);
1349 /* Helper function called by vect_finish_replace_stmt and
1350 vect_finish_stmt_generation. Set the location of the new
1351 statement and create and return a stmt_vec_info for it. */
1353 static void
1354 vect_finish_stmt_generation_1 (vec_info *,
1355 stmt_vec_info stmt_info, gimple *vec_stmt)
1357 if (dump_enabled_p ())
1358 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1360 if (stmt_info)
1362 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1364 /* While EH edges will generally prevent vectorization, stmt might
1365 e.g. be in a must-not-throw region. Ensure newly created stmts
1366 that could throw are part of the same region. */
1367 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1368 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1369 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1371 else
1372 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1375 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1376 which sets the same scalar result as STMT_INFO did. Create and return a
1377 stmt_vec_info for VEC_STMT. */
1379 void
1380 vect_finish_replace_stmt (vec_info *vinfo,
1381 stmt_vec_info stmt_info, gimple *vec_stmt)
1383 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1384 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1386 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1387 gsi_replace (&gsi, vec_stmt, true);
1389 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1392 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1393 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1395 void
1396 vect_finish_stmt_generation (vec_info *vinfo,
1397 stmt_vec_info stmt_info, gimple *vec_stmt,
1398 gimple_stmt_iterator *gsi)
1400 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1402 if (!gsi_end_p (*gsi)
1403 && gimple_has_mem_ops (vec_stmt))
1405 gimple *at_stmt = gsi_stmt (*gsi);
1406 tree vuse = gimple_vuse (at_stmt);
1407 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1409 tree vdef = gimple_vdef (at_stmt);
1410 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1411 gimple_set_modified (vec_stmt, true);
1412 /* If we have an SSA vuse and insert a store, update virtual
1413 SSA form to avoid triggering the renamer. Do so only
1414 if we can easily see all uses - which is what almost always
1415 happens with the way vectorized stmts are inserted. */
1416 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1417 && ((is_gimple_assign (vec_stmt)
1418 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1419 || (is_gimple_call (vec_stmt)
1420 && (!(gimple_call_flags (vec_stmt)
1421 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1422 || (gimple_call_lhs (vec_stmt)
1423 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1425 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1426 gimple_set_vdef (vec_stmt, new_vdef);
1427 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1431 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1432 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1435 /* We want to vectorize a call to combined function CFN with function
1436 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1437 as the types of all inputs. Check whether this is possible using
1438 an internal function, returning its code if so or IFN_LAST if not. */
1440 static internal_fn
1441 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1442 tree vectype_out, tree vectype_in)
1444 internal_fn ifn;
1445 if (internal_fn_p (cfn))
1446 ifn = as_internal_fn (cfn);
1447 else
1448 ifn = associated_internal_fn (fndecl);
1449 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1451 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1452 if (info.vectorizable)
1454 bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
1455 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1456 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
 1458        /* The type sizes of vectype_in and vectype_out must be exactly
 1459           the same when vectype_out does not participate in the optab
 1460           query, while there is no size restriction when vectype_out
 1461           is part of the optab query.  */
1462 if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
1463 return IFN_LAST;
1465 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1466 OPTIMIZE_FOR_SPEED))
1467 return ifn;
1470 return IFN_LAST;
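/* For example, a scalar call to sqrtf maps to CFN_BUILT_IN_SQRTF; with
   VECTYPE_OUT == VECTYPE_IN == V4SF this routine would ask
   direct_internal_fn_supported_p about IFN_SQRT on that type and return
   IFN_SQRT when the target provides the corresponding optab, or IFN_LAST
   otherwise.  */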
1474 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1475 gimple_stmt_iterator *);
1477 /* Check whether a load or store statement in the loop described by
1478 LOOP_VINFO is possible in a loop using partial vectors. This is
1479 testing whether the vectorizer pass has the appropriate support,
1480 as well as whether the target does.
1482 VLS_TYPE says whether the statement is a load or store and VECTYPE
1483 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1484 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1485 says how the load or store is going to be implemented and GROUP_SIZE
1486 is the number of load or store statements in the containing group.
1487 If the access is a gather load or scatter store, GS_INFO describes
1488 its arguments. If the load or store is conditional, SCALAR_MASK is the
1489 condition under which it occurs.
1491 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1492 vectors is not supported, otherwise record the required rgroup control
1493 types. */
1495 static void
1496 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1497 slp_tree slp_node,
1498 vec_load_store_type vls_type,
1499 int group_size,
1500 vect_memory_access_type
1501 memory_access_type,
1502 gather_scatter_info *gs_info,
1503 tree scalar_mask)
1505 /* Invariant loads need no special support. */
1506 if (memory_access_type == VMAT_INVARIANT)
1507 return;
1509 unsigned int nvectors;
1510 if (slp_node)
1511 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1512 else
1513 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1515 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1516 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1517 machine_mode vecmode = TYPE_MODE (vectype);
1518 bool is_load = (vls_type == VLS_LOAD);
1519 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1521 internal_fn ifn
1522 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1523 : vect_store_lanes_supported (vectype, group_size, true));
1524 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1525 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1526 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1527 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1528 scalar_mask);
1529 else
1531 if (dump_enabled_p ())
1532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1533 "can't operate on partial vectors because"
1534 " the target doesn't have an appropriate"
1535 " load/store-lanes instruction.\n");
1536 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1538 return;
1541 if (memory_access_type == VMAT_GATHER_SCATTER)
1543 internal_fn ifn = (is_load
1544 ? IFN_MASK_GATHER_LOAD
1545 : IFN_MASK_SCATTER_STORE);
1546 internal_fn len_ifn = (is_load
1547 ? IFN_MASK_LEN_GATHER_LOAD
1548 : IFN_MASK_LEN_SCATTER_STORE);
1549 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1550 gs_info->memory_type,
1551 gs_info->offset_vectype,
1552 gs_info->scale))
1553 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1554 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1555 gs_info->memory_type,
1556 gs_info->offset_vectype,
1557 gs_info->scale))
1558 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1559 scalar_mask);
1560 else
1562 if (dump_enabled_p ())
1563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1564 "can't operate on partial vectors because"
1565 " the target doesn't have an appropriate"
1566 " gather load or scatter store instruction.\n");
1567 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1569 return;
1572 if (memory_access_type != VMAT_CONTIGUOUS
1573 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1575 /* Element X of the data must come from iteration i * VF + X of the
1576 scalar loop. We need more work to support other mappings. */
1577 if (dump_enabled_p ())
1578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1579 "can't operate on partial vectors because an"
1580 " access isn't contiguous.\n");
1581 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1582 return;
1585 if (!VECTOR_MODE_P (vecmode))
1587 if (dump_enabled_p ())
1588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1589 "can't operate on partial vectors when emulating"
1590 " vector operations.\n");
1591 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1592 return;
1595 /* We might load more scalars than we need for permuting SLP loads.
1596 We checked in get_group_load_store_type that the extra elements
1597 don't leak into a new vector. */
1598 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1600 unsigned int nvectors;
1601 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1602 return nvectors;
1603 gcc_unreachable ();
1606 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1607 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1608 machine_mode mask_mode;
1609 machine_mode vmode;
1610 bool using_partial_vectors_p = false;
1611 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1613 nvectors = group_memory_nvectors (group_size * vf, nunits);
1614 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1615 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1616 using_partial_vectors_p = true;
1618 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1619 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1621 nvectors = group_memory_nvectors (group_size * vf, nunits);
1622 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1623 using_partial_vectors_p = true;
1626 if (!using_partial_vectors_p)
1628 if (dump_enabled_p ())
1629 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1630 "can't operate on partial vectors because the"
1631 " target doesn't have the appropriate partial"
1632 " vectorization load or store.\n");
1633 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1637 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1638 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1639 that needs to be applied to all loads and stores in a vectorized loop.
1640 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1641 otherwise return VEC_MASK & LOOP_MASK.
1643 MASK_TYPE is the type of both masks. If new statements are needed,
1644 insert them before GSI. */
1646 static tree
1647 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1648 tree vec_mask, gimple_stmt_iterator *gsi)
1650 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1651 if (!loop_mask)
1652 return vec_mask;
1654 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1656 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1657 return vec_mask;
1659 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1660 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1661 vec_mask, loop_mask);
1663 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1664 return and_res;
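/* E.g. (sketch only, hypothetical variable names): when vectorizing a
   conditional load or store under loop masking, the caller typically does

     tree final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, loop_mask,
                                         vec_mask, gsi);

   and passes FINAL_MASK to the IFN_MASK_LOAD / IFN_MASK_STORE call.  If the
   (VEC_MASK, LOOP_MASK) pair has been recorded in vec_cond_masked_set,
   VEC_MASK is returned unchanged and no BIT_AND_EXPR is emitted.  */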
1667 /* Determine whether we can use a gather load or scatter store to vectorize
1668 strided load or store STMT_INFO by truncating the current offset to a
1669 smaller width. We need to be able to construct an offset vector:
1671 { 0, X, X*2, X*3, ... }
1673 without loss of precision, where X is STMT_INFO's DR_STEP.
1675 Return true if this is possible, describing the gather load or scatter
1676 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1678 static bool
1679 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1680 loop_vec_info loop_vinfo, bool masked_p,
1681 gather_scatter_info *gs_info)
1683 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1684 data_reference *dr = dr_info->dr;
1685 tree step = DR_STEP (dr);
1686 if (TREE_CODE (step) != INTEGER_CST)
1688 /* ??? Perhaps we could use range information here? */
1689 if (dump_enabled_p ())
1690 dump_printf_loc (MSG_NOTE, vect_location,
1691 "cannot truncate variable step.\n");
1692 return false;
1695 /* Get the number of bits in an element. */
1696 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1697 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1698 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1700 /* Set COUNT to the upper limit on the number of elements - 1.
1701 Start with the maximum vectorization factor. */
1702 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1704 /* Try lowering COUNT to the number of scalar latch iterations. */
1705 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1706 widest_int max_iters;
1707 if (max_loop_iterations (loop, &max_iters)
1708 && max_iters < count)
1709 count = max_iters.to_shwi ();
1711 /* Try scales of 1 and the element size. */
1712 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1713 wi::overflow_type overflow = wi::OVF_NONE;
1714 for (int i = 0; i < 2; ++i)
1716 int scale = scales[i];
1717 widest_int factor;
1718 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1719 continue;
1721 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1722 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1723 if (overflow)
1724 continue;
1725 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1726 unsigned int min_offset_bits = wi::min_precision (range, sign);
1728 /* Find the narrowest viable offset type. */
1729 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1730 tree offset_type = build_nonstandard_integer_type (offset_bits,
1731 sign == UNSIGNED);
1733 /* See whether the target supports the operation with an offset
1734 no narrower than OFFSET_TYPE. */
1735 tree memory_type = TREE_TYPE (DR_REF (dr));
1736 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1737 vectype, memory_type, offset_type, scale,
1738 &gs_info->ifn, &gs_info->offset_vectype)
1739 || gs_info->ifn == IFN_LAST)
1740 continue;
1742 gs_info->decl = NULL_TREE;
1743 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1744 but we don't need to store that here. */
1745 gs_info->base = NULL_TREE;
1746 gs_info->element_type = TREE_TYPE (vectype);
1747 gs_info->offset = fold_convert (offset_type, step);
1748 gs_info->offset_dt = vect_constant_def;
1749 gs_info->scale = scale;
1750 gs_info->memory_type = memory_type;
1751 return true;
1754 if (overflow && dump_enabled_p ())
1755 dump_printf_loc (MSG_NOTE, vect_location,
1756 "truncating gather/scatter offset to %d bits"
1757 " might change its value.\n", element_bits);
1759 return false;
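/* Editorial sketch, not part of this file: a plain-integer illustration of
   the precision check performed above, assuming the products involved fit
   comfortably in a 64-bit long long (the real code uses widest_int and
   tracks overflow explicitly).  For example, with STEP = 24 bytes,
   SCALE = 8 (the element size) and COUNT = 15, the largest offset is
   15 * 3 = 45, so an 8-bit offset type is wide enough.  */

static unsigned int
sketch_min_offset_type_bits (long long step, int scale,
			     unsigned long long count)
{
  if (scale == 0 || step % scale != 0)
    return 0;			/* This scale cannot be used.  */
  long long range = (long long) count * (step / scale);

  /* Minimum number of value bits needed to hold RANGE.  */
  unsigned int bits = 1;
  while (bits < 62 && (range >= (1LL << bits) || range < -(1LL << bits)))
    bits++;
  if (range < 0)
    bits++;			/* Negative ranges also need a sign bit.  */

  /* Round up to the next power-of-two width, as 1U << ceil_log2 does.  */
  unsigned int width = 1;
  while (width < bits)
    width *= 2;
  return width;
}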
1762 /* Return true if we can use gather/scatter internal functions to
1763 vectorize STMT_INFO, which is a grouped or strided load or store.
1764 MASKED_P is true if the load or store is conditional. When returning
1765 true, fill in GS_INFO with the information required to perform the
1766 operation. */
1768 static bool
1769 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1770 loop_vec_info loop_vinfo, bool masked_p,
1771 gather_scatter_info *gs_info)
1773 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1774 || gs_info->ifn == IFN_LAST)
1775 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1776 masked_p, gs_info);
1778 tree old_offset_type = TREE_TYPE (gs_info->offset);
1779 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1781 gcc_assert (TYPE_PRECISION (new_offset_type)
1782 >= TYPE_PRECISION (old_offset_type));
1783 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_NOTE, vect_location,
1787 "using gather/scatter for strided/grouped access,"
1788 " scale = %d\n", gs_info->scale);
1790 return true;
1793 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1794 elements with a known constant step. Return -1 if that step
1795 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1797 static int
1798 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1800 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1801 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1802 size_zero_node);
1805 /* If the target supports a permute mask that reverses the elements in
1806 a vector of type VECTYPE, return that mask, otherwise return null. */
1808 tree
1809 perm_mask_for_reverse (tree vectype)
1811 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1813 /* The encoding has a single stepped pattern. */
1814 vec_perm_builder sel (nunits, 1, 3);
1815 for (int i = 0; i < 3; ++i)
1816 sel.quick_push (nunits - 1 - i);
1818 vec_perm_indices indices (sel, 1, nunits);
1819 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1820 indices))
1821 return NULL_TREE;
1822 return vect_gen_perm_mask_checked (vectype, indices);
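/* Editorial sketch, not part of this file: the full selector that the
   three-element stepped encoding above expands to once the vector length
   is known.  For an 8-element vector this fills SEL with
   { 7, 6, 5, 4, 3, 2, 1, 0 }.  */

static void
sketch_reverse_selector (unsigned int nunits, unsigned int *sel)
{
  for (unsigned int i = 0; i < nunits; ++i)
    sel[i] = nunits - 1 - i;
}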
1825 /* A subroutine of get_load_store_type, with a subset of the same
1826 arguments. Handle the case where STMT_INFO is a load or store that
1827 accesses consecutive elements with a negative step. Sets *POFFSET
1828 to the offset to be applied to the DR for the first access. */
1830 static vect_memory_access_type
1831 get_negative_load_store_type (vec_info *vinfo,
1832 stmt_vec_info stmt_info, tree vectype,
1833 vec_load_store_type vls_type,
1834 unsigned int ncopies, poly_int64 *poffset)
1836 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1837 dr_alignment_support alignment_support_scheme;
1839 if (ncopies > 1)
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1843 "multiple types with negative step.\n");
1844 return VMAT_ELEMENTWISE;
1847 /* For backward running DRs the first access in vectype actually is
1848 N-1 elements before the address of the DR. */
1849 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1850 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1852 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1853 alignment_support_scheme
1854 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1855 if (alignment_support_scheme != dr_aligned
1856 && alignment_support_scheme != dr_unaligned_supported)
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1860 "negative step but alignment required.\n");
1861 *poffset = 0;
1862 return VMAT_ELEMENTWISE;
1865 if (vls_type == VLS_STORE_INVARIANT)
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_NOTE, vect_location,
1869 "negative step with invariant source;"
1870 " no permute needed.\n");
1871 return VMAT_CONTIGUOUS_DOWN;
1874 if (!perm_mask_for_reverse (vectype))
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1878 "negative step and reversing not supported.\n");
1879 *poffset = 0;
1880 return VMAT_ELEMENTWISE;
1883 return VMAT_CONTIGUOUS_REVERSE;
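/* Editorial sketch, not part of this file: the first-access offset computed
   above for a backward-running DR.  With NUNITS = 4 elements of 4 bytes
   each, the vector access starts 12 bytes before the scalar DR address.  */

static long long
sketch_negative_step_offset (unsigned long long nunits,
			     unsigned long long elem_bytes)
{
  return -(long long) ((nunits - 1) * elem_bytes);
}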
1886 /* STMT_INFO is either a masked or unconditional store. Return the value
1887 being stored. */
1889 tree
1890 vect_get_store_rhs (stmt_vec_info stmt_info)
1892 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1894 gcc_assert (gimple_assign_single_p (assign));
1895 return gimple_assign_rhs1 (assign);
1897 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1899 internal_fn ifn = gimple_call_internal_fn (call);
1900 int index = internal_fn_stored_value_index (ifn);
1901 gcc_assert (index >= 0);
1902 return gimple_call_arg (call, index);
1904 gcc_unreachable ();
1907 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1909 This function returns a vector type which can be composed from NELTS pieces,
1910 whose type is recorded in PTYPE. VTYPE should be a vector type with the
1911 same vector size as the returned vector. It first checks whether the
1912 target supports construction from pieces-sized vector modes; if not, it
1913 then checks construction from a pieces-sized scalar mode. It returns
1914 NULL_TREE if it fails to find an available composition.
1916 For example, for (vtype=V16QI, nelts=4), we can probably get:
1917 - V16QI with PTYPE V4QI.
1918 - V4SI with PTYPE SI.
1919 - NULL_TREE. */
1921 static tree
1922 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1924 gcc_assert (VECTOR_TYPE_P (vtype));
1925 gcc_assert (known_gt (nelts, 0U));
1927 machine_mode vmode = TYPE_MODE (vtype);
1928 if (!VECTOR_MODE_P (vmode))
1929 return NULL_TREE;
1931 /* When we are asked to compose the vector from its components, let
1932 that happen directly. */
1933 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1935 *ptype = TREE_TYPE (vtype);
1936 return vtype;
1939 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
1940 unsigned int pbsize;
1941 if (constant_multiple_p (vbsize, nelts, &pbsize))
1943 /* First check if vec_init optab supports construction from
1944 vector pieces directly. */
1945 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1946 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
1947 machine_mode rmode;
1948 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
1949 && (convert_optab_handler (vec_init_optab, vmode, rmode)
1950 != CODE_FOR_nothing))
1952 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1953 return vtype;
1956 /* Otherwise check whether an integer type of the same piece size exists and
1957 whether the vec_init optab supports construction from it directly. */
1958 if (int_mode_for_size (pbsize, 0).exists (&elmode)
1959 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
1960 && (convert_optab_handler (vec_init_optab, rmode, elmode)
1961 != CODE_FOR_nothing))
1963 *ptype = build_nonstandard_integer_type (pbsize, 1);
1964 return build_vector_type (*ptype, nelts);
1968 return NULL_TREE;
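/* Editorial sketch, not part of this file: the piece-size arithmetic used
   above.  For a 128-bit V16QI and NELTS = 4 it yields 32-bit pieces, which
   matches the V4QI / SI alternatives in the function comment.  */

static unsigned int
sketch_piece_bits (unsigned int vector_bits, unsigned int nelts)
{
  if (nelts == 0 || vector_bits % nelts != 0)
    return 0;			/* No constant piece size.  */
  return vector_bits / nelts;
}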
1971 /* A subroutine of get_load_store_type, with a subset of the same
1972 arguments. Handle the case where STMT_INFO is part of a grouped load
1973 or store.
1975 For stores, the statements in the group are all consecutive
1976 and there is no gap at the end. For loads, the statements in the
1977 group might not be consecutive; there can be gaps between statements
1978 as well as at the end. */
1980 static bool
1981 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1982 tree vectype, slp_tree slp_node,
1983 bool masked_p, vec_load_store_type vls_type,
1984 vect_memory_access_type *memory_access_type,
1985 poly_int64 *poffset,
1986 dr_alignment_support *alignment_support_scheme,
1987 int *misalignment,
1988 gather_scatter_info *gs_info,
1989 internal_fn *lanes_ifn)
1991 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1992 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1993 stmt_vec_info first_stmt_info;
1994 unsigned int group_size;
1995 unsigned HOST_WIDE_INT gap;
1996 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1998 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1999 group_size = DR_GROUP_SIZE (first_stmt_info);
2000 gap = DR_GROUP_GAP (first_stmt_info);
2002 else
2004 first_stmt_info = stmt_info;
2005 group_size = 1;
2006 gap = 0;
2008 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2009 bool single_element_p = (stmt_info == first_stmt_info
2010 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2011 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2013 /* True if the vectorized statements would access beyond the last
2014 statement in the group. */
2015 bool overrun_p = false;
2017 /* True if we can cope with such overrun by peeling for gaps, so that
2018 there is at least one final scalar iteration after the vector loop. */
2019 bool can_overrun_p = (!masked_p
2020 && vls_type == VLS_LOAD
2021 && loop_vinfo
2022 && !loop->inner);
2024 /* There can only be a gap at the end of the group if the stride is
2025 known at compile time. */
2026 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2028 /* Stores can't yet have gaps. */
2029 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2031 if (slp_node)
2033 /* For SLP vectorization we directly vectorize a subchain
2034 without permutation. */
2035 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2036 first_dr_info
2037 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2038 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2040 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2041 separated by the stride, until we have a complete vector.
2042 Fall back to scalar accesses if that isn't possible. */
2043 if (multiple_p (nunits, group_size))
2044 *memory_access_type = VMAT_STRIDED_SLP;
2045 else
2046 *memory_access_type = VMAT_ELEMENTWISE;
2048 else
2050 overrun_p = loop_vinfo && gap != 0;
2051 if (overrun_p && vls_type != VLS_LOAD)
2053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2054 "Grouped store with gaps requires"
2055 " non-consecutive accesses\n");
2056 return false;
2058 /* An overrun is fine if the trailing elements are smaller
2059 than the alignment boundary B. Every vector access will
2060 be a multiple of B and so we are guaranteed to access a
2061 non-gap element in the same B-sized block. */
2062 if (overrun_p
2063 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2064 vectype)
2065 / vect_get_scalar_dr_size (first_dr_info)))
2066 overrun_p = false;
2068 /* If the gap splits the vector in half and the target
2069 can do half-vector operations avoid the epilogue peeling
2070 by simply loading half of the vector only. Usually
2071 the construction with an upper zero half will be elided. */
2072 dr_alignment_support alss;
2073 int misalign = dr_misalignment (first_dr_info, vectype);
2074 tree half_vtype;
2075 if (overrun_p
2076 && !masked_p
2077 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2078 vectype, misalign)))
2079 == dr_aligned
2080 || alss == dr_unaligned_supported)
2081 && known_eq (nunits, (group_size - gap) * 2)
2082 && known_eq (nunits, group_size)
2083 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2084 != NULL_TREE))
2085 overrun_p = false;
2087 if (overrun_p && !can_overrun_p)
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2091 "Peeling for outer loop is not supported\n");
2092 return false;
2094 int cmp = compare_step_with_zero (vinfo, stmt_info);
2095 if (cmp < 0)
2097 if (single_element_p)
2098 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2099 only correct for single element "interleaving" SLP. */
2100 *memory_access_type = get_negative_load_store_type
2101 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2102 else
2104 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2105 separated by the stride, until we have a complete vector.
2106 Fall back to scalar accesses if that isn't possible. */
2107 if (multiple_p (nunits, group_size))
2108 *memory_access_type = VMAT_STRIDED_SLP;
2109 else
2110 *memory_access_type = VMAT_ELEMENTWISE;
2113 else if (cmp == 0 && loop_vinfo)
2115 gcc_assert (vls_type == VLS_LOAD);
2116 *memory_access_type = VMAT_INVARIANT;
2117 /* Invariant accesses perform only component accesses, alignment
2118 is irrelevant for them. */
2119 *alignment_support_scheme = dr_unaligned_supported;
2121 else
2122 *memory_access_type = VMAT_CONTIGUOUS;
2124 /* When we have a contiguous access across loop iterations
2125 but the access in the loop doesn't cover the full vector
2126 we can end up with no gap recorded but still excess
2127 elements accessed, see PR103116. Make sure we peel for
2128 gaps if necessary and sufficient and give up if not.
2130 If there is a combination of the access not covering the full
2131 vector and a gap recorded then we may need to peel twice. */
2132 if (loop_vinfo
2133 && *memory_access_type == VMAT_CONTIGUOUS
2134 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2135 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2136 nunits))
2138 unsigned HOST_WIDE_INT cnunits, cvf;
2139 if (!can_overrun_p
2140 || !nunits.is_constant (&cnunits)
2141 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2142 /* Peeling for gaps assumes that a single scalar iteration
2143 is enough to make sure the last vector iteration doesn't
2144 access excess elements.
2145 ??? Enhancements include peeling multiple iterations
2146 or using masked loads with a static mask. */
2147 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2149 if (dump_enabled_p ())
2150 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2151 "peeling for gaps insufficient for "
2152 "access\n");
2153 return false;
2155 overrun_p = true;
2159 else
2161 /* We can always handle this case using elementwise accesses,
2162 but see if something more efficient is available. */
2163 *memory_access_type = VMAT_ELEMENTWISE;
2165 /* If there is a gap at the end of the group then these optimizations
2166 would access excess elements in the last iteration. */
2167 bool would_overrun_p = (gap != 0);
2168 /* An overrun is fine if the trailing elements are smaller than the
2169 alignment boundary B. Every vector access will be a multiple of B
2170 and so we are guaranteed to access a non-gap element in the
2171 same B-sized block. */
2172 if (would_overrun_p
2173 && !masked_p
2174 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2175 / vect_get_scalar_dr_size (first_dr_info)))
2176 would_overrun_p = false;
2178 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2179 && (can_overrun_p || !would_overrun_p)
2180 && compare_step_with_zero (vinfo, stmt_info) > 0)
2182 /* First cope with the degenerate case of a single-element
2183 vector. */
2184 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2187 else
2189 /* Otherwise try using LOAD/STORE_LANES. */
2190 *lanes_ifn
2191 = vls_type == VLS_LOAD
2192 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2193 : vect_store_lanes_supported (vectype, group_size,
2194 masked_p);
2195 if (*lanes_ifn != IFN_LAST)
2197 *memory_access_type = VMAT_LOAD_STORE_LANES;
2198 overrun_p = would_overrun_p;
2201 /* If that fails, try using permuting loads. */
2202 else if (vls_type == VLS_LOAD
2203 ? vect_grouped_load_supported (vectype,
2204 single_element_p,
2205 group_size)
2206 : vect_grouped_store_supported (vectype, group_size))
2208 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2209 overrun_p = would_overrun_p;
2214 /* As a last resort, try using a gather load or scatter store.
2216 ??? Although the code can handle all group sizes correctly,
2217 it probably isn't a win to use separate strided accesses based
2218 on nearby locations. Or, even if it's a win over scalar code,
2219 it might not be a win over vectorizing at a lower VF, if that
2220 allows us to use contiguous accesses. */
2221 if (*memory_access_type == VMAT_ELEMENTWISE
2222 && single_element_p
2223 && loop_vinfo
2224 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2225 masked_p, gs_info))
2226 *memory_access_type = VMAT_GATHER_SCATTER;
2229 if (*memory_access_type == VMAT_GATHER_SCATTER
2230 || *memory_access_type == VMAT_ELEMENTWISE)
2232 *alignment_support_scheme = dr_unaligned_supported;
2233 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2235 else
2237 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2238 *alignment_support_scheme
2239 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2240 *misalignment);
2243 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2245 /* STMT is the leader of the group. Check the operands of all the
2246 stmts of the group. */
2247 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2248 while (next_stmt_info)
2250 tree op = vect_get_store_rhs (next_stmt_info);
2251 enum vect_def_type dt;
2252 if (!vect_is_simple_use (op, vinfo, &dt))
2254 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2256 "use not simple.\n");
2257 return false;
2259 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2263 if (overrun_p)
2265 gcc_assert (can_overrun_p);
2266 if (dump_enabled_p ())
2267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2268 "Data access with gaps requires scalar "
2269 "epilogue loop\n");
2270 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2273 return true;
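/* Editorial sketch, not part of this file: the gap-versus-alignment test
   used twice above.  With a known alignment of 32 bytes and 4-byte
   elements, a trailing gap of up to 7 elements can be overrun safely,
   because every vector access is a multiple of the 32-byte boundary and
   therefore still touches a non-gap element in the same 32-byte block.  */

static int
sketch_overrun_is_safe (unsigned int gap_elems,
			unsigned int known_align_bytes,
			unsigned int elem_bytes)
{
  return gap_elems < known_align_bytes / elem_bytes;
}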
2276 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2277 if there is a memory access type that the vectorized form can use,
2278 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2279 or scatters, fill in GS_INFO accordingly. In addition
2280 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2281 the target does not support the alignment scheme. *MISALIGNMENT
2282 is set according to the alignment of the access (including
2283 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2285 SLP says whether we're performing SLP rather than loop vectorization.
2286 MASKED_P is true if the statement is conditional on a vectorized mask.
2287 VECTYPE is the vector type that the vectorized statements will use.
2288 NCOPIES is the number of vector statements that will be needed. */
2290 static bool
2291 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2292 tree vectype, slp_tree slp_node,
2293 bool masked_p, vec_load_store_type vls_type,
2294 unsigned int ncopies,
2295 vect_memory_access_type *memory_access_type,
2296 poly_int64 *poffset,
2297 dr_alignment_support *alignment_support_scheme,
2298 int *misalignment,
2299 gather_scatter_info *gs_info,
2300 internal_fn *lanes_ifn)
2302 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2303 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2304 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2305 *poffset = 0;
2306 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2308 *memory_access_type = VMAT_GATHER_SCATTER;
2309 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2310 gcc_unreachable ();
2311 /* When using internal functions, we rely on pattern recognition
2312 to convert the type of the offset to the type that the target
2313 requires, with the result being a call to an internal function.
2314 If that failed for some reason (e.g. because another pattern
2315 took priority), just handle cases in which the offset already
2316 has the right type. */
2317 else if (gs_info->ifn != IFN_LAST
2318 && !is_gimple_call (stmt_info->stmt)
2319 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2320 TREE_TYPE (gs_info->offset_vectype)))
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "%s offset requires a conversion\n",
2325 vls_type == VLS_LOAD ? "gather" : "scatter");
2326 return false;
2328 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2329 &gs_info->offset_dt,
2330 &gs_info->offset_vectype))
2332 if (dump_enabled_p ())
2333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2334 "%s index use not simple.\n",
2335 vls_type == VLS_LOAD ? "gather" : "scatter");
2336 return false;
2338 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2340 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2341 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2342 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2343 (gs_info->offset_vectype),
2344 TYPE_VECTOR_SUBPARTS (vectype)))
2346 if (dump_enabled_p ())
2347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2348 "unsupported vector types for emulated "
2349 "gather.\n");
2350 return false;
2353 /* Gather-scatter accesses perform only component accesses, alignment
2354 is irrelevant for them. */
2355 *alignment_support_scheme = dr_unaligned_supported;
2357 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2359 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2360 masked_p,
2361 vls_type, memory_access_type, poffset,
2362 alignment_support_scheme,
2363 misalignment, gs_info, lanes_ifn))
2364 return false;
2366 else if (STMT_VINFO_STRIDED_P (stmt_info))
2368 gcc_assert (!slp_node);
2369 if (loop_vinfo
2370 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2371 masked_p, gs_info))
2372 *memory_access_type = VMAT_GATHER_SCATTER;
2373 else
2374 *memory_access_type = VMAT_ELEMENTWISE;
2375 /* Alignment is irrelevant here. */
2376 *alignment_support_scheme = dr_unaligned_supported;
2378 else
2380 int cmp = compare_step_with_zero (vinfo, stmt_info);
2381 if (cmp == 0)
2383 gcc_assert (vls_type == VLS_LOAD);
2384 *memory_access_type = VMAT_INVARIANT;
2385 /* Invariant accesses perform only component accesses, alignment
2386 is irrelevant for them. */
2387 *alignment_support_scheme = dr_unaligned_supported;
2389 else
2391 if (cmp < 0)
2392 *memory_access_type = get_negative_load_store_type
2393 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2394 else
2395 *memory_access_type = VMAT_CONTIGUOUS;
2396 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2397 vectype, *poffset);
2398 *alignment_support_scheme
2399 = vect_supportable_dr_alignment (vinfo,
2400 STMT_VINFO_DR_INFO (stmt_info),
2401 vectype, *misalignment);
2405 if ((*memory_access_type == VMAT_ELEMENTWISE
2406 || *memory_access_type == VMAT_STRIDED_SLP)
2407 && !nunits.is_constant ())
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2411 "Not using elementwise accesses due to variable "
2412 "vectorization factor.\n");
2413 return false;
2416 if (*alignment_support_scheme == dr_unaligned_unsupported)
2418 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2420 "unsupported unaligned access\n");
2421 return false;
2424 /* FIXME: At the moment the cost model seems to underestimate the
2425 cost of using elementwise accesses. This check preserves the
2426 traditional behavior until that can be fixed. */
2427 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2428 if (!first_stmt_info)
2429 first_stmt_info = stmt_info;
2430 if (*memory_access_type == VMAT_ELEMENTWISE
2431 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2432 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2433 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2434 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2436 if (dump_enabled_p ())
2437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2438 "not falling back to elementwise accesses\n");
2439 return false;
2441 return true;
2444 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2445 conditional operation STMT_INFO. When returning true, store the mask
2446 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2447 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2448 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2450 static bool
2451 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2452 slp_tree slp_node, unsigned mask_index,
2453 tree *mask, slp_tree *mask_node,
2454 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2456 enum vect_def_type mask_dt;
2457 tree mask_vectype;
2458 slp_tree mask_node_1;
2459 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2460 mask, &mask_node_1, &mask_dt, &mask_vectype))
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2464 "mask use not simple.\n");
2465 return false;
2468 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2470 if (dump_enabled_p ())
2471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2472 "mask argument is not a boolean.\n");
2473 return false;
2476 /* If the caller is not prepared to adjust an external/constant
2477 SLP mask vector type, fail. */
2478 if (slp_node
2479 && !mask_node
2480 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2482 if (dump_enabled_p ())
2483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2484 "SLP mask argument is not vectorized.\n");
2485 return false;
2488 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2489 if (!mask_vectype)
2490 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype),
2491 mask_node_1);
2493 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2495 if (dump_enabled_p ())
2496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2497 "could not find an appropriate vector mask type.\n");
2498 return false;
2501 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2502 TYPE_VECTOR_SUBPARTS (vectype)))
2504 if (dump_enabled_p ())
2505 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2506 "vector mask type %T"
2507 " does not match vector data type %T.\n",
2508 mask_vectype, vectype);
2510 return false;
2513 *mask_dt_out = mask_dt;
2514 *mask_vectype_out = mask_vectype;
2515 if (mask_node)
2516 *mask_node = mask_node_1;
2517 return true;
2520 /* Return true if stored value is suitable for vectorizing store
2521 statement STMT_INFO. When returning true, store the scalar stored
2522 in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2523 the type of the vectorized store value in
2524 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2526 static bool
2527 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2528 slp_tree slp_node, tree *rhs, slp_tree *rhs_node,
2529 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2530 vec_load_store_type *vls_type_out)
2532 int op_no = 0;
2533 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2535 if (gimple_call_internal_p (call)
2536 && internal_store_fn_p (gimple_call_internal_fn (call)))
2537 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2539 if (slp_node)
2540 op_no = vect_slp_child_index_for_operand
2541 (stmt_info->stmt, op_no, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
2543 enum vect_def_type rhs_dt;
2544 tree rhs_vectype;
2545 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2546 rhs, rhs_node, &rhs_dt, &rhs_vectype))
2548 if (dump_enabled_p ())
2549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2550 "use not simple.\n");
2551 return false;
2554 /* In the case this is a store from a constant make sure
2555 native_encode_expr can handle it. */
2556 if (CONSTANT_CLASS_P (*rhs) && native_encode_expr (*rhs, NULL, 64) == 0)
2558 if (dump_enabled_p ())
2559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2560 "cannot encode constant as a byte sequence.\n");
2561 return false;
2564 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2565 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2567 if (dump_enabled_p ())
2568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2569 "incompatible vector types.\n");
2570 return false;
2573 *rhs_dt_out = rhs_dt;
2574 *rhs_vectype_out = rhs_vectype;
2575 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2576 *vls_type_out = VLS_STORE_INVARIANT;
2577 else
2578 *vls_type_out = VLS_STORE;
2579 return true;
2582 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2583 Note that we support masks with floating-point type, in which case the
2584 floats are interpreted as a bitmask. */
2586 static tree
2587 vect_build_all_ones_mask (vec_info *vinfo,
2588 stmt_vec_info stmt_info, tree masktype)
2590 if (TREE_CODE (masktype) == INTEGER_TYPE)
2591 return build_int_cst (masktype, -1);
2592 else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2593 || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2595 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2596 mask = build_vector_from_val (masktype, mask);
2597 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2599 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2601 REAL_VALUE_TYPE r;
2602 long tmp[6];
2603 for (int j = 0; j < 6; ++j)
2604 tmp[j] = -1;
2605 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2606 tree mask = build_real (TREE_TYPE (masktype), r);
2607 mask = build_vector_from_val (masktype, mask);
2608 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2610 gcc_unreachable ();
2613 /* Build an all-zero merge value of type VECTYPE while vectorizing
2614 STMT_INFO as a gather load. */
2616 static tree
2617 vect_build_zero_merge_argument (vec_info *vinfo,
2618 stmt_vec_info stmt_info, tree vectype)
2620 tree merge;
2621 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2622 merge = build_int_cst (TREE_TYPE (vectype), 0);
2623 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2625 REAL_VALUE_TYPE r;
2626 long tmp[6];
2627 for (int j = 0; j < 6; ++j)
2628 tmp[j] = 0;
2629 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2630 merge = build_real (TREE_TYPE (vectype), r);
2632 else
2633 gcc_unreachable ();
2634 merge = build_vector_from_val (vectype, merge);
2635 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2638 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2639 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2640 the gather load operation. If the load is conditional, MASK is the
2641 vectorized condition, otherwise MASK is null. PTR is the base
2642 pointer and OFFSET is the vectorized offset. */
2644 static gimple *
2645 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2646 gimple_stmt_iterator *gsi,
2647 gather_scatter_info *gs_info,
2648 tree ptr, tree offset, tree mask)
2650 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2651 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2652 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2653 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2654 /* ptrtype */ arglist = TREE_CHAIN (arglist);
2655 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2656 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2657 tree scaletype = TREE_VALUE (arglist);
2658 tree var;
2659 gcc_checking_assert (types_compatible_p (srctype, rettype)
2660 && (!mask
2661 || TREE_CODE (masktype) == INTEGER_TYPE
2662 || types_compatible_p (srctype, masktype)));
2664 tree op = offset;
2665 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2667 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2668 TYPE_VECTOR_SUBPARTS (idxtype)));
2669 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2670 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2671 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2672 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2673 op = var;
2676 tree src_op = NULL_TREE;
2677 tree mask_op = NULL_TREE;
2678 if (mask)
2680 if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2682 tree utype, optype = TREE_TYPE (mask);
2683 if (VECTOR_TYPE_P (masktype)
2684 || TYPE_MODE (masktype) == TYPE_MODE (optype))
2685 utype = masktype;
2686 else
2687 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2688 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2689 tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
2690 gassign *new_stmt
2691 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2692 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2693 mask_arg = var;
2694 if (!useless_type_conversion_p (masktype, utype))
2696 gcc_assert (TYPE_PRECISION (utype)
2697 <= TYPE_PRECISION (masktype));
2698 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2699 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2700 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2701 mask_arg = var;
2703 src_op = build_zero_cst (srctype);
2704 mask_op = mask_arg;
2706 else
2708 src_op = mask;
2709 mask_op = mask;
2712 else
2714 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2715 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2718 tree scale = build_int_cst (scaletype, gs_info->scale);
2719 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2720 mask_op, scale);
2722 if (!useless_type_conversion_p (vectype, rettype))
2724 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2725 TYPE_VECTOR_SUBPARTS (rettype)));
2726 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2727 gimple_call_set_lhs (new_stmt, op);
2728 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2729 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2730 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
2733 return new_stmt;
2736 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2737 instructions before GSI. GS_INFO describes the scatter store operation.
2738 PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
2739 vectorized data to store.
2740 If the store is conditional, MASK is the vectorized condition, otherwise
2741 MASK is null. */
2743 static gimple *
2744 vect_build_one_scatter_store_call (vec_info *vinfo, stmt_vec_info stmt_info,
2745 gimple_stmt_iterator *gsi,
2746 gather_scatter_info *gs_info,
2747 tree ptr, tree offset, tree oprnd, tree mask)
2749 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2750 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2751 /* tree ptrtype = TREE_VALUE (arglist); */ arglist = TREE_CHAIN (arglist);
2752 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2753 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2754 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2755 tree scaletype = TREE_VALUE (arglist);
2756 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2757 && TREE_CODE (rettype) == VOID_TYPE);
2759 tree mask_arg = NULL_TREE;
2760 if (mask)
2762 mask_arg = mask;
2763 tree optype = TREE_TYPE (mask_arg);
2764 tree utype;
2765 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
2766 utype = masktype;
2767 else
2768 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2769 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
2770 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
2771 gassign *new_stmt
2772 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2773 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2774 mask_arg = var;
2775 if (!useless_type_conversion_p (masktype, utype))
2777 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
2778 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2779 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2780 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2781 mask_arg = var;
2784 else
2786 mask_arg = build_int_cst (masktype, -1);
2787 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
2790 tree src = oprnd;
2791 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
2793 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
2794 TYPE_VECTOR_SUBPARTS (srctype)));
2795 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
2796 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
2797 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
2798 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2799 src = var;
2802 tree op = offset;
2803 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2805 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2806 TYPE_VECTOR_SUBPARTS (idxtype)));
2807 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2808 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2809 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2810 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2811 op = var;
2814 tree scale = build_int_cst (scaletype, gs_info->scale);
2815 gcall *new_stmt
2816 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
2817 return new_stmt;
2820 /* Prepare the base and offset in GS_INFO for vectorization.
2821 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2822 to the vectorized offset argument for the first copy of STMT_INFO.
2823 STMT_INFO is the statement described by GS_INFO and LOOP is the
2824 containing loop. */
2826 static void
2827 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2828 class loop *loop, stmt_vec_info stmt_info,
2829 slp_tree slp_node, gather_scatter_info *gs_info,
2830 tree *dataref_ptr, vec<tree> *vec_offset)
2832 gimple_seq stmts = NULL;
2833 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2834 if (stmts != NULL)
2836 basic_block new_bb;
2837 edge pe = loop_preheader_edge (loop);
2838 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2839 gcc_assert (!new_bb);
2841 if (slp_node)
2842 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2843 else
2845 unsigned ncopies
2846 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2847 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2848 gs_info->offset, vec_offset,
2849 gs_info->offset_vectype);
2853 /* Prepare to implement a grouped or strided load or store using
2854 the gather load or scatter store operation described by GS_INFO.
2855 STMT_INFO is the load or store statement.
2857 Set *DATAREF_BUMP to the amount that should be added to the base
2858 address after each copy of the vectorized statement. Set *VEC_OFFSET
2859 to an invariant offset vector in which element I has the value
2860 I * DR_STEP / SCALE. */
2862 static void
2863 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2864 loop_vec_info loop_vinfo,
2865 gimple_stmt_iterator *gsi,
2866 gather_scatter_info *gs_info,
2867 tree *dataref_bump, tree *vec_offset,
2868 vec_loop_lens *loop_lens)
2870 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2871 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2873 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2875 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2876 ivtmp_8 = _31 * 16 (step in bytes);
2877 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2878 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2879 tree loop_len
2880 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
2881 tree tmp
2882 = fold_build2 (MULT_EXPR, sizetype,
2883 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2884 loop_len);
2885 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
2886 GSI_SAME_STMT);
2888 else
2890 tree bump
2891 = size_binop (MULT_EXPR,
2892 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2893 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2894 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
2897 /* The offset given in GS_INFO can have pointer type, so use the element
2898 type of the vector instead. */
2899 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
2901 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2902 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
2903 ssize_int (gs_info->scale));
2904 step = fold_convert (offset_type, step);
2906 /* Create {0, X, X*2, X*3, ...}. */
2907 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
2908 build_zero_cst (offset_type), step);
2909 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
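/* Editorial sketch, not part of this file: the invariant offset series
   built above, using plain integers.  With DR_STEP = 8 bytes and
   SCALE = 4 the series is { 0, 2, 4, 6, ... }, so element I addresses
   base + I * 8 once the gather/scatter applies the scale.  */

static void
sketch_strided_offsets (long long dr_step, int scale,
			unsigned int n, long long *out)
{
  long long x = dr_step / scale;	/* Assumed to divide exactly.  */
  for (unsigned int i = 0; i < n; ++i)
    out[i] = (long long) i * x;
}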
2912 /* Prepare the pointer IVs which need to be updated by a variable amount.
2913 That variable amount is the outcome of .SELECT_VL. In this case each
2914 iteration may process a flexible number of elements, as long as that
2915 number is <= VF elements.
2917 Return the data reference increment according to .SELECT_VL.
2918 If new statements are needed, insert them before GSI. */
2920 static tree
2921 vect_get_loop_variant_data_ptr_increment (
2922 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
2923 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
2924 vect_memory_access_type memory_access_type)
2926 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2927 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2929 /* gather/scatter never reach here. */
2930 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
2932 /* When we support the SELECT_VL pattern, we dynamically adjust
2933 the memory address by the .SELECT_VL result.
2935 The result of .SELECT_VL is the number of elements to
2936 be processed in each iteration. So the memory address
2937 adjustment operation should be:
2939 addr = addr + .SELECT_VL (ARG..) * step;
2941 tree loop_len
2942 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
2943 tree len_type = TREE_TYPE (loop_len);
2944 /* Since the outcome of .SELECT_VL is a number of elements, scale it by
2945 the step in bytes so that it can be used as the variable amount by
2946 which the address pointer IVs are adjusted. */
2947 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
2948 wide_int_to_tree (len_type, wi::to_widest (step)));
2949 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
2950 gassign *assign = gimple_build_assign (bump, tmp);
2951 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
2952 return bump;
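/* Editorial sketch, not part of this file: the variable pointer bump
   computed above.  If .SELECT_VL returns 4 elements for this iteration
   and the scalar step is 16 bytes, the pointer IV advances by 64 bytes,
   matching the "ivtmp_8 = _31 * 16" line in the example earlier.  */

static unsigned long long
sketch_select_vl_bump (unsigned long long selected_elems,
		       unsigned long long step_bytes)
{
  return selected_elems * step_bytes;
}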
2955 /* Return the amount that should be added to a vector pointer to move
2956 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2957 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2958 vectorization. */
2960 static tree
2961 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
2962 dr_vec_info *dr_info, tree aggr_type,
2963 vect_memory_access_type memory_access_type,
2964 vec_loop_lens *loop_lens = nullptr)
2966 if (memory_access_type == VMAT_INVARIANT)
2967 return size_zero_node;
2969 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2970 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2971 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
2972 loop_lens, dr_info,
2973 memory_access_type);
2975 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
2976 tree step = vect_dr_behavior (vinfo, dr_info)->step;
2977 if (tree_int_cst_sgn (step) == -1)
2978 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
2979 return iv_step;
2982 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2984 static bool
2985 vectorizable_bswap (vec_info *vinfo,
2986 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
2987 gimple **vec_stmt, slp_tree slp_node,
2988 slp_tree *slp_op,
2989 tree vectype_in, stmt_vector_for_cost *cost_vec)
2991 tree op, vectype;
2992 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
2993 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2994 unsigned ncopies;
2996 op = gimple_call_arg (stmt, 0);
2997 vectype = STMT_VINFO_VECTYPE (stmt_info);
2998 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3000 /* Multiple types in SLP are handled by creating the appropriate number of
3001 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3002 case of SLP. */
3003 if (slp_node)
3004 ncopies = 1;
3005 else
3006 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3008 gcc_assert (ncopies >= 1);
3010 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype))
3012 if (dump_enabled_p ())
3013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3014 "mismatched vector sizes %T and %T\n",
3015 vectype_in, vectype);
3016 return false;
3019 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3020 if (! char_vectype)
3021 return false;
3023 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3024 unsigned word_bytes;
3025 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3026 return false;
3028 /* The encoding uses one stepped pattern for each byte in the word. */
3029 vec_perm_builder elts (num_bytes, word_bytes, 3);
3030 for (unsigned i = 0; i < 3; ++i)
3031 for (unsigned j = 0; j < word_bytes; ++j)
3032 elts.quick_push ((i + 1) * word_bytes - j - 1);
3034 vec_perm_indices indices (elts, 1, num_bytes);
3035 machine_mode vmode = TYPE_MODE (char_vectype);
3036 if (!can_vec_perm_const_p (vmode, vmode, indices))
3037 return false;
3039 if (! vec_stmt)
3041 if (slp_node
3042 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3044 if (dump_enabled_p ())
3045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3046 "incompatible vector types for invariants\n");
3047 return false;
3050 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3051 DUMP_VECT_SCOPE ("vectorizable_bswap");
3052 record_stmt_cost (cost_vec,
3053 1, vector_stmt, stmt_info, 0, vect_prologue);
3054 record_stmt_cost (cost_vec,
3055 slp_node
3056 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3057 vec_perm, stmt_info, 0, vect_body);
3058 return true;
3061 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3063 /* Transform. */
3064 vec<tree> vec_oprnds = vNULL;
3065 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3066 op, &vec_oprnds);
3067 /* Arguments are ready. Create the new vector stmt. */
3068 unsigned i;
3069 tree vop;
3070 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3072 gimple *new_stmt;
3073 tree tem = make_ssa_name (char_vectype);
3074 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3075 char_vectype, vop));
3076 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3077 tree tem2 = make_ssa_name (char_vectype);
3078 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3079 tem, tem, bswap_vconst);
3080 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3081 tem = make_ssa_name (vectype);
3082 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3083 vectype, tem2));
3084 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3085 if (slp_node)
3086 slp_node->push_vec_def (new_stmt);
3087 else
3088 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3091 if (!slp_node)
3092 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3094 vec_oprnds.release ();
3095 return true;
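/* Editorial sketch, not part of this file: the byte permutation that the
   stepped encoding above describes, fully expanded.  For BUILT_IN_BSWAP32
   on a 16-byte vector (WORD_BYTES = 4, NUM_BYTES = 16) it fills SEL with
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */

static void
sketch_bswap_selector (unsigned int num_bytes, unsigned int word_bytes,
		       unsigned int *sel)
{
  for (unsigned int i = 0; i < num_bytes / word_bytes; ++i)
    for (unsigned int j = 0; j < word_bytes; ++j)
      sel[i * word_bytes + j] = (i + 1) * word_bytes - j - 1;
}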
3098 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3099 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3100 in a single step. On success, store the binary pack code in
3101 *CONVERT_CODE. */
3103 static bool
3104 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3105 code_helper *convert_code)
3107 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3108 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3109 return false;
3111 code_helper code;
3112 int multi_step_cvt = 0;
3113 auto_vec <tree, 8> interm_types;
3114 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3115 &code, &multi_step_cvt, &interm_types)
3116 || multi_step_cvt)
3117 return false;
3119 *convert_code = code;
3120 return true;
3123 /* Function vectorizable_call.
3125 Check if STMT_INFO performs a function call that can be vectorized.
3126 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3127 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3128 Return true if STMT_INFO is vectorizable in this way. */
3130 static bool
3131 vectorizable_call (vec_info *vinfo,
3132 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3133 gimple **vec_stmt, slp_tree slp_node,
3134 stmt_vector_for_cost *cost_vec)
3136 gcall *stmt;
3137 tree vec_dest;
3138 tree scalar_dest;
3139 tree op;
3140 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3141 tree vectype_out, vectype_in;
3142 poly_uint64 nunits_in;
3143 poly_uint64 nunits_out;
3144 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3145 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3146 tree fndecl, new_temp, rhs_type;
3147 enum vect_def_type dt[4]
3148 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3149 vect_unknown_def_type };
3150 tree vectypes[ARRAY_SIZE (dt)] = {};
3151 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3152 int ndts = ARRAY_SIZE (dt);
3153 int ncopies, j;
3154 auto_vec<tree, 8> vargs;
3155 enum { NARROW, NONE, WIDEN } modifier;
3156 size_t i, nargs;
3157 tree lhs;
3158 tree clz_ctz_arg1 = NULL_TREE;
3160 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3161 return false;
3163 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3164 && ! vec_stmt)
3165 return false;
3167 /* Is STMT_INFO a vectorizable call? */
3168 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3169 if (!stmt)
3170 return false;
3172 if (gimple_call_internal_p (stmt)
3173 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3174 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3175 /* Handled by vectorizable_load and vectorizable_store. */
3176 return false;
3178 if (gimple_call_lhs (stmt) == NULL_TREE
3179 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3180 return false;
3182 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3184 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3186 /* Process function arguments. */
3187 rhs_type = NULL_TREE;
3188 vectype_in = NULL_TREE;
3189 nargs = gimple_call_num_args (stmt);
3191 /* Bail out if the function has more than four arguments; we do not have
3192 interesting builtin functions to vectorize with more than two arguments
3193 except for fma. Having no arguments is not handled either. */
3194 if (nargs == 0 || nargs > 4)
3195 return false;
3197 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3198 combined_fn cfn = gimple_call_combined_fn (stmt);
3199 if (cfn == CFN_GOMP_SIMD_LANE)
3201 nargs = 0;
3202 rhs_type = unsigned_type_node;
3204 /* Similarly pretend IFN_CLZ and IFN_CTZ only have one argument; the second
3205 argument just says whether the operation is well-defined at zero and what
3206 value should be returned for it. */
3207 if ((cfn == CFN_CLZ || cfn == CFN_CTZ) && nargs == 2)
3209 nargs = 1;
3210 clz_ctz_arg1 = gimple_call_arg (stmt, 1);
3213 int mask_opno = -1;
3214 if (internal_fn_p (cfn))
3215 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3217 for (i = 0; i < nargs; i++)
3219 if ((int) i == mask_opno)
3221 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3222 &op, &slp_op[i], &dt[i], &vectypes[i]))
3223 return false;
3224 continue;
3227 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3228 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3230 if (dump_enabled_p ())
3231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3232 "use not simple.\n");
3233 return false;
3236 /* We can only handle calls with arguments of the same type. */
3237 if (rhs_type
3238 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3240 if (dump_enabled_p ())
3241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3242 "argument types differ.\n");
3243 return false;
3245 if (!rhs_type)
3246 rhs_type = TREE_TYPE (op);
3248 if (!vectype_in)
3249 vectype_in = vectypes[i];
3250 else if (vectypes[i]
3251 && !types_compatible_p (vectypes[i], vectype_in))
3253 if (dump_enabled_p ())
3254 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3255 "argument vector types differ.\n");
3256 return false;
3259 /* If all arguments are external or constant defs, infer the vector type
3260 from the scalar type. */
3261 if (!vectype_in)
3262 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3263 if (vec_stmt)
3264 gcc_assert (vectype_in);
3265 if (!vectype_in)
3267 if (dump_enabled_p ())
3268 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3269 "no vectype for scalar type %T\n", rhs_type);
3271 return false;
3274 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3275 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3277 if (dump_enabled_p ())
3278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3279 "mixed mask and nonmask vector types\n");
3280 return false;
3283 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3287 "use emulated vector type for call\n");
3288 return false;
3291 /* FORNOW */
3292 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3293 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3294 if (known_eq (nunits_in * 2, nunits_out))
3295 modifier = NARROW;
3296 else if (known_eq (nunits_out, nunits_in))
3297 modifier = NONE;
3298 else if (known_eq (nunits_out * 2, nunits_in))
3299 modifier = WIDEN;
3300 else
3301 return false;
3303 /* We only handle functions that do not read or clobber memory. */
3304 if (gimple_vuse (stmt))
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3308 "function reads from or writes to memory.\n");
3309 return false;
3312 /* For now, we only vectorize functions if a target specific builtin
3313 is available. TODO -- in some cases, it might be profitable to
3314 insert the calls for pieces of the vector, in order to be able
3315 to vectorize other operations in the loop. */
3316 fndecl = NULL_TREE;
3317 internal_fn ifn = IFN_LAST;
3318 tree callee = gimple_call_fndecl (stmt);
3320 /* First try using an internal function. */
3321 code_helper convert_code = MAX_TREE_CODES;
3322 if (cfn != CFN_LAST
3323 && (modifier == NONE
3324 || (modifier == NARROW
3325 && simple_integer_narrowing (vectype_out, vectype_in,
3326 &convert_code))))
3327 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3328 vectype_in);
3330 /* If that fails, try asking for a target-specific built-in function. */
3331 if (ifn == IFN_LAST)
3333 if (cfn != CFN_LAST)
3334 fndecl = targetm.vectorize.builtin_vectorized_function
3335 (cfn, vectype_out, vectype_in);
3336 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3337 fndecl = targetm.vectorize.builtin_md_vectorized_function
3338 (callee, vectype_out, vectype_in);
3341 if (ifn == IFN_LAST && !fndecl)
3343 if (cfn == CFN_GOMP_SIMD_LANE
3344 && !slp_node
3345 && loop_vinfo
3346 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3347 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3348 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3349 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3351 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3352 { 0, 1, 2, ... vf - 1 } vector. */
3353 gcc_assert (nargs == 0);
3355 else if (modifier == NONE
3356 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3357 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3358 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3359 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3360 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3361 slp_op, vectype_in, cost_vec);
3362 else
3364 if (dump_enabled_p ())
3365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3366 "function is not vectorizable.\n");
3367 return false;
3371 if (slp_node)
3372 ncopies = 1;
3373 else if (modifier == NARROW && ifn == IFN_LAST)
3374 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3375 else
3376 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3378 /* Sanity check: make sure that at least one copy of the vectorized stmt
3379 needs to be generated. */
3380 gcc_assert (ncopies >= 1);
3382 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3383 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3384 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3385 int len_opno = internal_fn_len_index (cond_len_fn);
3386 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3387 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3388 if (!vec_stmt) /* transformation not required. */
3390 if (slp_node)
3391 for (i = 0; i < nargs; ++i)
3392 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3393 vectypes[i]
3394 ? vectypes[i] : vectype_in))
3396 if (dump_enabled_p ())
3397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3398 "incompatible vector types for invariants\n");
3399 return false;
3401 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3402 DUMP_VECT_SCOPE ("vectorizable_call");
3403 vect_model_simple_cost (vinfo, stmt_info,
3404 ncopies, dt, ndts, slp_node, cost_vec);
3405 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3406 record_stmt_cost (cost_vec, ncopies / 2,
3407 vec_promote_demote, stmt_info, 0, vect_body);
3409 if (loop_vinfo
3410 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3411 && (reduc_idx >= 0 || mask_opno >= 0))
3413 if (reduc_idx >= 0
3414 && (cond_fn == IFN_LAST
3415 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3416 OPTIMIZE_FOR_SPEED))
3417 && (cond_len_fn == IFN_LAST
3418 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3419 OPTIMIZE_FOR_SPEED)))
3421 if (dump_enabled_p ())
3422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3423 "can't use a fully-masked loop because no"
3424 " conditional operation is available.\n");
3425 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3427 else
3429 unsigned int nvectors
3430 = (slp_node
3431 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3432 : ncopies);
3433 tree scalar_mask = NULL_TREE;
3434 if (mask_opno >= 0)
3435 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3436 if (cond_len_fn != IFN_LAST
3437 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3438 OPTIMIZE_FOR_SPEED))
3439 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3441 else
3442 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3443 scalar_mask);
3446 return true;
3449 /* Transform. */
3451 if (dump_enabled_p ())
3452 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3454 /* Handle def. */
3455 scalar_dest = gimple_call_lhs (stmt);
3456 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3458 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3459 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3460 unsigned int vect_nargs = nargs;
3461 if (len_loop_p)
3463 if (len_opno >= 0)
3465 ifn = cond_len_fn;
3466 /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS. */
3467 vect_nargs += 2;
3469 else if (reduc_idx >= 0)
3470 gcc_unreachable ();
3472 else if (masked_loop_p && reduc_idx >= 0)
3474 ifn = cond_fn;
3475 vect_nargs += 2;
3477 if (clz_ctz_arg1)
3478 ++vect_nargs;
3480 if (modifier == NONE || ifn != IFN_LAST)
3482 tree prev_res = NULL_TREE;
3483 vargs.safe_grow (vect_nargs, true);
3484 auto_vec<vec<tree> > vec_defs (nargs);
3485 for (j = 0; j < ncopies; ++j)
3487 /* Build argument list for the vectorized call. */
3488 if (slp_node)
3490 vec<tree> vec_oprnds0;
3492 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3493 vec_oprnds0 = vec_defs[0];
3495 /* Arguments are ready. Create the new vector stmt. */
3496 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3498 int varg = 0;
3499 if (masked_loop_p && reduc_idx >= 0)
3501 unsigned int vec_num = vec_oprnds0.length ();
3502 /* Always true for SLP. */
3503 gcc_assert (ncopies == 1);
3504 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3505 gsi, masks, vec_num,
3506 vectype_out, i);
3508 size_t k;
3509 for (k = 0; k < nargs; k++)
3511 vec<tree> vec_oprndsk = vec_defs[k];
3512 vargs[varg++] = vec_oprndsk[i];
3514 if (masked_loop_p && reduc_idx >= 0)
3515 vargs[varg++] = vargs[reduc_idx + 1];
3516 if (clz_ctz_arg1)
3517 vargs[varg++] = clz_ctz_arg1;
3519 gimple *new_stmt;
3520 if (modifier == NARROW)
3522 /* We don't define any narrowing conditional functions
3523 at present. */
3524 gcc_assert (mask_opno < 0);
3525 tree half_res = make_ssa_name (vectype_in);
3526 gcall *call
3527 = gimple_build_call_internal_vec (ifn, vargs);
3528 gimple_call_set_lhs (call, half_res);
3529 gimple_call_set_nothrow (call, true);
3530 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3531 if ((i & 1) == 0)
3533 prev_res = half_res;
3534 continue;
3536 new_temp = make_ssa_name (vec_dest);
3537 new_stmt = vect_gimple_build (new_temp, convert_code,
3538 prev_res, half_res);
3539 vect_finish_stmt_generation (vinfo, stmt_info,
3540 new_stmt, gsi);
3542 else
3544 if (len_opno >= 0 && len_loop_p)
3546 unsigned int vec_num = vec_oprnds0.length ();
3547 /* Always true for SLP. */
3548 gcc_assert (ncopies == 1);
3549 tree len
3550 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3551 vectype_out, i, 1);
3552 signed char biasval
3553 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3554 tree bias = build_int_cst (intQI_type_node, biasval);
3555 vargs[len_opno] = len;
3556 vargs[len_opno + 1] = bias;
3558 else if (mask_opno >= 0 && masked_loop_p)
3560 unsigned int vec_num = vec_oprnds0.length ();
3561 /* Always true for SLP. */
3562 gcc_assert (ncopies == 1);
3563 tree mask = vect_get_loop_mask (loop_vinfo,
3564 gsi, masks, vec_num,
3565 vectype_out, i);
3566 vargs[mask_opno] = prepare_vec_mask
3567 (loop_vinfo, TREE_TYPE (mask), mask,
3568 vargs[mask_opno], gsi);
3571 gcall *call;
3572 if (ifn != IFN_LAST)
3573 call = gimple_build_call_internal_vec (ifn, vargs);
3574 else
3575 call = gimple_build_call_vec (fndecl, vargs);
3576 new_temp = make_ssa_name (vec_dest, call);
3577 gimple_call_set_lhs (call, new_temp);
3578 gimple_call_set_nothrow (call, true);
3579 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3580 new_stmt = call;
3582 slp_node->push_vec_def (new_stmt);
3584 continue;
3587 int varg = 0;
3588 if (masked_loop_p && reduc_idx >= 0)
3589 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3590 vectype_out, j);
3591 for (i = 0; i < nargs; i++)
3593 op = gimple_call_arg (stmt, i);
3594 if (j == 0)
3596 vec_defs.quick_push (vNULL);
3597 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3598 op, &vec_defs[i],
3599 vectypes[i]);
3601 vargs[varg++] = vec_defs[i][j];
3603 if (masked_loop_p && reduc_idx >= 0)
3604 vargs[varg++] = vargs[reduc_idx + 1];
3605 if (clz_ctz_arg1)
3606 vargs[varg++] = clz_ctz_arg1;
3608 if (len_opno >= 0 && len_loop_p)
3610 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3611 vectype_out, j, 1);
3612 signed char biasval
3613 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3614 tree bias = build_int_cst (intQI_type_node, biasval);
3615 vargs[len_opno] = len;
3616 vargs[len_opno + 1] = bias;
3618 else if (mask_opno >= 0 && masked_loop_p)
3620 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3621 vectype_out, j);
3622 vargs[mask_opno]
3623 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3624 vargs[mask_opno], gsi);
3627 gimple *new_stmt;
3628 if (cfn == CFN_GOMP_SIMD_LANE)
3630 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3631 tree new_var
3632 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3633 gimple *init_stmt = gimple_build_assign (new_var, cst);
3634 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3635 new_temp = make_ssa_name (vec_dest);
3636 new_stmt = gimple_build_assign (new_temp, new_var);
3637 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3639 else if (modifier == NARROW)
3641 /* We don't define any narrowing conditional functions at
3642 present. */
3643 gcc_assert (mask_opno < 0);
3644 tree half_res = make_ssa_name (vectype_in);
3645 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3646 gimple_call_set_lhs (call, half_res);
3647 gimple_call_set_nothrow (call, true);
3648 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3649 if ((j & 1) == 0)
3651 prev_res = half_res;
3652 continue;
3654 new_temp = make_ssa_name (vec_dest);
3655 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3656 half_res);
3657 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3659 else
3661 gcall *call;
3662 if (ifn != IFN_LAST)
3663 call = gimple_build_call_internal_vec (ifn, vargs);
3664 else
3665 call = gimple_build_call_vec (fndecl, vargs);
3666 new_temp = make_ssa_name (vec_dest, call);
3667 gimple_call_set_lhs (call, new_temp);
3668 gimple_call_set_nothrow (call, true);
3669 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3670 new_stmt = call;
3673 if (j == (modifier == NARROW ? 1 : 0))
3674 *vec_stmt = new_stmt;
3675 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3677 for (i = 0; i < nargs; i++)
3679 vec<tree> vec_oprndsi = vec_defs[i];
3680 vec_oprndsi.release ();
3683 else if (modifier == NARROW)
3685 auto_vec<vec<tree> > vec_defs (nargs);
3686 /* We don't define any narrowing conditional functions at present. */
3687 gcc_assert (mask_opno < 0);
3688 for (j = 0; j < ncopies; ++j)
3690 /* Build argument list for the vectorized call. */
3691 if (j == 0)
3692 vargs.create (nargs * 2);
3693 else
3694 vargs.truncate (0);
3696 if (slp_node)
3698 vec<tree> vec_oprnds0;
3700 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3701 vec_oprnds0 = vec_defs[0];
3703 /* Arguments are ready. Create the new vector stmt. */
3704 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3706 size_t k;
3707 vargs.truncate (0);
3708 for (k = 0; k < nargs; k++)
3710 vec<tree> vec_oprndsk = vec_defs[k];
3711 vargs.quick_push (vec_oprndsk[i]);
3712 vargs.quick_push (vec_oprndsk[i + 1]);
3714 gcall *call;
3715 if (ifn != IFN_LAST)
3716 call = gimple_build_call_internal_vec (ifn, vargs);
3717 else
3718 call = gimple_build_call_vec (fndecl, vargs);
3719 new_temp = make_ssa_name (vec_dest, call);
3720 gimple_call_set_lhs (call, new_temp);
3721 gimple_call_set_nothrow (call, true);
3722 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3723 slp_node->push_vec_def (call);
3725 continue;
3728 for (i = 0; i < nargs; i++)
3730 op = gimple_call_arg (stmt, i);
3731 if (j == 0)
3733 vec_defs.quick_push (vNULL);
3734 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3735 op, &vec_defs[i], vectypes[i]);
3737 vec_oprnd0 = vec_defs[i][2*j];
3738 vec_oprnd1 = vec_defs[i][2*j+1];
3740 vargs.quick_push (vec_oprnd0);
3741 vargs.quick_push (vec_oprnd1);
3744 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3745 new_temp = make_ssa_name (vec_dest, new_stmt);
3746 gimple_call_set_lhs (new_stmt, new_temp);
3747 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3749 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3752 if (!slp_node)
3753 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3755 for (i = 0; i < nargs; i++)
3757 vec<tree> vec_oprndsi = vec_defs[i];
3758 vec_oprndsi.release ();
3761 else
3762 /* No current target implements this case. */
3763 return false;
3765 vargs.release ();
3767 /* The call in STMT might prevent it from being removed in dce.
3768 We however cannot remove it here, due to the way the ssa name
3769 it defines is mapped to the new definition. So just replace
3770 the rhs of the statement with something harmless. */
3772 if (slp_node)
3773 return true;
3775 stmt_info = vect_orig_stmt (stmt_info);
3776 lhs = gimple_get_lhs (stmt_info->stmt);
3778 gassign *new_stmt
3779 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3780 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3782 return true;
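
/* A simplified, self-contained model (not the real internal-fn expansion)
   of the COND_LEN_* convention used above: the conditional call gains two
   trailing arguments, LEN and BIAS, and only the first LEN + BIAS lanes
   whose mask bit is set produce the operation's result, while all other
   lanes keep the "else" operand.  The toy_cond_len_add name and the use
   of scalar std::vector lanes are purely illustrative.  */

#include <vector>

static std::vector<int>
toy_cond_len_add (const std::vector<bool> &mask,
		  const std::vector<int> &a, const std::vector<int> &b,
		  const std::vector<int> &els, long len, long bias)
{
  std::vector<int> res (a.size ());
  for (long i = 0; i < (long) a.size (); i++)
    /* BIAS is 0 or -1, depending on what the target prefers.  */
    res[i] = (i < len + bias && mask[i]) ? a[i] + b[i] : els[i];
  return res;
}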
3786 struct simd_call_arg_info
3788 tree vectype;
3789 tree op;
3790 HOST_WIDE_INT linear_step;
3791 enum vect_def_type dt;
3792 unsigned int align;
3793 bool simd_lane_linear;
3796 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3797 is linear within simd lane (but not within whole loop), note it in
3798 *ARGINFO. */
3800 static void
3801 vect_simd_lane_linear (tree op, class loop *loop,
3802 struct simd_call_arg_info *arginfo)
3804 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3806 if (!is_gimple_assign (def_stmt)
3807 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3808 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3809 return;
3811 tree base = gimple_assign_rhs1 (def_stmt);
3812 HOST_WIDE_INT linear_step = 0;
3813 tree v = gimple_assign_rhs2 (def_stmt);
3814 while (TREE_CODE (v) == SSA_NAME)
3816 tree t;
3817 def_stmt = SSA_NAME_DEF_STMT (v);
3818 if (is_gimple_assign (def_stmt))
3819 switch (gimple_assign_rhs_code (def_stmt))
3821 case PLUS_EXPR:
3822 t = gimple_assign_rhs2 (def_stmt);
3823 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3824 return;
3825 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3826 v = gimple_assign_rhs1 (def_stmt);
3827 continue;
3828 case MULT_EXPR:
3829 t = gimple_assign_rhs2 (def_stmt);
3830 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3831 return;
3832 linear_step = tree_to_shwi (t);
3833 v = gimple_assign_rhs1 (def_stmt);
3834 continue;
3835 CASE_CONVERT:
3836 t = gimple_assign_rhs1 (def_stmt);
3837 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3838 || (TYPE_PRECISION (TREE_TYPE (v))
3839 < TYPE_PRECISION (TREE_TYPE (t))))
3840 return;
3841 if (!linear_step)
3842 linear_step = 1;
3843 v = t;
3844 continue;
3845 default:
3846 return;
3848 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3849 && loop->simduid
3850 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3851 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3852 == loop->simduid))
3854 if (!linear_step)
3855 linear_step = 1;
3856 arginfo->linear_step = linear_step;
3857 arginfo->op = base;
3858 arginfo->simd_lane_linear = true;
3859 return;
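
/* A simplified, self-contained model of the recognition above.  For a
   pointer computed as

       _1 = (sizetype) lane_;   <-- lane_ is the IFN_GOMP_SIMD_LANE result
       _2 = _1 * 4;
       op_ = base_ + _2;

   the walk records arginfo->op = base_ and arginfo->linear_step = 4.
   The toy_* names are invented and the def chain is given outermost
   first, i.e. starting from the POINTER_PLUS_EXPR offset operand.  */

#include <vector>

enum toy_rhs_code { TOY_PLUS_CST, TOY_MULT_CST, TOY_CONVERT };
struct toy_def { toy_rhs_code code; long cst; };

/* Return the recognized per-lane step, or 0 if the chain is rejected.  */
static long
toy_lane_linear_step (const std::vector<toy_def> &chain)
{
  long step = 0;
  for (const toy_def &d : chain)
    switch (d.code)
      {
      case TOY_PLUS_CST:
	if (step)
	  return 0;	/* Constant offsets must precede the scaling.  */
	break;		/* The offset folds into the recorded base.  */
      case TOY_MULT_CST:
	if (step || d.cst == 0)
	  return 0;	/* At most one scaling by a nonzero constant.  */
	step = d.cst;
	break;
      case TOY_CONVERT:
	if (!step)
	  step = 1;	/* A plain widening conversion keeps step 1.  */
	break;
      }
  return step ? step : 1;
}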
3864 /* Function vectorizable_simd_clone_call.
3866 Check if STMT_INFO performs a function call that can be vectorized
3867 by calling a simd clone of the function.
3868 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3869 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3870 Return true if STMT_INFO is vectorizable in this way. */
3872 static bool
3873 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3874 gimple_stmt_iterator *gsi,
3875 gimple **vec_stmt, slp_tree slp_node,
3876 stmt_vector_for_cost *)
3878 tree vec_dest;
3879 tree scalar_dest;
3880 tree op, type;
3881 tree vec_oprnd0 = NULL_TREE;
3882 tree vectype;
3883 poly_uint64 nunits;
3884 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3885 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3886 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3887 tree fndecl, new_temp;
3888 int ncopies, j;
3889 auto_vec<simd_call_arg_info> arginfo;
3890 vec<tree> vargs = vNULL;
3891 size_t i, nargs;
3892 tree lhs, rtype, ratype;
3893 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3894 int masked_call_offset = 0;
3896 /* Is STMT a vectorizable call? */
3897 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3898 if (!stmt)
3899 return false;
3901 fndecl = gimple_call_fndecl (stmt);
3902 if (fndecl == NULL_TREE
3903 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
3905 fndecl = gimple_call_arg (stmt, 0);
3906 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
3907 fndecl = TREE_OPERAND (fndecl, 0);
3908 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
3909 masked_call_offset = 1;
3911 if (fndecl == NULL_TREE)
3912 return false;
3914 struct cgraph_node *node = cgraph_node::get (fndecl);
3915 if (node == NULL || node->simd_clones == NULL)
3916 return false;
3918 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3919 return false;
3921 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3922 && ! vec_stmt)
3923 return false;
3925 if (gimple_call_lhs (stmt)
3926 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3927 return false;
3929 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3931 vectype = STMT_VINFO_VECTYPE (stmt_info);
3933 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3934 return false;
3936 /* Process function arguments. */
3937 nargs = gimple_call_num_args (stmt) - masked_call_offset;
3939 /* Bail out if the function has zero arguments. */
3940 if (nargs == 0)
3941 return false;
3943 vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
3944 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
3945 arginfo.reserve (nargs, true);
3946 auto_vec<slp_tree> slp_op;
3947 slp_op.safe_grow_cleared (nargs);
3949 for (i = 0; i < nargs; i++)
3951 simd_call_arg_info thisarginfo;
3952 affine_iv iv;
3954 thisarginfo.linear_step = 0;
3955 thisarginfo.align = 0;
3956 thisarginfo.op = NULL_TREE;
3957 thisarginfo.simd_lane_linear = false;
3959 int op_no = i + masked_call_offset;
3960 if (slp_node)
3961 op_no = vect_slp_child_index_for_operand (stmt, op_no, false);
3962 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3963 op_no, &op, &slp_op[i],
3964 &thisarginfo.dt, &thisarginfo.vectype)
3965 || thisarginfo.dt == vect_uninitialized_def)
3967 if (dump_enabled_p ())
3968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3969 "use not simple.\n");
3970 return false;
3973 if (thisarginfo.dt == vect_constant_def
3974 || thisarginfo.dt == vect_external_def)
3976 /* With SLP we determine the vector type of constants/externals
3977 at analysis time, handling conflicts via
3978 vect_maybe_update_slp_op_vectype. At transform time
3979 we have a vector type recorded for SLP. */
3980 gcc_assert (!vec_stmt
3981 || !slp_node
3982 || thisarginfo.vectype != NULL_TREE);
3983 if (!vec_stmt)
3984 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
3985 TREE_TYPE (op),
3986 slp_node);
3988 else
3989 gcc_assert (thisarginfo.vectype != NULL_TREE);
3991 /* For linear arguments, the analyze phase should have saved
3992 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
3993 if (i * 3 + 4 <= simd_clone_info.length ()
3994 && simd_clone_info[i * 3 + 2])
3996 gcc_assert (vec_stmt);
3997 thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
3998 thisarginfo.op = simd_clone_info[i * 3 + 1];
3999 thisarginfo.simd_lane_linear
4000 = (simd_clone_info[i * 3 + 3] == boolean_true_node);
4001 /* If the loop has been peeled for alignment, we need to adjust it. */
4002 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4003 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4004 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4006 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4007 tree step = simd_clone_info[i * 3 + 2];
4008 tree opt = TREE_TYPE (thisarginfo.op);
4009 bias = fold_convert (TREE_TYPE (step), bias);
4010 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4011 thisarginfo.op
4012 = fold_build2 (POINTER_TYPE_P (opt)
4013 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4014 thisarginfo.op, bias);
4017 else if (!vec_stmt
4018 && thisarginfo.dt != vect_constant_def
4019 && thisarginfo.dt != vect_external_def
4020 && loop_vinfo
4021 && TREE_CODE (op) == SSA_NAME
4022 && simple_iv (loop, loop_containing_stmt (stmt), op,
4023 &iv, false)
4024 && tree_fits_shwi_p (iv.step))
4026 thisarginfo.linear_step = tree_to_shwi (iv.step);
4027 thisarginfo.op = iv.base;
4029 else if ((thisarginfo.dt == vect_constant_def
4030 || thisarginfo.dt == vect_external_def)
4031 && POINTER_TYPE_P (TREE_TYPE (op)))
4032 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4033 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4034 linear too. */
4035 if (POINTER_TYPE_P (TREE_TYPE (op))
4036 && !thisarginfo.linear_step
4037 && !vec_stmt
4038 && thisarginfo.dt != vect_constant_def
4039 && thisarginfo.dt != vect_external_def
4040 && loop_vinfo
4041 && TREE_CODE (op) == SSA_NAME)
4042 vect_simd_lane_linear (op, loop, &thisarginfo);
4044 arginfo.quick_push (thisarginfo);
4047 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4048 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
4049 unsigned int badness = 0;
4050 struct cgraph_node *bestn = NULL;
4051 if (simd_clone_info.exists ())
4052 bestn = cgraph_node::get (simd_clone_info[0]);
4053 else
4054 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4055 n = n->simdclone->next_clone)
4057 unsigned int this_badness = 0;
4058 unsigned int num_calls;
4059 /* The number of arguments in the call and the number of parameters in
4060 the simdclone should match. However, when the simdclone is
4061 'inbranch', it could have one more parameter than nargs when using
4062 an inbranch simdclone for a call that is not in a branch, either in a
4063 non-masked loop using an all-true constant mask, or inside a masked
4064 loop using its mask. */
4065 size_t simd_nargs = n->simdclone->nargs;
4066 if (!masked_call_offset && n->simdclone->inbranch)
4067 simd_nargs--;
4068 if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
4069 &num_calls)
4070 || (!n->simdclone->inbranch && (masked_call_offset > 0))
4071 || (nargs != simd_nargs))
4072 continue;
4073 if (num_calls != 1)
4074 this_badness += exact_log2 (num_calls) * 4096;
4075 if (n->simdclone->inbranch)
4076 this_badness += 8192;
4077 int target_badness = targetm.simd_clone.usable (n);
4078 if (target_badness < 0)
4079 continue;
4080 this_badness += target_badness * 512;
4081 for (i = 0; i < nargs; i++)
4083 switch (n->simdclone->args[i].arg_type)
4085 case SIMD_CLONE_ARG_TYPE_VECTOR:
4086 if (!useless_type_conversion_p
4087 (n->simdclone->args[i].orig_type,
4088 TREE_TYPE (gimple_call_arg (stmt,
4089 i + masked_call_offset))))
4090 i = -1;
4091 else if (arginfo[i].dt == vect_constant_def
4092 || arginfo[i].dt == vect_external_def
4093 || arginfo[i].linear_step)
4094 this_badness += 64;
4095 break;
4096 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4097 if (arginfo[i].dt != vect_constant_def
4098 && arginfo[i].dt != vect_external_def)
4099 i = -1;
4100 break;
4101 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4102 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4103 if (arginfo[i].dt == vect_constant_def
4104 || arginfo[i].dt == vect_external_def
4105 || (arginfo[i].linear_step
4106 != n->simdclone->args[i].linear_step))
4107 i = -1;
4108 break;
4109 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4110 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4111 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4112 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4113 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4114 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4115 /* FORNOW */
4116 i = -1;
4117 break;
4118 case SIMD_CLONE_ARG_TYPE_MASK:
4119 /* While we can create a traditional data vector from
4120 an incoming integer mode mask, we have no good way to
4121 generate an integer mode mask from a traditional
4122 boolean vector input. */
4123 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4124 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4125 i = -1;
4126 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4127 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4128 this_badness += 2048;
4129 break;
4131 if (i == (size_t) -1)
4132 break;
4133 if (n->simdclone->args[i].alignment > arginfo[i].align)
4135 i = -1;
4136 break;
4138 if (arginfo[i].align)
4139 this_badness += (exact_log2 (arginfo[i].align)
4140 - exact_log2 (n->simdclone->args[i].alignment));
4142 if (i == (size_t) -1)
4143 continue;
4144 if (masked_call_offset == 0
4145 && n->simdclone->inbranch
4146 && n->simdclone->nargs > nargs)
4148 gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4149 SIMD_CLONE_ARG_TYPE_MASK);
4150 /* Penalize using a masked SIMD clone for a call that is not in a branch
4151 when the loop is not fully masked, as we'd have to construct an all-true mask. */
4152 if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4153 this_badness += 64;
4155 if (bestn == NULL || this_badness < badness)
4157 bestn = n;
4158 badness = this_badness;
4162 if (bestn == NULL)
4163 return false;
4165 unsigned int num_mask_args = 0;
4166 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4167 for (i = 0; i < nargs; i++)
4168 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4169 num_mask_args++;
4171 for (i = 0; i < nargs; i++)
4173 if ((arginfo[i].dt == vect_constant_def
4174 || arginfo[i].dt == vect_external_def)
4175 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4177 tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
4178 i + masked_call_offset));
4179 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4180 slp_node);
4181 if (arginfo[i].vectype == NULL
4182 || !constant_multiple_p (bestn->simdclone->simdlen,
4183 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4184 return false;
4187 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4188 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4190 if (dump_enabled_p ())
4191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4192 "vector mask arguments are not supported.\n");
4193 return false;
4196 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4198 tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
4199 if (bestn->simdclone->mask_mode == VOIDmode)
4201 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype),
4202 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4204 /* FORNOW we only have partial support for vector-type masks
4205 that can't hold all of simdlen. */
4206 if (dump_enabled_p ())
4207 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4208 vect_location,
4209 "in-branch vector clones are not yet"
4210 " supported for mismatched vector sizes.\n");
4211 return false;
4214 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4216 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4217 || maybe_ne (exact_div (bestn->simdclone->simdlen,
4218 num_mask_args),
4219 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4221 /* FORNOW we only have partial support for integer-type masks
4222 that represent the same number of lanes as the
4223 vectorized mask inputs. */
4224 if (dump_enabled_p ())
4225 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4226 vect_location,
4227 "in-branch vector clones are not yet "
4228 "supported for mismatched vector sizes.\n");
4229 return false;
4232 else
4234 if (dump_enabled_p ())
4235 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4236 vect_location,
4237 "in-branch vector clones not supported"
4238 " on this target.\n");
4239 return false;
4244 fndecl = bestn->decl;
4245 nunits = bestn->simdclone->simdlen;
4246 if (slp_node)
4247 ncopies = vector_unroll_factor (vf * group_size, nunits);
4248 else
4249 ncopies = vector_unroll_factor (vf, nunits);
4251 /* If the function isn't const, only allow it in simd loops where user
4252 has asserted that at least nunits consecutive iterations can be
4253 performed using SIMD instructions. */
4254 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4255 && gimple_vuse (stmt))
4256 return false;
4258 /* Sanity check: make sure that at least one copy of the vectorized stmt
4259 needs to be generated. */
4260 gcc_assert (ncopies >= 1);
4262 if (!vec_stmt) /* transformation not required. */
4264 if (slp_node)
4265 for (unsigned i = 0; i < nargs; ++i)
4266 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4268 if (dump_enabled_p ())
4269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4270 "incompatible vector types for invariants\n");
4271 return false;
4273 /* When the original call is pure or const but the SIMD ABI dictates
4274 an aggregate return, we will have to use a virtual definition and
4275 in a loop eventually even need to add a virtual PHI. That's
4276 not straightforward, so allow fixing this up via renaming. */
4277 if (gimple_call_lhs (stmt)
4278 && !gimple_vdef (stmt)
4279 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4280 vinfo->any_known_not_updated_vssa = true;
4281 /* ??? For SLP code-gen we end up inserting after the last
4282 vector argument def rather than at the original call position
4283 so automagic virtual operand updating doesn't work. */
4284 if (gimple_vuse (stmt) && slp_node)
4285 vinfo->any_known_not_updated_vssa = true;
4286 simd_clone_info.safe_push (bestn->decl);
4287 for (i = 0; i < bestn->simdclone->nargs; i++)
4289 switch (bestn->simdclone->args[i].arg_type)
4291 default:
4292 continue;
4293 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4294 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4296 simd_clone_info.safe_grow_cleared (i * 3 + 1, true);
4297 simd_clone_info.safe_push (arginfo[i].op);
4298 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4299 ? size_type_node : TREE_TYPE (arginfo[i].op);
4300 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4301 simd_clone_info.safe_push (ls);
4302 tree sll = arginfo[i].simd_lane_linear
4303 ? boolean_true_node : boolean_false_node;
4304 simd_clone_info.safe_push (sll);
4306 break;
4307 case SIMD_CLONE_ARG_TYPE_MASK:
4308 if (loop_vinfo
4309 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4310 vect_record_loop_mask (loop_vinfo,
4311 &LOOP_VINFO_MASKS (loop_vinfo),
4312 ncopies, vectype, op);
4314 break;
4318 if (!bestn->simdclone->inbranch && loop_vinfo)
4320 if (dump_enabled_p ()
4321 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4322 dump_printf_loc (MSG_NOTE, vect_location,
4323 "can't use a fully-masked loop because a"
4324 " non-masked simd clone was selected.\n");
4325 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4328 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4329 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4330 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4331 dt, slp_node, cost_vec); */
4332 return true;
4335 /* Transform. */
4337 if (dump_enabled_p ())
4338 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4340 /* Handle def. */
4341 scalar_dest = gimple_call_lhs (stmt);
4342 vec_dest = NULL_TREE;
4343 rtype = NULL_TREE;
4344 ratype = NULL_TREE;
4345 if (scalar_dest)
4347 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4348 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4349 if (TREE_CODE (rtype) == ARRAY_TYPE)
4351 ratype = rtype;
4352 rtype = TREE_TYPE (ratype);
4356 auto_vec<vec<tree> > vec_oprnds;
4357 auto_vec<unsigned> vec_oprnds_i;
4358 vec_oprnds_i.safe_grow_cleared (nargs, true);
4359 if (slp_node)
4361 vec_oprnds.reserve_exact (nargs);
4362 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4364 else
4365 vec_oprnds.safe_grow_cleared (nargs, true);
4366 for (j = 0; j < ncopies; ++j)
4368 poly_uint64 callee_nelements;
4369 poly_uint64 caller_nelements;
4370 /* Build argument list for the vectorized call. */
4371 if (j == 0)
4372 vargs.create (nargs);
4373 else
4374 vargs.truncate (0);
4376 for (i = 0; i < nargs; i++)
4378 unsigned int k, l, m, o;
4379 tree atype;
4380 op = gimple_call_arg (stmt, i + masked_call_offset);
4381 switch (bestn->simdclone->args[i].arg_type)
4383 case SIMD_CLONE_ARG_TYPE_VECTOR:
4384 atype = bestn->simdclone->args[i].vector_type;
4385 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4386 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4387 o = vector_unroll_factor (nunits, callee_nelements);
4388 for (m = j * o; m < (j + 1) * o; m++)
4390 if (known_lt (callee_nelements, caller_nelements))
4392 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4393 if (!constant_multiple_p (caller_nelements,
4394 callee_nelements, &k))
4395 gcc_unreachable ();
4397 gcc_assert ((k & (k - 1)) == 0);
4398 if (m == 0)
4400 if (!slp_node)
4401 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4402 ncopies * o / k, op,
4403 &vec_oprnds[i]);
4404 vec_oprnds_i[i] = 0;
4405 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4407 else
4409 vec_oprnd0 = arginfo[i].op;
4410 if ((m & (k - 1)) == 0)
4411 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4413 arginfo[i].op = vec_oprnd0;
4414 vec_oprnd0
4415 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4416 bitsize_int (prec),
4417 bitsize_int ((m & (k - 1)) * prec));
4418 gassign *new_stmt
4419 = gimple_build_assign (make_ssa_name (atype),
4420 vec_oprnd0);
4421 vect_finish_stmt_generation (vinfo, stmt_info,
4422 new_stmt, gsi);
4423 vargs.safe_push (gimple_assign_lhs (new_stmt));
4425 else
4427 if (!constant_multiple_p (callee_nelements,
4428 caller_nelements, &k))
4429 gcc_unreachable ();
4430 gcc_assert ((k & (k - 1)) == 0);
4431 vec<constructor_elt, va_gc> *ctor_elts;
4432 if (k != 1)
4433 vec_alloc (ctor_elts, k);
4434 else
4435 ctor_elts = NULL;
4436 for (l = 0; l < k; l++)
4438 if (m == 0 && l == 0)
4440 if (!slp_node)
4441 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4442 k * o * ncopies,
4444 &vec_oprnds[i]);
4445 vec_oprnds_i[i] = 0;
4446 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4448 else
4449 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4450 arginfo[i].op = vec_oprnd0;
4451 if (k == 1)
4452 break;
4453 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4454 vec_oprnd0);
4456 if (k == 1)
4457 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4458 atype))
4460 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4461 vec_oprnd0);
4462 gassign *new_stmt
4463 = gimple_build_assign (make_ssa_name (atype),
4464 vec_oprnd0);
4465 vect_finish_stmt_generation (vinfo, stmt_info,
4466 new_stmt, gsi);
4467 vargs.safe_push (gimple_get_lhs (new_stmt));
4469 else
4470 vargs.safe_push (vec_oprnd0);
4471 else
4473 vec_oprnd0 = build_constructor (atype, ctor_elts);
4474 gassign *new_stmt
4475 = gimple_build_assign (make_ssa_name (atype),
4476 vec_oprnd0);
4477 vect_finish_stmt_generation (vinfo, stmt_info,
4478 new_stmt, gsi);
4479 vargs.safe_push (gimple_assign_lhs (new_stmt));
4483 break;
4484 case SIMD_CLONE_ARG_TYPE_MASK:
4485 if (bestn->simdclone->mask_mode == VOIDmode)
4487 atype = bestn->simdclone->args[i].vector_type;
4488 tree elt_type = TREE_TYPE (atype);
4489 tree one = fold_convert (elt_type, integer_one_node);
4490 tree zero = fold_convert (elt_type, integer_zero_node);
4491 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4492 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4493 o = vector_unroll_factor (nunits, callee_nelements);
4494 for (m = j * o; m < (j + 1) * o; m++)
4496 if (maybe_lt (callee_nelements, caller_nelements))
4498 /* The mask type has fewer elements than simdlen. */
4500 /* FORNOW */
4501 gcc_unreachable ();
4503 else if (known_eq (callee_nelements, caller_nelements))
4505 /* The SIMD clone's mask type has the same number of
4506 elements as the vectorized mask operand. */
4507 if (m == 0)
4509 if (!slp_node)
4510 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4511 o * ncopies,
4513 &vec_oprnds[i]);
4514 vec_oprnds_i[i] = 0;
4516 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4517 if (loop_vinfo
4518 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4520 vec_loop_masks *loop_masks
4521 = &LOOP_VINFO_MASKS (loop_vinfo);
4522 tree loop_mask
4523 = vect_get_loop_mask (loop_vinfo, gsi,
4524 loop_masks, ncopies,
4525 vectype, j);
4526 vec_oprnd0
4527 = prepare_vec_mask (loop_vinfo,
4528 TREE_TYPE (loop_mask),
4529 loop_mask, vec_oprnd0,
4530 gsi);
4531 loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
4532 loop_mask });
4535 vec_oprnd0
4536 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4537 build_vector_from_val (atype, one),
4538 build_vector_from_val (atype, zero));
4539 gassign *new_stmt
4540 = gimple_build_assign (make_ssa_name (atype),
4541 vec_oprnd0);
4542 vect_finish_stmt_generation (vinfo, stmt_info,
4543 new_stmt, gsi);
4544 vargs.safe_push (gimple_assign_lhs (new_stmt));
4546 else
4548 /* The mask type has more elements than simdlen. */
4550 /* FORNOW */
4551 gcc_unreachable ();
4555 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4557 atype = bestn->simdclone->args[i].vector_type;
4558 /* Guess the number of lanes represented by atype. */
4559 poly_uint64 atype_subparts
4560 = exact_div (bestn->simdclone->simdlen,
4561 num_mask_args);
4562 o = vector_unroll_factor (nunits, atype_subparts);
4563 for (m = j * o; m < (j + 1) * o; m++)
4565 if (m == 0)
4567 if (!slp_node)
4568 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4569 o * ncopies,
4571 &vec_oprnds[i]);
4572 vec_oprnds_i[i] = 0;
4574 if (maybe_lt (atype_subparts,
4575 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4577 /* The mask argument has fewer elements than the
4578 input vector. */
4579 /* FORNOW */
4580 gcc_unreachable ();
4582 else if (known_eq (atype_subparts,
4583 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4585 /* The vector mask argument matches the input
4586 in the number of lanes, but not necessarily
4587 in the mode. */
4588 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4589 tree st = lang_hooks.types.type_for_mode
4590 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4591 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4592 vec_oprnd0);
4593 gassign *new_stmt
4594 = gimple_build_assign (make_ssa_name (st),
4595 vec_oprnd0);
4596 vect_finish_stmt_generation (vinfo, stmt_info,
4597 new_stmt, gsi);
4598 if (!types_compatible_p (atype, st))
4600 new_stmt
4601 = gimple_build_assign (make_ssa_name (atype),
4602 NOP_EXPR,
4603 gimple_assign_lhs
4604 (new_stmt));
4605 vect_finish_stmt_generation (vinfo, stmt_info,
4606 new_stmt, gsi);
4608 vargs.safe_push (gimple_assign_lhs (new_stmt));
4610 else
4612 /* The mask argument has more elements than the
4613 input vector. */
4614 /* FORNOW */
4615 gcc_unreachable ();
4619 else
4620 gcc_unreachable ();
4621 break;
4622 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4623 vargs.safe_push (op);
4624 break;
4625 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4626 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4627 if (j == 0)
4629 gimple_seq stmts;
4630 arginfo[i].op
4631 = force_gimple_operand (unshare_expr (arginfo[i].op),
4632 &stmts, true, NULL_TREE);
4633 if (stmts != NULL)
4635 basic_block new_bb;
4636 edge pe = loop_preheader_edge (loop);
4637 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4638 gcc_assert (!new_bb);
4640 if (arginfo[i].simd_lane_linear)
4642 vargs.safe_push (arginfo[i].op);
4643 break;
4645 tree phi_res = copy_ssa_name (op);
4646 gphi *new_phi = create_phi_node (phi_res, loop->header);
4647 add_phi_arg (new_phi, arginfo[i].op,
4648 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4649 enum tree_code code
4650 = POINTER_TYPE_P (TREE_TYPE (op))
4651 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4652 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4653 ? sizetype : TREE_TYPE (op);
4654 poly_widest_int cst
4655 = wi::mul (bestn->simdclone->args[i].linear_step,
4656 ncopies * nunits);
4657 tree tcst = wide_int_to_tree (type, cst);
4658 tree phi_arg = copy_ssa_name (op);
4659 gassign *new_stmt
4660 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4661 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4662 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4663 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4664 UNKNOWN_LOCATION);
4665 arginfo[i].op = phi_res;
4666 vargs.safe_push (phi_res);
4668 else
4670 enum tree_code code
4671 = POINTER_TYPE_P (TREE_TYPE (op))
4672 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4673 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4674 ? sizetype : TREE_TYPE (op);
4675 poly_widest_int cst
4676 = wi::mul (bestn->simdclone->args[i].linear_step,
4677 j * nunits);
4678 tree tcst = wide_int_to_tree (type, cst);
4679 new_temp = make_ssa_name (TREE_TYPE (op));
4680 gassign *new_stmt
4681 = gimple_build_assign (new_temp, code,
4682 arginfo[i].op, tcst);
4683 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4684 vargs.safe_push (new_temp);
4686 break;
4687 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4688 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4689 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4690 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4691 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4692 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4693 default:
4694 gcc_unreachable ();
4698 if (masked_call_offset == 0
4699 && bestn->simdclone->inbranch
4700 && bestn->simdclone->nargs > nargs)
4702 unsigned long m, o;
4703 size_t mask_i = bestn->simdclone->nargs - 1;
4704 tree mask;
4705 gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4706 SIMD_CLONE_ARG_TYPE_MASK);
4708 tree masktype = bestn->simdclone->args[mask_i].vector_type;
4709 callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4710 o = vector_unroll_factor (nunits, callee_nelements);
4711 for (m = j * o; m < (j + 1) * o; m++)
4713 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4715 vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4716 mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4717 ncopies, vectype, j);
4719 else
4720 mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
4722 gassign *new_stmt;
4723 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4725 /* This means we are dealing with integer mask modes.
4726 First convert to an integer type with the same size as
4727 the current vector type. */
4728 unsigned HOST_WIDE_INT intermediate_size
4729 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4730 tree mid_int_type =
4731 build_nonstandard_integer_type (intermediate_size, 1);
4732 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4733 new_stmt
4734 = gimple_build_assign (make_ssa_name (mid_int_type),
4735 mask);
4736 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4737 /* Then zero-extend to the mask mode. */
4738 mask = fold_build1 (NOP_EXPR, masktype,
4739 gimple_get_lhs (new_stmt));
4741 else if (bestn->simdclone->mask_mode == VOIDmode)
4743 tree one = fold_convert (TREE_TYPE (masktype),
4744 integer_one_node);
4745 tree zero = fold_convert (TREE_TYPE (masktype),
4746 integer_zero_node);
4747 mask = build3 (VEC_COND_EXPR, masktype, mask,
4748 build_vector_from_val (masktype, one),
4749 build_vector_from_val (masktype, zero));
4751 else
4752 gcc_unreachable ();
4754 new_stmt = gimple_build_assign (make_ssa_name (masktype), mask);
4755 vect_finish_stmt_generation (vinfo, stmt_info,
4756 new_stmt, gsi);
4757 mask = gimple_assign_lhs (new_stmt);
4758 vargs.safe_push (mask);
4762 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4763 if (vec_dest)
4765 gcc_assert (ratype
4766 || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4767 if (ratype)
4768 new_temp = create_tmp_var (ratype);
4769 else if (useless_type_conversion_p (vectype, rtype))
4770 new_temp = make_ssa_name (vec_dest, new_call);
4771 else
4772 new_temp = make_ssa_name (rtype, new_call);
4773 gimple_call_set_lhs (new_call, new_temp);
4775 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4776 gimple *new_stmt = new_call;
4778 if (vec_dest)
4780 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
4782 unsigned int k, l;
4783 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4784 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4785 k = vector_unroll_factor (nunits,
4786 TYPE_VECTOR_SUBPARTS (vectype));
4787 gcc_assert ((k & (k - 1)) == 0);
4788 for (l = 0; l < k; l++)
4790 tree t;
4791 if (ratype)
4793 t = build_fold_addr_expr (new_temp);
4794 t = build2 (MEM_REF, vectype, t,
4795 build_int_cst (TREE_TYPE (t), l * bytes));
4797 else
4798 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4799 bitsize_int (prec), bitsize_int (l * prec));
4800 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4801 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4803 if (j == 0 && l == 0)
4804 *vec_stmt = new_stmt;
4805 if (slp_node)
4806 SLP_TREE_VEC_DEFS (slp_node)
4807 .quick_push (gimple_assign_lhs (new_stmt));
4808 else
4809 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4812 if (ratype)
4813 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4814 continue;
4816 else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
4818 unsigned int k;
4819 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
4820 TYPE_VECTOR_SUBPARTS (rtype), &k))
4821 gcc_unreachable ();
4822 gcc_assert ((k & (k - 1)) == 0);
4823 if ((j & (k - 1)) == 0)
4824 vec_alloc (ret_ctor_elts, k);
4825 if (ratype)
4827 unsigned int m, o;
4828 o = vector_unroll_factor (nunits,
4829 TYPE_VECTOR_SUBPARTS (rtype));
4830 for (m = 0; m < o; m++)
4832 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4833 size_int (m), NULL_TREE, NULL_TREE);
4834 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4835 tem);
4836 vect_finish_stmt_generation (vinfo, stmt_info,
4837 new_stmt, gsi);
4838 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4839 gimple_assign_lhs (new_stmt));
4841 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4843 else
4844 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4845 if ((j & (k - 1)) != k - 1)
4846 continue;
4847 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4848 new_stmt
4849 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4850 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4852 if ((unsigned) j == k - 1)
4853 *vec_stmt = new_stmt;
4854 if (slp_node)
4855 SLP_TREE_VEC_DEFS (slp_node)
4856 .quick_push (gimple_assign_lhs (new_stmt));
4857 else
4858 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4859 continue;
4861 else if (ratype)
4863 tree t = build_fold_addr_expr (new_temp);
4864 t = build2 (MEM_REF, vectype, t,
4865 build_int_cst (TREE_TYPE (t), 0));
4866 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4867 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4868 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4870 else if (!useless_type_conversion_p (vectype, rtype))
4872 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4873 new_stmt
4874 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4875 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4879 if (j == 0)
4880 *vec_stmt = new_stmt;
4881 if (slp_node)
4882 SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
4883 else
4884 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4887 for (i = 0; i < nargs; ++i)
4889 vec<tree> oprndsi = vec_oprnds[i];
4890 oprndsi.release ();
4892 vargs.release ();
4894 /* Mark the clone as no longer being a candidate for GC. */
4895 bestn->gc_candidate = false;
4897 /* The call in STMT might prevent it from being removed in dce.
4898 We however cannot remove it here, due to the way the ssa name
4899 it defines is mapped to the new definition. So just replace
4900 the rhs of the statement with something harmless. */
4902 if (slp_node)
4903 return true;
4905 gimple *new_stmt;
4906 if (scalar_dest)
4908 type = TREE_TYPE (scalar_dest);
4909 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4910 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4912 else
4913 new_stmt = gimple_build_nop ();
4914 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4915 unlink_stmt_vdef (stmt);
4917 return true;
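
/* A simplified, self-contained model of the clone selection above: every
   usable candidate gets a badness score (more calls per vector iteration,
   being inbranch, and any target-reported cost all add to it), and the
   candidate with the lowest score wins.  The toy_clone type, its field
   names and the floor-log2 helper are illustrative simplifications; the
   per-argument penalties of the real loop are omitted.  */

#include <optional>
#include <vector>

struct toy_clone { unsigned simdlen; bool inbranch; int target_cost; };

static std::optional<toy_clone>
toy_pick_clone (const std::vector<toy_clone> &clones,
		unsigned vf, bool call_is_masked)
{
  std::optional<toy_clone> best;
  unsigned best_badness = 0;
  for (const toy_clone &c : clones)
    {
      /* Reject clones that cannot serve this call at all.  */
      if (vf % c.simdlen != 0
	  || (call_is_masked && !c.inbranch)
	  || c.target_cost < 0)
	continue;
      unsigned num_calls = vf / c.simdlen;
      unsigned log2_calls = 0;
      while ((1u << (log2_calls + 1)) <= num_calls)
	log2_calls++;
      unsigned badness = log2_calls * 4096
			 + (c.inbranch ? 8192 : 0)
			 + (unsigned) c.target_cost * 512;
      if (!best || badness < best_badness)
	{
	  best = c;
	  best_badness = badness;
	}
    }
  return best;
}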
4921 /* Function vect_gen_widened_results_half
4923 Create a vector stmt whose code, number of arguments and result
4924 variable are CH, OP_TYPE and VEC_DEST, and whose arguments are
4925 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4926 When CH is an internal function rather than a tree code, an
4927 internal-function call is built instead of an assignment.
4928 STMT_INFO is the original scalar stmt that we are vectorizing. */
4930 static gimple *
4931 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
4932 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4933 tree vec_dest, gimple_stmt_iterator *gsi,
4934 stmt_vec_info stmt_info)
4936 gimple *new_stmt;
4937 tree new_temp;
4939 /* Generate half of the widened result: */
4940 if (op_type != binary_op)
4941 vec_oprnd1 = NULL;
4942 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
4943 new_temp = make_ssa_name (vec_dest, new_stmt);
4944 gimple_set_lhs (new_stmt, new_temp);
4945 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4947 return new_stmt;
4951 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4952 For multi-step conversions store the resulting vectors and call the function
4953 recursively. When NARROW_SRC_P is true, there is still a conversion after
4954 the narrowing, so don't store the vectors in the SLP_NODE or in the vector
4955 info of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
4957 static void
4958 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4959 int multi_step_cvt,
4960 stmt_vec_info stmt_info,
4961 vec<tree> &vec_dsts,
4962 gimple_stmt_iterator *gsi,
4963 slp_tree slp_node, code_helper code,
4964 bool narrow_src_p)
4966 unsigned int i;
4967 tree vop0, vop1, new_tmp, vec_dest;
4969 vec_dest = vec_dsts.pop ();
4971 for (i = 0; i < vec_oprnds->length (); i += 2)
4973 /* Create demotion operation. */
4974 vop0 = (*vec_oprnds)[i];
4975 vop1 = (*vec_oprnds)[i + 1];
4976 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
4977 new_tmp = make_ssa_name (vec_dest, new_stmt);
4978 gimple_set_lhs (new_stmt, new_tmp);
4979 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4980 if (multi_step_cvt || narrow_src_p)
4981 /* Store the resulting vector for the next recursive call,
4982 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
4983 (*vec_oprnds)[i/2] = new_tmp;
4984 else
4986 /* This is the last step of the conversion sequence. Store the
4987 vectors in SLP_NODE or in vector info of the scalar statement
4988 (or in STMT_VINFO_RELATED_STMT chain). */
4989 if (slp_node)
4990 slp_node->push_vec_def (new_stmt);
4991 else
4992 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4996 /* For multi-step demotion operations we first generate demotion operations
4997 from the source type to the intermediate types, and then combine the
4998 results (stored in VEC_OPRNDS) with a demotion operation to the destination
4999 type. */
5000 if (multi_step_cvt)
5002 /* At each level of recursion we have half of the operands we had at the
5003 previous level. */
5004 vec_oprnds->truncate ((i+1)/2);
5005 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5006 multi_step_cvt - 1,
5007 stmt_info, vec_dsts, gsi,
5008 slp_node, VEC_PACK_TRUNC_EXPR,
5009 narrow_src_p);
5012 vec_dsts.quick_push (vec_dest);
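
/* A simplified, self-contained model of the pairwise combining above:
   each level packs the operands two at a time, so every recursion level
   halves the number of vectors until one result per output vector
   remains.  toy_demote_level is an invented name; it uses plain int
   vectors and ignores the element-width truncation done by the real
   VEC_PACK_TRUNC_EXPR.  */

#include <cstddef>
#include <vector>

static std::vector<std::vector<int>>
toy_demote_level (const std::vector<std::vector<int>> &ops)
{
  std::vector<std::vector<int>> out;
  for (std::size_t i = 0; i + 1 < ops.size (); i += 2)
    {
      std::vector<int> packed;	/* One VEC_PACK_TRUNC-style combine.  */
      packed.insert (packed.end (), ops[i].begin (), ops[i].end ());
      packed.insert (packed.end (), ops[i + 1].begin (), ops[i + 1].end ());
      out.push_back (packed);
    }
  return out;	/* Half as many vectors as the input; recurse until one.  */
}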
5016 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5017 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5018 STMT_INFO. For multi-step conversions store the resulting vectors and
5019 call the function recursively. */
5021 static void
5022 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5023 vec<tree> *vec_oprnds0,
5024 vec<tree> *vec_oprnds1,
5025 stmt_vec_info stmt_info, tree vec_dest,
5026 gimple_stmt_iterator *gsi,
5027 code_helper ch1,
5028 code_helper ch2, int op_type)
5030 int i;
5031 tree vop0, vop1, new_tmp1, new_tmp2;
5032 gimple *new_stmt1, *new_stmt2;
5033 vec<tree> vec_tmp = vNULL;
5035 vec_tmp.create (vec_oprnds0->length () * 2);
5036 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5038 if (op_type == binary_op)
5039 vop1 = (*vec_oprnds1)[i];
5040 else
5041 vop1 = NULL_TREE;
5043 /* Generate the two halves of the promotion operation. */
5044 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5045 op_type, vec_dest, gsi,
5046 stmt_info);
5047 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5048 op_type, vec_dest, gsi,
5049 stmt_info);
5050 if (is_gimple_call (new_stmt1))
5052 new_tmp1 = gimple_call_lhs (new_stmt1);
5053 new_tmp2 = gimple_call_lhs (new_stmt2);
5055 else
5057 new_tmp1 = gimple_assign_lhs (new_stmt1);
5058 new_tmp2 = gimple_assign_lhs (new_stmt2);
5061 /* Store the results for the next step. */
5062 vec_tmp.quick_push (new_tmp1);
5063 vec_tmp.quick_push (new_tmp2);
5066 vec_oprnds0->release ();
5067 *vec_oprnds0 = vec_tmp;
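
/* A simplified, self-contained model of the two-half promotion above:
   each input vector yields a "lo" statement and a "hi" statement, so the
   result list is twice as long as the input list.  The positional split
   below is only for illustration (which lanes land in which half is a
   target detail), and toy_widen_halves is an invented name.  */

#include <array>
#include <cstdint>
#include <utility>

static std::pair<std::array<int32_t, 4>, std::array<int32_t, 4>>
toy_widen_halves (const std::array<int16_t, 8> &in)
{
  std::array<int32_t, 4> lo, hi;
  for (int i = 0; i < 4; i++)
    {
      lo[i] = in[i];		/* First widened half of the input.  */
      hi[i] = in[i + 4];	/* Second widened half.  */
    }
  return { lo, hi };
}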
5070 /* Create vectorized promotion stmts for widening stmts using only half the
5071 potential vector size for input. */
5072 static void
5073 vect_create_half_widening_stmts (vec_info *vinfo,
5074 vec<tree> *vec_oprnds0,
5075 vec<tree> *vec_oprnds1,
5076 stmt_vec_info stmt_info, tree vec_dest,
5077 gimple_stmt_iterator *gsi,
5078 code_helper code1,
5079 int op_type)
5081 int i;
5082 tree vop0, vop1;
5083 gimple *new_stmt1;
5084 gimple *new_stmt2;
5085 gimple *new_stmt3;
5086 vec<tree> vec_tmp = vNULL;
5088 vec_tmp.create (vec_oprnds0->length ());
5089 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5091 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5093 gcc_assert (op_type == binary_op);
5094 vop1 = (*vec_oprnds1)[i];
5096 /* Widen the first vector input. */
5097 out_type = TREE_TYPE (vec_dest);
5098 new_tmp1 = make_ssa_name (out_type);
5099 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5100 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5101 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5103 /* Widen the second vector input. */
5104 new_tmp2 = make_ssa_name (out_type);
5105 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5106 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5107 /* Perform the operation with both vector inputs widened. */
5108 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5110 else
5112 /* Perform the operation with the single vector input widened. */
5113 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5116 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5117 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5118 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5120 /* Store the results for the next step. */
5121 vec_tmp.quick_push (new_tmp3);
5124 vec_oprnds0->release ();
5125 *vec_oprnds0 = vec_tmp;
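
/* A simplified, self-contained model of the half-widening path above:
   instead of a two-output widening operation, the narrow input(s) are
   converted up front (the NOP_EXPR assignments) and the operation then
   runs directly at the wide type.  toy_half_widening_mult is an invented
   name standing in for a WIDEN_MULT-style statement.  */

#include <cstdint>

static int64_t
toy_half_widening_mult (int32_t a, int32_t b)
{
  int64_t wa = a;	/* Widen the first input.  */
  int64_t wb = b;	/* Widen the second input when it is a vector.  */
  return wa * wb;	/* Perform the operation at the wide type.  */
}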
5129 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5130 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5131 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5132 Return true if STMT_INFO is vectorizable in this way. */
5134 static bool
5135 vectorizable_conversion (vec_info *vinfo,
5136 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5137 gimple **vec_stmt, slp_tree slp_node,
5138 stmt_vector_for_cost *cost_vec)
5140 tree vec_dest, cvt_op = NULL_TREE;
5141 tree scalar_dest;
5142 tree op0, op1 = NULL_TREE;
5143 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5144 tree_code tc1, tc2;
5145 code_helper code, code1, code2;
5146 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5147 tree new_temp;
5148 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5149 int ndts = 2;
5150 poly_uint64 nunits_in;
5151 poly_uint64 nunits_out;
5152 tree vectype_out, vectype_in;
5153 int ncopies, i;
5154 tree lhs_type, rhs_type;
5155 /* For conversions between floating point and integer, there are two NARROW
5156 cases. NARROW_SRC is for FLOAT_EXPR and means
5157 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5158 This is safe when the range of the source integer fits into the lower
5159 precision. NARROW_DST is for FIX_TRUNC_EXPR and means
5160 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5161 For other conversions that narrow, NARROW_DST is used as the
5162 default. */
5163 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
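/* A simplified scalar picture of the two cases, using invented names:

       int64_t x = ...;           NARROW_SRC (FLOAT_EXPR):
       int32_t n = (int32_t) x;     demote first (safe only if X fits),
       float   f = (float) n;       then convert at the narrow width.

       double  d = ...;           NARROW_DST (FIX_TRUNC_EXPR):
       int64_t w = (int64_t) d;     convert first,
       int32_t m = (int32_t) w;     then demote the result.  */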
5164 vec<tree> vec_oprnds0 = vNULL;
5165 vec<tree> vec_oprnds1 = vNULL;
5166 tree vop0;
5167 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5168 int multi_step_cvt = 0;
5169 vec<tree> interm_types = vNULL;
5170 tree intermediate_type, cvt_type = NULL_TREE;
5171 int op_type;
5172 unsigned short fltsz;
5174 /* Is STMT a vectorizable conversion? */
5176 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5177 return false;
5179 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5180 && ! vec_stmt)
5181 return false;
5183 gimple* stmt = stmt_info->stmt;
5184 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5185 return false;
5187 if (gimple_get_lhs (stmt) == NULL_TREE
5188 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5189 return false;
5191 if (TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5192 return false;
5194 if (is_gimple_assign (stmt))
5196 code = gimple_assign_rhs_code (stmt);
5197 op_type = TREE_CODE_LENGTH ((tree_code) code);
5199 else if (gimple_call_internal_p (stmt))
5201 code = gimple_call_internal_fn (stmt);
5202 op_type = gimple_call_num_args (stmt);
5204 else
5205 return false;
5207 bool widen_arith = (code == WIDEN_MULT_EXPR
5208 || code == WIDEN_LSHIFT_EXPR
5209 || widening_fn_p (code));
5211 if (!widen_arith
5212 && !CONVERT_EXPR_CODE_P (code)
5213 && code != FIX_TRUNC_EXPR
5214 && code != FLOAT_EXPR)
5215 return false;
5217 /* Check types of lhs and rhs. */
5218 scalar_dest = gimple_get_lhs (stmt);
5219 lhs_type = TREE_TYPE (scalar_dest);
5220 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5222 /* Check the operands of the operation. */
5223 slp_tree slp_op0, slp_op1 = NULL;
5224 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5225 0, &op0, &slp_op0, &dt[0], &vectype_in))
5227 if (dump_enabled_p ())
5228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5229 "use not simple.\n");
5230 return false;
5233 rhs_type = TREE_TYPE (op0);
5234 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5235 && !((INTEGRAL_TYPE_P (lhs_type)
5236 && INTEGRAL_TYPE_P (rhs_type))
5237 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5238 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5239 return false;
5241 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5242 && ((INTEGRAL_TYPE_P (lhs_type)
5243 && !type_has_mode_precision_p (lhs_type))
5244 || (INTEGRAL_TYPE_P (rhs_type)
5245 && !type_has_mode_precision_p (rhs_type))))
5247 if (dump_enabled_p ())
5248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5249 "type conversion to/from bit-precision unsupported."
5250 "\n");
5251 return false;
5254 if (op_type == binary_op)
5256 gcc_assert (code == WIDEN_MULT_EXPR
5257 || code == WIDEN_LSHIFT_EXPR
5258 || widening_fn_p (code));
5260 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5261 gimple_call_arg (stmt, 0);
5262 tree vectype1_in;
5263 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5264 &op1, &slp_op1, &dt[1], &vectype1_in))
5266 if (dump_enabled_p ())
5267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5268 "use not simple.\n");
5269 return false;
5271 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5272 OP1. */
5273 if (!vectype_in)
5274 vectype_in = vectype1_in;
5277 /* If op0 is an external or constant def, infer the vector type
5278 from the scalar type. */
5279 if (!vectype_in)
5280 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5281 if (vec_stmt)
5282 gcc_assert (vectype_in);
5283 if (!vectype_in)
5285 if (dump_enabled_p ())
5286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5287 "no vectype for scalar type %T\n", rhs_type);
5289 return false;
5292 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5293 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5295 if (dump_enabled_p ())
5296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5297 "can't convert between boolean and non "
5298 "boolean vectors %T\n", rhs_type);
5300 return false;
5303 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5304 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5305 if (known_eq (nunits_out, nunits_in))
5306 if (widen_arith)
5307 modifier = WIDEN;
5308 else
5309 modifier = NONE;
5310 else if (multiple_p (nunits_out, nunits_in))
5311 modifier = NARROW_DST;
5312 else
5314 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5315 modifier = WIDEN;
5318 /* Multiple types in SLP are handled by creating the appropriate number of
5319 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5320 case of SLP. */
5321 if (slp_node)
5322 ncopies = 1;
5323 else if (modifier == NARROW_DST)
5324 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5325 else
5326 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5328 /* Sanity check: make sure that at least one copy of the vectorized stmt
5329 needs to be generated. */
5330 gcc_assert (ncopies >= 1);
5332 bool found_mode = false;
5333 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5334 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5335 opt_scalar_mode rhs_mode_iter;
5337 /* Supportable by target? */
5338 switch (modifier)
5340 case NONE:
5341 if (code != FIX_TRUNC_EXPR
5342 && code != FLOAT_EXPR
5343 && !CONVERT_EXPR_CODE_P (code))
5344 return false;
5345 gcc_assert (code.is_tree_code ());
5346 if (supportable_convert_operation ((tree_code) code, vectype_out,
5347 vectype_in, &tc1))
5349 code1 = tc1;
5350 break;
5353 /* For conversions between float and integer types try whether
5354 we can use intermediate signed integer types to support the
5355 conversion. */
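/* A sketch, assuming the target supports the intermediate operations:
   an int -> double conversion with the same lane count, e.g.
   V4SI -> V4DF (vectors of different size but the same number of lanes,
   as can happen with SLP), may be done as
   int --extend--> long long --FLOAT_EXPR--> double,
   and with -fno-trapping-math a double -> int conversion such as
   V4DF -> V4SI may be done as
   double --FIX_TRUNC_EXPR--> long long --truncate--> int. */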
5356 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
5357 && (code == FLOAT_EXPR ||
5358 (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5360 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5361 bool float_expr_p = code == FLOAT_EXPR;
5362 unsigned short target_size;
5363 scalar_mode intermediate_mode;
5364 if (demotion)
5366 intermediate_mode = lhs_mode;
5367 target_size = GET_MODE_SIZE (rhs_mode);
5369 else
5371 target_size = GET_MODE_SIZE (lhs_mode);
5372 if (!int_mode_for_size
5373 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5374 goto unsupported;
5376 code1 = float_expr_p ? code : NOP_EXPR;
5377 codecvt1 = float_expr_p ? NOP_EXPR : code;
5378 opt_scalar_mode mode_iter;
5379 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5381 intermediate_mode = mode_iter.require ();
5383 if (GET_MODE_SIZE (intermediate_mode) > target_size)
5384 break;
5386 scalar_mode cvt_mode;
5387 if (!int_mode_for_size
5388 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
5389 break;
5391 cvt_type = build_nonstandard_integer_type
5392 (GET_MODE_BITSIZE (cvt_mode), 0);
5394 /* Check if the intermediate type can hold OP0's range.
5395 When converting from float to integer this is not necessary
5396 because values that do not fit the (smaller) target type are
5397 unspecified anyway. */
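/* E.g. if range information proves a 64-bit source only holds values in
   [0, 100000], demoting it to a 32-bit intermediate before the
   FLOAT_EXPR cannot change the result. */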
5398 if (demotion && float_expr_p)
5400 wide_int op_min_value, op_max_value;
5401 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5402 break;
5404 if (cvt_type == NULL_TREE
5405 || (wi::min_precision (op_max_value, SIGNED)
5406 > TYPE_PRECISION (cvt_type))
5407 || (wi::min_precision (op_min_value, SIGNED)
5408 > TYPE_PRECISION (cvt_type)))
5409 continue;
5412 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5413 /* This should only happen for SLP as long as the loop vectorizer
5414 only supports same-sized vectors. */
5415 if (cvt_type == NULL_TREE
5416 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5417 || !supportable_convert_operation ((tree_code) code1,
5418 vectype_out,
5419 cvt_type, &tc1)
5420 || !supportable_convert_operation ((tree_code) codecvt1,
5421 cvt_type,
5422 vectype_in, &tc2))
5423 continue;
5425 found_mode = true;
5426 break;
5429 if (found_mode)
5431 multi_step_cvt++;
5432 interm_types.safe_push (cvt_type);
5433 cvt_type = NULL_TREE;
5434 code1 = tc1;
5435 codecvt1 = tc2;
5436 break;
5439 /* FALLTHRU */
5440 unsupported:
5441 if (dump_enabled_p ())
5442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5443 "conversion not supported by target.\n");
5444 return false;
5446 case WIDEN:
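/* WIDEN covers conversions to wider elements (e.g. char -> int or
   float -> double) as well as widening arithmetic such as
   WIDEN_MULT_EXPR (short * short producing int).  Each widening step
   typically turns one input vector into two output vectors via hi/lo
   unpacking; multi-step chains such as char -> short -> int record the
   intermediate types in INTERM_TYPES. */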
5447 if (known_eq (nunits_in, nunits_out))
5449 if (!(code.is_tree_code ()
5450 && supportable_half_widening_operation ((tree_code) code,
5451 vectype_out, vectype_in,
5452 &tc1)))
5453 goto unsupported;
5454 code1 = tc1;
5455 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5456 break;
5458 if (supportable_widening_operation (vinfo, code, stmt_info,
5459 vectype_out, vectype_in, &code1,
5460 &code2, &multi_step_cvt,
5461 &interm_types))
5463 /* Binary widening operation can only be supported directly by the
5464 architecture. */
5465 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5466 break;
5469 if (code != FLOAT_EXPR
5470 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5471 goto unsupported;
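/* The loop below handles FLOAT_EXPR from a narrow integer to a wider
   float via an intermediate integer type, e.g. (a sketch, subject to
   target support) short -> double as
   short --widen--> int --widening FLOAT_EXPR--> double. */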
5473 fltsz = GET_MODE_SIZE (lhs_mode);
5474 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5476 rhs_mode = rhs_mode_iter.require ();
5477 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5478 break;
5480 cvt_type
5481 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5482 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5483 if (cvt_type == NULL_TREE)
5484 goto unsupported;
5486 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5488 tc1 = ERROR_MARK;
5489 gcc_assert (code.is_tree_code ());
5490 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5491 cvt_type, &tc1))
5492 goto unsupported;
5493 codecvt1 = tc1;
5495 else if (!supportable_widening_operation (vinfo, code,
5496 stmt_info, vectype_out,
5497 cvt_type, &codecvt1,
5498 &codecvt2, &multi_step_cvt,
5499 &interm_types))
5500 continue;
5501 else
5502 gcc_assert (multi_step_cvt == 0);
5504 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5505 cvt_type,
5506 vectype_in, &code1,
5507 &code2, &multi_step_cvt,
5508 &interm_types))
5510 found_mode = true;
5511 break;
5515 if (!found_mode)
5516 goto unsupported;
5518 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5519 codecvt2 = ERROR_MARK;
5520 else
5522 multi_step_cvt++;
5523 interm_types.safe_push (cvt_type);
5524 cvt_type = NULL_TREE;
5526 break;
5528 case NARROW_DST:
5529 gcc_assert (op_type == unary_op);
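/* E.g. an int -> char conversion is implemented with vector pack
   operations, possibly in more than one step (int -> short -> char);
   the intermediate types are recorded in INTERM_TYPES. */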
5530 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5531 &code1, &multi_step_cvt,
5532 &interm_types))
5533 break;
5535 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5536 goto unsupported;
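/* Direct narrowing was not supported above; e.g. (assuming the target
   provides the pieces) double -> short may instead be done as
   double --FIX_TRUNC_EXPR--> long long followed by packing the
   long long vectors down to short. */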
5538 if (code == FIX_TRUNC_EXPR)
5540 cvt_type
5541 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5542 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5543 if (cvt_type == NULL_TREE)
5544 goto unsupported;
5545 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5546 &tc1))
5547 codecvt1 = tc1;
5548 else
5549 goto unsupported;
5550 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5551 &code1, &multi_step_cvt,
5552 &interm_types))
5553 break;
5555 /* If op0 can be represented by a lower-precision integer,
5556 truncate it to cvt_type and then do the FLOAT_EXPR. */
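/* E.g. with 128-bit vectors, a long long -> float conversion whose
   source range fits in int may pack two V2DI vectors into one V4SI
   vector and then convert that to V4SF (illustrative, subject to
   target support). */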
5557 else if (code == FLOAT_EXPR)
5559 wide_int op_min_value, op_max_value;
5560 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5561 goto unsupported;
5563 cvt_type
5564 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5565 if (cvt_type == NULL_TREE
5566 || (wi::min_precision (op_max_value, SIGNED)
5567 > TYPE_PRECISION (cvt_type))
5568 || (wi::min_precision (op_min_value, SIGNED)
5569 > TYPE_PRECISION (cvt_type)))
5570 goto unsupported;
5572 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5573 if (cvt_type == NULL_TREE)
5574 goto unsupported;
5575 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5576 &code1, &multi_step_cvt,
5577 &interm_types))
5578 goto unsupported;
5579 if (supportable_convert_operation ((tree_code) code, vectype_out,
5580 cvt_type, &tc1))
5582 codecvt1 = tc1;
5583 modifier = NARROW_SRC;
5584 break;
5588 goto unsupported;
5590 default:
5591 gcc_unreachable ();
5594 if (!vec_stmt) /* transformation not required. */
5596 if (slp_node
5597 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5598 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5600 if (dump_enabled_p ())
5601 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5602 "incompatible vector types for invariants\n");
5603 return false;
5605 DUMP_VECT_SCOPE ("vectorizable_conversion");
5606 if (modifier == NONE)
5608 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5609 vect_model_simple_cost (vinfo, stmt_info,
5610 ncopies * (1 + multi_step_cvt),
5611 dt, ndts, slp_node, cost_vec);
5613 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5615 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5616 /* The final packing step produces one vector result per copy. */
5617 unsigned int nvectors
5618 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5619 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5620 multi_step_cvt, cost_vec,
5621 widen_arith);
5623 else
5625 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5626 /* The initial unpacking step produces two vector results
5627 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5628 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5629 unsigned int nvectors
5630 = (slp_node
5631 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5632 : ncopies * 2);
5633 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5634 multi_step_cvt, cost_vec,
5635 widen_arith);
5637 interm_types.release ();
5638 return true;
5641 /* Transform. */
5642 if (dump_enabled_p ())
5643 dump_printf_loc (MSG_NOTE, vect_location,
5644 "transform conversion. ncopies = %d.\n", ncopies);
5646 if (op_type == binary_op)
5648 if (CONSTANT_CLASS_P (op0))
5649 op0 = fold_convert (TREE_TYPE (op1), op0);
5650 else if (CONSTANT_CLASS_P (op1))
5651 op1 = fold_convert (TREE_TYPE (op0), op1);
5654 /* In case of multi-step conversion, we first generate conversion operations
5655 to the intermediate types, and then from those types to the final one.
5656 We create vector destinations for the intermediate type (TYPES) received
5657 from supportable_*_operation, and store them in the correct order
5658 for future use in vect_create_vectorized_*_stmts (). */
5659 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5660 bool widen_or_narrow_float_p
5661 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5662 vec_dest = vect_create_destination_var (scalar_dest,
5663 widen_or_narrow_float_p
5664 ? cvt_type : vectype_out);
5665 vec_dsts.quick_push (vec_dest);
5667 if (multi_step_cvt)
5669 for (i = interm_types.length () - 1;
5670 interm_types.iterate (i, &intermediate_type); i--)
5672 vec_dest = vect_create_destination_var (scalar_dest,
5673 intermediate_type);
5674 vec_dsts.quick_push (vec_dest);
5678 if (cvt_type)
5679 vec_dest = vect_create_destination_var (scalar_dest,
5680 widen_or_narrow_float_p
5681 ? vectype_out : cvt_type);
5683 int ninputs = 1;
5684 if (!slp_node)
5686 if (modifier == WIDEN)
5688 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5690 if (multi_step_cvt)
5691 ninputs = vect_pow2 (multi_step_cvt);
5692 ninputs *= 2;
5696 switch (modifier)
5698 case NONE:
5699 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5700 op0, vectype_in, &vec_oprnds0);
5701 /* vec_dest is the intermediate-type operand when multi_step_cvt is set. */
5702 if (multi_step_cvt)
5704 cvt_op = vec_dest;
5705 vec_dest = vec_dsts[0];
5708 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5710 /* Arguments are ready, create the new vector stmt. */
5711 gimple* new_stmt;
5712 if (multi_step_cvt)
5714 gcc_assert (multi_step_cvt == 1);
5715 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5716 new_temp = make_ssa_name (cvt_op, new_stmt);
5717 gimple_assign_set_lhs (new_stmt, new_temp);
5718 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5719 vop0 = new_temp;
5721 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5722 new_temp = make_ssa_name (vec_dest, new_stmt);
5723 gimple_set_lhs (new_stmt, new_temp);
5724 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5726 if (slp_node)
5727 slp_node->push_vec_def (new_stmt);
5728 else
5729 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5731 break;
5733 case WIDEN:
5734 /* In case the vectorization factor (VF) is bigger than the number
5735 of elements that we can fit in a vectype (nunits), we have to
5736 generate more than one vector stmt - i.e - we need to "unroll"
5737 the vector stmt by a factor VF/nunits. */
5738 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5739 op0, vectype_in, &vec_oprnds0,
5740 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5741 vectype_in, &vec_oprnds1);
5742 if (code == WIDEN_LSHIFT_EXPR)
5744 int oprnds_size = vec_oprnds0.length ();
5745 vec_oprnds1.create (oprnds_size);
5746 for (i = 0; i < oprnds_size; ++i)
5747 vec_oprnds1.quick_push (op1);
5749 /* Arguments are ready. Create the new vector stmts. */
5750 for (i = multi_step_cvt; i >= 0; i--)
5752 tree this_dest = vec_dsts[i];
5753 code_helper c1 = code1, c2 = code2;
5754 if (i == 0 && codecvt2 != ERROR_MARK)
5756 c1 = codecvt1;
5757 c2 = codecvt2;
5759 if (known_eq (nunits_out, nunits_in))
5760 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5761 stmt_info, this_dest, gsi, c1,
5762 op_type);
5763 else
5764 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5765 &vec_oprnds1, stmt_info,
5766 this_dest, gsi,
5767 c1, c2, op_type);
5770 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5772 gimple *new_stmt;
5773 if (cvt_type)
5775 new_temp = make_ssa_name (vec_dest);
5776 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5777 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5779 else
5780 new_stmt = SSA_NAME_DEF_STMT (vop0);
5782 if (slp_node)
5783 slp_node->push_vec_def (new_stmt);
5784 else
5785 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5787 break;
5789 case NARROW_SRC:
5790 case NARROW_DST:
5791 /* In case the vectorization factor (VF) is bigger than the number
5792 of elements that we can fit in a vectype (nunits), we have to
5793 generate more than one vector stmt - i.e - we need to "unroll"
5794 the vector stmt by a factor VF/nunits. */
5795 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5796 op0, vectype_in, &vec_oprnds0);
5797 /* Arguments are ready. Create the new vector stmts. */
5798 if (cvt_type && modifier == NARROW_DST)
5799 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5801 new_temp = make_ssa_name (vec_dest);
5802 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5803 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5804 vec_oprnds0[i] = new_temp;
5807 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5808 multi_step_cvt,
5809 stmt_info, vec_dsts, gsi,
5810 slp_node, code1,
5811 modifier == NARROW_SRC);
5812 /* After demoting op0 to cvt_type, convert it to dest. */
5813 if (cvt_type && code == FLOAT_EXPR)
5815 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5817 /* Arguments are ready, create the new vector stmt. */
5818 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5819 gimple *new_stmt
5820 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5821 new_temp = make_ssa_name (vec_dest, new_stmt);
5822 gimple_set_lhs (new_stmt, new_temp);
5823 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5825 /* This is the last step of the conversion sequence. Store the
5826 vectors in SLP_NODE or in vector info of the scalar statement
5827 (or in STMT_VINFO_RELATED_STMT chain). */
5828 if (slp_node)
5829 slp_node->push_vec_def (new_stmt);
5830 else
5831 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5834 break;
5836 if (!slp_node)
5837 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5839 vec_oprnds0.release ();
5840 vec_oprnds1.release ();
5841 interm_types.release ();
5843 return true;
5846 /* Return true if we can assume from the scalar form of STMT_INFO that
5847 neither the scalar nor the vector forms will generate code. STMT_INFO
5848 is known not to involve a data reference. */
5850 bool
5851 vect_nop_conversion_p (stmt_vec_info stmt_info)
5853 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5854 if (!stmt)
5855 return false;
5857 tree lhs = gimple_assign_lhs (stmt);
5858 tree_code code = gimple_assign_rhs_code (stmt);
5859 tree rhs = gimple_assign_rhs1 (stmt);
5861 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5862 return true;
5864 if (CONVERT_EXPR_CODE_P (code))
5865 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5867 return false;
5870 /* Function vectorizable_assignment.
5872 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5873 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5874 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5875 Return true if STMT_INFO is vectorizable in this way. */
5877 static bool
5878 vectorizable_assignment (vec_info *vinfo,
5879 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5880 gimple **vec_stmt, slp_tree slp_node,
5881 stmt_vector_for_cost *cost_vec)
5883 tree vec_dest;
5884 tree scalar_dest;
5885 tree op;
5886 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5887 tree new_temp;
5888 enum vect_def_type dt[1] = {vect_unknown_def_type};
5889 int ndts = 1;
5890 int ncopies;
5891 int i;
5892 vec<tree> vec_oprnds = vNULL;
5893 tree vop;
5894 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5895 enum tree_code code;
5896 tree vectype_in;
5898 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5899 return false;
5901 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5902 && ! vec_stmt)
5903 return false;
5905 /* Is vectorizable assignment? */
5906 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5907 if (!stmt)
5908 return false;
5910 scalar_dest = gimple_assign_lhs (stmt);
5911 if (TREE_CODE (scalar_dest) != SSA_NAME)
5912 return false;
5914 if (STMT_VINFO_DATA_REF (stmt_info))
5915 return false;
5917 code = gimple_assign_rhs_code (stmt);
5918 if (!(gimple_assign_single_p (stmt)
5919 || code == PAREN_EXPR
5920 || CONVERT_EXPR_CODE_P (code)))
5921 return false;
5923 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5924 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5926 /* Multiple types in SLP are handled by creating the appropriate number of
5927 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5928 case of SLP. */
5929 if (slp_node)
5930 ncopies = 1;
5931 else
5932 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5934 gcc_assert (ncopies >= 1);
5936 slp_tree slp_op;
5937 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5938 &dt[0], &vectype_in))
5940 if (dump_enabled_p ())
5941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5942 "use not simple.\n");
5943 return false;
5945 if (!vectype_in)
5946 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5948 /* We can handle NOP_EXPR conversions that do not change the number
5949 of elements or the vector size. */
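/* E.g. an int -> unsigned int copy, or any conversion between
   same-sized, same-count element types, is emitted below as a single
   VIEW_CONVERT_EXPR of the vector operand. */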
5950 if ((CONVERT_EXPR_CODE_P (code)
5951 || code == VIEW_CONVERT_EXPR)
5952 && (!vectype_in
5953 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5954 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5955 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5956 return false;
5958 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5960 if (dump_enabled_p ())
5961 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5962 "can't convert between boolean and non "
5963 "boolean vectors %T\n", TREE_TYPE (op));
5965 return false;
5968 /* We do not handle bit-precision changes. */
5969 if ((CONVERT_EXPR_CODE_P (code)
5970 || code == VIEW_CONVERT_EXPR)
5971 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5972 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5973 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
5974 && !type_has_mode_precision_p (TREE_TYPE (op))))
5975 /* But a conversion that does not change the bit-pattern is ok. */
5976 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5977 && INTEGRAL_TYPE_P (TREE_TYPE (op))
5978 && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5979 > TYPE_PRECISION (TREE_TYPE (op)))
5980 && TYPE_UNSIGNED (TREE_TYPE (op)))
5981 || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5982 == TYPE_PRECISION (TREE_TYPE (op))))))
5984 if (dump_enabled_p ())
5985 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5986 "type conversion to/from bit-precision "
5987 "unsupported.\n");
5988 return false;
5991 if (!vec_stmt) /* transformation not required. */
5993 if (slp_node
5994 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5996 if (dump_enabled_p ())
5997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5998 "incompatible vector types for invariants\n");
5999 return false;
6001 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
6002 DUMP_VECT_SCOPE ("vectorizable_assignment");
6003 if (!vect_nop_conversion_p (stmt_info))
6004 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
6005 cost_vec);
6006 return true;
6009 /* Transform. */
6010 if (dump_enabled_p ())
6011 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6013 /* Handle def. */
6014 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6016 /* Handle use. */
6017 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
6019 /* Arguments are ready. Create the new vector stmt. */
6020 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
6022 if (CONVERT_EXPR_CODE_P (code)
6023 || code == VIEW_CONVERT_EXPR)
6024 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6025 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6026 new_temp = make_ssa_name (vec_dest, new_stmt);
6027 gimple_assign_set_lhs (new_stmt, new_temp);
6028 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6029 if (slp_node)
6030 slp_node->push_vec_def (new_stmt);
6031 else
6032 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6034 if (!slp_node)
6035 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6037 vec_oprnds.release ();
6038 return true;
6042 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6043 either as shift by a scalar or by a vector. */
6045 bool
6046 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6049 machine_mode vec_mode;
6050 optab optab;
6051 int icode;
6052 tree vectype;
6054 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6055 if (!vectype)
6056 return false;
6058 optab = optab_for_tree_code (code, vectype, optab_scalar);
6059 if (!optab
6060 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6062 optab = optab_for_tree_code (code, vectype, optab_vector);
6063 if (!optab
6064 || (optab_handler (optab, TYPE_MODE (vectype))
6065 == CODE_FOR_nothing))
6066 return false;
6069 vec_mode = TYPE_MODE (vectype);
6070 icode = (int) optab_handler (optab, vec_mode);
6071 if (icode == CODE_FOR_nothing)
6072 return false;
6074 return true;
6078 /* Function vectorizable_shift.
6080 Check if STMT_INFO performs a shift operation that can be vectorized.
6081 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6082 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6083 Return true if STMT_INFO is vectorizable in this way. */
6085 static bool
6086 vectorizable_shift (vec_info *vinfo,
6087 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6088 gimple **vec_stmt, slp_tree slp_node,
6089 stmt_vector_for_cost *cost_vec)
6091 tree vec_dest;
6092 tree scalar_dest;
6093 tree op0, op1 = NULL;
6094 tree vec_oprnd1 = NULL_TREE;
6095 tree vectype;
6096 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6097 enum tree_code code;
6098 machine_mode vec_mode;
6099 tree new_temp;
6100 optab optab;
6101 int icode;
6102 machine_mode optab_op2_mode;
6103 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6104 int ndts = 2;
6105 poly_uint64 nunits_in;
6106 poly_uint64 nunits_out;
6107 tree vectype_out;
6108 tree op1_vectype;
6109 int ncopies;
6110 int i;
6111 vec<tree> vec_oprnds0 = vNULL;
6112 vec<tree> vec_oprnds1 = vNULL;
6113 tree vop0, vop1;
6114 unsigned int k;
6115 bool scalar_shift_arg = true;
6116 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6117 bool incompatible_op1_vectype_p = false;
6119 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6120 return false;
6122 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6123 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6124 && ! vec_stmt)
6125 return false;
6127 /* Is STMT a vectorizable binary/unary operation? */
6128 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6129 if (!stmt)
6130 return false;
6132 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6133 return false;
6135 code = gimple_assign_rhs_code (stmt);
6137 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6138 || code == RROTATE_EXPR))
6139 return false;
6141 scalar_dest = gimple_assign_lhs (stmt);
6142 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6143 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6145 if (dump_enabled_p ())
6146 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6147 "bit-precision shifts not supported.\n");
6148 return false;
6151 slp_tree slp_op0;
6152 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6153 0, &op0, &slp_op0, &dt[0], &vectype))
6155 if (dump_enabled_p ())
6156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6157 "use not simple.\n");
6158 return false;
6160 /* If op0 is an external or constant def, infer the vector type
6161 from the scalar type. */
6162 if (!vectype)
6163 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6164 if (vec_stmt)
6165 gcc_assert (vectype);
6166 if (!vectype)
6168 if (dump_enabled_p ())
6169 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6170 "no vectype for scalar type\n");
6171 return false;
6174 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6175 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6176 if (maybe_ne (nunits_out, nunits_in))
6177 return false;
6179 stmt_vec_info op1_def_stmt_info;
6180 slp_tree slp_op1;
6181 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6182 &dt[1], &op1_vectype, &op1_def_stmt_info))
6184 if (dump_enabled_p ())
6185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6186 "use not simple.\n");
6187 return false;
6190 /* Multiple types in SLP are handled by creating the appropriate number of
6191 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6192 case of SLP. */
6193 if (slp_node)
6194 ncopies = 1;
6195 else
6196 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6198 gcc_assert (ncopies >= 1);
6200 /* Determine whether the shift amount is a vector, or scalar. If the
6201 shift/rotate amount is a vector, use the vector/vector shift optabs. */
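/* E.g. for a[i] = b[i] << 2 the shift amount is invariant and the
   vector-by-scalar form can be used, whereas a[i] = b[i] << c[i]
   needs a per-lane amount and therefore the vector/vector form. */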
6203 if ((dt[1] == vect_internal_def
6204 || dt[1] == vect_induction_def
6205 || dt[1] == vect_nested_cycle)
6206 && !slp_node)
6207 scalar_shift_arg = false;
6208 else if (dt[1] == vect_constant_def
6209 || dt[1] == vect_external_def
6210 || dt[1] == vect_internal_def)
6212 /* In SLP we need to check whether the shift count is the same in
6213 all the stmts; in loops, if it is a constant or invariant, it is
6214 always a scalar shift. */
6215 if (slp_node)
6217 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6218 stmt_vec_info slpstmt_info;
6220 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6222 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6223 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6224 scalar_shift_arg = false;
6227 /* For internal SLP defs we have to make sure we see scalar stmts
6228 for all vector elements.
6229 ??? For different vectors we could resort to a different
6230 scalar shift operand but code-generation below simply always
6231 takes the first. */
6232 if (dt[1] == vect_internal_def
6233 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6234 stmts.length ()))
6235 scalar_shift_arg = false;
6238 /* If the shift amount is computed by a pattern stmt we cannot
6239 use the scalar amount directly thus give up and use a vector
6240 shift. */
6241 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6242 scalar_shift_arg = false;
6244 else
6246 if (dump_enabled_p ())
6247 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6248 "operand mode requires invariant argument.\n");
6249 return false;
6252 /* Vector shifted by vector. */
6253 bool was_scalar_shift_arg = scalar_shift_arg;
6254 if (!scalar_shift_arg)
6256 optab = optab_for_tree_code (code, vectype, optab_vector);
6257 if (dump_enabled_p ())
6258 dump_printf_loc (MSG_NOTE, vect_location,
6259 "vector/vector shift/rotate found.\n");
6261 if (!op1_vectype)
6262 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6263 slp_op1);
6264 incompatible_op1_vectype_p
6265 = (op1_vectype == NULL_TREE
6266 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6267 TYPE_VECTOR_SUBPARTS (vectype))
6268 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6269 if (incompatible_op1_vectype_p
6270 && (!slp_node
6271 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6272 || slp_op1->refcnt != 1))
6274 if (dump_enabled_p ())
6275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6276 "unusable type for last operand in"
6277 " vector/vector shift/rotate.\n");
6278 return false;
6281 /* See if the machine has a vector shifted by scalar insn and if not
6282 then see if it has a vector shifted by vector insn. */
6283 else
6285 optab = optab_for_tree_code (code, vectype, optab_scalar);
6286 if (optab
6287 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6289 if (dump_enabled_p ())
6290 dump_printf_loc (MSG_NOTE, vect_location,
6291 "vector/scalar shift/rotate found.\n");
6293 else
6295 optab = optab_for_tree_code (code, vectype, optab_vector);
6296 if (optab
6297 && (optab_handler (optab, TYPE_MODE (vectype))
6298 != CODE_FOR_nothing))
6300 scalar_shift_arg = false;
6302 if (dump_enabled_p ())
6303 dump_printf_loc (MSG_NOTE, vect_location,
6304 "vector/vector shift/rotate found.\n");
6306 if (!op1_vectype)
6307 op1_vectype = get_vectype_for_scalar_type (vinfo,
6308 TREE_TYPE (op1),
6309 slp_op1);
6311 /* Unlike the other binary operators, shifts/rotates have
6312 the rhs being int, instead of the same type as the lhs,
6313 so make sure the scalar is the right type if we are
6314 dealing with vectors of long long/long/short/char. */
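/* E.g. shifting a vector of long long by an int amount: the amount is
   converted to the vector element type (and broadcast when a vector
   shift amount is required) further below. */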
6315 incompatible_op1_vectype_p
6316 = (!op1_vectype
6317 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6318 TREE_TYPE (op1)));
6319 if (incompatible_op1_vectype_p
6320 && dt[1] == vect_internal_def)
6322 if (dump_enabled_p ())
6323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6324 "unusable type for last operand in"
6325 " vector/vector shift/rotate.\n");
6326 return false;
6332 /* Supportable by target? */
6333 if (!optab)
6335 if (dump_enabled_p ())
6336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6337 "no optab.\n");
6338 return false;
6340 vec_mode = TYPE_MODE (vectype);
6341 icode = (int) optab_handler (optab, vec_mode);
6342 if (icode == CODE_FOR_nothing)
6344 if (dump_enabled_p ())
6345 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6346 "op not supported by target.\n");
6347 return false;
6349 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6350 if (vect_emulated_vector_p (vectype))
6351 return false;
6353 if (!vec_stmt) /* transformation not required. */
6355 if (slp_node
6356 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6357 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6358 && (!incompatible_op1_vectype_p
6359 || dt[1] == vect_constant_def)
6360 && !vect_maybe_update_slp_op_vectype
6361 (slp_op1,
6362 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6364 if (dump_enabled_p ())
6365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6366 "incompatible vector types for invariants\n");
6367 return false;
6369 /* Now adjust the constant shift amount in place. */
6370 if (slp_node
6371 && incompatible_op1_vectype_p
6372 && dt[1] == vect_constant_def)
6374 for (unsigned i = 0;
6375 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6377 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6378 = fold_convert (TREE_TYPE (vectype),
6379 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6380 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6381 == INTEGER_CST));
6384 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6385 DUMP_VECT_SCOPE ("vectorizable_shift");
6386 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6387 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6388 return true;
6391 /* Transform. */
6393 if (dump_enabled_p ())
6394 dump_printf_loc (MSG_NOTE, vect_location,
6395 "transform binary/unary operation.\n");
6397 if (incompatible_op1_vectype_p && !slp_node)
6399 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6400 op1 = fold_convert (TREE_TYPE (vectype), op1);
6401 if (dt[1] != vect_constant_def)
6402 op1 = vect_init_vector (vinfo, stmt_info, op1,
6403 TREE_TYPE (vectype), NULL);
6406 /* Handle def. */
6407 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6409 if (scalar_shift_arg && dt[1] != vect_internal_def)
6411 /* Vector shl and shr insn patterns can be defined with scalar
6412 operand 2 (shift operand). In this case, use constant or loop
6413 invariant op1 directly, without extending it to vector mode
6414 first. */
6415 optab_op2_mode = insn_data[icode].operand[2].mode;
6416 if (!VECTOR_MODE_P (optab_op2_mode))
6418 if (dump_enabled_p ())
6419 dump_printf_loc (MSG_NOTE, vect_location,
6420 "operand 1 using scalar mode.\n");
6421 vec_oprnd1 = op1;
6422 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6423 vec_oprnds1.quick_push (vec_oprnd1);
6424 /* Store vec_oprnd1 for every vector stmt to be created.
6425 We check during the analysis that all the shift arguments
6426 are the same.
6427 TODO: Allow different constants for different vector
6428 stmts generated for an SLP instance. */
6429 for (k = 0;
6430 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6431 vec_oprnds1.quick_push (vec_oprnd1);
6434 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6436 if (was_scalar_shift_arg)
6438 /* If the argument was the same in all lanes create
6439 the correctly typed vector shift amount directly. */
6440 op1 = fold_convert (TREE_TYPE (vectype), op1);
6441 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6442 !loop_vinfo ? gsi : NULL);
6443 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6444 !loop_vinfo ? gsi : NULL);
6445 vec_oprnds1.create (slp_node->vec_stmts_size);
6446 for (k = 0; k < slp_node->vec_stmts_size; k++)
6447 vec_oprnds1.quick_push (vec_oprnd1);
6449 else if (dt[1] == vect_constant_def)
6450 /* The constant shift amount has been adjusted in place. */
6452 else
6453 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6456 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6457 (a special case for certain kinds of vector shifts); otherwise,
6458 operand 1 should be of a vector type (the usual case). */
6459 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6460 op0, &vec_oprnds0,
6461 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6463 /* Arguments are ready. Create the new vector stmt. */
6464 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6466 /* For internal defs where we need to use a scalar shift arg,
6467 extract the first lane. */
6468 if (scalar_shift_arg && dt[1] == vect_internal_def)
6470 vop1 = vec_oprnds1[0];
6471 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6472 gassign *new_stmt
6473 = gimple_build_assign (new_temp,
6474 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6475 vop1,
6476 TYPE_SIZE (TREE_TYPE (new_temp)),
6477 bitsize_zero_node));
6478 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6479 vop1 = new_temp;
6481 else
6482 vop1 = vec_oprnds1[i];
6483 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6484 new_temp = make_ssa_name (vec_dest, new_stmt);
6485 gimple_assign_set_lhs (new_stmt, new_temp);
6486 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6487 if (slp_node)
6488 slp_node->push_vec_def (new_stmt);
6489 else
6490 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6493 if (!slp_node)
6494 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6496 vec_oprnds0.release ();
6497 vec_oprnds1.release ();
6499 return true;
6502 /* Function vectorizable_operation.
6504 Check if STMT_INFO performs a binary, unary or ternary operation that can
6505 be vectorized.
6506 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6507 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6508 Return true if STMT_INFO is vectorizable in this way. */
6510 static bool
6511 vectorizable_operation (vec_info *vinfo,
6512 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6513 gimple **vec_stmt, slp_tree slp_node,
6514 stmt_vector_for_cost *cost_vec)
6516 tree vec_dest;
6517 tree scalar_dest;
6518 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6519 tree vectype;
6520 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6521 enum tree_code code, orig_code;
6522 machine_mode vec_mode;
6523 tree new_temp;
6524 int op_type;
6525 optab optab;
6526 bool target_support_p;
6527 enum vect_def_type dt[3]
6528 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6529 int ndts = 3;
6530 poly_uint64 nunits_in;
6531 poly_uint64 nunits_out;
6532 tree vectype_out;
6533 int ncopies, vec_num;
6534 int i;
6535 vec<tree> vec_oprnds0 = vNULL;
6536 vec<tree> vec_oprnds1 = vNULL;
6537 vec<tree> vec_oprnds2 = vNULL;
6538 tree vop0, vop1, vop2;
6539 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6541 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6542 return false;
6544 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6545 && ! vec_stmt)
6546 return false;
6548 /* Is STMT a vectorizable binary/unary operation? */
6549 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6550 if (!stmt)
6551 return false;
6553 /* Loads and stores are handled in vectorizable_{load,store}. */
6554 if (STMT_VINFO_DATA_REF (stmt_info))
6555 return false;
6557 orig_code = code = gimple_assign_rhs_code (stmt);
6559 /* Shifts are handled in vectorizable_shift. */
6560 if (code == LSHIFT_EXPR
6561 || code == RSHIFT_EXPR
6562 || code == LROTATE_EXPR
6563 || code == RROTATE_EXPR)
6564 return false;
6566 /* Comparisons are handled in vectorizable_comparison. */
6567 if (TREE_CODE_CLASS (code) == tcc_comparison)
6568 return false;
6570 /* Conditions are handled in vectorizable_condition. */
6571 if (code == COND_EXPR)
6572 return false;
6574 /* For pointer addition and subtraction, we should use the normal
6575 plus and minus for the vector operation. */
6576 if (code == POINTER_PLUS_EXPR)
6577 code = PLUS_EXPR;
6578 if (code == POINTER_DIFF_EXPR)
6579 code = MINUS_EXPR;
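/* E.g. a pointer difference q - p is vectorized as a MINUS_EXPR on
   vectors of unsigned pointer-sized integers; the signed result is
   fixed up with a VIEW_CONVERT_EXPR during the transform below. */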
6581 /* Support only unary or binary operations. */
6582 op_type = TREE_CODE_LENGTH (code);
6583 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6585 if (dump_enabled_p ())
6586 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6587 "num. args = %d (not unary/binary/ternary op).\n",
6588 op_type);
6589 return false;
6592 scalar_dest = gimple_assign_lhs (stmt);
6593 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6595 /* Most operations cannot handle bit-precision types without extra
6596 truncations. */
6597 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6598 if (!mask_op_p
6599 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6600 /* Exception are bitwise binary operations. */
6601 && code != BIT_IOR_EXPR
6602 && code != BIT_XOR_EXPR
6603 && code != BIT_AND_EXPR)
6605 if (dump_enabled_p ())
6606 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6607 "bit-precision arithmetic not supported.\n");
6608 return false;
6611 slp_tree slp_op0;
6612 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6613 0, &op0, &slp_op0, &dt[0], &vectype))
6615 if (dump_enabled_p ())
6616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6617 "use not simple.\n");
6618 return false;
6620 bool is_invariant = (dt[0] == vect_external_def
6621 || dt[0] == vect_constant_def);
6622 /* If op0 is an external or constant def, infer the vector type
6623 from the scalar type. */
6624 if (!vectype)
6626 /* For a boolean type we cannot determine the vectype from an
6627 invariant value (we don't know whether it is a vector
6628 of booleans or a vector of integers). We use the output
6629 vectype because operations on booleans don't change the
6630 type. */
6631 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6633 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6635 if (dump_enabled_p ())
6636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6637 "not supported operation on bool value.\n");
6638 return false;
6640 vectype = vectype_out;
6642 else
6643 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6644 slp_node);
6646 if (vec_stmt)
6647 gcc_assert (vectype);
6648 if (!vectype)
6650 if (dump_enabled_p ())
6651 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6652 "no vectype for scalar type %T\n",
6653 TREE_TYPE (op0));
6655 return false;
6658 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6659 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6660 if (maybe_ne (nunits_out, nunits_in))
6661 return false;
6663 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6664 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6665 if (op_type == binary_op || op_type == ternary_op)
6667 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6668 1, &op1, &slp_op1, &dt[1], &vectype2))
6670 if (dump_enabled_p ())
6671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6672 "use not simple.\n");
6673 return false;
6675 is_invariant &= (dt[1] == vect_external_def
6676 || dt[1] == vect_constant_def);
6677 if (vectype2
6678 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6679 return false;
6681 if (op_type == ternary_op)
6683 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6684 2, &op2, &slp_op2, &dt[2], &vectype3))
6686 if (dump_enabled_p ())
6687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6688 "use not simple.\n");
6689 return false;
6691 is_invariant &= (dt[2] == vect_external_def
6692 || dt[2] == vect_constant_def);
6693 if (vectype3
6694 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6695 return false;
6698 /* Multiple types in SLP are handled by creating the appropriate number of
6699 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6700 case of SLP. */
6701 if (slp_node)
6703 ncopies = 1;
6704 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6706 else
6708 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6709 vec_num = 1;
6712 gcc_assert (ncopies >= 1);
6714 /* Reject attempts to combine mask types with nonmask types, e.g. if
6715 we have an AND between a (nonmask) boolean loaded from memory and
6716 a (mask) boolean result of a comparison.
6718 TODO: We could easily fix these cases up using pattern statements. */
6719 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6720 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6721 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6723 if (dump_enabled_p ())
6724 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6725 "mixed mask and nonmask vector types\n");
6726 return false;
6729 /* Supportable by target? */
6731 vec_mode = TYPE_MODE (vectype);
6732 if (code == MULT_HIGHPART_EXPR)
6733 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6734 else
6736 optab = optab_for_tree_code (code, vectype, optab_default);
6737 if (!optab)
6739 if (dump_enabled_p ())
6740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6741 "no optab.\n");
6742 return false;
6744 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6745 || optab_libfunc (optab, vec_mode));
6748 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6749 if (!target_support_p || using_emulated_vectors_p)
6751 if (dump_enabled_p ())
6752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6753 "op not supported by target.\n");
6754 /* When vec_mode is not a vector mode and we verified that the ops
6755 we do not have to lower (like AND) are natively supported, let
6756 those through even when the mode isn't word_mode. For the
6757 ops we do have to lower, the lowering code assumes we are
6758 dealing with word_mode. */
6759 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6760 || !target_support_p)
6761 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6762 /* Check only during analysis. */
6763 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6765 if (dump_enabled_p ())
6766 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6767 return false;
6769 if (dump_enabled_p ())
6770 dump_printf_loc (MSG_NOTE, vect_location,
6771 "proceeding using word mode.\n");
6772 using_emulated_vectors_p = true;
6775 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6776 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6777 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6778 internal_fn cond_fn = get_conditional_internal_fn (code);
6779 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6781 /* If operating on inactive elements could generate spurious traps,
6782 we need to restrict the operation to active lanes. Note that this
6783 specifically doesn't apply to unhoisted invariants, since they
6784 operate on the same value for every lane.
6786 Similarly, if this operation is part of a reduction, a fully-masked
6787 loop should only change the active lanes of the reduction chain,
6788 keeping the inactive lanes as-is. */
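/* E.g. an integer division whose divisor could be zero in an inactive
   lane is emitted via a conditional internal function (IFN_COND_DIV or
   its _LEN variant, where the target provides one) so that only active
   lanes are computed; for a reduction, the inactive lanes are filled
   from the reduction chain input instead. */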
6789 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6790 || reduc_idx >= 0);
6792 if (!vec_stmt) /* transformation not required. */
6794 if (loop_vinfo
6795 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6796 && mask_out_inactive)
6798 if (cond_len_fn != IFN_LAST
6799 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6800 OPTIMIZE_FOR_SPEED))
6801 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6803 else if (cond_fn != IFN_LAST
6804 && direct_internal_fn_supported_p (cond_fn, vectype,
6805 OPTIMIZE_FOR_SPEED))
6806 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6807 vectype, NULL);
6808 else
6810 if (dump_enabled_p ())
6811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6812 "can't use a fully-masked loop because no"
6813 " conditional operation is available.\n");
6814 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6818 /* Put types on constant and invariant SLP children. */
6819 if (slp_node
6820 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6821 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6822 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6824 if (dump_enabled_p ())
6825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6826 "incompatible vector types for invariants\n");
6827 return false;
6830 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6831 DUMP_VECT_SCOPE ("vectorizable_operation");
6832 vect_model_simple_cost (vinfo, stmt_info,
6833 ncopies, dt, ndts, slp_node, cost_vec);
6834 if (using_emulated_vectors_p)
6836 /* The above vect_model_simple_cost call handles constants
6837 in the prologue and (mis-)costs one of the stmts as
6838 vector stmt. See below for the actual lowering that will
6839 be applied. */
6840 unsigned n
6841 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6842 switch (code)
6844 case PLUS_EXPR:
6845 n *= 5;
6846 break;
6847 case MINUS_EXPR:
6848 n *= 6;
6849 break;
6850 case NEGATE_EXPR:
6851 n *= 4;
6852 break;
6853 default:
6854 /* Bit operations do not have extra cost and are accounted for
6855 as a vector stmt by vect_model_simple_cost. */
6856 n = 0;
6857 break;
6859 if (n != 0)
6861 /* We also need to materialize two large constants. */
6862 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6863 0, vect_prologue);
6864 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6865 0, vect_body);
6868 return true;
6871 /* Transform. */
6873 if (dump_enabled_p ())
6874 dump_printf_loc (MSG_NOTE, vect_location,
6875 "transform binary/unary operation.\n");
6877 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6878 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6880 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6881 vectors with unsigned elements, but the result is signed. So, we
6882 need to compute the MINUS_EXPR into vectype temporary and
6883 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6884 tree vec_cvt_dest = NULL_TREE;
6885 if (orig_code == POINTER_DIFF_EXPR)
6887 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6888 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6890 /* Handle def. */
6891 else
6892 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6894 /* In case the vectorization factor (VF) is bigger than the number
6895 of elements that we can fit in a vectype (nunits), we have to generate
6896 more than one vector stmt - i.e - we need to "unroll" the
6897 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6898 from one copy of the vector stmt to the next, in the field
6899 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6900 stages to find the correct vector defs to be used when vectorizing
6901 stmts that use the defs of the current stmt. The example below
6902 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6903 we need to create 4 vectorized stmts):
6905 before vectorization:
6906 RELATED_STMT VEC_STMT
6907 S1: x = memref - -
6908 S2: z = x + 1 - -
6910 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6911 there):
6912 RELATED_STMT VEC_STMT
6913 VS1_0: vx0 = memref0 VS1_1 -
6914 VS1_1: vx1 = memref1 VS1_2 -
6915 VS1_2: vx2 = memref2 VS1_3 -
6916 VS1_3: vx3 = memref3 - -
6917 S1: x = load - VS1_0
6918 S2: z = x + 1 - -
6920 step2: vectorize stmt S2 (done here):
6921 To vectorize stmt S2 we first need to find the relevant vector
6922 def for the first operand 'x'. This is, as usual, obtained from
6923 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6924 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6925 relevant vector def 'vx0'. Having found 'vx0' we can generate
6926 the vector stmt VS2_0, and as usual, record it in the
6927 STMT_VINFO_VEC_STMT of stmt S2.
6928 When creating the second copy (VS2_1), we obtain the relevant vector
6929 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6930 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6931 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6932 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6933 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6934 chain of stmts and pointers:
6935 RELATED_STMT VEC_STMT
6936 VS1_0: vx0 = memref0 VS1_1 -
6937 VS1_1: vx1 = memref1 VS1_2 -
6938 VS1_2: vx2 = memref2 VS1_3 -
6939 VS1_3: vx3 = memref3 - -
6940 S1: x = load - VS1_0
6941 VS2_0: vz0 = vx0 + v1 VS2_1 -
6942 VS2_1: vz1 = vx1 + v1 VS2_2 -
6943 VS2_2: vz2 = vx2 + v1 VS2_3 -
6944 VS2_3: vz3 = vx3 + v1 - -
6945 S2: z = x + 1 - VS2_0 */
6947 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6948 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6949 /* Arguments are ready. Create the new vector stmt. */
6950 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6952 gimple *new_stmt = NULL;
6953 vop1 = ((op_type == binary_op || op_type == ternary_op)
6954 ? vec_oprnds1[i] : NULL_TREE);
6955 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6956 if (using_emulated_vectors_p
6957 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6959 /* Lower the operation. This follows vector lowering. */
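/* Sketch of the PLUS_EXPR case: treat each vector as a single word_mode
   integer, clear the most significant bit of every element in both
   operands (via LOW_BITS) so the per-element additions cannot carry
   into the next element, add, and then fix up each element's top bit
   by XOR-ing in the operands' top bits (SIGNS masked by HIGH_BITS).
   MINUS_EXPR and NEGATE_EXPR use analogous borrow-free sequences. */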
6960 unsigned int width = vector_element_bits (vectype);
6961 tree inner_type = TREE_TYPE (vectype);
6962 tree word_type
6963 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
6964 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6965 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
6966 tree high_bits
6967 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
6968 tree wvop0 = make_ssa_name (word_type);
6969 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6970 build1 (VIEW_CONVERT_EXPR,
6971 word_type, vop0));
6972 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6973 tree result_low, signs;
6974 if (code == PLUS_EXPR || code == MINUS_EXPR)
6976 tree wvop1 = make_ssa_name (word_type);
6977 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6978 build1 (VIEW_CONVERT_EXPR,
6979 word_type, vop1));
6980 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6981 signs = make_ssa_name (word_type);
6982 new_stmt = gimple_build_assign (signs,
6983 BIT_XOR_EXPR, wvop0, wvop1);
6984 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6985 tree b_low = make_ssa_name (word_type);
6986 new_stmt = gimple_build_assign (b_low,
6987 BIT_AND_EXPR, wvop1, low_bits);
6988 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6989 tree a_low = make_ssa_name (word_type);
6990 if (code == PLUS_EXPR)
6991 new_stmt = gimple_build_assign (a_low,
6992 BIT_AND_EXPR, wvop0, low_bits);
6993 else
6994 new_stmt = gimple_build_assign (a_low,
6995 BIT_IOR_EXPR, wvop0, high_bits);
6996 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6997 if (code == MINUS_EXPR)
6999 new_stmt = gimple_build_assign (NULL_TREE,
7000 BIT_NOT_EXPR, signs);
7001 signs = make_ssa_name (word_type);
7002 gimple_assign_set_lhs (new_stmt, signs);
7003 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7005 new_stmt = gimple_build_assign (NULL_TREE,
7006 BIT_AND_EXPR, signs, high_bits);
7007 signs = make_ssa_name (word_type);
7008 gimple_assign_set_lhs (new_stmt, signs);
7009 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7010 result_low = make_ssa_name (word_type);
7011 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
7012 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7014 else
7016 tree a_low = make_ssa_name (word_type);
7017 new_stmt = gimple_build_assign (a_low,
7018 BIT_AND_EXPR, wvop0, low_bits);
7019 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7020 signs = make_ssa_name (word_type);
7021 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7022 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7023 new_stmt = gimple_build_assign (NULL_TREE,
7024 BIT_AND_EXPR, signs, high_bits);
7025 signs = make_ssa_name (word_type);
7026 gimple_assign_set_lhs (new_stmt, signs);
7027 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7028 result_low = make_ssa_name (word_type);
7029 new_stmt = gimple_build_assign (result_low,
7030 MINUS_EXPR, high_bits, a_low);
7031 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7033 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
7034 signs);
7035 result_low = make_ssa_name (word_type);
7036 gimple_assign_set_lhs (new_stmt, result_low);
7037 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7038 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7039 build1 (VIEW_CONVERT_EXPR,
7040 vectype, result_low));
7041 new_temp = make_ssa_name (vectype);
7042 gimple_assign_set_lhs (new_stmt, new_temp);
7043 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7045 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7047 tree mask;
7048 if (masked_loop_p)
7049 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7050 vec_num * ncopies, vectype, i);
7051 else
7052 /* Dummy mask. */
7053 mask = build_minus_one_cst (truth_type_for (vectype));
7054 auto_vec<tree> vops (6);
7055 vops.quick_push (mask);
7056 vops.quick_push (vop0);
7057 if (vop1)
7058 vops.quick_push (vop1);
7059 if (vop2)
7060 vops.quick_push (vop2);
7061 if (reduc_idx >= 0)
7063 /* Perform the operation on active elements only and take
7064 inactive elements from the reduction chain input. */
7065 gcc_assert (!vop2);
7066 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7068 else
7070 auto else_value = targetm.preferred_else_value
7071 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7072 vops.quick_push (else_value);
7074 if (len_loop_p)
7076 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7077 vec_num * ncopies, vectype, i, 1);
7078 signed char biasval
7079 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7080 tree bias = build_int_cst (intQI_type_node, biasval);
7081 vops.quick_push (len);
7082 vops.quick_push (bias);
7084 gcall *call
7085 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7086 : cond_len_fn,
7087 vops);
7088 new_temp = make_ssa_name (vec_dest, call);
7089 gimple_call_set_lhs (call, new_temp);
7090 gimple_call_set_nothrow (call, true);
7091 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7092 new_stmt = call;
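/* For illustration only (assuming CODE is PLUS_EXPR): the internal call
   built above has the shape
     new_temp = .COND_ADD (mask, vop0, vop1, else_value);
   or, in the length-controlled case,
     new_temp = .COND_LEN_ADD (mask, vop0, vop1, else_value, len, bias);
   where inactive lanes take their value from ELSE_VALUE, which is the
   reduction chain input when REDUC_IDX >= 0.  */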
7094 else
7096 tree mask = NULL_TREE;
7097 /* When combining two masks, check whether either of them is already
7098 combined with a loop mask elsewhere; if so, we can mark that the new
7099 combined mask does not need to be combined with a loop mask again. */
7100 if (masked_loop_p
7101 && code == BIT_AND_EXPR
7102 && VECTOR_BOOLEAN_TYPE_P (vectype))
7104 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7105 ncopies}))
7107 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7108 vec_num * ncopies, vectype, i);
7110 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7111 vop0, gsi);
7114 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7115 ncopies }))
7117 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7118 vec_num * ncopies, vectype, i);
7120 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7121 vop1, gsi);
7125 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7126 new_temp = make_ssa_name (vec_dest, new_stmt);
7127 gimple_assign_set_lhs (new_stmt, new_temp);
7128 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7129 if (using_emulated_vectors_p)
7130 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7132 /* Enter the combined value into the vector cond hash so we don't
7133 AND it with a loop mask again. */
7134 if (mask)
7135 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7138 if (vec_cvt_dest)
7140 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7141 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7142 new_temp);
7143 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7144 gimple_assign_set_lhs (new_stmt, new_temp);
7145 vect_finish_stmt_generation (vinfo, stmt_info,
7146 new_stmt, gsi);
7149 if (slp_node)
7150 slp_node->push_vec_def (new_stmt);
7151 else
7152 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7155 if (!slp_node)
7156 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7158 vec_oprnds0.release ();
7159 vec_oprnds1.release ();
7160 vec_oprnds2.release ();
7162 return true;
7165 /* A helper function to ensure data reference DR_INFO's base alignment. */
7167 static void
7168 ensure_base_align (dr_vec_info *dr_info)
7170 /* Alignment is only analyzed for the first element of a DR group;
7171 use that to determine the base alignment we need to enforce. */
7172 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7173 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7175 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7177 if (dr_info->base_misaligned)
7179 tree base_decl = dr_info->base_decl;
7181 /* We should only be able to increase the alignment of a base object if
7182 we know what its new alignment should be at compile time. */
7183 unsigned HOST_WIDE_INT align_base_to =
7184 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7186 if (decl_in_symtab_p (base_decl))
7187 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7188 else if (DECL_ALIGN (base_decl) < align_base_to)
7190 SET_DECL_ALIGN (base_decl, align_base_to);
7191 DECL_USER_ALIGN (base_decl) = 1;
7193 dr_info->base_misaligned = false;
7198 /* Function get_group_alias_ptr_type.
7200 Return the alias type for the group starting at FIRST_STMT_INFO. */
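/* For example, if an interleaved group stores both an int and a float
   member of the same structure, the members' alias sets differ and we
   conservatively fall back to ptr_type_node for the whole group.  */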
7202 static tree
7203 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7205 struct data_reference *first_dr, *next_dr;
7207 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7208 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7209 while (next_stmt_info)
7211 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7212 if (get_alias_set (DR_REF (first_dr))
7213 != get_alias_set (DR_REF (next_dr)))
7215 if (dump_enabled_p ())
7216 dump_printf_loc (MSG_NOTE, vect_location,
7217 "conflicting alias set types.\n");
7218 return ptr_type_node;
7220 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7222 return reference_alias_ptr_type (DR_REF (first_dr));
7226 /* Function scan_operand_equal_p.
7228 Helper function for check_scan_store. Compare two references
7229 with .GOMP_SIMD_LANE bases. */
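/* As a rough illustration: the references compared here are the
   D.2042[_20]-style accesses from the scan pattern handled below, possibly
   rewritten by earlier passes into MEM_REFs off &D.2042 plus a scaled
   .GOMP_SIMD_LANE index; the function strips such pointer arithmetic,
   constant scales and widening casts before comparing base, offset and
   step, rather than requiring syntactic identity.  */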
7231 static bool
7232 scan_operand_equal_p (tree ref1, tree ref2)
7234 tree ref[2] = { ref1, ref2 };
7235 poly_int64 bitsize[2], bitpos[2];
7236 tree offset[2], base[2];
7237 for (int i = 0; i < 2; ++i)
7239 machine_mode mode;
7240 int unsignedp, reversep, volatilep = 0;
7241 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7242 &offset[i], &mode, &unsignedp,
7243 &reversep, &volatilep);
7244 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7245 return false;
7246 if (TREE_CODE (base[i]) == MEM_REF
7247 && offset[i] == NULL_TREE
7248 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7250 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7251 if (is_gimple_assign (def_stmt)
7252 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7253 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7254 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7256 if (maybe_ne (mem_ref_offset (base[i]), 0))
7257 return false;
7258 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7259 offset[i] = gimple_assign_rhs2 (def_stmt);
7264 if (!operand_equal_p (base[0], base[1], 0))
7265 return false;
7266 if (maybe_ne (bitsize[0], bitsize[1]))
7267 return false;
7268 if (offset[0] != offset[1])
7270 if (!offset[0] || !offset[1])
7271 return false;
7272 if (!operand_equal_p (offset[0], offset[1], 0))
7274 tree step[2];
7275 for (int i = 0; i < 2; ++i)
7277 step[i] = integer_one_node;
7278 if (TREE_CODE (offset[i]) == SSA_NAME)
7280 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7281 if (is_gimple_assign (def_stmt)
7282 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7283 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7284 == INTEGER_CST))
7286 step[i] = gimple_assign_rhs2 (def_stmt);
7287 offset[i] = gimple_assign_rhs1 (def_stmt);
7290 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7292 step[i] = TREE_OPERAND (offset[i], 1);
7293 offset[i] = TREE_OPERAND (offset[i], 0);
7295 tree rhs1 = NULL_TREE;
7296 if (TREE_CODE (offset[i]) == SSA_NAME)
7298 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7299 if (gimple_assign_cast_p (def_stmt))
7300 rhs1 = gimple_assign_rhs1 (def_stmt);
7302 else if (CONVERT_EXPR_P (offset[i]))
7303 rhs1 = TREE_OPERAND (offset[i], 0);
7304 if (rhs1
7305 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7306 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7307 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7308 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7309 offset[i] = rhs1;
7311 if (!operand_equal_p (offset[0], offset[1], 0)
7312 || !operand_equal_p (step[0], step[1], 0))
7313 return false;
7316 return true;
7320 enum scan_store_kind {
7321 /* Normal permutation. */
7322 scan_store_kind_perm,
7324 /* Whole vector left shift permutation with zero init. */
7325 scan_store_kind_lshift_zero,
7327 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7328 scan_store_kind_lshift_cond
7331 /* Function scan_store_can_perm_p.
7333 Verify if we can perform the needed permutations or whole vector shifts.
7334 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
7335 USE_WHOLE_VECTOR, if non-NULL, records which scan_store_kind operation
7336 to perform at each step. */
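/* For example, for an 8-element vector (units_log2 == 3) the selectors
   tried by this function are
     step 0: { 0, 8, 9, 10, 11, 12, 13, 14 }
     step 1: { 0, 1, 8, 9, 10, 11, 12, 13 }
     step 2: { 0, 1, 2, 3, 8, 9, 10, 11 }
     step 3: { 7, 7, 7, 7, 7, 7, 7, 7 }
   i.e. the same permutations used by the VEC_PERM_EXPRs in the IL
   example inside check_scan_store below.  */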
7338 static int
7339 scan_store_can_perm_p (tree vectype, tree init,
7340 vec<enum scan_store_kind> *use_whole_vector = NULL)
7342 enum machine_mode vec_mode = TYPE_MODE (vectype);
7343 unsigned HOST_WIDE_INT nunits;
7344 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7345 return -1;
7346 int units_log2 = exact_log2 (nunits);
7347 if (units_log2 <= 0)
7348 return -1;
7350 int i;
7351 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7352 for (i = 0; i <= units_log2; ++i)
7354 unsigned HOST_WIDE_INT j, k;
7355 enum scan_store_kind kind = scan_store_kind_perm;
7356 vec_perm_builder sel (nunits, nunits, 1);
7357 sel.quick_grow (nunits);
7358 if (i == units_log2)
7360 for (j = 0; j < nunits; ++j)
7361 sel[j] = nunits - 1;
7363 else
7365 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7366 sel[j] = j;
7367 for (k = 0; j < nunits; ++j, ++k)
7368 sel[j] = nunits + k;
7370 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7371 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7373 if (i == units_log2)
7374 return -1;
7376 if (whole_vector_shift_kind == scan_store_kind_perm)
7378 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7379 return -1;
7380 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7381 /* Whole vector shifts shift in zeros, so if init is an all-zero
7382 constant, there is no need to do anything further. */
7383 if ((TREE_CODE (init) != INTEGER_CST
7384 && TREE_CODE (init) != REAL_CST)
7385 || !initializer_zerop (init))
7387 tree masktype = truth_type_for (vectype);
7388 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7389 return -1;
7390 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7393 kind = whole_vector_shift_kind;
7395 if (use_whole_vector)
7397 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7398 use_whole_vector->safe_grow_cleared (i, true);
7399 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7400 use_whole_vector->safe_push (kind);
7404 return units_log2;
7408 /* Function check_scan_store.
7410 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7412 static bool
7413 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7414 enum vect_def_type rhs_dt, bool slp, tree mask,
7415 vect_memory_access_type memory_access_type)
7417 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7418 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7419 tree ref_type;
7421 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7422 if (slp
7423 || mask
7424 || memory_access_type != VMAT_CONTIGUOUS
7425 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7426 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7427 || loop_vinfo == NULL
7428 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7429 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7430 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7431 || !integer_zerop (DR_INIT (dr_info->dr))
7432 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7433 || !alias_sets_conflict_p (get_alias_set (vectype),
7434 get_alias_set (TREE_TYPE (ref_type))))
7436 if (dump_enabled_p ())
7437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7438 "unsupported OpenMP scan store.\n");
7439 return false;
7442 /* We need to pattern match code built by OpenMP lowering and simplified
7443 by subsequent optimizations into something we can handle.
7444 #pragma omp simd reduction(inscan,+:r)
7445 for (...)
7447 r += something ();
7448 #pragma omp scan inclusive (r)
7449 use (r);
7451 shall have body with:
7452 // Initialization for input phase, store the reduction initializer:
7453 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7454 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7455 D.2042[_21] = 0;
7456 // Actual input phase:
7458 r.0_5 = D.2042[_20];
7459 _6 = _4 + r.0_5;
7460 D.2042[_20] = _6;
7461 // Initialization for scan phase:
7462 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7463 _26 = D.2043[_25];
7464 _27 = D.2042[_25];
7465 _28 = _26 + _27;
7466 D.2043[_25] = _28;
7467 D.2042[_25] = _28;
7468 // Actual scan phase:
7470 r.1_8 = D.2042[_20];
7472 The "omp simd array" variable D.2042 holds the privatized copy used
7473 inside the loop, and D.2043 is another one that holds copies of
7474 the current original list item. The separate GOMP_SIMD_LANE ifn
7475 kinds are there in order to allow optimizing the initializer store
7476 and combiner sequence, e.g. if it is originally some C++-ish user
7477 defined reduction, while still allowing the vectorizer to pattern
7478 recognize it and turn it into the appropriate vectorized scan.
7480 For exclusive scan, this is slightly different:
7481 #pragma omp simd reduction(inscan,+:r)
7482 for (...)
7484 use (r);
7485 #pragma omp scan exclusive (r)
7486 r += something ();
7488 shall have body with:
7489 // Initialization for input phase, store the reduction initializer:
7490 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7491 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7492 D.2042[_21] = 0;
7493 // Actual input phase:
7495 r.0_5 = D.2042[_20];
7496 _6 = _4 + r.0_5;
7497 D.2042[_20] = _6;
7498 // Initialization for scan phase:
7499 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7500 _26 = D.2043[_25];
7501 D.2044[_25] = _26;
7502 _27 = D.2042[_25];
7503 _28 = _26 + _27;
7504 D.2043[_25] = _28;
7505 // Actual scan phase:
7507 r.1_8 = D.2044[_20];
7508 ... */
7510 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7512 /* Match the D.2042[_21] = 0; store above. Just require that
7513 it is a constant or external definition store. */
7514 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7516 fail_init:
7517 if (dump_enabled_p ())
7518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7519 "unsupported OpenMP scan initializer store.\n");
7520 return false;
7523 if (! loop_vinfo->scan_map)
7524 loop_vinfo->scan_map = new hash_map<tree, tree>;
7525 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7526 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7527 if (cached)
7528 goto fail_init;
7529 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7531 /* These stores can be vectorized normally. */
7532 return true;
7535 if (rhs_dt != vect_internal_def)
7537 fail:
7538 if (dump_enabled_p ())
7539 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7540 "unsupported OpenMP scan combiner pattern.\n");
7541 return false;
7544 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7545 tree rhs = gimple_assign_rhs1 (stmt);
7546 if (TREE_CODE (rhs) != SSA_NAME)
7547 goto fail;
7549 gimple *other_store_stmt = NULL;
7550 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7551 bool inscan_var_store
7552 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7554 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7556 if (!inscan_var_store)
7558 use_operand_p use_p;
7559 imm_use_iterator iter;
7560 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7562 gimple *use_stmt = USE_STMT (use_p);
7563 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7564 continue;
7565 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7566 || !is_gimple_assign (use_stmt)
7567 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7568 || other_store_stmt
7569 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7570 goto fail;
7571 other_store_stmt = use_stmt;
7573 if (other_store_stmt == NULL)
7574 goto fail;
7575 rhs = gimple_assign_lhs (other_store_stmt);
7576 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7577 goto fail;
7580 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7582 use_operand_p use_p;
7583 imm_use_iterator iter;
7584 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7586 gimple *use_stmt = USE_STMT (use_p);
7587 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7588 continue;
7589 if (other_store_stmt)
7590 goto fail;
7591 other_store_stmt = use_stmt;
7594 else
7595 goto fail;
7597 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7598 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7599 || !is_gimple_assign (def_stmt)
7600 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7601 goto fail;
7603 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7604 /* For pointer addition, we should use the normal plus for the vector
7605 operation. */
7606 switch (code)
7608 case POINTER_PLUS_EXPR:
7609 code = PLUS_EXPR;
7610 break;
7611 case MULT_HIGHPART_EXPR:
7612 goto fail;
7613 default:
7614 break;
7616 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7617 goto fail;
7619 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7620 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7621 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7622 goto fail;
7624 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7625 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7626 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7627 || !gimple_assign_load_p (load1_stmt)
7628 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7629 || !gimple_assign_load_p (load2_stmt))
7630 goto fail;
7632 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7633 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7634 if (load1_stmt_info == NULL
7635 || load2_stmt_info == NULL
7636 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7637 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7638 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7639 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7640 goto fail;
7642 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7644 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7645 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7646 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7647 goto fail;
7648 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7649 tree lrhs;
7650 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7651 lrhs = rhs1;
7652 else
7653 lrhs = rhs2;
7654 use_operand_p use_p;
7655 imm_use_iterator iter;
7656 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7658 gimple *use_stmt = USE_STMT (use_p);
7659 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7660 continue;
7661 if (other_store_stmt)
7662 goto fail;
7663 other_store_stmt = use_stmt;
7667 if (other_store_stmt == NULL)
7668 goto fail;
7669 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7670 || !gimple_store_p (other_store_stmt))
7671 goto fail;
7673 stmt_vec_info other_store_stmt_info
7674 = loop_vinfo->lookup_stmt (other_store_stmt);
7675 if (other_store_stmt_info == NULL
7676 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7677 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7678 goto fail;
7680 gimple *stmt1 = stmt;
7681 gimple *stmt2 = other_store_stmt;
7682 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7683 std::swap (stmt1, stmt2);
7684 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7685 gimple_assign_rhs1 (load2_stmt)))
7687 std::swap (rhs1, rhs2);
7688 std::swap (load1_stmt, load2_stmt);
7689 std::swap (load1_stmt_info, load2_stmt_info);
7691 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7692 gimple_assign_rhs1 (load1_stmt)))
7693 goto fail;
7695 tree var3 = NULL_TREE;
7696 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7697 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7698 gimple_assign_rhs1 (load2_stmt)))
7699 goto fail;
7700 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7702 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7703 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7704 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7705 goto fail;
7706 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7707 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7708 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7709 || lookup_attribute ("omp simd inscan exclusive",
7710 DECL_ATTRIBUTES (var3)))
7711 goto fail;
7714 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7715 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7716 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7717 goto fail;
7719 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7720 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7721 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7722 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7723 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7724 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7725 goto fail;
7727 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7728 std::swap (var1, var2);
7730 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7732 if (!lookup_attribute ("omp simd inscan exclusive",
7733 DECL_ATTRIBUTES (var1)))
7734 goto fail;
7735 var1 = var3;
7738 if (loop_vinfo->scan_map == NULL)
7739 goto fail;
7740 tree *init = loop_vinfo->scan_map->get (var1);
7741 if (init == NULL)
7742 goto fail;
7744 /* The IL is as expected; now check if we can actually vectorize it.
7745 Inclusive scan:
7746 _26 = D.2043[_25];
7747 _27 = D.2042[_25];
7748 _28 = _26 + _27;
7749 D.2043[_25] = _28;
7750 D.2042[_25] = _28;
7751 should be vectorized as (where _40 is the vectorized rhs
7752 from the D.2042[_21] = 0; store):
7753 _30 = MEM <vector(8) int> [(int *)&D.2043];
7754 _31 = MEM <vector(8) int> [(int *)&D.2042];
7755 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7756 _33 = _31 + _32;
7757 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7758 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7759 _35 = _33 + _34;
7760 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7761 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7762 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7763 _37 = _35 + _36;
7764 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7765 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7766 _38 = _30 + _37;
7767 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7768 MEM <vector(8) int> [(int *)&D.2043] = _39;
7769 MEM <vector(8) int> [(int *)&D.2042] = _38;
7770 Exclusive scan:
7771 _26 = D.2043[_25];
7772 D.2044[_25] = _26;
7773 _27 = D.2042[_25];
7774 _28 = _26 + _27;
7775 D.2043[_25] = _28;
7776 should be vectorized as (where _40 is the vectorized rhs
7777 from the D.2042[_21] = 0; store):
7778 _30 = MEM <vector(8) int> [(int *)&D.2043];
7779 _31 = MEM <vector(8) int> [(int *)&D.2042];
7780 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7781 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7782 _34 = _32 + _33;
7783 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7784 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7785 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7786 _36 = _34 + _35;
7787 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7788 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7789 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7790 _38 = _36 + _37;
7791 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7792 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7793 _39 = _30 + _38;
7794 _50 = _31 + _39;
7795 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7796 MEM <vector(8) int> [(int *)&D.2044] = _39;
7797 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7798 enum machine_mode vec_mode = TYPE_MODE (vectype);
7799 optab optab = optab_for_tree_code (code, vectype, optab_default);
7800 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7801 goto fail;
7803 int units_log2 = scan_store_can_perm_p (vectype, *init);
7804 if (units_log2 == -1)
7805 goto fail;
7807 return true;
7811 /* Function vectorizable_scan_store.
7813 Helper of vectorizable_store, with the same arguments as vectorizable_store.
7814 Handle only the transformation; checking is done in check_scan_store. */
7816 static bool
7817 vectorizable_scan_store (vec_info *vinfo,
7818 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7819 gimple **vec_stmt, int ncopies)
7821 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7822 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7823 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7824 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7826 if (dump_enabled_p ())
7827 dump_printf_loc (MSG_NOTE, vect_location,
7828 "transform scan store. ncopies = %d\n", ncopies);
7830 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7831 tree rhs = gimple_assign_rhs1 (stmt);
7832 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7834 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7835 bool inscan_var_store
7836 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7838 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7840 use_operand_p use_p;
7841 imm_use_iterator iter;
7842 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7844 gimple *use_stmt = USE_STMT (use_p);
7845 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7846 continue;
7847 rhs = gimple_assign_lhs (use_stmt);
7848 break;
7852 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7853 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7854 if (code == POINTER_PLUS_EXPR)
7855 code = PLUS_EXPR;
7856 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7857 && commutative_tree_code (code));
7858 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7859 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7860 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7861 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7862 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7863 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7864 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7865 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7866 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7867 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7868 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7870 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7872 std::swap (rhs1, rhs2);
7873 std::swap (var1, var2);
7874 std::swap (load1_dr_info, load2_dr_info);
7877 tree *init = loop_vinfo->scan_map->get (var1);
7878 gcc_assert (init);
7880 unsigned HOST_WIDE_INT nunits;
7881 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7882 gcc_unreachable ();
7883 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7884 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7885 gcc_assert (units_log2 > 0);
7886 auto_vec<tree, 16> perms;
7887 perms.quick_grow (units_log2 + 1);
7888 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7889 for (int i = 0; i <= units_log2; ++i)
7891 unsigned HOST_WIDE_INT j, k;
7892 vec_perm_builder sel (nunits, nunits, 1);
7893 sel.quick_grow (nunits);
7894 if (i == units_log2)
7895 for (j = 0; j < nunits; ++j)
7896 sel[j] = nunits - 1;
7897 else
7899 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7900 sel[j] = j;
7901 for (k = 0; j < nunits; ++j, ++k)
7902 sel[j] = nunits + k;
7904 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7905 if (!use_whole_vector.is_empty ()
7906 && use_whole_vector[i] != scan_store_kind_perm)
7908 if (zero_vec == NULL_TREE)
7909 zero_vec = build_zero_cst (vectype);
7910 if (masktype == NULL_TREE
7911 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7912 masktype = truth_type_for (vectype);
7913 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7915 else
7916 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7919 tree vec_oprnd1 = NULL_TREE;
7920 tree vec_oprnd2 = NULL_TREE;
7921 tree vec_oprnd3 = NULL_TREE;
7922 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7923 tree dataref_offset = build_int_cst (ref_type, 0);
7924 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7925 vectype, VMAT_CONTIGUOUS);
7926 tree ldataref_ptr = NULL_TREE;
7927 tree orig = NULL_TREE;
7928 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7929 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7930 auto_vec<tree> vec_oprnds1;
7931 auto_vec<tree> vec_oprnds2;
7932 auto_vec<tree> vec_oprnds3;
7933 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7934 *init, &vec_oprnds1,
7935 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7936 rhs2, &vec_oprnds3);
7937 for (int j = 0; j < ncopies; j++)
7939 vec_oprnd1 = vec_oprnds1[j];
7940 if (ldataref_ptr == NULL)
7941 vec_oprnd2 = vec_oprnds2[j];
7942 vec_oprnd3 = vec_oprnds3[j];
7943 if (j == 0)
7944 orig = vec_oprnd3;
7945 else if (!inscan_var_store)
7946 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7948 if (ldataref_ptr)
7950 vec_oprnd2 = make_ssa_name (vectype);
7951 tree data_ref = fold_build2 (MEM_REF, vectype,
7952 unshare_expr (ldataref_ptr),
7953 dataref_offset);
7954 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7955 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7956 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7957 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7958 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7961 tree v = vec_oprnd2;
7962 for (int i = 0; i < units_log2; ++i)
7964 tree new_temp = make_ssa_name (vectype);
7965 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7966 (zero_vec
7967 && (use_whole_vector[i]
7968 != scan_store_kind_perm))
7969 ? zero_vec : vec_oprnd1, v,
7970 perms[i]);
7971 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7972 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7973 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7975 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7977 /* Whole vector shift shifted in zero bits, but if *init
7978 is not initializer_zerop, we need to replace those elements
7979 with elements from vec_oprnd1. */
7980 tree_vector_builder vb (masktype, nunits, 1);
7981 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7982 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7983 ? boolean_false_node : boolean_true_node);
7985 tree new_temp2 = make_ssa_name (vectype);
7986 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7987 new_temp, vec_oprnd1);
7988 vect_finish_stmt_generation (vinfo, stmt_info,
7989 g, gsi);
7990 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7991 new_temp = new_temp2;
7994 /* For exclusive scan, perform the perms[i] permutation once
7995 more. */
7996 if (i == 0
7997 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7998 && v == vec_oprnd2)
8000 v = new_temp;
8001 --i;
8002 continue;
8005 tree new_temp2 = make_ssa_name (vectype);
8006 g = gimple_build_assign (new_temp2, code, v, new_temp);
8007 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8008 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8010 v = new_temp2;
8013 tree new_temp = make_ssa_name (vectype);
8014 gimple *g = gimple_build_assign (new_temp, code, orig, v);
8015 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8016 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8018 tree last_perm_arg = new_temp;
8019 /* For exclusive scan, new_temp computed above is the exclusive scan
8020 prefix sum. Turn it into inclusive prefix sum for the broadcast
8021 of the last element into orig. */
8022 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8024 last_perm_arg = make_ssa_name (vectype);
8025 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8026 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8027 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8030 orig = make_ssa_name (vectype);
8031 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8032 last_perm_arg, perms[units_log2]);
8033 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8034 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8036 if (!inscan_var_store)
8038 tree data_ref = fold_build2 (MEM_REF, vectype,
8039 unshare_expr (dataref_ptr),
8040 dataref_offset);
8041 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8042 g = gimple_build_assign (data_ref, new_temp);
8043 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8044 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8048 if (inscan_var_store)
8049 for (int j = 0; j < ncopies; j++)
8051 if (j != 0)
8052 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8054 tree data_ref = fold_build2 (MEM_REF, vectype,
8055 unshare_expr (dataref_ptr),
8056 dataref_offset);
8057 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8058 gimple *g = gimple_build_assign (data_ref, orig);
8059 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8060 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8062 return true;
8066 /* Function vectorizable_store.
8068 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
8069 that can be vectorized.
8070 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8071 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8072 Return true if STMT_INFO is vectorizable in this way. */
8074 static bool
8075 vectorizable_store (vec_info *vinfo,
8076 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8077 gimple **vec_stmt, slp_tree slp_node,
8078 stmt_vector_for_cost *cost_vec)
8080 tree data_ref;
8081 tree vec_oprnd = NULL_TREE;
8082 tree elem_type;
8083 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8084 class loop *loop = NULL;
8085 machine_mode vec_mode;
8086 tree dummy;
8087 enum vect_def_type rhs_dt = vect_unknown_def_type;
8088 enum vect_def_type mask_dt = vect_unknown_def_type;
8089 tree dataref_ptr = NULL_TREE;
8090 tree dataref_offset = NULL_TREE;
8091 gimple *ptr_incr = NULL;
8092 int ncopies;
8093 int j;
8094 stmt_vec_info first_stmt_info;
8095 bool grouped_store;
8096 unsigned int group_size, i;
8097 bool slp = (slp_node != NULL);
8098 unsigned int vec_num;
8099 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8100 tree aggr_type;
8101 gather_scatter_info gs_info;
8102 poly_uint64 vf;
8103 vec_load_store_type vls_type;
8104 tree ref_type;
8106 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8107 return false;
8109 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8110 && ! vec_stmt)
8111 return false;
8113 /* Is vectorizable store? */
8115 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8116 slp_tree mask_node = NULL;
8117 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8119 tree scalar_dest = gimple_assign_lhs (assign);
8120 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8121 && is_pattern_stmt_p (stmt_info))
8122 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8123 if (TREE_CODE (scalar_dest) != ARRAY_REF
8124 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8125 && TREE_CODE (scalar_dest) != INDIRECT_REF
8126 && TREE_CODE (scalar_dest) != COMPONENT_REF
8127 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8128 && TREE_CODE (scalar_dest) != REALPART_EXPR
8129 && TREE_CODE (scalar_dest) != MEM_REF)
8130 return false;
8132 else
8134 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8135 if (!call || !gimple_call_internal_p (call))
8136 return false;
8138 internal_fn ifn = gimple_call_internal_fn (call);
8139 if (!internal_store_fn_p (ifn))
8140 return false;
8142 int mask_index = internal_fn_mask_index (ifn);
8143 if (mask_index >= 0 && slp_node)
8144 mask_index = vect_slp_child_index_for_operand
8145 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8146 if (mask_index >= 0
8147 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8148 &mask, &mask_node, &mask_dt,
8149 &mask_vectype))
8150 return false;
8153 /* Cannot have hybrid store SLP -- that would mean storing to the
8154 same location twice. */
8155 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8157 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8158 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8160 if (loop_vinfo)
8162 loop = LOOP_VINFO_LOOP (loop_vinfo);
8163 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8165 else
8166 vf = 1;
8168 /* Multiple types in SLP are handled by creating the appropriate number of
8169 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8170 case of SLP. */
8171 if (slp)
8172 ncopies = 1;
8173 else
8174 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8176 gcc_assert (ncopies >= 1);
8178 /* FORNOW. This restriction should be relaxed. */
8179 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8181 if (dump_enabled_p ())
8182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8183 "multiple types in nested loop.\n");
8184 return false;
8187 tree op;
8188 slp_tree op_node;
8189 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8190 &op, &op_node, &rhs_dt, &rhs_vectype, &vls_type))
8191 return false;
8193 elem_type = TREE_TYPE (vectype);
8194 vec_mode = TYPE_MODE (vectype);
8196 if (!STMT_VINFO_DATA_REF (stmt_info))
8197 return false;
8199 vect_memory_access_type memory_access_type;
8200 enum dr_alignment_support alignment_support_scheme;
8201 int misalignment;
8202 poly_int64 poffset;
8203 internal_fn lanes_ifn;
8204 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8205 ncopies, &memory_access_type, &poffset,
8206 &alignment_support_scheme, &misalignment, &gs_info,
8207 &lanes_ifn))
8208 return false;
8210 if (mask)
8212 if (memory_access_type == VMAT_CONTIGUOUS)
8214 if (!VECTOR_MODE_P (vec_mode)
8215 || !can_vec_mask_load_store_p (vec_mode,
8216 TYPE_MODE (mask_vectype), false))
8217 return false;
8219 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8220 && (memory_access_type != VMAT_GATHER_SCATTER
8221 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8223 if (dump_enabled_p ())
8224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8225 "unsupported access type for masked store.\n");
8226 return false;
8228 else if (memory_access_type == VMAT_GATHER_SCATTER
8229 && gs_info.ifn == IFN_LAST
8230 && !gs_info.decl)
8232 if (dump_enabled_p ())
8233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8234 "unsupported masked emulated scatter.\n");
8235 return false;
8238 else
8240 /* FORNOW. In some cases we can vectorize even if the data-type is not
8241 supported (e.g. array initialization with 0). */
8242 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8243 return false;
8246 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8247 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8248 && memory_access_type != VMAT_GATHER_SCATTER
8249 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8250 if (grouped_store)
8252 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8253 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8254 group_size = DR_GROUP_SIZE (first_stmt_info);
8256 else
8258 first_stmt_info = stmt_info;
8259 first_dr_info = dr_info;
8260 group_size = vec_num = 1;
8263 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8265 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8266 memory_access_type))
8267 return false;
8270 bool costing_p = !vec_stmt;
8271 if (costing_p) /* transformation not required. */
8273 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8275 if (loop_vinfo
8276 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8277 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8278 vls_type, group_size,
8279 memory_access_type, &gs_info,
8280 mask);
8282 if (slp_node
8283 && (!vect_maybe_update_slp_op_vectype (op_node, vectype)
8284 || (mask
8285 && !vect_maybe_update_slp_op_vectype (mask_node,
8286 mask_vectype))))
8288 if (dump_enabled_p ())
8289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8290 "incompatible vector types for invariants\n");
8291 return false;
8294 if (dump_enabled_p ()
8295 && memory_access_type != VMAT_ELEMENTWISE
8296 && memory_access_type != VMAT_GATHER_SCATTER
8297 && alignment_support_scheme != dr_aligned)
8298 dump_printf_loc (MSG_NOTE, vect_location,
8299 "Vectorizing an unaligned access.\n");
8301 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8303 /* As function vect_transform_stmt shows, for interleaving stores
8304 the whole chain is vectorized when the last store in the chain
8305 is reached; the other stores in the group are skipped. So we
8306 want to cost only the last one here, but it's not trivial to
8307 get hold of the last one; since using the first one is equivalent
8308 for costing purposes, use the first one instead. */
8309 if (grouped_store
8310 && !slp
8311 && first_stmt_info != stmt_info)
8312 return true;
8314 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8316 /* Transform. */
8318 ensure_base_align (dr_info);
8320 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8322 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8323 gcc_assert (!slp);
8324 if (costing_p)
8326 unsigned int inside_cost = 0, prologue_cost = 0;
8327 if (vls_type == VLS_STORE_INVARIANT)
8328 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8329 stmt_info, 0, vect_prologue);
8330 vect_get_store_cost (vinfo, stmt_info, ncopies,
8331 alignment_support_scheme, misalignment,
8332 &inside_cost, cost_vec);
8334 if (dump_enabled_p ())
8335 dump_printf_loc (MSG_NOTE, vect_location,
8336 "vect_model_store_cost: inside_cost = %d, "
8337 "prologue_cost = %d .\n",
8338 inside_cost, prologue_cost);
8340 return true;
8342 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8345 if (grouped_store)
8347 /* FORNOW */
8348 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8350 if (slp)
8352 grouped_store = false;
8353 /* VEC_NUM is the number of vect stmts to be created for this
8354 group. */
8355 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8356 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8357 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8358 == first_stmt_info);
8359 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8360 op = vect_get_store_rhs (first_stmt_info);
8362 else
8363 /* VEC_NUM is the number of vect stmts to be created for this
8364 group. */
8365 vec_num = group_size;
8367 ref_type = get_group_alias_ptr_type (first_stmt_info);
8369 else
8370 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8372 if (!costing_p && dump_enabled_p ())
8373 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8374 ncopies);
8376 /* Check if we need to update the prologue cost for an invariant,
8377 and update it accordingly if so. If it's not for an
8378 interleaving store, we can just check vls_type; but if it is
8379 for an interleaving store, we need to check the def_type
8380 of the stored value, since the current vls_type is just
8381 for first_stmt_info. */
8382 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8384 gcc_assert (costing_p);
8385 if (slp)
8386 return;
8387 if (grouped_store)
8389 gcc_assert (store_rhs);
8390 enum vect_def_type cdt;
8391 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8392 if (cdt != vect_constant_def && cdt != vect_external_def)
8393 return;
8395 else if (vls_type != VLS_STORE_INVARIANT)
8396 return;
8397 *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
8398 0, vect_prologue);
8401 if (memory_access_type == VMAT_ELEMENTWISE
8402 || memory_access_type == VMAT_STRIDED_SLP)
8404 unsigned inside_cost = 0, prologue_cost = 0;
8405 gimple_stmt_iterator incr_gsi;
8406 bool insert_after;
8407 gimple *incr;
8408 tree offvar;
8409 tree ivstep;
8410 tree running_off;
8411 tree stride_base, stride_step, alias_off;
8412 tree vec_oprnd = NULL_TREE;
8413 tree dr_offset;
8414 unsigned int g;
8415 /* Checked by get_load_store_type. */
8416 unsigned int const_nunits = nunits.to_constant ();
8418 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8419 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8421 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8422 stride_base
8423 = fold_build_pointer_plus
8424 (DR_BASE_ADDRESS (first_dr_info->dr),
8425 size_binop (PLUS_EXPR,
8426 convert_to_ptrofftype (dr_offset),
8427 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8428 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8430 /* For a store with loop-invariant (but other than power-of-2)
8431 stride (i.e. not a grouped access) like so:
8433 for (i = 0; i < n; i += stride)
8434 array[i] = ...;
8436 we generate a new induction variable and new stores from
8437 the components of the (vectorized) rhs:
8439 for (j = 0; ; j += VF*stride)
8440 vectemp = ...;
8441 tmp1 = vectemp[0];
8442 array[j] = tmp1;
8443 tmp2 = vectemp[1];
8444 array[j + stride] = tmp2;
8448 unsigned nstores = const_nunits;
8449 unsigned lnel = 1;
8450 tree ltype = elem_type;
8451 tree lvectype = vectype;
8452 if (slp)
8454 if (group_size < const_nunits
8455 && const_nunits % group_size == 0)
8457 nstores = const_nunits / group_size;
8458 lnel = group_size;
8459 ltype = build_vector_type (elem_type, group_size);
8460 lvectype = vectype;
8462 /* First check if vec_extract optab doesn't support extraction
8463 of vector elts directly. */
8464 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8465 machine_mode vmode;
8466 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8467 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8468 group_size).exists (&vmode)
8469 || (convert_optab_handler (vec_extract_optab,
8470 TYPE_MODE (vectype), vmode)
8471 == CODE_FOR_nothing))
8473 /* Try to avoid emitting an extract of vector elements
8474 by performing the extracts using an integer type of the
8475 same size, extracting from a vector of those and then
8476 re-interpreting it as the original vector type if
8477 supported. */
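/* Concrete (illustrative) instance of this punning: for an SLP group of
   two SImode elements stored from a V4SImode vector, lsize is 64, so the
   vector is viewed as V2DImode and each scalar store writes one DImode
   chunk, i.e. one store per pair of elements instead of one per element.  */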
8478 unsigned lsize
8479 = group_size * GET_MODE_BITSIZE (elmode);
8480 unsigned int lnunits = const_nunits / group_size;
8481 /* If we can't construct such a vector fall back to
8482 element extracts from the original vector type and
8483 element size stores. */
8484 if (int_mode_for_size (lsize, 0).exists (&elmode)
8485 && VECTOR_MODE_P (TYPE_MODE (vectype))
8486 && related_vector_mode (TYPE_MODE (vectype), elmode,
8487 lnunits).exists (&vmode)
8488 && (convert_optab_handler (vec_extract_optab,
8489 vmode, elmode)
8490 != CODE_FOR_nothing))
8492 nstores = lnunits;
8493 lnel = group_size;
8494 ltype = build_nonstandard_integer_type (lsize, 1);
8495 lvectype = build_vector_type (ltype, nstores);
8497 /* Else fall back to vector extraction anyway.
8498 Fewer stores are more important than avoiding spilling
8499 of the vector we extract from. Compared to the
8500 construction case in vectorizable_load no store-forwarding
8501 issue exists here for reasonable archs. */
8504 else if (group_size >= const_nunits
8505 && group_size % const_nunits == 0)
8507 int mis_align = dr_misalignment (first_dr_info, vectype);
8508 dr_alignment_support dr_align
8509 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8510 mis_align);
8511 if (dr_align == dr_aligned
8512 || dr_align == dr_unaligned_supported)
8514 nstores = 1;
8515 lnel = const_nunits;
8516 ltype = vectype;
8517 lvectype = vectype;
8518 alignment_support_scheme = dr_align;
8519 misalignment = mis_align;
8522 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8523 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8526 if (!costing_p)
8528 ivstep = stride_step;
8529 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8530 build_int_cst (TREE_TYPE (ivstep), vf));
8532 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8534 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8535 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8536 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8537 insert_after, &offvar, NULL);
8538 incr = gsi_stmt (incr_gsi);
8540 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8543 alias_off = build_int_cst (ref_type, 0);
8544 stmt_vec_info next_stmt_info = first_stmt_info;
8545 auto_vec<tree> vec_oprnds (ncopies);
8546 /* When costing several adjacent vector stores, we'd like to cost them
8547 once with their total number instead of costing each one by one. */
8548 unsigned int n_adjacent_stores = 0;
8549 for (g = 0; g < group_size; g++)
8551 running_off = offvar;
8552 if (!costing_p)
8554 if (g)
8556 tree size = TYPE_SIZE_UNIT (ltype);
8557 tree pos
8558 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8559 tree newoff = copy_ssa_name (running_off, NULL);
8560 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8561 running_off, pos);
8562 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8563 running_off = newoff;
8566 if (!slp)
8567 op = vect_get_store_rhs (next_stmt_info);
8568 if (!costing_p)
8569 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
8570 &vec_oprnds);
8571 else
8572 update_prologue_cost (&prologue_cost, op);
8573 unsigned int group_el = 0;
8574 unsigned HOST_WIDE_INT
8575 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8576 for (j = 0; j < ncopies; j++)
8578 if (!costing_p)
8580 vec_oprnd = vec_oprnds[j];
8581 /* Pun the vector to extract from if necessary. */
8582 if (lvectype != vectype)
8584 tree tem = make_ssa_name (lvectype);
8585 tree cvt
8586 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8587 gimple *pun = gimple_build_assign (tem, cvt);
8588 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8589 vec_oprnd = tem;
8592 for (i = 0; i < nstores; i++)
8594 if (costing_p)
8596 /* Only need vector extracting when there is more
8597 than one store. */
8598 if (nstores > 1)
8599 inside_cost
8600 += record_stmt_cost (cost_vec, 1, vec_to_scalar,
8601 stmt_info, 0, vect_body);
8602 /* Treat a store of a single-lane vector type as a scalar
8603 store to avoid an ICE like PR 110776. */
8604 if (VECTOR_TYPE_P (ltype)
8605 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8606 n_adjacent_stores++;
8607 else
8608 inside_cost
8609 += record_stmt_cost (cost_vec, 1, scalar_store,
8610 stmt_info, 0, vect_body);
8611 continue;
8613 tree newref, newoff;
8614 gimple *incr, *assign;
8615 tree size = TYPE_SIZE (ltype);
8616 /* Extract the i'th component. */
8617 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8618 bitsize_int (i), size);
8619 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8620 size, pos);
8622 elem = force_gimple_operand_gsi (gsi, elem, true,
8623 NULL_TREE, true,
8624 GSI_SAME_STMT);
8626 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8627 group_el * elsz);
8628 newref = build2 (MEM_REF, ltype,
8629 running_off, this_off);
8630 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8632 /* And store it to *running_off. */
8633 assign = gimple_build_assign (newref, elem);
8634 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8636 group_el += lnel;
8637 if (! slp
8638 || group_el == group_size)
8640 newoff = copy_ssa_name (running_off, NULL);
8641 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8642 running_off, stride_step);
8643 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8645 running_off = newoff;
8646 group_el = 0;
8648 if (g == group_size - 1
8649 && !slp)
8651 if (j == 0 && i == 0)
8652 *vec_stmt = assign;
8653 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8657 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8658 vec_oprnds.truncate (0);
8659 if (slp)
8660 break;
8663 if (costing_p)
8665 if (n_adjacent_stores > 0)
8666 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8667 alignment_support_scheme, misalignment,
8668 &inside_cost, cost_vec);
8669 if (dump_enabled_p ())
8670 dump_printf_loc (MSG_NOTE, vect_location,
8671 "vect_model_store_cost: inside_cost = %d, "
8672 "prologue_cost = %d .\n",
8673 inside_cost, prologue_cost);
8676 return true;
8679 gcc_assert (alignment_support_scheme);
8680 vec_loop_masks *loop_masks
8681 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8682 ? &LOOP_VINFO_MASKS (loop_vinfo)
8683 : NULL);
8684 vec_loop_lens *loop_lens
8685 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8686 ? &LOOP_VINFO_LENS (loop_vinfo)
8687 : NULL);
8689 /* Shouldn't go with the length-based approach if fully masked. */
8690 gcc_assert (!loop_lens || !loop_masks);
8692 /* Targets with store-lane instructions must not require explicit
8693 realignment. vect_supportable_dr_alignment always returns either
8694 dr_aligned or dr_unaligned_supported for masked operations. */
8695 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8696 && !mask
8697 && !loop_masks)
8698 || alignment_support_scheme == dr_aligned
8699 || alignment_support_scheme == dr_unaligned_supported);
8701 tree offset = NULL_TREE;
8702 if (!known_eq (poffset, 0))
8703 offset = size_int (poffset);
8705 tree bump;
8706 tree vec_offset = NULL_TREE;
8707 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8709 aggr_type = NULL_TREE;
8710 bump = NULL_TREE;
8712 else if (memory_access_type == VMAT_GATHER_SCATTER)
8714 aggr_type = elem_type;
8715 if (!costing_p)
8716 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8717 &bump, &vec_offset, loop_lens);
8719 else
8721 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8722 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8723 else
8724 aggr_type = vectype;
8725 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8726 memory_access_type, loop_lens);
8729 if (mask && !costing_p)
8730 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8732 /* In case the vectorization factor (VF) is bigger than the number
8733 of elements that we can fit in a vectype (nunits), we have to generate
8734 more than one vector stmt, i.e. we need to "unroll" the
8735 vector stmt by a factor of VF/nunits. */
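/* E.g. with a vectorization factor of 8 and a 4-element vectype this
   means ncopies == 2, i.e. two vector stores are emitted for each scalar
   store in the original loop body.  */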
8737 /* In case of interleaving (non-unit grouped access):
8739 S1: &base + 2 = x2
8740 S2: &base = x0
8741 S3: &base + 1 = x1
8742 S4: &base + 3 = x3
8744 We create vectorized stores starting from the base address (the access of
8745 the first stmt in the chain, S2 in the above example) when the last store
8746 stmt of the chain (S4) is reached:
8748 VS1: &base = vx2
8749 VS2: &base + vec_size*1 = vx0
8750 VS3: &base + vec_size*2 = vx1
8751 VS4: &base + vec_size*3 = vx3
8753 Then permutation statements are generated:
8755 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8756 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8759 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8760 (the order of the data-refs in the output of vect_permute_store_chain
8761 corresponds to the order of scalar stmts in the interleaving chain - see
8762 the documentation of vect_permute_store_chain()).
8764 In case of both multiple types and interleaving, above vector stores and
8765 permutation stmts are created for every copy. The result vector stmts are
8766 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8767 STMT_VINFO_RELATED_STMT for the next copies.
8770 auto_vec<tree> dr_chain (group_size);
8771 auto_vec<tree> vec_masks;
8772 tree vec_mask = NULL;
8773 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8774 for (i = 0; i < group_size; i++)
8775 gvec_oprnds.quick_push (new auto_vec<tree> (ncopies));
8777 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8779 gcc_assert (!slp && grouped_store);
8780 unsigned inside_cost = 0, prologue_cost = 0;
8781 /* For costing some adjacent vector stores, we'd like to cost with
8782 the total number of them once instead of costing each one by one. */
8783 unsigned int n_adjacent_stores = 0;
8784 for (j = 0; j < ncopies; j++)
8786 gimple *new_stmt;
8787 if (j == 0)
8789 /* For interleaved stores we collect vectorized defs for all
8790 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8791 as an input to vect_permute_store_chain(). */
8792 stmt_vec_info next_stmt_info = first_stmt_info;
8793 for (i = 0; i < group_size; i++)
8795 /* Since gaps are not supported for interleaved stores,
8796 DR_GROUP_SIZE is the exact number of stmts in the
8797 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8798 op = vect_get_store_rhs (next_stmt_info);
8799 if (costing_p)
8800 update_prologue_cost (&prologue_cost, op);
8801 else
8803 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8804 ncopies, op,
8805 gvec_oprnds[i]);
8806 vec_oprnd = (*gvec_oprnds[i])[0];
8807 dr_chain.quick_push (vec_oprnd);
8809 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8812 if (!costing_p)
8814 if (mask)
8816 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8817 mask, &vec_masks,
8818 mask_vectype);
8819 vec_mask = vec_masks[0];
8822 /* We should have caught mismatched types earlier. */
8823 gcc_assert (
8824 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8825 dataref_ptr
8826 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8827 aggr_type, NULL, offset, &dummy,
8828 gsi, &ptr_incr, false, bump);
8831 else if (!costing_p)
8833 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8834 /* DR_CHAIN is then used as an input to
8835 vect_permute_store_chain(). */
8836 for (i = 0; i < group_size; i++)
8838 vec_oprnd = (*gvec_oprnds[i])[j];
8839 dr_chain[i] = vec_oprnd;
8841 if (mask)
8842 vec_mask = vec_masks[j];
8843 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8844 stmt_info, bump);
8847 if (costing_p)
8849 n_adjacent_stores += vec_num;
8850 continue;
8853 /* Get an array into which we can store the individual vectors. */
8854 tree vec_array = create_vector_array (vectype, vec_num);
8856 /* Invalidate the current contents of VEC_ARRAY. This should
8857 become an RTL clobber too, which prevents the vector registers
8858 from being upward-exposed. */
8859 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8861 /* Store the individual vectors into the array. */
8862 for (i = 0; i < vec_num; i++)
8864 vec_oprnd = dr_chain[i];
8865 write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
8869 tree final_mask = NULL;
8870 tree final_len = NULL;
8871 tree bias = NULL;
8872 if (loop_masks)
8873 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8874 ncopies, vectype, j);
8875 if (vec_mask)
8876 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8877 vec_mask, gsi);
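/* FINAL_MASK is now the loop mask for this copy ANDed with the user
   mask when both are present (see prepare_vec_mask), or whichever of
   the two exists.  */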
8879 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8881 if (loop_lens)
8882 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8883 ncopies, vectype, j, 1);
8884 else
8885 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8886 signed char biasval
8887 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8888 bias = build_int_cst (intQI_type_node, biasval);
8889 if (!final_mask)
8891 mask_vectype = truth_type_for (vectype);
8892 final_mask = build_minus_one_cst (mask_vectype);
8896 gcall *call;
8897 if (final_len && final_mask)
8899 /* Emit:
8900 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8901 LEN, BIAS, VEC_ARRAY). */
8902 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8903 tree alias_ptr = build_int_cst (ref_type, align);
8904 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8905 dataref_ptr, alias_ptr,
8906 final_mask, final_len, bias,
8907 vec_array);
8909 else if (final_mask)
8911 /* Emit:
8912 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8913 VEC_ARRAY). */
8914 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8915 tree alias_ptr = build_int_cst (ref_type, align);
8916 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8917 dataref_ptr, alias_ptr,
8918 final_mask, vec_array);
8920 else
8922 /* Emit:
8923 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8924 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8925 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
8926 gimple_call_set_lhs (call, data_ref);
8928 gimple_call_set_nothrow (call, true);
8929 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8930 new_stmt = call;
8932 /* Record that VEC_ARRAY is now dead. */
8933 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8934 if (j == 0)
8935 *vec_stmt = new_stmt;
8936 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8939 if (costing_p)
8941 if (n_adjacent_stores > 0)
8942 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
8943 alignment_support_scheme, misalignment,
8944 &inside_cost, cost_vec);
8945 if (dump_enabled_p ())
8946 dump_printf_loc (MSG_NOTE, vect_location,
8947 "vect_model_store_cost: inside_cost = %d, "
8948 "prologue_cost = %d .\n",
8949 inside_cost, prologue_cost);
8952 return true;
8955 if (memory_access_type == VMAT_GATHER_SCATTER)
8957 gcc_assert (!grouped_store);
8958 auto_vec<tree> vec_offsets;
8959 unsigned int inside_cost = 0, prologue_cost = 0;
8960 for (j = 0; j < ncopies; j++)
8962 gimple *new_stmt;
8963 if (j == 0)
8965 if (costing_p && vls_type == VLS_STORE_INVARIANT)
8966 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8967 stmt_info, 0, vect_prologue);
8968 else if (!costing_p)
8970 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8971 DR_CHAIN is of size 1. */
8972 gcc_assert (group_size == 1);
8973 if (slp_node)
8974 vect_get_slp_defs (op_node, gvec_oprnds[0]);
8975 else
8976 vect_get_vec_defs_for_operand (vinfo, first_stmt_info,
8977 ncopies, op, gvec_oprnds[0]);
8978 if (mask)
8980 if (slp_node)
8981 vect_get_slp_defs (mask_node, &vec_masks);
8982 else
8983 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8984 ncopies,
8985 mask, &vec_masks,
8986 mask_vectype);
8989 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8990 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8991 slp_node, &gs_info,
8992 &dataref_ptr, &vec_offsets);
8993 else
8994 dataref_ptr
8995 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8996 aggr_type, NULL, offset,
8997 &dummy, gsi, &ptr_incr, false,
8998 bump);
9001 else if (!costing_p)
9003 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9004 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9005 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9006 gsi, stmt_info, bump);
9009 new_stmt = NULL;
9010 for (i = 0; i < vec_num; ++i)
9012 if (!costing_p)
9014 vec_oprnd = (*gvec_oprnds[0])[vec_num * j + i];
9015 if (mask)
9016 vec_mask = vec_masks[vec_num * j + i];
9017 /* We should have caught mismatched types earlier. */
9018 gcc_assert (useless_type_conversion_p (vectype,
9019 TREE_TYPE (vec_oprnd)));
9021 unsigned HOST_WIDE_INT align;
9022 tree final_mask = NULL_TREE;
9023 tree final_len = NULL_TREE;
9024 tree bias = NULL_TREE;
9025 if (!costing_p)
9027 if (loop_masks)
9028 final_mask = vect_get_loop_mask (loop_vinfo, gsi,
9029 loop_masks, ncopies,
9030 vectype, j);
9031 if (vec_mask)
9032 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9033 final_mask, vec_mask, gsi);
9036 if (gs_info.ifn != IFN_LAST)
9038 if (costing_p)
9040 unsigned int cnunits = vect_nunits_for_cost (vectype);
9041 inside_cost
9042 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9043 stmt_info, 0, vect_body);
9044 continue;
9047 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9048 vec_offset = vec_offsets[vec_num * j + i];
9049 tree scale = size_int (gs_info.scale);
9051 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9053 if (loop_lens)
9054 final_len = vect_get_loop_len (loop_vinfo, gsi,
9055 loop_lens, ncopies,
9056 vectype, j, 1);
9057 else
9058 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9059 signed char biasval
9060 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9061 bias = build_int_cst (intQI_type_node, biasval);
9062 if (!final_mask)
9064 mask_vectype = truth_type_for (vectype);
9065 final_mask = build_minus_one_cst (mask_vectype);
9069 gcall *call;
9070 if (final_len && final_mask)
9071 call = gimple_build_call_internal
9072 (IFN_MASK_LEN_SCATTER_STORE, 7, dataref_ptr,
9073 vec_offset, scale, vec_oprnd, final_mask,
9074 final_len, bias);
9075 else if (final_mask)
9076 call = gimple_build_call_internal
9077 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr,
9078 vec_offset, scale, vec_oprnd, final_mask);
9079 else
9080 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9081 dataref_ptr, vec_offset,
9082 scale, vec_oprnd);
9083 gimple_call_set_nothrow (call, true);
9084 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9085 new_stmt = call;
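/* In GIMPLE dumps the internal calls built above show up as, e.g.
   (illustrative):
     .MASK_LEN_SCATTER_STORE (ptr, offsets, scale, data, mask, len, bias);
     .MASK_SCATTER_STORE (ptr, offsets, scale, data, mask);
     .SCATTER_STORE (ptr, offsets, scale, data);  */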
9087 else if (gs_info.decl)
9089 /* The builtin decls path for scatter is legacy, x86 only. */
9090 gcc_assert (nunits.is_constant ()
9091 && (!final_mask
9092 || SCALAR_INT_MODE_P
9093 (TYPE_MODE (TREE_TYPE (final_mask)))));
9094 if (costing_p)
9096 unsigned int cnunits = vect_nunits_for_cost (vectype);
9097 inside_cost
9098 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9099 stmt_info, 0, vect_body);
9100 continue;
9102 poly_uint64 offset_nunits
9103 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
9104 if (known_eq (nunits, offset_nunits))
9106 new_stmt = vect_build_one_scatter_store_call
9107 (vinfo, stmt_info, gsi, &gs_info,
9108 dataref_ptr, vec_offsets[vec_num * j + i],
9109 vec_oprnd, final_mask);
9110 vect_finish_stmt_generation (vinfo, stmt_info,
9111 new_stmt, gsi);
9113 else if (known_eq (nunits, offset_nunits * 2))
9115 /* We have an offset vector with half the number of
9116 lanes but the builtins will store full vectype
9117 data from the lower lanes. */
9118 new_stmt = vect_build_one_scatter_store_call
9119 (vinfo, stmt_info, gsi, &gs_info,
9120 dataref_ptr,
9121 vec_offsets[2 * vec_num * j + 2 * i],
9122 vec_oprnd, final_mask);
9123 vect_finish_stmt_generation (vinfo, stmt_info,
9124 new_stmt, gsi);
9125 int count = nunits.to_constant ();
9126 vec_perm_builder sel (count, count, 1);
9127 sel.quick_grow (count);
9128 for (int i = 0; i < count; ++i)
9129 sel[i] = i | (count / 2);
9130 vec_perm_indices indices (sel, 2, count);
9131 tree perm_mask
9132 = vect_gen_perm_mask_checked (vectype, indices);
9133 new_stmt = gimple_build_assign (NULL_TREE, VEC_PERM_EXPR,
9134 vec_oprnd, vec_oprnd,
9135 perm_mask);
9136 vec_oprnd = make_ssa_name (vectype);
9137 gimple_set_lhs (new_stmt, vec_oprnd);
9138 vect_finish_stmt_generation (vinfo, stmt_info,
9139 new_stmt, gsi);
9140 if (final_mask)
9142 new_stmt = gimple_build_assign (NULL_TREE,
9143 VEC_UNPACK_HI_EXPR,
9144 final_mask);
9145 final_mask = make_ssa_name
9146 (truth_type_for (gs_info.offset_vectype));
9147 gimple_set_lhs (new_stmt, final_mask);
9148 vect_finish_stmt_generation (vinfo, stmt_info,
9149 new_stmt, gsi);
9151 new_stmt = vect_build_one_scatter_store_call
9152 (vinfo, stmt_info, gsi, &gs_info,
9153 dataref_ptr,
9154 vec_offsets[2 * vec_num * j + 2 * i + 1],
9155 vec_oprnd, final_mask);
9156 vect_finish_stmt_generation (vinfo, stmt_info,
9157 new_stmt, gsi);
9159 else if (known_eq (nunits * 2, offset_nunits))
9161 /* We have an offset vector with double the number of
9162 lanes. Select the low/high part accordingly. */
9163 vec_offset = vec_offsets[(vec_num * j + i) / 2];
9164 if ((vec_num * j + i) & 1)
9166 int count = offset_nunits.to_constant ();
9167 vec_perm_builder sel (count, count, 1);
9168 sel.quick_grow (count);
9169 for (int i = 0; i < count; ++i)
9170 sel[i] = i | (count / 2);
9171 vec_perm_indices indices (sel, 2, count);
9172 tree perm_mask = vect_gen_perm_mask_checked
9173 (TREE_TYPE (vec_offset), indices);
9174 new_stmt = gimple_build_assign (NULL_TREE,
9175 VEC_PERM_EXPR,
9176 vec_offset,
9177 vec_offset,
9178 perm_mask);
9179 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
9180 gimple_set_lhs (new_stmt, vec_offset);
9181 vect_finish_stmt_generation (vinfo, stmt_info,
9182 new_stmt, gsi);
9184 new_stmt = vect_build_one_scatter_store_call
9185 (vinfo, stmt_info, gsi, &gs_info,
9186 dataref_ptr, vec_offset,
9187 vec_oprnd, final_mask);
9188 vect_finish_stmt_generation (vinfo, stmt_info,
9189 new_stmt, gsi);
9191 else
9192 gcc_unreachable ();
9194 else
9196 /* Emulated scatter. */
9197 gcc_assert (!final_mask);
9198 if (costing_p)
9200 unsigned int cnunits = vect_nunits_for_cost (vectype);
9201 /* For emulated scatter N offset vector element extracts
9202 (we assume the scalar scaling and ptr + offset add is
9203 consumed by the store). */
9204 inside_cost
9205 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9206 stmt_info, 0, vect_body);
9207 /* N scalar stores plus extracting the elements. */
9208 inside_cost
9209 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9210 stmt_info, 0, vect_body);
9211 inside_cost
9212 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9213 stmt_info, 0, vect_body);
9214 continue;
9217 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9218 unsigned HOST_WIDE_INT const_offset_nunits
9219 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
9220 vec<constructor_elt, va_gc> *ctor_elts;
9221 vec_alloc (ctor_elts, const_nunits);
9222 gimple_seq stmts = NULL;
9223 tree elt_type = TREE_TYPE (vectype);
9224 unsigned HOST_WIDE_INT elt_size
9225 = tree_to_uhwi (TYPE_SIZE (elt_type));
9226 /* For now we only support offset vectors with at least as many
9227 elements as the data vector. */
9228 unsigned HOST_WIDE_INT factor
9229 = const_offset_nunits / const_nunits;
9230 vec_offset = vec_offsets[(vec_num * j + i) / factor];
9231 unsigned elt_offset
9232 = ((vec_num * j + i) % factor) * const_nunits;
9233 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9234 tree scale = size_int (gs_info.scale);
9235 align = get_object_alignment (DR_REF (first_dr_info->dr));
9236 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
9237 for (unsigned k = 0; k < const_nunits; ++k)
9239 /* Compute the offsetted pointer. */
9240 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9241 bitsize_int (k + elt_offset));
9242 tree idx
9243 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
9244 vec_offset, TYPE_SIZE (idx_type), boff);
9245 idx = gimple_convert (&stmts, sizetype, idx);
9246 idx = gimple_build (&stmts, MULT_EXPR, sizetype,
9247 idx, scale);
9248 tree ptr
9249 = gimple_build (&stmts, PLUS_EXPR,
9250 TREE_TYPE (dataref_ptr),
9251 dataref_ptr, idx);
9252 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9253 /* Extract the element to be stored. */
9254 tree elt
9255 = gimple_build (&stmts, BIT_FIELD_REF,
9256 TREE_TYPE (vectype),
9257 vec_oprnd, TYPE_SIZE (elt_type),
9258 bitsize_int (k * elt_size));
9259 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9260 stmts = NULL;
9261 tree ref
9262 = build2 (MEM_REF, ltype, ptr,
9263 build_int_cst (ref_type, 0));
9264 new_stmt = gimple_build_assign (ref, elt);
9265 vect_finish_stmt_generation (vinfo, stmt_info,
9266 new_stmt, gsi);
9268 if (slp)
9269 slp_node->push_vec_def (new_stmt);
9272 if (!slp && !costing_p)
9273 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9276 if (!slp && !costing_p)
9277 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9279 if (costing_p && dump_enabled_p ())
9280 dump_printf_loc (MSG_NOTE, vect_location,
9281 "vect_model_store_cost: inside_cost = %d, "
9282 "prologue_cost = %d .\n",
9283 inside_cost, prologue_cost);
9285 return true;
9288 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9289 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9290 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9291 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9293 unsigned inside_cost = 0, prologue_cost = 0;
9294 /* For costing some adjacent vector stores, we'd like to cost with
9295 the total number of them once instead of costing each one by one. */
9296 unsigned int n_adjacent_stores = 0;
9297 auto_vec<tree> result_chain (group_size);
9298 auto_vec<tree, 1> vec_oprnds;
9299 for (j = 0; j < ncopies; j++)
9301 gimple *new_stmt;
9302 if (j == 0)
9304 if (slp && !costing_p)
9306 /* Get vectorized arguments for SLP_NODE. */
9307 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
9308 &vec_oprnds, mask, &vec_masks);
9309 vec_oprnd = vec_oprnds[0];
9310 if (mask)
9311 vec_mask = vec_masks[0];
9313 else
9315 /* For interleaved stores we collect vectorized defs for all the
9316 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9317 input to vect_permute_store_chain().
9319 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9320 is of size 1. */
9321 stmt_vec_info next_stmt_info = first_stmt_info;
9322 for (i = 0; i < group_size; i++)
9324 /* Since gaps are not supported for interleaved stores,
9325 DR_GROUP_SIZE is the exact number of stmts in the chain.
9326 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9327 that there is no interleaving, DR_GROUP_SIZE is 1,
9328 and only one iteration of the loop will be executed. */
9329 op = vect_get_store_rhs (next_stmt_info);
9330 if (costing_p)
9331 update_prologue_cost (&prologue_cost, op);
9332 else
9334 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
9335 ncopies, op,
9336 gvec_oprnds[i]);
9337 vec_oprnd = (*gvec_oprnds[i])[0];
9338 dr_chain.quick_push (vec_oprnd);
9340 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9342 if (mask && !costing_p)
9344 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9345 mask, &vec_masks,
9346 mask_vectype);
9347 vec_mask = vec_masks[0];
9351 /* We should have caught mismatched types earlier. */
9352 gcc_assert (costing_p
9353 || useless_type_conversion_p (vectype,
9354 TREE_TYPE (vec_oprnd)));
9355 bool simd_lane_access_p
9356 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9357 if (!costing_p
9358 && simd_lane_access_p
9359 && !loop_masks
9360 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9361 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9362 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9363 && integer_zerop (DR_INIT (first_dr_info->dr))
9364 && alias_sets_conflict_p (get_alias_set (aggr_type),
9365 get_alias_set (TREE_TYPE (ref_type))))
9367 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9368 dataref_offset = build_int_cst (ref_type, 0);
9370 else if (!costing_p)
9371 dataref_ptr
9372 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9373 simd_lane_access_p ? loop : NULL,
9374 offset, &dummy, gsi, &ptr_incr,
9375 simd_lane_access_p, bump);
9377 else if (!costing_p)
9379 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9380 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9381 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9382 of size 1. */
9383 for (i = 0; i < group_size; i++)
9385 vec_oprnd = (*gvec_oprnds[i])[j];
9386 dr_chain[i] = vec_oprnd;
9388 if (mask)
9389 vec_mask = vec_masks[j];
9390 if (dataref_offset)
9391 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9392 else
9393 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9394 stmt_info, bump);
9397 new_stmt = NULL;
9398 if (grouped_store)
9400 /* Permute. */
9401 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9402 if (costing_p)
9404 int group_size = DR_GROUP_SIZE (first_stmt_info);
9405 int nstmts = ceil_log2 (group_size) * group_size;
9406 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
9407 stmt_info, 0, vect_body);
9408 if (dump_enabled_p ())
9409 dump_printf_loc (MSG_NOTE, vect_location,
9410 "vect_model_store_cost: "
9411 "strided group_size = %d .\n",
9412 group_size);
9414 else
9415 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9416 gsi, &result_chain);
9419 stmt_vec_info next_stmt_info = first_stmt_info;
9420 for (i = 0; i < vec_num; i++)
9422 if (!costing_p)
9424 if (slp)
9425 vec_oprnd = vec_oprnds[i];
9426 else if (grouped_store)
9427 /* For grouped stores vectorized defs are interleaved in
9428 vect_permute_store_chain(). */
9429 vec_oprnd = result_chain[i];
9432 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9434 if (costing_p)
9435 inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9436 stmt_info, 0, vect_body);
9437 else
9439 tree perm_mask = perm_mask_for_reverse (vectype);
9440 tree perm_dest = vect_create_destination_var (
9441 vect_get_store_rhs (stmt_info), vectype);
9442 tree new_temp = make_ssa_name (perm_dest);
9444 /* Generate the permute statement. */
9445 gimple *perm_stmt
9446 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9447 vec_oprnd, perm_mask);
9448 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
9449 gsi);
9451 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9452 vec_oprnd = new_temp;
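/* E.g. (illustrative) for a negative-step access a 4-lane vector
   { a, b, c, d } is permuted to { d, c, b, a } before being stored.  */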
9456 if (costing_p)
9458 n_adjacent_stores++;
9460 if (!slp)
9462 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9463 if (!next_stmt_info)
9464 break;
9467 continue;
9470 tree final_mask = NULL_TREE;
9471 tree final_len = NULL_TREE;
9472 tree bias = NULL_TREE;
9473 if (loop_masks)
9474 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9475 vec_num * ncopies, vectype,
9476 vec_num * j + i);
9477 if (slp && vec_mask)
9478 vec_mask = vec_masks[i];
9479 if (vec_mask)
9480 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9481 vec_mask, gsi);
9483 if (i > 0)
9484 /* Bump the vector pointer. */
9485 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9486 stmt_info, bump);
9488 unsigned misalign;
9489 unsigned HOST_WIDE_INT align;
9490 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9491 if (alignment_support_scheme == dr_aligned)
9492 misalign = 0;
9493 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9495 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9496 misalign = 0;
9498 else
9499 misalign = misalignment;
9500 if (dataref_offset == NULL_TREE
9501 && TREE_CODE (dataref_ptr) == SSA_NAME)
9502 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9503 misalign);
9504 align = least_bit_hwi (misalign | align);
9506 /* Compute IFN when LOOP_LENS or final_mask valid. */
9507 machine_mode vmode = TYPE_MODE (vectype);
9508 machine_mode new_vmode = vmode;
9509 internal_fn partial_ifn = IFN_LAST;
9510 if (loop_lens)
9512 opt_machine_mode new_ovmode
9513 = get_len_load_store_mode (vmode, false, &partial_ifn);
9514 new_vmode = new_ovmode.require ();
9515 unsigned factor
9516 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9517 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9518 vec_num * ncopies, vectype,
9519 vec_num * j + i, factor);
9521 else if (final_mask)
9523 if (!can_vec_mask_load_store_p (
9524 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9525 &partial_ifn))
9526 gcc_unreachable ();
9529 if (partial_ifn == IFN_MASK_LEN_STORE)
9531 if (!final_len)
9533 /* Pass VF value to 'len' argument of
9534 MASK_LEN_STORE if LOOP_LENS is invalid. */
9535 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9537 if (!final_mask)
9539 /* Pass all ones value to 'mask' argument of
9540 MASK_LEN_STORE if final_mask is invalid. */
9541 mask_vectype = truth_type_for (vectype);
9542 final_mask = build_minus_one_cst (mask_vectype);
9545 if (final_len)
9547 signed char biasval
9548 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9550 bias = build_int_cst (intQI_type_node, biasval);
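/* The bias is a target-reported constant (currently 0 or -1) that the
   len_load/len_store family of optabs adds to the length operand; see
   the optab documentation.  */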
9553 /* Arguments are ready. Create the new vector stmt. */
9554 if (final_len)
9556 gcall *call;
9557 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9558 /* Need conversion if it's wrapped with VnQI. */
9559 if (vmode != new_vmode)
9561 tree new_vtype
9562 = build_vector_type_for_mode (unsigned_intQI_type_node,
9563 new_vmode);
9564 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9565 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9566 gassign *new_stmt
9567 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9568 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9569 vec_oprnd = var;
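/* E.g. (illustrative) a V4SI store on a target that only provides
   byte-granular len stores is emitted as a V16QI store of the
   VIEW_CONVERTed operand; FACTOR above is then
   GET_MODE_UNIT_SIZE (V4SImode) == 4, so the length is counted in
   bytes.  */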
9572 if (partial_ifn == IFN_MASK_LEN_STORE)
9573 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9574 dataref_ptr, ptr, final_mask,
9575 final_len, bias, vec_oprnd);
9576 else
9577 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9578 dataref_ptr, ptr, final_len,
9579 bias, vec_oprnd);
9580 gimple_call_set_nothrow (call, true);
9581 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9582 new_stmt = call;
9584 else if (final_mask)
9586 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9587 gcall *call
9588 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9589 ptr, final_mask, vec_oprnd);
9590 gimple_call_set_nothrow (call, true);
9591 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9592 new_stmt = call;
9594 else
9596 data_ref
9597 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9598 dataref_offset ? dataref_offset
9599 : build_int_cst (ref_type, 0));
9600 if (alignment_support_scheme == dr_aligned)
9602 else
9603 TREE_TYPE (data_ref)
9604 = build_aligned_type (TREE_TYPE (data_ref),
9605 align * BITS_PER_UNIT);
9606 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9607 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9608 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9611 if (slp)
9612 continue;
9614 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9615 if (!next_stmt_info)
9616 break;
9618 if (!slp && !costing_p)
9620 if (j == 0)
9621 *vec_stmt = new_stmt;
9622 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9626 if (costing_p)
9628 if (n_adjacent_stores > 0)
9629 vect_get_store_cost (vinfo, stmt_info, n_adjacent_stores,
9630 alignment_support_scheme, misalignment,
9631 &inside_cost, cost_vec);
9633 /* When vectorizing a store into the function result assign
9634 a penalty if the function returns in a multi-register location.
9635 In this case we assume we'll end up with having to spill the
9636 vector result and do piecewise loads as a conservative estimate. */
9637 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9638 if (base
9639 && (TREE_CODE (base) == RESULT_DECL
9640 || (DECL_P (base) && cfun_returns (base)))
9641 && !aggregate_value_p (base, cfun->decl))
9643 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9644 /* ??? Handle PARALLEL in some way. */
9645 if (REG_P (reg))
9647 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9648 /* Assume that a single reg-reg move is possible and cheap,
9649 do not account for vector to gp register move cost. */
9650 if (nregs > 1)
9652 /* Spill. */
9653 prologue_cost
9654 += record_stmt_cost (cost_vec, ncopies, vector_store,
9655 stmt_info, 0, vect_epilogue);
9656 /* Loads. */
9657 prologue_cost
9658 += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
9659 stmt_info, 0, vect_epilogue);
9663 if (dump_enabled_p ())
9664 dump_printf_loc (MSG_NOTE, vect_location,
9665 "vect_model_store_cost: inside_cost = %d, "
9666 "prologue_cost = %d .\n",
9667 inside_cost, prologue_cost);
9670 return true;
9673 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9674 VECTOR_CST mask. No checks are made that the target platform supports the
9675 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9676 vect_gen_perm_mask_checked. */
9678 tree
9679 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9681 tree mask_type;
9683 poly_uint64 nunits = sel.length ();
9684 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9686 mask_type = build_vector_type (ssizetype, nunits);
9687 return vec_perm_indices_to_tree (mask_type, sel);
9690 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9691 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9693 tree
9694 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9696 machine_mode vmode = TYPE_MODE (vectype);
9697 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9698 return vect_gen_perm_mask_any (vectype, sel);
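/* A minimal usage sketch (illustrative only): build a mask that reverses a
   four-lane vector of type VECTYPE, assuming the target supports the
   permutation (otherwise the assert in vect_gen_perm_mask_checked fires):

     vec_perm_builder sel (4, 4, 1);
     for (int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   This mirrors what perm_mask_for_reverse does for arbitrary vector
   types.  */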
9701 /* Given vector variables X and Y that were generated for the scalar
9702 STMT_INFO, generate instructions to permute the vector elements of X and Y
9703 using permutation mask MASK_VEC, insert them at *GSI and return the
9704 permuted vector variable. */
9706 static tree
9707 permute_vec_elements (vec_info *vinfo,
9708 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9709 gimple_stmt_iterator *gsi)
9711 tree vectype = TREE_TYPE (x);
9712 tree perm_dest, data_ref;
9713 gimple *perm_stmt;
9715 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9716 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9717 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9718 else
9719 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9720 data_ref = make_ssa_name (perm_dest);
9722 /* Generate the permute statement. */
9723 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9724 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9726 return data_ref;
9729 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9730 inserting them on the loop's preheader edge. Returns true if we
9731 were successful in doing so (and thus STMT_INFO can then be moved),
9732 otherwise returns false. HOIST_P indicates whether we actually want to
9733 hoist the definitions of all SSA uses; it is false when we are only costing. */
9735 static bool
9736 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9738 ssa_op_iter i;
9739 tree op;
9740 bool any = false;
9742 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9744 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9745 if (!gimple_nop_p (def_stmt)
9746 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9748 /* Make sure we don't need to recurse. While we could do
9749 so in simple cases, when there are more complex use webs
9750 we don't have an easy way to preserve stmt order to fulfil
9751 dependencies within them. */
9752 tree op2;
9753 ssa_op_iter i2;
9754 if (gimple_code (def_stmt) == GIMPLE_PHI)
9755 return false;
9756 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9758 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9759 if (!gimple_nop_p (def_stmt2)
9760 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9761 return false;
9763 any = true;
9767 if (!any)
9768 return true;
9770 if (!hoist_p)
9771 return true;
9773 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9775 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9776 if (!gimple_nop_p (def_stmt)
9777 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9779 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9780 gsi_remove (&gsi, false);
9781 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9785 return true;
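/* For example (illustrative): if the address of an invariant load is
   defined inside the loop,

     _1 = p_5 + 16;
     x_6 = *_1;     <-- loop-invariant load

   the definition of _1 is moved to the loop preheader so that
   vectorizable_load can in turn emit the load on the preheader edge.  */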
9788 /* vectorizable_load.
9790 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9791 that can be vectorized.
9792 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9793 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9794 Return true if STMT_INFO is vectorizable in this way. */
9796 static bool
9797 vectorizable_load (vec_info *vinfo,
9798 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9799 gimple **vec_stmt, slp_tree slp_node,
9800 stmt_vector_for_cost *cost_vec)
9802 tree scalar_dest;
9803 tree vec_dest = NULL;
9804 tree data_ref = NULL;
9805 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9806 class loop *loop = NULL;
9807 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9808 bool nested_in_vect_loop = false;
9809 tree elem_type;
9810 /* Avoid false positive uninitialized warning, see PR110652. */
9811 tree new_temp = NULL_TREE;
9812 machine_mode mode;
9813 tree dummy;
9814 tree dataref_ptr = NULL_TREE;
9815 tree dataref_offset = NULL_TREE;
9816 gimple *ptr_incr = NULL;
9817 int ncopies;
9818 int i, j;
9819 unsigned int group_size;
9820 poly_uint64 group_gap_adj;
9821 tree msq = NULL_TREE, lsq;
9822 tree realignment_token = NULL_TREE;
9823 gphi *phi = NULL;
9824 vec<tree> dr_chain = vNULL;
9825 bool grouped_load = false;
9826 stmt_vec_info first_stmt_info;
9827 stmt_vec_info first_stmt_info_for_drptr = NULL;
9828 bool compute_in_loop = false;
9829 class loop *at_loop;
9830 int vec_num;
9831 bool slp = (slp_node != NULL);
9832 bool slp_perm = false;
9833 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9834 poly_uint64 vf;
9835 tree aggr_type;
9836 gather_scatter_info gs_info;
9837 tree ref_type;
9838 enum vect_def_type mask_dt = vect_unknown_def_type;
9840 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9841 return false;
9843 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9844 && ! vec_stmt)
9845 return false;
9847 if (!STMT_VINFO_DATA_REF (stmt_info))
9848 return false;
9850 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9851 int mask_index = -1;
9852 slp_tree slp_op = NULL;
9853 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9855 scalar_dest = gimple_assign_lhs (assign);
9856 if (TREE_CODE (scalar_dest) != SSA_NAME)
9857 return false;
9859 tree_code code = gimple_assign_rhs_code (assign);
9860 if (code != ARRAY_REF
9861 && code != BIT_FIELD_REF
9862 && code != INDIRECT_REF
9863 && code != COMPONENT_REF
9864 && code != IMAGPART_EXPR
9865 && code != REALPART_EXPR
9866 && code != MEM_REF
9867 && TREE_CODE_CLASS (code) != tcc_declaration)
9868 return false;
9870 else
9872 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9873 if (!call || !gimple_call_internal_p (call))
9874 return false;
9876 internal_fn ifn = gimple_call_internal_fn (call);
9877 if (!internal_load_fn_p (ifn))
9878 return false;
9880 scalar_dest = gimple_call_lhs (call);
9881 if (!scalar_dest)
9882 return false;
9884 mask_index = internal_fn_mask_index (ifn);
9885 if (mask_index >= 0 && slp_node)
9886 mask_index = vect_slp_child_index_for_operand
9887 (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9888 if (mask_index >= 0
9889 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9890 &mask, &slp_op, &mask_dt, &mask_vectype))
9891 return false;
9894 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9895 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9897 if (loop_vinfo)
9899 loop = LOOP_VINFO_LOOP (loop_vinfo);
9900 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9901 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9903 else
9904 vf = 1;
9906 /* Multiple types in SLP are handled by creating the appropriate number of
9907 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9908 case of SLP. */
9909 if (slp)
9910 ncopies = 1;
9911 else
9912 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9914 gcc_assert (ncopies >= 1);
9916 /* FORNOW. This restriction should be relaxed. */
9917 if (nested_in_vect_loop && ncopies > 1)
9919 if (dump_enabled_p ())
9920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9921 "multiple types in nested loop.\n");
9922 return false;
9925 /* Invalidate assumptions made by dependence analysis when vectorization
9926 on the unrolled body effectively re-orders stmts. */
9927 if (ncopies > 1
9928 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9929 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9930 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9932 if (dump_enabled_p ())
9933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9934 "cannot perform implicit CSE when unrolling "
9935 "with negative dependence distance\n");
9936 return false;
9939 elem_type = TREE_TYPE (vectype);
9940 mode = TYPE_MODE (vectype);
9942 /* FORNOW. In some cases we can vectorize even if the data type is not
9943 supported (e.g. data copies). */
9944 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9946 if (dump_enabled_p ())
9947 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9948 "Aligned load, but unsupported type.\n");
9949 return false;
9952 /* Check if the load is a part of an interleaving chain. */
9953 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9955 grouped_load = true;
9956 /* FORNOW */
9957 gcc_assert (!nested_in_vect_loop);
9958 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9960 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9961 group_size = DR_GROUP_SIZE (first_stmt_info);
9963 /* Refuse non-SLP vectorization of SLP-only groups. */
9964 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9966 if (dump_enabled_p ())
9967 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9968 "cannot vectorize load in non-SLP mode.\n");
9969 return false;
9972 /* Invalidate assumptions made by dependence analysis when vectorization
9973 on the unrolled body effectively re-orders stmts. */
9974 if (!PURE_SLP_STMT (stmt_info)
9975 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9976 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9977 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9979 if (dump_enabled_p ())
9980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9981 "cannot perform implicit CSE when performing "
9982 "group loads with negative dependence distance\n");
9983 return false;
9986 else
9987 group_size = 1;
9989 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9991 slp_perm = true;
9993 if (!loop_vinfo)
9995 /* In BB vectorization we may not actually use a loaded vector
9996 accessing elements in excess of DR_GROUP_SIZE. */
9997 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9998 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9999 unsigned HOST_WIDE_INT nunits;
10000 unsigned j, k, maxk = 0;
10001 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
10002 if (k > maxk)
10003 maxk = k;
10004 tree vectype = SLP_TREE_VECTYPE (slp_node);
10005 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
10006 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
10008 if (dump_enabled_p ())
10009 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10010 "BB vectorization with gaps at the end of "
10011 "a load is not supported\n");
10012 return false;
10016 auto_vec<tree> tem;
10017 unsigned n_perms;
10018 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
10019 true, &n_perms))
10021 if (dump_enabled_p ())
10022 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
10023 vect_location,
10024 "unsupported load permutation\n");
10025 return false;
10029 vect_memory_access_type memory_access_type;
10030 enum dr_alignment_support alignment_support_scheme;
10031 int misalignment;
10032 poly_int64 poffset;
10033 internal_fn lanes_ifn;
10034 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
10035 ncopies, &memory_access_type, &poffset,
10036 &alignment_support_scheme, &misalignment, &gs_info,
10037 &lanes_ifn))
10038 return false;
10040 if (mask)
10042 if (memory_access_type == VMAT_CONTIGUOUS)
10044 machine_mode vec_mode = TYPE_MODE (vectype);
10045 if (!VECTOR_MODE_P (vec_mode)
10046 || !can_vec_mask_load_store_p (vec_mode,
10047 TYPE_MODE (mask_vectype), true))
10048 return false;
10050 else if (memory_access_type != VMAT_LOAD_STORE_LANES
10051 && memory_access_type != VMAT_GATHER_SCATTER)
10053 if (dump_enabled_p ())
10054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10055 "unsupported access type for masked load.\n");
10056 return false;
10058 else if (memory_access_type == VMAT_GATHER_SCATTER
10059 && gs_info.ifn == IFN_LAST
10060 && !gs_info.decl)
10062 if (dump_enabled_p ())
10063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10064 "unsupported masked emulated gather.\n");
10065 return false;
10069 bool costing_p = !vec_stmt;
10071 if (costing_p) /* transformation not required. */
10073 if (slp_node
10074 && mask
10075 && !vect_maybe_update_slp_op_vectype (slp_op,
10076 mask_vectype))
10078 if (dump_enabled_p ())
10079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10080 "incompatible vector types for invariants\n");
10081 return false;
10084 if (!slp)
10085 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10087 if (loop_vinfo
10088 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10089 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10090 VLS_LOAD, group_size,
10091 memory_access_type, &gs_info,
10092 mask);
10094 if (dump_enabled_p ()
10095 && memory_access_type != VMAT_ELEMENTWISE
10096 && memory_access_type != VMAT_GATHER_SCATTER
10097 && alignment_support_scheme != dr_aligned)
10098 dump_printf_loc (MSG_NOTE, vect_location,
10099 "Vectorizing an unaligned access.\n");
10101 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10102 vinfo->any_known_not_updated_vssa = true;
10104 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10107 if (!slp)
10108 gcc_assert (memory_access_type
10109 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10111 if (dump_enabled_p () && !costing_p)
10112 dump_printf_loc (MSG_NOTE, vect_location,
10113 "transform load. ncopies = %d\n", ncopies);
10115 /* Transform. */
10117 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10118 ensure_base_align (dr_info);
10120 if (memory_access_type == VMAT_INVARIANT)
10122 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10123 /* If we have versioned for aliasing or the loop doesn't
10124 have any data dependencies that would preclude this,
10125 then we are sure this is a loop invariant load and
10126 thus we can insert it on the preheader edge.
10127 TODO: hoist_defs_of_uses should ideally be computed
10128 once at analysis time, remembered and used at
10129 transform time. */
10130 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10131 && !nested_in_vect_loop
10132 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
10133 if (costing_p)
10135 enum vect_cost_model_location cost_loc
10136 = hoist_p ? vect_prologue : vect_body;
10137 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10138 stmt_info, 0, cost_loc);
10139 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
10140 cost_loc);
10141 unsigned int prologue_cost = hoist_p ? cost : 0;
10142 unsigned int inside_cost = hoist_p ? 0 : cost;
10143 if (dump_enabled_p ())
10144 dump_printf_loc (MSG_NOTE, vect_location,
10145 "vect_model_load_cost: inside_cost = %d, "
10146 "prologue_cost = %d .\n",
10147 inside_cost, prologue_cost);
10148 return true;
10150 if (hoist_p)
10152 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
10153 if (dump_enabled_p ())
10154 dump_printf_loc (MSG_NOTE, vect_location,
10155 "hoisting out of the vectorized loop: %G",
10156 (gimple *) stmt);
10157 scalar_dest = copy_ssa_name (scalar_dest);
10158 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10159 edge pe = loop_preheader_edge (loop);
10160 gphi *vphi = get_virtual_phi (loop->header);
10161 tree vuse;
10162 if (vphi)
10163 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10164 else
10165 vuse = gimple_vuse (gsi_stmt (*gsi));
10166 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10167 gimple_set_vuse (new_stmt, vuse);
10168 gsi_insert_on_edge_immediate (pe, new_stmt);
10170 /* These copies are all equivalent. */
10171 if (hoist_p)
10172 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10173 vectype, NULL);
10174 else
10176 gimple_stmt_iterator gsi2 = *gsi;
10177 gsi_next (&gsi2);
10178 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10179 vectype, &gsi2);
10181 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10182 if (slp)
10183 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10184 slp_node->push_vec_def (new_stmt);
10185 else
10187 for (j = 0; j < ncopies; ++j)
10188 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10189 *vec_stmt = new_stmt;
10191 return true;
10194 if (memory_access_type == VMAT_ELEMENTWISE
10195 || memory_access_type == VMAT_STRIDED_SLP)
10197 gimple_stmt_iterator incr_gsi;
10198 bool insert_after;
10199 tree offvar;
10200 tree ivstep;
10201 tree running_off;
10202 vec<constructor_elt, va_gc> *v = NULL;
10203 tree stride_base, stride_step, alias_off;
10204 /* Checked by get_load_store_type. */
10205 unsigned int const_nunits = nunits.to_constant ();
10206 unsigned HOST_WIDE_INT cst_offset = 0;
10207 tree dr_offset;
10208 unsigned int inside_cost = 0;
10210 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10211 gcc_assert (!nested_in_vect_loop);
10213 if (grouped_load)
10215 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10216 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10218 else
10220 first_stmt_info = stmt_info;
10221 first_dr_info = dr_info;
10224 if (slp && grouped_load)
10226 group_size = DR_GROUP_SIZE (first_stmt_info);
10227 ref_type = get_group_alias_ptr_type (first_stmt_info);
10229 else
10231 if (grouped_load)
10232 cst_offset
10233 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10234 * vect_get_place_in_interleaving_chain (stmt_info,
10235 first_stmt_info));
10236 group_size = 1;
10237 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10240 if (!costing_p)
10242 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10243 stride_base = fold_build_pointer_plus (
10244 DR_BASE_ADDRESS (first_dr_info->dr),
10245 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10246 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10247 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10249 /* For a load with loop-invariant (but other than power-of-2)
10250 stride (i.e. not a grouped access) like so:
10252 for (i = 0; i < n; i += stride)
10253 ... = array[i];
10255 we generate a new induction variable and new accesses to
10256 form a new vector (or vectors, depending on ncopies):
10258 for (j = 0; ; j += VF*stride)
10259 tmp1 = array[j];
10260 tmp2 = array[j + stride];
10262 vectemp = {tmp1, tmp2, ...}
10265 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10266 build_int_cst (TREE_TYPE (stride_step), vf));
10268 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10270 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10271 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10272 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10273 loop, &incr_gsi, insert_after,
10274 &offvar, NULL);
10276 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10279 running_off = offvar;
10280 alias_off = build_int_cst (ref_type, 0);
10281 int nloads = const_nunits;
10282 int lnel = 1;
10283 tree ltype = TREE_TYPE (vectype);
10284 tree lvectype = vectype;
10285 auto_vec<tree> dr_chain;
10286 if (memory_access_type == VMAT_STRIDED_SLP)
10288 if (group_size < const_nunits)
10290 /* First check if vec_init optab supports construction from vector
10291 elts directly. Otherwise avoid emitting a constructor of
10292 vector elements by performing the loads using an integer type
10293 of the same size, constructing a vector of those and then
10294 re-interpreting it as the original vector type. This avoids a
10295 huge runtime penalty due to the general inability to perform
10296 store forwarding from smaller stores to a larger load. */
10297 tree ptype;
10298 tree vtype
10299 = vector_vector_composition_type (vectype,
10300 const_nunits / group_size,
10301 &ptype);
10302 if (vtype != NULL_TREE)
10304 nloads = const_nunits / group_size;
10305 lnel = group_size;
10306 lvectype = vtype;
10307 ltype = ptype;
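/* For instance (illustrative), with a V8SI vectype and group_size == 2
   this performs four DI-sized loads, collects them into a V4DI
   constructor and VIEW_CONVERTs the result back to V8SI, assuming
   vector_vector_composition_type finds such a composition.  */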
10310 else
10312 nloads = 1;
10313 lnel = const_nunits;
10314 ltype = vectype;
10316 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10318 /* Load the whole vector(1) scalar_type when the vectype has a single element. */
10319 else if (nloads == 1)
10320 ltype = vectype;
10322 if (slp)
10324 /* For SLP permutation support we need to load the whole group,
10325 not only the number of vector stmts the permutation result
10326 fits in. */
10327 if (slp_perm)
10329 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10330 variable VF. */
10331 unsigned int const_vf = vf.to_constant ();
10332 ncopies = CEIL (group_size * const_vf, const_nunits);
10333 dr_chain.create (ncopies);
10335 else
10336 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10338 unsigned int group_el = 0;
10339 unsigned HOST_WIDE_INT
10340 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10341 unsigned int n_groups = 0;
10342 /* For costing some adjacent vector loads, we'd like to cost with
10343 the total number of them once instead of costing each one by one. */
10344 unsigned int n_adjacent_loads = 0;
10345 for (j = 0; j < ncopies; j++)
10347 if (nloads > 1 && !costing_p)
10348 vec_alloc (v, nloads);
10349 gimple *new_stmt = NULL;
10350 for (i = 0; i < nloads; i++)
10352 if (costing_p)
10354 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10355 avoid ICE, see PR110776. */
10356 if (VECTOR_TYPE_P (ltype)
10357 && memory_access_type != VMAT_ELEMENTWISE)
10358 n_adjacent_loads++;
10359 else
10360 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10361 stmt_info, 0, vect_body);
10362 continue;
10364 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10365 group_el * elsz + cst_offset);
10366 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10367 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10368 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
10369 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10370 if (nloads > 1)
10371 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10372 gimple_assign_lhs (new_stmt));
10374 group_el += lnel;
10375 if (! slp
10376 || group_el == group_size)
10378 n_groups++;
10379 /* When doing SLP make sure to not load elements from
10380 the next vector iteration; those will not be accessed,
10381 so just use the last element again. See PR107451. */
10382 if (!slp || known_lt (n_groups, vf))
10384 tree newoff = copy_ssa_name (running_off);
10385 gimple *incr
10386 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10387 running_off, stride_step);
10388 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10389 running_off = newoff;
10391 group_el = 0;
10395 if (nloads > 1)
10397 if (costing_p)
10398 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10399 stmt_info, 0, vect_body);
10400 else
10402 tree vec_inv = build_constructor (lvectype, v);
10403 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10404 lvectype, gsi);
10405 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10406 if (lvectype != vectype)
10408 new_stmt
10409 = gimple_build_assign (make_ssa_name (vectype),
10410 VIEW_CONVERT_EXPR,
10411 build1 (VIEW_CONVERT_EXPR,
10412 vectype, new_temp));
10413 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10414 gsi);
10419 if (!costing_p)
10421 if (slp)
10423 if (slp_perm)
10424 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10425 else
10426 slp_node->push_vec_def (new_stmt);
10428 else
10430 if (j == 0)
10431 *vec_stmt = new_stmt;
10432 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10436 if (slp_perm)
10438 unsigned n_perms;
10439 if (costing_p)
10441 unsigned n_loads;
10442 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10443 true, &n_perms, &n_loads);
10444 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10445 first_stmt_info, 0, vect_body);
10447 else
10448 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10449 false, &n_perms);
10452 if (costing_p)
10454 if (n_adjacent_loads > 0)
10455 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10456 alignment_support_scheme, misalignment, false,
10457 &inside_cost, nullptr, cost_vec, cost_vec,
10458 true);
10459 if (dump_enabled_p ())
10460 dump_printf_loc (MSG_NOTE, vect_location,
10461 "vect_model_load_cost: inside_cost = %u, "
10462 "prologue_cost = 0 .\n",
10463 inside_cost);
10466 return true;
10469 if (memory_access_type == VMAT_GATHER_SCATTER
10470 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10471 grouped_load = false;
10473 if (grouped_load
10474 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10476 if (grouped_load)
10478 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10479 group_size = DR_GROUP_SIZE (first_stmt_info);
10481 else
10483 first_stmt_info = stmt_info;
10484 group_size = 1;
10486 /* For SLP vectorization we directly vectorize a subchain
10487 without permutation. */
10488 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10489 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10490 /* For BB vectorization always use the first stmt to base
10491 the data ref pointer on. */
10492 if (bb_vinfo)
10493 first_stmt_info_for_drptr
10494 = vect_find_first_scalar_stmt_in_slp (slp_node);
10496 /* Check if the chain of loads is already vectorized. */
10497 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10498 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10499 ??? But we can only do so if there is exactly one
10500 as we have no way to get at the rest. Leave the CSE
10501 opportunity alone.
10502 ??? With the group load eventually participating
10503 in multiple different permutations (having multiple
10504 slp nodes which refer to the same group) the CSE
10505 is even wrong code. See PR56270. */
10506 && !slp)
10508 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10509 return true;
10511 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10512 group_gap_adj = 0;
10514 /* VEC_NUM is the number of vect stmts to be created for this group. */
10515 if (slp)
10517 grouped_load = false;
10518 /* If an SLP permutation is from N elements to N elements,
10519 and if one vector holds a whole number of N, we can load
10520 the inputs to the permutation in the same way as an
10521 unpermuted sequence. In other cases we need to load the
10522 whole group, not only the number of vector stmts the
10523 permutation result fits in. */
10524 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10525 if (slp_perm
10526 && (group_size != scalar_lanes
10527 || !multiple_p (nunits, group_size)))
10529 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10530 variable VF; see vect_transform_slp_perm_load. */
10531 unsigned int const_vf = vf.to_constant ();
10532 unsigned int const_nunits = nunits.to_constant ();
10533 vec_num = CEIL (group_size * const_vf, const_nunits);
10534 group_gap_adj = vf * group_size - nunits * vec_num;
10536 else
10538 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10539 group_gap_adj
10540 = group_size - scalar_lanes;
10543 else
10544 vec_num = group_size;
10546 ref_type = get_group_alias_ptr_type (first_stmt_info);
10548 else
10550 first_stmt_info = stmt_info;
10551 first_dr_info = dr_info;
10552 group_size = vec_num = 1;
10553 group_gap_adj = 0;
10554 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10555 if (slp)
10556 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10559 gcc_assert (alignment_support_scheme);
10560 vec_loop_masks *loop_masks
10561 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10562 ? &LOOP_VINFO_MASKS (loop_vinfo)
10563 : NULL);
10564 vec_loop_lens *loop_lens
10565 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10566 ? &LOOP_VINFO_LENS (loop_vinfo)
10567 : NULL);
10569 /* Shouldn't go with length-based approach if fully masked. */
10570 gcc_assert (!loop_lens || !loop_masks);
10572 /* Targets with load-lane instructions must not require explicit
10573 realignment. vect_supportable_dr_alignment always returns either
10574 dr_aligned or dr_unaligned_supported for masked operations. */
10575 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10576 && !mask
10577 && !loop_masks)
10578 || alignment_support_scheme == dr_aligned
10579 || alignment_support_scheme == dr_unaligned_supported);
10581 /* In case the vectorization factor (VF) is bigger than the number
10582 of elements that we can fit in a vectype (nunits), we have to generate
10583 more than one vector stmt, i.e., we need to "unroll" the
10584 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10585 from one copy of the vector stmt to the next, in the field
10586 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10587 stages to find the correct vector defs to be used when vectorizing
10588 stmts that use the defs of the current stmt. The example below
10589 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10590 need to create 4 vectorized stmts):
10592 before vectorization:
10593 RELATED_STMT VEC_STMT
10594 S1: x = memref - -
10595 S2: z = x + 1 - -
10597 step 1: vectorize stmt S1:
10598 We first create the vector stmt VS1_0, and, as usual, record a
10599 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10600 Next, we create the vector stmt VS1_1, and record a pointer to
10601 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10602 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10603 stmts and pointers:
10604 RELATED_STMT VEC_STMT
10605 VS1_0: vx0 = memref0 VS1_1 -
10606 VS1_1: vx1 = memref1 VS1_2 -
10607 VS1_2: vx2 = memref2 VS1_3 -
10608 VS1_3: vx3 = memref3 - -
10609 S1: x = load - VS1_0
10610 S2: z = x + 1 - -
10613 /* In case of interleaving (non-unit grouped access):
10615 S1: x2 = &base + 2
10616 S2: x0 = &base
10617 S3: x1 = &base + 1
10618 S4: x3 = &base + 3
10620 Vectorized loads are created in the order of memory accesses
10621 starting from the access of the first stmt of the chain:
10623 VS1: vx0 = &base
10624 VS2: vx1 = &base + vec_size*1
10625 VS3: vx3 = &base + vec_size*2
10626 VS4: vx4 = &base + vec_size*3
10628 Then permutation statements are generated:
10630 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10631 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10634 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10635 (the order of the data-refs in the output of vect_permute_load_chain
10636 corresponds to the order of scalar stmts in the interleaving chain - see
10637 the documentation of vect_permute_load_chain()).
10638 The generation of permutation stmts and recording them in
10639 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10641 In case of both multiple types and interleaving, the vector loads and
10642 permutation stmts above are created for every copy. The result vector
10643 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10644 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10646 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10647 on a target that supports unaligned accesses (dr_unaligned_supported)
10648 we generate the following code:
10649 p = initial_addr;
10650 indx = 0;
10651 loop {
10652 p = p + indx * vectype_size;
10653 vec_dest = *(p);
10654 indx = indx + 1;
10657 Otherwise, the data reference is potentially unaligned on a target that
10658 does not support unaligned accesses (dr_explicit_realign_optimized) -
10659 then generate the following code, in which the data in each iteration is
10660 obtained by two vector loads, one from the previous iteration, and one
10661 from the current iteration:
10662 p1 = initial_addr;
10663 msq_init = *(floor(p1))
10664 p2 = initial_addr + VS - 1;
10665 realignment_token = call target_builtin;
10666 indx = 0;
10667 loop {
10668 p2 = p2 + indx * vectype_size
10669 lsq = *(floor(p2))
10670 vec_dest = realign_load (msq, lsq, realignment_token)
10671 indx = indx + 1;
10672 msq = lsq;
10673 } */
10675 /* If the misalignment remains the same throughout the execution of the
10676 loop, we can create the init_addr and permutation mask at the loop
10677 preheader. Otherwise, it needs to be created inside the loop.
10678 This can only occur when vectorizing memory accesses in the inner-loop
10679 nested within an outer-loop that is being vectorized. */
10681 if (nested_in_vect_loop
10682 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10683 GET_MODE_SIZE (TYPE_MODE (vectype))))
10685 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10686 compute_in_loop = true;
10689 bool diff_first_stmt_info
10690 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10692 tree offset = NULL_TREE;
10693 if ((alignment_support_scheme == dr_explicit_realign_optimized
10694 || alignment_support_scheme == dr_explicit_realign)
10695 && !compute_in_loop)
10697 /* If we have a different first_stmt_info, we can't set up realignment
10698 here, since we can't guarantee that the first_stmt_info DR has been
10699 initialized yet; instead use the first_stmt_info_for_drptr DR, bumping by
10700 the distance from the first_stmt_info DR as done below. */
10701 if (!costing_p)
10703 if (!diff_first_stmt_info)
10704 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10705 &realignment_token,
10706 alignment_support_scheme, NULL_TREE,
10707 &at_loop);
10708 if (alignment_support_scheme == dr_explicit_realign_optimized)
10710 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10711 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10712 size_one_node);
10713 gcc_assert (!first_stmt_info_for_drptr);
10717 else
10718 at_loop = loop;
10720 if (!known_eq (poffset, 0))
10721 offset = (offset
10722 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10723 : size_int (poffset));
10725 tree bump;
10726 tree vec_offset = NULL_TREE;
10727 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10729 aggr_type = NULL_TREE;
10730 bump = NULL_TREE;
10732 else if (memory_access_type == VMAT_GATHER_SCATTER)
10734 aggr_type = elem_type;
10735 if (!costing_p)
10736 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10737 &bump, &vec_offset, loop_lens);
10739 else
10741 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10742 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10743 else
10744 aggr_type = vectype;
10745 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10746 memory_access_type, loop_lens);
10749 auto_vec<tree> vec_offsets;
10750 auto_vec<tree> vec_masks;
10751 if (mask && !costing_p)
10753 if (slp_node)
10754 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10755 &vec_masks);
10756 else
10757 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10758 &vec_masks, mask_vectype);
10761 tree vec_mask = NULL_TREE;
10762 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10764 gcc_assert (alignment_support_scheme == dr_aligned
10765 || alignment_support_scheme == dr_unaligned_supported);
10766 gcc_assert (grouped_load && !slp);
10768 unsigned int inside_cost = 0, prologue_cost = 0;
10769 /* For costing some adjacent vector loads, we'd like to cost them
10770 once with their total number instead of costing each one by one. */
10771 unsigned int n_adjacent_loads = 0;
10772 for (j = 0; j < ncopies; j++)
10774 if (costing_p)
10776 /* An IFN_LOAD_LANES will load all its vector results,
10777 regardless of which ones we actually need. Account
10778 for the cost of unused results. */
10779 if (first_stmt_info == stmt_info)
10781 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10782 stmt_vec_info next_stmt_info = first_stmt_info;
10785 gaps -= 1;
10786 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10788 while (next_stmt_info);
10789 if (gaps)
10791 if (dump_enabled_p ())
10792 dump_printf_loc (MSG_NOTE, vect_location,
10793 "vect_model_load_cost: %d "
10794 "unused vectors.\n",
10795 gaps);
10796 vect_get_load_cost (vinfo, stmt_info, gaps,
10797 alignment_support_scheme,
10798 misalignment, false, &inside_cost,
10799 &prologue_cost, cost_vec, cost_vec,
10800 true);
10803 n_adjacent_loads++;
10804 continue;
10807 /* 1. Create the vector or array pointer update chain. */
10808 if (j == 0)
10809 dataref_ptr
10810 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10811 at_loop, offset, &dummy, gsi,
10812 &ptr_incr, false, bump);
10813 else
10815 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10816 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10817 stmt_info, bump);
10819 if (mask)
10820 vec_mask = vec_masks[j];
10822 tree vec_array = create_vector_array (vectype, vec_num);
10824 tree final_mask = NULL_TREE;
10825 tree final_len = NULL_TREE;
10826 tree bias = NULL_TREE;
10827 if (loop_masks)
10828 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10829 ncopies, vectype, j);
10830 if (vec_mask)
10831 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10832 vec_mask, gsi);
10834 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10836 if (loop_lens)
10837 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10838 ncopies, vectype, j, 1);
10839 else
10840 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10841 signed char biasval
10842 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10843 bias = build_int_cst (intQI_type_node, biasval);
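/* The partial load/store bias is a target-wide constant; in current
   practice it is either 0 or -1, where -1 means the length operand
   passed to the IFN is biased by minus one.  */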
10844 if (!final_mask)
10846 mask_vectype = truth_type_for (vectype);
10847 final_mask = build_minus_one_cst (mask_vectype);
10851 gcall *call;
10852 if (final_len && final_mask)
10854 /* Emit:
10855 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10856 VEC_MASK, LEN, BIAS). */
10857 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10858 tree alias_ptr = build_int_cst (ref_type, align);
10859 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10860 dataref_ptr, alias_ptr,
10861 final_mask, final_len, bias);
10863 else if (final_mask)
10865 /* Emit:
10866 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10867 VEC_MASK). */
10868 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10869 tree alias_ptr = build_int_cst (ref_type, align);
10870 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10871 dataref_ptr, alias_ptr,
10872 final_mask);
10874 else
10876 /* Emit:
10877 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10878 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10879 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10881 gimple_call_set_lhs (call, vec_array);
10882 gimple_call_set_nothrow (call, true);
10883 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10885 dr_chain.create (vec_num);
10886 /* Extract each vector into an SSA_NAME. */
10887 for (i = 0; i < vec_num; i++)
10889 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10890 vec_array, i);
10891 dr_chain.quick_push (new_temp);
10894 /* Record the mapping between SSA_NAMEs and statements. */
10895 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10897 /* Record that VEC_ARRAY is now dead. */
10898 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10900 dr_chain.release ();
10902 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10905 if (costing_p)
10907 if (n_adjacent_loads > 0)
10908 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
10909 alignment_support_scheme, misalignment, false,
10910 &inside_cost, &prologue_cost, cost_vec,
10911 cost_vec, true);
10912 if (dump_enabled_p ())
10913 dump_printf_loc (MSG_NOTE, vect_location,
10914 "vect_model_load_cost: inside_cost = %u, "
10915 "prologue_cost = %u .\n",
10916 inside_cost, prologue_cost);
10919 return true;
10922 if (memory_access_type == VMAT_GATHER_SCATTER)
10924 gcc_assert (alignment_support_scheme == dr_aligned
10925 || alignment_support_scheme == dr_unaligned_supported);
10926 gcc_assert (!grouped_load && !slp_perm);
10928 unsigned int inside_cost = 0, prologue_cost = 0;
10929 for (j = 0; j < ncopies; j++)
10931 /* 1. Create the vector or array pointer update chain. */
10932 if (j == 0 && !costing_p)
10934 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10935 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10936 slp_node, &gs_info, &dataref_ptr,
10937 &vec_offsets);
10938 else
10939 dataref_ptr
10940 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10941 at_loop, offset, &dummy, gsi,
10942 &ptr_incr, false, bump);
10944 else if (!costing_p)
10946 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10947 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10948 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10949 gsi, stmt_info, bump);
10952 gimple *new_stmt = NULL;
10953 for (i = 0; i < vec_num; i++)
10955 tree final_mask = NULL_TREE;
10956 tree final_len = NULL_TREE;
10957 tree bias = NULL_TREE;
10958 if (!costing_p)
10960 if (mask)
10961 vec_mask = vec_masks[vec_num * j + i];
10962 if (loop_masks)
10963 final_mask
10964 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10965 vec_num * ncopies, vectype,
10966 vec_num * j + i);
10967 if (vec_mask)
10968 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10969 final_mask, vec_mask, gsi);
10971 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10972 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10973 gsi, stmt_info, bump);
10976 /* 2. Create the vector-load in the loop. */
10977 unsigned HOST_WIDE_INT align;
10978 if (gs_info.ifn != IFN_LAST)
10980 if (costing_p)
10982 unsigned int cnunits = vect_nunits_for_cost (vectype);
10983 inside_cost
10984 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10985 stmt_info, 0, vect_body);
10986 continue;
10988 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10989 vec_offset = vec_offsets[vec_num * j + i];
10990 tree zero = build_zero_cst (vectype);
10991 tree scale = size_int (gs_info.scale);
10993 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10995 if (loop_lens)
10996 final_len
10997 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10998 vec_num * ncopies, vectype,
10999 vec_num * j + i, 1);
11000 else
11001 final_len
11002 = build_int_cst (sizetype,
11003 TYPE_VECTOR_SUBPARTS (vectype));
11004 signed char biasval
11005 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11006 bias = build_int_cst (intQI_type_node, biasval);
11007 if (!final_mask)
11009 mask_vectype = truth_type_for (vectype);
11010 final_mask = build_minus_one_cst (mask_vectype);
11014 gcall *call;
11015 if (final_len && final_mask)
11016 call
11017 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
11018 dataref_ptr, vec_offset,
11019 scale, zero, final_mask,
11020 final_len, bias);
11021 else if (final_mask)
11022 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
11023 dataref_ptr, vec_offset,
11024 scale, zero, final_mask);
11025 else
11026 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
11027 dataref_ptr, vec_offset,
11028 scale, zero);
11029 gimple_call_set_nothrow (call, true);
11030 new_stmt = call;
11031 data_ref = NULL_TREE;
11033 else if (gs_info.decl)
11035 /* The builtin decls path for gather is legacy, x86 only. */
11036 gcc_assert (!final_len && nunits.is_constant ());
11037 if (costing_p)
11039 unsigned int cnunits = vect_nunits_for_cost (vectype);
11040 inside_cost
11041 = record_stmt_cost (cost_vec, cnunits, scalar_load,
11042 stmt_info, 0, vect_body);
11043 continue;
11045 poly_uint64 offset_nunits
11046 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
11047 if (known_eq (nunits, offset_nunits))
11049 new_stmt = vect_build_one_gather_load_call
11050 (vinfo, stmt_info, gsi, &gs_info,
11051 dataref_ptr, vec_offsets[vec_num * j + i],
11052 final_mask);
11053 data_ref = NULL_TREE;
11055 else if (known_eq (nunits, offset_nunits * 2))
11057 /* We have an offset vector with half the number of
11058 lanes, but the builtins will produce full-vectype
11059 data with just the lower lanes filled. */
11060 new_stmt = vect_build_one_gather_load_call
11061 (vinfo, stmt_info, gsi, &gs_info,
11062 dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
11063 final_mask);
11064 tree low = make_ssa_name (vectype);
11065 gimple_set_lhs (new_stmt, low);
11066 vect_finish_stmt_generation (vinfo, stmt_info,
11067 new_stmt, gsi);
11069 /* Now put the upper half of FINAL_MASK into its lower half. */
11070 if (final_mask
11071 && !SCALAR_INT_MODE_P
11072 (TYPE_MODE (TREE_TYPE (final_mask))))
11074 int count = nunits.to_constant ();
11075 vec_perm_builder sel (count, count, 1);
11076 sel.quick_grow (count);
11077 for (int i = 0; i < count; ++i)
11078 sel[i] = i | (count / 2);
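/* E.g. for count == 8 this builds the selector { 4, 5, 6, 7, 4, 5, 6, 7 },
   i.e. the upper half of FINAL_MASK is copied into its lower half.  */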
11079 vec_perm_indices indices (sel, 2, count);
11080 tree perm_mask = vect_gen_perm_mask_checked
11081 (TREE_TYPE (final_mask), indices);
11082 new_stmt = gimple_build_assign (NULL_TREE,
11083 VEC_PERM_EXPR,
11084 final_mask,
11085 final_mask,
11086 perm_mask);
11087 final_mask = make_ssa_name (TREE_TYPE (final_mask));
11088 gimple_set_lhs (new_stmt, final_mask);
11089 vect_finish_stmt_generation (vinfo, stmt_info,
11090 new_stmt, gsi);
11092 else if (final_mask)
11094 new_stmt = gimple_build_assign (NULL_TREE,
11095 VEC_UNPACK_HI_EXPR,
11096 final_mask);
11097 final_mask = make_ssa_name
11098 (truth_type_for (gs_info.offset_vectype));
11099 gimple_set_lhs (new_stmt, final_mask);
11100 vect_finish_stmt_generation (vinfo, stmt_info,
11101 new_stmt, gsi);
11104 new_stmt = vect_build_one_gather_load_call
11105 (vinfo, stmt_info, gsi, &gs_info,
11106 dataref_ptr,
11107 vec_offsets[2 * vec_num * j + 2 * i + 1],
11108 final_mask);
11109 tree high = make_ssa_name (vectype);
11110 gimple_set_lhs (new_stmt, high);
11111 vect_finish_stmt_generation (vinfo, stmt_info,
11112 new_stmt, gsi);
11114 /* Compose LOW and HIGH into the full result vector. */
11115 int count = nunits.to_constant ();
11116 vec_perm_builder sel (count, count, 1);
11117 sel.quick_grow (count);
11118 for (int i = 0; i < count; ++i)
11119 sel[i] = i < count / 2 ? i : i + count / 2;
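/* E.g. for count == 8 the selector is { 0, 1, 2, 3, 8, 9, 10, 11 },
   concatenating the filled lower halves of LOW and HIGH.  */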
11120 vec_perm_indices indices (sel, 2, count);
11121 tree perm_mask
11122 = vect_gen_perm_mask_checked (vectype, indices);
11123 new_stmt = gimple_build_assign (NULL_TREE,
11124 VEC_PERM_EXPR,
11125 low, high, perm_mask);
11126 data_ref = NULL_TREE;
11128 else if (known_eq (nunits * 2, offset_nunits))
11130 /* We have an offset vector with double the number of
11131 lanes. Select the low/high part accordingly. */
11132 vec_offset = vec_offsets[(vec_num * j + i) / 2];
11133 if ((vec_num * j + i) & 1)
11135 int count = offset_nunits.to_constant ();
11136 vec_perm_builder sel (count, count, 1);
11137 sel.quick_grow (count);
11138 for (int i = 0; i < count; ++i)
11139 sel[i] = i | (count / 2);
11140 vec_perm_indices indices (sel, 2, count);
11141 tree perm_mask = vect_gen_perm_mask_checked
11142 (TREE_TYPE (vec_offset), indices);
11143 new_stmt = gimple_build_assign (NULL_TREE,
11144 VEC_PERM_EXPR,
11145 vec_offset,
11146 vec_offset,
11147 perm_mask);
11148 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11149 gimple_set_lhs (new_stmt, vec_offset);
11150 vect_finish_stmt_generation (vinfo, stmt_info,
11151 new_stmt, gsi);
11153 new_stmt = vect_build_one_gather_load_call
11154 (vinfo, stmt_info, gsi, &gs_info,
11155 dataref_ptr, vec_offset, final_mask);
11156 data_ref = NULL_TREE;
11158 else
11159 gcc_unreachable ();
11161 else
11163 /* Emulated gather-scatter. */
11164 gcc_assert (!final_mask);
11165 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11166 if (costing_p)
11168 /* For emulated gathers, cost the N offset vector element
11169 extracts (the offset add itself is assumed to be consumed by the load). */
11170 inside_cost = record_stmt_cost (cost_vec, const_nunits,
11171 vec_to_scalar, stmt_info,
11172 0, vect_body);
11173 /* N scalar loads plus gathering them into a
11174 vector. */
11175 inside_cost
11176 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11177 stmt_info, 0, vect_body);
11178 inside_cost
11179 = record_stmt_cost (cost_vec, 1, vec_construct,
11180 stmt_info, 0, vect_body);
11181 continue;
11183 unsigned HOST_WIDE_INT const_offset_nunits
11184 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11185 .to_constant ();
11186 vec<constructor_elt, va_gc> *ctor_elts;
11187 vec_alloc (ctor_elts, const_nunits);
11188 gimple_seq stmts = NULL;
11189 /* We support offset vectors with more elements
11190 than the data vector for now. */
11191 unsigned HOST_WIDE_INT factor
11192 = const_offset_nunits / const_nunits;
11193 vec_offset = vec_offsets[(vec_num * j + i) / factor];
11194 unsigned elt_offset
11195 = ((vec_num * j + i) % factor) * const_nunits;
11196 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11197 tree scale = size_int (gs_info.scale);
11198 align = get_object_alignment (DR_REF (first_dr_info->dr));
11199 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
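/* The loop below emits, per lane K, roughly the following GIMPLE (sketch):
     idx = BIT_FIELD_REF <vec_offset, ...>;
     ptr = dataref_ptr + (sizetype) idx * scale;
     elt = MEM <ltype> [ptr];
   and collects the ELTs into a CONSTRUCTOR of VECTYPE.  */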
11200 for (unsigned k = 0; k < const_nunits; ++k)
11202 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11203 bitsize_int (k + elt_offset));
11204 tree idx
11205 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11206 vec_offset, TYPE_SIZE (idx_type), boff);
11207 idx = gimple_convert (&stmts, sizetype, idx);
11208 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
11209 scale);
11210 tree ptr = gimple_build (&stmts, PLUS_EXPR,
11211 TREE_TYPE (dataref_ptr),
11212 dataref_ptr, idx);
11213 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11214 tree elt = make_ssa_name (TREE_TYPE (vectype));
11215 tree ref = build2 (MEM_REF, ltype, ptr,
11216 build_int_cst (ref_type, 0));
11217 new_stmt = gimple_build_assign (elt, ref);
11218 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11219 gimple_seq_add_stmt (&stmts, new_stmt);
11220 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11222 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11223 new_stmt = gimple_build_assign (
11224 NULL_TREE, build_constructor (vectype, ctor_elts));
11225 data_ref = NULL_TREE;
11228 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11229 /* DATA_REF is null if we've already built the statement. */
11230 if (data_ref)
11232 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11233 new_stmt = gimple_build_assign (vec_dest, data_ref);
11235 new_temp = make_ssa_name (vec_dest, new_stmt);
11236 gimple_set_lhs (new_stmt, new_temp);
11237 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11239 /* Store vector loads in the corresponding SLP_NODE. */
11240 if (slp)
11241 slp_node->push_vec_def (new_stmt);
11244 if (!slp && !costing_p)
11245 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11248 if (!slp && !costing_p)
11249 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11251 if (costing_p && dump_enabled_p ())
11252 dump_printf_loc (MSG_NOTE, vect_location,
11253 "vect_model_load_cost: inside_cost = %u, "
11254 "prologue_cost = %u .\n",
11255 inside_cost, prologue_cost);
11256 return true;
11259 poly_uint64 group_elt = 0;
11260 unsigned int inside_cost = 0, prologue_cost = 0;
11261 /* For costing some adjacent vector loads, we'd like to cost with
11262 the total number of them once instead of cost each one by one. */
11263 unsigned int n_adjacent_loads = 0;
11264 for (j = 0; j < ncopies; j++)
11266 /* 1. Create the vector or array pointer update chain. */
11267 if (j == 0 && !costing_p)
11269 bool simd_lane_access_p
11270 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11271 if (simd_lane_access_p
11272 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11273 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11274 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11275 && integer_zerop (DR_INIT (first_dr_info->dr))
11276 && alias_sets_conflict_p (get_alias_set (aggr_type),
11277 get_alias_set (TREE_TYPE (ref_type)))
11278 && (alignment_support_scheme == dr_aligned
11279 || alignment_support_scheme == dr_unaligned_supported))
11281 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11282 dataref_offset = build_int_cst (ref_type, 0);
11284 else if (diff_first_stmt_info)
11286 dataref_ptr
11287 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11288 aggr_type, at_loop, offset, &dummy,
11289 gsi, &ptr_incr, simd_lane_access_p,
11290 bump);
11291 /* Adjust the pointer by the difference to first_stmt. */
11292 data_reference_p ptrdr
11293 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11294 tree diff
11295 = fold_convert (sizetype,
11296 size_binop (MINUS_EXPR,
11297 DR_INIT (first_dr_info->dr),
11298 DR_INIT (ptrdr)));
11299 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11300 stmt_info, diff);
11301 if (alignment_support_scheme == dr_explicit_realign)
11303 msq = vect_setup_realignment (vinfo,
11304 first_stmt_info_for_drptr, gsi,
11305 &realignment_token,
11306 alignment_support_scheme,
11307 dataref_ptr, &at_loop);
11308 gcc_assert (!compute_in_loop);
11311 else
11312 dataref_ptr
11313 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11314 at_loop,
11315 offset, &dummy, gsi, &ptr_incr,
11316 simd_lane_access_p, bump);
11318 else if (!costing_p)
11320 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11321 if (dataref_offset)
11322 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11323 bump);
11324 else
11325 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11326 stmt_info, bump);
11329 if (grouped_load || slp_perm)
11330 dr_chain.create (vec_num);
11332 gimple *new_stmt = NULL;
11333 for (i = 0; i < vec_num; i++)
11335 tree final_mask = NULL_TREE;
11336 tree final_len = NULL_TREE;
11337 tree bias = NULL_TREE;
11338 if (!costing_p)
11340 if (mask)
11341 vec_mask = vec_masks[vec_num * j + i];
11342 if (loop_masks)
11343 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11344 vec_num * ncopies, vectype,
11345 vec_num * j + i);
11346 if (vec_mask)
11347 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11348 final_mask, vec_mask, gsi);
11350 if (i > 0)
11351 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11352 gsi, stmt_info, bump);
11355 /* 2. Create the vector-load in the loop. */
11356 switch (alignment_support_scheme)
11358 case dr_aligned:
11359 case dr_unaligned_supported:
11361 if (costing_p)
11362 break;
11364 unsigned int misalign;
11365 unsigned HOST_WIDE_INT align;
11366 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11367 if (alignment_support_scheme == dr_aligned)
11368 misalign = 0;
11369 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11371 align
11372 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11373 misalign = 0;
11375 else
11376 misalign = misalignment;
11377 if (dataref_offset == NULL_TREE
11378 && TREE_CODE (dataref_ptr) == SSA_NAME)
11379 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11380 misalign);
11381 align = least_bit_hwi (misalign | align);
11383 /* Compute the IFN to use when LOOP_LENS or FINAL_MASK is valid. */
11384 machine_mode vmode = TYPE_MODE (vectype);
11385 machine_mode new_vmode = vmode;
11386 internal_fn partial_ifn = IFN_LAST;
11387 if (loop_lens)
11389 opt_machine_mode new_ovmode
11390 = get_len_load_store_mode (vmode, true, &partial_ifn);
11391 new_vmode = new_ovmode.require ();
11392 unsigned factor
11393 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
11394 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11395 vec_num * ncopies, vectype,
11396 vec_num * j + i, factor);
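/* When the target only provides a byte-granular (VnQI) len load, FACTOR
   (the element size in bytes) rescales the loop length from vector
   elements to bytes; the loaded value is then VIEW_CONVERTed back to
   VECTYPE below.  */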
11398 else if (final_mask)
11400 if (!can_vec_mask_load_store_p (
11401 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11402 &partial_ifn))
11403 gcc_unreachable ();
11406 if (partial_ifn == IFN_MASK_LEN_LOAD)
11408 if (!final_len)
11410 /* Pass VF value to 'len' argument of
11411 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11412 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11414 if (!final_mask)
11416 /* Pass all ones value to 'mask' argument of
11417 MASK_LEN_LOAD if final_mask is invalid. */
11418 mask_vectype = truth_type_for (vectype);
11419 final_mask = build_minus_one_cst (mask_vectype);
11422 if (final_len)
11424 signed char biasval
11425 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11427 bias = build_int_cst (intQI_type_node, biasval);
11430 if (final_len)
11432 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11433 gcall *call;
11434 if (partial_ifn == IFN_MASK_LEN_LOAD)
11435 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11436 dataref_ptr, ptr,
11437 final_mask, final_len,
11438 bias);
11439 else
11440 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11441 dataref_ptr, ptr,
11442 final_len, bias);
11443 gimple_call_set_nothrow (call, true);
11444 new_stmt = call;
11445 data_ref = NULL_TREE;
11447 /* Need conversion if it's wrapped with VnQI. */
11448 if (vmode != new_vmode)
11450 tree new_vtype = build_vector_type_for_mode (
11451 unsigned_intQI_type_node, new_vmode);
11452 tree var
11453 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11454 gimple_set_lhs (call, var);
11455 vect_finish_stmt_generation (vinfo, stmt_info, call,
11456 gsi);
11457 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11458 new_stmt = gimple_build_assign (vec_dest,
11459 VIEW_CONVERT_EXPR, op);
11462 else if (final_mask)
11464 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11465 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11466 dataref_ptr, ptr,
11467 final_mask);
11468 gimple_call_set_nothrow (call, true);
11469 new_stmt = call;
11470 data_ref = NULL_TREE;
11472 else
11474 tree ltype = vectype;
11475 tree new_vtype = NULL_TREE;
11476 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11477 unsigned int vect_align
11478 = vect_known_alignment_in_bytes (first_dr_info, vectype);
11479 unsigned int scalar_dr_size
11480 = vect_get_scalar_dr_size (first_dr_info);
11481 /* If there's no peeling for gaps but we have a gap
11482 with SLP loads then load only the lower half of the
11483 vector. See get_group_load_store_type for
11484 when we apply this optimization. */
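/* Illustrative case: group_size == nunits == 4 with a trailing gap of 2
   means only two elements exist in memory, so a two-element half vector
   is loaded and widened to VECTYPE by the composition code below.  */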
11485 if (slp
11486 && loop_vinfo
11487 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
11488 && known_eq (nunits, (group_size - gap) * 2)
11489 && known_eq (nunits, group_size)
11490 && gap >= (vect_align / scalar_dr_size))
11492 tree half_vtype;
11493 new_vtype
11494 = vector_vector_composition_type (vectype, 2,
11495 &half_vtype);
11496 if (new_vtype != NULL_TREE)
11497 ltype = half_vtype;
11499 /* Try to use a single smaller load when we are about
11500 to load excess elements compared to the unrolled
11501 scalar loop.
11502 ??? This should cover the above case as well. */
11503 else if (known_gt ((vec_num * j + i + 1) * nunits,
11504 (group_size * vf - gap)))
11506 if (known_ge ((vec_num * j + i + 1) * nunits
11507 - (group_size * vf - gap), nunits))
11508 /* DR will be unused. */
11509 ltype = NULL_TREE;
11510 else if (known_ge (vect_align,
11511 tree_to_poly_uint64
11512 (TYPE_SIZE_UNIT (vectype))))
11513 /* Aligned access to excess elements is OK if
11514 at least one element is accessed in the
11515 scalar loop. */
11517 else
11519 auto remain
11520 = ((group_size * vf - gap)
11521 - (vec_num * j + i) * nunits);
11522 /* remain should now be > 0 and < nunits. */
11523 unsigned num;
11524 if (constant_multiple_p (nunits, remain, &num))
11526 tree ptype;
11527 new_vtype
11528 = vector_vector_composition_type (vectype,
11529 num,
11530 &ptype);
11531 if (new_vtype)
11532 ltype = ptype;
11534 /* Else use multiple loads or a masked load? */
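/* E.g. with nunits == 4, remain == 2 gives num == 2 and a two-element
   partial load composed into the full vector; remain == 3 has no such
   whole-number composition and is left unhandled here.  */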
11537 tree offset
11538 = (dataref_offset ? dataref_offset
11539 : build_int_cst (ref_type, 0));
11540 if (!ltype)
11542 else if (ltype != vectype
11543 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11545 poly_uint64 gap_offset
11546 = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype))
11547 - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype)));
11548 tree gapcst = build_int_cstu (ref_type, gap_offset);
11549 offset = size_binop (PLUS_EXPR, offset, gapcst);
11551 if (ltype)
11553 data_ref
11554 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11555 if (alignment_support_scheme == dr_aligned)
11557 else
11558 TREE_TYPE (data_ref)
11559 = build_aligned_type (TREE_TYPE (data_ref),
11560 align * BITS_PER_UNIT);
11562 if (!ltype)
11563 data_ref = build_constructor (vectype, NULL);
11564 else if (ltype != vectype)
11566 vect_copy_ref_info (data_ref,
11567 DR_REF (first_dr_info->dr));
11568 tree tem = make_ssa_name (ltype);
11569 new_stmt = gimple_build_assign (tem, data_ref);
11570 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11571 gsi);
11572 data_ref = NULL;
11573 vec<constructor_elt, va_gc> *v;
11574 /* We've computed 'num' above to be statically two
11575 or via constant_multiple_p. */
11576 unsigned num
11577 = (exact_div (tree_to_poly_uint64
11578 (TYPE_SIZE_UNIT (vectype)),
11579 tree_to_poly_uint64
11580 (TYPE_SIZE_UNIT (ltype)))
11581 .to_constant ());
11582 vec_alloc (v, num);
11583 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11585 while (--num)
11586 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11587 build_zero_cst (ltype));
11588 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11590 else
11592 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11593 while (--num)
11594 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11595 build_zero_cst (ltype));
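/* Note the placement: for VMAT_CONTIGUOUS_REVERSE the loaded piece TEM
   goes into the last constructor slot (zero padding first); otherwise it
   goes first and the zero padding follows.  */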
11597 gcc_assert (new_vtype != NULL_TREE);
11598 if (new_vtype == vectype)
11599 new_stmt = gimple_build_assign (
11600 vec_dest, build_constructor (vectype, v));
11601 else
11603 tree new_vname = make_ssa_name (new_vtype);
11604 new_stmt = gimple_build_assign (
11605 new_vname, build_constructor (new_vtype, v));
11606 vect_finish_stmt_generation (vinfo, stmt_info,
11607 new_stmt, gsi);
11608 new_stmt = gimple_build_assign (
11609 vec_dest,
11610 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11614 break;
11616 case dr_explicit_realign:
11618 if (costing_p)
11619 break;
11620 tree ptr, bump;
11622 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11624 if (compute_in_loop)
11625 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11626 &realignment_token,
11627 dr_explicit_realign,
11628 dataref_ptr, NULL);
11630 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11631 ptr = copy_ssa_name (dataref_ptr);
11632 else
11633 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11634 // For explicit realign the target alignment should be
11635 // known at compile time.
11636 unsigned HOST_WIDE_INT align
11637 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11638 new_stmt = gimple_build_assign (
11639 ptr, BIT_AND_EXPR, dataref_ptr,
11640 build_int_cst (TREE_TYPE (dataref_ptr),
11641 -(HOST_WIDE_INT) align));
11642 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11643 data_ref
11644 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11645 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11646 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11647 new_stmt = gimple_build_assign (vec_dest, data_ref);
11648 new_temp = make_ssa_name (vec_dest, new_stmt);
11649 gimple_assign_set_lhs (new_stmt, new_temp);
11650 gimple_move_vops (new_stmt, stmt_info->stmt);
11651 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11652 msq = new_temp;
11654 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11655 bump = size_binop (MINUS_EXPR, bump, size_one_node);
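/* The second access is offset by VS * elem_size - 1 bytes so that, once
   masked down to the target alignment, it yields the next aligned chunk;
   this is the "p2 = initial_addr + VS - 1" step of the realignment scheme
   described earlier.  */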
11656 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11657 bump);
11658 new_stmt = gimple_build_assign (
11659 NULL_TREE, BIT_AND_EXPR, ptr,
11660 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11661 if (TREE_CODE (ptr) == SSA_NAME)
11662 ptr = copy_ssa_name (ptr, new_stmt);
11663 else
11664 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11665 gimple_assign_set_lhs (new_stmt, ptr);
11666 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11667 data_ref
11668 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11669 break;
11671 case dr_explicit_realign_optimized:
11673 if (costing_p)
11674 break;
11675 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11676 new_temp = copy_ssa_name (dataref_ptr);
11677 else
11678 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11679 // We should only be doing this if we know the target
11680 // alignment at compile time.
11681 unsigned HOST_WIDE_INT align
11682 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11683 new_stmt = gimple_build_assign (
11684 new_temp, BIT_AND_EXPR, dataref_ptr,
11685 build_int_cst (TREE_TYPE (dataref_ptr),
11686 -(HOST_WIDE_INT) align));
11687 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11688 data_ref = build2 (MEM_REF, vectype, new_temp,
11689 build_int_cst (ref_type, 0));
11690 break;
11692 default:
11693 gcc_unreachable ();
11696 /* One common place to cost the above vect load for different
11697 alignment support schemes. */
11698 if (costing_p)
11700 /* For VMAT_CONTIGUOUS_PERMUTE with a grouped load, we
11701 only need to take care of the first stmt, whose
11702 stmt_info is first_stmt_info; iterating vec_num on it
11703 covers the cost of the remaining stmts, consistent
11704 with the transform. The prologue cost for realign
11705 only needs to be counted once for the whole group. */
11706 bool first_stmt_info_p = first_stmt_info == stmt_info;
11707 bool add_realign_cost = first_stmt_info_p && i == 0;
11708 if (memory_access_type == VMAT_CONTIGUOUS
11709 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11710 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11711 && (!grouped_load || first_stmt_info_p)))
11713 /* Leave realign cases alone to keep them simple. */
11714 if (alignment_support_scheme == dr_explicit_realign_optimized
11715 || alignment_support_scheme == dr_explicit_realign)
11716 vect_get_load_cost (vinfo, stmt_info, 1,
11717 alignment_support_scheme, misalignment,
11718 add_realign_cost, &inside_cost,
11719 &prologue_cost, cost_vec, cost_vec,
11720 true);
11721 else
11722 n_adjacent_loads++;
11725 else
11727 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11728 /* DATA_REF is null if we've already built the statement. */
11729 if (data_ref)
11731 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11732 new_stmt = gimple_build_assign (vec_dest, data_ref);
11734 new_temp = make_ssa_name (vec_dest, new_stmt);
11735 gimple_set_lhs (new_stmt, new_temp);
11736 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11739 /* 3. Handle explicit realignment if necessary/supported.
11740 Create in loop:
11741 vec_dest = realign_load (msq, lsq, realignment_token) */
11742 if (!costing_p
11743 && (alignment_support_scheme == dr_explicit_realign_optimized
11744 || alignment_support_scheme == dr_explicit_realign))
11746 lsq = gimple_assign_lhs (new_stmt);
11747 if (!realignment_token)
11748 realignment_token = dataref_ptr;
11749 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11750 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11751 lsq, realignment_token);
11752 new_temp = make_ssa_name (vec_dest, new_stmt);
11753 gimple_assign_set_lhs (new_stmt, new_temp);
11754 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11756 if (alignment_support_scheme == dr_explicit_realign_optimized)
11758 gcc_assert (phi);
11759 if (i == vec_num - 1 && j == ncopies - 1)
11760 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11761 UNKNOWN_LOCATION);
11762 msq = lsq;
11766 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11768 if (costing_p)
11769 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11770 stmt_info, 0, vect_body);
11771 else
11773 tree perm_mask = perm_mask_for_reverse (vectype);
11774 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11775 perm_mask, stmt_info, gsi);
11776 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11780 /* Collect vector loads and later create their permutation in
11781 vect_transform_grouped_load (). */
11782 if (!costing_p && (grouped_load || slp_perm))
11783 dr_chain.quick_push (new_temp);
11785 /* Store vector loads in the corresponding SLP_NODE. */
11786 if (!costing_p && slp && !slp_perm)
11787 slp_node->push_vec_def (new_stmt);
11789 /* With an SLP permutation we load the gaps as well; without
11790 one we need to skip the gaps after we manage to fully load
11791 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11792 group_elt += nunits;
11793 if (!costing_p
11794 && maybe_ne (group_gap_adj, 0U)
11795 && !slp_perm
11796 && known_eq (group_elt, group_size - group_gap_adj))
11798 poly_wide_int bump_val
11799 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11800 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11801 == -1)
11802 bump_val = -bump_val;
11803 tree bump = wide_int_to_tree (sizetype, bump_val);
11804 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11805 stmt_info, bump);
11806 group_elt = 0;
11809 /* Bump the vector pointer to account for a gap or for excess
11810 elements loaded for a permuted SLP load. */
11811 if (!costing_p
11812 && maybe_ne (group_gap_adj, 0U)
11813 && slp_perm)
11815 poly_wide_int bump_val
11816 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11817 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11818 bump_val = -bump_val;
11819 tree bump = wide_int_to_tree (sizetype, bump_val);
11820 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11821 stmt_info, bump);
11824 if (slp && !slp_perm)
11825 continue;
11827 if (slp_perm)
11829 unsigned n_perms;
11830 /* For SLP we know we've seen all possible uses of dr_chain, so we
11831 direct vect_transform_slp_perm_load to DCE the unused parts.
11832 ??? This is a hack to prevent compile-time issues as seen
11833 in PR101120 and friends. */
11834 if (costing_p)
11836 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11837 true, &n_perms, nullptr);
11838 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11839 stmt_info, 0, vect_body);
11841 else
11843 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11844 gsi, vf, false, &n_perms,
11845 nullptr, true);
11846 gcc_assert (ok);
11849 else
11851 if (grouped_load)
11853 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11854 /* We assume that the cost of a single load-lanes instruction
11855 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11856 If a grouped access is instead being provided by a
11857 load-and-permute operation, include the cost of the
11858 permutes. */
11859 if (costing_p && first_stmt_info == stmt_info)
11861 /* Use even/odd extract operations or shuffle
11862 operations for each needed permute. */
11863 int group_size = DR_GROUP_SIZE (first_stmt_info);
11864 int nstmts = ceil_log2 (group_size) * group_size;
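/* E.g. group_size == 4 gives ceil_log2 (4) * 4 == 8 permute statements
   charged to the body cost (illustrative).  */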
11865 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11866 stmt_info, 0, vect_body);
11868 if (dump_enabled_p ())
11869 dump_printf_loc (MSG_NOTE, vect_location,
11870 "vect_model_load_cost:"
11871 "strided group_size = %d .\n",
11872 group_size);
11874 else if (!costing_p)
11876 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11877 group_size, gsi);
11878 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11881 else if (!costing_p)
11882 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11884 dr_chain.release ();
11886 if (!slp && !costing_p)
11887 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11889 if (costing_p)
11891 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11892 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11893 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11894 if (n_adjacent_loads > 0)
11895 vect_get_load_cost (vinfo, stmt_info, n_adjacent_loads,
11896 alignment_support_scheme, misalignment, false,
11897 &inside_cost, &prologue_cost, cost_vec, cost_vec,
11898 true);
11899 if (dump_enabled_p ())
11900 dump_printf_loc (MSG_NOTE, vect_location,
11901 "vect_model_load_cost: inside_cost = %u, "
11902 "prologue_cost = %u .\n",
11903 inside_cost, prologue_cost);
11906 return true;
11909 /* Function vect_is_simple_cond.
11911 Input:
11912 LOOP - the loop that is being vectorized.
11913 COND - Condition that is checked for simple use.
11915 Output:
11916 *COMP_VECTYPE - the vector type for the comparison.
11917 *DTS - The def types for the arguments of the comparison.
11919 Returns whether a COND can be vectorized. Checks whether the
11920 condition operands are supportable using vect_is_simple_use. */
11922 static bool
11923 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11924 slp_tree slp_node, tree *comp_vectype,
11925 enum vect_def_type *dts, tree vectype)
11927 tree lhs, rhs;
11928 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11929 slp_tree slp_op;
11931 /* Mask case. */
11932 if (TREE_CODE (cond) == SSA_NAME
11933 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11935 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11936 &slp_op, &dts[0], comp_vectype)
11937 || !*comp_vectype
11938 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11939 return false;
11940 return true;
11943 if (!COMPARISON_CLASS_P (cond))
11944 return false;
11946 lhs = TREE_OPERAND (cond, 0);
11947 rhs = TREE_OPERAND (cond, 1);
11949 if (TREE_CODE (lhs) == SSA_NAME)
11951 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11952 &lhs, &slp_op, &dts[0], &vectype1))
11953 return false;
11955 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11956 || TREE_CODE (lhs) == FIXED_CST)
11957 dts[0] = vect_constant_def;
11958 else
11959 return false;
11961 if (TREE_CODE (rhs) == SSA_NAME)
11963 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11964 &rhs, &slp_op, &dts[1], &vectype2))
11965 return false;
11967 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11968 || TREE_CODE (rhs) == FIXED_CST)
11969 dts[1] = vect_constant_def;
11970 else
11971 return false;
11973 if (vectype1 && vectype2
11974 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11975 TYPE_VECTOR_SUBPARTS (vectype2)))
11976 return false;
11978 *comp_vectype = vectype1 ? vectype1 : vectype2;
11979 /* Invariant comparison. */
11980 if (! *comp_vectype)
11982 tree scalar_type = TREE_TYPE (lhs);
11983 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11984 *comp_vectype = truth_type_for (vectype);
11985 else
11987 /* If we can widen the comparison to match vectype do so. */
11988 if (INTEGRAL_TYPE_P (scalar_type)
11989 && !slp_node
11990 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11991 TYPE_SIZE (TREE_TYPE (vectype))))
11992 scalar_type = build_nonstandard_integer_type
11993 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
11994 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11995 slp_node);
11999 return true;
12002 /* vectorizable_condition.
12004 Check if STMT_INFO is conditional modify expression that can be vectorized.
12005 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12006 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
12007 at GSI.
12009 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
12011 Return true if STMT_INFO is vectorizable in this way. */
12013 static bool
12014 vectorizable_condition (vec_info *vinfo,
12015 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12016 gimple **vec_stmt,
12017 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12019 tree scalar_dest = NULL_TREE;
12020 tree vec_dest = NULL_TREE;
12021 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
12022 tree then_clause, else_clause;
12023 tree comp_vectype = NULL_TREE;
12024 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
12025 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
12026 tree vec_compare;
12027 tree new_temp;
12028 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12029 enum vect_def_type dts[4]
12030 = {vect_unknown_def_type, vect_unknown_def_type,
12031 vect_unknown_def_type, vect_unknown_def_type};
12032 int ndts = 4;
12033 int ncopies;
12034 int vec_num;
12035 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12036 int i;
12037 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12038 vec<tree> vec_oprnds0 = vNULL;
12039 vec<tree> vec_oprnds1 = vNULL;
12040 vec<tree> vec_oprnds2 = vNULL;
12041 vec<tree> vec_oprnds3 = vNULL;
12042 tree vec_cmp_type;
12043 bool masked = false;
12045 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12046 return false;
12048 /* Is vectorizable conditional operation? */
12049 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12050 if (!stmt)
12051 return false;
12053 code = gimple_assign_rhs_code (stmt);
12054 if (code != COND_EXPR)
12055 return false;
12057 stmt_vec_info reduc_info = NULL;
12058 int reduc_index = -1;
12059 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
12060 bool for_reduction
12061 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
12062 if (for_reduction)
12064 if (slp_node)
12065 return false;
12066 reduc_info = info_for_reduction (vinfo, stmt_info);
12067 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
12068 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
12069 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
12070 || reduc_index != -1);
12072 else
12074 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12075 return false;
12078 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12079 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12081 if (slp_node)
12083 ncopies = 1;
12084 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
12086 else
12088 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12089 vec_num = 1;
12092 gcc_assert (ncopies >= 1);
12093 if (for_reduction && ncopies > 1)
12094 return false; /* FORNOW */
12096 cond_expr = gimple_assign_rhs1 (stmt);
12098 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
12099 &comp_vectype, &dts[0], vectype)
12100 || !comp_vectype)
12101 return false;
12103 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
12104 slp_tree then_slp_node, else_slp_node;
12105 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
12106 &then_clause, &then_slp_node, &dts[2], &vectype1))
12107 return false;
12108 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
12109 &else_clause, &else_slp_node, &dts[3], &vectype2))
12110 return false;
12112 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
12113 return false;
12115 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
12116 return false;
12118 masked = !COMPARISON_CLASS_P (cond_expr);
12119 vec_cmp_type = truth_type_for (comp_vectype);
12121 if (vec_cmp_type == NULL_TREE)
12122 return false;
12124 cond_code = TREE_CODE (cond_expr);
12125 if (!masked)
12127 cond_expr0 = TREE_OPERAND (cond_expr, 0);
12128 cond_expr1 = TREE_OPERAND (cond_expr, 1);
12131 /* For conditional reductions, the "then" value needs to be the candidate
12132 value calculated by this iteration while the "else" value needs to be
12133 the result carried over from previous iterations. If the COND_EXPR
12134 is the other way around, we need to swap it. */
12135 bool must_invert_cmp_result = false;
12136 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
12138 if (masked)
12139 must_invert_cmp_result = true;
12140 else
12142 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
12143 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
12144 if (new_code == ERROR_MARK)
12145 must_invert_cmp_result = true;
12146 else
12148 cond_code = new_code;
12149 /* Make sure we don't accidentally use the old condition. */
12150 cond_expr = NULL_TREE;
12153 std::swap (then_clause, else_clause);
12156 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12158 /* Boolean values may have another representation in vectors
12159 and therefore we prefer bit operations over comparison for
12160 them (which also works for scalar masks). We store opcodes
12161 to use in bitop1 and bitop2. Statement is vectorized as
12162 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12163 depending on bitop1 and bitop2 arity. */
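/* For example, on boolean operands a > b is rewritten as a & ~b
   (bitop1 == BIT_NOT_EXPR on b, bitop2 == BIT_AND_EXPR) and a == b as
   ~(a ^ b), the latter typically realized below by computing a ^ b and
   swapping the then/else clauses instead of emitting the BIT_NOT_EXPR.  */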
12164 switch (cond_code)
12166 case GT_EXPR:
12167 bitop1 = BIT_NOT_EXPR;
12168 bitop2 = BIT_AND_EXPR;
12169 break;
12170 case GE_EXPR:
12171 bitop1 = BIT_NOT_EXPR;
12172 bitop2 = BIT_IOR_EXPR;
12173 break;
12174 case LT_EXPR:
12175 bitop1 = BIT_NOT_EXPR;
12176 bitop2 = BIT_AND_EXPR;
12177 std::swap (cond_expr0, cond_expr1);
12178 break;
12179 case LE_EXPR:
12180 bitop1 = BIT_NOT_EXPR;
12181 bitop2 = BIT_IOR_EXPR;
12182 std::swap (cond_expr0, cond_expr1);
12183 break;
12184 case NE_EXPR:
12185 bitop1 = BIT_XOR_EXPR;
12186 break;
12187 case EQ_EXPR:
12188 bitop1 = BIT_XOR_EXPR;
12189 bitop2 = BIT_NOT_EXPR;
12190 break;
12191 default:
12192 return false;
12194 cond_code = SSA_NAME;
12197 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12198 && reduction_type == EXTRACT_LAST_REDUCTION
12199 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12201 if (dump_enabled_p ())
12202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12203 "reduction comparison operation not supported.\n");
12204 return false;
12207 if (!vec_stmt)
12209 if (bitop1 != NOP_EXPR)
12211 machine_mode mode = TYPE_MODE (comp_vectype);
12212 optab optab;
12214 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12215 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12216 return false;
12218 if (bitop2 != NOP_EXPR)
12220 optab = optab_for_tree_code (bitop2, comp_vectype,
12221 optab_default);
12222 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12223 return false;
12227 vect_cost_for_stmt kind = vector_stmt;
12228 if (reduction_type == EXTRACT_LAST_REDUCTION)
12229 /* Count one reduction-like operation per vector. */
12230 kind = vec_to_scalar;
12231 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12232 && (masked
12233 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12234 cond_code)
12235 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12236 ERROR_MARK))))
12237 return false;
12239 if (slp_node
12240 && (!vect_maybe_update_slp_op_vectype
12241 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12242 || (op_adjust == 1
12243 && !vect_maybe_update_slp_op_vectype
12244 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12245 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12246 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12248 if (dump_enabled_p ())
12249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12250 "incompatible vector types for invariants\n");
12251 return false;
12254 if (loop_vinfo && for_reduction
12255 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12257 if (reduction_type == EXTRACT_LAST_REDUCTION)
12259 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12260 vectype, OPTIMIZE_FOR_SPEED))
12261 vect_record_loop_len (loop_vinfo,
12262 &LOOP_VINFO_LENS (loop_vinfo),
12263 ncopies * vec_num, vectype, 1);
12264 else
12265 vect_record_loop_mask (loop_vinfo,
12266 &LOOP_VINFO_MASKS (loop_vinfo),
12267 ncopies * vec_num, vectype, NULL);
12269 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12270 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12272 if (dump_enabled_p ())
12273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12274 "conditional reduction prevents the use"
12275 " of partial vectors.\n");
12276 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12280 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12281 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
12282 cost_vec, kind);
12283 return true;
12286 /* Transform. */
12288 /* Handle def. */
12289 scalar_dest = gimple_assign_lhs (stmt);
12290 if (reduction_type != EXTRACT_LAST_REDUCTION)
12291 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12293 bool swap_cond_operands = false;
12295 /* See whether another part of the vectorized code applies a loop
12296 mask to the condition, or to its inverse. */
12298 vec_loop_masks *masks = NULL;
12299 vec_loop_lens *lens = NULL;
12300 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12302 if (reduction_type == EXTRACT_LAST_REDUCTION)
12303 lens = &LOOP_VINFO_LENS (loop_vinfo);
12305 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12307 if (reduction_type == EXTRACT_LAST_REDUCTION)
12308 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12309 else
12311 scalar_cond_masked_key cond (cond_expr, ncopies);
12312 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12313 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12314 else
12316 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12317 tree_code orig_code = cond.code;
12318 cond.code = invert_tree_comparison (cond.code, honor_nans);
12319 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12321 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12322 cond_code = cond.code;
12323 swap_cond_operands = true;
12325 else
12327 /* Try the inverse of the current mask. We check whether the
12328 inverse mask is live, and if so we generate a negation of
12329 the current mask such that we still honor NaNs. */
12330 cond.inverted_p = true;
12331 cond.code = orig_code;
12332 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12334 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12335 cond_code = cond.code;
12336 swap_cond_operands = true;
12337 must_invert_cmp_result = true;
12344 /* Handle cond expr. */
12345 if (masked)
12346 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12347 cond_expr, comp_vectype, &vec_oprnds0,
12348 then_clause, vectype, &vec_oprnds2,
12349 reduction_type != EXTRACT_LAST_REDUCTION
12350 ? else_clause : NULL, vectype, &vec_oprnds3);
12351 else
12352 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12353 cond_expr0, comp_vectype, &vec_oprnds0,
12354 cond_expr1, comp_vectype, &vec_oprnds1,
12355 then_clause, vectype, &vec_oprnds2,
12356 reduction_type != EXTRACT_LAST_REDUCTION
12357 ? else_clause : NULL, vectype, &vec_oprnds3);
12359 /* Arguments are ready. Create the new vector stmt. */
12360 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12362 vec_then_clause = vec_oprnds2[i];
12363 if (reduction_type != EXTRACT_LAST_REDUCTION)
12364 vec_else_clause = vec_oprnds3[i];
12366 if (swap_cond_operands)
12367 std::swap (vec_then_clause, vec_else_clause);
12369 if (masked)
12370 vec_compare = vec_cond_lhs;
12371 else
12373 vec_cond_rhs = vec_oprnds1[i];
12374 if (bitop1 == NOP_EXPR)
12376 gimple_seq stmts = NULL;
12377 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12378 vec_cond_lhs, vec_cond_rhs);
12379 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12381 else
12383 new_temp = make_ssa_name (vec_cmp_type);
12384 gassign *new_stmt;
12385 if (bitop1 == BIT_NOT_EXPR)
12386 new_stmt = gimple_build_assign (new_temp, bitop1,
12387 vec_cond_rhs);
12388 else
12389 new_stmt
12390 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12391 vec_cond_rhs);
12392 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12393 if (bitop2 == NOP_EXPR)
12394 vec_compare = new_temp;
12395 else if (bitop2 == BIT_NOT_EXPR
12396 && reduction_type != EXTRACT_LAST_REDUCTION)
12398 /* Instead of doing ~x ? y : z do x ? z : y. */
12399 vec_compare = new_temp;
12400 std::swap (vec_then_clause, vec_else_clause);
12402 else
12404 vec_compare = make_ssa_name (vec_cmp_type);
12405 if (bitop2 == BIT_NOT_EXPR)
12406 new_stmt
12407 = gimple_build_assign (vec_compare, bitop2, new_temp);
12408 else
12409 new_stmt
12410 = gimple_build_assign (vec_compare, bitop2,
12411 vec_cond_lhs, new_temp);
12412 vect_finish_stmt_generation (vinfo, stmt_info,
12413 new_stmt, gsi);
12418 /* If we decided to apply a loop mask to the result of the vector
12419 comparison, AND the comparison with the mask now. Later passes
12420 should then be able to reuse the AND results between multiple
12421 vector statements.
12423 For example:
12424 for (int i = 0; i < 100; ++i)
12425 x[i] = y[i] ? z[i] : 10;
12427 results in the following optimized GIMPLE:
12429 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12430 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12431 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12432 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12433 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12434 vect_iftmp.11_47, { 10, ... }>;
12436 instead of using masked and unmasked forms of
12437 vec != { 0, ... } (masked in the MASK_LOAD,
12438 unmasked in the VEC_COND_EXPR). */
12440 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12441 in cases where that's necessary. */
12443 tree len = NULL_TREE, bias = NULL_TREE;
12444 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12446 if (!is_gimple_val (vec_compare))
12448 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12449 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12450 vec_compare);
12451 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12452 vec_compare = vec_compare_name;
12455 if (must_invert_cmp_result)
12457 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12458 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12459 BIT_NOT_EXPR,
12460 vec_compare);
12461 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12462 vec_compare = vec_compare_name;
12465 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12466 vectype, OPTIMIZE_FOR_SPEED))
12468 if (lens)
12470 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12471 vec_num * ncopies, vectype, i, 1);
12472 signed char biasval
12473 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12474 bias = build_int_cst (intQI_type_node, biasval);
12476 else
12478 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12479 bias = build_int_cst (intQI_type_node, 0);
12482 if (masks)
12484 tree loop_mask
12485 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12486 vectype, i);
12487 tree tmp2 = make_ssa_name (vec_cmp_type);
12488 gassign *g
12489 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12490 loop_mask);
12491 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12492 vec_compare = tmp2;
12496 gimple *new_stmt;
12497 if (reduction_type == EXTRACT_LAST_REDUCTION)
12499 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12500 tree lhs = gimple_get_lhs (old_stmt);
12501 if (len)
12502 new_stmt = gimple_build_call_internal
12503 (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
12504 vec_then_clause, len, bias);
12505 else
12506 new_stmt = gimple_build_call_internal
12507 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
12508 vec_then_clause);
12509 gimple_call_set_lhs (new_stmt, lhs);
12510 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12511 if (old_stmt == gsi_stmt (*gsi))
12512 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12513 else
12515 /* In this case we're moving the definition to later in the
12516 block. That doesn't matter because the only uses of the
12517 lhs are in phi statements. */
12518 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12519 gsi_remove (&old_gsi, true);
12520 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12523 else
12525 new_temp = make_ssa_name (vec_dest);
12526 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12527 vec_then_clause, vec_else_clause);
12528 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12530 if (slp_node)
12531 slp_node->push_vec_def (new_stmt);
12532 else
12533 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12536 if (!slp_node)
12537 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12539 vec_oprnds0.release ();
12540 vec_oprnds1.release ();
12541 vec_oprnds2.release ();
12542 vec_oprnds3.release ();
12544 return true;
12547 /* Helper of vectorizable_comparison.
12549 Check if STMT_INFO is comparison expression CODE that can be vectorized.
12550 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12551 comparison, put it in VEC_STMT, and insert it at GSI.
12553 Return true if STMT_INFO is vectorizable in this way. */
12555 static bool
12556 vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12557 stmt_vec_info stmt_info, tree_code code,
12558 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12559 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12561 tree lhs, rhs1, rhs2;
12562 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12563 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12564 tree new_temp;
12565 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12566 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12567 int ndts = 2;
12568 poly_uint64 nunits;
12569 int ncopies;
12570 enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12571 int i;
12572 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12573 vec<tree> vec_oprnds0 = vNULL;
12574 vec<tree> vec_oprnds1 = vNULL;
12575 tree mask_type;
12576 tree mask = NULL_TREE;
12578 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12579 return false;
12581 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12582 return false;
12584 mask_type = vectype;
12585 nunits = TYPE_VECTOR_SUBPARTS (vectype);
12587 if (slp_node)
12588 ncopies = 1;
12589 else
12590 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12592 gcc_assert (ncopies >= 1);
12594 if (TREE_CODE_CLASS (code) != tcc_comparison)
12595 return false;
12597 slp_tree slp_rhs1, slp_rhs2;
12598 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12599 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12600 return false;
12602 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12603 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12604 return false;
12606 if (vectype1 && vectype2
12607 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12608 TYPE_VECTOR_SUBPARTS (vectype2)))
12609 return false;
12611 vectype = vectype1 ? vectype1 : vectype2;
12613 /* Invariant comparison. */
12614 if (!vectype)
12616 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
12617 vectype = mask_type;
12618 else
12619 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
12620 slp_node);
12621 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12622 return false;
12624 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12625 return false;
12627 /* Can't compare mask and non-mask types. */
12628 if (vectype1 && vectype2
12629 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12630 return false;
12632 /* Boolean values may have another representation in vectors
12633 and therefore we prefer bit operations over comparison for
12634 them (which also works for scalar masks). We store opcodes
12635 to use in bitop1 and bitop2. Statement is vectorized as
12636 BITOP2 (rhs1 BITOP1 rhs2) or
12637 rhs1 BITOP2 (BITOP1 rhs2)
12638 depending on bitop1 and bitop2 arity. */
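/* A minimal illustration of the scheme below: for boolean operands A and B,
   A > B holds exactly when A is true and B is false, so it is emitted as
   A & ~B (bitop1 = BIT_NOT_EXPR applied to B, bitop2 = BIT_AND_EXPR)
   instead of a vector comparison, whose result would depend on how the
   target encodes vector booleans.  */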
12639 bool swap_p = false;
12640 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12642 if (code == GT_EXPR)
12644 bitop1 = BIT_NOT_EXPR;
12645 bitop2 = BIT_AND_EXPR;
12647 else if (code == GE_EXPR)
12649 bitop1 = BIT_NOT_EXPR;
12650 bitop2 = BIT_IOR_EXPR;
12652 else if (code == LT_EXPR)
12654 bitop1 = BIT_NOT_EXPR;
12655 bitop2 = BIT_AND_EXPR;
12656 swap_p = true;
12658 else if (code == LE_EXPR)
12660 bitop1 = BIT_NOT_EXPR;
12661 bitop2 = BIT_IOR_EXPR;
12662 swap_p = true;
12664 else
12666 bitop1 = BIT_XOR_EXPR;
12667 if (code == EQ_EXPR)
12668 bitop2 = BIT_NOT_EXPR;
12672 if (!vec_stmt)
12674 if (bitop1 == NOP_EXPR)
12676 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12677 return false;
12679 else
12681 machine_mode mode = TYPE_MODE (vectype);
12682 optab optab;
12684 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12685 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12686 return false;
12688 if (bitop2 != NOP_EXPR)
12690 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12691 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12692 return false;
12696 /* Put types on constant and invariant SLP children. */
12697 if (slp_node
12698 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12699 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12701 if (dump_enabled_p ())
12702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12703 "incompatible vector types for invariants\n");
12704 return false;
12707 vect_model_simple_cost (vinfo, stmt_info,
12708 ncopies * (1 + (bitop2 != NOP_EXPR)),
12709 dts, ndts, slp_node, cost_vec);
12710 return true;
12713 /* Transform. */
12715 /* Handle def. */
12716 lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info));
12717 if (lhs)
12718 mask = vect_create_destination_var (lhs, mask_type);
12720 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12721 rhs1, vectype, &vec_oprnds0,
12722 rhs2, vectype, &vec_oprnds1);
12723 if (swap_p)
12724 std::swap (vec_oprnds0, vec_oprnds1);
12726 /* Arguments are ready. Create the new vector stmt. */
12727 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12729 gimple *new_stmt;
12730 vec_rhs2 = vec_oprnds1[i];
12732 if (lhs)
12733 new_temp = make_ssa_name (mask);
12734 else
12735 new_temp = make_temp_ssa_name (mask_type, NULL, "cmp");
12736 if (bitop1 == NOP_EXPR)
12738 new_stmt = gimple_build_assign (new_temp, code,
12739 vec_rhs1, vec_rhs2);
12740 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12742 else
12744 if (bitop1 == BIT_NOT_EXPR)
12745 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12746 else
12747 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12748 vec_rhs2);
12749 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12750 if (bitop2 != NOP_EXPR)
12752 tree res = make_ssa_name (mask);
12753 if (bitop2 == BIT_NOT_EXPR)
12754 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12755 else
12756 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12757 new_temp);
12758 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12761 if (slp_node)
12762 slp_node->push_vec_def (new_stmt);
12763 else
12764 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12767 if (!slp_node)
12768 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12770 vec_oprnds0.release ();
12771 vec_oprnds1.release ();
12773 return true;
12776 /* vectorizable_comparison.
12778 Check if STMT_INFO is comparison expression that can be vectorized.
12779 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12780 comparison, put it in VEC_STMT, and insert it at GSI.
12782 Return true if STMT_INFO is vectorizable in this way. */
12784 static bool
12785 vectorizable_comparison (vec_info *vinfo,
12786 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12787 gimple **vec_stmt,
12788 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12790 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12792 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12793 return false;
12795 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12796 return false;
12798 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12799 if (!stmt)
12800 return false;
12802 enum tree_code code = gimple_assign_rhs_code (stmt);
12803 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12804 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12805 vec_stmt, slp_node, cost_vec))
12806 return false;
12808 if (!vec_stmt)
12809 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12811 return true;
12814 /* Check to see if the current early break given in STMT_INFO is valid for
12815 vectorization. */
12817 static bool
12818 vectorizable_early_exit (vec_info *vinfo, stmt_vec_info stmt_info,
12819 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12820 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12822 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12823 if (!loop_vinfo
12824 || !is_a <gcond *> (STMT_VINFO_STMT (stmt_info)))
12825 return false;
12827 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_condition_def)
12828 return false;
12830 if (!STMT_VINFO_RELEVANT_P (stmt_info))
12831 return false;
12833 DUMP_VECT_SCOPE ("vectorizable_early_exit");
12835 auto code = gimple_cond_code (STMT_VINFO_STMT (stmt_info));
12837 tree vectype = NULL_TREE;
12838 slp_tree slp_op0;
12839 tree op0;
12840 enum vect_def_type dt0;
12841 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0, &dt0,
12842 &vectype))
12844 if (dump_enabled_p ())
12845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12846 "use not simple.\n");
12847 return false;
12850 if (!vectype)
12851 return false;
12853 machine_mode mode = TYPE_MODE (vectype);
12854 int ncopies;
12856 if (slp_node)
12857 ncopies = 1;
12858 else
12859 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12861 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
12862 bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
12864 /* Now build the new conditional. Pattern gimple_conds get dropped during
12865 codegen so we must replace the original insn. */
12866 gimple *orig_stmt = STMT_VINFO_STMT (vect_orig_stmt (stmt_info));
12867 gcond *cond_stmt = as_a <gcond *>(orig_stmt);
12868 /* When vectorizing we assume that if the branch edge is taken we are
12869 exiting the loop.  This is not always the case, however, as the compiler
12870 rewrites conditions to be comparisons against 0 and sometimes flips the
12871 edges to do so.  That is fine for scalar code, but for vector code we then
12872 have to flip the test, since we still assume that taking the branch edge
12873 means the exit condition was found, i.e. we need to know whether we are
12874 generating a `forall' or an `exists' condition. */
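/* A sketch of the two shapes (illustrative only, not lifted from a
   testcase).  For a break such as

     if (a[i] == 42) break;

   the taken branch leaves the loop, so this is an `exists' test: the
   per-lane compare results are IORed together below and the exit is taken
   when the reduction is nonzero.  If the compiler flipped the edges so that
   the taken branch stays in the loop, the test becomes a `forall': the
   lanes are ANDed and we keep looping only while the reduction is
   all-ones.  */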
12875 auto new_code = NE_EXPR;
12876 auto reduc_optab = ior_optab;
12877 auto reduc_op = BIT_IOR_EXPR;
12878 tree cst = build_zero_cst (vectype);
12879 edge exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 0);
12880 if (exit_true_edge->flags & EDGE_FALSE_VALUE)
12881 exit_true_edge = EDGE_SUCC (gimple_bb (cond_stmt), 1);
12882 gcc_assert (exit_true_edge->flags & EDGE_TRUE_VALUE);
12883 if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
12884 exit_true_edge->dest))
12886 new_code = EQ_EXPR;
12887 reduc_optab = and_optab;
12888 reduc_op = BIT_AND_EXPR;
12889 cst = build_minus_one_cst (vectype);
12892 /* Analyze only. */
12893 if (!vec_stmt)
12895 if (direct_optab_handler (cbranch_optab, mode) == CODE_FOR_nothing)
12897 if (dump_enabled_p ())
12898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12899 "can't vectorize early exit because the "
12900 "target doesn't support flag setting vector "
12901 "comparisons.\n");
12902 return false;
12905 if (ncopies > 1
12906 && direct_optab_handler (reduc_optab, mode) == CODE_FOR_nothing)
12908 if (dump_enabled_p ())
12909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12910 "can't vectorize early exit because the "
12911 "target does not support boolean vector %s "
12912 "for type %T.\n",
12913 reduc_optab == ior_optab ? "OR" : "AND",
12914 vectype);
12915 return false;
12918 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12919 vec_stmt, slp_node, cost_vec))
12920 return false;
12922 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12924 if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN, vectype,
12925 OPTIMIZE_FOR_SPEED))
12926 return false;
12927 else
12928 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, NULL);
12932 return true;
12935 /* Transform. */
12937 tree new_temp = NULL_TREE;
12938 gimple *new_stmt = NULL;
12940 if (dump_enabled_p ())
12941 dump_printf_loc (MSG_NOTE, vect_location, "transform early-exit.\n");
12943 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12944 vec_stmt, slp_node, cost_vec))
12945 gcc_unreachable ();
12947 gimple *stmt = STMT_VINFO_STMT (stmt_info);
12948 basic_block cond_bb = gimple_bb (stmt);
12949 gimple_stmt_iterator cond_gsi = gsi_last_bb (cond_bb);
12951 auto_vec<tree> stmts;
12953 if (slp_node)
12954 stmts.safe_splice (SLP_TREE_VEC_DEFS (slp_node));
12955 else
12957 auto vec_stmts = STMT_VINFO_VEC_STMTS (stmt_info);
12958 stmts.reserve_exact (vec_stmts.length ());
12959 for (auto stmt : vec_stmts)
12960 stmts.quick_push (gimple_assign_lhs (stmt));
12963 /* Determine if we need to reduce the final value. */
12964 if (stmts.length () > 1)
12966 /* We build the reductions in a way that maintains as much parallelism as
12967 possible. */
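/* For example (purely illustrative), with four vector compare results
   s0..s3 the loop further below first combines s3 with s2 and s1 with s0,
   and only then combines the two partial results, giving a log-depth
   reduction tree rather than a linear chain.  */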
12968 auto_vec<tree> workset (stmts.length ());
12970 /* Mask the statements as we queue them up.  Normally we would loop over
12971 vec_num, but since we inspect the exact results of the vectorization
12972 we can simply use the stmts themselves instead. */
12973 if (masked_loop_p)
12974 for (unsigned i = 0; i < stmts.length (); i++)
12976 tree stmt_mask
12977 = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype,
12979 stmt_mask
12980 = prepare_vec_mask (loop_vinfo, TREE_TYPE (stmt_mask), stmt_mask,
12981 stmts[i], &cond_gsi);
12982 workset.quick_push (stmt_mask);
12984 else
12985 workset.splice (stmts);
12987 while (workset.length () > 1)
12989 new_temp = make_temp_ssa_name (vectype, NULL, "vexit_reduc");
12990 tree arg0 = workset.pop ();
12991 tree arg1 = workset.pop ();
12992 new_stmt = gimple_build_assign (new_temp, reduc_op, arg0, arg1);
12993 vect_finish_stmt_generation (loop_vinfo, stmt_info, new_stmt,
12994 &cond_gsi);
12995 workset.quick_insert (0, new_temp);
12998 else
13000 new_temp = stmts[0];
13001 if (masked_loop_p)
13003 tree mask
13004 = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies, vectype, 0);
13005 new_temp = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
13006 new_temp, &cond_gsi);
13010 gcc_assert (new_temp);
13012 gimple_cond_set_condition (cond_stmt, new_code, new_temp, cst);
13013 update_stmt (orig_stmt);
13015 if (slp_node)
13016 SLP_TREE_VEC_DEFS (slp_node).truncate (0);
13017 else
13018 STMT_VINFO_VEC_STMTS (stmt_info).truncate (0);
13020 if (!slp_node)
13021 *vec_stmt = orig_stmt;
13023 return true;
13026 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
13027 can handle all live statements in the node. Otherwise return true
13028 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
13029 VEC_STMT_P is as for vectorizable_live_operation. */
13031 static bool
13032 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
13033 slp_tree slp_node, slp_instance slp_node_instance,
13034 bool vec_stmt_p,
13035 stmt_vector_for_cost *cost_vec)
13037 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
13038 if (slp_node)
13040 stmt_vec_info slp_stmt_info;
13041 unsigned int i;
13042 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
13044 if ((STMT_VINFO_LIVE_P (slp_stmt_info)
13045 || (loop_vinfo
13046 && LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
13047 && STMT_VINFO_DEF_TYPE (slp_stmt_info)
13048 == vect_induction_def))
13049 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
13050 slp_node_instance, i,
13051 vec_stmt_p, cost_vec))
13052 return false;
13055 else if ((STMT_VINFO_LIVE_P (stmt_info)
13056 || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
13057 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def))
13058 && !vectorizable_live_operation (vinfo, stmt_info,
13059 slp_node, slp_node_instance, -1,
13060 vec_stmt_p, cost_vec))
13061 return false;
13063 return true;
13066 /* Make sure the statement is vectorizable. */
13068 opt_result
13069 vect_analyze_stmt (vec_info *vinfo,
13070 stmt_vec_info stmt_info, bool *need_to_vectorize,
13071 slp_tree node, slp_instance node_instance,
13072 stmt_vector_for_cost *cost_vec)
13074 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
13075 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
13076 bool ok;
13077 gimple_seq pattern_def_seq;
13079 if (dump_enabled_p ())
13080 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
13081 stmt_info->stmt);
13083 if (gimple_has_volatile_ops (stmt_info->stmt))
13084 return opt_result::failure_at (stmt_info->stmt,
13085 "not vectorized:"
13086 " stmt has volatile operands: %G\n",
13087 stmt_info->stmt);
13089 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13090 && node == NULL
13091 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
13093 gimple_stmt_iterator si;
13095 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
13097 stmt_vec_info pattern_def_stmt_info
13098 = vinfo->lookup_stmt (gsi_stmt (si));
13099 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
13100 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
13102 /* Analyze def stmt of STMT if it's a pattern stmt. */
13103 if (dump_enabled_p ())
13104 dump_printf_loc (MSG_NOTE, vect_location,
13105 "==> examining pattern def statement: %G",
13106 pattern_def_stmt_info->stmt);
13108 opt_result res
13109 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
13110 need_to_vectorize, node, node_instance,
13111 cost_vec);
13112 if (!res)
13113 return res;
13118 /* Skip stmts that do not need to be vectorized. In loops this is expected
13119 to include:
13120 - the COND_EXPR which is the loop exit condition
13121 - any LABEL_EXPRs in the loop
13122 - computations that are used only for array indexing or loop control.
13123 In basic blocks we only analyze statements that are a part of some SLP
13124 instance, therefore, all the statements are relevant.
13126 A pattern statement needs to be analyzed instead of the original statement
13127 if the original statement is not relevant.  Otherwise, we analyze both
13128 statements.  In basic blocks we are called from some SLP instance
13129 traversal; there we don't analyze the pattern stmts separately, since
13130 they will already be part of an SLP instance. */
13132 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
13133 if (!STMT_VINFO_RELEVANT_P (stmt_info)
13134 && !STMT_VINFO_LIVE_P (stmt_info))
13136 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13137 && pattern_stmt_info
13138 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
13139 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
13141 /* Analyze PATTERN_STMT instead of the original stmt. */
13142 stmt_info = pattern_stmt_info;
13143 if (dump_enabled_p ())
13144 dump_printf_loc (MSG_NOTE, vect_location,
13145 "==> examining pattern statement: %G",
13146 stmt_info->stmt);
13148 else
13150 if (dump_enabled_p ())
13151 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
13153 return opt_result::success ();
13156 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
13157 && node == NULL
13158 && pattern_stmt_info
13159 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
13160 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
13162 /* Analyze PATTERN_STMT too. */
13163 if (dump_enabled_p ())
13164 dump_printf_loc (MSG_NOTE, vect_location,
13165 "==> examining pattern statement: %G",
13166 pattern_stmt_info->stmt);
13168 opt_result res
13169 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
13170 node_instance, cost_vec);
13171 if (!res)
13172 return res;
13175 switch (STMT_VINFO_DEF_TYPE (stmt_info))
13177 case vect_internal_def:
13178 case vect_condition_def:
13179 break;
13181 case vect_reduction_def:
13182 case vect_nested_cycle:
13183 gcc_assert (!bb_vinfo
13184 && (relevance == vect_used_in_outer
13185 || relevance == vect_used_in_outer_by_reduction
13186 || relevance == vect_used_by_reduction
13187 || relevance == vect_unused_in_scope
13188 || relevance == vect_used_only_live));
13189 break;
13191 case vect_induction_def:
13192 case vect_first_order_recurrence:
13193 gcc_assert (!bb_vinfo);
13194 break;
13196 case vect_constant_def:
13197 case vect_external_def:
13198 case vect_unknown_def_type:
13199 default:
13200 gcc_unreachable ();
13203 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13204 if (node)
13205 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
13207 if (STMT_VINFO_RELEVANT_P (stmt_info))
13209 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
13210 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
13211 || gimple_code (stmt_info->stmt) == GIMPLE_COND
13212 || (call && gimple_call_lhs (call) == NULL_TREE));
13213 *need_to_vectorize = true;
13216 if (PURE_SLP_STMT (stmt_info) && !node)
13218 if (dump_enabled_p ())
13219 dump_printf_loc (MSG_NOTE, vect_location,
13220 "handled only by SLP analysis\n");
13221 return opt_result::success ();
13224 ok = true;
13225 if (!bb_vinfo
13226 && (STMT_VINFO_RELEVANT_P (stmt_info)
13227 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
13228 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
13229 -mveclibabi= takes preference over library functions with
13230 the simd attribute. */
13231 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13232 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
13233 cost_vec)
13234 || vectorizable_conversion (vinfo, stmt_info,
13235 NULL, NULL, node, cost_vec)
13236 || vectorizable_operation (vinfo, stmt_info,
13237 NULL, NULL, node, cost_vec)
13238 || vectorizable_assignment (vinfo, stmt_info,
13239 NULL, NULL, node, cost_vec)
13240 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13241 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13242 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13243 node, node_instance, cost_vec)
13244 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
13245 NULL, node, cost_vec)
13246 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13247 || vectorizable_condition (vinfo, stmt_info,
13248 NULL, NULL, node, cost_vec)
13249 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
13250 cost_vec)
13251 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13252 stmt_info, NULL, node)
13253 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13254 stmt_info, NULL, node, cost_vec)
13255 || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
13256 cost_vec));
13257 else
13259 if (bb_vinfo)
13260 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
13261 || vectorizable_simd_clone_call (vinfo, stmt_info,
13262 NULL, NULL, node, cost_vec)
13263 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
13264 cost_vec)
13265 || vectorizable_shift (vinfo, stmt_info,
13266 NULL, NULL, node, cost_vec)
13267 || vectorizable_operation (vinfo, stmt_info,
13268 NULL, NULL, node, cost_vec)
13269 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
13270 cost_vec)
13271 || vectorizable_load (vinfo, stmt_info,
13272 NULL, NULL, node, cost_vec)
13273 || vectorizable_store (vinfo, stmt_info,
13274 NULL, NULL, node, cost_vec)
13275 || vectorizable_condition (vinfo, stmt_info,
13276 NULL, NULL, node, cost_vec)
13277 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
13278 cost_vec)
13279 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec)
13280 || vectorizable_early_exit (vinfo, stmt_info, NULL, NULL, node,
13281 cost_vec));
13285 if (node)
13286 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13288 if (!ok)
13289 return opt_result::failure_at (stmt_info->stmt,
13290 "not vectorized:"
13291 " relevant stmt not supported: %G",
13292 stmt_info->stmt);
13294 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
13295 need extra handling, except for vectorizable reductions. */
13296 if (!bb_vinfo
13297 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
13298 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
13299 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
13300 stmt_info, node, node_instance,
13301 false, cost_vec))
13302 return opt_result::failure_at (stmt_info->stmt,
13303 "not vectorized:"
13304 " live stmt not supported: %G",
13305 stmt_info->stmt);
13307 return opt_result::success ();
13311 /* Function vect_transform_stmt.
13313 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
13315 bool
13316 vect_transform_stmt (vec_info *vinfo,
13317 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
13318 slp_tree slp_node, slp_instance slp_node_instance)
13320 bool is_store = false;
13321 gimple *vec_stmt = NULL;
13322 bool done;
13324 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
13326 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
13327 if (slp_node)
13328 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
13330 switch (STMT_VINFO_TYPE (stmt_info))
13332 case type_demotion_vec_info_type:
13333 case type_promotion_vec_info_type:
13334 case type_conversion_vec_info_type:
13335 done = vectorizable_conversion (vinfo, stmt_info,
13336 gsi, &vec_stmt, slp_node, NULL);
13337 gcc_assert (done);
13338 break;
13340 case induc_vec_info_type:
13341 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
13342 stmt_info, &vec_stmt, slp_node,
13343 NULL);
13344 gcc_assert (done);
13345 break;
13347 case shift_vec_info_type:
13348 done = vectorizable_shift (vinfo, stmt_info,
13349 gsi, &vec_stmt, slp_node, NULL);
13350 gcc_assert (done);
13351 break;
13353 case op_vec_info_type:
13354 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13355 NULL);
13356 gcc_assert (done);
13357 break;
13359 case assignment_vec_info_type:
13360 done = vectorizable_assignment (vinfo, stmt_info,
13361 gsi, &vec_stmt, slp_node, NULL);
13362 gcc_assert (done);
13363 break;
13365 case load_vec_info_type:
13366 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
13367 NULL);
13368 gcc_assert (done);
13369 break;
13371 case store_vec_info_type:
13372 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
13373 && !slp_node
13374 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
13375 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
13376 /* In case of interleaving, the whole chain is vectorized when the
13377 last store in the chain is reached. Store stmts before the last
13378 one are skipped, and their vec_stmt_info shouldn't be freed
13379 in the meantime. */
13381 else
13383 done = vectorizable_store (vinfo, stmt_info,
13384 gsi, &vec_stmt, slp_node, NULL);
13385 gcc_assert (done);
13386 is_store = true;
13388 break;
13390 case condition_vec_info_type:
13391 done = vectorizable_condition (vinfo, stmt_info,
13392 gsi, &vec_stmt, slp_node, NULL);
13393 gcc_assert (done);
13394 break;
13396 case comparison_vec_info_type:
13397 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
13398 slp_node, NULL);
13399 gcc_assert (done);
13400 break;
13402 case call_vec_info_type:
13403 done = vectorizable_call (vinfo, stmt_info,
13404 gsi, &vec_stmt, slp_node, NULL);
13405 break;
13407 case call_simd_clone_vec_info_type:
13408 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
13409 slp_node, NULL);
13410 break;
13412 case reduc_vec_info_type:
13413 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13414 gsi, &vec_stmt, slp_node);
13415 gcc_assert (done);
13416 break;
13418 case cycle_phi_info_type:
13419 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
13420 &vec_stmt, slp_node, slp_node_instance);
13421 gcc_assert (done);
13422 break;
13424 case lc_phi_info_type:
13425 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13426 stmt_info, &vec_stmt, slp_node);
13427 gcc_assert (done);
13428 break;
13430 case recurr_info_type:
13431 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13432 stmt_info, &vec_stmt, slp_node, NULL);
13433 gcc_assert (done);
13434 break;
13436 case phi_info_type:
13437 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
13438 gcc_assert (done);
13439 break;
13441 case loop_exit_ctrl_vec_info_type:
13442 done = vectorizable_early_exit (vinfo, stmt_info, gsi, &vec_stmt,
13443 slp_node, NULL);
13444 gcc_assert (done);
13445 break;
13447 default:
13448 if (!STMT_VINFO_LIVE_P (stmt_info))
13450 if (dump_enabled_p ())
13451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13452 "stmt not supported.\n");
13453 gcc_unreachable ();
13455 done = true;
13458 if (!slp_node && vec_stmt)
13459 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
13461 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
13463 /* Handle stmts whose DEF is used outside the loop-nest that is
13464 being vectorized. */
13465 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13466 slp_node_instance, true, NULL);
13467 gcc_assert (done);
13470 if (slp_node)
13471 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13473 return is_store;
13477 /* Remove a group of stores (for SLP or interleaving), free their
13478 stmt_vec_info. */
13480 void
13481 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13483 stmt_vec_info next_stmt_info = first_stmt_info;
13485 while (next_stmt_info)
13487 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13488 next_stmt_info = vect_orig_stmt (next_stmt_info);
13489 /* Free the attached stmt_vec_info and remove the stmt. */
13490 vinfo->remove_stmt (next_stmt_info);
13491 next_stmt_info = tmp;
13495 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13496 elements of type SCALAR_TYPE, or null if the target doesn't support
13497 such a type.
13499 If NUNITS is zero, return a vector type that contains elements of
13500 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13502 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13503 for this vectorization region and want to "autodetect" the best choice.
13504 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13505 and we want the new type to be interoperable with it. PREVAILING_MODE
13506 in this case can be a scalar integer mode or a vector mode; when it
13507 is a vector mode, the function acts like a tree-level version of
13508 related_vector_mode. */
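/* An illustration (target-dependent, hence only a sketch): with
   PREVAILING_MODE == VOIDmode, SCALAR_TYPE == int and NUNITS == 0 the
   result is the target's preferred vector of ints, e.g. a four-element
   vector on a 128-bit SIMD target.  With a previously chosen vector mode
   as PREVAILING_MODE and a nonzero NUNITS the result has exactly NUNITS
   elements, or is NULL_TREE if no interoperable type can be found.  */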
13510 tree
13511 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13512 tree scalar_type, poly_uint64 nunits)
13514 tree orig_scalar_type = scalar_type;
13515 scalar_mode inner_mode;
13516 machine_mode simd_mode;
13517 tree vectype;
13519 if ((!INTEGRAL_TYPE_P (scalar_type)
13520 && !POINTER_TYPE_P (scalar_type)
13521 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13522 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13523 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13524 return NULL_TREE;
13526 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13528 /* Interoperability between modes requires one to be a constant multiple
13529 of the other, so that the number of vectors required for each operation
13530 is a compile-time constant. */
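/* For instance (sizes purely illustrative): with a 16-byte prevailing
   mode, requests amounting to 8 or 32 bytes are fine because one size
   divides the other, whereas a 12-byte request is rejected since neither
   size is a constant multiple of the other.  */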
13531 if (prevailing_mode != VOIDmode
13532 && !constant_multiple_p (nunits * nbytes,
13533 GET_MODE_SIZE (prevailing_mode))
13534 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13535 nunits * nbytes))
13536 return NULL_TREE;
13538 /* For vector types of elements whose mode precision doesn't
13539 match their type's precision we use an element type of mode
13540 precision. The vectorization routines will have to make sure
13541 they support the proper result truncation/extension.
13542 We also make sure to build vector types with INTEGER_TYPE
13543 component type only. */
13544 if (INTEGRAL_TYPE_P (scalar_type)
13545 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13546 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13547 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13548 TYPE_UNSIGNED (scalar_type));
13550 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13551 When the component mode passes the above test simply use a type
13552 corresponding to that mode. The theory is that any use that
13553 would cause problems with this will disable vectorization anyway. */
13554 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13555 && !INTEGRAL_TYPE_P (scalar_type))
13556 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13558 /* We can't build a vector type of elements with alignment bigger than
13559 their size. */
13560 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13561 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13562 TYPE_UNSIGNED (scalar_type));
13564 /* If we fell back to using the mode, fail if there was
13565 no scalar type for it. */
13566 if (scalar_type == NULL_TREE)
13567 return NULL_TREE;
13569 /* If no prevailing mode was supplied, use the mode the target prefers.
13570 Otherwise lookup a vector mode based on the prevailing mode. */
13571 if (prevailing_mode == VOIDmode)
13573 gcc_assert (known_eq (nunits, 0U));
13574 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13575 if (SCALAR_INT_MODE_P (simd_mode))
13577 /* Traditional behavior is not to take the integer mode
13578 literally, but simply to use it as a way of determining
13579 the vector size. It is up to mode_for_vector to decide
13580 what the TYPE_MODE should be.
13582 Note that nunits == 1 is allowed in order to support single
13583 element vector types. */
13584 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
13585 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13586 return NULL_TREE;
13589 else if (SCALAR_INT_MODE_P (prevailing_mode)
13590 || !related_vector_mode (prevailing_mode,
13591 inner_mode, nunits).exists (&simd_mode))
13593 /* Fall back to using mode_for_vector, mostly in the hope of being
13594 able to use an integer mode. */
13595 if (known_eq (nunits, 0U)
13596 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13597 return NULL_TREE;
13599 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13600 return NULL_TREE;
13603 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13605 /* In cases where the mode was chosen by mode_for_vector, check that
13606 the target actually supports the chosen mode, or that it at least
13607 allows the vector mode to be replaced by a like-sized integer. */
13608 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13609 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13610 return NULL_TREE;
13612 /* Re-attach the address-space qualifier if we canonicalized the scalar
13613 type. */
13614 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13615 return build_qualified_type
13616 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13618 return vectype;
13621 /* Function get_vectype_for_scalar_type.
13623 Returns the vector type corresponding to SCALAR_TYPE as supported
13624 by the target. If GROUP_SIZE is nonzero and we're performing BB
13625 vectorization, make sure that the number of elements in the vector
13626 is no bigger than GROUP_SIZE. */
13628 tree
13629 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13630 unsigned int group_size)
13632 /* For BB vectorization, we should always have a group size once we've
13633 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13634 are tentative requests during things like early data reference
13635 analysis and pattern recognition. */
13636 if (is_a <bb_vec_info> (vinfo))
13637 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13638 else
13639 group_size = 0;
13641 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13642 scalar_type);
13643 if (vectype && vinfo->vector_mode == VOIDmode)
13644 vinfo->vector_mode = TYPE_MODE (vectype);
13646 /* Register the natural choice of vector type, before the group size
13647 has been applied. */
13648 if (vectype)
13649 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13651 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13652 try again with an explicit number of elements. */
13653 if (vectype
13654 && group_size
13655 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13657 /* Start with the biggest number of units that fits within
13658 GROUP_SIZE and halve it until we find a valid vector type.
13659 Usually either the first attempt will succeed or all will
13660 fail (in the latter case because GROUP_SIZE is too small
13661 for the target), but it's possible that a target could have
13662 a hole between supported vector types.
13664 If GROUP_SIZE is not a power of 2, this has the effect of
13665 trying the largest power of 2 that fits within the group,
13666 even though the group is not a multiple of that vector size.
13667 The BB vectorizer will then try to carve up the group into
13668 smaller pieces. */
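/* For instance (illustrative only): with GROUP_SIZE == 7 the loop below
   starts at 4 elements, the largest power of two not exceeding the group,
   and falls back to 2 if no 4-element vector type is available.  */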
13669 unsigned int nunits = 1 << floor_log2 (group_size);
13672 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13673 scalar_type, nunits);
13674 nunits /= 2;
13676 while (nunits > 1 && !vectype);
13679 return vectype;
13682 /* Return the vector type corresponding to SCALAR_TYPE as supported
13683 by the target. NODE, if nonnull, is the SLP tree node that will
13684 use the returned vector type. */
13686 tree
13687 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13689 unsigned int group_size = 0;
13690 if (node)
13691 group_size = SLP_TREE_LANES (node);
13692 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13695 /* Function get_mask_type_for_scalar_type.
13697 Returns the mask type corresponding to a result of comparison
13698 of vectors of specified SCALAR_TYPE as supported by target.
13699 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13700 make sure that the number of elements in the vector is no bigger
13701 than GROUP_SIZE. */
13703 tree
13704 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13705 unsigned int group_size)
13707 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13709 if (!vectype)
13710 return NULL;
13712 return truth_type_for (vectype);
13715 /* Function get_mask_type_for_scalar_type.
13717 Returns the mask type corresponding to a result of comparison
13718 of vectors of specified SCALAR_TYPE as supported by target.
13719 NODE, if nonnull, is the SLP tree node that will use the returned
13720 vector type. */
13722 tree
13723 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13724 slp_tree node)
13726 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);
13728 if (!vectype)
13729 return NULL;
13731 return truth_type_for (vectype);
13734 /* Function get_same_sized_vectype
13736 Returns a vector type corresponding to SCALAR_TYPE of size
13737 VECTOR_TYPE if supported by the target. */
13739 tree
13740 get_same_sized_vectype (tree scalar_type, tree vector_type)
13742 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13743 return truth_type_for (vector_type);
13745 poly_uint64 nunits;
13746 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
13747 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13748 return NULL_TREE;
13750 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13751 scalar_type, nunits);
13754 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13755 would not change the chosen vector modes. */
13757 bool
13758 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13760 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13761 i != vinfo->used_vector_modes.end (); ++i)
13762 if (!VECTOR_MODE_P (*i)
13763 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13764 return false;
13765 return true;
13768 /* Function vect_is_simple_use.
13770 Input:
13771 VINFO - the vect info of the loop or basic block that is being vectorized.
13772 OPERAND - operand in the loop or bb.
13773 Output:
13774 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13775 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13776 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13777 the definition could be anywhere in the function
13778 DT - the type of definition
13780 Returns whether a stmt with OPERAND can be vectorized.
13781 For loops, supportable operands are constants, loop invariants, and operands
13782 that are defined by the current iteration of the loop. Unsupportable
13783 operands are those that are defined by a previous iteration of the loop (as
13784 is the case in reduction/induction computations).
13785 For basic blocks, supportable operands are constants and bb invariants.
13786 For now, operands defined outside the basic block are not supported. */
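/* A small illustration (hypothetical loop, not from a testcase): in

     for (i = 0; i < n; i++)
       a[i] = b[i] + x;

   the addition's operands are classified as vect_internal_def for the SSA
   name holding the loaded b[i] (it is defined inside the vectorized
   region) and vect_external_def for the loop-invariant x; a constant
   operand would be vect_constant_def.  */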
13788 bool
13789 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13790 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
13792 if (def_stmt_info_out)
13793 *def_stmt_info_out = NULL;
13794 if (def_stmt_out)
13795 *def_stmt_out = NULL;
13796 *dt = vect_unknown_def_type;
13798 if (dump_enabled_p ())
13800 dump_printf_loc (MSG_NOTE, vect_location,
13801 "vect_is_simple_use: operand ");
13802 if (TREE_CODE (operand) == SSA_NAME
13803 && !SSA_NAME_IS_DEFAULT_DEF (operand))
13804 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13805 else
13806 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13809 if (CONSTANT_CLASS_P (operand))
13810 *dt = vect_constant_def;
13811 else if (is_gimple_min_invariant (operand))
13812 *dt = vect_external_def;
13813 else if (TREE_CODE (operand) != SSA_NAME)
13814 *dt = vect_unknown_def_type;
13815 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13816 *dt = vect_external_def;
13817 else
13819 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13820 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13821 if (!stmt_vinfo)
13822 *dt = vect_external_def;
13823 else
13825 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
13826 def_stmt = stmt_vinfo->stmt;
13827 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13828 if (def_stmt_info_out)
13829 *def_stmt_info_out = stmt_vinfo;
13831 if (def_stmt_out)
13832 *def_stmt_out = def_stmt;
13835 if (dump_enabled_p ())
13837 dump_printf (MSG_NOTE, ", type of def: ");
13838 switch (*dt)
13840 case vect_uninitialized_def:
13841 dump_printf (MSG_NOTE, "uninitialized\n");
13842 break;
13843 case vect_constant_def:
13844 dump_printf (MSG_NOTE, "constant\n");
13845 break;
13846 case vect_external_def:
13847 dump_printf (MSG_NOTE, "external\n");
13848 break;
13849 case vect_internal_def:
13850 dump_printf (MSG_NOTE, "internal\n");
13851 break;
13852 case vect_induction_def:
13853 dump_printf (MSG_NOTE, "induction\n");
13854 break;
13855 case vect_reduction_def:
13856 dump_printf (MSG_NOTE, "reduction\n");
13857 break;
13858 case vect_double_reduction_def:
13859 dump_printf (MSG_NOTE, "double reduction\n");
13860 break;
13861 case vect_nested_cycle:
13862 dump_printf (MSG_NOTE, "nested cycle\n");
13863 break;
13864 case vect_first_order_recurrence:
13865 dump_printf (MSG_NOTE, "first order recurrence\n");
13866 break;
13867 case vect_condition_def:
13868 dump_printf (MSG_NOTE, "control flow\n");
13869 break;
13870 case vect_unknown_def_type:
13871 dump_printf (MSG_NOTE, "unknown\n");
13872 break;
13876 if (*dt == vect_unknown_def_type)
13878 if (dump_enabled_p ())
13879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13880 "Unsupported pattern.\n");
13881 return false;
13884 return true;
13887 /* Function vect_is_simple_use.
13889 Same as vect_is_simple_use but also determines the vector operand
13890 type of OPERAND and stores it to *VECTYPE. If the definition of
13891 OPERAND is vect_uninitialized_def, vect_constant_def or
13892 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
13893 is responsible to compute the best suited vector type for the
13894 scalar operand. */
13896 bool
13897 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13898 tree *vectype, stmt_vec_info *def_stmt_info_out,
13899 gimple **def_stmt_out)
13901 stmt_vec_info def_stmt_info;
13902 gimple *def_stmt;
13903 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
13904 return false;
13906 if (def_stmt_out)
13907 *def_stmt_out = def_stmt;
13908 if (def_stmt_info_out)
13909 *def_stmt_info_out = def_stmt_info;
13911 /* Now get a vector type if the def is internal, otherwise supply
13912 NULL_TREE and leave it up to the caller to figure out a proper
13913 type for the use stmt. */
13914 if (*dt == vect_internal_def
13915 || *dt == vect_induction_def
13916 || *dt == vect_reduction_def
13917 || *dt == vect_double_reduction_def
13918 || *dt == vect_nested_cycle
13919 || *dt == vect_first_order_recurrence)
13921 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
13922 gcc_assert (*vectype != NULL_TREE);
13923 if (dump_enabled_p ())
13924 dump_printf_loc (MSG_NOTE, vect_location,
13925 "vect_is_simple_use: vectype %T\n", *vectype);
13927 else if (*dt == vect_uninitialized_def
13928 || *dt == vect_constant_def
13929 || *dt == vect_external_def)
13930 *vectype = NULL_TREE;
13931 else
13932 gcc_unreachable ();
13934 return true;
13937 /* Function vect_is_simple_use.
13939 Same as vect_is_simple_use but determines the operand by operand
13940 position OPERAND from either STMT or SLP_NODE, filling in *OP
13941 and *SLP_DEF (when SLP_NODE is not NULL). */
13943 bool
13944 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13945 unsigned operand, tree *op, slp_tree *slp_def,
13946 enum vect_def_type *dt,
13947 tree *vectype, stmt_vec_info *def_stmt_info_out)
13949 if (slp_node)
13951 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13952 *slp_def = child;
13953 *vectype = SLP_TREE_VECTYPE (child);
13954 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13956 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13957 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
13959 else
13961 if (def_stmt_info_out)
13962 *def_stmt_info_out = NULL;
13963 *op = SLP_TREE_SCALAR_OPS (child)[0];
13964 *dt = SLP_TREE_DEF_TYPE (child);
13965 return true;
13968 else
13970 *slp_def = NULL;
13971 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
13973 if (gimple_assign_rhs_code (ass) == COND_EXPR
13974 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
13976 if (operand < 2)
13977 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
13978 else
13979 *op = gimple_op (ass, operand);
13981 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
13982 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
13983 else
13984 *op = gimple_op (ass, operand + 1);
13986 else if (gcond *cond = dyn_cast <gcond *> (stmt->stmt))
13987 *op = gimple_op (cond, operand);
13988 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
13989 *op = gimple_call_arg (call, operand);
13990 else
13991 gcc_unreachable ();
13992 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
13996 /* If OP is not NULL and is external or constant update its vector
13997 type with VECTYPE. Returns true if successful or false if not,
13998 for example when conflicting vector types are present. */
14000 bool
14001 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
14003 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
14004 return true;
14005 if (SLP_TREE_VECTYPE (op))
14006 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
14007 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
14008 should be handled by patterns. Allow vect_constant_def for now. */
14009 if (VECTOR_BOOLEAN_TYPE_P (vectype)
14010 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
14011 return false;
14012 SLP_TREE_VECTYPE (op) = vectype;
14013 return true;
14016 /* Function supportable_widening_operation
14018 Check whether an operation represented by the code CODE is a
14019 widening operation that is supported by the target platform in
14020 vector form (i.e., when operating on arguments of type VECTYPE_IN
14021 producing a result of type VECTYPE_OUT).
14023 Widening operations we currently support are NOP (CONVERT), FLOAT,
14024 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
14025 are supported by the target platform either directly (via vector
14026 tree-codes), or via target builtins.
14028 Output:
14029 - CODE1 and CODE2 are codes of vector operations to be used when
14030 vectorizing the operation, if available.
14031 - MULTI_STEP_CVT determines the number of required intermediate steps in
14032 case of multi-step conversion (like char->short->int - in that case
14033 MULTI_STEP_CVT will be 1).
14034 - INTERM_TYPES contains the intermediate type required to perform the
14035 widening operation (short in the above example). */
14037 bool
14038 supportable_widening_operation (vec_info *vinfo,
14039 code_helper code,
14040 stmt_vec_info stmt_info,
14041 tree vectype_out, tree vectype_in,
14042 code_helper *code1,
14043 code_helper *code2,
14044 int *multi_step_cvt,
14045 vec<tree> *interm_types)
14047 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
14048 class loop *vect_loop = NULL;
14049 machine_mode vec_mode;
14050 enum insn_code icode1, icode2;
14051 optab optab1 = unknown_optab, optab2 = unknown_optab;
14052 tree vectype = vectype_in;
14053 tree wide_vectype = vectype_out;
14054 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
14055 int i;
14056 tree prev_type, intermediate_type;
14057 machine_mode intermediate_mode, prev_mode;
14058 optab optab3, optab4;
14060 *multi_step_cvt = 0;
14061 if (loop_info)
14062 vect_loop = LOOP_VINFO_LOOP (loop_info);
14064 switch (code.safe_as_tree_code ())
14066 case MAX_TREE_CODES:
14067 /* Don't set c1 and c2 if code is not a tree_code. */
14068 break;
14070 case WIDEN_MULT_EXPR:
14071 /* The result of a vectorized widening operation usually requires
14072 two vectors (because the widened results do not fit into one vector).
14073 The generated vector results would normally be expected to be
14074 generated in the same order as in the original scalar computation,
14075 i.e. if 8 results are generated in each vector iteration, they are
14076 to be organized as follows:
14077 vect1: [res1,res2,res3,res4],
14078 vect2: [res5,res6,res7,res8].
14080 However, in the special case that the result of the widening
14081 operation is used in a reduction computation only, the order doesn't
14082 matter (because when vectorizing a reduction we change the order of
14083 the computation). Some targets can take advantage of this and
14084 generate more efficient code. For example, targets like Altivec,
14085 that support widen_mult using a sequence of {mult_even,mult_odd}
14086 generate the following vectors:
14087 vect1: [res1,res3,res5,res7],
14088 vect2: [res2,res4,res6,res8].
14090 When vectorizing outer-loops, we execute the inner-loop sequentially
14091 (each vectorized inner-loop iteration contributes to VF outer-loop
14092 iterations in parallel). We therefore don't allow changing the
14093 order of the computation in the inner-loop during outer-loop
14094 vectorization. */
14095 /* TODO: Another case in which order doesn't *really* matter is when we
14096 widen and then contract again, e.g. (short)((int)x * y >> 8).
14097 Normally, pack_trunc performs an even/odd permute, whereas the
14098 repack from an even/odd expansion would be an interleave, which
14099 would be significantly simpler for e.g. AVX2. */
14100 /* In any case, in order to avoid duplicating the code below, recurse
14101 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
14102 are properly set up for the caller. If we fail, we'll continue with
14103 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
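   /* For instance (added illustration): for a reduction such as
        s += (int) a[i] * (int) b[i]
      it does not matter whether the widened products are produced as
      {p0,p1,p2,p3} / {p4,p5,p6,p7} (lo/hi) or as
      {p0,p2,p4,p6} / {p1,p3,p5,p7} (even/odd); the reduction sums all
      of them, so the final value of s is the same either way.  */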
14104 if (vect_loop
14105 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
14106 && !nested_in_vect_loop_p (vect_loop, stmt_info)
14107 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
14108 stmt_info, vectype_out,
14109 vectype_in, code1,
14110 code2, multi_step_cvt,
14111 interm_types))
14113 /* Elements in a vector with vect_used_by_reduction property cannot
14114 be reordered if the use chain with this property does not have the
14115        same operation.  One such example is s += a * b, where elements
14116 in a and b cannot be reordered. Here we check if the vector defined
14117 by STMT is only directly used in the reduction statement. */
14118 tree lhs = gimple_assign_lhs (stmt_info->stmt);
14119 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
14120 if (use_stmt_info
14121 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
14122 return true;
14124 c1 = VEC_WIDEN_MULT_LO_EXPR;
14125 c2 = VEC_WIDEN_MULT_HI_EXPR;
14126 break;
14128 case DOT_PROD_EXPR:
14129 c1 = DOT_PROD_EXPR;
14130 c2 = DOT_PROD_EXPR;
14131 break;
14133 case SAD_EXPR:
14134 c1 = SAD_EXPR;
14135 c2 = SAD_EXPR;
14136 break;
14138 case VEC_WIDEN_MULT_EVEN_EXPR:
14139 /* Support the recursion induced just above. */
14140 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
14141 c2 = VEC_WIDEN_MULT_ODD_EXPR;
14142 break;
14144 case WIDEN_LSHIFT_EXPR:
14145 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
14146 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
14147 break;
14149 CASE_CONVERT:
14150 c1 = VEC_UNPACK_LO_EXPR;
14151 c2 = VEC_UNPACK_HI_EXPR;
14152 break;
14154 case FLOAT_EXPR:
14155 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
14156 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
14157 break;
14159 case FIX_TRUNC_EXPR:
14160 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
14161 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
14162 break;
14164 default:
14165 gcc_unreachable ();
14168 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
14169 std::swap (c1, c2);
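  /* Note (added): the LO/HI tree codes are defined in terms of the halves
     of the vector register, which on big-endian targets correspond to the
     opposite halves of the element order assumed by the vectorizer, so C1
     and C2 are swapped there.  The even/odd pairing is independent of the
     element order, hence the exclusion above.  */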
14171 if (code == FIX_TRUNC_EXPR)
14173       /* The signedness is determined from the output operand.  */
14174 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14175 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14177 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
14178 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14179 && VECTOR_BOOLEAN_TYPE_P (vectype)
14180 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14181 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14183 /* If the input and result modes are the same, a different optab
14184 is needed where we pass in the number of units in vectype. */
14185 optab1 = vec_unpacks_sbool_lo_optab;
14186 optab2 = vec_unpacks_sbool_hi_optab;
14189 vec_mode = TYPE_MODE (vectype);
14190 if (widening_fn_p (code))
14192 /* If this is an internal fn then we must check whether the target
14193 supports either a low-high split or an even-odd split. */
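      /* Illustrative example (added): for a widening addition internal
         function (IFN_VEC_WIDEN_PLUS in recent GCC), the code below first
         tries the IFN_VEC_WIDEN_PLUS_LO / IFN_VEC_WIDEN_PLUS_HI pair and,
         if the target does not provide it for VEC_MODE, falls back to the
         IFN_VEC_WIDEN_PLUS_EVEN / IFN_VEC_WIDEN_PLUS_ODD pair.  */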
14194 internal_fn ifn = as_internal_fn ((combined_fn) code);
14196 internal_fn lo, hi, even, odd;
14197 lookup_hilo_internal_fn (ifn, &lo, &hi);
14198 *code1 = as_combined_fn (lo);
14199 *code2 = as_combined_fn (hi);
14200 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
14201 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
14203 /* If we don't support low-high, then check for even-odd. */
14204 if (!optab1
14205 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
14206 || !optab2
14207 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
14209 lookup_evenodd_internal_fn (ifn, &even, &odd);
14210 *code1 = as_combined_fn (even);
14211 *code2 = as_combined_fn (odd);
14212 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
14213 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
14216 else if (code.is_tree_code ())
14218 if (code == FIX_TRUNC_EXPR)
14220       /* The signedness is determined from the output operand.  */
14221 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14222 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
14224 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
14225 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
14226 && VECTOR_BOOLEAN_TYPE_P (vectype)
14227 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
14228 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
14230 /* If the input and result modes are the same, a different optab
14231 is needed where we pass in the number of units in vectype. */
14232 optab1 = vec_unpacks_sbool_lo_optab;
14233 optab2 = vec_unpacks_sbool_hi_optab;
14235 else
14237 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14238 optab2 = optab_for_tree_code (c2, vectype, optab_default);
14240 *code1 = c1;
14241 *code2 = c2;
14244 if (!optab1 || !optab2)
14245 return false;
14247 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
14248 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
14249 return false;
14252 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14253 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14255 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14256 return true;
14257       /* For scalar masks we may have different boolean
14258          vector types sharing the same QImode.  Thus we
14259          add an additional check on the number of elements.  */
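      /* Added example: with AVX512-style scalar masks, a 4-lane and an
         8-lane boolean vector can both have QImode, so matching the result
         mode alone is not enough; we also require the wide mask to have
         exactly half as many lanes as VECTYPE.  */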
14260 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
14261 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14262 return true;
14265 /* Check if it's a multi-step conversion that can be done using intermediate
14266 types. */
14268 prev_type = vectype;
14269 prev_mode = vec_mode;
14271 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
14272 return false;
14274   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14275      intermediate steps in the promotion sequence.  We try
14276      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
14277      not.  */
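  /* Worked example (added, assuming 128-bit vectors): widening V16QI to
     V4SI with VEC_UNPACK_LO/HI_EXPR.  The direct unpack of V16QI yields
     V8HI rather than V4SI, so one intermediate step is needed:
       step 0: V16QI -> 2 x V8HI   (optab1/optab2 on V16QI)
       step 1: V8HI  -> 2 x V4SI   (optab3/optab4 on V8HI)
     giving *MULTI_STEP_CVT = 1 and INTERM_TYPES = { V8HI }, provided the
     target implements the unpack patterns for both modes.  */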
14278 interm_types->create (MAX_INTERM_CVT_STEPS);
14279 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14281 intermediate_mode = insn_data[icode1].operand[0].mode;
14282 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14283 intermediate_type
14284 = vect_halve_mask_nunits (prev_type, intermediate_mode);
14285 else if (VECTOR_MODE_P (intermediate_mode))
14287 tree intermediate_element_type
14288 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
14289 TYPE_UNSIGNED (prev_type));
14290 intermediate_type
14291 = build_vector_type_for_mode (intermediate_element_type,
14292 intermediate_mode);
14294 else
14295 intermediate_type
14296 = lang_hooks.types.type_for_mode (intermediate_mode,
14297 TYPE_UNSIGNED (prev_type));
14299 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14300 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14301 && intermediate_mode == prev_mode
14302 && SCALAR_INT_MODE_P (prev_mode))
14304 /* If the input and result modes are the same, a different optab
14305 is needed where we pass in the number of units in vectype. */
14306 optab3 = vec_unpacks_sbool_lo_optab;
14307 optab4 = vec_unpacks_sbool_hi_optab;
14309 else
14311 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
14312 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
14315 if (!optab3 || !optab4
14316 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
14317 || insn_data[icode1].operand[0].mode != intermediate_mode
14318 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
14319 || insn_data[icode2].operand[0].mode != intermediate_mode
14320 || ((icode1 = optab_handler (optab3, intermediate_mode))
14321 == CODE_FOR_nothing)
14322 || ((icode2 = optab_handler (optab4, intermediate_mode))
14323 == CODE_FOR_nothing))
14324 break;
14326 interm_types->quick_push (intermediate_type);
14327 (*multi_step_cvt)++;
14329 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
14330 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
14332 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14333 return true;
14334 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
14335 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
14336 return true;
14339 prev_type = intermediate_type;
14340 prev_mode = intermediate_mode;
14343 interm_types->release ();
14344 return false;
14348 /* Function supportable_narrowing_operation
14350 Check whether an operation represented by the code CODE is a
14351 narrowing operation that is supported by the target platform in
14352 vector form (i.e., when operating on arguments of type VECTYPE_IN
14353 and producing a result of type VECTYPE_OUT).
14355 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
14356 and FLOAT. This function checks if these operations are supported by
14357 the target platform directly via vector tree-codes.
14359 Output:
14360 - CODE1 is the code of a vector operation to be used when
14361 vectorizing the operation, if available.
14362 - MULTI_STEP_CVT determines the number of required intermediate steps in
14363 case of multi-step conversion (like int->short->char - in that case
14364 MULTI_STEP_CVT will be 1).
14365 - INTERM_TYPES contains the intermediate type required to perform the
14366 narrowing operation (short in the above example). */
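   /* Illustrative example (added, assuming 128-bit vectors): narrowing
      int -> char, i.e. VECTYPE_IN = V4SI and VECTYPE_OUT = V16QI.
      VEC_PACK_TRUNC_EXPR on V4SI yields V8HI rather than V16QI, so one
      intermediate step is needed:
        step 0: 2 x V4SI -> V8HI
        step 1: 2 x V8HI -> V16QI
      giving CODE1 = VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT = 1 and
      INTERM_TYPES = { V8HI }, provided the target implements the pack
      patterns for both modes.  */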
14368 bool
14369 supportable_narrowing_operation (code_helper code,
14370 tree vectype_out, tree vectype_in,
14371 code_helper *code1, int *multi_step_cvt,
14372 vec<tree> *interm_types)
14374 machine_mode vec_mode;
14375 enum insn_code icode1;
14376 optab optab1, interm_optab;
14377 tree vectype = vectype_in;
14378 tree narrow_vectype = vectype_out;
14379 enum tree_code c1;
14380 tree intermediate_type, prev_type;
14381 machine_mode intermediate_mode, prev_mode;
14382 int i;
14383 unsigned HOST_WIDE_INT n_elts;
14384 bool uns;
14386 if (!code.is_tree_code ())
14387 return false;
14389 *multi_step_cvt = 0;
14390 switch ((tree_code) code)
14392 CASE_CONVERT:
14393 c1 = VEC_PACK_TRUNC_EXPR;
14394 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
14395 && VECTOR_BOOLEAN_TYPE_P (vectype)
14396 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
14397 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
14398 && n_elts < BITS_PER_UNIT)
14399 optab1 = vec_pack_sbool_trunc_optab;
14400 else
14401 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14402 break;
14404 case FIX_TRUNC_EXPR:
14405 c1 = VEC_PACK_FIX_TRUNC_EXPR;
14406       /* The signedness is determined from the output operand.  */
14407 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
14408 break;
14410 case FLOAT_EXPR:
14411 c1 = VEC_PACK_FLOAT_EXPR;
14412 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14413 break;
14415 default:
14416 gcc_unreachable ();
14419 if (!optab1)
14420 return false;
14422 vec_mode = TYPE_MODE (vectype);
14423 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
14424 return false;
14426 *code1 = c1;
14428 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14430 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14431 return true;
14432       /* For scalar masks we may have different boolean
14433          vector types sharing the same QImode.  Thus we
14434          add an additional check on the number of elements.  */
14435 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14436 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14437 return true;
14440 if (code == FLOAT_EXPR)
14441 return false;
14443 /* Check if it's a multi-step conversion that can be done using intermediate
14444 types. */
14445 prev_mode = vec_mode;
14446 prev_type = vectype;
14447 if (code == FIX_TRUNC_EXPR)
14448 uns = TYPE_UNSIGNED (vectype_out);
14449 else
14450 uns = TYPE_UNSIGNED (vectype);
14452 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14453 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14454 costly than signed. */
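  /* Added note: this is safe because the high-order bits produced by the
     float -> integer conversion are discarded by the subsequent
     VEC_PACK_TRUNC_EXPR steps, so for values that fit in the narrow
     unsigned result the signed conversion of the wider intermediate type
     yields the same low-order bits.  */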
14455 if (code == FIX_TRUNC_EXPR && uns)
14457 enum insn_code icode2;
14459 intermediate_type
14460 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14461 interm_optab
14462 = optab_for_tree_code (c1, intermediate_type, optab_default);
14463 if (interm_optab != unknown_optab
14464 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
14465 && insn_data[icode1].operand[0].mode
14466 == insn_data[icode2].operand[0].mode)
14468 uns = false;
14469 optab1 = interm_optab;
14470 icode1 = icode2;
14474   /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14475      intermediate steps in the demotion sequence.  We try
14476      MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
14477 interm_types->create (MAX_INTERM_CVT_STEPS);
14478 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14480 intermediate_mode = insn_data[icode1].operand[0].mode;
14481 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14482 intermediate_type
14483 = vect_double_mask_nunits (prev_type, intermediate_mode);
14484 else
14485 intermediate_type
14486 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14487 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14488 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14489 && SCALAR_INT_MODE_P (prev_mode)
14490 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
14491 && n_elts < BITS_PER_UNIT)
14492 interm_optab = vec_pack_sbool_trunc_optab;
14493 else
14494 interm_optab
14495 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14496 optab_default);
14497 if (!interm_optab
14498 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
14499 || insn_data[icode1].operand[0].mode != intermediate_mode
14500 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
14501 == CODE_FOR_nothing))
14502 break;
14504 interm_types->quick_push (intermediate_type);
14505 (*multi_step_cvt)++;
14507 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14509 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14510 return true;
14511 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14512 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14513 return true;
14516 prev_mode = intermediate_mode;
14517 prev_type = intermediate_type;
14518 optab1 = interm_optab;
14521 interm_types->release ();
14522 return false;
14525 /* Generate and return a vector mask of MASK_TYPE such that
14526 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14527 Add the statements to SEQ. */
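/* Example (added): with a 4-lane MASK_TYPE, START_INDEX = i and
   END_INDEX = n, this emits something like
     _1 = .WHILE_ULT (i, n, { 0, ... });
   so for i = 5 and n = 7 the resulting mask is { 1, 1, 0, 0 }: lanes 0
   and 1 satisfy i + lane < n, the remaining lanes do not.  */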
14529 tree
14530 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14531 tree end_index, const char *name)
14533 tree cmp_type = TREE_TYPE (start_index);
14534 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14535 cmp_type, mask_type,
14536 OPTIMIZE_FOR_SPEED));
14537 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14538 start_index, end_index,
14539 build_zero_cst (mask_type));
14540 tree tmp;
14541 if (name)
14542 tmp = make_temp_ssa_name (mask_type, NULL, name);
14543 else
14544 tmp = make_ssa_name (mask_type);
14545 gimple_call_set_lhs (call, tmp);
14546 gimple_seq_add_stmt (seq, call);
14547 return tmp;
14550 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
14551 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
14553 tree
14554 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14555 tree end_index)
14557 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14558 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
14561 /* Try to compute the vector types required to vectorize STMT_INFO,
14562 returning true on success and false if vectorization isn't possible.
14563 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14564    make sure that the number of elements in the vectors is no bigger
14565 than GROUP_SIZE.
14567 On success:
14569 - Set *STMT_VECTYPE_OUT to:
14570 - NULL_TREE if the statement doesn't need to be vectorized;
14571 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14573 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14574 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14575 statement does not help to determine the overall number of units. */
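/* Example (added, assuming a target with 128-bit vectors): for a
   statement such as  int_var = (int) char_var, *STMT_VECTYPE_OUT would
   typically be V4SI (from the int lhs) while *NUNITS_VECTYPE_OUT would
   be V16QI, because the char operand is the smallest scalar type
   involved and therefore determines how many units the statement
   requires per iteration of the vector loop.  */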
14577 opt_result
14578 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14579 tree *stmt_vectype_out,
14580 tree *nunits_vectype_out,
14581 unsigned int group_size)
14583 gimple *stmt = stmt_info->stmt;
14585 /* For BB vectorization, we should always have a group size once we've
14586 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14587 are tentative requests during things like early data reference
14588 analysis and pattern recognition. */
14589 if (is_a <bb_vec_info> (vinfo))
14590 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
14591 else
14592 group_size = 0;
14594 *stmt_vectype_out = NULL_TREE;
14595 *nunits_vectype_out = NULL_TREE;
14597 if (gimple_get_lhs (stmt) == NULL_TREE
14598 /* Allow vector conditionals through here. */
14599 && !is_a <gcond *> (stmt)
14600 /* MASK_STORE has no lhs, but is ok. */
14601 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
14603 if (is_a <gcall *> (stmt))
14605 /* Ignore calls with no lhs. These must be calls to
14606 #pragma omp simd functions, and what vectorization factor
14607          they really need can't be determined until
14608 vectorizable_simd_clone_call. */
14609 if (dump_enabled_p ())
14610 dump_printf_loc (MSG_NOTE, vect_location,
14611 "defer to SIMD clone analysis.\n");
14612 return opt_result::success ();
14615 return opt_result::failure_at (stmt,
14616 "not vectorized: irregular stmt: %G", stmt);
14619 tree vectype;
14620 tree scalar_type = NULL_TREE;
14621 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
14623 vectype = STMT_VINFO_VECTYPE (stmt_info);
14624 if (dump_enabled_p ())
14625 dump_printf_loc (MSG_NOTE, vect_location,
14626 "precomputed vectype: %T\n", vectype);
14628 else if (vect_use_mask_type_p (stmt_info))
14630 unsigned int precision = stmt_info->mask_precision;
14631 scalar_type = build_nonstandard_integer_type (precision, 1);
14632 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
14633 if (!vectype)
14634 return opt_result::failure_at (stmt, "not vectorized: unsupported"
14635 " data-type %T\n", scalar_type);
14636 if (dump_enabled_p ())
14637 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14639 else
14641       /* If we got here with a gcond it means that the target had no available
14642          vector mode for the scalar type.  We can't vectorize, so abort.  */
14643 if (is_a <gcond *> (stmt))
14644 return opt_result::failure_at (stmt,
14645 "not vectorized:"
14646 " unsupported data-type for gcond %T\n",
14647 scalar_type);
14649 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
14650 scalar_type = TREE_TYPE (DR_REF (dr));
14651 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
14652 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
14653 else
14654 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
14656 if (dump_enabled_p ())
14658 if (group_size)
14659 dump_printf_loc (MSG_NOTE, vect_location,
14660 "get vectype for scalar type (group size %d):"
14661 " %T\n", group_size, scalar_type);
14662 else
14663 dump_printf_loc (MSG_NOTE, vect_location,
14664 "get vectype for scalar type: %T\n", scalar_type);
14666 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
14667 if (!vectype)
14668 return opt_result::failure_at (stmt,
14669 "not vectorized:"
14670 " unsupported data-type %T\n",
14671 scalar_type);
14673 if (dump_enabled_p ())
14674 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14677 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
14678 return opt_result::failure_at (stmt,
14679 "not vectorized: vector stmt in loop:%G",
14680 stmt);
14682 *stmt_vectype_out = vectype;
14684 /* Don't try to compute scalar types if the stmt produces a boolean
14685 vector; use the existing vector type instead. */
14686 tree nunits_vectype = vectype;
14687 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14689 /* The number of units is set according to the smallest scalar
14690 type (or the largest vector size, but we only support one
14691 vector size per vectorization). */
14692 scalar_type = vect_get_smallest_scalar_type (stmt_info,
14693 TREE_TYPE (vectype));
14694 if (scalar_type != TREE_TYPE (vectype))
14696 if (dump_enabled_p ())
14697 dump_printf_loc (MSG_NOTE, vect_location,
14698 "get vectype for smallest scalar type: %T\n",
14699 scalar_type);
14700 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
14701 group_size);
14702 if (!nunits_vectype)
14703 return opt_result::failure_at
14704 (stmt, "not vectorized: unsupported data-type %T\n",
14705 scalar_type);
14706 if (dump_enabled_p ())
14707 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
14708 nunits_vectype);
14712 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
14713 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
14714 return opt_result::failure_at (stmt,
14715 "Not vectorized: Incompatible number "
14716 "of vector subparts between %T and %T\n",
14717 nunits_vectype, *stmt_vectype_out);
14719 if (dump_enabled_p ())
14721 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
14722 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
14723 dump_printf (MSG_NOTE, "\n");
14726 *nunits_vectype_out = nunits_vectype;
14727 return opt_result::success ();
14730 /* Generate and return a statement sequence that sets the vector length LEN as follows:
14732 min_of_start_and_end = min (START_INDEX, END_INDEX);
14733 left_len = END_INDEX - min_of_start_and_end;
14734 rhs = min (left_len, LEN_LIMIT);
14735 LEN = rhs;
14737 Note: the cost of the code generated by this function is modeled
14738 by vect_estimate_min_profitable_iters, so changes here may need
14739 corresponding changes there. */
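/* Example (added): with START_INDEX = i, END_INDEX = n and LEN_LIMIT
   equal to the number of lanes per vector, the generated sequence
   computes
     LEN = MIN (n - MIN (i, n), LEN_LIMIT);
   i.e. the number of remaining scalar iterations capped at a full
   vector; once i >= n this yields zero.  */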
14741 gimple_seq
14742 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
14744 gimple_seq stmts = NULL;
14745 tree len_type = TREE_TYPE (len);
14746 gcc_assert (TREE_TYPE (start_index) == len_type);
14748 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
14749 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
14750 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
14751 gimple* stmt = gimple_build_assign (len, rhs);
14752 gimple_seq_add_stmt (&stmts, stmt);
14754 return stmts;