gcc/tree-vect-stmts.cc
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 static unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind,
95 stmt_vec_info stmt_info, slp_tree node,
96 tree vectype, int misalign,
97 enum vect_cost_model_location where)
99 if ((kind == vector_load || kind == unaligned_load)
100 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
101 kind = vector_gather_load;
102 if ((kind == vector_store || kind == unaligned_store)
103 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
104 kind = vector_scatter_store;
106 stmt_info_for_cost si
107 = { count, kind, where, stmt_info, node, vectype, misalign };
108 body_cost_vec->safe_push (si);
110 return (unsigned)
111 (builtin_vectorization_cost (kind, vectype, misalign) * count);
114 unsigned
115 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
116 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
117 tree vectype, int misalign,
118 enum vect_cost_model_location where)
120 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
121 vectype, misalign, where);
124 unsigned
125 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
126 enum vect_cost_for_stmt kind, slp_tree node,
127 tree vectype, int misalign,
128 enum vect_cost_model_location where)
130 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
131 vectype, misalign, where);
134 unsigned
135 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
136 enum vect_cost_for_stmt kind,
137 enum vect_cost_model_location where)
139 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
140 || kind == scalar_stmt);
141 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
142 NULL_TREE, 0, where);
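/* Illustrative sketch: the overloads above all funnel into the static
   worker at the top, which pushes a stmt_info_for_cost entry onto
   BODY_COST_VEC for later processing and returns a preliminary estimate
   based on builtin_vectorization_cost.  A typical call, as used by
   vect_model_simple_cost below, is

     prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                        stmt_info, 0, vect_prologue);

   which records one scalar-to-vector broadcast in the loop prologue.  */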
145 /* Return a variable of type ELEM_TYPE[NELEMS]. */
147 static tree
148 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
150 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
151 "vect_array");
154 /* ARRAY is an array of vectors created by create_vector_array.
155 Return an SSA_NAME for the vector in index N. The reference
156 is part of the vectorization of STMT_INFO and the vector is associated
157 with scalar destination SCALAR_DEST. */
159 static tree
160 read_vector_array (vec_info *vinfo,
161 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
162 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
164 tree vect_type, vect, vect_name, array_ref;
165 gimple *new_stmt;
167 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
168 vect_type = TREE_TYPE (TREE_TYPE (array));
169 vect = vect_create_destination_var (scalar_dest, vect_type);
170 array_ref = build4 (ARRAY_REF, vect_type, array,
171 build_int_cst (size_type_node, n),
172 NULL_TREE, NULL_TREE);
174 new_stmt = gimple_build_assign (vect, array_ref);
175 vect_name = make_ssa_name (vect, new_stmt);
176 gimple_assign_set_lhs (new_stmt, vect_name);
177 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
179 return vect_name;
182 /* ARRAY is an array of vectors created by create_vector_array.
183 Emit code to store SSA_NAME VECT in index N of the array.
184 The store is part of the vectorization of STMT_INFO. */
186 static void
187 write_vector_array (vec_info *vinfo,
188 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
189 tree vect, tree array, unsigned HOST_WIDE_INT n)
191 tree array_ref;
192 gimple *new_stmt;
194 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
195 build_int_cst (size_type_node, n),
196 NULL_TREE, NULL_TREE);
198 new_stmt = gimple_build_assign (array_ref, vect);
199 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
202 /* PTR is a pointer to an array of type TYPE. Return a representation
203 of *PTR. The memory reference replaces those in FIRST_DR
204 (and its group). */
206 static tree
207 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
209 tree mem_ref;
211 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
212 /* Arrays have the same alignment as their type. */
213 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
214 return mem_ref;
217 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
218 Emit the clobber before *GSI. */
220 static void
221 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
222 gimple_stmt_iterator *gsi, tree var)
224 tree clobber = build_clobber (TREE_TYPE (var));
225 gimple *new_stmt = gimple_build_assign (var, clobber);
226 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
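/* Illustrative sketch of how the helpers above cooperate for
   load/store-lanes style accesses: the load/store vectorization code later
   in this file emits, roughly,

     vect_array = LOAD_LANES (MEM_REF <ptr>);
     vect1 = vect_array[0];       <-- read_vector_array
     vect2 = vect_array[1];
     ...
     vect_array ={v} {CLOBBER};   <-- vect_clobber_variable

   (names invented for illustration); write_vector_array is the mirror
   image used to fill the array before a STORE_LANES.  */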
229 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
231 /* Function vect_mark_relevant.
233 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
235 static void
236 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
237 enum vect_relevant relevant, bool live_p)
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE, vect_location,
244 "mark relevant %d, live %d: %G", relevant, live_p,
245 stmt_info->stmt);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern; in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
253 /* This is the last stmt in a sequence that was detected as a
254 pattern that can potentially be vectorized. Don't mark the stmt
255 as relevant/live because it's not going to be vectorized.
256 Instead mark the pattern-stmt that replaces it. */
258 if (dump_enabled_p ())
259 dump_printf_loc (MSG_NOTE, vect_location,
260 "last stmt in pattern. don't mark"
261 " relevant/live.\n");
262 stmt_vec_info old_stmt_info = stmt_info;
263 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
264 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
265 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
266 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
269 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
270 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
271 STMT_VINFO_RELEVANT (stmt_info) = relevant;
273 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
274 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
276 if (dump_enabled_p ())
277 dump_printf_loc (MSG_NOTE, vect_location,
278 "already marked relevant/live.\n");
279 return;
282 worklist->safe_push (stmt_info);
286 /* Function is_simple_and_all_uses_invariant
288 Return true if STMT_INFO is simple and all uses of it are invariant. */
290 bool
291 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
292 loop_vec_info loop_vinfo)
294 tree op;
295 ssa_op_iter iter;
297 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
298 if (!stmt)
299 return false;
301 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
303 enum vect_def_type dt = vect_uninitialized_def;
305 if (!vect_is_simple_use (op, loop_vinfo, &dt))
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309 "use not simple.\n");
310 return false;
313 if (dt != vect_external_def && dt != vect_constant_def)
314 return false;
316 return true;
319 /* Function vect_stmt_relevant_p.
321 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
322 is "relevant for vectorization".
324 A stmt is considered "relevant for vectorization" if:
325 - it has uses outside the loop.
326 - it has vdefs (it alters memory).
327 - it is a control stmt in the loop (except for the exit condition).
329 CHECKME: what other side effects would the vectorizer allow? */
331 static bool
332 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
333 enum vect_relevant *relevant, bool *live_p)
335 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
336 ssa_op_iter op_iter;
337 imm_use_iterator imm_iter;
338 use_operand_p use_p;
339 def_operand_p def_p;
341 *relevant = vect_unused_in_scope;
342 *live_p = false;
344 /* cond stmt other than loop exit cond. */
345 if (is_ctrl_stmt (stmt_info->stmt)
346 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
347 *relevant = vect_used_in_scope;
349 /* changing memory. */
350 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
351 if (gimple_vdef (stmt_info->stmt)
352 && !gimple_clobber_p (stmt_info->stmt))
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE, vect_location,
356 "vec_stmt_relevant_p: stmt has vdefs.\n");
357 *relevant = vect_used_in_scope;
360 /* uses outside the loop. */
361 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
363 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
365 basic_block bb = gimple_bb (USE_STMT (use_p));
366 if (!flow_bb_inside_loop_p (loop, bb))
368 if (is_gimple_debug (USE_STMT (use_p)))
369 continue;
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: used out of loop.\n");
375 /* We expect all such uses to be in the loop exit phis
376 (because of loop closed form) */
377 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
378 gcc_assert (bb == single_exit (loop)->dest);
380 *live_p = true;
385 if (*live_p && *relevant == vect_unused_in_scope
386 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: stmt live but not relevant.\n");
391 *relevant = vect_used_only_live;
394 return (*live_p || *relevant);
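/* Illustrative example (sketch) of the criteria above: in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;       <-- has a vdef, so vect_used_in_scope
         sum = sum + b[i];      <-- def used after the loop, so live
       }
     ... = sum;

   the store is relevant because it alters memory, and the summation is
   live because its result is used (through a loop-closed PHI) outside the
   loop.  Everything else becomes relevant only transitively, via the
   worklist in vect_mark_stmts_to_be_vectorized.  */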
398 /* Function exist_non_indexing_operands_for_use_p
400 USE is one of the uses attached to STMT_INFO. Check if USE is
401 used in STMT_INFO for anything other than indexing an array. */
403 static bool
404 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
406 tree operand;
408 /* USE corresponds to some operand in STMT. If there is no data
409 reference in STMT, then any operand that corresponds to USE
410 is not indexing an array. */
411 if (!STMT_VINFO_DATA_REF (stmt_info))
412 return true;
414 /* STMT has a data_ref. FORNOW this means that it is one of
415 the following forms:
416 -1- ARRAY_REF = var
417 -2- var = ARRAY_REF
418 (This should have been verified in analyze_data_refs).
420 'var' in the second case corresponds to a def, not a use,
421 so USE cannot correspond to any operands that are not used
422 for array indexing.
424 Therefore, all we need to check is if STMT falls into the
425 first case, and whether var corresponds to USE. */
427 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
428 if (!assign || !gimple_assign_copy_p (assign))
430 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
431 if (call && gimple_call_internal_p (call))
433 internal_fn ifn = gimple_call_internal_fn (call);
434 int mask_index = internal_fn_mask_index (ifn);
435 if (mask_index >= 0
436 && use == gimple_call_arg (call, mask_index))
437 return true;
438 int stored_value_index = internal_fn_stored_value_index (ifn);
439 if (stored_value_index >= 0
440 && use == gimple_call_arg (call, stored_value_index))
441 return true;
442 if (internal_gather_scatter_fn_p (ifn)
443 && use == gimple_call_arg (call, 1))
444 return true;
446 return false;
449 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
450 return false;
451 operand = gimple_assign_rhs1 (assign);
452 if (TREE_CODE (operand) != SSA_NAME)
453 return false;
455 if (operand == use)
456 return true;
458 return false;
463 /* Function process_use.
465 Inputs:
466 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
467 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
468 that defined USE. This is done by calling mark_relevant and passing it
469 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
470 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
471 be performed.
473 Outputs:
474 Generally, LIVE_P and RELEVANT are used to define the liveness and
475 relevance info of the DEF_STMT of this USE:
476 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
477 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
478 Exceptions:
479 - case 1: If USE is used only for address computations (e.g. array indexing),
480 which does not need to be directly vectorized, then the liveness/relevance
481 of the respective DEF_STMT is left unchanged.
482 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
483 we skip DEF_STMT because it had already been processed.
484 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
485 "relevant" will be modified accordingly.
487 Return true if everything is as expected. Return false otherwise. */
489 static opt_result
490 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
491 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
492 bool force)
494 stmt_vec_info dstmt_vinfo;
495 enum vect_def_type dt;
497 /* case 1: we are only interested in uses that need to be vectorized. Uses
498 that are used for address computation are not considered relevant. */
499 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
500 return opt_result::success ();
502 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
503 return opt_result::failure_at (stmt_vinfo->stmt,
504 "not vectorized:"
505 " unsupported use in stmt.\n");
507 if (!dstmt_vinfo)
508 return opt_result::success ();
510 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
511 basic_block bb = gimple_bb (stmt_vinfo->stmt);
513 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
514 We have to force the stmt live since the epilogue loop needs it to
515 continue computing the reduction. */
516 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
518 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
520 && bb->loop_father == def_bb->loop_father)
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE, vect_location,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
526 return opt_result::success ();
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
531 d = dstmt_vinfo
532 inner-loop:
533 stmt # use (d)
534 outer-loop-tail-bb:
535 ... */
536 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE, vect_location,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
542 switch (relevant)
544 case vect_unused_in_scope:
545 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
546 vect_used_in_scope : vect_unused_in_scope;
547 break;
549 case vect_used_in_outer_by_reduction:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551 relevant = vect_used_by_reduction;
552 break;
554 case vect_used_in_outer:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556 relevant = vect_used_in_scope;
557 break;
559 case vect_used_in_scope:
560 break;
562 default:
563 gcc_unreachable ();
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
570 inner-loop:
571 d = dstmt_vinfo
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
573 stmt # use (d) */
574 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE, vect_location,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
580 switch (relevant)
582 case vect_unused_in_scope:
583 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
585 vect_used_in_outer_by_reduction : vect_unused_in_scope;
586 break;
588 case vect_used_by_reduction:
589 case vect_used_only_live:
590 relevant = vect_used_in_outer_by_reduction;
591 break;
593 case vect_used_in_scope:
594 relevant = vect_used_in_outer;
595 break;
597 default:
598 gcc_unreachable ();
601 /* We are also not interested in uses on loop PHI backedges that are
602 inductions. Otherwise we'll needlessly vectorize the IV increment
603 and cause hybrid SLP for SLP inductions. Unless the PHI is live
604 of course. */
605 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
606 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
607 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
608 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
609 loop_latch_edge (bb->loop_father))
610 == use))
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_NOTE, vect_location,
614 "induction value on backedge.\n");
615 return opt_result::success ();
619 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
620 return opt_result::success ();
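/* Concrete example (sketch) of case 1 above: for a use of 'i' in

     x = a[i];

   the only use of 'i' is in the address computation of the data
   reference, so exist_non_indexing_operands_for_use_p returns false and
   the definition of 'i' is not marked relevant on account of this use.  */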
624 /* Function vect_mark_stmts_to_be_vectorized.
626 Not all stmts in the loop need to be vectorized. For example:
628 for i...
629 for j...
630 1. T0 = i + j
631 2. T1 = a[T0]
633 3. j = j + 1
635 Stmts 1 and 3 do not need to be vectorized, because loop control and
636 addressing of vectorized data-refs are handled differently.
638 This pass detects such stmts. */
640 opt_result
641 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
643 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
644 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
645 unsigned int nbbs = loop->num_nodes;
646 gimple_stmt_iterator si;
647 unsigned int i;
648 basic_block bb;
649 bool live_p;
650 enum vect_relevant relevant;
652 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
654 auto_vec<stmt_vec_info, 64> worklist;
656 /* 1. Init worklist. */
657 for (i = 0; i < nbbs; i++)
659 bb = bbs[i];
660 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
662 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
663 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
665 phi_info->stmt);
667 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
668 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
670 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
672 if (is_gimple_debug (gsi_stmt (si)))
673 continue;
674 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
675 if (dump_enabled_p ())
676 dump_printf_loc (MSG_NOTE, vect_location,
677 "init: stmt relevant? %G", stmt_info->stmt);
679 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
680 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
684 /* 2. Process_worklist */
685 while (worklist.length () > 0)
687 use_operand_p use_p;
688 ssa_op_iter iter;
690 stmt_vec_info stmt_vinfo = worklist.pop ();
691 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location,
693 "worklist: examine stmt: %G", stmt_vinfo->stmt);
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant according to the relevance property
697 of STMT. */
698 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
700 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
701 propagated as is to the DEF_STMTs of its USEs.
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the relevance to vect_used_by_reduction.
705 This is because we distinguish between two kinds of relevant stmts -
706 those that are used by a reduction computation, and those that are
707 (also) used by a regular computation. This allows us later on to
708 identify stmts that are used solely by a reduction, and therefore the
709 order of the results that they produce does not have to be kept. */
711 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
713 case vect_reduction_def:
714 gcc_assert (relevant != vect_unused_in_scope);
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_in_scope
717 && relevant != vect_used_by_reduction
718 && relevant != vect_used_only_live)
719 return opt_result::failure_at
720 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
721 break;
723 case vect_nested_cycle:
724 if (relevant != vect_unused_in_scope
725 && relevant != vect_used_in_outer_by_reduction
726 && relevant != vect_used_in_outer)
727 return opt_result::failure_at
728 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
729 break;
731 case vect_double_reduction_def:
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_by_reduction
734 && relevant != vect_used_only_live)
735 return opt_result::failure_at
736 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
737 break;
739 default:
740 break;
743 if (is_pattern_stmt_p (stmt_vinfo))
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
750 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
751 tree op = gimple_assign_rhs1 (assign);
753 i = 1;
754 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
756 opt_result res
757 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
758 loop_vinfo, relevant, &worklist, false);
759 if (!res)
760 return res;
761 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
762 loop_vinfo, relevant, &worklist, false);
763 if (!res)
764 return res;
765 i = 2;
767 for (; i < gimple_num_ops (assign); i++)
769 op = gimple_op (assign, i);
770 if (TREE_CODE (op) == SSA_NAME)
772 opt_result res
773 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
774 &worklist, false);
775 if (!res)
776 return res;
780 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
782 for (i = 0; i < gimple_call_num_args (call); i++)
784 tree arg = gimple_call_arg (call, i);
785 opt_result res
786 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
787 &worklist, false);
788 if (!res)
789 return res;
793 else
794 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
796 tree op = USE_FROM_PTR (use_p);
797 opt_result res
798 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
799 &worklist, false);
800 if (!res)
801 return res;
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 opt_result res
810 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
811 &worklist, true);
812 if (!res)
814 if (fatal)
815 *fatal = false;
816 return res;
819 } /* while worklist */
821 return opt_result::success ();
824 /* Function vect_model_simple_cost.
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
830 static void
831 vect_model_simple_cost (vec_info *,
832 stmt_vec_info stmt_info, int ncopies,
833 enum vect_def_type *dt,
834 int ndts,
835 slp_tree node,
836 stmt_vector_for_cost *cost_vec,
837 vect_cost_for_stmt kind = vector_stmt)
839 int inside_cost = 0, prologue_cost = 0;
841 gcc_assert (cost_vec != NULL);
843 /* ??? Somehow we need to fix this at the callers. */
844 if (node)
845 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
847 if (!node)
848 /* Cost the "broadcast" of a scalar operand into a vector operand.
849 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
850 cost model. */
851 for (int i = 0; i < ndts; i++)
852 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
853 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
854 stmt_info, 0, vect_prologue);
856 /* Pass the inside-of-loop statements to the target-specific cost model. */
857 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
858 stmt_info, 0, vect_body);
860 if (dump_enabled_p ())
861 dump_printf_loc (MSG_NOTE, vect_location,
862 "vect_model_simple_cost: inside_cost = %d, "
863 "prologue_cost = %d .\n", inside_cost, prologue_cost);
867 /* Model cost for type demotion and promotion operations. PWR is
868 normally zero for single-step promotions and demotions. It will be
869 one if two-step promotion/demotion is required, and so on. NCOPIES
870 is the number of vector results (and thus number of instructions)
871 for the narrowest end of the operation chain. Each additional
872 step doubles the number of instructions required. If WIDEN_ARITH
873 is true the stmt is doing widening arithmetic. */
875 static void
876 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
877 enum vect_def_type *dt,
878 unsigned int ncopies, int pwr,
879 stmt_vector_for_cost *cost_vec,
880 bool widen_arith)
882 int i;
883 int inside_cost = 0, prologue_cost = 0;
885 for (i = 0; i < pwr + 1; i++)
887 inside_cost += record_stmt_cost (cost_vec, ncopies,
888 widen_arith
889 ? vector_stmt : vec_promote_demote,
890 stmt_info, 0, vect_body);
891 ncopies *= 2;
894 /* FORNOW: Assuming a maximum of 2 args per stmt. */
895 for (i = 0; i < 2; i++)
896 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
898 stmt_info, 0, vect_prologue);
900 if (dump_enabled_p ())
901 dump_printf_loc (MSG_NOTE, vect_location,
902 "vect_model_promotion_demotion_cost: inside_cost = %d, "
903 "prologue_cost = %d .\n", inside_cost, prologue_cost);
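/* Worked example (sketch): with NCOPIES == 1 and PWR == 1 the loop above
   records 1 + 2 = 3 vec_promote_demote (or vector_stmt) costs in the
   vector body, and PWR == 2 would record 1 + 2 + 4 = 7; each constant or
   external operand adds one vector_stmt to the prologue on top of that.  */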
906 /* Returns true if the current function returns DECL. */
908 static bool
909 cfun_returns (tree decl)
911 edge_iterator ei;
912 edge e;
913 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
915 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
916 if (!ret)
917 continue;
918 if (gimple_return_retval (ret) == decl)
919 return true;
920 /* We often end up with an aggregate copy to the result decl,
921 handle that case as well. First skip intermediate clobbers
922 though. */
923 gimple *def = ret;
926 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
928 while (gimple_clobber_p (def));
929 if (is_a <gassign *> (def)
930 && gimple_assign_lhs (def) == gimple_return_retval (ret)
931 && gimple_assign_rhs1 (def) == decl)
932 return true;
934 return false;
937 /* Function vect_model_store_cost
939 Models cost for stores. In the case of grouped accesses, one access
940 has the overhead of the grouped access attributed to it. */
942 static void
943 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
944 vect_memory_access_type memory_access_type,
945 dr_alignment_support alignment_support_scheme,
946 int misalignment,
947 vec_load_store_type vls_type, slp_tree slp_node,
948 stmt_vector_for_cost *cost_vec)
950 unsigned int inside_cost = 0, prologue_cost = 0;
951 stmt_vec_info first_stmt_info = stmt_info;
952 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
954 /* ??? Somehow we need to fix this at the callers. */
955 if (slp_node)
956 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
958 if (vls_type == VLS_STORE_INVARIANT)
960 if (!slp_node)
961 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
962 stmt_info, 0, vect_prologue);
965 /* Grouped stores update all elements in the group at once,
966 so we want the DR for the first statement. */
967 if (!slp_node && grouped_access_p)
968 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
970 /* True if we should include any once-per-group costs as well as
971 the cost of the statement itself. For SLP we only get called
972 once per group anyhow. */
973 bool first_stmt_p = (first_stmt_info == stmt_info);
975 /* We assume that the cost of a single store-lanes instruction is
976 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
977 access is instead being provided by a permute-and-store operation,
978 include the cost of the permutes. */
979 if (first_stmt_p
980 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
982 /* Uses high and low interleave or shuffle operations for each
983 needed permute. */
984 int group_size = DR_GROUP_SIZE (first_stmt_info);
985 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
986 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
987 stmt_info, 0, vect_body);
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_NOTE, vect_location,
991 "vect_model_store_cost: strided group_size = %d .\n",
992 group_size);
995 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
996 /* Costs of the stores. */
997 if (memory_access_type == VMAT_ELEMENTWISE
998 || memory_access_type == VMAT_GATHER_SCATTER)
1000 /* N scalar stores plus extracting the elements. */
1001 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1002 inside_cost += record_stmt_cost (cost_vec,
1003 ncopies * assumed_nunits,
1004 scalar_store, stmt_info, 0, vect_body);
1006 else
1007 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1008 misalignment, &inside_cost, cost_vec);
1010 if (memory_access_type == VMAT_ELEMENTWISE
1011 || memory_access_type == VMAT_STRIDED_SLP)
1013 /* N scalar stores plus extracting the elements. */
1014 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1015 inside_cost += record_stmt_cost (cost_vec,
1016 ncopies * assumed_nunits,
1017 vec_to_scalar, stmt_info, 0, vect_body);
1020 /* When vectorizing a store into the function result assign
1021 a penalty if the function returns in a multi-register location.
1022 In this case we assume we'll end up with having to spill the
1023 vector result and do piecewise loads as a conservative estimate. */
1024 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1025 if (base
1026 && (TREE_CODE (base) == RESULT_DECL
1027 || (DECL_P (base) && cfun_returns (base)))
1028 && !aggregate_value_p (base, cfun->decl))
1030 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1031 /* ??? Handle PARALLEL in some way. */
1032 if (REG_P (reg))
1034 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1035 /* Assume that a single reg-reg move is possible and cheap,
1036 do not account for vector to gp register move cost. */
1037 if (nregs > 1)
1039 /* Spill. */
1040 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1041 vector_store,
1042 stmt_info, 0, vect_epilogue);
1043 /* Loads. */
1044 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1045 scalar_load,
1046 stmt_info, 0, vect_epilogue);
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_NOTE, vect_location,
1053 "vect_model_store_cost: inside_cost = %d, "
1054 "prologue_cost = %d .\n", inside_cost, prologue_cost);
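/* Worked example (sketch): a VMAT_CONTIGUOUS_PERMUTE store group with
   DR_GROUP_SIZE == 4 and NCOPIES == 1 records
   1 * ceil_log2 (4) * 4 == 8 vec_perm stmts for the interleaving above,
   in addition to the vector_store/unaligned_store costs added by
   vect_get_store_cost.  */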
1058 /* Calculate cost of DR's memory access. */
1059 void
1060 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1061 dr_alignment_support alignment_support_scheme,
1062 int misalignment,
1063 unsigned int *inside_cost,
1064 stmt_vector_for_cost *body_cost_vec)
1066 switch (alignment_support_scheme)
1068 case dr_aligned:
1070 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1071 vector_store, stmt_info, 0,
1072 vect_body);
1074 if (dump_enabled_p ())
1075 dump_printf_loc (MSG_NOTE, vect_location,
1076 "vect_model_store_cost: aligned.\n");
1077 break;
1080 case dr_unaligned_supported:
1082 /* Here, we assign an additional cost for the unaligned store. */
1083 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1084 unaligned_store, stmt_info,
1085 misalignment, vect_body);
1086 if (dump_enabled_p ())
1087 dump_printf_loc (MSG_NOTE, vect_location,
1088 "vect_model_store_cost: unaligned supported by "
1089 "hardware.\n");
1090 break;
1093 case dr_unaligned_unsupported:
1095 *inside_cost = VECT_MAX_COST;
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1099 "vect_model_store_cost: unsupported access.\n");
1100 break;
1103 default:
1104 gcc_unreachable ();
1109 /* Function vect_model_load_cost
1111 Models cost for loads. In the case of grouped accesses, one access has
1112 the overhead of the grouped access attributed to it. Since unaligned
1113 accesses are supported for loads, we also account for the costs of the
1114 access scheme chosen. */
1116 static void
1117 vect_model_load_cost (vec_info *vinfo,
1118 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1119 vect_memory_access_type memory_access_type,
1120 dr_alignment_support alignment_support_scheme,
1121 int misalignment,
1122 gather_scatter_info *gs_info,
1123 slp_tree slp_node,
1124 stmt_vector_for_cost *cost_vec)
1126 unsigned int inside_cost = 0, prologue_cost = 0;
1127 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1129 gcc_assert (cost_vec);
1131 /* ??? Somehow we need to fix this at the callers. */
1132 if (slp_node)
1133 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1135 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1137 /* If the load is permuted then the alignment is determined by
1138 the first group element not by the first scalar stmt DR. */
1139 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1140 /* Record the cost for the permutation. */
1141 unsigned n_perms, n_loads;
1142 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1143 vf, true, &n_perms, &n_loads);
1144 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1145 first_stmt_info, 0, vect_body);
1147 /* And adjust the number of loads performed. This handles
1148 redundancies as well as loads that are later dead. */
1149 ncopies = n_loads;
1152 /* Grouped loads read all elements in the group at once,
1153 so we want the DR for the first statement. */
1154 stmt_vec_info first_stmt_info = stmt_info;
1155 if (!slp_node && grouped_access_p)
1156 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1158 /* True if we should include any once-per-group costs as well as
1159 the cost of the statement itself. For SLP we only get called
1160 once per group anyhow. */
1161 bool first_stmt_p = (first_stmt_info == stmt_info);
1163 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1164 ones we actually need. Account for the cost of unused results. */
1165 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1167 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1168 stmt_vec_info next_stmt_info = first_stmt_info;
1171 gaps -= 1;
1172 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1174 while (next_stmt_info);
1175 if (gaps)
1177 if (dump_enabled_p ())
1178 dump_printf_loc (MSG_NOTE, vect_location,
1179 "vect_model_load_cost: %d unused vectors.\n",
1180 gaps);
1181 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1182 alignment_support_scheme, misalignment, false,
1183 &inside_cost, &prologue_cost,
1184 cost_vec, cost_vec, true);
1188 /* We assume that the cost of a single load-lanes instruction is
1189 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1190 access is instead being provided by a load-and-permute operation,
1191 include the cost of the permutes. */
1192 if (first_stmt_p
1193 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1195 /* Uses even and odd extract operations or shuffle operations
1196 for each needed permute. */
1197 int group_size = DR_GROUP_SIZE (first_stmt_info);
1198 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1199 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1200 stmt_info, 0, vect_body);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: strided group_size = %d .\n",
1205 group_size);
1208 /* The loads themselves. */
1209 if (memory_access_type == VMAT_ELEMENTWISE
1210 || memory_access_type == VMAT_GATHER_SCATTER)
1212 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1213 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1214 if (memory_access_type == VMAT_GATHER_SCATTER
1215 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1216 /* For emulated gathers N offset vector element extracts
1217 (we assume the scalar scaling and ptr + offset add is consumed by
1218 the load). */
1219 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1220 vec_to_scalar, stmt_info, 0,
1221 vect_body);
1222 /* N scalar loads plus gathering them into a vector. */
1223 inside_cost += record_stmt_cost (cost_vec,
1224 ncopies * assumed_nunits,
1225 scalar_load, stmt_info, 0, vect_body);
1227 else if (memory_access_type == VMAT_INVARIANT)
1229 /* Invariant loads will ideally be hoisted and splat to a vector. */
1230 prologue_cost += record_stmt_cost (cost_vec, 1,
1231 scalar_load, stmt_info, 0,
1232 vect_prologue);
1233 prologue_cost += record_stmt_cost (cost_vec, 1,
1234 scalar_to_vec, stmt_info, 0,
1235 vect_prologue);
1237 else
1238 vect_get_load_cost (vinfo, stmt_info, ncopies,
1239 alignment_support_scheme, misalignment, first_stmt_p,
1240 &inside_cost, &prologue_cost,
1241 cost_vec, cost_vec, true);
1242 if (memory_access_type == VMAT_ELEMENTWISE
1243 || memory_access_type == VMAT_STRIDED_SLP
1244 || (memory_access_type == VMAT_GATHER_SCATTER
1245 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1246 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1247 stmt_info, 0, vect_body);
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: inside_cost = %d, "
1252 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1256 /* Calculate cost of DR's memory access. */
1257 void
1258 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1259 dr_alignment_support alignment_support_scheme,
1260 int misalignment,
1261 bool add_realign_cost, unsigned int *inside_cost,
1262 unsigned int *prologue_cost,
1263 stmt_vector_for_cost *prologue_cost_vec,
1264 stmt_vector_for_cost *body_cost_vec,
1265 bool record_prologue_costs)
1267 switch (alignment_support_scheme)
1269 case dr_aligned:
1271 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1272 stmt_info, 0, vect_body);
1274 if (dump_enabled_p ())
1275 dump_printf_loc (MSG_NOTE, vect_location,
1276 "vect_model_load_cost: aligned.\n");
1278 break;
1280 case dr_unaligned_supported:
1282 /* Here, we assign an additional cost for the unaligned load. */
1283 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1284 unaligned_load, stmt_info,
1285 misalignment, vect_body);
1287 if (dump_enabled_p ())
1288 dump_printf_loc (MSG_NOTE, vect_location,
1289 "vect_model_load_cost: unaligned supported by "
1290 "hardware.\n");
1292 break;
1294 case dr_explicit_realign:
1296 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1297 vector_load, stmt_info, 0, vect_body);
1298 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1299 vec_perm, stmt_info, 0, vect_body);
1301 /* FIXME: If the misalignment remains fixed across the iterations of
1302 the containing loop, the following cost should be added to the
1303 prologue costs. */
1304 if (targetm.vectorize.builtin_mask_for_load)
1305 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1306 stmt_info, 0, vect_body);
1308 if (dump_enabled_p ())
1309 dump_printf_loc (MSG_NOTE, vect_location,
1310 "vect_model_load_cost: explicit realign\n");
1312 break;
1314 case dr_explicit_realign_optimized:
1316 if (dump_enabled_p ())
1317 dump_printf_loc (MSG_NOTE, vect_location,
1318 "vect_model_load_cost: unaligned software "
1319 "pipelined.\n");
1321 /* Unaligned software pipeline has a load of an address, an initial
1322 load, and possibly a mask operation to "prime" the loop. However,
1323 if this is an access in a group of loads, which provide grouped
1324 access, then the above cost should only be considered for one
1325 access in the group. Inside the loop, there is a load op
1326 and a realignment op. */
1328 if (add_realign_cost && record_prologue_costs)
1330 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1331 vector_stmt, stmt_info,
1332 0, vect_prologue);
1333 if (targetm.vectorize.builtin_mask_for_load)
1334 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1335 vector_stmt, stmt_info,
1336 0, vect_prologue);
1339 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1340 stmt_info, 0, vect_body);
1341 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1342 stmt_info, 0, vect_body);
1344 if (dump_enabled_p ())
1345 dump_printf_loc (MSG_NOTE, vect_location,
1346 "vect_model_load_cost: explicit realign optimized"
1347 "\n");
1349 break;
1352 case dr_unaligned_unsupported:
1354 *inside_cost = VECT_MAX_COST;
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1358 "vect_model_load_cost: unsupported access.\n");
1359 break;
1362 default:
1363 gcc_unreachable ();
1367 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1368 the loop preheader for the vectorized stmt STMT_VINFO. */
1370 static void
1371 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1372 gimple_stmt_iterator *gsi)
1374 if (gsi)
1375 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1376 else
1377 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1379 if (dump_enabled_p ())
1380 dump_printf_loc (MSG_NOTE, vect_location,
1381 "created new init_stmt: %G", new_stmt);
1384 /* Function vect_init_vector.
1386 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1387 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1388 vector type a vector with all elements equal to VAL is created first.
1389 Place the initialization at GSI if it is not NULL. Otherwise, place the
1390 initialization at the loop preheader.
1391 Return the DEF of INIT_STMT.
1392 It will be used in the vectorization of STMT_INFO. */
1394 tree
1395 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1396 gimple_stmt_iterator *gsi)
1398 gimple *init_stmt;
1399 tree new_temp;
1401 /* We also abuse this function to create a plain SSA name initialized with 'val'. */
1402 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1404 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1405 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1407 /* A scalar boolean value should be transformed into an all-zeros
1408 or all-ones value before building a vector. */
1409 if (VECTOR_BOOLEAN_TYPE_P (type))
1411 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1412 tree false_val = build_zero_cst (TREE_TYPE (type));
1414 if (CONSTANT_CLASS_P (val))
1415 val = integer_zerop (val) ? false_val : true_val;
1416 else
1418 new_temp = make_ssa_name (TREE_TYPE (type));
1419 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1420 val, true_val, false_val);
1421 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1422 val = new_temp;
1425 else
1427 gimple_seq stmts = NULL;
1428 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1429 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1430 TREE_TYPE (type), val);
1431 else
1432 /* ??? Condition vectorization expects us to do
1433 promotion of invariant/external defs. */
1434 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1435 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1436 !gsi_end_p (gsi2); )
1438 init_stmt = gsi_stmt (gsi2);
1439 gsi_remove (&gsi2, false);
1440 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1444 val = build_vector_from_val (type, val);
1447 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1448 init_stmt = gimple_build_assign (new_temp, val);
1449 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1450 return new_temp;
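/* Illustrative sketch: for a loop-invariant scalar operand s_5 and a V4SI
   vector type this emits in the loop preheader something like

     cst_7 = {s_5, s_5, s_5, s_5};

   (SSA names invented for illustration) and returns cst_7, which
   vect_get_vec_defs_for_operand below then reuses for every copy of the
   vectorized statement.  */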
1454 /* Function vect_get_vec_defs_for_operand.
1456 OP is an operand in STMT_VINFO. This function returns a vector of
1457 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1459 In the case that OP is an SSA_NAME which is defined in the loop, then
1460 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1462 In case OP is an invariant or constant, a new stmt that creates a vector def
1463 needs to be introduced. VECTYPE may be used to specify a required type for
1464 vector invariant. */
1466 void
1467 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1468 unsigned ncopies,
1469 tree op, vec<tree> *vec_oprnds, tree vectype)
1471 gimple *def_stmt;
1472 enum vect_def_type dt;
1473 bool is_simple_use;
1474 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1476 if (dump_enabled_p ())
1477 dump_printf_loc (MSG_NOTE, vect_location,
1478 "vect_get_vec_defs_for_operand: %T\n", op);
1480 stmt_vec_info def_stmt_info;
1481 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1482 &def_stmt_info, &def_stmt);
1483 gcc_assert (is_simple_use);
1484 if (def_stmt && dump_enabled_p ())
1485 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1487 vec_oprnds->create (ncopies);
1488 if (dt == vect_constant_def || dt == vect_external_def)
1490 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1491 tree vector_type;
1493 if (vectype)
1494 vector_type = vectype;
1495 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1496 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1497 vector_type = truth_type_for (stmt_vectype);
1498 else
1499 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1501 gcc_assert (vector_type);
1502 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1503 while (ncopies--)
1504 vec_oprnds->quick_push (vop);
1506 else
1508 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1509 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1510 for (unsigned i = 0; i < ncopies; ++i)
1511 vec_oprnds->quick_push (gimple_get_lhs
1512 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1517 /* Get vectorized definitions for OP0 and OP1. */
1519 void
1520 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1521 unsigned ncopies,
1522 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1523 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1524 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1525 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1527 if (slp_node)
1529 if (op0)
1530 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1531 if (op1)
1532 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1533 if (op2)
1534 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1535 if (op3)
1536 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1538 else
1540 if (op0)
1541 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1542 op0, vec_oprnds0, vectype0);
1543 if (op1)
1544 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1545 op1, vec_oprnds1, vectype1);
1546 if (op2)
1547 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1548 op2, vec_oprnds2, vectype2);
1549 if (op3)
1550 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1551 op3, vec_oprnds3, vectype3);
1555 void
1556 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1557 unsigned ncopies,
1558 tree op0, vec<tree> *vec_oprnds0,
1559 tree op1, vec<tree> *vec_oprnds1,
1560 tree op2, vec<tree> *vec_oprnds2,
1561 tree op3, vec<tree> *vec_oprnds3)
1563 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1564 op0, vec_oprnds0, NULL_TREE,
1565 op1, vec_oprnds1, NULL_TREE,
1566 op2, vec_oprnds2, NULL_TREE,
1567 op3, vec_oprnds3, NULL_TREE);
1570 /* Helper function called by vect_finish_replace_stmt and
1571 vect_finish_stmt_generation. Set the location of the new
1572 statement and create and return a stmt_vec_info for it. */
1574 static void
1575 vect_finish_stmt_generation_1 (vec_info *,
1576 stmt_vec_info stmt_info, gimple *vec_stmt)
1578 if (dump_enabled_p ())
1579 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1581 if (stmt_info)
1583 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1585 /* While EH edges will generally prevent vectorization, stmt might
1586 e.g. be in a must-not-throw region. Ensure newly created stmts
1587 that could throw are part of the same region. */
1588 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1589 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1590 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1592 else
1593 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1596 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1597 which sets the same scalar result as STMT_INFO did. Create and return a
1598 stmt_vec_info for VEC_STMT. */
1600 void
1601 vect_finish_replace_stmt (vec_info *vinfo,
1602 stmt_vec_info stmt_info, gimple *vec_stmt)
1604 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1605 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1607 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1608 gsi_replace (&gsi, vec_stmt, true);
1610 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1613 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1614 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1616 void
1617 vect_finish_stmt_generation (vec_info *vinfo,
1618 stmt_vec_info stmt_info, gimple *vec_stmt,
1619 gimple_stmt_iterator *gsi)
1621 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1623 if (!gsi_end_p (*gsi)
1624 && gimple_has_mem_ops (vec_stmt))
1626 gimple *at_stmt = gsi_stmt (*gsi);
1627 tree vuse = gimple_vuse (at_stmt);
1628 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1630 tree vdef = gimple_vdef (at_stmt);
1631 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1632 gimple_set_modified (vec_stmt, true);
1633 /* If we have an SSA vuse and insert a store, update virtual
1634 SSA form to avoid triggering the renamer. Do so only
1635 if we can easily see all uses - which is what almost always
1636 happens with the way vectorized stmts are inserted. */
1637 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1638 && ((is_gimple_assign (vec_stmt)
1639 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1640 || (is_gimple_call (vec_stmt)
1641 && (!(gimple_call_flags (vec_stmt)
1642 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1643 || (gimple_call_lhs (vec_stmt)
1644 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1646 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1647 gimple_set_vdef (vec_stmt, new_vdef);
1648 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1652 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1653 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
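/* Illustrative sketch of the virtual operand handling above: inserting a
   vector store right before the scalar store it replaces turns, roughly,

     # .MEM_3 = VDEF <.MEM_2>
     a[i_1] = x_4;

   into

     # .MEM_9 = VDEF <.MEM_2>
     MEM <vector(4) int> [(int *)&a] = vect_x_8;
     # .MEM_3 = VDEF <.MEM_9>
     a[i_1] = x_4;

   (SSA names invented for illustration), keeping virtual SSA form valid
   without having to run the SSA renamer.  */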
1656 /* We want to vectorize a call to combined function CFN with function
1657 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1658 as the types of all inputs. Check whether this is possible using
1659 an internal function, returning its code if so or IFN_LAST if not. */
1661 static internal_fn
1662 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1663 tree vectype_out, tree vectype_in)
1665 internal_fn ifn;
1666 if (internal_fn_p (cfn))
1667 ifn = as_internal_fn (cfn);
1668 else
1669 ifn = associated_internal_fn (fndecl);
1670 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1672 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1673 if (info.vectorizable)
1675 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1676 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1677 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1678 OPTIMIZE_FOR_SPEED))
1679 return ifn;
1682 return IFN_LAST;
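/* Example (sketch): for a call to the sqrtf built-in vectorized with V4SF
   input and output types, associated_internal_fn maps the decl to
   IFN_SQRT, which is directly mapped and vectorizable, so IFN_SQRT is
   returned if direct_internal_fn_supported_p says the target can do a
   V4SF square root; otherwise the caller gets IFN_LAST.  */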
1686 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1687 gimple_stmt_iterator *);
1689 /* Check whether a load or store statement in the loop described by
1690 LOOP_VINFO is possible in a loop using partial vectors. This is
1691 testing whether the vectorizer pass has the appropriate support,
1692 as well as whether the target does.
1694 VLS_TYPE says whether the statement is a load or store and VECTYPE
1695 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1696 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1697 says how the load or store is going to be implemented and GROUP_SIZE
1698 is the number of load or store statements in the containing group.
1699 If the access is a gather load or scatter store, GS_INFO describes
1700 its arguments. If the load or store is conditional, SCALAR_MASK is the
1701 condition under which it occurs.
1703 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1704 vectors is not supported, otherwise record the required rgroup control
1705 types. */
1707 static void
1708 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1709 slp_tree slp_node,
1710 vec_load_store_type vls_type,
1711 int group_size,
1712 vect_memory_access_type
1713 memory_access_type,
1714 gather_scatter_info *gs_info,
1715 tree scalar_mask)
1717 /* Invariant loads need no special support. */
1718 if (memory_access_type == VMAT_INVARIANT)
1719 return;
1721 unsigned int nvectors;
1722 if (slp_node)
1723 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1724 else
1725 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1727 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1728 machine_mode vecmode = TYPE_MODE (vectype);
1729 bool is_load = (vls_type == VLS_LOAD);
1730 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1732 if (is_load
1733 ? !vect_load_lanes_supported (vectype, group_size, true)
1734 : !vect_store_lanes_supported (vectype, group_size, true))
1736 if (dump_enabled_p ())
1737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1738 "can't operate on partial vectors because"
1739 " the target doesn't have an appropriate"
1740 " load/store-lanes instruction.\n");
1741 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1742 return;
1744 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1745 scalar_mask);
1746 return;
1749 if (memory_access_type == VMAT_GATHER_SCATTER)
1751 internal_fn ifn = (is_load
1752 ? IFN_MASK_GATHER_LOAD
1753 : IFN_MASK_SCATTER_STORE);
1754 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1755 gs_info->memory_type,
1756 gs_info->offset_vectype,
1757 gs_info->scale))
1759 if (dump_enabled_p ())
1760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1761 "can't operate on partial vectors because"
1762 " the target doesn't have an appropriate"
1763 " gather load or scatter store instruction.\n");
1764 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1765 return;
1767 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1768 scalar_mask);
1769 return;
1772 if (memory_access_type != VMAT_CONTIGUOUS
1773 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1775 /* Element X of the data must come from iteration i * VF + X of the
1776 scalar loop. We need more work to support other mappings. */
1777 if (dump_enabled_p ())
1778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1779 "can't operate on partial vectors because an"
1780 " access isn't contiguous.\n");
1781 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1782 return;
1785 if (!VECTOR_MODE_P (vecmode))
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1789 "can't operate on partial vectors when emulating"
1790 " vector operations.\n");
1791 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1792 return;
1795 /* We might load more scalars than we need for permuting SLP loads.
1796 We checked in get_group_load_store_type that the extra elements
1797 don't leak into a new vector. */
1798 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1800 unsigned int nvectors;
1801 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1802 return nvectors;
1803 gcc_unreachable ();
1806 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1807 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1808 machine_mode mask_mode;
1809 bool using_partial_vectors_p = false;
1810 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1811 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1813 nvectors = group_memory_nvectors (group_size * vf, nunits);
1814 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1815 using_partial_vectors_p = true;
1818 machine_mode vmode;
1819 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1821 nvectors = group_memory_nvectors (group_size * vf, nunits);
1822 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1823 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1824 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1825 using_partial_vectors_p = true;
1828 if (!using_partial_vectors_p)
1830 if (dump_enabled_p ())
1831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1832 "can't operate on partial vectors because the"
1833 " target doesn't have the appropriate partial"
1834 " vectorization load or store.\n");
1835 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
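/* Editorial aside (a hedged sketch, not part of the vectorizer): for constant
   sizes the group_memory_nvectors lambda above is just a ceiling division of
   the scalar element count by the number of lanes per vector.  The standalone
   helper below, with an invented name, restates that arithmetic.  */

static constexpr unsigned int
sketch_group_memory_nvectors (unsigned int size, unsigned int nunits)
{
  /* Round SIZE up to a whole number of NUNITS-lane vectors.  */
  return (size + nunits - 1) / nunits;
}

/* E.g. a group of 3 elements at VF 8 gives 24 scalars, i.e. 6 vectors of
   4 lanes each; 5 scalars still occupy 2 such vectors.  */
static_assert (sketch_group_memory_nvectors (3 * 8, 4) == 6, "");
static_assert (sketch_group_memory_nvectors (5, 4) == 2, "");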
1839 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1840 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1841 that needs to be applied to all loads and stores in a vectorized loop.
1842 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1843 otherwise return VEC_MASK & LOOP_MASK.
1845 MASK_TYPE is the type of both masks. If new statements are needed,
1846 insert them before GSI. */
1848 static tree
1849 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1850 tree vec_mask, gimple_stmt_iterator *gsi)
1852 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1853 if (!loop_mask)
1854 return vec_mask;
1856 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1858 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1859 return vec_mask;
1861 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1862 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1863 vec_mask, loop_mask);
1865 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1866 return and_res;
1869 /* Determine whether we can use a gather load or scatter store to vectorize
1870 strided load or store STMT_INFO by truncating the current offset to a
1871 smaller width. We need to be able to construct an offset vector:
1873 { 0, X, X*2, X*3, ... }
1875 without loss of precision, where X is STMT_INFO's DR_STEP.
1877 Return true if this is possible, describing the gather load or scatter
1878 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1880 static bool
1881 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1882 loop_vec_info loop_vinfo, bool masked_p,
1883 gather_scatter_info *gs_info)
1885 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1886 data_reference *dr = dr_info->dr;
1887 tree step = DR_STEP (dr);
1888 if (TREE_CODE (step) != INTEGER_CST)
1890 /* ??? Perhaps we could use range information here? */
1891 if (dump_enabled_p ())
1892 dump_printf_loc (MSG_NOTE, vect_location,
1893 "cannot truncate variable step.\n");
1894 return false;
1897 /* Get the number of bits in an element. */
1898 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1899 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1900 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1902 /* Set COUNT to the upper limit on the number of elements - 1.
1903 Start with the maximum vectorization factor. */
1904 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1906 /* Try lowering COUNT to the number of scalar latch iterations. */
1907 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1908 widest_int max_iters;
1909 if (max_loop_iterations (loop, &max_iters)
1910 && max_iters < count)
1911 count = max_iters.to_shwi ();
1913 /* Try scales of 1 and the element size. */
1914 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1915 wi::overflow_type overflow = wi::OVF_NONE;
1916 for (int i = 0; i < 2; ++i)
1918 int scale = scales[i];
1919 widest_int factor;
1920 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1921 continue;
1923 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1924 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1925 if (overflow)
1926 continue;
1927 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1928 unsigned int min_offset_bits = wi::min_precision (range, sign);
1930 /* Find the narrowest viable offset type. */
1931 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1932 tree offset_type = build_nonstandard_integer_type (offset_bits,
1933 sign == UNSIGNED);
1935 /* See whether the target supports the operation with an offset
1936 no narrower than OFFSET_TYPE. */
1937 tree memory_type = TREE_TYPE (DR_REF (dr));
1938 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1939 vectype, memory_type, offset_type, scale,
1940 &gs_info->ifn, &gs_info->offset_vectype)
1941 || gs_info->ifn == IFN_LAST)
1942 continue;
1944 gs_info->decl = NULL_TREE;
1945 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1946 but we don't need to store that here. */
1947 gs_info->base = NULL_TREE;
1948 gs_info->element_type = TREE_TYPE (vectype);
1949 gs_info->offset = fold_convert (offset_type, step);
1950 gs_info->offset_dt = vect_constant_def;
1951 gs_info->scale = scale;
1952 gs_info->memory_type = memory_type;
1953 return true;
1956 if (overflow && dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE, vect_location,
1958 "truncating gather/scatter offset to %d bits"
1959 " might change its value.\n", element_bits);
1961 return false;
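/* Editorial aside (a hedged sketch, not GCC code): for a non-negative range
   the loop above chooses the narrowest offset width by taking the minimum
   precision of the largest offset it must represent and rounding that bit
   count up to a power of two.  The invented constexpr helpers below restate
   that rounding for plain unsigned values.  */

static constexpr unsigned int
sketch_ceil_log2 (unsigned long long x, unsigned int bit = 0)
{
  /* Smallest BIT with 2**BIT >= X.  */
  return (1ULL << bit) >= x ? bit : sketch_ceil_log2 (x, bit + 1);
}

static constexpr unsigned int
sketch_offset_bits (unsigned long long max_offset)
{
  /* Bits needed to represent MAX_OFFSET, rounded up to a power-of-two
     width such as 8, 16, 32 or 64.  */
  return 1U << sketch_ceil_log2 (sketch_ceil_log2 (max_offset + 1));
}

/* A maximum offset of 1000 needs 10 bits, so 16 bits is the narrowest
   viable offset width; 100 fits in an 8-bit offset type.  */
static_assert (sketch_offset_bits (1000) == 16, "");
static_assert (sketch_offset_bits (100) == 8, "");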
1964 /* Return true if we can use gather/scatter internal functions to
1965 vectorize STMT_INFO, which is a grouped or strided load or store.
1966 MASKED_P is true if the load or store is conditional. When returning
1967 true, fill in GS_INFO with the information required to perform the
1968 operation. */
1970 static bool
1971 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1972 loop_vec_info loop_vinfo, bool masked_p,
1973 gather_scatter_info *gs_info)
1975 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1976 || gs_info->ifn == IFN_LAST)
1977 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1978 masked_p, gs_info);
1980 tree old_offset_type = TREE_TYPE (gs_info->offset);
1981 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1983 gcc_assert (TYPE_PRECISION (new_offset_type)
1984 >= TYPE_PRECISION (old_offset_type));
1985 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1987 if (dump_enabled_p ())
1988 dump_printf_loc (MSG_NOTE, vect_location,
1989 "using gather/scatter for strided/grouped access,"
1990 " scale = %d\n", gs_info->scale);
1992 return true;
1995 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1996 elements with a known constant step. Return -1 if that step
1997 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1999 static int
2000 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2002 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2003 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2004 size_zero_node);
2007 /* If the target supports a permute mask that reverses the elements in
2008 a vector of type VECTYPE, return that mask, otherwise return null. */
2010 static tree
2011 perm_mask_for_reverse (tree vectype)
2013 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2015 /* The encoding has a single stepped pattern. */
2016 vec_perm_builder sel (nunits, 1, 3);
2017 for (int i = 0; i < 3; ++i)
2018 sel.quick_push (nunits - 1 - i);
2020 vec_perm_indices indices (sel, 1, nunits);
2021 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
2022 indices))
2023 return NULL_TREE;
2024 return vect_gen_perm_mask_checked (vectype, indices);
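/* Editorial aside (a hedged sketch, not GCC code): for a fixed number of
   lanes the selector built above maps output lane I to input lane
   NUNITS - 1 - I, i.e. { N-1, N-2, ..., 1, 0 }.  Invented helper below.  */

static constexpr unsigned int
sketch_reverse_lane (unsigned int nunits, unsigned int i)
{
  return nunits - 1 - i;
}

/* A 4-lane reversal uses the selector { 3, 2, 1, 0 }.  */
static_assert (sketch_reverse_lane (4, 0) == 3, "");
static_assert (sketch_reverse_lane (4, 3) == 0, "");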
2027 /* A subroutine of get_load_store_type, with a subset of the same
2028 arguments. Handle the case where STMT_INFO is a load or store that
2029 accesses consecutive elements with a negative step. Sets *POFFSET
2030 to the offset to be applied to the DR for the first access. */
2032 static vect_memory_access_type
2033 get_negative_load_store_type (vec_info *vinfo,
2034 stmt_vec_info stmt_info, tree vectype,
2035 vec_load_store_type vls_type,
2036 unsigned int ncopies, poly_int64 *poffset)
2038 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2039 dr_alignment_support alignment_support_scheme;
2041 if (ncopies > 1)
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2045 "multiple types with negative step.\n");
2046 return VMAT_ELEMENTWISE;
2049 /* For backward-running DRs the first access in the vector type is
2050 actually N-1 elements before the address of the DR. */
2051 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2052 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2054 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2055 alignment_support_scheme
2056 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2057 if (alignment_support_scheme != dr_aligned
2058 && alignment_support_scheme != dr_unaligned_supported)
2060 if (dump_enabled_p ())
2061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2062 "negative step but alignment required.\n");
2063 *poffset = 0;
2064 return VMAT_ELEMENTWISE;
2067 if (vls_type == VLS_STORE_INVARIANT)
2069 if (dump_enabled_p ())
2070 dump_printf_loc (MSG_NOTE, vect_location,
2071 "negative step with invariant source;"
2072 " no permute needed.\n");
2073 return VMAT_CONTIGUOUS_DOWN;
2076 if (!perm_mask_for_reverse (vectype))
2078 if (dump_enabled_p ())
2079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2080 "negative step and reversing not supported.\n");
2081 *poffset = 0;
2082 return VMAT_ELEMENTWISE;
2085 return VMAT_CONTIGUOUS_REVERSE;
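/* Editorial aside (a hedged sketch, not GCC code): the offset computed above
   places the first vector access NUNITS - 1 elements below the DR's address,
   i.e. (1 - NUNITS) * element_size bytes.  Invented helper below.  */

static constexpr long long
sketch_negative_step_offset (long long nunits, long long elt_size_bytes)
{
  return (-nunits + 1) * elt_size_bytes;
}

/* A 4-lane vector of 8-byte elements starts 24 bytes below the DR.  */
static_assert (sketch_negative_step_offset (4, 8) == -24, "");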
2088 /* STMT_INFO is either a masked or unconditional store. Return the value
2089 being stored. */
2091 tree
2092 vect_get_store_rhs (stmt_vec_info stmt_info)
2094 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2096 gcc_assert (gimple_assign_single_p (assign));
2097 return gimple_assign_rhs1 (assign);
2099 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2101 internal_fn ifn = gimple_call_internal_fn (call);
2102 int index = internal_fn_stored_value_index (ifn);
2103 gcc_assert (index >= 0);
2104 return gimple_call_arg (call, index);
2106 gcc_unreachable ();
2109 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2111 This function returns a vector type which can be composed from NELTS pieces,
2112 whose type is recorded in PTYPE. VTYPE should be a vector type and have the
2113 same vector size as the returned vector. It first checks whether the target
2114 supports a vector mode of the piece size for the construction; if not, it
2115 further checks whether a scalar mode of the piece size can be used. It
2116 returns NULL_TREE if no usable composition can be found.
2118 For example, for (vtype=V16QI, nelts=4), we can probably get:
2119 - V16QI with PTYPE V4QI.
2120 - V4SI with PTYPE SI.
2121 - NULL_TREE. */
2123 static tree
2124 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2126 gcc_assert (VECTOR_TYPE_P (vtype));
2127 gcc_assert (known_gt (nelts, 0U));
2129 machine_mode vmode = TYPE_MODE (vtype);
2130 if (!VECTOR_MODE_P (vmode))
2131 return NULL_TREE;
2133 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2134 unsigned int pbsize;
2135 if (constant_multiple_p (vbsize, nelts, &pbsize))
2137 /* First check if vec_init optab supports construction from
2138 vector pieces directly. */
2139 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2140 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2141 machine_mode rmode;
2142 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2143 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2144 != CODE_FOR_nothing))
2146 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2147 return vtype;
2150 /* Otherwise check whether an integer type of the same piece size exists and
2151 whether the vec_init optab supports construction from it directly. */
2152 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2153 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2154 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2155 != CODE_FOR_nothing))
2157 *ptype = build_nonstandard_integer_type (pbsize, 1);
2158 return build_vector_type (*ptype, nelts);
2162 return NULL_TREE;
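/* Editorial aside (a hedged sketch, not GCC code): the only arithmetic above
   is splitting the vector's bit size evenly into NELTS pieces.  For the
   V16QI example in the comment (128 bits, 4 pieces) each piece is 32 bits
   wide, which is why V4QI and SI are the candidate piece types.  */

static constexpr unsigned int
sketch_piece_bits (unsigned int vector_bits, unsigned int nelts)
{
  return vector_bits / nelts;
}

static_assert (sketch_piece_bits (128, 4) == 32, "");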
2165 /* A subroutine of get_load_store_type, with a subset of the same
2166 arguments. Handle the case where STMT_INFO is part of a grouped load
2167 or store.
2169 For stores, the statements in the group are all consecutive
2170 and there is no gap at the end. For loads, the statements in the
2171 group might not be consecutive; there can be gaps between statements
2172 as well as at the end. */
2174 static bool
2175 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2176 tree vectype, slp_tree slp_node,
2177 bool masked_p, vec_load_store_type vls_type,
2178 vect_memory_access_type *memory_access_type,
2179 poly_int64 *poffset,
2180 dr_alignment_support *alignment_support_scheme,
2181 int *misalignment,
2182 gather_scatter_info *gs_info)
2184 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2185 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2186 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2187 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2188 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2189 bool single_element_p = (stmt_info == first_stmt_info
2190 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2191 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2192 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2194 /* True if the vectorized statements would access beyond the last
2195 statement in the group. */
2196 bool overrun_p = false;
2198 /* True if we can cope with such overrun by peeling for gaps, so that
2199 there is at least one final scalar iteration after the vector loop. */
2200 bool can_overrun_p = (!masked_p
2201 && vls_type == VLS_LOAD
2202 && loop_vinfo
2203 && !loop->inner);
2205 /* There can only be a gap at the end of the group if the stride is
2206 known at compile time. */
2207 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2209 /* Stores can't yet have gaps. */
2210 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2212 if (slp_node)
2214 /* For SLP vectorization we directly vectorize a subchain
2215 without permutation. */
2216 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2217 first_dr_info
2218 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2219 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2221 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2222 separated by the stride, until we have a complete vector.
2223 Fall back to scalar accesses if that isn't possible. */
2224 if (multiple_p (nunits, group_size))
2225 *memory_access_type = VMAT_STRIDED_SLP;
2226 else
2227 *memory_access_type = VMAT_ELEMENTWISE;
2229 else
2231 overrun_p = loop_vinfo && gap != 0;
2232 if (overrun_p && vls_type != VLS_LOAD)
2234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2235 "Grouped store with gaps requires"
2236 " non-consecutive accesses\n");
2237 return false;
2239 /* An overrun is fine if the trailing elements are smaller
2240 than the alignment boundary B. Every vector access will
2241 be a multiple of B and so we are guaranteed to access a
2242 non-gap element in the same B-sized block. */
2243 if (overrun_p
2244 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2245 vectype)
2246 / vect_get_scalar_dr_size (first_dr_info)))
2247 overrun_p = false;
2249 /* If the gap splits the vector in half and the target
2250 can do half-vector operations avoid the epilogue peeling
2251 by simply loading half of the vector only. Usually
2252 the construction with an upper zero half will be elided. */
2253 dr_alignment_support alss;
2254 int misalign = dr_misalignment (first_dr_info, vectype);
2255 tree half_vtype;
2256 if (overrun_p
2257 && !masked_p
2258 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2259 vectype, misalign)))
2260 == dr_aligned
2261 || alss == dr_unaligned_supported)
2262 && known_eq (nunits, (group_size - gap) * 2)
2263 && known_eq (nunits, group_size)
2264 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2265 != NULL_TREE))
2266 overrun_p = false;
2268 if (overrun_p && !can_overrun_p)
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2272 "Peeling for outer loop is not supported\n");
2273 return false;
2275 int cmp = compare_step_with_zero (vinfo, stmt_info);
2276 if (cmp < 0)
2278 if (single_element_p)
2279 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2280 only correct for single element "interleaving" SLP. */
2281 *memory_access_type = get_negative_load_store_type
2282 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2283 else
2285 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2286 separated by the stride, until we have a complete vector.
2287 Fall back to scalar accesses if that isn't possible. */
2288 if (multiple_p (nunits, group_size))
2289 *memory_access_type = VMAT_STRIDED_SLP;
2290 else
2291 *memory_access_type = VMAT_ELEMENTWISE;
2294 else
2296 gcc_assert (!loop_vinfo || cmp > 0);
2297 *memory_access_type = VMAT_CONTIGUOUS;
2300 /* When we have a contiguous access across loop iterations
2301 but the access in the loop doesn't cover the full vector
2302 we can end up with no gap recorded but still excess
2303 elements accessed, see PR103116. Make sure we peel for
2304 gaps if necessary and sufficient and give up if not. */
2305 if (loop_vinfo
2306 && *memory_access_type == VMAT_CONTIGUOUS
2307 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2308 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2309 nunits))
2311 unsigned HOST_WIDE_INT cnunits, cvf;
2312 if (!can_overrun_p
2313 || !nunits.is_constant (&cnunits)
2314 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2315 /* Peeling for gaps assumes that a single scalar iteration
2316 is enough to make sure the last vector iteration doesn't
2317 access excess elements.
2318 ??? Enhancements include peeling multiple iterations
2319 or using masked loads with a static mask. */
2320 || (group_size * cvf) % cnunits + group_size < cnunits)
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "peeling for gaps insufficient for "
2325 "access\n");
2326 return false;
2328 overrun_p = true;
2332 else
2334 /* We can always handle this case using elementwise accesses,
2335 but see if something more efficient is available. */
2336 *memory_access_type = VMAT_ELEMENTWISE;
2338 /* If there is a gap at the end of the group then these optimizations
2339 would access excess elements in the last iteration. */
2340 bool would_overrun_p = (gap != 0);
2341 /* An overrun is fine if the trailing elements are smaller than the
2342 alignment boundary B. Every vector access will be a multiple of B
2343 and so we are guaranteed to access a non-gap element in the
2344 same B-sized block. */
2345 if (would_overrun_p
2346 && !masked_p
2347 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2348 / vect_get_scalar_dr_size (first_dr_info)))
2349 would_overrun_p = false;
2351 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2352 && (can_overrun_p || !would_overrun_p)
2353 && compare_step_with_zero (vinfo, stmt_info) > 0)
2355 /* First cope with the degenerate case of a single-element
2356 vector. */
2357 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2360 /* Otherwise try using LOAD/STORE_LANES. */
2361 else if (vls_type == VLS_LOAD
2362 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2363 : vect_store_lanes_supported (vectype, group_size,
2364 masked_p))
2366 *memory_access_type = VMAT_LOAD_STORE_LANES;
2367 overrun_p = would_overrun_p;
2370 /* If that fails, try using permuting loads. */
2371 else if (vls_type == VLS_LOAD
2372 ? vect_grouped_load_supported (vectype, single_element_p,
2373 group_size)
2374 : vect_grouped_store_supported (vectype, group_size))
2376 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2377 overrun_p = would_overrun_p;
2381 /* As a last resort, try using a gather load or scatter store.
2383 ??? Although the code can handle all group sizes correctly,
2384 it probably isn't a win to use separate strided accesses based
2385 on nearby locations. Or, even if it's a win over scalar code,
2386 it might not be a win over vectorizing at a lower VF, if that
2387 allows us to use contiguous accesses. */
2388 if (*memory_access_type == VMAT_ELEMENTWISE
2389 && single_element_p
2390 && loop_vinfo
2391 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2392 masked_p, gs_info))
2393 *memory_access_type = VMAT_GATHER_SCATTER;
2396 if (*memory_access_type == VMAT_GATHER_SCATTER
2397 || *memory_access_type == VMAT_ELEMENTWISE)
2399 *alignment_support_scheme = dr_unaligned_supported;
2400 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2402 else
2404 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2405 *alignment_support_scheme
2406 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2407 *misalignment);
2410 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2412 /* STMT is the leader of the group. Check the operands of all the
2413 stmts of the group. */
2414 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2415 while (next_stmt_info)
2417 tree op = vect_get_store_rhs (next_stmt_info);
2418 enum vect_def_type dt;
2419 if (!vect_is_simple_use (op, vinfo, &dt))
2421 if (dump_enabled_p ())
2422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2423 "use not simple.\n");
2424 return false;
2426 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2430 if (overrun_p)
2432 gcc_assert (can_overrun_p);
2433 if (dump_enabled_p ())
2434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2435 "Data access with gaps requires scalar "
2436 "epilogue loop\n");
2437 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2440 return true;
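/* Editorial aside (a hedged sketch, not GCC code): the overrun checks above
   treat a trailing gap as harmless when the GAP elements fit inside one
   alignment block of B bytes, i.e. when GAP < B / scalar_size, because every
   vector access is then a multiple of B and so stays within a block that
   also contains a non-gap element.  Invented helper below.  */

static constexpr bool
sketch_gap_overrun_ok (unsigned int gap, unsigned int align_bytes,
		       unsigned int scalar_size)
{
  return gap < align_bytes / scalar_size;
}

/* With 16-byte alignment and 4-byte elements a gap of 1 is fine,
   a gap of 4 is not.  */
static_assert (sketch_gap_overrun_ok (1, 16, 4), "");
static_assert (!sketch_gap_overrun_ok (4, 16, 4), "");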
2443 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2444 if there is a memory access type that the vectorized form can use,
2445 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2446 or scatters, fill in GS_INFO accordingly. In addition
2447 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2448 the target does not support the alignment scheme. *MISALIGNMENT
2449 is set according to the alignment of the access (including
2450 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2452 SLP says whether we're performing SLP rather than loop vectorization.
2453 MASKED_P is true if the statement is conditional on a vectorized mask.
2454 VECTYPE is the vector type that the vectorized statements will use.
2455 NCOPIES is the number of vector statements that will be needed. */
2457 static bool
2458 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2459 tree vectype, slp_tree slp_node,
2460 bool masked_p, vec_load_store_type vls_type,
2461 unsigned int ncopies,
2462 vect_memory_access_type *memory_access_type,
2463 poly_int64 *poffset,
2464 dr_alignment_support *alignment_support_scheme,
2465 int *misalignment,
2466 gather_scatter_info *gs_info)
2468 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2469 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2470 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2471 *poffset = 0;
2472 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2474 *memory_access_type = VMAT_GATHER_SCATTER;
2475 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2476 gcc_unreachable ();
2477 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2478 &gs_info->offset_dt,
2479 &gs_info->offset_vectype))
2481 if (dump_enabled_p ())
2482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2483 "%s index use not simple.\n",
2484 vls_type == VLS_LOAD ? "gather" : "scatter");
2485 return false;
2487 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2489 if (vls_type != VLS_LOAD)
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2493 "unsupported emulated scatter.\n");
2494 return false;
2496 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2497 || !TYPE_VECTOR_SUBPARTS
2498 (gs_info->offset_vectype).is_constant ()
2499 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2500 (gs_info->offset_vectype),
2501 TYPE_VECTOR_SUBPARTS (vectype)))
2503 if (dump_enabled_p ())
2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2505 "unsupported vector types for emulated "
2506 "gather.\n");
2507 return false;
2510 /* Gather-scatter accesses perform only component accesses; alignment
2511 is irrelevant for them. */
2512 *alignment_support_scheme = dr_unaligned_supported;
2514 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2516 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2517 masked_p,
2518 vls_type, memory_access_type, poffset,
2519 alignment_support_scheme,
2520 misalignment, gs_info))
2521 return false;
2523 else if (STMT_VINFO_STRIDED_P (stmt_info))
2525 gcc_assert (!slp_node);
2526 if (loop_vinfo
2527 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2528 masked_p, gs_info))
2529 *memory_access_type = VMAT_GATHER_SCATTER;
2530 else
2531 *memory_access_type = VMAT_ELEMENTWISE;
2532 /* Alignment is irrelevant here. */
2533 *alignment_support_scheme = dr_unaligned_supported;
2535 else
2537 int cmp = compare_step_with_zero (vinfo, stmt_info);
2538 if (cmp == 0)
2540 gcc_assert (vls_type == VLS_LOAD);
2541 *memory_access_type = VMAT_INVARIANT;
2542 /* Invariant accesses perform only component accesses; alignment
2543 is irrelevant for them. */
2544 *alignment_support_scheme = dr_unaligned_supported;
2546 else
2548 if (cmp < 0)
2549 *memory_access_type = get_negative_load_store_type
2550 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2551 else
2552 *memory_access_type = VMAT_CONTIGUOUS;
2553 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2554 vectype, *poffset);
2555 *alignment_support_scheme
2556 = vect_supportable_dr_alignment (vinfo,
2557 STMT_VINFO_DR_INFO (stmt_info),
2558 vectype, *misalignment);
2562 if ((*memory_access_type == VMAT_ELEMENTWISE
2563 || *memory_access_type == VMAT_STRIDED_SLP)
2564 && !nunits.is_constant ())
2566 if (dump_enabled_p ())
2567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2568 "Not using elementwise accesses due to variable "
2569 "vectorization factor.\n");
2570 return false;
2573 if (*alignment_support_scheme == dr_unaligned_unsupported)
2575 if (dump_enabled_p ())
2576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2577 "unsupported unaligned access\n");
2578 return false;
2581 /* FIXME: At the moment the cost model seems to underestimate the
2582 cost of using elementwise accesses. This check preserves the
2583 traditional behavior until that can be fixed. */
2584 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2585 if (!first_stmt_info)
2586 first_stmt_info = stmt_info;
2587 if (*memory_access_type == VMAT_ELEMENTWISE
2588 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2589 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2590 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2591 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2593 if (dump_enabled_p ())
2594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2595 "not falling back to elementwise accesses\n");
2596 return false;
2598 return true;
2601 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2602 conditional operation STMT_INFO. When returning true, store the mask
2603 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2604 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2605 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2607 static bool
2608 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2609 slp_tree slp_node, unsigned mask_index,
2610 tree *mask, slp_tree *mask_node,
2611 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2613 enum vect_def_type mask_dt;
2614 tree mask_vectype;
2615 slp_tree mask_node_1;
2616 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2617 mask, &mask_node_1, &mask_dt, &mask_vectype))
2619 if (dump_enabled_p ())
2620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2621 "mask use not simple.\n");
2622 return false;
2625 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2627 if (dump_enabled_p ())
2628 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2629 "mask argument is not a boolean.\n");
2630 return false;
2633 /* If the caller is not prepared for adjusting an external/constant
2634 SLP mask vector type fail. */
2635 if (slp_node
2636 && !mask_node
2637 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2639 if (dump_enabled_p ())
2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2641 "SLP mask argument is not vectorized.\n");
2642 return false;
2645 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2646 if (!mask_vectype)
2647 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2649 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2651 if (dump_enabled_p ())
2652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2653 "could not find an appropriate vector mask type.\n");
2654 return false;
2657 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2658 TYPE_VECTOR_SUBPARTS (vectype)))
2660 if (dump_enabled_p ())
2661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2662 "vector mask type %T"
2663 " does not match vector data type %T.\n",
2664 mask_vectype, vectype);
2666 return false;
2669 *mask_dt_out = mask_dt;
2670 *mask_vectype_out = mask_vectype;
2671 if (mask_node)
2672 *mask_node = mask_node_1;
2673 return true;
2676 /* Return true if stored value RHS is suitable for vectorizing store
2677 statement STMT_INFO. When returning true, store the type of the
2678 definition in *RHS_DT_OUT, the type of the vectorized store value in
2679 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2681 static bool
2682 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2683 slp_tree slp_node, tree rhs,
2684 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2685 vec_load_store_type *vls_type_out)
2687 /* In the case this is a store from a constant make sure
2688 native_encode_expr can handle it. */
2689 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2691 if (dump_enabled_p ())
2692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2693 "cannot encode constant as a byte sequence.\n");
2694 return false;
2697 unsigned op_no = 0;
2698 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2700 if (gimple_call_internal_p (call)
2701 && internal_store_fn_p (gimple_call_internal_fn (call)))
2702 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2705 enum vect_def_type rhs_dt;
2706 tree rhs_vectype;
2707 slp_tree slp_op;
2708 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2709 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2711 if (dump_enabled_p ())
2712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2713 "use not simple.\n");
2714 return false;
2717 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2718 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2720 if (dump_enabled_p ())
2721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2722 "incompatible vector types.\n");
2723 return false;
2726 *rhs_dt_out = rhs_dt;
2727 *rhs_vectype_out = rhs_vectype;
2728 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2729 *vls_type_out = VLS_STORE_INVARIANT;
2730 else
2731 *vls_type_out = VLS_STORE;
2732 return true;
2735 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2736 Note that we support masks with floating-point type, in which case the
2737 floats are interpreted as a bitmask. */
2739 static tree
2740 vect_build_all_ones_mask (vec_info *vinfo,
2741 stmt_vec_info stmt_info, tree masktype)
2743 if (TREE_CODE (masktype) == INTEGER_TYPE)
2744 return build_int_cst (masktype, -1);
2745 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2747 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2748 mask = build_vector_from_val (masktype, mask);
2749 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2751 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2753 REAL_VALUE_TYPE r;
2754 long tmp[6];
2755 for (int j = 0; j < 6; ++j)
2756 tmp[j] = -1;
2757 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2758 tree mask = build_real (TREE_TYPE (masktype), r);
2759 mask = build_vector_from_val (masktype, mask);
2760 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2762 gcc_unreachable ();
2765 /* Build an all-zero merge value of type VECTYPE while vectorizing
2766 STMT_INFO as a gather load. */
2768 static tree
2769 vect_build_zero_merge_argument (vec_info *vinfo,
2770 stmt_vec_info stmt_info, tree vectype)
2772 tree merge;
2773 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2774 merge = build_int_cst (TREE_TYPE (vectype), 0);
2775 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2777 REAL_VALUE_TYPE r;
2778 long tmp[6];
2779 for (int j = 0; j < 6; ++j)
2780 tmp[j] = 0;
2781 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2782 merge = build_real (TREE_TYPE (vectype), r);
2784 else
2785 gcc_unreachable ();
2786 merge = build_vector_from_val (vectype, merge);
2787 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2790 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2791 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2792 the gather load operation. If the load is conditional, MASK is the
2793 unvectorized condition under which it occurs; otherwise
2794 MASK is null. */
2796 static void
2797 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2798 gimple_stmt_iterator *gsi,
2799 gimple **vec_stmt,
2800 gather_scatter_info *gs_info,
2801 tree mask)
2803 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2804 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2805 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2806 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2807 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2808 edge pe = loop_preheader_edge (loop);
2809 enum { NARROW, NONE, WIDEN } modifier;
2810 poly_uint64 gather_off_nunits
2811 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2813 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2814 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2815 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2816 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2817 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2818 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2819 tree scaletype = TREE_VALUE (arglist);
2820 tree real_masktype = masktype;
2821 gcc_checking_assert (types_compatible_p (srctype, rettype)
2822 && (!mask
2823 || TREE_CODE (masktype) == INTEGER_TYPE
2824 || types_compatible_p (srctype, masktype)));
2825 if (mask)
2826 masktype = truth_type_for (srctype);
2828 tree mask_halftype = masktype;
2829 tree perm_mask = NULL_TREE;
2830 tree mask_perm_mask = NULL_TREE;
2831 if (known_eq (nunits, gather_off_nunits))
2832 modifier = NONE;
2833 else if (known_eq (nunits * 2, gather_off_nunits))
2835 modifier = WIDEN;
2837 /* Currently widening gathers and scatters are only supported for
2838 fixed-length vectors. */
2839 int count = gather_off_nunits.to_constant ();
2840 vec_perm_builder sel (count, count, 1);
2841 for (int i = 0; i < count; ++i)
2842 sel.quick_push (i | (count / 2));
2844 vec_perm_indices indices (sel, 1, count);
2845 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2846 indices);
2848 else if (known_eq (nunits, gather_off_nunits * 2))
2850 modifier = NARROW;
2852 /* Currently narrowing gathers and scatters are only supported for
2853 fixed-length vectors. */
2854 int count = nunits.to_constant ();
2855 vec_perm_builder sel (count, count, 1);
2856 sel.quick_grow (count);
2857 for (int i = 0; i < count; ++i)
2858 sel[i] = i < count / 2 ? i : i + count / 2;
2859 vec_perm_indices indices (sel, 2, count);
2860 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2862 ncopies *= 2;
2864 if (mask && VECTOR_TYPE_P (real_masktype))
2866 for (int i = 0; i < count; ++i)
2867 sel[i] = i | (count / 2);
2868 indices.new_vector (sel, 2, count);
2869 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2871 else if (mask)
2872 mask_halftype = truth_type_for (gs_info->offset_vectype);
2874 else
2875 gcc_unreachable ();
2877 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2878 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2880 tree ptr = fold_convert (ptrtype, gs_info->base);
2881 if (!is_gimple_min_invariant (ptr))
2883 gimple_seq seq;
2884 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2885 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2886 gcc_assert (!new_bb);
2889 tree scale = build_int_cst (scaletype, gs_info->scale);
2891 tree vec_oprnd0 = NULL_TREE;
2892 tree vec_mask = NULL_TREE;
2893 tree src_op = NULL_TREE;
2894 tree mask_op = NULL_TREE;
2895 tree prev_res = NULL_TREE;
2897 if (!mask)
2899 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2900 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2903 auto_vec<tree> vec_oprnds0;
2904 auto_vec<tree> vec_masks;
2905 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2906 modifier == WIDEN ? ncopies / 2 : ncopies,
2907 gs_info->offset, &vec_oprnds0);
2908 if (mask)
2909 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2910 modifier == NARROW ? ncopies / 2 : ncopies,
2911 mask, &vec_masks, masktype);
2912 for (int j = 0; j < ncopies; ++j)
2914 tree op, var;
2915 if (modifier == WIDEN && (j & 1))
2916 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2917 perm_mask, stmt_info, gsi);
2918 else
2919 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2921 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2923 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2924 TYPE_VECTOR_SUBPARTS (idxtype)));
2925 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2926 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2927 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2928 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2929 op = var;
2932 if (mask)
2934 if (mask_perm_mask && (j & 1))
2935 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2936 mask_perm_mask, stmt_info, gsi);
2937 else
2939 if (modifier == NARROW)
2941 if ((j & 1) == 0)
2942 vec_mask = vec_masks[j / 2];
2944 else
2945 vec_mask = vec_masks[j];
2947 mask_op = vec_mask;
2948 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2950 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2951 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2952 gcc_assert (known_eq (sub1, sub2));
2953 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2954 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2955 gassign *new_stmt
2956 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2957 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2958 mask_op = var;
2961 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2963 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2964 gassign *new_stmt
2965 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2966 : VEC_UNPACK_LO_EXPR,
2967 mask_op);
2968 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2969 mask_op = var;
2971 src_op = mask_op;
2974 tree mask_arg = mask_op;
2975 if (masktype != real_masktype)
2977 tree utype, optype = TREE_TYPE (mask_op);
2978 if (VECTOR_TYPE_P (real_masktype)
2979 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2980 utype = real_masktype;
2981 else
2982 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2983 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2984 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2985 gassign *new_stmt
2986 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2987 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2988 mask_arg = var;
2989 if (!useless_type_conversion_p (real_masktype, utype))
2991 gcc_assert (TYPE_PRECISION (utype)
2992 <= TYPE_PRECISION (real_masktype));
2993 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2994 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2995 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2996 mask_arg = var;
2998 src_op = build_zero_cst (srctype);
3000 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
3001 mask_arg, scale);
3003 if (!useless_type_conversion_p (vectype, rettype))
3005 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3006 TYPE_VECTOR_SUBPARTS (rettype)));
3007 op = vect_get_new_ssa_name (rettype, vect_simple_var);
3008 gimple_call_set_lhs (new_stmt, op);
3009 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3010 var = make_ssa_name (vec_dest);
3011 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3012 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3013 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3015 else
3017 var = make_ssa_name (vec_dest, new_stmt);
3018 gimple_call_set_lhs (new_stmt, var);
3019 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3022 if (modifier == NARROW)
3024 if ((j & 1) == 0)
3026 prev_res = var;
3027 continue;
3029 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
3030 stmt_info, gsi);
3031 new_stmt = SSA_NAME_DEF_STMT (var);
3034 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3036 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
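/* Editorial aside (a hedged sketch, not GCC code): the WIDEN case above
   reuses the high half of an offset vector by selecting lane I | COUNT/2,
   while the NARROW case combines the low halves of two gather results by
   selecting lane I from the first result for I < COUNT/2 and lane
   I + COUNT/2 (the second result's low half) otherwise.  Invented helpers
   below.  */

static constexpr unsigned int
sketch_widen_offset_lane (unsigned int count, unsigned int i)
{
  return i | (count / 2);
}

static constexpr unsigned int
sketch_narrow_result_lane (unsigned int count, unsigned int i)
{
  return i < count / 2 ? i : i + count / 2;
}

/* With COUNT == 4 the widening selector is { 2, 3, 2, 3 } and the
   narrowing selector is { 0, 1, 4, 5 }.  */
static_assert (sketch_widen_offset_lane (4, 0) == 2, "");
static_assert (sketch_narrow_result_lane (4, 3) == 5, "");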
3039 /* Prepare the base and offset in GS_INFO for vectorization.
3040 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3041 to the vectorized offset argument for the first copy of STMT_INFO.
3042 STMT_INFO is the statement described by GS_INFO and LOOP is the
3043 containing loop. */
3045 static void
3046 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3047 class loop *loop, stmt_vec_info stmt_info,
3048 slp_tree slp_node, gather_scatter_info *gs_info,
3049 tree *dataref_ptr, vec<tree> *vec_offset)
3051 gimple_seq stmts = NULL;
3052 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3053 if (stmts != NULL)
3055 basic_block new_bb;
3056 edge pe = loop_preheader_edge (loop);
3057 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3058 gcc_assert (!new_bb);
3060 if (slp_node)
3061 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3062 else
3064 unsigned ncopies
3065 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3066 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3067 gs_info->offset, vec_offset,
3068 gs_info->offset_vectype);
3072 /* Prepare to implement a grouped or strided load or store using
3073 the gather load or scatter store operation described by GS_INFO.
3074 STMT_INFO is the load or store statement.
3076 Set *DATAREF_BUMP to the amount that should be added to the base
3077 address after each copy of the vectorized statement. Set *VEC_OFFSET
3078 to an invariant offset vector in which element I has the value
3079 I * DR_STEP / SCALE. */
3081 static void
3082 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3083 loop_vec_info loop_vinfo,
3084 gather_scatter_info *gs_info,
3085 tree *dataref_bump, tree *vec_offset)
3087 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3088 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3090 tree bump = size_binop (MULT_EXPR,
3091 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3092 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3093 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3095 /* The offset given in GS_INFO can have pointer type, so use the element
3096 type of the vector instead. */
3097 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3099 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3100 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3101 ssize_int (gs_info->scale));
3102 step = fold_convert (offset_type, step);
3104 /* Create {0, X, X*2, X*3, ...}. */
3105 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3106 build_zero_cst (offset_type), step);
3107 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
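/* Editorial aside (a hedged sketch, not GCC code): with a constant DR_STEP
   the invariant offset vector built above is { 0, X, 2*X, ... } with
   X = DR_STEP / SCALE, so element I is simply I * DR_STEP / SCALE.
   Invented helper below.  */

static constexpr long long
sketch_strided_offset (long long dr_step, long long scale, long long i)
{
  return (dr_step / scale) * i;
}

/* A byte step of 12 with scale 4 gives offsets { 0, 3, 6, 9, ... }.  */
static_assert (sketch_strided_offset (12, 4, 3) == 9, "");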
3110 /* Return the amount that should be added to a vector pointer to move
3111 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3112 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3113 vectorization. */
3115 static tree
3116 vect_get_data_ptr_increment (vec_info *vinfo,
3117 dr_vec_info *dr_info, tree aggr_type,
3118 vect_memory_access_type memory_access_type)
3120 if (memory_access_type == VMAT_INVARIANT)
3121 return size_zero_node;
3123 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3124 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3125 if (tree_int_cst_sgn (step) == -1)
3126 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3127 return iv_step;
3130 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3132 static bool
3133 vectorizable_bswap (vec_info *vinfo,
3134 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3135 gimple **vec_stmt, slp_tree slp_node,
3136 slp_tree *slp_op,
3137 tree vectype_in, stmt_vector_for_cost *cost_vec)
3139 tree op, vectype;
3140 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3141 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3142 unsigned ncopies;
3144 op = gimple_call_arg (stmt, 0);
3145 vectype = STMT_VINFO_VECTYPE (stmt_info);
3146 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3148 /* Multiple types in SLP are handled by creating the appropriate number of
3149 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3150 case of SLP. */
3151 if (slp_node)
3152 ncopies = 1;
3153 else
3154 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3156 gcc_assert (ncopies >= 1);
3158 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3159 if (! char_vectype)
3160 return false;
3162 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3163 unsigned word_bytes;
3164 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3165 return false;
3167 /* The encoding uses one stepped pattern for each byte in the word. */
3168 vec_perm_builder elts (num_bytes, word_bytes, 3);
3169 for (unsigned i = 0; i < 3; ++i)
3170 for (unsigned j = 0; j < word_bytes; ++j)
3171 elts.quick_push ((i + 1) * word_bytes - j - 1);
3173 vec_perm_indices indices (elts, 1, num_bytes);
3174 machine_mode vmode = TYPE_MODE (char_vectype);
3175 if (!can_vec_perm_const_p (vmode, vmode, indices))
3176 return false;
3178 if (! vec_stmt)
3180 if (slp_node
3181 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3185 "incompatible vector types for invariants\n");
3186 return false;
3189 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3190 DUMP_VECT_SCOPE ("vectorizable_bswap");
3191 record_stmt_cost (cost_vec,
3192 1, vector_stmt, stmt_info, 0, vect_prologue);
3193 record_stmt_cost (cost_vec,
3194 slp_node
3195 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3196 vec_perm, stmt_info, 0, vect_body);
3197 return true;
3200 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3202 /* Transform. */
3203 vec<tree> vec_oprnds = vNULL;
3204 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3205 op, &vec_oprnds);
3206 /* Arguments are ready. Create the new vector stmt. */
3207 unsigned i;
3208 tree vop;
3209 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3211 gimple *new_stmt;
3212 tree tem = make_ssa_name (char_vectype);
3213 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3214 char_vectype, vop));
3215 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3216 tree tem2 = make_ssa_name (char_vectype);
3217 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3218 tem, tem, bswap_vconst);
3219 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3220 tem = make_ssa_name (vectype);
3221 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3222 vectype, tem2));
3223 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3224 if (slp_node)
3225 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3226 else
3227 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3230 if (!slp_node)
3231 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3233 vec_oprnds.release ();
3234 return true;
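/* Editorial aside (a hedged sketch, not GCC code): the byte permutation built
   above reverses the bytes within each word while keeping the words in place,
   mapping byte J of word I to (I + 1) * WORD_BYTES - J - 1.  Invented helper
   below.  */

static constexpr unsigned int
sketch_bswap_perm_index (unsigned int word_bytes, unsigned int i,
			 unsigned int j)
{
  return (i + 1) * word_bytes - j - 1;
}

/* For 4-byte words in a 16-byte vector the selector starts
   { 3, 2, 1, 0, 7, 6, 5, 4, ... }.  */
static_assert (sketch_bswap_perm_index (4, 0, 0) == 3, "");
static_assert (sketch_bswap_perm_index (4, 1, 0) == 7, "");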
3237 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3238 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3239 in a single step. On success, store the binary pack code in
3240 *CONVERT_CODE. */
3242 static bool
3243 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3244 tree_code *convert_code)
3246 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3247 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3248 return false;
3250 tree_code code;
3251 int multi_step_cvt = 0;
3252 auto_vec <tree, 8> interm_types;
3253 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3254 &code, &multi_step_cvt, &interm_types)
3255 || multi_step_cvt)
3256 return false;
3258 *convert_code = code;
3259 return true;
3262 /* Function vectorizable_call.
3264 Check if STMT_INFO performs a function call that can be vectorized.
3265 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3266 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3267 Return true if STMT_INFO is vectorizable in this way. */
3269 static bool
3270 vectorizable_call (vec_info *vinfo,
3271 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3272 gimple **vec_stmt, slp_tree slp_node,
3273 stmt_vector_for_cost *cost_vec)
3275 gcall *stmt;
3276 tree vec_dest;
3277 tree scalar_dest;
3278 tree op;
3279 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3280 tree vectype_out, vectype_in;
3281 poly_uint64 nunits_in;
3282 poly_uint64 nunits_out;
3283 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3284 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3285 tree fndecl, new_temp, rhs_type;
3286 enum vect_def_type dt[4]
3287 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3288 vect_unknown_def_type };
3289 tree vectypes[ARRAY_SIZE (dt)] = {};
3290 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3291 int ndts = ARRAY_SIZE (dt);
3292 int ncopies, j;
3293 auto_vec<tree, 8> vargs;
3294 enum { NARROW, NONE, WIDEN } modifier;
3295 size_t i, nargs;
3296 tree lhs;
3298 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3299 return false;
3301 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3302 && ! vec_stmt)
3303 return false;
3305 /* Is STMT_INFO a vectorizable call? */
3306 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3307 if (!stmt)
3308 return false;
3310 if (gimple_call_internal_p (stmt)
3311 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3312 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3313 /* Handled by vectorizable_load and vectorizable_store. */
3314 return false;
3316 if (gimple_call_lhs (stmt) == NULL_TREE
3317 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3318 return false;
3320 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3322 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3324 /* Process function arguments. */
3325 rhs_type = NULL_TREE;
3326 vectype_in = NULL_TREE;
3327 nargs = gimple_call_num_args (stmt);
3329 /* Bail out if the function has more than four arguments; we do not have
3330 interesting builtin functions to vectorize with more than two arguments
3331 except for fma. Calls with no arguments are not handled either. */
3332 if (nargs == 0 || nargs > 4)
3333 return false;
3335 /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. */
3336 combined_fn cfn = gimple_call_combined_fn (stmt);
3337 if (cfn == CFN_GOMP_SIMD_LANE)
3339 nargs = 0;
3340 rhs_type = unsigned_type_node;
3343 int mask_opno = -1;
3344 if (internal_fn_p (cfn))
3345 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3347 for (i = 0; i < nargs; i++)
3349 if ((int) i == mask_opno)
3351 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3352 &op, &slp_op[i], &dt[i], &vectypes[i]))
3353 return false;
3354 continue;
3357 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3358 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3360 if (dump_enabled_p ())
3361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3362 "use not simple.\n");
3363 return false;
3366 /* We can only handle calls with arguments of the same type. */
3367 if (rhs_type
3368 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3370 if (dump_enabled_p ())
3371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3372 "argument types differ.\n");
3373 return false;
3375 if (!rhs_type)
3376 rhs_type = TREE_TYPE (op);
3378 if (!vectype_in)
3379 vectype_in = vectypes[i];
3380 else if (vectypes[i]
3381 && !types_compatible_p (vectypes[i], vectype_in))
3383 if (dump_enabled_p ())
3384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3385 "argument vector types differ.\n");
3386 return false;
3389 /* If all arguments are external or constant defs, infer the vector type
3390 from the scalar type. */
3391 if (!vectype_in)
3392 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3393 if (vec_stmt)
3394 gcc_assert (vectype_in);
3395 if (!vectype_in)
3397 if (dump_enabled_p ())
3398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3399 "no vectype for scalar type %T\n", rhs_type);
3401 return false;
3403 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3404 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3405 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3406 by a pack of the two vectors into an SI vector. We would need
3407 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3408 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3410 if (dump_enabled_p ())
3411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3412 "mismatched vector sizes %T and %T\n",
3413 vectype_in, vectype_out);
3414 return false;
3417 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3418 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3420 if (dump_enabled_p ())
3421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3422 "mixed mask and nonmask vector types\n");
3423 return false;
3426 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3428 if (dump_enabled_p ())
3429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3430 "use emulated vector type for call\n");
3431 return false;
3434 /* FORNOW */
3435 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3436 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3437 if (known_eq (nunits_in * 2, nunits_out))
3438 modifier = NARROW;
3439 else if (known_eq (nunits_out, nunits_in))
3440 modifier = NONE;
3441 else if (known_eq (nunits_out * 2, nunits_in))
3442 modifier = WIDEN;
3443 else
3444 return false;
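/* For example, V4DI arguments with a V8SI result give NARROW (two input
   vectors per result vector), V8SI arguments with a V4DI result give
   WIDEN, and equal element counts give NONE.  */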
3446 /* We only handle functions that do not read or clobber memory. */
3447 if (gimple_vuse (stmt))
3449 if (dump_enabled_p ())
3450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3451 "function reads from or writes to memory.\n");
3452 return false;
3455 /* For now, we only vectorize functions if a target-specific builtin
3456 is available. TODO -- in some cases, it might be profitable to
3457 insert the calls for pieces of the vector, in order to be able
3458 to vectorize other operations in the loop. */
3459 fndecl = NULL_TREE;
3460 internal_fn ifn = IFN_LAST;
3461 tree callee = gimple_call_fndecl (stmt);
3463 /* First try using an internal function. */
3464 tree_code convert_code = ERROR_MARK;
3465 if (cfn != CFN_LAST
3466 && (modifier == NONE
3467 || (modifier == NARROW
3468 && simple_integer_narrowing (vectype_out, vectype_in,
3469 &convert_code))))
3470 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3471 vectype_in);
3473 /* If that fails, try asking for a target-specific built-in function. */
3474 if (ifn == IFN_LAST)
3476 if (cfn != CFN_LAST)
3477 fndecl = targetm.vectorize.builtin_vectorized_function
3478 (cfn, vectype_out, vectype_in);
3479 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3480 fndecl = targetm.vectorize.builtin_md_vectorized_function
3481 (callee, vectype_out, vectype_in);
3484 if (ifn == IFN_LAST && !fndecl)
3486 if (cfn == CFN_GOMP_SIMD_LANE
3487 && !slp_node
3488 && loop_vinfo
3489 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3490 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3491 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3492 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3494 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3495 { 0, 1, 2, ... vf - 1 } vector. */
3496 gcc_assert (nargs == 0);
3498 else if (modifier == NONE
3499 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3500 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3501 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3502 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3503 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3504 slp_op, vectype_in, cost_vec);
3505 else
3507 if (dump_enabled_p ())
3508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3509 "function is not vectorizable.\n");
3510 return false;
3514 if (slp_node)
3515 ncopies = 1;
3516 else if (modifier == NARROW && ifn == IFN_LAST)
3517 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3518 else
3519 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3521 /* Sanity check: make sure that at least one copy of the vectorized stmt
3522 needs to be generated. */
3523 gcc_assert (ncopies >= 1);
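/* E.g. with a vectorization factor of 8 and a V4SI vector type this gives
   ncopies == 2, i.e. two vector calls per scalar call.  */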
3525 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3526 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3527 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3528 if (!vec_stmt) /* transformation not required. */
3530 if (slp_node)
3531 for (i = 0; i < nargs; ++i)
3532 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3533 vectypes[i]
3534 ? vectypes[i] : vectype_in))
3536 if (dump_enabled_p ())
3537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3538 "incompatible vector types for invariants\n");
3539 return false;
3541 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3542 DUMP_VECT_SCOPE ("vectorizable_call");
3543 vect_model_simple_cost (vinfo, stmt_info,
3544 ncopies, dt, ndts, slp_node, cost_vec);
3545 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3546 record_stmt_cost (cost_vec, ncopies / 2,
3547 vec_promote_demote, stmt_info, 0, vect_body);
3549 if (loop_vinfo
3550 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3551 && (reduc_idx >= 0 || mask_opno >= 0))
3553 if (reduc_idx >= 0
3554 && (cond_fn == IFN_LAST
3555 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3556 OPTIMIZE_FOR_SPEED)))
3558 if (dump_enabled_p ())
3559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3560 "can't use a fully-masked loop because no"
3561 " conditional operation is available.\n");
3562 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3564 else
3566 unsigned int nvectors
3567 = (slp_node
3568 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3569 : ncopies);
3570 tree scalar_mask = NULL_TREE;
3571 if (mask_opno >= 0)
3572 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3573 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3574 vectype_out, scalar_mask);
3577 return true;
3580 /* Transform. */
3582 if (dump_enabled_p ())
3583 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3585 /* Handle def. */
3586 scalar_dest = gimple_call_lhs (stmt);
3587 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3589 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3590 unsigned int vect_nargs = nargs;
3591 if (masked_loop_p && reduc_idx >= 0)
3593 ifn = cond_fn;
3594 vect_nargs += 2;
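/* A conditional internal function takes the loop mask as its first
   argument and an "else" value as its last, hence the two extra vector
   arguments.  */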
3597 if (modifier == NONE || ifn != IFN_LAST)
3599 tree prev_res = NULL_TREE;
3600 vargs.safe_grow (vect_nargs, true);
3601 auto_vec<vec<tree> > vec_defs (nargs);
3602 for (j = 0; j < ncopies; ++j)
3604 /* Build argument list for the vectorized call. */
3605 if (slp_node)
3607 vec<tree> vec_oprnds0;
3609 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3610 vec_oprnds0 = vec_defs[0];
3612 /* Arguments are ready. Create the new vector stmt. */
3613 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3615 int varg = 0;
3616 if (masked_loop_p && reduc_idx >= 0)
3618 unsigned int vec_num = vec_oprnds0.length ();
3619 /* Always true for SLP. */
3620 gcc_assert (ncopies == 1);
3621 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3622 vectype_out, i);
3624 size_t k;
3625 for (k = 0; k < nargs; k++)
3627 vec<tree> vec_oprndsk = vec_defs[k];
3628 vargs[varg++] = vec_oprndsk[i];
3630 if (masked_loop_p && reduc_idx >= 0)
3631 vargs[varg++] = vargs[reduc_idx + 1];
3632 gimple *new_stmt;
3633 if (modifier == NARROW)
3635 /* We don't define any narrowing conditional functions
3636 at present. */
3637 gcc_assert (mask_opno < 0);
3638 tree half_res = make_ssa_name (vectype_in);
3639 gcall *call
3640 = gimple_build_call_internal_vec (ifn, vargs);
3641 gimple_call_set_lhs (call, half_res);
3642 gimple_call_set_nothrow (call, true);
3643 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3644 if ((i & 1) == 0)
3646 prev_res = half_res;
3647 continue;
3649 new_temp = make_ssa_name (vec_dest);
3650 new_stmt = gimple_build_assign (new_temp, convert_code,
3651 prev_res, half_res);
3652 vect_finish_stmt_generation (vinfo, stmt_info,
3653 new_stmt, gsi);
3655 else
3657 if (mask_opno >= 0 && masked_loop_p)
3659 unsigned int vec_num = vec_oprnds0.length ();
3660 /* Always true for SLP. */
3661 gcc_assert (ncopies == 1);
3662 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3663 vectype_out, i);
3664 vargs[mask_opno] = prepare_vec_mask
3665 (loop_vinfo, TREE_TYPE (mask), mask,
3666 vargs[mask_opno], gsi);
3669 gcall *call;
3670 if (ifn != IFN_LAST)
3671 call = gimple_build_call_internal_vec (ifn, vargs);
3672 else
3673 call = gimple_build_call_vec (fndecl, vargs);
3674 new_temp = make_ssa_name (vec_dest, call);
3675 gimple_call_set_lhs (call, new_temp);
3676 gimple_call_set_nothrow (call, true);
3677 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3678 new_stmt = call;
3680 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3682 continue;
3685 int varg = 0;
3686 if (masked_loop_p && reduc_idx >= 0)
3687 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3688 vectype_out, j);
3689 for (i = 0; i < nargs; i++)
3691 op = gimple_call_arg (stmt, i);
3692 if (j == 0)
3694 vec_defs.quick_push (vNULL);
3695 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3696 op, &vec_defs[i],
3697 vectypes[i]);
3699 vargs[varg++] = vec_defs[i][j];
3701 if (masked_loop_p && reduc_idx >= 0)
3702 vargs[varg++] = vargs[reduc_idx + 1];
3704 if (mask_opno >= 0 && masked_loop_p)
3706 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3707 vectype_out, j);
3708 vargs[mask_opno]
3709 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3710 vargs[mask_opno], gsi);
3713 gimple *new_stmt;
3714 if (cfn == CFN_GOMP_SIMD_LANE)
3716 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3717 tree new_var
3718 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3719 gimple *init_stmt = gimple_build_assign (new_var, cst);
3720 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3721 new_temp = make_ssa_name (vec_dest);
3722 new_stmt = gimple_build_assign (new_temp, new_var);
3723 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3725 else if (modifier == NARROW)
3727 /* We don't define any narrowing conditional functions at
3728 present. */
3729 gcc_assert (mask_opno < 0);
3730 tree half_res = make_ssa_name (vectype_in);
3731 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3732 gimple_call_set_lhs (call, half_res);
3733 gimple_call_set_nothrow (call, true);
3734 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3735 if ((j & 1) == 0)
3737 prev_res = half_res;
3738 continue;
3740 new_temp = make_ssa_name (vec_dest);
3741 new_stmt = gimple_build_assign (new_temp, convert_code,
3742 prev_res, half_res);
3743 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3745 else
3747 gcall *call;
3748 if (ifn != IFN_LAST)
3749 call = gimple_build_call_internal_vec (ifn, vargs);
3750 else
3751 call = gimple_build_call_vec (fndecl, vargs);
3752 new_temp = make_ssa_name (vec_dest, call);
3753 gimple_call_set_lhs (call, new_temp);
3754 gimple_call_set_nothrow (call, true);
3755 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3756 new_stmt = call;
3759 if (j == (modifier == NARROW ? 1 : 0))
3760 *vec_stmt = new_stmt;
3761 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3763 for (i = 0; i < nargs; i++)
3765 vec<tree> vec_oprndsi = vec_defs[i];
3766 vec_oprndsi.release ();
3769 else if (modifier == NARROW)
3771 auto_vec<vec<tree> > vec_defs (nargs);
3772 /* We don't define any narrowing conditional functions at present. */
3773 gcc_assert (mask_opno < 0);
3774 for (j = 0; j < ncopies; ++j)
3776 /* Build argument list for the vectorized call. */
3777 if (j == 0)
3778 vargs.create (nargs * 2);
3779 else
3780 vargs.truncate (0);
3782 if (slp_node)
3784 vec<tree> vec_oprnds0;
3786 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3787 vec_oprnds0 = vec_defs[0];
3789 /* Arguments are ready. Create the new vector stmt. */
3790 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3792 size_t k;
3793 vargs.truncate (0);
3794 for (k = 0; k < nargs; k++)
3796 vec<tree> vec_oprndsk = vec_defs[k];
3797 vargs.quick_push (vec_oprndsk[i]);
3798 vargs.quick_push (vec_oprndsk[i + 1]);
3800 gcall *call;
3801 if (ifn != IFN_LAST)
3802 call = gimple_build_call_internal_vec (ifn, vargs);
3803 else
3804 call = gimple_build_call_vec (fndecl, vargs);
3805 new_temp = make_ssa_name (vec_dest, call);
3806 gimple_call_set_lhs (call, new_temp);
3807 gimple_call_set_nothrow (call, true);
3808 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3809 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3811 continue;
3814 for (i = 0; i < nargs; i++)
3816 op = gimple_call_arg (stmt, i);
3817 if (j == 0)
3819 vec_defs.quick_push (vNULL);
3820 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3821 op, &vec_defs[i], vectypes[i]);
3823 vec_oprnd0 = vec_defs[i][2*j];
3824 vec_oprnd1 = vec_defs[i][2*j+1];
3826 vargs.quick_push (vec_oprnd0);
3827 vargs.quick_push (vec_oprnd1);
3830 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3831 new_temp = make_ssa_name (vec_dest, new_stmt);
3832 gimple_call_set_lhs (new_stmt, new_temp);
3833 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3835 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3838 if (!slp_node)
3839 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3841 for (i = 0; i < nargs; i++)
3843 vec<tree> vec_oprndsi = vec_defs[i];
3844 vec_oprndsi.release ();
3847 else
3848 /* No current target implements this case. */
3849 return false;
3851 vargs.release ();
3853 /* The call in STMT might prevent it from being removed in DCE.
3854 However, we cannot remove it here, due to the way the SSA name
3855 it defines is mapped to the new definition. So just replace the
3856 rhs of the statement with something harmless. */
3858 if (slp_node)
3859 return true;
3861 stmt_info = vect_orig_stmt (stmt_info);
3862 lhs = gimple_get_lhs (stmt_info->stmt);
3864 gassign *new_stmt
3865 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3866 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3868 return true;
3872 struct simd_call_arg_info
3874 tree vectype;
3875 tree op;
3876 HOST_WIDE_INT linear_step;
3877 enum vect_def_type dt;
3878 unsigned int align;
3879 bool simd_lane_linear;
3882 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3883 is linear within simd lane (but not within whole loop), note it in
3884 *ARGINFO. */
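/* For example, an address computed as
     p_1 = &a + (sizetype) (_2 * 4)
   where _2 is (a conversion of) the IFN_GOMP_SIMD_LANE result is linear
   within the simd lane, with base &a and linear_step 4.  */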
3886 static void
3887 vect_simd_lane_linear (tree op, class loop *loop,
3888 struct simd_call_arg_info *arginfo)
3890 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3892 if (!is_gimple_assign (def_stmt)
3893 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3894 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3895 return;
3897 tree base = gimple_assign_rhs1 (def_stmt);
3898 HOST_WIDE_INT linear_step = 0;
3899 tree v = gimple_assign_rhs2 (def_stmt);
3900 while (TREE_CODE (v) == SSA_NAME)
3902 tree t;
3903 def_stmt = SSA_NAME_DEF_STMT (v);
3904 if (is_gimple_assign (def_stmt))
3905 switch (gimple_assign_rhs_code (def_stmt))
3907 case PLUS_EXPR:
3908 t = gimple_assign_rhs2 (def_stmt);
3909 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3910 return;
3911 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3912 v = gimple_assign_rhs1 (def_stmt);
3913 continue;
3914 case MULT_EXPR:
3915 t = gimple_assign_rhs2 (def_stmt);
3916 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3917 return;
3918 linear_step = tree_to_shwi (t);
3919 v = gimple_assign_rhs1 (def_stmt);
3920 continue;
3921 CASE_CONVERT:
3922 t = gimple_assign_rhs1 (def_stmt);
3923 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3924 || (TYPE_PRECISION (TREE_TYPE (v))
3925 < TYPE_PRECISION (TREE_TYPE (t))))
3926 return;
3927 if (!linear_step)
3928 linear_step = 1;
3929 v = t;
3930 continue;
3931 default:
3932 return;
3934 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3935 && loop->simduid
3936 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3937 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3938 == loop->simduid))
3940 if (!linear_step)
3941 linear_step = 1;
3942 arginfo->linear_step = linear_step;
3943 arginfo->op = base;
3944 arginfo->simd_lane_linear = true;
3945 return;
3950 /* Return the number of elements in vector type VECTYPE, which is associated
3951 with a SIMD clone. At present these vectors always have a constant
3952 length. */
3954 static unsigned HOST_WIDE_INT
3955 simd_clone_subparts (tree vectype)
3957 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3960 /* Function vectorizable_simd_clone_call.
3962 Check if STMT_INFO performs a function call that can be vectorized
3963 by calling a simd clone of the function.
3964 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3965 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3966 Return true if STMT_INFO is vectorizable in this way. */
3968 static bool
3969 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3970 gimple_stmt_iterator *gsi,
3971 gimple **vec_stmt, slp_tree slp_node,
3972 stmt_vector_for_cost *)
3974 tree vec_dest;
3975 tree scalar_dest;
3976 tree op, type;
3977 tree vec_oprnd0 = NULL_TREE;
3978 tree vectype;
3979 poly_uint64 nunits;
3980 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3981 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3982 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3983 tree fndecl, new_temp;
3984 int ncopies, j;
3985 auto_vec<simd_call_arg_info> arginfo;
3986 vec<tree> vargs = vNULL;
3987 size_t i, nargs;
3988 tree lhs, rtype, ratype;
3989 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3991 /* Is STMT a vectorizable call? */
3992 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3993 if (!stmt)
3994 return false;
3996 fndecl = gimple_call_fndecl (stmt);
3997 if (fndecl == NULL_TREE)
3998 return false;
4000 struct cgraph_node *node = cgraph_node::get (fndecl);
4001 if (node == NULL || node->simd_clones == NULL)
4002 return false;
4004 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4005 return false;
4007 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4008 && ! vec_stmt)
4009 return false;
4011 if (gimple_call_lhs (stmt)
4012 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4013 return false;
4015 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4017 vectype = STMT_VINFO_VECTYPE (stmt_info);
4019 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4020 return false;
4022 /* FORNOW */
4023 if (slp_node)
4024 return false;
4026 /* Process function arguments. */
4027 nargs = gimple_call_num_args (stmt);
4029 /* Bail out if the function has zero arguments. */
4030 if (nargs == 0)
4031 return false;
4033 arginfo.reserve (nargs, true);
4035 for (i = 0; i < nargs; i++)
4037 simd_call_arg_info thisarginfo;
4038 affine_iv iv;
4040 thisarginfo.linear_step = 0;
4041 thisarginfo.align = 0;
4042 thisarginfo.op = NULL_TREE;
4043 thisarginfo.simd_lane_linear = false;
4045 op = gimple_call_arg (stmt, i);
4046 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4047 &thisarginfo.vectype)
4048 || thisarginfo.dt == vect_uninitialized_def)
4050 if (dump_enabled_p ())
4051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4052 "use not simple.\n");
4053 return false;
4056 if (thisarginfo.dt == vect_constant_def
4057 || thisarginfo.dt == vect_external_def)
4058 gcc_assert (thisarginfo.vectype == NULL_TREE);
4059 else
4061 gcc_assert (thisarginfo.vectype != NULL_TREE);
4062 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
4064 if (dump_enabled_p ())
4065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4066 "vector mask arguments are not supported\n");
4067 return false;
4071 /* For linear arguments, the analysis phase should have saved
4072 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4073 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4074 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4076 gcc_assert (vec_stmt);
4077 thisarginfo.linear_step
4078 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4079 thisarginfo.op
4080 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4081 thisarginfo.simd_lane_linear
4082 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4083 == boolean_true_node);
4084 /* If the loop has been peeled for alignment, adjust the linear base accordingly. */
4085 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4086 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4087 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4089 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4090 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4091 tree opt = TREE_TYPE (thisarginfo.op);
4092 bias = fold_convert (TREE_TYPE (step), bias);
4093 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4094 thisarginfo.op
4095 = fold_build2 (POINTER_TYPE_P (opt)
4096 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4097 thisarginfo.op, bias);
4100 else if (!vec_stmt
4101 && thisarginfo.dt != vect_constant_def
4102 && thisarginfo.dt != vect_external_def
4103 && loop_vinfo
4104 && TREE_CODE (op) == SSA_NAME
4105 && simple_iv (loop, loop_containing_stmt (stmt), op,
4106 &iv, false)
4107 && tree_fits_shwi_p (iv.step))
4109 thisarginfo.linear_step = tree_to_shwi (iv.step);
4110 thisarginfo.op = iv.base;
4112 else if ((thisarginfo.dt == vect_constant_def
4113 || thisarginfo.dt == vect_external_def)
4114 && POINTER_TYPE_P (TREE_TYPE (op)))
4115 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4116 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4117 linear too. */
4118 if (POINTER_TYPE_P (TREE_TYPE (op))
4119 && !thisarginfo.linear_step
4120 && !vec_stmt
4121 && thisarginfo.dt != vect_constant_def
4122 && thisarginfo.dt != vect_external_def
4123 && loop_vinfo
4124 && !slp_node
4125 && TREE_CODE (op) == SSA_NAME)
4126 vect_simd_lane_linear (op, loop, &thisarginfo);
4128 arginfo.quick_push (thisarginfo);
4131 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4132 if (!vf.is_constant ())
4134 if (dump_enabled_p ())
4135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4136 "not considering SIMD clones; not yet supported"
4137 " for variable-width vectors.\n");
4138 return false;
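/* Pick the "least bad" clone: extra calls per loop iteration, inbranch
   clones, target-reported penalties and argument mismatches all add to
   the badness, and the clone with the smallest value wins.  */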
4141 unsigned int badness = 0;
4142 struct cgraph_node *bestn = NULL;
4143 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4144 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4145 else
4146 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4147 n = n->simdclone->next_clone)
4149 unsigned int this_badness = 0;
4150 unsigned int num_calls;
4151 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4152 || n->simdclone->nargs != nargs)
4153 continue;
4154 if (num_calls != 1)
4155 this_badness += exact_log2 (num_calls) * 4096;
4156 if (n->simdclone->inbranch)
4157 this_badness += 8192;
4158 int target_badness = targetm.simd_clone.usable (n);
4159 if (target_badness < 0)
4160 continue;
4161 this_badness += target_badness * 512;
4162 /* FORNOW: Have to add code to add the mask argument. */
4163 if (n->simdclone->inbranch)
4164 continue;
4165 for (i = 0; i < nargs; i++)
4167 switch (n->simdclone->args[i].arg_type)
4169 case SIMD_CLONE_ARG_TYPE_VECTOR:
4170 if (!useless_type_conversion_p
4171 (n->simdclone->args[i].orig_type,
4172 TREE_TYPE (gimple_call_arg (stmt, i))))
4173 i = -1;
4174 else if (arginfo[i].dt == vect_constant_def
4175 || arginfo[i].dt == vect_external_def
4176 || arginfo[i].linear_step)
4177 this_badness += 64;
4178 break;
4179 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4180 if (arginfo[i].dt != vect_constant_def
4181 && arginfo[i].dt != vect_external_def)
4182 i = -1;
4183 break;
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4185 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4186 if (arginfo[i].dt == vect_constant_def
4187 || arginfo[i].dt == vect_external_def
4188 || (arginfo[i].linear_step
4189 != n->simdclone->args[i].linear_step))
4190 i = -1;
4191 break;
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4194 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4195 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4196 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4197 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4198 /* FORNOW */
4199 i = -1;
4200 break;
4201 case SIMD_CLONE_ARG_TYPE_MASK:
4202 gcc_unreachable ();
4204 if (i == (size_t) -1)
4205 break;
4206 if (n->simdclone->args[i].alignment > arginfo[i].align)
4208 i = -1;
4209 break;
4211 if (arginfo[i].align)
4212 this_badness += (exact_log2 (arginfo[i].align)
4213 - exact_log2 (n->simdclone->args[i].alignment));
4215 if (i == (size_t) -1)
4216 continue;
4217 if (bestn == NULL || this_badness < badness)
4219 bestn = n;
4220 badness = this_badness;
4224 if (bestn == NULL)
4225 return false;
4227 for (i = 0; i < nargs; i++)
4228 if ((arginfo[i].dt == vect_constant_def
4229 || arginfo[i].dt == vect_external_def)
4230 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4232 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4233 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4234 slp_node);
4235 if (arginfo[i].vectype == NULL
4236 || !constant_multiple_p (bestn->simdclone->simdlen,
4237 simd_clone_subparts (arginfo[i].vectype)))
4238 return false;
4241 fndecl = bestn->decl;
4242 nunits = bestn->simdclone->simdlen;
4243 ncopies = vector_unroll_factor (vf, nunits);
4245 /* If the function isn't const, only allow it in simd loops where the
4246 user has asserted that at least nunits consecutive iterations can be
4247 performed using SIMD instructions. */
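/* (loop->safelen is set e.g. from the safelen clause of #pragma omp simd.)  */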
4248 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4249 && gimple_vuse (stmt))
4250 return false;
4252 /* Sanity check: make sure that at least one copy of the vectorized stmt
4253 needs to be generated. */
4254 gcc_assert (ncopies >= 1);
4256 if (!vec_stmt) /* transformation not required. */
4258 /* When the original call is pure or const but the SIMD ABI dictates
4259 an aggregate return, we will have to use a virtual definition and,
4260 in a loop, eventually even need to add a virtual PHI. That is
4261 not straightforward, so allow this to be fixed up via renaming. */
4262 if (gimple_call_lhs (stmt)
4263 && !gimple_vdef (stmt)
4264 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4265 vinfo->any_known_not_updated_vssa = true;
4266 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
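/* STMT_VINFO_SIMD_CLONE_INFO layout: element 0 is the selected clone's
   decl; for each constant-step linear argument I, elements I*3+1..I*3+3
   hold the base, the step and the simd-lane-linear flag, which the
   transform phase reads back above.  */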
4267 for (i = 0; i < nargs; i++)
4268 if ((bestn->simdclone->args[i].arg_type
4269 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4270 || (bestn->simdclone->args[i].arg_type
4271 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4273 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4274 + 1,
4275 true);
4276 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4277 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4278 ? size_type_node : TREE_TYPE (arginfo[i].op);
4279 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4280 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4281 tree sll = arginfo[i].simd_lane_linear
4282 ? boolean_true_node : boolean_false_node;
4283 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4285 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4286 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4287 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4288 dt, slp_node, cost_vec); */
4289 return true;
4292 /* Transform. */
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4297 /* Handle def. */
4298 scalar_dest = gimple_call_lhs (stmt);
4299 vec_dest = NULL_TREE;
4300 rtype = NULL_TREE;
4301 ratype = NULL_TREE;
4302 if (scalar_dest)
4304 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4305 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4306 if (TREE_CODE (rtype) == ARRAY_TYPE)
4308 ratype = rtype;
4309 rtype = TREE_TYPE (ratype);
4313 auto_vec<vec<tree> > vec_oprnds;
4314 auto_vec<unsigned> vec_oprnds_i;
4315 vec_oprnds.safe_grow_cleared (nargs, true);
4316 vec_oprnds_i.safe_grow_cleared (nargs, true);
4317 for (j = 0; j < ncopies; ++j)
4319 /* Build argument list for the vectorized call. */
4320 if (j == 0)
4321 vargs.create (nargs);
4322 else
4323 vargs.truncate (0);
4325 for (i = 0; i < nargs; i++)
4327 unsigned int k, l, m, o;
4328 tree atype;
4329 op = gimple_call_arg (stmt, i);
4330 switch (bestn->simdclone->args[i].arg_type)
4332 case SIMD_CLONE_ARG_TYPE_VECTOR:
4333 atype = bestn->simdclone->args[i].vector_type;
4334 o = vector_unroll_factor (nunits,
4335 simd_clone_subparts (atype));
4336 for (m = j * o; m < (j + 1) * o; m++)
4338 if (simd_clone_subparts (atype)
4339 < simd_clone_subparts (arginfo[i].vectype))
4341 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4342 k = (simd_clone_subparts (arginfo[i].vectype)
4343 / simd_clone_subparts (atype));
4344 gcc_assert ((k & (k - 1)) == 0);
4345 if (m == 0)
4347 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4348 ncopies * o / k, op,
4349 &vec_oprnds[i]);
4350 vec_oprnds_i[i] = 0;
4351 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4353 else
4355 vec_oprnd0 = arginfo[i].op;
4356 if ((m & (k - 1)) == 0)
4357 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4359 arginfo[i].op = vec_oprnd0;
4360 vec_oprnd0
4361 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4362 bitsize_int (prec),
4363 bitsize_int ((m & (k - 1)) * prec));
4364 gassign *new_stmt
4365 = gimple_build_assign (make_ssa_name (atype),
4366 vec_oprnd0);
4367 vect_finish_stmt_generation (vinfo, stmt_info,
4368 new_stmt, gsi);
4369 vargs.safe_push (gimple_assign_lhs (new_stmt));
4371 else
4373 k = (simd_clone_subparts (atype)
4374 / simd_clone_subparts (arginfo[i].vectype));
4375 gcc_assert ((k & (k - 1)) == 0);
4376 vec<constructor_elt, va_gc> *ctor_elts;
4377 if (k != 1)
4378 vec_alloc (ctor_elts, k);
4379 else
4380 ctor_elts = NULL;
4381 for (l = 0; l < k; l++)
4383 if (m == 0 && l == 0)
4385 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4386 k * o * ncopies,
4388 &vec_oprnds[i]);
4389 vec_oprnds_i[i] = 0;
4390 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4392 else
4393 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4394 arginfo[i].op = vec_oprnd0;
4395 if (k == 1)
4396 break;
4397 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4398 vec_oprnd0);
4400 if (k == 1)
4401 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4402 atype))
4404 vec_oprnd0
4405 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4406 gassign *new_stmt
4407 = gimple_build_assign (make_ssa_name (atype),
4408 vec_oprnd0);
4409 vect_finish_stmt_generation (vinfo, stmt_info,
4410 new_stmt, gsi);
4411 vargs.safe_push (gimple_assign_lhs (new_stmt));
4413 else
4414 vargs.safe_push (vec_oprnd0);
4415 else
4417 vec_oprnd0 = build_constructor (atype, ctor_elts);
4418 gassign *new_stmt
4419 = gimple_build_assign (make_ssa_name (atype),
4420 vec_oprnd0);
4421 vect_finish_stmt_generation (vinfo, stmt_info,
4422 new_stmt, gsi);
4423 vargs.safe_push (gimple_assign_lhs (new_stmt));
4427 break;
4428 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4429 vargs.safe_push (op);
4430 break;
4431 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4432 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4433 if (j == 0)
4435 gimple_seq stmts;
4436 arginfo[i].op
4437 = force_gimple_operand (unshare_expr (arginfo[i].op),
4438 &stmts, true, NULL_TREE);
4439 if (stmts != NULL)
4441 basic_block new_bb;
4442 edge pe = loop_preheader_edge (loop);
4443 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4444 gcc_assert (!new_bb);
4446 if (arginfo[i].simd_lane_linear)
4448 vargs.safe_push (arginfo[i].op);
4449 break;
4451 tree phi_res = copy_ssa_name (op);
4452 gphi *new_phi = create_phi_node (phi_res, loop->header);
4453 add_phi_arg (new_phi, arginfo[i].op,
4454 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4455 enum tree_code code
4456 = POINTER_TYPE_P (TREE_TYPE (op))
4457 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4458 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4459 ? sizetype : TREE_TYPE (op);
4460 poly_widest_int cst
4461 = wi::mul (bestn->simdclone->args[i].linear_step,
4462 ncopies * nunits);
4463 tree tcst = wide_int_to_tree (type, cst);
4464 tree phi_arg = copy_ssa_name (op);
4465 gassign *new_stmt
4466 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4467 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4468 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4469 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4470 UNKNOWN_LOCATION);
4471 arginfo[i].op = phi_res;
4472 vargs.safe_push (phi_res);
4474 else
4476 enum tree_code code
4477 = POINTER_TYPE_P (TREE_TYPE (op))
4478 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4479 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4480 ? sizetype : TREE_TYPE (op);
4481 poly_widest_int cst
4482 = wi::mul (bestn->simdclone->args[i].linear_step,
4483 j * nunits);
4484 tree tcst = wide_int_to_tree (type, cst);
4485 new_temp = make_ssa_name (TREE_TYPE (op));
4486 gassign *new_stmt
4487 = gimple_build_assign (new_temp, code,
4488 arginfo[i].op, tcst);
4489 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4490 vargs.safe_push (new_temp);
4492 break;
4493 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4494 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4495 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4496 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4497 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4498 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4499 default:
4500 gcc_unreachable ();
4504 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4505 if (vec_dest)
4507 gcc_assert (ratype
4508 || known_eq (simd_clone_subparts (rtype), nunits));
4509 if (ratype)
4510 new_temp = create_tmp_var (ratype);
4511 else if (useless_type_conversion_p (vectype, rtype))
4512 new_temp = make_ssa_name (vec_dest, new_call);
4513 else
4514 new_temp = make_ssa_name (rtype, new_call);
4515 gimple_call_set_lhs (new_call, new_temp);
4517 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4518 gimple *new_stmt = new_call;
4520 if (vec_dest)
4522 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4524 unsigned int k, l;
4525 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4526 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4527 k = vector_unroll_factor (nunits,
4528 simd_clone_subparts (vectype));
4529 gcc_assert ((k & (k - 1)) == 0);
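/* E.g. if the clone's return value covers eight lanes but the loop
   vectype is V4SI, k == 2 V4SI pieces are extracted from each call
   result.  */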
4530 for (l = 0; l < k; l++)
4532 tree t;
4533 if (ratype)
4535 t = build_fold_addr_expr (new_temp);
4536 t = build2 (MEM_REF, vectype, t,
4537 build_int_cst (TREE_TYPE (t), l * bytes));
4539 else
4540 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4541 bitsize_int (prec), bitsize_int (l * prec));
4542 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4543 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4545 if (j == 0 && l == 0)
4546 *vec_stmt = new_stmt;
4547 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4550 if (ratype)
4551 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4552 continue;
4554 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4556 unsigned int k = (simd_clone_subparts (vectype)
4557 / simd_clone_subparts (rtype));
4558 gcc_assert ((k & (k - 1)) == 0);
4559 if ((j & (k - 1)) == 0)
4560 vec_alloc (ret_ctor_elts, k);
4561 if (ratype)
4563 unsigned int m, o;
4564 o = vector_unroll_factor (nunits,
4565 simd_clone_subparts (rtype));
4566 for (m = 0; m < o; m++)
4568 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4569 size_int (m), NULL_TREE, NULL_TREE);
4570 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4571 tem);
4572 vect_finish_stmt_generation (vinfo, stmt_info,
4573 new_stmt, gsi);
4574 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4575 gimple_assign_lhs (new_stmt));
4577 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4579 else
4580 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4581 if ((j & (k - 1)) != k - 1)
4582 continue;
4583 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4584 new_stmt
4585 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4586 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4588 if ((unsigned) j == k - 1)
4589 *vec_stmt = new_stmt;
4590 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4591 continue;
4593 else if (ratype)
4595 tree t = build_fold_addr_expr (new_temp);
4596 t = build2 (MEM_REF, vectype, t,
4597 build_int_cst (TREE_TYPE (t), 0));
4598 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4599 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4600 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4602 else if (!useless_type_conversion_p (vectype, rtype))
4604 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4605 new_stmt
4606 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4607 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4611 if (j == 0)
4612 *vec_stmt = new_stmt;
4613 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4616 for (i = 0; i < nargs; ++i)
4618 vec<tree> oprndsi = vec_oprnds[i];
4619 oprndsi.release ();
4621 vargs.release ();
4623 /* The call in STMT might prevent it from being removed in DCE.
4624 However, we cannot remove it here, due to the way the SSA name
4625 it defines is mapped to the new definition. So just replace the
4626 rhs of the statement with something harmless. */
4628 if (slp_node)
4629 return true;
4631 gimple *new_stmt;
4632 if (scalar_dest)
4634 type = TREE_TYPE (scalar_dest);
4635 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4636 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4638 else
4639 new_stmt = gimple_build_nop ();
4640 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4641 unlink_stmt_vdef (stmt);
4643 return true;
4647 /* Function vect_gen_widened_results_half
4649 Create a vector stmt whose code, number of operands, and result
4650 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4651 VEC_OPRND0 and VEC_OPRND1 (the latter is ignored unless CODE is a
4652 binary operation). The new vector stmt is to be inserted at GSI.
4654 STMT_INFO is the original scalar stmt that we are vectorizing. */
4656 static gimple *
4657 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4658 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4659 tree vec_dest, gimple_stmt_iterator *gsi,
4660 stmt_vec_info stmt_info)
4662 gimple *new_stmt;
4663 tree new_temp;
4665 /* Generate half of the widened result: */
4666 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4667 if (op_type != binary_op)
4668 vec_oprnd1 = NULL;
4669 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4670 new_temp = make_ssa_name (vec_dest, new_stmt);
4671 gimple_assign_set_lhs (new_stmt, new_temp);
4672 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4674 return new_stmt;
4678 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4679 For multi-step conversions store the resulting vectors and call the function
4680 recursively. */
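/* E.g. for a two-step demotion (multi_step_cvt == 1) from V4SI to V16QI,
   pairs of V4SI operands are first packed into V8HI vectors and a
   recursive call then packs pairs of those into the V16QI results.  */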
4682 static void
4683 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4684 int multi_step_cvt,
4685 stmt_vec_info stmt_info,
4686 vec<tree> &vec_dsts,
4687 gimple_stmt_iterator *gsi,
4688 slp_tree slp_node, enum tree_code code)
4690 unsigned int i;
4691 tree vop0, vop1, new_tmp, vec_dest;
4693 vec_dest = vec_dsts.pop ();
4695 for (i = 0; i < vec_oprnds->length (); i += 2)
4697 /* Create demotion operation. */
4698 vop0 = (*vec_oprnds)[i];
4699 vop1 = (*vec_oprnds)[i + 1];
4700 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4701 new_tmp = make_ssa_name (vec_dest, new_stmt);
4702 gimple_assign_set_lhs (new_stmt, new_tmp);
4703 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4705 if (multi_step_cvt)
4706 /* Store the resulting vector for next recursive call. */
4707 (*vec_oprnds)[i/2] = new_tmp;
4708 else
4710 /* This is the last step of the conversion sequence. Store the
4711 vectors in SLP_NODE or in vector info of the scalar statement
4712 (or in STMT_VINFO_RELATED_STMT chain). */
4713 if (slp_node)
4714 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4715 else
4716 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4720 /* For multi-step demotion operations we first generate demotion operations
4721 from the source type to the intermediate types, and then combine the
4722 results (stored in VEC_OPRNDS) in a further demotion operation to the
4723 destination type. */
4724 if (multi_step_cvt)
4726 /* At each level of recursion we have half of the operands we had at the
4727 previous level. */
4728 vec_oprnds->truncate ((i+1)/2);
4729 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4730 multi_step_cvt - 1,
4731 stmt_info, vec_dsts, gsi,
4732 slp_node, VEC_PACK_TRUNC_EXPR);
4735 vec_dsts.quick_push (vec_dest);
4739 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4740 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4741 STMT_INFO. For multi-step conversions store the resulting vectors and
4742 call the function recursively. */
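/* E.g. when widening V8HI operands to V4SI results, each V8HI input
   yields a "lo" and a "hi" V4SI vector via CODE1/CODE2, so the number of
   result vectors is twice the number of inputs.  */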
4744 static void
4745 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4746 vec<tree> *vec_oprnds0,
4747 vec<tree> *vec_oprnds1,
4748 stmt_vec_info stmt_info, tree vec_dest,
4749 gimple_stmt_iterator *gsi,
4750 enum tree_code code1,
4751 enum tree_code code2, int op_type)
4753 int i;
4754 tree vop0, vop1, new_tmp1, new_tmp2;
4755 gimple *new_stmt1, *new_stmt2;
4756 vec<tree> vec_tmp = vNULL;
4758 vec_tmp.create (vec_oprnds0->length () * 2);
4759 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4761 if (op_type == binary_op)
4762 vop1 = (*vec_oprnds1)[i];
4763 else
4764 vop1 = NULL_TREE;
4766 /* Generate the two halves of promotion operation. */
4767 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4768 op_type, vec_dest, gsi,
4769 stmt_info);
4770 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4771 op_type, vec_dest, gsi,
4772 stmt_info);
4773 if (is_gimple_call (new_stmt1))
4775 new_tmp1 = gimple_call_lhs (new_stmt1);
4776 new_tmp2 = gimple_call_lhs (new_stmt2);
4778 else
4780 new_tmp1 = gimple_assign_lhs (new_stmt1);
4781 new_tmp2 = gimple_assign_lhs (new_stmt2);
4784 /* Store the results for the next step. */
4785 vec_tmp.quick_push (new_tmp1);
4786 vec_tmp.quick_push (new_tmp2);
4789 vec_oprnds0->release ();
4790 *vec_oprnds0 = vec_tmp;
4793 /* Create vectorized promotion stmts for widening stmts using only half the
4794 potential vector size for input. */
4795 static void
4796 vect_create_half_widening_stmts (vec_info *vinfo,
4797 vec<tree> *vec_oprnds0,
4798 vec<tree> *vec_oprnds1,
4799 stmt_vec_info stmt_info, tree vec_dest,
4800 gimple_stmt_iterator *gsi,
4801 enum tree_code code1,
4802 int op_type)
4804 int i;
4805 tree vop0, vop1;
4806 gimple *new_stmt1;
4807 gimple *new_stmt2;
4808 gimple *new_stmt3;
4809 vec<tree> vec_tmp = vNULL;
4811 vec_tmp.create (vec_oprnds0->length ());
4812 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4814 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4816 gcc_assert (op_type == binary_op);
4817 vop1 = (*vec_oprnds1)[i];
4819 /* Widen the first vector input. */
4820 out_type = TREE_TYPE (vec_dest);
4821 new_tmp1 = make_ssa_name (out_type);
4822 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4823 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4824 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4826 /* Widen the second vector input. */
4827 new_tmp2 = make_ssa_name (out_type);
4828 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4829 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4830 /* Perform the operation with both vector inputs widened. */
4831 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4833 else
4835 /* Perform the operation with the single vector input widened. */
4836 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4839 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4840 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4841 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4843 /* Store the results for the next step. */
4844 vec_tmp.quick_push (new_tmp3);
4847 vec_oprnds0->release ();
4848 *vec_oprnds0 = vec_tmp;
4852 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4853 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4854 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4855 Return true if STMT_INFO is vectorizable in this way. */
4857 static bool
4858 vectorizable_conversion (vec_info *vinfo,
4859 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4860 gimple **vec_stmt, slp_tree slp_node,
4861 stmt_vector_for_cost *cost_vec)
4863 tree vec_dest;
4864 tree scalar_dest;
4865 tree op0, op1 = NULL_TREE;
4866 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4867 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4868 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4869 tree new_temp;
4870 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4871 int ndts = 2;
4872 poly_uint64 nunits_in;
4873 poly_uint64 nunits_out;
4874 tree vectype_out, vectype_in;
4875 int ncopies, i;
4876 tree lhs_type, rhs_type;
4877 enum { NARROW, NONE, WIDEN } modifier;
4878 vec<tree> vec_oprnds0 = vNULL;
4879 vec<tree> vec_oprnds1 = vNULL;
4880 tree vop0;
4881 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4882 int multi_step_cvt = 0;
4883 vec<tree> interm_types = vNULL;
4884 tree intermediate_type, cvt_type = NULL_TREE;
4885 int op_type;
4886 unsigned short fltsz;
4888 /* Is STMT a vectorizable conversion? */
4890 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4891 return false;
4893 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4894 && ! vec_stmt)
4895 return false;
4897 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4898 if (!stmt)
4899 return false;
4901 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4902 return false;
4904 code = gimple_assign_rhs_code (stmt);
4905 if (!CONVERT_EXPR_CODE_P (code)
4906 && code != FIX_TRUNC_EXPR
4907 && code != FLOAT_EXPR
4908 && code != WIDEN_PLUS_EXPR
4909 && code != WIDEN_MINUS_EXPR
4910 && code != WIDEN_MULT_EXPR
4911 && code != WIDEN_LSHIFT_EXPR)
4912 return false;
4914 bool widen_arith = (code == WIDEN_PLUS_EXPR
4915 || code == WIDEN_MINUS_EXPR
4916 || code == WIDEN_MULT_EXPR
4917 || code == WIDEN_LSHIFT_EXPR);
4918 op_type = TREE_CODE_LENGTH (code);
4920 /* Check types of lhs and rhs. */
4921 scalar_dest = gimple_assign_lhs (stmt);
4922 lhs_type = TREE_TYPE (scalar_dest);
4923 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4925 /* Check the operands of the operation. */
4926 slp_tree slp_op0, slp_op1 = NULL;
4927 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4928 0, &op0, &slp_op0, &dt[0], &vectype_in))
4930 if (dump_enabled_p ())
4931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4932 "use not simple.\n");
4933 return false;
4936 rhs_type = TREE_TYPE (op0);
4937 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4938 && !((INTEGRAL_TYPE_P (lhs_type)
4939 && INTEGRAL_TYPE_P (rhs_type))
4940 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4941 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4942 return false;
4944 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4945 && ((INTEGRAL_TYPE_P (lhs_type)
4946 && !type_has_mode_precision_p (lhs_type))
4947 || (INTEGRAL_TYPE_P (rhs_type)
4948 && !type_has_mode_precision_p (rhs_type))))
4950 if (dump_enabled_p ())
4951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4952 "type conversion to/from bit-precision unsupported."
4953 "\n");
4954 return false;
4957 if (op_type == binary_op)
4959 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4960 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4962 op1 = gimple_assign_rhs2 (stmt);
4963 tree vectype1_in;
4964 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4965 &op1, &slp_op1, &dt[1], &vectype1_in))
4967 if (dump_enabled_p ())
4968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4969 "use not simple.\n");
4970 return false;
4972 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4973 OP1. */
4974 if (!vectype_in)
4975 vectype_in = vectype1_in;
4978 /* If op0 is an external or constant def, infer the vector type
4979 from the scalar type. */
4980 if (!vectype_in)
4981 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4982 if (vec_stmt)
4983 gcc_assert (vectype_in);
4984 if (!vectype_in)
4986 if (dump_enabled_p ())
4987 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4988 "no vectype for scalar type %T\n", rhs_type);
4990 return false;
4993 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4994 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4996 if (dump_enabled_p ())
4997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4998 "can't convert between boolean and non "
4999 "boolean vectors %T\n", rhs_type);
5001 return false;
5004 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5005 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5006 if (known_eq (nunits_out, nunits_in))
5007 if (widen_arith)
5008 modifier = WIDEN;
5009 else
5010 modifier = NONE;
5011 else if (multiple_p (nunits_out, nunits_in))
5012 modifier = NARROW;
5013 else
5015 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5016 modifier = WIDEN;
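/* E.g. a DI -> SI conversion with V2DI input and V4SI output is NARROW,
   an SI -> DI conversion with V4SI input and V2DI output is WIDEN, and
   equal element counts are WIDEN only for the widening arithmetic codes
   (the half-widening case), otherwise NONE.  */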
5019 /* Multiple types in SLP are handled by creating the appropriate number of
5020 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5021 case of SLP. */
5022 if (slp_node)
5023 ncopies = 1;
5024 else if (modifier == NARROW)
5025 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5026 else
5027 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5029 /* Sanity check: make sure that at least one copy of the vectorized stmt
5030 needs to be generated. */
5031 gcc_assert (ncopies >= 1);
5033 bool found_mode = false;
5034 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5035 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5036 opt_scalar_mode rhs_mode_iter;
5038 /* Supportable by target? */
5039 switch (modifier)
5041 case NONE:
5042 if (code != FIX_TRUNC_EXPR
5043 && code != FLOAT_EXPR
5044 && !CONVERT_EXPR_CODE_P (code))
5045 return false;
5046 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
5047 break;
5048 /* FALLTHRU */
5049 unsupported:
5050 if (dump_enabled_p ())
5051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5052 "conversion not supported by target.\n");
5053 return false;
5055 case WIDEN:
5056 if (known_eq (nunits_in, nunits_out))
5058 if (!supportable_half_widening_operation (code, vectype_out,
5059 vectype_in, &code1))
5060 goto unsupported;
5061 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5062 break;
5064 if (supportable_widening_operation (vinfo, code, stmt_info,
5065 vectype_out, vectype_in, &code1,
5066 &code2, &multi_step_cvt,
5067 &interm_types))
5069 /* Binary widening operation can only be supported directly by the
5070 architecture. */
5071 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5072 break;
5075 if (code != FLOAT_EXPR
5076 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5077 goto unsupported;
5079 fltsz = GET_MODE_SIZE (lhs_mode);
5080 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5082 rhs_mode = rhs_mode_iter.require ();
5083 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5084 break;
5086 cvt_type
5087 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5088 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5089 if (cvt_type == NULL_TREE)
5090 goto unsupported;
5092 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5094 if (!supportable_convert_operation (code, vectype_out,
5095 cvt_type, &codecvt1))
5096 goto unsupported;
5098 else if (!supportable_widening_operation (vinfo, code, stmt_info,
5099 vectype_out, cvt_type,
5100 &codecvt1, &codecvt2,
5101 &multi_step_cvt,
5102 &interm_types))
5103 continue;
5104 else
5105 gcc_assert (multi_step_cvt == 0);
5107 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5108 cvt_type,
5109 vectype_in, &code1, &code2,
5110 &multi_step_cvt, &interm_types))
5112 found_mode = true;
5113 break;
5117 if (!found_mode)
5118 goto unsupported;
5120 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5121 codecvt2 = ERROR_MARK;
5122 else
5124 multi_step_cvt++;
5125 interm_types.safe_push (cvt_type);
5126 cvt_type = NULL_TREE;
5128 break;
5130 case NARROW:
5131 gcc_assert (op_type == unary_op);
5132 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5133 &code1, &multi_step_cvt,
5134 &interm_types))
5135 break;
5137 if (code != FIX_TRUNC_EXPR
5138 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5139 goto unsupported;
5141 cvt_type
5142 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5143 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5144 if (cvt_type == NULL_TREE)
5145 goto unsupported;
5146 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5147 &codecvt1))
5148 goto unsupported;
5149 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5150 &code1, &multi_step_cvt,
5151 &interm_types))
5152 break;
5153 goto unsupported;
5155 default:
5156 gcc_unreachable ();
5159 if (!vec_stmt) /* transformation not required. */
5161 if (slp_node
5162 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5163 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5165 if (dump_enabled_p ())
5166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5167 "incompatible vector types for invariants\n");
5168 return false;
5170 DUMP_VECT_SCOPE ("vectorizable_conversion");
5171 if (modifier == NONE)
5173 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5174 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5175 cost_vec);
5177 else if (modifier == NARROW)
5179 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5180 /* The final packing step produces one vector result per copy. */
5181 unsigned int nvectors
5182 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5183 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5184 multi_step_cvt, cost_vec,
5185 widen_arith);
5187 else
5189 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5190 /* The initial unpacking step produces two vector results
5191 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5192 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5193 unsigned int nvectors
5194 = (slp_node
5195 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5196 : ncopies * 2);
5197 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5198 multi_step_cvt, cost_vec,
5199 widen_arith);
5201 interm_types.release ();
5202 return true;
5205 /* Transform. */
5206 if (dump_enabled_p ())
5207 dump_printf_loc (MSG_NOTE, vect_location,
5208 "transform conversion. ncopies = %d.\n", ncopies);
5210 if (op_type == binary_op)
5212 if (CONSTANT_CLASS_P (op0))
5213 op0 = fold_convert (TREE_TYPE (op1), op0);
5214 else if (CONSTANT_CLASS_P (op1))
5215 op1 = fold_convert (TREE_TYPE (op0), op1);
5218 /* In case of multi-step conversion, we first generate conversion operations
5219 to the intermediate types, and then from those types to the final one.
5220 We create vector destinations for the intermediate types (TYPES) received
5221 from supportable_*_operation, and store them in the correct order
5222 for future use in vect_create_vectorized_*_stmts (). */
5223 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5224 vec_dest = vect_create_destination_var (scalar_dest,
5225 (cvt_type && modifier == WIDEN)
5226 ? cvt_type : vectype_out);
5227 vec_dsts.quick_push (vec_dest);
5229 if (multi_step_cvt)
5231 for (i = interm_types.length () - 1;
5232 interm_types.iterate (i, &intermediate_type); i--)
5234 vec_dest = vect_create_destination_var (scalar_dest,
5235 intermediate_type);
5236 vec_dsts.quick_push (vec_dest);
5240 if (cvt_type)
5241 vec_dest = vect_create_destination_var (scalar_dest,
5242 modifier == WIDEN
5243 ? vectype_out : cvt_type);
5245 int ninputs = 1;
5246 if (!slp_node)
5248 if (modifier == WIDEN)
5250 else if (modifier == NARROW)
5252 if (multi_step_cvt)
5253 ninputs = vect_pow2 (multi_step_cvt);
5254 ninputs *= 2;
5258 switch (modifier)
5260 case NONE:
5261 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5262 op0, &vec_oprnds0);
5263 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5265 /* Arguments are ready, create the new vector stmt. */
5266 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5267 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5268 new_temp = make_ssa_name (vec_dest, new_stmt);
5269 gimple_assign_set_lhs (new_stmt, new_temp);
5270 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5272 if (slp_node)
5273 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5274 else
5275 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5277 break;
5279 case WIDEN:
5280 /* In case the vectorization factor (VF) is bigger than the number
5281 of elements that we can fit in a vectype (nunits), we have to
5282 generate more than one vector stmt - i.e., we need to "unroll"
5283 the vector stmt by a factor VF/nunits. */
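/* E.g. on a target with 128-bit vectors, widening a V8HI of shorts
   produces a "lo" and a "hi" V4SI of ints, so every unpacking step
   doubles the number of vector stmts produced from each input. */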
5284 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5285 op0, &vec_oprnds0,
5286 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5287 &vec_oprnds1);
5288 if (code == WIDEN_LSHIFT_EXPR)
5290 int oprnds_size = vec_oprnds0.length ();
5291 vec_oprnds1.create (oprnds_size);
5292 for (i = 0; i < oprnds_size; ++i)
5293 vec_oprnds1.quick_push (op1);
5295 /* Arguments are ready. Create the new vector stmts. */
5296 for (i = multi_step_cvt; i >= 0; i--)
5298 tree this_dest = vec_dsts[i];
5299 enum tree_code c1 = code1, c2 = code2;
5300 if (i == 0 && codecvt2 != ERROR_MARK)
5302 c1 = codecvt1;
5303 c2 = codecvt2;
5305 if (known_eq (nunits_out, nunits_in))
5306 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5307 &vec_oprnds1, stmt_info,
5308 this_dest, gsi,
5309 c1, op_type);
5310 else
5311 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5312 &vec_oprnds1, stmt_info,
5313 this_dest, gsi,
5314 c1, c2, op_type);
5317 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5319 gimple *new_stmt;
5320 if (cvt_type)
5322 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5323 new_temp = make_ssa_name (vec_dest);
5324 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5325 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5327 else
5328 new_stmt = SSA_NAME_DEF_STMT (vop0);
5330 if (slp_node)
5331 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5332 else
5333 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5335 break;
5337 case NARROW:
5338 /* In case the vectorization factor (VF) is bigger than the number
5339 of elements that we can fit in a vectype (nunits), we have to
5340 generate more than one vector stmt - i.e., we need to "unroll"
5341 the vector stmt by a factor VF/nunits. */
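/* Narrowing works the other way round: each packing step combines two
   input vectors into one (e.g. two V4SI of ints into one V8HI of shorts),
   which is why, in the non-SLP case, NINPUTS above is 2 (times
   2^MULTI_STEP_CVT for a multi-step narrowing) and ncopies * ninputs
   defs are fetched here. */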
5342 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5343 op0, &vec_oprnds0);
5344 /* Arguments are ready. Create the new vector stmts. */
5345 if (cvt_type)
5346 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5348 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5349 new_temp = make_ssa_name (vec_dest);
5350 gassign *new_stmt
5351 = gimple_build_assign (new_temp, codecvt1, vop0);
5352 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5353 vec_oprnds0[i] = new_temp;
5356 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5357 multi_step_cvt,
5358 stmt_info, vec_dsts, gsi,
5359 slp_node, code1);
5360 break;
5362 if (!slp_node)
5363 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5365 vec_oprnds0.release ();
5366 vec_oprnds1.release ();
5367 interm_types.release ();
5369 return true;
5372 /* Return true if we can assume from the scalar form of STMT_INFO that
5373 neither the scalar nor the vector forms will generate code. STMT_INFO
5374 is known not to involve a data reference. */
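/* Examples: a VIEW_CONVERT_EXPR between same-sized types, a plain
   SSA_NAME copy, or a NOP_EXPR between int and unsigned int: in all of
   these both the scalar and the vector form are just copies that
   generate no code. */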
5376 bool
5377 vect_nop_conversion_p (stmt_vec_info stmt_info)
5379 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5380 if (!stmt)
5381 return false;
5383 tree lhs = gimple_assign_lhs (stmt);
5384 tree_code code = gimple_assign_rhs_code (stmt);
5385 tree rhs = gimple_assign_rhs1 (stmt);
5387 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5388 return true;
5390 if (CONVERT_EXPR_CODE_P (code))
5391 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5393 return false;
5396 /* Function vectorizable_assignment.
5398 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5399 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5400 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5401 Return true if STMT_INFO is vectorizable in this way. */
5403 static bool
5404 vectorizable_assignment (vec_info *vinfo,
5405 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5406 gimple **vec_stmt, slp_tree slp_node,
5407 stmt_vector_for_cost *cost_vec)
5409 tree vec_dest;
5410 tree scalar_dest;
5411 tree op;
5412 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5413 tree new_temp;
5414 enum vect_def_type dt[1] = {vect_unknown_def_type};
5415 int ndts = 1;
5416 int ncopies;
5417 int i;
5418 vec<tree> vec_oprnds = vNULL;
5419 tree vop;
5420 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5421 enum tree_code code;
5422 tree vectype_in;
5424 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5425 return false;
5427 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5428 && ! vec_stmt)
5429 return false;
5431 /* Is vectorizable assignment? */
5432 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5433 if (!stmt)
5434 return false;
5436 scalar_dest = gimple_assign_lhs (stmt);
5437 if (TREE_CODE (scalar_dest) != SSA_NAME)
5438 return false;
5440 if (STMT_VINFO_DATA_REF (stmt_info))
5441 return false;
5443 code = gimple_assign_rhs_code (stmt);
5444 if (!(gimple_assign_single_p (stmt)
5445 || code == PAREN_EXPR
5446 || CONVERT_EXPR_CODE_P (code)))
5447 return false;
5449 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5450 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5452 /* Multiple types in SLP are handled by creating the appropriate number of
5453 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5454 case of SLP. */
5455 if (slp_node)
5456 ncopies = 1;
5457 else
5458 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5460 gcc_assert (ncopies >= 1);
5462 slp_tree slp_op;
5463 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5464 &dt[0], &vectype_in))
5466 if (dump_enabled_p ())
5467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5468 "use not simple.\n");
5469 return false;
5471 if (!vectype_in)
5472 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5474 /* We can handle NOP_EXPR conversions that do not change the number
5475 of elements or the vector size. */
5476 if ((CONVERT_EXPR_CODE_P (code)
5477 || code == VIEW_CONVERT_EXPR)
5478 && (!vectype_in
5479 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5480 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5481 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5482 return false;
5484 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5485 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5487 if (dump_enabled_p ())
5488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5489 "can't convert between boolean and non "
5490 "boolean vectors %T\n", TREE_TYPE (op));
5492 return false;
5495 /* We do not handle bit-precision changes. */
5496 if ((CONVERT_EXPR_CODE_P (code)
5497 || code == VIEW_CONVERT_EXPR)
5498 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5499 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5500 || !type_has_mode_precision_p (TREE_TYPE (op)))
5501 /* But a conversion that does not change the bit-pattern is ok. */
5502 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5503 > TYPE_PRECISION (TREE_TYPE (op)))
5504 && TYPE_UNSIGNED (TREE_TYPE (op))))
5506 if (dump_enabled_p ())
5507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5508 "type conversion to/from bit-precision "
5509 "unsupported.\n");
5510 return false;
5513 if (!vec_stmt) /* transformation not required. */
5515 if (slp_node
5516 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5518 if (dump_enabled_p ())
5519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5520 "incompatible vector types for invariants\n");
5521 return false;
5523 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5524 DUMP_VECT_SCOPE ("vectorizable_assignment");
5525 if (!vect_nop_conversion_p (stmt_info))
5526 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5527 cost_vec);
5528 return true;
5531 /* Transform. */
5532 if (dump_enabled_p ())
5533 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5535 /* Handle def. */
5536 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5538 /* Handle use. */
5539 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5541 /* Arguments are ready. Create the new vector stmt. */
5542 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5544 if (CONVERT_EXPR_CODE_P (code)
5545 || code == VIEW_CONVERT_EXPR)
5546 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5547 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5548 new_temp = make_ssa_name (vec_dest, new_stmt);
5549 gimple_assign_set_lhs (new_stmt, new_temp);
5550 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5551 if (slp_node)
5552 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5553 else
5554 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5556 if (!slp_node)
5557 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5559 vec_oprnds.release ();
5560 return true;
5564 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5565 either as shift by a scalar or by a vector. */
5567 bool
5568 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5571 machine_mode vec_mode;
5572 optab optab;
5573 int icode;
5574 tree vectype;
5576 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5577 if (!vectype)
5578 return false;
5580 optab = optab_for_tree_code (code, vectype, optab_scalar);
5581 if (!optab
5582 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5584 optab = optab_for_tree_code (code, vectype, optab_vector);
5585 if (!optab
5586 || (optab_handler (optab, TYPE_MODE (vectype))
5587 == CODE_FOR_nothing))
5588 return false;
5591 vec_mode = TYPE_MODE (vectype);
5592 icode = (int) optab_handler (optab, vec_mode);
5593 if (icode == CODE_FOR_nothing)
5594 return false;
5596 return true;
5600 /* Function vectorizable_shift.
5602 Check if STMT_INFO performs a shift operation that can be vectorized.
5603 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5604 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5605 Return true if STMT_INFO is vectorizable in this way. */
5607 static bool
5608 vectorizable_shift (vec_info *vinfo,
5609 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5610 gimple **vec_stmt, slp_tree slp_node,
5611 stmt_vector_for_cost *cost_vec)
5613 tree vec_dest;
5614 tree scalar_dest;
5615 tree op0, op1 = NULL;
5616 tree vec_oprnd1 = NULL_TREE;
5617 tree vectype;
5618 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5619 enum tree_code code;
5620 machine_mode vec_mode;
5621 tree new_temp;
5622 optab optab;
5623 int icode;
5624 machine_mode optab_op2_mode;
5625 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5626 int ndts = 2;
5627 poly_uint64 nunits_in;
5628 poly_uint64 nunits_out;
5629 tree vectype_out;
5630 tree op1_vectype;
5631 int ncopies;
5632 int i;
5633 vec<tree> vec_oprnds0 = vNULL;
5634 vec<tree> vec_oprnds1 = vNULL;
5635 tree vop0, vop1;
5636 unsigned int k;
5637 bool scalar_shift_arg = true;
5638 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5639 bool incompatible_op1_vectype_p = false;
5641 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5642 return false;
5644 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5645 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5646 && ! vec_stmt)
5647 return false;
5649 /* Is STMT a vectorizable shift or rotate operation? */
5650 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5651 if (!stmt)
5652 return false;
5654 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5655 return false;
5657 code = gimple_assign_rhs_code (stmt);
5659 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5660 || code == RROTATE_EXPR))
5661 return false;
5663 scalar_dest = gimple_assign_lhs (stmt);
5664 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5665 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5667 if (dump_enabled_p ())
5668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5669 "bit-precision shifts not supported.\n");
5670 return false;
5673 slp_tree slp_op0;
5674 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5675 0, &op0, &slp_op0, &dt[0], &vectype))
5677 if (dump_enabled_p ())
5678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5679 "use not simple.\n");
5680 return false;
5682 /* If op0 is an external or constant def, infer the vector type
5683 from the scalar type. */
5684 if (!vectype)
5685 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5686 if (vec_stmt)
5687 gcc_assert (vectype);
5688 if (!vectype)
5690 if (dump_enabled_p ())
5691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5692 "no vectype for scalar type\n");
5693 return false;
5696 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5697 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5698 if (maybe_ne (nunits_out, nunits_in))
5699 return false;
5701 stmt_vec_info op1_def_stmt_info;
5702 slp_tree slp_op1;
5703 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5704 &dt[1], &op1_vectype, &op1_def_stmt_info))
5706 if (dump_enabled_p ())
5707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5708 "use not simple.\n");
5709 return false;
5712 /* Multiple types in SLP are handled by creating the appropriate number of
5713 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5714 case of SLP. */
5715 if (slp_node)
5716 ncopies = 1;
5717 else
5718 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5720 gcc_assert (ncopies >= 1);
5722 /* Determine whether the shift amount is a vector, or scalar. If the
5723 shift/rotate amount is a vector, use the vector/vector shift optabs. */
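/* E.g. for
     for (i = 0; i < n; i++)
       a[i] = b[i] << k;
   the count K is the same for every lane (loop invariant), so a
   vector-shifted-by-scalar optab can be used, whereas
     for (i = 0; i < n; i++)
       a[i] = b[i] << c[i];
   needs the vector-shifted-by-vector optab because every lane has its
   own count. */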
5725 if ((dt[1] == vect_internal_def
5726 || dt[1] == vect_induction_def
5727 || dt[1] == vect_nested_cycle)
5728 && !slp_node)
5729 scalar_shift_arg = false;
5730 else if (dt[1] == vect_constant_def
5731 || dt[1] == vect_external_def
5732 || dt[1] == vect_internal_def)
5734 /* In SLP, we need to check whether the shift count is the same in
5735 all stmts; in loops, if it is a constant or invariant, it is
5736 always a scalar shift. */
5737 if (slp_node)
5739 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5740 stmt_vec_info slpstmt_info;
5742 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5744 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5745 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5746 scalar_shift_arg = false;
5749 /* For internal SLP defs we have to make sure we see scalar stmts
5750 for all vector elements.
5751 ??? For different vectors we could resort to a different
5752 scalar shift operand but code-generation below simply always
5753 takes the first. */
5754 if (dt[1] == vect_internal_def
5755 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5756 stmts.length ()))
5757 scalar_shift_arg = false;
5760 /* If the shift amount is computed by a pattern stmt we cannot
5761 use the scalar amount directly, so give up and use a vector
5762 shift. */
5763 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5764 scalar_shift_arg = false;
5766 else
5768 if (dump_enabled_p ())
5769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5770 "operand mode requires invariant argument.\n");
5771 return false;
5774 /* Vector shifted by vector. */
5775 bool was_scalar_shift_arg = scalar_shift_arg;
5776 if (!scalar_shift_arg)
5778 optab = optab_for_tree_code (code, vectype, optab_vector);
5779 if (dump_enabled_p ())
5780 dump_printf_loc (MSG_NOTE, vect_location,
5781 "vector/vector shift/rotate found.\n");
5783 if (!op1_vectype)
5784 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5785 slp_op1);
5786 incompatible_op1_vectype_p
5787 = (op1_vectype == NULL_TREE
5788 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5789 TYPE_VECTOR_SUBPARTS (vectype))
5790 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5791 if (incompatible_op1_vectype_p
5792 && (!slp_node
5793 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5794 || slp_op1->refcnt != 1))
5796 if (dump_enabled_p ())
5797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5798 "unusable type for last operand in"
5799 " vector/vector shift/rotate.\n");
5800 return false;
5803 /* See if the machine has a vector shifted by scalar insn and if not
5804 then see if it has a vector shifted by vector insn. */
5805 else
5807 optab = optab_for_tree_code (code, vectype, optab_scalar);
5808 if (optab
5809 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5811 if (dump_enabled_p ())
5812 dump_printf_loc (MSG_NOTE, vect_location,
5813 "vector/scalar shift/rotate found.\n");
5815 else
5817 optab = optab_for_tree_code (code, vectype, optab_vector);
5818 if (optab
5819 && (optab_handler (optab, TYPE_MODE (vectype))
5820 != CODE_FOR_nothing))
5822 scalar_shift_arg = false;
5824 if (dump_enabled_p ())
5825 dump_printf_loc (MSG_NOTE, vect_location,
5826 "vector/vector shift/rotate found.\n");
5828 if (!op1_vectype)
5829 op1_vectype = get_vectype_for_scalar_type (vinfo,
5830 TREE_TYPE (op1),
5831 slp_op1);
5833 /* Unlike the other binary operators, shifts/rotates have
5834 the rhs being int, instead of the same type as the lhs,
5835 so make sure the scalar is the right type if we are
5836 dealing with vectors of long long/long/short/char. */
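/* E.g. for "long long x; ... x << n" with an invariant int N the count
   has type int, but a vector/vector shift on V2DImode wants a V2DI
   count, so N has to be converted to the element type first (for
   internal defs we simply give up below). */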
5837 incompatible_op1_vectype_p
5838 = (!op1_vectype
5839 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5840 TREE_TYPE (op1)));
5841 if (incompatible_op1_vectype_p
5842 && dt[1] == vect_internal_def)
5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5846 "unusable type for last operand in"
5847 " vector/vector shift/rotate.\n");
5848 return false;
5854 /* Supportable by target? */
5855 if (!optab)
5857 if (dump_enabled_p ())
5858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5859 "no optab.\n");
5860 return false;
5862 vec_mode = TYPE_MODE (vectype);
5863 icode = (int) optab_handler (optab, vec_mode);
5864 if (icode == CODE_FOR_nothing)
5866 if (dump_enabled_p ())
5867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5868 "op not supported by target.\n");
5869 return false;
5871 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
5872 if (vect_emulated_vector_p (vectype))
5873 return false;
5875 if (!vec_stmt) /* transformation not required. */
5877 if (slp_node
5878 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5879 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5880 && (!incompatible_op1_vectype_p
5881 || dt[1] == vect_constant_def)
5882 && !vect_maybe_update_slp_op_vectype
5883 (slp_op1,
5884 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5886 if (dump_enabled_p ())
5887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5888 "incompatible vector types for invariants\n");
5889 return false;
5891 /* Now adjust the constant shift amount in place. */
5892 if (slp_node
5893 && incompatible_op1_vectype_p
5894 && dt[1] == vect_constant_def)
5896 for (unsigned i = 0;
5897 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5899 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5900 = fold_convert (TREE_TYPE (vectype),
5901 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5902 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5903 == INTEGER_CST));
5906 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5907 DUMP_VECT_SCOPE ("vectorizable_shift");
5908 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5909 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5910 return true;
5913 /* Transform. */
5915 if (dump_enabled_p ())
5916 dump_printf_loc (MSG_NOTE, vect_location,
5917 "transform binary/unary operation.\n");
5919 if (incompatible_op1_vectype_p && !slp_node)
5921 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5922 op1 = fold_convert (TREE_TYPE (vectype), op1);
5923 if (dt[1] != vect_constant_def)
5924 op1 = vect_init_vector (vinfo, stmt_info, op1,
5925 TREE_TYPE (vectype), NULL);
5928 /* Handle def. */
5929 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5931 if (scalar_shift_arg && dt[1] != vect_internal_def)
5933 /* Vector shl and shr insn patterns can be defined with scalar
5934 operand 2 (shift operand). In this case, use constant or loop
5935 invariant op1 directly, without extending it to vector mode
5936 first. */
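/* So for a constant count such as "x << 3" VEC_OPRND1 stays the scalar
   3 and is simply recorded once per copy below instead of being
   broadcast into a vector. */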
5937 optab_op2_mode = insn_data[icode].operand[2].mode;
5938 if (!VECTOR_MODE_P (optab_op2_mode))
5940 if (dump_enabled_p ())
5941 dump_printf_loc (MSG_NOTE, vect_location,
5942 "operand 1 using scalar mode.\n");
5943 vec_oprnd1 = op1;
5944 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5945 vec_oprnds1.quick_push (vec_oprnd1);
5946 /* Store vec_oprnd1 for every vector stmt to be created.
5947 We check during the analysis that all the shift arguments
5948 are the same.
5949 TODO: Allow different constants for different vector
5950 stmts generated for an SLP instance. */
5951 for (k = 0;
5952 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5953 vec_oprnds1.quick_push (vec_oprnd1);
5956 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5958 if (was_scalar_shift_arg)
5960 /* If the argument was the same in all lanes create
5961 the correctly typed vector shift amount directly. */
5962 op1 = fold_convert (TREE_TYPE (vectype), op1);
5963 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5964 !loop_vinfo ? gsi : NULL);
5965 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5966 !loop_vinfo ? gsi : NULL);
5967 vec_oprnds1.create (slp_node->vec_stmts_size);
5968 for (k = 0; k < slp_node->vec_stmts_size; k++)
5969 vec_oprnds1.quick_push (vec_oprnd1);
5971 else if (dt[1] == vect_constant_def)
5972 /* The constant shift amount has been adjusted in place. */
5974 else
5975 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5978 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5979 (a special case for certain kinds of vector shifts); otherwise,
5980 operand 1 should be of a vector type (the usual case). */
5981 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5982 op0, &vec_oprnds0,
5983 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5985 /* Arguments are ready. Create the new vector stmt. */
5986 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5988 /* For internal defs where we need to use a scalar shift arg,
5989 extract the first lane. */
5990 if (scalar_shift_arg && dt[1] == vect_internal_def)
5992 vop1 = vec_oprnds1[0];
5993 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5994 gassign *new_stmt
5995 = gimple_build_assign (new_temp,
5996 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5997 vop1,
5998 TYPE_SIZE (TREE_TYPE (new_temp)),
5999 bitsize_zero_node));
6000 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6001 vop1 = new_temp;
6003 else
6004 vop1 = vec_oprnds1[i];
6005 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6006 new_temp = make_ssa_name (vec_dest, new_stmt);
6007 gimple_assign_set_lhs (new_stmt, new_temp);
6008 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6009 if (slp_node)
6010 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6011 else
6012 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6015 if (!slp_node)
6016 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6018 vec_oprnds0.release ();
6019 vec_oprnds1.release ();
6021 return true;
6025 /* Function vectorizable_operation.
6027 Check if STMT_INFO performs a binary, unary or ternary operation that can
6028 be vectorized.
6029 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6030 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6031 Return true if STMT_INFO is vectorizable in this way. */
6033 static bool
6034 vectorizable_operation (vec_info *vinfo,
6035 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6036 gimple **vec_stmt, slp_tree slp_node,
6037 stmt_vector_for_cost *cost_vec)
6039 tree vec_dest;
6040 tree scalar_dest;
6041 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6042 tree vectype;
6043 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6044 enum tree_code code, orig_code;
6045 machine_mode vec_mode;
6046 tree new_temp;
6047 int op_type;
6048 optab optab;
6049 bool target_support_p;
6050 enum vect_def_type dt[3]
6051 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6052 int ndts = 3;
6053 poly_uint64 nunits_in;
6054 poly_uint64 nunits_out;
6055 tree vectype_out;
6056 int ncopies, vec_num;
6057 int i;
6058 vec<tree> vec_oprnds0 = vNULL;
6059 vec<tree> vec_oprnds1 = vNULL;
6060 vec<tree> vec_oprnds2 = vNULL;
6061 tree vop0, vop1, vop2;
6062 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6064 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6065 return false;
6067 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6068 && ! vec_stmt)
6069 return false;
6071 /* Is STMT a vectorizable binary/unary operation? */
6072 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6073 if (!stmt)
6074 return false;
6076 /* Loads and stores are handled in vectorizable_{load,store}. */
6077 if (STMT_VINFO_DATA_REF (stmt_info))
6078 return false;
6080 orig_code = code = gimple_assign_rhs_code (stmt);
6082 /* Shifts are handled in vectorizable_shift. */
6083 if (code == LSHIFT_EXPR
6084 || code == RSHIFT_EXPR
6085 || code == LROTATE_EXPR
6086 || code == RROTATE_EXPR)
6087 return false;
6089 /* Comparisons are handled in vectorizable_comparison. */
6090 if (TREE_CODE_CLASS (code) == tcc_comparison)
6091 return false;
6093 /* Conditions are handled in vectorizable_condition. */
6094 if (code == COND_EXPR)
6095 return false;
6097 /* For pointer addition and subtraction, we should use the normal
6098 plus and minus for the vector operation. */
6099 if (code == POINTER_PLUS_EXPR)
6100 code = PLUS_EXPR;
6101 if (code == POINTER_DIFF_EXPR)
6102 code = MINUS_EXPR;
6104 /* Support only unary or binary operations. */
6105 op_type = TREE_CODE_LENGTH (code);
6106 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6108 if (dump_enabled_p ())
6109 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6110 "num. args = %d (not unary/binary/ternary op).\n",
6111 op_type);
6112 return false;
6115 scalar_dest = gimple_assign_lhs (stmt);
6116 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6118 /* Most operations cannot handle bit-precision types without extra
6119 truncations. */
6120 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6121 if (!mask_op_p
6122 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6123 /* Exceptions are bitwise binary operations. */
6124 && code != BIT_IOR_EXPR
6125 && code != BIT_XOR_EXPR
6126 && code != BIT_AND_EXPR)
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6130 "bit-precision arithmetic not supported.\n");
6131 return false;
6134 slp_tree slp_op0;
6135 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6136 0, &op0, &slp_op0, &dt[0], &vectype))
6138 if (dump_enabled_p ())
6139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6140 "use not simple.\n");
6141 return false;
6143 /* If op0 is an external or constant def, infer the vector type
6144 from the scalar type. */
6145 if (!vectype)
6147 /* For boolean type we cannot determine vectype by
6148 invariant value (don't know whether it is a vector
6149 of booleans or vector of integers). We use output
6150 vectype because operations on booleans don't change the
6151 type. */
6152 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6154 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6156 if (dump_enabled_p ())
6157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6158 "not supported operation on bool value.\n");
6159 return false;
6161 vectype = vectype_out;
6163 else
6164 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6165 slp_node);
6167 if (vec_stmt)
6168 gcc_assert (vectype);
6169 if (!vectype)
6171 if (dump_enabled_p ())
6172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6173 "no vectype for scalar type %T\n",
6174 TREE_TYPE (op0));
6176 return false;
6179 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6180 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6181 if (maybe_ne (nunits_out, nunits_in))
6182 return false;
6184 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6185 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6186 if (op_type == binary_op || op_type == ternary_op)
6188 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6189 1, &op1, &slp_op1, &dt[1], &vectype2))
6191 if (dump_enabled_p ())
6192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6193 "use not simple.\n");
6194 return false;
6196 if (vectype2
6197 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6198 return false;
6200 if (op_type == ternary_op)
6202 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6203 2, &op2, &slp_op2, &dt[2], &vectype3))
6205 if (dump_enabled_p ())
6206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6207 "use not simple.\n");
6208 return false;
6210 if (vectype3
6211 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6212 return false;
6215 /* Multiple types in SLP are handled by creating the appropriate number of
6216 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6217 case of SLP. */
6218 if (slp_node)
6220 ncopies = 1;
6221 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6223 else
6225 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6226 vec_num = 1;
6229 gcc_assert (ncopies >= 1);
6231 /* Reject attempts to combine mask types with nonmask types, e.g. if
6232 we have an AND between a (nonmask) boolean loaded from memory and
6233 a (mask) boolean result of a comparison.
6235 TODO: We could easily fix these cases up using pattern statements. */
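/* For example, in
     _Bool b = flag[i] & (a[i] < 0);
   FLAG[I] is a nonmask boolean loaded from memory (a data vector of
   0/1 bytes on typical targets) while the comparison produces a mask,
   and the two cannot be combined directly. */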
6236 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6237 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6238 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6240 if (dump_enabled_p ())
6241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6242 "mixed mask and nonmask vector types\n");
6243 return false;
6246 /* Supportable by target? */
6248 vec_mode = TYPE_MODE (vectype);
6249 if (code == MULT_HIGHPART_EXPR)
6250 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6251 else
6253 optab = optab_for_tree_code (code, vectype, optab_default);
6254 if (!optab)
6256 if (dump_enabled_p ())
6257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6258 "no optab.\n");
6259 return false;
6261 target_support_p = (optab_handler (optab, vec_mode)
6262 != CODE_FOR_nothing);
6265 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6266 if (!target_support_p)
6268 if (dump_enabled_p ())
6269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6270 "op not supported by target.\n");
6271 /* Check only during analysis. */
6272 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6273 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6274 return false;
6275 if (dump_enabled_p ())
6276 dump_printf_loc (MSG_NOTE, vect_location,
6277 "proceeding using word mode.\n");
6278 using_emulated_vectors_p = true;
6281 if (using_emulated_vectors_p
6282 && !vect_can_vectorize_without_simd_p (code))
6284 if (dump_enabled_p ())
6285 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6286 return false;
6289 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6290 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6291 internal_fn cond_fn = get_conditional_internal_fn (code);
6293 if (!vec_stmt) /* transformation not required. */
6295 /* If this operation is part of a reduction, a fully-masked loop
6296 should only change the active lanes of the reduction chain,
6297 keeping the inactive lanes as-is. */
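/* E.g. for a fully-masked "for (i) s += a[i];" the add is emitted
   below as
     _x = .COND_ADD (loop_mask, vect_s, vect_a, vect_s);
   so lanes switched off by LOOP_MASK keep the previous value of the
   reduction vector. */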
6298 if (loop_vinfo
6299 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6300 && reduc_idx >= 0)
6302 if (cond_fn == IFN_LAST
6303 || !direct_internal_fn_supported_p (cond_fn, vectype,
6304 OPTIMIZE_FOR_SPEED))
6306 if (dump_enabled_p ())
6307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6308 "can't use a fully-masked loop because no"
6309 " conditional operation is available.\n");
6310 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6312 else
6313 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6314 vectype, NULL);
6317 /* Put types on constant and invariant SLP children. */
6318 if (slp_node
6319 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6320 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6321 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6323 if (dump_enabled_p ())
6324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6325 "incompatible vector types for invariants\n");
6326 return false;
6329 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6330 DUMP_VECT_SCOPE ("vectorizable_operation");
6331 vect_model_simple_cost (vinfo, stmt_info,
6332 ncopies, dt, ndts, slp_node, cost_vec);
6333 if (using_emulated_vectors_p)
6335 /* The above vect_model_simple_cost call handles constants
6336 in the prologue and (mis-)costs one of the stmts as
6337 vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6338 for the actual lowering that will be applied. */
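/* Roughly speaking, the generic lowering emulates e.g. a plus on four
   chars packed into one 32-bit word as
     ((a & 0x7f7f7f7f) + (b & 0x7f7f7f7f)) ^ ((a ^ b) & 0x80808080)
   i.e. a handful of scalar stmts per vector stmt, which the multipliers
   below try to approximate. */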
6339 unsigned n
6340 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6341 switch (code)
6343 case PLUS_EXPR:
6344 n *= 5;
6345 break;
6346 case MINUS_EXPR:
6347 n *= 6;
6348 break;
6349 case NEGATE_EXPR:
6350 n *= 4;
6351 break;
6352 default:;
6354 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6356 return true;
6359 /* Transform. */
6361 if (dump_enabled_p ())
6362 dump_printf_loc (MSG_NOTE, vect_location,
6363 "transform binary/unary operation.\n");
6365 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6367 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6368 vectors with unsigned elements, but the result is signed. So, we
6369 need to compute the MINUS_EXPR into a vectype temporary and
6370 VIEW_CONVERT_EXPR it into the final vectype_out result. */
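/* I.e. for "ptrdiff_t d = p - q;" we emit
     _1 = vect_p - vect_q;                      (unsigned VECTYPE)
     _2 = VIEW_CONVERT_EXPR<VECTYPE_OUT> (_1);  (signed result)
   with the conversion assigned through VEC_CVT_DEST below. */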
6371 tree vec_cvt_dest = NULL_TREE;
6372 if (orig_code == POINTER_DIFF_EXPR)
6374 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6375 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6377 /* Handle def. */
6378 else
6379 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6381 /* In case the vectorization factor (VF) is bigger than the number
6382 of elements that we can fit in a vectype (nunits), we have to generate
6383 more than one vector stmt - i.e., we need to "unroll" the
6384 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6385 from one copy of the vector stmt to the next, in the field
6386 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6387 stages to find the correct vector defs to be used when vectorizing
6388 stmts that use the defs of the current stmt. The example below
6389 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6390 we need to create 4 vectorized stmts):
6392 before vectorization:
6393 RELATED_STMT VEC_STMT
6394 S1: x = memref - -
6395 S2: z = x + 1 - -
6397 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6398 there):
6399 RELATED_STMT VEC_STMT
6400 VS1_0: vx0 = memref0 VS1_1 -
6401 VS1_1: vx1 = memref1 VS1_2 -
6402 VS1_2: vx2 = memref2 VS1_3 -
6403 VS1_3: vx3 = memref3 - -
6404 S1: x = load - VS1_0
6405 S2: z = x + 1 - -
6407 step2: vectorize stmt S2 (done here):
6408 To vectorize stmt S2 we first need to find the relevant vector
6409 def for the first operand 'x'. This is, as usual, obtained from
6410 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6411 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6412 relevant vector def 'vx0'. Having found 'vx0' we can generate
6413 the vector stmt VS2_0, and as usual, record it in the
6414 STMT_VINFO_VEC_STMT of stmt S2.
6415 When creating the second copy (VS2_1), we obtain the relevant vector
6416 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6417 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6418 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6419 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6420 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6421 chain of stmts and pointers:
6422 RELATED_STMT VEC_STMT
6423 VS1_0: vx0 = memref0 VS1_1 -
6424 VS1_1: vx1 = memref1 VS1_2 -
6425 VS1_2: vx2 = memref2 VS1_3 -
6426 VS1_3: vx3 = memref3 - -
6427 S1: x = load - VS1_0
6428 VS2_0: vz0 = vx0 + v1 VS2_1 -
6429 VS2_1: vz1 = vx1 + v1 VS2_2 -
6430 VS2_2: vz2 = vx2 + v1 VS2_3 -
6431 VS2_3: vz3 = vx3 + v1 - -
6432 S2: z = x + 1 - VS2_0 */
6434 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6435 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6436 /* Arguments are ready. Create the new vector stmt. */
6437 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6439 gimple *new_stmt = NULL;
6440 vop1 = ((op_type == binary_op || op_type == ternary_op)
6441 ? vec_oprnds1[i] : NULL_TREE);
6442 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6443 if (masked_loop_p && reduc_idx >= 0)
6445 /* Perform the operation on active elements only and take
6446 inactive elements from the reduction chain input. */
6447 gcc_assert (!vop2);
6448 vop2 = reduc_idx == 1 ? vop1 : vop0;
6449 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6450 vectype, i);
6451 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6452 vop0, vop1, vop2);
6453 new_temp = make_ssa_name (vec_dest, call);
6454 gimple_call_set_lhs (call, new_temp);
6455 gimple_call_set_nothrow (call, true);
6456 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6457 new_stmt = call;
6459 else
6461 tree mask = NULL_TREE;
6462 /* When combining two masks, check whether either of them is elsewhere
6463 combined with a loop mask; if so, we can mark that the new combined
6464 mask doesn't need to be combined with a loop mask again. */
6465 if (masked_loop_p
6466 && code == BIT_AND_EXPR
6467 && VECTOR_BOOLEAN_TYPE_P (vectype))
6469 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6470 ncopies}))
6472 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6473 vectype, i);
6475 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6476 vop0, gsi);
6479 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6480 ncopies }))
6482 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6483 vectype, i);
6485 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6486 vop1, gsi);
6490 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6491 new_temp = make_ssa_name (vec_dest, new_stmt);
6492 gimple_assign_set_lhs (new_stmt, new_temp);
6493 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6494 if (using_emulated_vectors_p)
6495 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
6497 /* Enter the combined value into the vector cond hash so we don't
6498 AND it with a loop mask again. */
6499 if (mask)
6500 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6502 if (vec_cvt_dest)
6504 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6505 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6506 new_temp);
6507 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6508 gimple_assign_set_lhs (new_stmt, new_temp);
6509 vect_finish_stmt_generation (vinfo, stmt_info,
6510 new_stmt, gsi);
6513 if (slp_node)
6514 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6515 else
6516 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6519 if (!slp_node)
6520 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6522 vec_oprnds0.release ();
6523 vec_oprnds1.release ();
6524 vec_oprnds2.release ();
6526 return true;
6529 /* A helper function to ensure data reference DR_INFO's base alignment. */
6531 static void
6532 ensure_base_align (dr_vec_info *dr_info)
6534 /* Alignment is only analyzed for the first element of a DR group;
6535 use that to determine the base alignment we need to enforce. */
6536 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6537 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6539 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6541 if (dr_info->base_misaligned)
6543 tree base_decl = dr_info->base_decl;
6545 // We should only be able to increase the alignment of a base object if
6546 // we know what its new alignment should be at compile time.
6547 unsigned HOST_WIDE_INT align_base_to =
6548 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6550 if (decl_in_symtab_p (base_decl))
6551 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6552 else if (DECL_ALIGN (base_decl) < align_base_to)
6554 SET_DECL_ALIGN (base_decl, align_base_to);
6555 DECL_USER_ALIGN (base_decl) = 1;
6557 dr_info->base_misaligned = false;
6562 /* Function get_group_alias_ptr_type.
6564 Return the alias type for the group starting at FIRST_STMT_INFO. */
6566 static tree
6567 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6569 struct data_reference *first_dr, *next_dr;
6571 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6572 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6573 while (next_stmt_info)
6575 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6576 if (get_alias_set (DR_REF (first_dr))
6577 != get_alias_set (DR_REF (next_dr)))
6579 if (dump_enabled_p ())
6580 dump_printf_loc (MSG_NOTE, vect_location,
6581 "conflicting alias set types.\n");
6582 return ptr_type_node;
6584 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6586 return reference_alias_ptr_type (DR_REF (first_dr));
6590 /* Function scan_operand_equal_p.
6592 Helper function for check_scan_store. Compare two references
6593 with .GOMP_SIMD_LANE bases. */
6595 static bool
6596 scan_operand_equal_p (tree ref1, tree ref2)
6598 tree ref[2] = { ref1, ref2 };
6599 poly_int64 bitsize[2], bitpos[2];
6600 tree offset[2], base[2];
6601 for (int i = 0; i < 2; ++i)
6603 machine_mode mode;
6604 int unsignedp, reversep, volatilep = 0;
6605 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6606 &offset[i], &mode, &unsignedp,
6607 &reversep, &volatilep);
6608 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6609 return false;
6610 if (TREE_CODE (base[i]) == MEM_REF
6611 && offset[i] == NULL_TREE
6612 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6614 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6615 if (is_gimple_assign (def_stmt)
6616 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6617 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6618 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6620 if (maybe_ne (mem_ref_offset (base[i]), 0))
6621 return false;
6622 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6623 offset[i] = gimple_assign_rhs2 (def_stmt);
6628 if (!operand_equal_p (base[0], base[1], 0))
6629 return false;
6630 if (maybe_ne (bitsize[0], bitsize[1]))
6631 return false;
6632 if (offset[0] != offset[1])
6634 if (!offset[0] || !offset[1])
6635 return false;
6636 if (!operand_equal_p (offset[0], offset[1], 0))
6638 tree step[2];
6639 for (int i = 0; i < 2; ++i)
6641 step[i] = integer_one_node;
6642 if (TREE_CODE (offset[i]) == SSA_NAME)
6644 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6645 if (is_gimple_assign (def_stmt)
6646 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6647 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6648 == INTEGER_CST))
6650 step[i] = gimple_assign_rhs2 (def_stmt);
6651 offset[i] = gimple_assign_rhs1 (def_stmt);
6654 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6656 step[i] = TREE_OPERAND (offset[i], 1);
6657 offset[i] = TREE_OPERAND (offset[i], 0);
6659 tree rhs1 = NULL_TREE;
6660 if (TREE_CODE (offset[i]) == SSA_NAME)
6662 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6663 if (gimple_assign_cast_p (def_stmt))
6664 rhs1 = gimple_assign_rhs1 (def_stmt);
6666 else if (CONVERT_EXPR_P (offset[i]))
6667 rhs1 = TREE_OPERAND (offset[i], 0);
6668 if (rhs1
6669 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6670 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6671 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6672 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6673 offset[i] = rhs1;
6675 if (!operand_equal_p (offset[0], offset[1], 0)
6676 || !operand_equal_p (step[0], step[1], 0))
6677 return false;
6680 return true;
6684 enum scan_store_kind {
6685 /* Normal permutation. */
6686 scan_store_kind_perm,
6688 /* Whole vector left shift permutation with zero init. */
6689 scan_store_kind_lshift_zero,
6691 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6692 scan_store_kind_lshift_cond
6695 /* Function scan_store_can_perm_p.
6697 Verify if we can perform the needed permutations or whole vector shifts.
6698 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6699 USE_WHOLE_VECTOR, if non-NULL, is filled in with the scan_store_kind
6700 operation to use at each step. */
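/* For example, with nunits == 8 the loop below checks the selectors
     { 0, 8, 9, 10, 11, 12, 13, 14 }
     { 0, 1, 8, 9, 10, 11, 12, 13 }
     { 0, 1, 2, 3, 8, 9, 10, 11 }
     { 7, 7, 7, 7, 7, 7, 7, 7 }
   (the same permutations used by the scan expansion shown in
   check_scan_store below); if one of the first three is not directly
   supported, a whole-vector shift (possibly combined with a
   VEC_COND_EXPR when the init value is not all zeros) is tried
   instead. */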
6702 static int
6703 scan_store_can_perm_p (tree vectype, tree init,
6704 vec<enum scan_store_kind> *use_whole_vector = NULL)
6706 enum machine_mode vec_mode = TYPE_MODE (vectype);
6707 unsigned HOST_WIDE_INT nunits;
6708 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6709 return -1;
6710 int units_log2 = exact_log2 (nunits);
6711 if (units_log2 <= 0)
6712 return -1;
6714 int i;
6715 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6716 for (i = 0; i <= units_log2; ++i)
6718 unsigned HOST_WIDE_INT j, k;
6719 enum scan_store_kind kind = scan_store_kind_perm;
6720 vec_perm_builder sel (nunits, nunits, 1);
6721 sel.quick_grow (nunits);
6722 if (i == units_log2)
6724 for (j = 0; j < nunits; ++j)
6725 sel[j] = nunits - 1;
6727 else
6729 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6730 sel[j] = j;
6731 for (k = 0; j < nunits; ++j, ++k)
6732 sel[j] = nunits + k;
6734 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6735 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
6737 if (i == units_log2)
6738 return -1;
6740 if (whole_vector_shift_kind == scan_store_kind_perm)
6742 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6743 return -1;
6744 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6745 /* Whole vector shifts shift in zeros, so if init is an all-zero
6746 constant, there is no need to do anything further. */
6747 if ((TREE_CODE (init) != INTEGER_CST
6748 && TREE_CODE (init) != REAL_CST)
6749 || !initializer_zerop (init))
6751 tree masktype = truth_type_for (vectype);
6752 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6753 return -1;
6754 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6757 kind = whole_vector_shift_kind;
6759 if (use_whole_vector)
6761 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6762 use_whole_vector->safe_grow_cleared (i, true);
6763 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6764 use_whole_vector->safe_push (kind);
6768 return units_log2;
6772 /* Function check_scan_store.
6774 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6776 static bool
6777 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6778 enum vect_def_type rhs_dt, bool slp, tree mask,
6779 vect_memory_access_type memory_access_type)
6781 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6782 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6783 tree ref_type;
6785 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6786 if (slp
6787 || mask
6788 || memory_access_type != VMAT_CONTIGUOUS
6789 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6790 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6791 || loop_vinfo == NULL
6792 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6793 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6794 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6795 || !integer_zerop (DR_INIT (dr_info->dr))
6796 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6797 || !alias_sets_conflict_p (get_alias_set (vectype),
6798 get_alias_set (TREE_TYPE (ref_type))))
6800 if (dump_enabled_p ())
6801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6802 "unsupported OpenMP scan store.\n");
6803 return false;
6806 /* We need to pattern match code built by OpenMP lowering and simplified
6807 by later optimization passes into something we can handle.
6808 #pragma omp simd reduction(inscan,+:r)
6809 for (...)
6811 r += something ();
6812 #pragma omp scan inclusive (r)
6813 use (r);
6815 shall have body with:
6816 // Initialization for input phase, store the reduction initializer:
6817 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6818 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6819 D.2042[_21] = 0;
6820 // Actual input phase:
6822 r.0_5 = D.2042[_20];
6823 _6 = _4 + r.0_5;
6824 D.2042[_20] = _6;
6825 // Initialization for scan phase:
6826 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6827 _26 = D.2043[_25];
6828 _27 = D.2042[_25];
6829 _28 = _26 + _27;
6830 D.2043[_25] = _28;
6831 D.2042[_25] = _28;
6832 // Actual scan phase:
6834 r.1_8 = D.2042[_20];
6836 The "omp simd array" variable D.2042 holds the privatized copy used
6837 inside of the loop and D.2043 is another one that holds copies of
6838 the current original list item. The separate GOMP_SIMD_LANE ifn
6839 kinds are there in order to allow optimizing the initializer store
6840 and combiner sequence, e.g. if it is originally some C++ish user
6841 defined reduction, but allow the vectorizer to pattern recognize it
6842 and turn it into the appropriate vectorized scan.
6844 For exclusive scan, this is slightly different:
6845 #pragma omp simd reduction(inscan,+:r)
6846 for (...)
6848 use (r);
6849 #pragma omp scan exclusive (r)
6850 r += something ();
6852 shall have body with:
6853 // Initialization for input phase, store the reduction initializer:
6854 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6855 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6856 D.2042[_21] = 0;
6857 // Actual input phase:
6859 r.0_5 = D.2042[_20];
6860 _6 = _4 + r.0_5;
6861 D.2042[_20] = _6;
6862 // Initialization for scan phase:
6863 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6864 _26 = D.2043[_25];
6865 D.2044[_25] = _26;
6866 _27 = D.2042[_25];
6867 _28 = _26 + _27;
6868 D.2043[_25] = _28;
6869 // Actual scan phase:
6871 r.1_8 = D.2044[_20];
6872 ... */
6874 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6876 /* Match the D.2042[_21] = 0; store above. Just require that
6877 it is a constant or external definition store. */
6878 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6880 fail_init:
6881 if (dump_enabled_p ())
6882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6883 "unsupported OpenMP scan initializer store.\n");
6884 return false;
6887 if (! loop_vinfo->scan_map)
6888 loop_vinfo->scan_map = new hash_map<tree, tree>;
6889 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6890 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6891 if (cached)
6892 goto fail_init;
6893 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6895 /* These stores can be vectorized normally. */
6896 return true;
6899 if (rhs_dt != vect_internal_def)
6901 fail:
6902 if (dump_enabled_p ())
6903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6904 "unsupported OpenMP scan combiner pattern.\n");
6905 return false;
6908 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6909 tree rhs = gimple_assign_rhs1 (stmt);
6910 if (TREE_CODE (rhs) != SSA_NAME)
6911 goto fail;
6913 gimple *other_store_stmt = NULL;
6914 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6915 bool inscan_var_store
6916 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6918 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6920 if (!inscan_var_store)
6922 use_operand_p use_p;
6923 imm_use_iterator iter;
6924 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6926 gimple *use_stmt = USE_STMT (use_p);
6927 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6928 continue;
6929 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6930 || !is_gimple_assign (use_stmt)
6931 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6932 || other_store_stmt
6933 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6934 goto fail;
6935 other_store_stmt = use_stmt;
6937 if (other_store_stmt == NULL)
6938 goto fail;
6939 rhs = gimple_assign_lhs (other_store_stmt);
6940 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6941 goto fail;
6944 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6946 use_operand_p use_p;
6947 imm_use_iterator iter;
6948 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6950 gimple *use_stmt = USE_STMT (use_p);
6951 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6952 continue;
6953 if (other_store_stmt)
6954 goto fail;
6955 other_store_stmt = use_stmt;
6958 else
6959 goto fail;
6961 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6962 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6963 || !is_gimple_assign (def_stmt)
6964 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6965 goto fail;
6967 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6968 /* For pointer addition, we should use the normal plus for the vector
6969 operation. */
6970 switch (code)
6972 case POINTER_PLUS_EXPR:
6973 code = PLUS_EXPR;
6974 break;
6975 case MULT_HIGHPART_EXPR:
6976 goto fail;
6977 default:
6978 break;
6980 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6981 goto fail;
6983 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6984 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6985 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6986 goto fail;
6988 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6989 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6990 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6991 || !gimple_assign_load_p (load1_stmt)
6992 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6993 || !gimple_assign_load_p (load2_stmt))
6994 goto fail;
6996 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6997 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6998 if (load1_stmt_info == NULL
6999 || load2_stmt_info == NULL
7000 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7001 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7002 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7003 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7004 goto fail;
7006 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7008 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7009 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7010 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7011 goto fail;
7012 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7013 tree lrhs;
7014 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7015 lrhs = rhs1;
7016 else
7017 lrhs = rhs2;
7018 use_operand_p use_p;
7019 imm_use_iterator iter;
7020 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7022 gimple *use_stmt = USE_STMT (use_p);
7023 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7024 continue;
7025 if (other_store_stmt)
7026 goto fail;
7027 other_store_stmt = use_stmt;
7031 if (other_store_stmt == NULL)
7032 goto fail;
7033 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7034 || !gimple_store_p (other_store_stmt))
7035 goto fail;
7037 stmt_vec_info other_store_stmt_info
7038 = loop_vinfo->lookup_stmt (other_store_stmt);
7039 if (other_store_stmt_info == NULL
7040 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7041 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7042 goto fail;
7044 gimple *stmt1 = stmt;
7045 gimple *stmt2 = other_store_stmt;
7046 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7047 std::swap (stmt1, stmt2);
7048 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7049 gimple_assign_rhs1 (load2_stmt)))
7051 std::swap (rhs1, rhs2);
7052 std::swap (load1_stmt, load2_stmt);
7053 std::swap (load1_stmt_info, load2_stmt_info);
7055 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7056 gimple_assign_rhs1 (load1_stmt)))
7057 goto fail;
7059 tree var3 = NULL_TREE;
7060 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7061 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7062 gimple_assign_rhs1 (load2_stmt)))
7063 goto fail;
7064 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7066 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7067 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7068 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7069 goto fail;
7070 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7071 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7072 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7073 || lookup_attribute ("omp simd inscan exclusive",
7074 DECL_ATTRIBUTES (var3)))
7075 goto fail;
7078 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7079 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7080 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7081 goto fail;
7083 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7084 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7085 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7086 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7087 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7088 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7089 goto fail;
7091 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7092 std::swap (var1, var2);
7094 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7096 if (!lookup_attribute ("omp simd inscan exclusive",
7097 DECL_ATTRIBUTES (var1)))
7098 goto fail;
7099 var1 = var3;
7102 if (loop_vinfo->scan_map == NULL)
7103 goto fail;
7104 tree *init = loop_vinfo->scan_map->get (var1);
7105 if (init == NULL)
7106 goto fail;
7108 /* The IL is as expected, now check if we can actually vectorize it.
7109 Inclusive scan:
7110 _26 = D.2043[_25];
7111 _27 = D.2042[_25];
7112 _28 = _26 + _27;
7113 D.2043[_25] = _28;
7114 D.2042[_25] = _28;
7115 should be vectorized as (where _40 is the vectorized rhs
7116 from the D.2042[_21] = 0; store):
7117 _30 = MEM <vector(8) int> [(int *)&D.2043];
7118 _31 = MEM <vector(8) int> [(int *)&D.2042];
7119 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7120 _33 = _31 + _32;
7121 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7122 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7123 _35 = _33 + _34;
7124 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7125 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7126 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7127 _37 = _35 + _36;
7128 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7129 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7130 _38 = _30 + _37;
7131 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7132 MEM <vector(8) int> [(int *)&D.2043] = _39;
7133 MEM <vector(8) int> [(int *)&D.2042] = _38;
7134 Exclusive scan:
7135 _26 = D.2043[_25];
7136 D.2044[_25] = _26;
7137 _27 = D.2042[_25];
7138 _28 = _26 + _27;
7139 D.2043[_25] = _28;
7140 should be vectorized as (where _40 is the vectorized rhs
7141 from the D.2042[_21] = 0; store):
7142 _30 = MEM <vector(8) int> [(int *)&D.2043];
7143 _31 = MEM <vector(8) int> [(int *)&D.2042];
7144 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7145 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7146 _34 = _32 + _33;
7147 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7148 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7149 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7150 _36 = _34 + _35;
7151 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7152 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7153 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7154 _38 = _36 + _37;
7155 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7156 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7157 _39 = _30 + _38;
7158 _50 = _31 + _39;
7159 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7160 MEM <vector(8) int> [(int *)&D.2044] = _39;
7161 MEM <vector(8) int> [(int *)&D.2042] = _51; */
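/* As a rough illustration of the permute/add sequences above: they
   implement the usual O(log2 nunits) parallel prefix computation, where
   step I adds to each lane the lane 2^I positions below it (lanes
   shifted in from the init vector _40 contribute the neutral element).
   A scalar sketch of the inclusive variant, assuming an 8-element int
   vector and PLUS_EXPR as the scan operation:

     int v[8];	/* the loaded D.2042 vector */
     for (int step = 1; step < 8; step *= 2)
       for (int j = 7; j >= step; j--)
	 v[j] += v[j - step];
     /* v[j] now holds the sum of the original v[0] .. v[j].  */  */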
7162 enum machine_mode vec_mode = TYPE_MODE (vectype);
7163 optab optab = optab_for_tree_code (code, vectype, optab_default);
7164 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7165 goto fail;
7167 int units_log2 = scan_store_can_perm_p (vectype, *init);
7168 if (units_log2 == -1)
7169 goto fail;
7171 return true;
7175 /* Function vectorizable_scan_store.
7177 Helper of vectorizable_store; arguments are the same as for vectorizable_store.
7178 Handle only the transformation, checking is done in check_scan_store. */
7180 static bool
7181 vectorizable_scan_store (vec_info *vinfo,
7182 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7183 gimple **vec_stmt, int ncopies)
7185 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7186 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7187 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7188 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7190 if (dump_enabled_p ())
7191 dump_printf_loc (MSG_NOTE, vect_location,
7192 "transform scan store. ncopies = %d\n", ncopies);
7194 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7195 tree rhs = gimple_assign_rhs1 (stmt);
7196 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7198 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7199 bool inscan_var_store
7200 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7202 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7204 use_operand_p use_p;
7205 imm_use_iterator iter;
7206 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7208 gimple *use_stmt = USE_STMT (use_p);
7209 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7210 continue;
7211 rhs = gimple_assign_lhs (use_stmt);
7212 break;
7216 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7217 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7218 if (code == POINTER_PLUS_EXPR)
7219 code = PLUS_EXPR;
7220 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7221 && commutative_tree_code (code));
7222 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7223 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7224 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7225 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7226 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7227 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7228 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7229 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7230 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7231 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7232 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7234 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7236 std::swap (rhs1, rhs2);
7237 std::swap (var1, var2);
7238 std::swap (load1_dr_info, load2_dr_info);
7241 tree *init = loop_vinfo->scan_map->get (var1);
7242 gcc_assert (init);
7244 unsigned HOST_WIDE_INT nunits;
7245 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7246 gcc_unreachable ();
7247 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7248 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7249 gcc_assert (units_log2 > 0);
7250 auto_vec<tree, 16> perms;
7251 perms.quick_grow (units_log2 + 1);
7252 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
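/* Build the units_log2 + 1 permute selectors used below: for step I the
   low 2^I lanes come from the init vector and the remaining lanes are
   the other operand shifted up by 2^I lanes; the last selector simply
   broadcasts the final lane.  E.g. for an 8-lane vector and I == 1 this
   is { 0, 1, 8, 9, 10, 11, 12, 13 }, matching the masks shown in the
   example in check_scan_store.  */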
7253 for (int i = 0; i <= units_log2; ++i)
7255 unsigned HOST_WIDE_INT j, k;
7256 vec_perm_builder sel (nunits, nunits, 1);
7257 sel.quick_grow (nunits);
7258 if (i == units_log2)
7259 for (j = 0; j < nunits; ++j)
7260 sel[j] = nunits - 1;
7261 else
7263 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7264 sel[j] = j;
7265 for (k = 0; j < nunits; ++j, ++k)
7266 sel[j] = nunits + k;
7268 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7269 if (!use_whole_vector.is_empty ()
7270 && use_whole_vector[i] != scan_store_kind_perm)
7272 if (zero_vec == NULL_TREE)
7273 zero_vec = build_zero_cst (vectype);
7274 if (masktype == NULL_TREE
7275 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7276 masktype = truth_type_for (vectype);
7277 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7279 else
7280 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7283 tree vec_oprnd1 = NULL_TREE;
7284 tree vec_oprnd2 = NULL_TREE;
7285 tree vec_oprnd3 = NULL_TREE;
7286 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7287 tree dataref_offset = build_int_cst (ref_type, 0);
7288 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7289 vectype, VMAT_CONTIGUOUS);
7290 tree ldataref_ptr = NULL_TREE;
7291 tree orig = NULL_TREE;
7292 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7293 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7294 auto_vec<tree> vec_oprnds1;
7295 auto_vec<tree> vec_oprnds2;
7296 auto_vec<tree> vec_oprnds3;
7297 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7298 *init, &vec_oprnds1,
7299 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7300 rhs2, &vec_oprnds3);
7301 for (int j = 0; j < ncopies; j++)
7303 vec_oprnd1 = vec_oprnds1[j];
7304 if (ldataref_ptr == NULL)
7305 vec_oprnd2 = vec_oprnds2[j];
7306 vec_oprnd3 = vec_oprnds3[j];
7307 if (j == 0)
7308 orig = vec_oprnd3;
7309 else if (!inscan_var_store)
7310 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7312 if (ldataref_ptr)
7314 vec_oprnd2 = make_ssa_name (vectype);
7315 tree data_ref = fold_build2 (MEM_REF, vectype,
7316 unshare_expr (ldataref_ptr),
7317 dataref_offset);
7318 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7319 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7320 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7321 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7322 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7325 tree v = vec_oprnd2;
7326 for (int i = 0; i < units_log2; ++i)
7328 tree new_temp = make_ssa_name (vectype);
7329 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7330 (zero_vec
7331 && (use_whole_vector[i]
7332 != scan_store_kind_perm))
7333 ? zero_vec : vec_oprnd1, v,
7334 perms[i]);
7335 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7336 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7337 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7339 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7341 /* The whole-vector shift shifted in zero bits, but if *init
7342 is not initializer_zerop, those elements need to be replaced
7343 with the corresponding elements from vec_oprnd1. */
7344 tree_vector_builder vb (masktype, nunits, 1);
7345 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7346 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7347 ? boolean_false_node : boolean_true_node);
7349 tree new_temp2 = make_ssa_name (vectype);
7350 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7351 new_temp, vec_oprnd1);
7352 vect_finish_stmt_generation (vinfo, stmt_info,
7353 g, gsi);
7354 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7355 new_temp = new_temp2;
7358 /* For exclusive scan, perform the perms[i] permutation once
7359 more. */
7360 if (i == 0
7361 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7362 && v == vec_oprnd2)
7364 v = new_temp;
7365 --i;
7366 continue;
7369 tree new_temp2 = make_ssa_name (vectype);
7370 g = gimple_build_assign (new_temp2, code, v, new_temp);
7371 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7372 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7374 v = new_temp2;
7377 tree new_temp = make_ssa_name (vectype);
7378 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7379 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7380 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7382 tree last_perm_arg = new_temp;
7383 /* For exclusive scan, new_temp computed above is the exclusive scan
7384 prefix sum. Turn it into inclusive prefix sum for the broadcast
7385 of the last element into orig. */
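/* For instance, with PLUS_EXPR and ignoring the carry-in from the
   previous vector copy: if NEW_TEMP is the exclusive prefix
   { 0, a0, a0+a1, ... } and VEC_OPRND2 is the input { a0, a1, a2, ... },
   their sum is the inclusive prefix { a0, a0+a1, a0+a1+a2, ... }, whose
   last lane is the value broadcast by the permute below.  */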
7386 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7388 last_perm_arg = make_ssa_name (vectype);
7389 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7390 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7391 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7394 orig = make_ssa_name (vectype);
7395 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7396 last_perm_arg, perms[units_log2]);
7397 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7398 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7400 if (!inscan_var_store)
7402 tree data_ref = fold_build2 (MEM_REF, vectype,
7403 unshare_expr (dataref_ptr),
7404 dataref_offset);
7405 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7406 g = gimple_build_assign (data_ref, new_temp);
7407 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7408 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7412 if (inscan_var_store)
7413 for (int j = 0; j < ncopies; j++)
7415 if (j != 0)
7416 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7418 tree data_ref = fold_build2 (MEM_REF, vectype,
7419 unshare_expr (dataref_ptr),
7420 dataref_offset);
7421 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7422 gimple *g = gimple_build_assign (data_ref, orig);
7423 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7424 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7426 return true;
7430 /* Function vectorizable_store.
7432 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7433 that can be vectorized.
7434 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7435 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7436 Return true if STMT_INFO is vectorizable in this way. */
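/* The transformation part below is split into several independent paths:
   builtin-based gather/scatter stores, "omp simd" scan stores (handled by
   vectorizable_scan_store), elementwise/strided stores driven by a new
   induction variable, store-lanes groups, and plain contiguous stores,
   possibly masked or length-controlled.  */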
7438 static bool
7439 vectorizable_store (vec_info *vinfo,
7440 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7441 gimple **vec_stmt, slp_tree slp_node,
7442 stmt_vector_for_cost *cost_vec)
7444 tree data_ref;
7445 tree op;
7446 tree vec_oprnd = NULL_TREE;
7447 tree elem_type;
7448 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7449 class loop *loop = NULL;
7450 machine_mode vec_mode;
7451 tree dummy;
7452 enum vect_def_type rhs_dt = vect_unknown_def_type;
7453 enum vect_def_type mask_dt = vect_unknown_def_type;
7454 tree dataref_ptr = NULL_TREE;
7455 tree dataref_offset = NULL_TREE;
7456 gimple *ptr_incr = NULL;
7457 int ncopies;
7458 int j;
7459 stmt_vec_info first_stmt_info;
7460 bool grouped_store;
7461 unsigned int group_size, i;
7462 vec<tree> oprnds = vNULL;
7463 vec<tree> result_chain = vNULL;
7464 vec<tree> vec_oprnds = vNULL;
7465 bool slp = (slp_node != NULL);
7466 unsigned int vec_num;
7467 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7468 tree aggr_type;
7469 gather_scatter_info gs_info;
7470 poly_uint64 vf;
7471 vec_load_store_type vls_type;
7472 tree ref_type;
7474 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7475 return false;
7477 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7478 && ! vec_stmt)
7479 return false;
7481 /* Is vectorizable store? */
7483 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7484 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7486 tree scalar_dest = gimple_assign_lhs (assign);
7487 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7488 && is_pattern_stmt_p (stmt_info))
7489 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7490 if (TREE_CODE (scalar_dest) != ARRAY_REF
7491 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7492 && TREE_CODE (scalar_dest) != INDIRECT_REF
7493 && TREE_CODE (scalar_dest) != COMPONENT_REF
7494 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7495 && TREE_CODE (scalar_dest) != REALPART_EXPR
7496 && TREE_CODE (scalar_dest) != MEM_REF)
7497 return false;
7499 else
7501 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7502 if (!call || !gimple_call_internal_p (call))
7503 return false;
7505 internal_fn ifn = gimple_call_internal_fn (call);
7506 if (!internal_store_fn_p (ifn))
7507 return false;
7509 if (slp_node != NULL)
7511 if (dump_enabled_p ())
7512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7513 "SLP of masked stores not supported.\n");
7514 return false;
7517 int mask_index = internal_fn_mask_index (ifn);
7518 if (mask_index >= 0
7519 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7520 &mask, NULL, &mask_dt, &mask_vectype))
7521 return false;
7524 op = vect_get_store_rhs (stmt_info);
7526 /* Cannot have hybrid store SLP -- that would mean storing to the
7527 same location twice. */
7528 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7530 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7531 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7533 if (loop_vinfo)
7535 loop = LOOP_VINFO_LOOP (loop_vinfo);
7536 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7538 else
7539 vf = 1;
7541 /* Multiple types in SLP are handled by creating the appropriate number of
7542 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7543 case of SLP. */
7544 if (slp)
7545 ncopies = 1;
7546 else
7547 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7549 gcc_assert (ncopies >= 1);
7551 /* FORNOW. This restriction should be relaxed. */
7552 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7554 if (dump_enabled_p ())
7555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7556 "multiple types in nested loop.\n");
7557 return false;
7560 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7561 op, &rhs_dt, &rhs_vectype, &vls_type))
7562 return false;
7564 elem_type = TREE_TYPE (vectype);
7565 vec_mode = TYPE_MODE (vectype);
7567 if (!STMT_VINFO_DATA_REF (stmt_info))
7568 return false;
7570 vect_memory_access_type memory_access_type;
7571 enum dr_alignment_support alignment_support_scheme;
7572 int misalignment;
7573 poly_int64 poffset;
7574 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7575 ncopies, &memory_access_type, &poffset,
7576 &alignment_support_scheme, &misalignment, &gs_info))
7577 return false;
7579 if (mask)
7581 if (memory_access_type == VMAT_CONTIGUOUS)
7583 if (!VECTOR_MODE_P (vec_mode)
7584 || !can_vec_mask_load_store_p (vec_mode,
7585 TYPE_MODE (mask_vectype), false))
7586 return false;
7588 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7589 && (memory_access_type != VMAT_GATHER_SCATTER
7590 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7592 if (dump_enabled_p ())
7593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7594 "unsupported access type for masked store.\n");
7595 return false;
7598 else
7600 /* FORNOW. In some cases we can vectorize even if the data type is not
7601 supported (e.g. array initialization with 0). */
7602 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7603 return false;
7606 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7607 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7608 && memory_access_type != VMAT_GATHER_SCATTER
7609 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7610 if (grouped_store)
7612 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7613 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7614 group_size = DR_GROUP_SIZE (first_stmt_info);
7616 else
7618 first_stmt_info = stmt_info;
7619 first_dr_info = dr_info;
7620 group_size = vec_num = 1;
7623 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7625 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7626 memory_access_type))
7627 return false;
7630 if (!vec_stmt) /* transformation not required. */
7632 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7634 if (loop_vinfo
7635 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7636 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
7637 vls_type, group_size,
7638 memory_access_type, &gs_info,
7639 mask);
7641 if (slp_node
7642 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7643 vectype))
7645 if (dump_enabled_p ())
7646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7647 "incompatible vector types for invariants\n");
7648 return false;
7651 if (dump_enabled_p ()
7652 && memory_access_type != VMAT_ELEMENTWISE
7653 && memory_access_type != VMAT_GATHER_SCATTER
7654 && alignment_support_scheme != dr_aligned)
7655 dump_printf_loc (MSG_NOTE, vect_location,
7656 "Vectorizing an unaligned access.\n");
7658 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7659 vect_model_store_cost (vinfo, stmt_info, ncopies,
7660 memory_access_type, alignment_support_scheme,
7661 misalignment, vls_type, slp_node, cost_vec);
7662 return true;
7664 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7666 /* Transform. */
7668 ensure_base_align (dr_info);
7670 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7672 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7673 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7674 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7675 tree ptr, var, scale, vec_mask;
7676 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7677 tree mask_halfvectype = mask_vectype;
7678 edge pe = loop_preheader_edge (loop);
7679 gimple_seq seq;
7680 basic_block new_bb;
7681 enum { NARROW, NONE, WIDEN } modifier;
7682 poly_uint64 scatter_off_nunits
7683 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7685 if (known_eq (nunits, scatter_off_nunits))
7686 modifier = NONE;
7687 else if (known_eq (nunits * 2, scatter_off_nunits))
7689 modifier = WIDEN;
7691 /* Currently gathers and scatters are only supported for
7692 fixed-length vectors. */
7693 unsigned int count = scatter_off_nunits.to_constant ();
7694 vec_perm_builder sel (count, count, 1);
7695 for (i = 0; i < (unsigned int) count; ++i)
7696 sel.quick_push (i | (count / 2));
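/* E.g. for COUNT == 8 this builds the selector { 4, 5, 6, 7, 4, 5, 6, 7 },
   i.e. a permutation that duplicates the high half of the offset vector;
   it is applied to the offsets of every odd-numbered vector copy below.  */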
7698 vec_perm_indices indices (sel, 1, count);
7699 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7700 indices);
7701 gcc_assert (perm_mask != NULL_TREE);
7703 else if (known_eq (nunits, scatter_off_nunits * 2))
7705 modifier = NARROW;
7707 /* Currently gathers and scatters are only supported for
7708 fixed-length vectors. */
7709 unsigned int count = nunits.to_constant ();
7710 vec_perm_builder sel (count, count, 1);
7711 for (i = 0; i < (unsigned int) count; ++i)
7712 sel.quick_push (i | (count / 2));
7714 vec_perm_indices indices (sel, 2, count);
7715 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7716 gcc_assert (perm_mask != NULL_TREE);
7717 ncopies *= 2;
7719 if (mask)
7720 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7722 else
7723 gcc_unreachable ();
7725 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7726 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7727 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7728 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7729 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7730 scaletype = TREE_VALUE (arglist);
7732 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7733 && TREE_CODE (rettype) == VOID_TYPE);
7735 ptr = fold_convert (ptrtype, gs_info.base);
7736 if (!is_gimple_min_invariant (ptr))
7738 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7739 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7740 gcc_assert (!new_bb);
7743 if (mask == NULL_TREE)
7745 mask_arg = build_int_cst (masktype, -1);
7746 mask_arg = vect_init_vector (vinfo, stmt_info,
7747 mask_arg, masktype, NULL);
7750 scale = build_int_cst (scaletype, gs_info.scale);
7752 auto_vec<tree> vec_oprnds0;
7753 auto_vec<tree> vec_oprnds1;
7754 auto_vec<tree> vec_masks;
7755 if (mask)
7757 tree mask_vectype = truth_type_for (vectype);
7758 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7759 modifier == NARROW
7760 ? ncopies / 2 : ncopies,
7761 mask, &vec_masks, mask_vectype);
7763 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7764 modifier == WIDEN
7765 ? ncopies / 2 : ncopies,
7766 gs_info.offset, &vec_oprnds0);
7767 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7768 modifier == NARROW
7769 ? ncopies / 2 : ncopies,
7770 op, &vec_oprnds1);
7771 for (j = 0; j < ncopies; ++j)
7773 if (modifier == WIDEN)
7775 if (j & 1)
7776 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7777 perm_mask, stmt_info, gsi);
7778 else
7779 op = vec_oprnd0 = vec_oprnds0[j / 2];
7780 src = vec_oprnd1 = vec_oprnds1[j];
7781 if (mask)
7782 mask_op = vec_mask = vec_masks[j];
7784 else if (modifier == NARROW)
7786 if (j & 1)
7787 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7788 perm_mask, stmt_info, gsi);
7789 else
7790 src = vec_oprnd1 = vec_oprnds1[j / 2];
7791 op = vec_oprnd0 = vec_oprnds0[j];
7792 if (mask)
7793 mask_op = vec_mask = vec_masks[j / 2];
7795 else
7797 op = vec_oprnd0 = vec_oprnds0[j];
7798 src = vec_oprnd1 = vec_oprnds1[j];
7799 if (mask)
7800 mask_op = vec_mask = vec_masks[j];
7803 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7805 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7806 TYPE_VECTOR_SUBPARTS (srctype)));
7807 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7808 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7809 gassign *new_stmt
7810 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7811 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7812 src = var;
7815 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7817 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7818 TYPE_VECTOR_SUBPARTS (idxtype)));
7819 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7820 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7821 gassign *new_stmt
7822 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7823 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7824 op = var;
7827 if (mask)
7829 tree utype;
7830 mask_arg = mask_op;
7831 if (modifier == NARROW)
7833 var = vect_get_new_ssa_name (mask_halfvectype,
7834 vect_simple_var);
7835 gassign *new_stmt
7836 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7837 : VEC_UNPACK_LO_EXPR,
7838 mask_op);
7839 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7840 mask_arg = var;
7842 tree optype = TREE_TYPE (mask_arg);
7843 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7844 utype = masktype;
7845 else
7846 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7847 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7848 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7849 gassign *new_stmt
7850 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7851 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7852 mask_arg = var;
7853 if (!useless_type_conversion_p (masktype, utype))
7855 gcc_assert (TYPE_PRECISION (utype)
7856 <= TYPE_PRECISION (masktype));
7857 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7858 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7859 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7860 mask_arg = var;
7864 gcall *new_stmt
7865 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7866 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7868 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7870 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7871 return true;
7873 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7874 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7876 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7877 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7879 if (grouped_store)
7881 /* FORNOW */
7882 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7884 /* We vectorize all the stmts of the interleaving group when we
7885 reach the last stmt in the group. */
7886 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7887 < DR_GROUP_SIZE (first_stmt_info)
7888 && !slp)
7890 *vec_stmt = NULL;
7891 return true;
7894 if (slp)
7896 grouped_store = false;
7897 /* VEC_NUM is the number of vect stmts to be created for this
7898 group. */
7899 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7900 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7901 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7902 == first_stmt_info);
7903 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7904 op = vect_get_store_rhs (first_stmt_info);
7906 else
7907 /* VEC_NUM is the number of vect stmts to be created for this
7908 group. */
7909 vec_num = group_size;
7911 ref_type = get_group_alias_ptr_type (first_stmt_info);
7913 else
7914 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7916 if (dump_enabled_p ())
7917 dump_printf_loc (MSG_NOTE, vect_location,
7918 "transform store. ncopies = %d\n", ncopies);
7920 if (memory_access_type == VMAT_ELEMENTWISE
7921 || memory_access_type == VMAT_STRIDED_SLP)
7923 gimple_stmt_iterator incr_gsi;
7924 bool insert_after;
7925 gimple *incr;
7926 tree offvar;
7927 tree ivstep;
7928 tree running_off;
7929 tree stride_base, stride_step, alias_off;
7930 tree vec_oprnd;
7931 tree dr_offset;
7932 unsigned int g;
7933 /* Checked by get_load_store_type. */
7934 unsigned int const_nunits = nunits.to_constant ();
7936 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7937 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7939 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7940 stride_base
7941 = fold_build_pointer_plus
7942 (DR_BASE_ADDRESS (first_dr_info->dr),
7943 size_binop (PLUS_EXPR,
7944 convert_to_ptrofftype (dr_offset),
7945 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7946 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7948 /* For a store with loop-invariant (but other than power-of-2)
7949 stride (i.e. not a grouped access) like so:
7951 for (i = 0; i < n; i += stride)
7952 array[i] = ...;
7954 we generate a new induction variable and new stores from
7955 the components of the (vectorized) rhs:
7957 for (j = 0; ; j += VF*stride)
7958 vectemp = ...;
7959 tmp1 = vectemp[0];
7960 array[j] = tmp1;
7961 tmp2 = vectemp[1];
7962 array[j + stride] = tmp2;
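     In the code generated below, OFFVAR is the new induction variable
     (stepping by VF times the scalar byte step DR_STEP per vector
     iteration), RUNNING_OFF is advanced by STRIDE_STEP after every stored
     element (for SLP, after every completed scalar group), and the
     individual components are taken out of the vectorized rhs with
     BIT_FIELD_REFs.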
7966 unsigned nstores = const_nunits;
7967 unsigned lnel = 1;
7968 tree ltype = elem_type;
7969 tree lvectype = vectype;
7970 if (slp)
7972 if (group_size < const_nunits
7973 && const_nunits % group_size == 0)
7975 nstores = const_nunits / group_size;
7976 lnel = group_size;
7977 ltype = build_vector_type (elem_type, group_size);
7978 lvectype = vectype;
7980 /* First check if vec_extract optab doesn't support extraction
7981 of vector elts directly. */
7982 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7983 machine_mode vmode;
7984 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7985 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7986 group_size).exists (&vmode)
7987 || (convert_optab_handler (vec_extract_optab,
7988 TYPE_MODE (vectype), vmode)
7989 == CODE_FOR_nothing))
7991 /* Try to avoid emitting an extract of vector elements
7992 by performing the extracts using an integer type of the
7993 same size, extracting from a vector of those and then
7994 re-interpreting it as the original vector type if
7995 supported. */
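/* For instance, when storing groups of two 16-bit elements out of an
   8-element vector and the target cannot extract the two-element
   sub-vectors directly: LSIZE is 32, so the vector is viewed as four
   32-bit integers and four integer extracts and stores are emitted
   instead of eight 16-bit ones (assuming the 32-bit integer mode and the
   matching vec_extract pattern are available on the target).  */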
7996 unsigned lsize
7997 = group_size * GET_MODE_BITSIZE (elmode);
7998 unsigned int lnunits = const_nunits / group_size;
7999 /* If we can't construct such a vector fall back to
8000 element extracts from the original vector type and
8001 element size stores. */
8002 if (int_mode_for_size (lsize, 0).exists (&elmode)
8003 && VECTOR_MODE_P (TYPE_MODE (vectype))
8004 && related_vector_mode (TYPE_MODE (vectype), elmode,
8005 lnunits).exists (&vmode)
8006 && (convert_optab_handler (vec_extract_optab,
8007 vmode, elmode)
8008 != CODE_FOR_nothing))
8010 nstores = lnunits;
8011 lnel = group_size;
8012 ltype = build_nonstandard_integer_type (lsize, 1);
8013 lvectype = build_vector_type (ltype, nstores);
8015 /* Else fall back to vector extraction anyway.
8016 Fewer stores are more important than avoiding spilling
8017 of the vector we extract from. Compared to the
8018 construction case in vectorizable_load no store-forwarding
8019 issue exists here for reasonable archs. */
8022 else if (group_size >= const_nunits
8023 && group_size % const_nunits == 0)
8025 nstores = 1;
8026 lnel = const_nunits;
8027 ltype = vectype;
8028 lvectype = vectype;
8030 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8031 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8034 ivstep = stride_step;
8035 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8036 build_int_cst (TREE_TYPE (ivstep), vf));
8038 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8040 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8041 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8042 create_iv (stride_base, ivstep, NULL,
8043 loop, &incr_gsi, insert_after,
8044 &offvar, NULL);
8045 incr = gsi_stmt (incr_gsi);
8047 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8049 alias_off = build_int_cst (ref_type, 0);
8050 stmt_vec_info next_stmt_info = first_stmt_info;
8051 for (g = 0; g < group_size; g++)
8053 running_off = offvar;
8054 if (g)
8056 tree size = TYPE_SIZE_UNIT (ltype);
8057 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8058 size);
8059 tree newoff = copy_ssa_name (running_off, NULL);
8060 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8061 running_off, pos);
8062 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8063 running_off = newoff;
8065 if (!slp)
8066 op = vect_get_store_rhs (next_stmt_info);
8067 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8068 op, &vec_oprnds);
8069 unsigned int group_el = 0;
8070 unsigned HOST_WIDE_INT
8071 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8072 for (j = 0; j < ncopies; j++)
8074 vec_oprnd = vec_oprnds[j];
8075 /* Pun the vector to extract from if necessary. */
8076 if (lvectype != vectype)
8078 tree tem = make_ssa_name (lvectype);
8079 gimple *pun
8080 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8081 lvectype, vec_oprnd));
8082 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8083 vec_oprnd = tem;
8085 for (i = 0; i < nstores; i++)
8087 tree newref, newoff;
8088 gimple *incr, *assign;
8089 tree size = TYPE_SIZE (ltype);
8090 /* Extract the i'th component. */
8091 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8092 bitsize_int (i), size);
8093 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8094 size, pos);
8096 elem = force_gimple_operand_gsi (gsi, elem, true,
8097 NULL_TREE, true,
8098 GSI_SAME_STMT);
8100 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8101 group_el * elsz);
8102 newref = build2 (MEM_REF, ltype,
8103 running_off, this_off);
8104 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8106 /* And store it to *running_off. */
8107 assign = gimple_build_assign (newref, elem);
8108 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8110 group_el += lnel;
8111 if (! slp
8112 || group_el == group_size)
8114 newoff = copy_ssa_name (running_off, NULL);
8115 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8116 running_off, stride_step);
8117 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8119 running_off = newoff;
8120 group_el = 0;
8122 if (g == group_size - 1
8123 && !slp)
8125 if (j == 0 && i == 0)
8126 *vec_stmt = assign;
8127 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8131 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8132 vec_oprnds.release ();
8133 if (slp)
8134 break;
8137 return true;
8140 auto_vec<tree> dr_chain (group_size);
8141 oprnds.create (group_size);
8143 gcc_assert (alignment_support_scheme);
8144 vec_loop_masks *loop_masks
8145 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8146 ? &LOOP_VINFO_MASKS (loop_vinfo)
8147 : NULL);
8148 vec_loop_lens *loop_lens
8149 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8150 ? &LOOP_VINFO_LENS (loop_vinfo)
8151 : NULL);
8153 /* Shouldn't go with length-based approach if fully masked. */
8154 gcc_assert (!loop_lens || !loop_masks);
8156 /* Targets with store-lane instructions must not require explicit
8157 realignment. vect_supportable_dr_alignment always returns either
8158 dr_aligned or dr_unaligned_supported for masked operations. */
8159 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8160 && !mask
8161 && !loop_masks)
8162 || alignment_support_scheme == dr_aligned
8163 || alignment_support_scheme == dr_unaligned_supported);
8165 tree offset = NULL_TREE;
8166 if (!known_eq (poffset, 0))
8167 offset = size_int (poffset);
8169 tree bump;
8170 tree vec_offset = NULL_TREE;
8171 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8173 aggr_type = NULL_TREE;
8174 bump = NULL_TREE;
8176 else if (memory_access_type == VMAT_GATHER_SCATTER)
8178 aggr_type = elem_type;
8179 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8180 &bump, &vec_offset);
8182 else
8184 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8185 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8186 else
8187 aggr_type = vectype;
8188 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8189 memory_access_type);
8192 if (mask)
8193 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8195 /* In case the vectorization factor (VF) is bigger than the number
8196 of elements that we can fit in a vectype (nunits), we have to generate
8197 more than one vector stmt, i.e. we need to "unroll" the
8198 vector stmt by a factor VF/nunits. */
8200 /* In case of interleaving (non-unit grouped access):
8202 S1: &base + 2 = x2
8203 S2: &base = x0
8204 S3: &base + 1 = x1
8205 S4: &base + 3 = x3
8207 We create vectorized stores starting from base address (the access of the
8208 first stmt in the chain (S2 in the above example), when the last store stmt
8209 of the chain (S4) is reached:
8211 VS1: &base = vx2
8212 VS2: &base + vec_size*1 = vx0
8213 VS3: &base + vec_size*2 = vx1
8214 VS4: &base + vec_size*3 = vx3
8216 Then permutation statements are generated:
8218 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8219 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8222 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8223 (the order of the data-refs in the output of vect_permute_store_chain
8224 corresponds to the order of scalar stmts in the interleaving chain - see
8225 the documentation of vect_permute_store_chain()).
8227 In case of both multiple types and interleaving, above vector stores and
8228 permutation stmts are created for every copy. The result vector stmts are
8229 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8230 STMT_VINFO_RELATED_STMT for the next copies.
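   For instance, with a group of two stores and 4-element vectors, the
   permutations interleave { a0, a1, a2, a3 } and { b0, b1, b2, b3 } into
   { a0, b0, a1, b1 } and { a2, b2, a3, b3 }, so that memory receives
   a0 b0 a1 b1 a2 b2 a3 b3, i.e. the original scalar order.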
8233 auto_vec<tree> vec_masks;
8234 tree vec_mask = NULL;
8235 auto_vec<tree> vec_offsets;
8236 auto_vec<vec<tree> > gvec_oprnds;
8237 gvec_oprnds.safe_grow_cleared (group_size, true);
8238 for (j = 0; j < ncopies; j++)
8240 gimple *new_stmt;
8241 if (j == 0)
8243 if (slp)
8245 /* Get vectorized arguments for SLP_NODE. */
8246 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8247 op, &vec_oprnds);
8248 vec_oprnd = vec_oprnds[0];
8250 else
8252 /* For interleaved stores we collect vectorized defs for all the
8253 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8254 used as an input to vect_permute_store_chain().
8256 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8257 and OPRNDS are of size 1. */
8258 stmt_vec_info next_stmt_info = first_stmt_info;
8259 for (i = 0; i < group_size; i++)
8261 /* Since gaps are not supported for interleaved stores,
8262 DR_GROUP_SIZE is the exact number of stmts in the chain.
8263 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8264 that there is no interleaving, DR_GROUP_SIZE is 1,
8265 and only one iteration of the loop will be executed. */
8266 op = vect_get_store_rhs (next_stmt_info);
8267 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8268 ncopies, op, &gvec_oprnds[i]);
8269 vec_oprnd = gvec_oprnds[i][0];
8270 dr_chain.quick_push (gvec_oprnds[i][0]);
8271 oprnds.quick_push (gvec_oprnds[i][0]);
8272 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8274 if (mask)
8276 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8277 mask, &vec_masks, mask_vectype);
8278 vec_mask = vec_masks[0];
8282 /* We should have caught mismatched types earlier. */
8283 gcc_assert (useless_type_conversion_p (vectype,
8284 TREE_TYPE (vec_oprnd)));
8285 bool simd_lane_access_p
8286 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8287 if (simd_lane_access_p
8288 && !loop_masks
8289 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8290 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8291 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8292 && integer_zerop (DR_INIT (first_dr_info->dr))
8293 && alias_sets_conflict_p (get_alias_set (aggr_type),
8294 get_alias_set (TREE_TYPE (ref_type))))
8296 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8297 dataref_offset = build_int_cst (ref_type, 0);
8299 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8301 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8302 slp_node, &gs_info, &dataref_ptr,
8303 &vec_offsets);
8304 vec_offset = vec_offsets[0];
8306 else
8307 dataref_ptr
8308 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8309 simd_lane_access_p ? loop : NULL,
8310 offset, &dummy, gsi, &ptr_incr,
8311 simd_lane_access_p, bump);
8313 else
8315 /* For interleaved stores we created vectorized defs for all the
8316 defs stored in OPRNDS in the previous iteration (previous copy).
8317 DR_CHAIN is then used as an input to vect_permute_store_chain().
8318 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8319 OPRNDS are of size 1. */
8320 for (i = 0; i < group_size; i++)
8322 vec_oprnd = gvec_oprnds[i][j];
8323 dr_chain[i] = gvec_oprnds[i][j];
8324 oprnds[i] = gvec_oprnds[i][j];
8326 if (mask)
8327 vec_mask = vec_masks[j];
8328 if (dataref_offset)
8329 dataref_offset
8330 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8331 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8332 vec_offset = vec_offsets[j];
8333 else
8334 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8335 stmt_info, bump);
8338 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8340 tree vec_array;
8342 /* Get an array into which we can store the individual vectors. */
8343 vec_array = create_vector_array (vectype, vec_num);
8345 /* Invalidate the current contents of VEC_ARRAY. This should
8346 become an RTL clobber too, which prevents the vector registers
8347 from being upward-exposed. */
8348 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8350 /* Store the individual vectors into the array. */
8351 for (i = 0; i < vec_num; i++)
8353 vec_oprnd = dr_chain[i];
8354 write_vector_array (vinfo, stmt_info,
8355 gsi, vec_oprnd, vec_array, i);
8358 tree final_mask = NULL;
8359 if (loop_masks)
8360 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8361 vectype, j);
8362 if (vec_mask)
8363 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8364 final_mask, vec_mask, gsi);
8366 gcall *call;
8367 if (final_mask)
8369 /* Emit:
8370 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8371 VEC_ARRAY). */
8372 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8373 tree alias_ptr = build_int_cst (ref_type, align);
8374 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8375 dataref_ptr, alias_ptr,
8376 final_mask, vec_array);
8378 else
8380 /* Emit:
8381 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8382 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8383 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8384 vec_array);
8385 gimple_call_set_lhs (call, data_ref);
8387 gimple_call_set_nothrow (call, true);
8388 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8389 new_stmt = call;
8391 /* Record that VEC_ARRAY is now dead. */
8392 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8394 else
8396 new_stmt = NULL;
8397 if (grouped_store)
8399 if (j == 0)
8400 result_chain.create (group_size);
8401 /* Permute. */
8402 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8403 gsi, &result_chain);
8406 stmt_vec_info next_stmt_info = first_stmt_info;
8407 for (i = 0; i < vec_num; i++)
8409 unsigned misalign;
8410 unsigned HOST_WIDE_INT align;
8412 tree final_mask = NULL_TREE;
8413 if (loop_masks)
8414 final_mask = vect_get_loop_mask (gsi, loop_masks,
8415 vec_num * ncopies,
8416 vectype, vec_num * j + i);
8417 if (vec_mask)
8418 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8419 final_mask, vec_mask, gsi);
8421 if (memory_access_type == VMAT_GATHER_SCATTER)
8423 tree scale = size_int (gs_info.scale);
8424 gcall *call;
8425 if (final_mask)
8426 call = gimple_build_call_internal
8427 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8428 scale, vec_oprnd, final_mask);
8429 else
8430 call = gimple_build_call_internal
8431 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8432 scale, vec_oprnd);
8433 gimple_call_set_nothrow (call, true);
8434 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8435 new_stmt = call;
8436 break;
8439 if (i > 0)
8440 /* Bump the vector pointer. */
8441 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8442 gsi, stmt_info, bump);
8444 if (slp)
8445 vec_oprnd = vec_oprnds[i];
8446 else if (grouped_store)
8447 /* For grouped stores vectorized defs are interleaved in
8448 vect_permute_store_chain(). */
8449 vec_oprnd = result_chain[i];
8451 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8452 if (alignment_support_scheme == dr_aligned)
8453 misalign = 0;
8454 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8456 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8457 misalign = 0;
8459 else
8460 misalign = misalignment;
8461 if (dataref_offset == NULL_TREE
8462 && TREE_CODE (dataref_ptr) == SSA_NAME)
8463 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8464 misalign);
8465 align = least_bit_hwi (misalign | align);
8467 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8469 tree perm_mask = perm_mask_for_reverse (vectype);
8470 tree perm_dest = vect_create_destination_var
8471 (vect_get_store_rhs (stmt_info), vectype);
8472 tree new_temp = make_ssa_name (perm_dest);
8474 /* Generate the permute statement. */
8475 gimple *perm_stmt
8476 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8477 vec_oprnd, perm_mask);
8478 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8480 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8481 vec_oprnd = new_temp;
8484 /* Arguments are ready. Create the new vector stmt. */
8485 if (final_mask)
8487 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8488 gcall *call
8489 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8490 dataref_ptr, ptr,
8491 final_mask, vec_oprnd);
8492 gimple_call_set_nothrow (call, true);
8493 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8494 new_stmt = call;
8496 else if (loop_lens)
8498 tree final_len
8499 = vect_get_loop_len (loop_vinfo, loop_lens,
8500 vec_num * ncopies, vec_num * j + i);
8501 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8502 machine_mode vmode = TYPE_MODE (vectype);
8503 opt_machine_mode new_ovmode
8504 = get_len_load_store_mode (vmode, false);
8505 machine_mode new_vmode = new_ovmode.require ();
8506 /* Need conversion if it's wrapped with VnQI. */
8507 if (vmode != new_vmode)
8509 tree new_vtype
8510 = build_vector_type_for_mode (unsigned_intQI_type_node,
8511 new_vmode);
8512 tree var
8513 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8514 vec_oprnd
8515 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8516 gassign *new_stmt
8517 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8518 vec_oprnd);
8519 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8520 gsi);
8521 vec_oprnd = var;
8524 signed char biasval =
8525 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8527 tree bias = build_int_cst (intQI_type_node, biasval);
8528 gcall *call
8529 = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
8530 ptr, final_len, vec_oprnd,
8531 bias);
8532 gimple_call_set_nothrow (call, true);
8533 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8534 new_stmt = call;
8536 else
8538 data_ref = fold_build2 (MEM_REF, vectype,
8539 dataref_ptr,
8540 dataref_offset
8541 ? dataref_offset
8542 : build_int_cst (ref_type, 0));
8543 if (alignment_support_scheme == dr_aligned)
8545 else
8546 TREE_TYPE (data_ref)
8547 = build_aligned_type (TREE_TYPE (data_ref),
8548 align * BITS_PER_UNIT);
8549 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8550 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8551 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8554 if (slp)
8555 continue;
8557 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8558 if (!next_stmt_info)
8559 break;
8562 if (!slp)
8564 if (j == 0)
8565 *vec_stmt = new_stmt;
8566 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8570 for (i = 0; i < group_size; ++i)
8572 vec<tree> oprndsi = gvec_oprnds[i];
8573 oprndsi.release ();
8575 oprnds.release ();
8576 result_chain.release ();
8577 vec_oprnds.release ();
8579 return true;
8582 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8583 VECTOR_CST mask. No checks are made that the target platform supports the
8584 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8585 vect_gen_perm_mask_checked. */
8587 tree
8588 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8590 tree mask_type;
8592 poly_uint64 nunits = sel.length ();
8593 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8595 mask_type = build_vector_type (ssizetype, nunits);
8596 return vec_perm_indices_to_tree (mask_type, sel);
8599 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8600 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8602 tree
8603 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8605 machine_mode vmode = TYPE_MODE (vectype);
8606 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
8607 return vect_gen_perm_mask_any (vectype, sel);
8610 /* Given vector variables X and Y that were generated for the scalar
8611 STMT_INFO, generate instructions to permute the vector elements of X and Y
8612 using permutation mask MASK_VEC, insert them at *GSI and return the
8613 permuted vector variable. */
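/* For instance, with X = { x0, x1, x2, x3 }, Y = { y0, y1, y2, y3 } and
   MASK_VEC = { 0, 4, 1, 5 }, the generated VEC_PERM_EXPR yields
   { x0, y0, x1, y1 }; selector values 0..3 pick from X and 4..7 from Y.  */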
8615 static tree
8616 permute_vec_elements (vec_info *vinfo,
8617 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8618 gimple_stmt_iterator *gsi)
8620 tree vectype = TREE_TYPE (x);
8621 tree perm_dest, data_ref;
8622 gimple *perm_stmt;
8624 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8625 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8626 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8627 else
8628 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8629 data_ref = make_ssa_name (perm_dest);
8631 /* Generate the permute statement. */
8632 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8633 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8635 return data_ref;
8638 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8639 inserting them on the loop's preheader edge. Returns true if we
8640 were successful in doing so (and thus STMT_INFO can be moved then),
8641 otherwise returns false. */
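/* For instance, if STMT_INFO uses p_2 and the loop body contains
   p_2 = &a[n_3] with n_3 defined outside LOOP, the definition of p_2 is
   moved to the preheader edge; if p_2 instead depended on another
   definition inside LOOP (or were defined by a PHI), we give up rather
   than recurse.  */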
8643 static bool
8644 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8646 ssa_op_iter i;
8647 tree op;
8648 bool any = false;
8650 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8652 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8653 if (!gimple_nop_p (def_stmt)
8654 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8656 /* Make sure we don't need to recurse. While we could do
8657 so in simple cases, when there are more complex use webs
8658 we don't have an easy way to preserve stmt order to fulfil
8659 dependencies within them. */
8660 tree op2;
8661 ssa_op_iter i2;
8662 if (gimple_code (def_stmt) == GIMPLE_PHI)
8663 return false;
8664 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8666 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8667 if (!gimple_nop_p (def_stmt2)
8668 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8669 return false;
8671 any = true;
8675 if (!any)
8676 return true;
8678 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8680 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8681 if (!gimple_nop_p (def_stmt)
8682 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8684 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8685 gsi_remove (&gsi, false);
8686 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8690 return true;
8693 /* vectorizable_load.
8695 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8696 that can be vectorized.
8697 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8698 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8699 Return true if STMT_INFO is vectorizable in this way. */
8701 static bool
8702 vectorizable_load (vec_info *vinfo,
8703 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8704 gimple **vec_stmt, slp_tree slp_node,
8705 stmt_vector_for_cost *cost_vec)
8707 tree scalar_dest;
8708 tree vec_dest = NULL;
8709 tree data_ref = NULL;
8710 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8711 class loop *loop = NULL;
8712 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8713 bool nested_in_vect_loop = false;
8714 tree elem_type;
8715 tree new_temp;
8716 machine_mode mode;
8717 tree dummy;
8718 tree dataref_ptr = NULL_TREE;
8719 tree dataref_offset = NULL_TREE;
8720 gimple *ptr_incr = NULL;
8721 int ncopies;
8722 int i, j;
8723 unsigned int group_size;
8724 poly_uint64 group_gap_adj;
8725 tree msq = NULL_TREE, lsq;
8726 tree realignment_token = NULL_TREE;
8727 gphi *phi = NULL;
8728 vec<tree> dr_chain = vNULL;
8729 bool grouped_load = false;
8730 stmt_vec_info first_stmt_info;
8731 stmt_vec_info first_stmt_info_for_drptr = NULL;
8732 bool compute_in_loop = false;
8733 class loop *at_loop;
8734 int vec_num;
8735 bool slp = (slp_node != NULL);
8736 bool slp_perm = false;
8737 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8738 poly_uint64 vf;
8739 tree aggr_type;
8740 gather_scatter_info gs_info;
8741 tree ref_type;
8742 enum vect_def_type mask_dt = vect_unknown_def_type;
8744 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8745 return false;
8747 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8748 && ! vec_stmt)
8749 return false;
8751 if (!STMT_VINFO_DATA_REF (stmt_info))
8752 return false;
8754 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8755 int mask_index = -1;
8756 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8758 scalar_dest = gimple_assign_lhs (assign);
8759 if (TREE_CODE (scalar_dest) != SSA_NAME)
8760 return false;
8762 tree_code code = gimple_assign_rhs_code (assign);
8763 if (code != ARRAY_REF
8764 && code != BIT_FIELD_REF
8765 && code != INDIRECT_REF
8766 && code != COMPONENT_REF
8767 && code != IMAGPART_EXPR
8768 && code != REALPART_EXPR
8769 && code != MEM_REF
8770 && TREE_CODE_CLASS (code) != tcc_declaration)
8771 return false;
8773 else
8775 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8776 if (!call || !gimple_call_internal_p (call))
8777 return false;
8779 internal_fn ifn = gimple_call_internal_fn (call);
8780 if (!internal_load_fn_p (ifn))
8781 return false;
8783 scalar_dest = gimple_call_lhs (call);
8784 if (!scalar_dest)
8785 return false;
8787 mask_index = internal_fn_mask_index (ifn);
8788 /* ??? For SLP the mask operand is always last. */
8789 if (mask_index >= 0 && slp_node)
8790 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8791 if (mask_index >= 0
8792 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8793 &mask, NULL, &mask_dt, &mask_vectype))
8794 return false;
8797 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8798 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8800 if (loop_vinfo)
8802 loop = LOOP_VINFO_LOOP (loop_vinfo);
8803 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8804 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8806 else
8807 vf = 1;
8809 /* Multiple types in SLP are handled by creating the appropriate number of
8810 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8811 case of SLP. */
8812 if (slp)
8813 ncopies = 1;
8814 else
8815 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8817 gcc_assert (ncopies >= 1);
8819 /* FORNOW. This restriction should be relaxed. */
8820 if (nested_in_vect_loop && ncopies > 1)
8822 if (dump_enabled_p ())
8823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8824 "multiple types in nested loop.\n");
8825 return false;
8828 /* Invalidate assumptions made by dependence analysis when vectorization
8829 on the unrolled body effectively re-orders stmts. */
8830 if (ncopies > 1
8831 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8832 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8833 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8835 if (dump_enabled_p ())
8836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8837 "cannot perform implicit CSE when unrolling "
8838 "with negative dependence distance\n");
8839 return false;
8842 elem_type = TREE_TYPE (vectype);
8843 mode = TYPE_MODE (vectype);
8845 /* FORNOW. In some cases we can vectorize even if the data type is not
8846 supported (e.g. data copies). */
8847 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8849 if (dump_enabled_p ())
8850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8851 "Aligned load, but unsupported type.\n");
8852 return false;
8855 /* Check if the load is a part of an interleaving chain. */
8856 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8858 grouped_load = true;
8859 /* FORNOW */
8860 gcc_assert (!nested_in_vect_loop);
8861 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8863 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8864 group_size = DR_GROUP_SIZE (first_stmt_info);
8866 /* Refuse non-SLP vectorization of SLP-only groups. */
8867 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8869 if (dump_enabled_p ())
8870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8871 "cannot vectorize load in non-SLP mode.\n");
8872 return false;
8875 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8877 slp_perm = true;
8879 if (!loop_vinfo)
8881 /* In BB vectorization we must not access elements in excess of
8882 DR_GROUP_SIZE with a loaded vector. */
8883 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8884 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8885 unsigned HOST_WIDE_INT nunits;
8886 unsigned j, k, maxk = 0;
8887 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8888 if (k > maxk)
8889 maxk = k;
8890 tree vectype = SLP_TREE_VECTYPE (slp_node);
8891 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8892 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8894 if (dump_enabled_p ())
8895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8896 "BB vectorization with gaps at the end of "
8897 "a load is not supported\n");
8898 return false;
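/* Illustrating the check above: with DR_GROUP_SIZE 3 and a 4-element
   vectype, (3 & ~3) is 0, so any permutation index would make the load
   read into the gap at the end of the group, and BB vectorization of
   the permuted load is refused.  */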
8902 auto_vec<tree> tem;
8903 unsigned n_perms;
8904 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8905 true, &n_perms))
8907 if (dump_enabled_p ())
8908 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8909 vect_location,
8910 "unsupported load permutation\n");
8911 return false;
8915 /* Invalidate assumptions made by dependence analysis when vectorization
8916 on the unrolled body effectively re-orders stmts. */
8917 if (!PURE_SLP_STMT (stmt_info)
8918 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8919 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8920 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8922 if (dump_enabled_p ())
8923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8924 "cannot perform implicit CSE when performing "
8925 "group loads with negative dependence distance\n");
8926 return false;
8929 else
8930 group_size = 1;
8932 vect_memory_access_type memory_access_type;
8933 enum dr_alignment_support alignment_support_scheme;
8934 int misalignment;
8935 poly_int64 poffset;
8936 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8937 ncopies, &memory_access_type, &poffset,
8938 &alignment_support_scheme, &misalignment, &gs_info))
8939 return false;
8941 if (mask)
8943 if (memory_access_type == VMAT_CONTIGUOUS)
8945 machine_mode vec_mode = TYPE_MODE (vectype);
8946 if (!VECTOR_MODE_P (vec_mode)
8947 || !can_vec_mask_load_store_p (vec_mode,
8948 TYPE_MODE (mask_vectype), true))
8949 return false;
8951 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8952 && memory_access_type != VMAT_GATHER_SCATTER)
8954 if (dump_enabled_p ())
8955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8956 "unsupported access type for masked load.\n");
8957 return false;
8959 else if (memory_access_type == VMAT_GATHER_SCATTER
8960 && gs_info.ifn == IFN_LAST
8961 && !gs_info.decl)
8963 if (dump_enabled_p ())
8964 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8965 "unsupported masked emulated gather.\n");
8966 return false;
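/* Summarizing the masked-load checks just done: a contiguous masked load
   needs can_vec_mask_load_store_p support from the target, load-lanes
   and gather accesses are handled via their masked variants, any other
   access type is rejected, and so is an emulated gather (no IFN and no
   target builtin) when a mask is present.  */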
8970 if (!vec_stmt) /* transformation not required. */
8972 if (slp_node
8973 && mask
8974 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8975 mask_vectype))
8977 if (dump_enabled_p ())
8978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8979 "incompatible vector types for invariants\n");
8980 return false;
8983 if (!slp)
8984 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8986 if (loop_vinfo
8987 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8988 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8989 VLS_LOAD, group_size,
8990 memory_access_type, &gs_info,
8991 mask);
8993 if (dump_enabled_p ()
8994 && memory_access_type != VMAT_ELEMENTWISE
8995 && memory_access_type != VMAT_GATHER_SCATTER
8996 && alignment_support_scheme != dr_aligned)
8997 dump_printf_loc (MSG_NOTE, vect_location,
8998 "Vectorizing an unaligned access.\n");
9000 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9001 vinfo->any_known_not_updated_vssa = true;
9003 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9004 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
9005 alignment_support_scheme, misalignment,
9006 &gs_info, slp_node, cost_vec);
9007 return true;
9010 if (!slp)
9011 gcc_assert (memory_access_type
9012 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9014 if (dump_enabled_p ())
9015 dump_printf_loc (MSG_NOTE, vect_location,
9016 "transform load. ncopies = %d\n", ncopies);
9018 /* Transform. */
9020 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9021 ensure_base_align (dr_info);
9023 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9025 vect_build_gather_load_calls (vinfo,
9026 stmt_info, gsi, vec_stmt, &gs_info, mask);
9027 return true;
9030 if (memory_access_type == VMAT_INVARIANT)
9032 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9033 /* If we have versioned for aliasing or the loop doesn't
9034 have any data dependencies that would preclude this,
9035 then we are sure this is a loop invariant load and
9036 thus we can insert it on the preheader edge. */
9037 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9038 && !nested_in_vect_loop
9039 && hoist_defs_of_uses (stmt_info, loop));
9040 if (hoist_p)
9042 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9043 if (dump_enabled_p ())
9044 dump_printf_loc (MSG_NOTE, vect_location,
9045 "hoisting out of the vectorized loop: %G",
9046 (gimple *) stmt);
9047 scalar_dest = copy_ssa_name (scalar_dest);
9048 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9049 edge pe = loop_preheader_edge (loop);
9050 gphi *vphi = get_virtual_phi (loop->header);
9051 tree vuse;
9052 if (vphi)
9053 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
9054 else
9055 vuse = gimple_vuse (gsi_stmt (*gsi));
9056 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
9057 gimple_set_vuse (new_stmt, vuse);
9058 gsi_insert_on_edge_immediate (pe, new_stmt);
9060 /* These copies are all equivalent, but currently the representation
9061 requires a separate STMT_VINFO_VEC_STMT for each one. */
9062 gimple_stmt_iterator gsi2 = *gsi;
9063 gsi_next (&gsi2);
9064 for (j = 0; j < ncopies; j++)
9066 if (hoist_p)
9067 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9068 vectype, NULL);
9069 else
9070 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9071 vectype, &gsi2);
9072 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9073 if (slp)
9074 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9075 else
9077 if (j == 0)
9078 *vec_stmt = new_stmt;
9079 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9082 return true;
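/* A short sketch of the invariant-load case handled above: when no data
   dependences forbid it, the scalar load is re-issued on the loop
   preheader edge and every vector copy is simply a splat of that value
   built by vect_init_vector; otherwise the splat is built right after
   the original scalar load inside the loop.  */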
9085 if (memory_access_type == VMAT_ELEMENTWISE
9086 || memory_access_type == VMAT_STRIDED_SLP)
9088 gimple_stmt_iterator incr_gsi;
9089 bool insert_after;
9090 tree offvar;
9091 tree ivstep;
9092 tree running_off;
9093 vec<constructor_elt, va_gc> *v = NULL;
9094 tree stride_base, stride_step, alias_off;
9095 /* Checked by get_load_store_type. */
9096 unsigned int const_nunits = nunits.to_constant ();
9097 unsigned HOST_WIDE_INT cst_offset = 0;
9098 tree dr_offset;
9100 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9101 gcc_assert (!nested_in_vect_loop);
9103 if (grouped_load)
9105 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9106 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9108 else
9110 first_stmt_info = stmt_info;
9111 first_dr_info = dr_info;
9113 if (slp && grouped_load)
9115 group_size = DR_GROUP_SIZE (first_stmt_info);
9116 ref_type = get_group_alias_ptr_type (first_stmt_info);
9118 else
9120 if (grouped_load)
9121 cst_offset
9122 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9123 * vect_get_place_in_interleaving_chain (stmt_info,
9124 first_stmt_info));
9125 group_size = 1;
9126 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9129 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9130 stride_base
9131 = fold_build_pointer_plus
9132 (DR_BASE_ADDRESS (first_dr_info->dr),
9133 size_binop (PLUS_EXPR,
9134 convert_to_ptrofftype (dr_offset),
9135 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9136 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9138 /* For a load with loop-invariant (but other than power-of-2)
9139 stride (i.e. not a grouped access) like so:
9141 for (i = 0; i < n; i += stride)
9142 ... = array[i];
9144 we generate a new induction variable and new accesses to
9145 form a new vector (or vectors, depending on ncopies):
9147 for (j = 0; ; j += VF*stride)
9148 tmp1 = array[j];
9149 tmp2 = array[j + stride];
9151 vectemp = {tmp1, tmp2, ...}
9154 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9155 build_int_cst (TREE_TYPE (stride_step), vf));
9157 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9159 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9160 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9161 create_iv (stride_base, ivstep, NULL,
9162 loop, &incr_gsi, insert_after,
9163 &offvar, NULL);
9165 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9167 running_off = offvar;
9168 alias_off = build_int_cst (ref_type, 0);
9169 int nloads = const_nunits;
9170 int lnel = 1;
9171 tree ltype = TREE_TYPE (vectype);
9172 tree lvectype = vectype;
9173 auto_vec<tree> dr_chain;
9174 if (memory_access_type == VMAT_STRIDED_SLP)
9176 if (group_size < const_nunits)
9178 /* First check if vec_init optab supports construction from vector
9179 elts directly. Otherwise avoid emitting a constructor of
9180 vector elements by performing the loads using an integer type
9181 of the same size, constructing a vector of those and then
9182 re-interpreting it as the original vector type. This avoids a
9183 huge runtime penalty due to the general inability to perform
9184 store forwarding from smaller stores to a larger load. */
9185 tree ptype;
9186 tree vtype
9187 = vector_vector_composition_type (vectype,
9188 const_nunits / group_size,
9189 &ptype);
9190 if (vtype != NULL_TREE)
9192 nloads = const_nunits / group_size;
9193 lnel = group_size;
9194 lvectype = vtype;
9195 ltype = ptype;
9198 else
9200 nloads = 1;
9201 lnel = const_nunits;
9202 ltype = vectype;
9204 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9206 /* Load a vector(1) scalar_type directly when the vectype has just one element. */
9207 else if (nloads == 1)
9208 ltype = vectype;
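/* Illustrative example of the choice above: for a V8HI vectype and a
   group size of 2, the loads are ideally done as four 32-bit pieces
   (one per group), combined into a four-element vector and then
   view-converted back to V8HI, rather than as eight separate HImode
   loads feeding a vector constructor.  */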
9210 if (slp)
9212 /* For SLP permutation support we need to load the whole group,
9213 not only the number of vector stmts the permutation result
9214 fits in. */
9215 if (slp_perm)
9217 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9218 variable VF. */
9219 unsigned int const_vf = vf.to_constant ();
9220 ncopies = CEIL (group_size * const_vf, const_nunits);
9221 dr_chain.create (ncopies);
9223 else
9224 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9226 unsigned int group_el = 0;
9227 unsigned HOST_WIDE_INT
9228 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9229 for (j = 0; j < ncopies; j++)
9231 if (nloads > 1)
9232 vec_alloc (v, nloads);
9233 gimple *new_stmt = NULL;
9234 for (i = 0; i < nloads; i++)
9236 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9237 group_el * elsz + cst_offset);
9238 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9239 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9240 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9241 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9242 if (nloads > 1)
9243 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9244 gimple_assign_lhs (new_stmt));
9246 group_el += lnel;
9247 if (! slp
9248 || group_el == group_size)
9250 tree newoff = copy_ssa_name (running_off);
9251 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9252 running_off, stride_step);
9253 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9255 running_off = newoff;
9256 group_el = 0;
9259 if (nloads > 1)
9261 tree vec_inv = build_constructor (lvectype, v);
9262 new_temp = vect_init_vector (vinfo, stmt_info,
9263 vec_inv, lvectype, gsi);
9264 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9265 if (lvectype != vectype)
9267 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9268 VIEW_CONVERT_EXPR,
9269 build1 (VIEW_CONVERT_EXPR,
9270 vectype, new_temp));
9271 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9275 if (slp)
9277 if (slp_perm)
9278 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9279 else
9280 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9282 else
9284 if (j == 0)
9285 *vec_stmt = new_stmt;
9286 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9289 if (slp_perm)
9291 unsigned n_perms;
9292 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9293 false, &n_perms);
9295 return true;
9298 if (memory_access_type == VMAT_GATHER_SCATTER
9299 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9300 grouped_load = false;
9302 if (grouped_load)
9304 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9305 group_size = DR_GROUP_SIZE (first_stmt_info);
9306 /* For SLP vectorization we directly vectorize a subchain
9307 without permutation. */
9308 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9309 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9310 /* For BB vectorization always use the first stmt to base
9311 the data ref pointer on. */
9312 if (bb_vinfo)
9313 first_stmt_info_for_drptr
9314 = vect_find_first_scalar_stmt_in_slp (slp_node);
9316 /* Check if the chain of loads is already vectorized. */
9317 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9318 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9319 ??? But we can only do so if there is exactly one
9320 as we have no way to get at the rest. Leave the CSE
9321 opportunity alone.
9322 ??? With the group load eventually participating
9323 in multiple different permutations (having multiple
9324 slp nodes which refer to the same group) the CSE
9325 is even wrong code. See PR56270. */
9326 && !slp)
9328 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9329 return true;
9331 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9332 group_gap_adj = 0;
9334 /* VEC_NUM is the number of vect stmts to be created for this group. */
9335 if (slp)
9337 grouped_load = false;
9338 /* If an SLP permutation is from N elements to N elements,
9339 and if one vector holds a whole number of N, we can load
9340 the inputs to the permutation in the same way as an
9341 unpermuted sequence. In other cases we need to load the
9342 whole group, not only the number of vector stmts the
9343 permutation result fits in. */
9344 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9345 if (slp_perm
9346 && (group_size != scalar_lanes
9347 || !multiple_p (nunits, group_size)))
9349 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9350 variable VF; see vect_transform_slp_perm_load. */
9351 unsigned int const_vf = vf.to_constant ();
9352 unsigned int const_nunits = nunits.to_constant ();
9353 vec_num = CEIL (group_size * const_vf, const_nunits);
9354 group_gap_adj = vf * group_size - nunits * vec_num;
9356 else
9358 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9359 group_gap_adj
9360 = group_size - scalar_lanes;
9363 else
9364 vec_num = group_size;
9366 ref_type = get_group_alias_ptr_type (first_stmt_info);
9368 else
9370 first_stmt_info = stmt_info;
9371 first_dr_info = dr_info;
9372 group_size = vec_num = 1;
9373 group_gap_adj = 0;
9374 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9375 if (slp)
9376 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
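/* A rough example of the bookkeeping above: for an SLP node covering
   only 2 lanes of a 4-element interleaving group without a load
   permutation, GROUP_GAP_ADJ is 2 and two elements are skipped after
   each fully loaded group; with a load permutation the whole group is
   loaded and any excess is compensated for afterwards.  */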
9379 gcc_assert (alignment_support_scheme);
9380 vec_loop_masks *loop_masks
9381 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9382 ? &LOOP_VINFO_MASKS (loop_vinfo)
9383 : NULL);
9384 vec_loop_lens *loop_lens
9385 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9386 ? &LOOP_VINFO_LENS (loop_vinfo)
9387 : NULL);
9389 /* We should not use the length-based approach if the loop is fully masked. */
9390 gcc_assert (!loop_lens || !loop_masks);
9392 /* Targets with store-lane instructions must not require explicit
9393 realignment. vect_supportable_dr_alignment always returns either
9394 dr_aligned or dr_unaligned_supported for masked operations. */
9395 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9396 && !mask
9397 && !loop_masks)
9398 || alignment_support_scheme == dr_aligned
9399 || alignment_support_scheme == dr_unaligned_supported);
9401 /* In case the vectorization factor (VF) is bigger than the number
9402 of elements that we can fit in a vectype (nunits), we have to generate
9403 more than one vector stmt - i.e - we need to "unroll" the
9404 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9405 from one copy of the vector stmt to the next, in the field
9406 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9407 stages to find the correct vector defs to be used when vectorizing
9408 stmts that use the defs of the current stmt. The example below
9409 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9410 need to create 4 vectorized stmts):
9412 before vectorization:
9413 RELATED_STMT VEC_STMT
9414 S1: x = memref - -
9415 S2: z = x + 1 - -
9417 step 1: vectorize stmt S1:
9418 We first create the vector stmt VS1_0, and, as usual, record a
9419 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9420 Next, we create the vector stmt VS1_1, and record a pointer to
9421 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9422 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9423 stmts and pointers:
9424 RELATED_STMT VEC_STMT
9425 VS1_0: vx0 = memref0 VS1_1 -
9426 VS1_1: vx1 = memref1 VS1_2 -
9427 VS1_2: vx2 = memref2 VS1_3 -
9428 VS1_3: vx3 = memref3 - -
9429 S1: x = load - VS1_0
9430 S2: z = x + 1 - -
9433 /* In case of interleaving (non-unit grouped access):
9435 S1: x2 = &base + 2
9436 S2: x0 = &base
9437 S3: x1 = &base + 1
9438 S4: x3 = &base + 3
9440 Vectorized loads are created in the order of memory accesses
9441 starting from the access of the first stmt of the chain:
9443 VS1: vx0 = &base
9444 VS2: vx1 = &base + vec_size*1
9445 VS3: vx3 = &base + vec_size*2
9446 VS4: vx4 = &base + vec_size*3
9448 Then permutation statements are generated:
9450 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9451 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9454 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9455 (the order of the data-refs in the output of vect_permute_load_chain
9456 corresponds to the order of scalar stmts in the interleaving chain - see
9457 the documentation of vect_permute_load_chain()).
9458 The generation of permutation stmts and recording them in
9459 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9461 In case of both multiple types and interleaving, the vector loads and
9462 permutation stmts above are created for every copy. The result vector
9463 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9464 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9466 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9467 on a target that supports unaligned accesses (dr_unaligned_supported)
9468 we generate the following code:
9469 p = initial_addr;
9470 indx = 0;
9471 loop {
9472 p = p + indx * vectype_size;
9473 vec_dest = *(p);
9474 indx = indx + 1;
9477 Otherwise, the data reference is potentially unaligned on a target that
9478 does not support unaligned accesses (dr_explicit_realign_optimized) -
9479 then generate the following code, in which the data in each iteration is
9480 obtained by two vector loads, one from the previous iteration, and one
9481 from the current iteration:
9482 p1 = initial_addr;
9483 msq_init = *(floor(p1))
9484 p2 = initial_addr + VS - 1;
9485 realignment_token = call target_builtin;
9486 indx = 0;
9487 loop {
9488 p2 = p2 + indx * vectype_size
9489 lsq = *(floor(p2))
9490 vec_dest = realign_load (msq, lsq, realignment_token)
9491 indx = indx + 1;
9492 msq = lsq;
9493 } */
9495 /* If the misalignment remains the same throughout the execution of the
9496 loop, we can create the init_addr and permutation mask at the loop
9497 preheader. Otherwise, it needs to be created inside the loop.
9498 This can only occur when vectorizing memory accesses in the inner-loop
9499 nested within an outer-loop that is being vectorized. */
9501 if (nested_in_vect_loop
9502 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9503 GET_MODE_SIZE (TYPE_MODE (vectype))))
9505 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9506 compute_in_loop = true;
9509 bool diff_first_stmt_info
9510 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9512 tree offset = NULL_TREE;
9513 if ((alignment_support_scheme == dr_explicit_realign_optimized
9514 || alignment_support_scheme == dr_explicit_realign)
9515 && !compute_in_loop)
9517 /* If we have a different first_stmt_info, we cannot set up the
9518 realignment here, since we cannot guarantee that first_stmt_info's DR
9519 has been initialized yet; instead use first_stmt_info_for_drptr's DR
9520 and bump by the distance from first_stmt_info's DR, as below. */
9521 if (!diff_first_stmt_info)
9522 msq = vect_setup_realignment (vinfo,
9523 first_stmt_info, gsi, &realignment_token,
9524 alignment_support_scheme, NULL_TREE,
9525 &at_loop);
9526 if (alignment_support_scheme == dr_explicit_realign_optimized)
9528 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9529 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9530 size_one_node);
9531 gcc_assert (!first_stmt_info_for_drptr);
9534 else
9535 at_loop = loop;
9537 if (!known_eq (poffset, 0))
9538 offset = (offset
9539 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9540 : size_int (poffset));
9542 tree bump;
9543 tree vec_offset = NULL_TREE;
9544 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9546 aggr_type = NULL_TREE;
9547 bump = NULL_TREE;
9549 else if (memory_access_type == VMAT_GATHER_SCATTER)
9551 aggr_type = elem_type;
9552 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9553 &bump, &vec_offset);
9555 else
9557 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9558 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9559 else
9560 aggr_type = vectype;
9561 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9562 memory_access_type);
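/* AGGR_TYPE selects the granularity of the data-ref pointer increment:
   no increment at all for a real gather (the offset vector does the
   addressing), a single element for a strided access implemented as a
   gather, an array covering VEC_NUM vectors for load-lanes, and
   otherwise one vector at a time.  */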
9565 auto_vec<tree> vec_offsets;
9566 auto_vec<tree> vec_masks;
9567 if (mask)
9569 if (slp_node)
9570 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9571 &vec_masks);
9572 else
9573 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9574 &vec_masks, mask_vectype);
9576 tree vec_mask = NULL_TREE;
9577 poly_uint64 group_elt = 0;
9578 for (j = 0; j < ncopies; j++)
9580 /* 1. Create the vector or array pointer update chain. */
9581 if (j == 0)
9583 bool simd_lane_access_p
9584 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9585 if (simd_lane_access_p
9586 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9587 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9588 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9589 && integer_zerop (DR_INIT (first_dr_info->dr))
9590 && alias_sets_conflict_p (get_alias_set (aggr_type),
9591 get_alias_set (TREE_TYPE (ref_type)))
9592 && (alignment_support_scheme == dr_aligned
9593 || alignment_support_scheme == dr_unaligned_supported))
9595 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9596 dataref_offset = build_int_cst (ref_type, 0);
9598 else if (diff_first_stmt_info)
9600 dataref_ptr
9601 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9602 aggr_type, at_loop, offset, &dummy,
9603 gsi, &ptr_incr, simd_lane_access_p,
9604 bump);
9605 /* Adjust the pointer by the difference to first_stmt. */
9606 data_reference_p ptrdr
9607 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9608 tree diff
9609 = fold_convert (sizetype,
9610 size_binop (MINUS_EXPR,
9611 DR_INIT (first_dr_info->dr),
9612 DR_INIT (ptrdr)));
9613 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9614 stmt_info, diff);
9615 if (alignment_support_scheme == dr_explicit_realign)
9617 msq = vect_setup_realignment (vinfo,
9618 first_stmt_info_for_drptr, gsi,
9619 &realignment_token,
9620 alignment_support_scheme,
9621 dataref_ptr, &at_loop);
9622 gcc_assert (!compute_in_loop);
9625 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9627 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9628 slp_node, &gs_info, &dataref_ptr,
9629 &vec_offsets);
9631 else
9632 dataref_ptr
9633 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9634 at_loop,
9635 offset, &dummy, gsi, &ptr_incr,
9636 simd_lane_access_p, bump);
9637 if (mask)
9638 vec_mask = vec_masks[0];
9640 else
9642 if (dataref_offset)
9643 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9644 bump);
9645 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9646 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9647 stmt_info, bump);
9648 if (mask)
9649 vec_mask = vec_masks[j];
9652 if (grouped_load || slp_perm)
9653 dr_chain.create (vec_num);
9655 gimple *new_stmt = NULL;
9656 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9658 tree vec_array;
9660 vec_array = create_vector_array (vectype, vec_num);
9662 tree final_mask = NULL_TREE;
9663 if (loop_masks)
9664 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9665 vectype, j);
9666 if (vec_mask)
9667 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9668 final_mask, vec_mask, gsi);
9670 gcall *call;
9671 if (final_mask)
9673 /* Emit:
9674 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9675 VEC_MASK). */
9676 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9677 tree alias_ptr = build_int_cst (ref_type, align);
9678 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9679 dataref_ptr, alias_ptr,
9680 final_mask);
9682 else
9684 /* Emit:
9685 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9686 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9687 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9689 gimple_call_set_lhs (call, vec_array);
9690 gimple_call_set_nothrow (call, true);
9691 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9692 new_stmt = call;
9694 /* Extract each vector into an SSA_NAME. */
9695 for (i = 0; i < vec_num; i++)
9697 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9698 vec_array, i);
9699 dr_chain.quick_push (new_temp);
9702 /* Record the mapping between SSA_NAMEs and statements. */
9703 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9705 /* Record that VEC_ARRAY is now dead. */
9706 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
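/* Schematically, for a group needing two V4SI vectors the code above
   produces something along the lines of
     vec_array = .LOAD_LANES (MEM[(int[8] *)ptr]);
     vx0 = vec_array[0];
     vx1 = vec_array[1];
   followed by recording the group result and clobbering the temporary
   array.  */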
9708 else
9710 for (i = 0; i < vec_num; i++)
9712 tree final_mask = NULL_TREE;
9713 if (loop_masks
9714 && memory_access_type != VMAT_INVARIANT)
9715 final_mask = vect_get_loop_mask (gsi, loop_masks,
9716 vec_num * ncopies,
9717 vectype, vec_num * j + i);
9718 if (vec_mask)
9719 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9720 final_mask, vec_mask, gsi);
9722 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9723 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9724 gsi, stmt_info, bump);
9726 /* 2. Create the vector-load in the loop. */
9727 switch (alignment_support_scheme)
9729 case dr_aligned:
9730 case dr_unaligned_supported:
9732 unsigned int misalign;
9733 unsigned HOST_WIDE_INT align;
9735 if (memory_access_type == VMAT_GATHER_SCATTER
9736 && gs_info.ifn != IFN_LAST)
9738 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9739 vec_offset = vec_offsets[vec_num * j + i];
9740 tree zero = build_zero_cst (vectype);
9741 tree scale = size_int (gs_info.scale);
9742 gcall *call;
9743 if (final_mask)
9744 call = gimple_build_call_internal
9745 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9746 vec_offset, scale, zero, final_mask);
9747 else
9748 call = gimple_build_call_internal
9749 (IFN_GATHER_LOAD, 4, dataref_ptr,
9750 vec_offset, scale, zero);
9751 gimple_call_set_nothrow (call, true);
9752 new_stmt = call;
9753 data_ref = NULL_TREE;
9754 break;
9756 else if (memory_access_type == VMAT_GATHER_SCATTER)
9758 /* Emulated gather-scatter. */
9759 gcc_assert (!final_mask);
9760 unsigned HOST_WIDE_INT const_nunits
9761 = nunits.to_constant ();
9762 unsigned HOST_WIDE_INT const_offset_nunits
9763 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9764 .to_constant ();
9765 vec<constructor_elt, va_gc> *ctor_elts;
9766 vec_alloc (ctor_elts, const_nunits);
9767 gimple_seq stmts = NULL;
9768 /* We support offset vectors with more elements
9769 than the data vector for now. */
9770 unsigned HOST_WIDE_INT factor
9771 = const_offset_nunits / const_nunits;
9772 vec_offset = vec_offsets[j / factor];
9773 unsigned elt_offset = (j % factor) * const_nunits;
9774 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9775 tree scale = size_int (gs_info.scale);
9776 align
9777 = get_object_alignment (DR_REF (first_dr_info->dr));
9778 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9779 align);
9780 for (unsigned k = 0; k < const_nunits; ++k)
9782 tree boff = size_binop (MULT_EXPR,
9783 TYPE_SIZE (idx_type),
9784 bitsize_int
9785 (k + elt_offset));
9786 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9787 idx_type, vec_offset,
9788 TYPE_SIZE (idx_type),
9789 boff);
9790 idx = gimple_convert (&stmts, sizetype, idx);
9791 idx = gimple_build (&stmts, MULT_EXPR,
9792 sizetype, idx, scale);
9793 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9794 TREE_TYPE (dataref_ptr),
9795 dataref_ptr, idx);
9796 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9797 tree elt = make_ssa_name (TREE_TYPE (vectype));
9798 tree ref = build2 (MEM_REF, ltype, ptr,
9799 build_int_cst (ref_type, 0));
9800 new_stmt = gimple_build_assign (elt, ref);
9801 gimple_set_vuse (new_stmt,
9802 gimple_vuse (gsi_stmt (*gsi)));
9803 gimple_seq_add_stmt (&stmts, new_stmt);
9804 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9806 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9807 new_stmt = gimple_build_assign (NULL_TREE,
9808 build_constructor
9809 (vectype, ctor_elts));
9810 data_ref = NULL_TREE;
9811 break;
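/* The emulated gather above builds the result lane by lane: for each
   lane K it extracts offset K with a BIT_FIELD_REF, scales it, adds it
   to the base pointer and issues an ordinary scalar load, finally
   assembling the lanes with a CONSTRUCTOR.  It is only used when
   neither a gather IFN nor a target builtin is available, and never
   with a mask.  */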
9814 align =
9815 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9816 if (alignment_support_scheme == dr_aligned)
9817 misalign = 0;
9818 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9820 align = dr_alignment
9821 (vect_dr_behavior (vinfo, first_dr_info));
9822 misalign = 0;
9824 else
9825 misalign = misalignment;
9826 if (dataref_offset == NULL_TREE
9827 && TREE_CODE (dataref_ptr) == SSA_NAME)
9828 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9829 align, misalign);
9830 align = least_bit_hwi (misalign | align);
9832 if (final_mask)
9834 tree ptr = build_int_cst (ref_type,
9835 align * BITS_PER_UNIT);
9836 gcall *call
9837 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9838 dataref_ptr, ptr,
9839 final_mask);
9840 gimple_call_set_nothrow (call, true);
9841 new_stmt = call;
9842 data_ref = NULL_TREE;
9844 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9846 tree final_len
9847 = vect_get_loop_len (loop_vinfo, loop_lens,
9848 vec_num * ncopies,
9849 vec_num * j + i);
9850 tree ptr = build_int_cst (ref_type,
9851 align * BITS_PER_UNIT);
9853 machine_mode vmode = TYPE_MODE (vectype);
9854 opt_machine_mode new_ovmode
9855 = get_len_load_store_mode (vmode, true);
9856 machine_mode new_vmode = new_ovmode.require ();
9857 tree qi_type = unsigned_intQI_type_node;
9859 signed char biasval =
9860 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9862 tree bias = build_int_cst (intQI_type_node, biasval);
9864 gcall *call
9865 = gimple_build_call_internal (IFN_LEN_LOAD, 4,
9866 dataref_ptr, ptr,
9867 final_len, bias);
9868 gimple_call_set_nothrow (call, true);
9869 new_stmt = call;
9870 data_ref = NULL_TREE;
9872 /* Need conversion if it's wrapped with VnQI. */
9873 if (vmode != new_vmode)
9875 tree new_vtype
9876 = build_vector_type_for_mode (qi_type, new_vmode);
9877 tree var = vect_get_new_ssa_name (new_vtype,
9878 vect_simple_var);
9879 gimple_set_lhs (call, var);
9880 vect_finish_stmt_generation (vinfo, stmt_info, call,
9881 gsi);
9882 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9883 new_stmt
9884 = gimple_build_assign (vec_dest,
9885 VIEW_CONVERT_EXPR, op);
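/* The length-controlled path above emits IFN_LEN_LOAD with the loop
   length FINAL_LEN plus the target-specific bias so that only the
   active part of the vector is loaded.  If the target only provides
   the QImode form, the load is emitted in the VnQI mode returned by
   get_len_load_store_mode and the result is VIEW_CONVERTed back to
   the original vectype.  */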
9888 else
9890 tree ltype = vectype;
9891 tree new_vtype = NULL_TREE;
9892 unsigned HOST_WIDE_INT gap
9893 = DR_GROUP_GAP (first_stmt_info);
9894 unsigned int vect_align
9895 = vect_known_alignment_in_bytes (first_dr_info,
9896 vectype);
9897 unsigned int scalar_dr_size
9898 = vect_get_scalar_dr_size (first_dr_info);
9899 /* If there's no peeling for gaps but we have a gap
9900 with slp loads then load the lower half of the
9901 vector only. See get_group_load_store_type for
9902 when we apply this optimization. */
9903 if (slp
9904 && loop_vinfo
9905 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9906 && gap != 0
9907 && known_eq (nunits, (group_size - gap) * 2)
9908 && known_eq (nunits, group_size)
9909 && gap >= (vect_align / scalar_dr_size))
9911 tree half_vtype;
9912 new_vtype
9913 = vector_vector_composition_type (vectype, 2,
9914 &half_vtype);
9915 if (new_vtype != NULL_TREE)
9916 ltype = half_vtype;
9918 tree offset
9919 = (dataref_offset ? dataref_offset
9920 : build_int_cst (ref_type, 0));
9921 if (ltype != vectype
9922 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9924 unsigned HOST_WIDE_INT gap_offset
9925 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9926 tree gapcst = build_int_cst (ref_type, gap_offset);
9927 offset = size_binop (PLUS_EXPR, offset, gapcst);
9929 data_ref
9930 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9931 if (alignment_support_scheme == dr_aligned)
9933 else
9934 TREE_TYPE (data_ref)
9935 = build_aligned_type (TREE_TYPE (data_ref),
9936 align * BITS_PER_UNIT);
9937 if (ltype != vectype)
9939 vect_copy_ref_info (data_ref,
9940 DR_REF (first_dr_info->dr));
9941 tree tem = make_ssa_name (ltype);
9942 new_stmt = gimple_build_assign (tem, data_ref);
9943 vect_finish_stmt_generation (vinfo, stmt_info,
9944 new_stmt, gsi);
9945 data_ref = NULL;
9946 vec<constructor_elt, va_gc> *v;
9947 vec_alloc (v, 2);
9948 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9950 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9951 build_zero_cst (ltype));
9952 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9954 else
9956 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9957 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9958 build_zero_cst (ltype));
9960 gcc_assert (new_vtype != NULL_TREE);
9961 if (new_vtype == vectype)
9962 new_stmt = gimple_build_assign (
9963 vec_dest, build_constructor (vectype, v));
9964 else
9966 tree new_vname = make_ssa_name (new_vtype);
9967 new_stmt = gimple_build_assign (
9968 new_vname, build_constructor (new_vtype, v));
9969 vect_finish_stmt_generation (vinfo, stmt_info,
9970 new_stmt, gsi);
9971 new_stmt = gimple_build_assign (
9972 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9973 new_vname));
9977 break;
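/* Illustrating the half-vector case above: assuming a V4SI group of
   size 4 with a trailing gap of 2 and no peeling for gaps, only a
   two-element (64-bit) piece is loaded and the other half of the
   vector is filled with zeros, the zero half coming first for a
   reverse access.  */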
9979 case dr_explicit_realign:
9981 tree ptr, bump;
9983 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9985 if (compute_in_loop)
9986 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9987 &realignment_token,
9988 dr_explicit_realign,
9989 dataref_ptr, NULL);
9991 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9992 ptr = copy_ssa_name (dataref_ptr);
9993 else
9994 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9995 // For explicit realign the target alignment should be
9996 // known at compile time.
9997 unsigned HOST_WIDE_INT align =
9998 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9999 new_stmt = gimple_build_assign
10000 (ptr, BIT_AND_EXPR, dataref_ptr,
10001 build_int_cst
10002 (TREE_TYPE (dataref_ptr),
10003 -(HOST_WIDE_INT) align));
10004 vect_finish_stmt_generation (vinfo, stmt_info,
10005 new_stmt, gsi);
10006 data_ref
10007 = build2 (MEM_REF, vectype, ptr,
10008 build_int_cst (ref_type, 0));
10009 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10010 vec_dest = vect_create_destination_var (scalar_dest,
10011 vectype);
10012 new_stmt = gimple_build_assign (vec_dest, data_ref);
10013 new_temp = make_ssa_name (vec_dest, new_stmt);
10014 gimple_assign_set_lhs (new_stmt, new_temp);
10015 gimple_move_vops (new_stmt, stmt_info->stmt);
10016 vect_finish_stmt_generation (vinfo, stmt_info,
10017 new_stmt, gsi);
10018 msq = new_temp;
10020 bump = size_binop (MULT_EXPR, vs,
10021 TYPE_SIZE_UNIT (elem_type));
10022 bump = size_binop (MINUS_EXPR, bump, size_one_node);
10023 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
10024 stmt_info, bump);
10025 new_stmt = gimple_build_assign
10026 (NULL_TREE, BIT_AND_EXPR, ptr,
10027 build_int_cst
10028 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
10029 if (TREE_CODE (ptr) == SSA_NAME)
10030 ptr = copy_ssa_name (ptr, new_stmt);
10031 else
10032 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
10033 gimple_assign_set_lhs (new_stmt, ptr);
10034 vect_finish_stmt_generation (vinfo, stmt_info,
10035 new_stmt, gsi);
10036 data_ref
10037 = build2 (MEM_REF, vectype, ptr,
10038 build_int_cst (ref_type, 0));
10039 break;
10041 case dr_explicit_realign_optimized:
10043 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10044 new_temp = copy_ssa_name (dataref_ptr);
10045 else
10046 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
10047 // We should only be doing this if we know the target
10048 // alignment at compile time.
10049 unsigned HOST_WIDE_INT align =
10050 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10051 new_stmt = gimple_build_assign
10052 (new_temp, BIT_AND_EXPR, dataref_ptr,
10053 build_int_cst (TREE_TYPE (dataref_ptr),
10054 -(HOST_WIDE_INT) align));
10055 vect_finish_stmt_generation (vinfo, stmt_info,
10056 new_stmt, gsi);
10057 data_ref
10058 = build2 (MEM_REF, vectype, new_temp,
10059 build_int_cst (ref_type, 0));
10060 break;
10062 default:
10063 gcc_unreachable ();
10065 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10066 /* DATA_REF is null if we've already built the statement. */
10067 if (data_ref)
10069 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10070 new_stmt = gimple_build_assign (vec_dest, data_ref);
10072 new_temp = make_ssa_name (vec_dest, new_stmt);
10073 gimple_set_lhs (new_stmt, new_temp);
10074 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10076 /* 3. Handle explicit realignment if necessary/supported.
10077 Create in loop:
10078 vec_dest = realign_load (msq, lsq, realignment_token) */
10079 if (alignment_support_scheme == dr_explicit_realign_optimized
10080 || alignment_support_scheme == dr_explicit_realign)
10082 lsq = gimple_assign_lhs (new_stmt);
10083 if (!realignment_token)
10084 realignment_token = dataref_ptr;
10085 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10086 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10087 msq, lsq, realignment_token);
10088 new_temp = make_ssa_name (vec_dest, new_stmt);
10089 gimple_assign_set_lhs (new_stmt, new_temp);
10090 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10092 if (alignment_support_scheme == dr_explicit_realign_optimized)
10094 gcc_assert (phi);
10095 if (i == vec_num - 1 && j == ncopies - 1)
10096 add_phi_arg (phi, lsq,
10097 loop_latch_edge (containing_loop),
10098 UNKNOWN_LOCATION);
10099 msq = lsq;
10103 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10105 tree perm_mask = perm_mask_for_reverse (vectype);
10106 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
10107 perm_mask, stmt_info, gsi);
10108 new_stmt = SSA_NAME_DEF_STMT (new_temp);
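/* For a negative-step (VMAT_CONTIGUOUS_REVERSE) access the loaded
   vector is put back into original element order with a single
   VEC_PERM_EXPR whose mask simply reverses the lanes, e.g.
   { 3, 2, 1, 0 } for a 4-element vector.  */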
10111 /* Collect vector loads and later create their permutation in
10112 vect_transform_grouped_load (). */
10113 if (grouped_load || slp_perm)
10114 dr_chain.quick_push (new_temp);
10116 /* Store vector loads in the corresponding SLP_NODE. */
10117 if (slp && !slp_perm)
10118 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10120 /* With an SLP permutation we load the gaps as well; without one
10121 we need to skip the gaps after we have fully loaded all the
10122 elements. group_gap_adj is DR_GROUP_SIZE here. */
10123 group_elt += nunits;
10124 if (maybe_ne (group_gap_adj, 0U)
10125 && !slp_perm
10126 && known_eq (group_elt, group_size - group_gap_adj))
10128 poly_wide_int bump_val
10129 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10130 * group_gap_adj);
10131 if (tree_int_cst_sgn
10132 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10133 bump_val = -bump_val;
10134 tree bump = wide_int_to_tree (sizetype, bump_val);
10135 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10136 gsi, stmt_info, bump);
10137 group_elt = 0;
10140 /* Bump the vector pointer to account for a gap or for excess
10141 elements loaded for a permuted SLP load. */
10142 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10144 poly_wide_int bump_val
10145 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10146 * group_gap_adj);
10147 if (tree_int_cst_sgn
10148 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10149 bump_val = -bump_val;
10150 tree bump = wide_int_to_tree (sizetype, bump_val);
10151 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10152 stmt_info, bump);
10156 if (slp && !slp_perm)
10157 continue;
10159 if (slp_perm)
10161 unsigned n_perms;
10162 /* For SLP we know we've seen all possible uses of dr_chain so
10163 direct vect_transform_slp_perm_load to DCE the unused parts.
10164 ??? This is a hack to prevent compile-time issues as seen
10165 in PR101120 and friends. */
10166 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10167 gsi, vf, false, &n_perms,
10168 nullptr, true);
10169 gcc_assert (ok);
10171 else
10173 if (grouped_load)
10175 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10176 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10177 group_size, gsi);
10178 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10180 else
10182 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10185 dr_chain.release ();
10187 if (!slp)
10188 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10190 return true;
10193 /* Function vect_is_simple_cond.
10195 Input:
10196 LOOP - the loop that is being vectorized.
10197 COND - Condition that is checked for simple use.
10199 Output:
10200 *COMP_VECTYPE - the vector type for the comparison.
10201 *DTS - The def types for the arguments of the comparison
10203 Returns whether a COND can be vectorized. Checks whether
10204 condition operands are supportable using vect_is_simple_use. */
10206 static bool
10207 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10208 slp_tree slp_node, tree *comp_vectype,
10209 enum vect_def_type *dts, tree vectype)
10211 tree lhs, rhs;
10212 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10213 slp_tree slp_op;
10215 /* Mask case. */
10216 if (TREE_CODE (cond) == SSA_NAME
10217 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10219 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10220 &slp_op, &dts[0], comp_vectype)
10221 || !*comp_vectype
10222 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10223 return false;
10224 return true;
10227 if (!COMPARISON_CLASS_P (cond))
10228 return false;
10230 lhs = TREE_OPERAND (cond, 0);
10231 rhs = TREE_OPERAND (cond, 1);
10233 if (TREE_CODE (lhs) == SSA_NAME)
10235 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10236 &lhs, &slp_op, &dts[0], &vectype1))
10237 return false;
10239 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10240 || TREE_CODE (lhs) == FIXED_CST)
10241 dts[0] = vect_constant_def;
10242 else
10243 return false;
10245 if (TREE_CODE (rhs) == SSA_NAME)
10247 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10248 &rhs, &slp_op, &dts[1], &vectype2))
10249 return false;
10251 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10252 || TREE_CODE (rhs) == FIXED_CST)
10253 dts[1] = vect_constant_def;
10254 else
10255 return false;
10257 if (vectype1 && vectype2
10258 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10259 TYPE_VECTOR_SUBPARTS (vectype2)))
10260 return false;
10262 *comp_vectype = vectype1 ? vectype1 : vectype2;
10263 /* Invariant comparison. */
10264 if (! *comp_vectype)
10266 tree scalar_type = TREE_TYPE (lhs);
10267 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10268 *comp_vectype = truth_type_for (vectype);
10269 else
10271 /* If we can widen the comparison to match vectype do so. */
10272 if (INTEGRAL_TYPE_P (scalar_type)
10273 && !slp_node
10274 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10275 TYPE_SIZE (TREE_TYPE (vectype))))
10276 scalar_type = build_nonstandard_integer_type
10277 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10278 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10279 slp_node);
10283 return true;
10286 /* vectorizable_condition.
10288 Check if STMT_INFO is conditional modify expression that can be vectorized.
10289 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10290 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10291 at GSI.
10293 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10295 Return true if STMT_INFO is vectorizable in this way. */
10297 static bool
10298 vectorizable_condition (vec_info *vinfo,
10299 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10300 gimple **vec_stmt,
10301 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10303 tree scalar_dest = NULL_TREE;
10304 tree vec_dest = NULL_TREE;
10305 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10306 tree then_clause, else_clause;
10307 tree comp_vectype = NULL_TREE;
10308 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10309 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10310 tree vec_compare;
10311 tree new_temp;
10312 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10313 enum vect_def_type dts[4]
10314 = {vect_unknown_def_type, vect_unknown_def_type,
10315 vect_unknown_def_type, vect_unknown_def_type};
10316 int ndts = 4;
10317 int ncopies;
10318 int vec_num;
10319 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10320 int i;
10321 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10322 vec<tree> vec_oprnds0 = vNULL;
10323 vec<tree> vec_oprnds1 = vNULL;
10324 vec<tree> vec_oprnds2 = vNULL;
10325 vec<tree> vec_oprnds3 = vNULL;
10326 tree vec_cmp_type;
10327 bool masked = false;
10329 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10330 return false;
10332 /* Is vectorizable conditional operation? */
10333 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10334 if (!stmt)
10335 return false;
10337 code = gimple_assign_rhs_code (stmt);
10338 if (code != COND_EXPR)
10339 return false;
10341 stmt_vec_info reduc_info = NULL;
10342 int reduc_index = -1;
10343 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10344 bool for_reduction
10345 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10346 if (for_reduction)
10348 if (STMT_SLP_TYPE (stmt_info))
10349 return false;
10350 reduc_info = info_for_reduction (vinfo, stmt_info);
10351 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10352 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10353 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10354 || reduc_index != -1);
10356 else
10358 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10359 return false;
10362 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10363 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10365 if (slp_node)
10367 ncopies = 1;
10368 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10370 else
10372 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10373 vec_num = 1;
10376 gcc_assert (ncopies >= 1);
10377 if (for_reduction && ncopies > 1)
10378 return false; /* FORNOW */
10380 cond_expr = gimple_assign_rhs1 (stmt);
10382 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10383 &comp_vectype, &dts[0], vectype)
10384 || !comp_vectype)
10385 return false;
10387 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10388 slp_tree then_slp_node, else_slp_node;
10389 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10390 &then_clause, &then_slp_node, &dts[2], &vectype1))
10391 return false;
10392 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10393 &else_clause, &else_slp_node, &dts[3], &vectype2))
10394 return false;
10396 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10397 return false;
10399 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10400 return false;
10402 masked = !COMPARISON_CLASS_P (cond_expr);
10403 vec_cmp_type = truth_type_for (comp_vectype);
10405 if (vec_cmp_type == NULL_TREE)
10406 return false;
10408 cond_code = TREE_CODE (cond_expr);
10409 if (!masked)
10411 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10412 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10415 /* For conditional reductions, the "then" value needs to be the candidate
10416 value calculated by this iteration while the "else" value needs to be
10417 the result carried over from previous iterations. If the COND_EXPR
10418 is the other way around, we need to swap it. */
10419 bool must_invert_cmp_result = false;
10420 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10422 if (masked)
10423 must_invert_cmp_result = true;
10424 else
10426 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10427 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10428 if (new_code == ERROR_MARK)
10429 must_invert_cmp_result = true;
10430 else
10432 cond_code = new_code;
10433 /* Make sure we don't accidentally use the old condition. */
10434 cond_expr = NULL_TREE;
10437 std::swap (then_clause, else_clause);
10440 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10442 /* Boolean values may have another representation in vectors
10443 and therefore we prefer bit operations over comparison for
10444 them (which also works for scalar masks). We store opcodes
10445 to use in bitop1 and bitop2. Statement is vectorized as
10446 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10447 depending on bitop1 and bitop2 arity. */
10448 switch (cond_code)
10450 case GT_EXPR:
10451 bitop1 = BIT_NOT_EXPR;
10452 bitop2 = BIT_AND_EXPR;
10453 break;
10454 case GE_EXPR:
10455 bitop1 = BIT_NOT_EXPR;
10456 bitop2 = BIT_IOR_EXPR;
10457 break;
10458 case LT_EXPR:
10459 bitop1 = BIT_NOT_EXPR;
10460 bitop2 = BIT_AND_EXPR;
10461 std::swap (cond_expr0, cond_expr1);
10462 break;
10463 case LE_EXPR:
10464 bitop1 = BIT_NOT_EXPR;
10465 bitop2 = BIT_IOR_EXPR;
10466 std::swap (cond_expr0, cond_expr1);
10467 break;
10468 case NE_EXPR:
10469 bitop1 = BIT_XOR_EXPR;
10470 break;
10471 case EQ_EXPR:
10472 bitop1 = BIT_XOR_EXPR;
10473 bitop2 = BIT_NOT_EXPR;
10474 break;
10475 default:
10476 return false;
10478 cond_code = SSA_NAME;
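/* Reading the table above at the truth level for boolean operands A
   and B: A > B becomes A & ~B, A >= B becomes A | ~B, A < B and
   A <= B are handled by swapping the operands first, A != B becomes
   A ^ B, and A == B is emitted as A ^ B with the then/else arms
   swapped later instead of materializing the BIT_NOT.  */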
10481 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10482 && reduction_type == EXTRACT_LAST_REDUCTION
10483 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10485 if (dump_enabled_p ())
10486 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10487 "reduction comparison operation not supported.\n");
10488 return false;
10491 if (!vec_stmt)
10493 if (bitop1 != NOP_EXPR)
10495 machine_mode mode = TYPE_MODE (comp_vectype);
10496 optab optab;
10498 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10499 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10500 return false;
10502 if (bitop2 != NOP_EXPR)
10504 optab = optab_for_tree_code (bitop2, comp_vectype,
10505 optab_default);
10506 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10507 return false;
10511 vect_cost_for_stmt kind = vector_stmt;
10512 if (reduction_type == EXTRACT_LAST_REDUCTION)
10513 /* Count one reduction-like operation per vector. */
10514 kind = vec_to_scalar;
10515 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10516 return false;
10518 if (slp_node
10519 && (!vect_maybe_update_slp_op_vectype
10520 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10521 || (op_adjust == 1
10522 && !vect_maybe_update_slp_op_vectype
10523 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10524 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10525 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10527 if (dump_enabled_p ())
10528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10529 "incompatible vector types for invariants\n");
10530 return false;
10533 if (loop_vinfo && for_reduction
10534 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10536 if (reduction_type == EXTRACT_LAST_REDUCTION)
10537 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10538 ncopies * vec_num, vectype, NULL);
10539 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10540 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10542 if (dump_enabled_p ())
10543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10544 "conditional reduction prevents the use"
10545 " of partial vectors.\n");
10546 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10550 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10551 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10552 cost_vec, kind);
10553 return true;
10556 /* Transform. */
10558 /* Handle def. */
10559 scalar_dest = gimple_assign_lhs (stmt);
10560 if (reduction_type != EXTRACT_LAST_REDUCTION)
10561 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10563 bool swap_cond_operands = false;
10565 /* See whether another part of the vectorized code applies a loop
10566 mask to the condition, or to its inverse. */
10568 vec_loop_masks *masks = NULL;
10569 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10571 if (reduction_type == EXTRACT_LAST_REDUCTION)
10572 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10573 else
10575 scalar_cond_masked_key cond (cond_expr, ncopies);
10576 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10577 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10578 else
10580 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10581 tree_code orig_code = cond.code;
10582 cond.code = invert_tree_comparison (cond.code, honor_nans);
10583 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
10585 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10586 cond_code = cond.code;
10587 swap_cond_operands = true;
10589 else
10591 /* Try the inverse of the current mask. We check if the
10592 inverse mask is live and if so we generate a negate of
10593 the current mask such that we still honor NaNs. */
10594 cond.inverted_p = true;
10595 cond.code = orig_code;
10596 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10598 bitop1 = orig_code;
10599 bitop2 = BIT_NOT_EXPR;
10600 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10601 cond_code = cond.code;
10602 swap_cond_operands = true;
10609 /* Handle cond expr. */
10610 if (masked)
10611 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10612 cond_expr, &vec_oprnds0, comp_vectype,
10613 then_clause, &vec_oprnds2, vectype,
10614 reduction_type != EXTRACT_LAST_REDUCTION
10615 ? else_clause : NULL, &vec_oprnds3, vectype);
10616 else
10617 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10618 cond_expr0, &vec_oprnds0, comp_vectype,
10619 cond_expr1, &vec_oprnds1, comp_vectype,
10620 then_clause, &vec_oprnds2, vectype,
10621 reduction_type != EXTRACT_LAST_REDUCTION
10622 ? else_clause : NULL, &vec_oprnds3, vectype);
10624 /* Arguments are ready. Create the new vector stmt. */
10625 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10627 vec_then_clause = vec_oprnds2[i];
10628 if (reduction_type != EXTRACT_LAST_REDUCTION)
10629 vec_else_clause = vec_oprnds3[i];
10631 if (swap_cond_operands)
10632 std::swap (vec_then_clause, vec_else_clause);
10634 if (masked)
10635 vec_compare = vec_cond_lhs;
10636 else
10638 vec_cond_rhs = vec_oprnds1[i];
10639 if (bitop1 == NOP_EXPR)
10641 gimple_seq stmts = NULL;
10642 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10643 vec_cond_lhs, vec_cond_rhs);
10644 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10646 else
10648 new_temp = make_ssa_name (vec_cmp_type);
10649 gassign *new_stmt;
10650 if (bitop1 == BIT_NOT_EXPR)
10651 new_stmt = gimple_build_assign (new_temp, bitop1,
10652 vec_cond_rhs);
10653 else
10654 new_stmt
10655 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10656 vec_cond_rhs);
10657 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10658 if (bitop2 == NOP_EXPR)
10659 vec_compare = new_temp;
10660 else if (bitop2 == BIT_NOT_EXPR)
10662 /* Instead of doing ~x ? y : z do x ? z : y. */
10663 vec_compare = new_temp;
10664 std::swap (vec_then_clause, vec_else_clause);
10666 else
10668 vec_compare = make_ssa_name (vec_cmp_type);
10669 new_stmt
10670 = gimple_build_assign (vec_compare, bitop2,
10671 vec_cond_lhs, new_temp);
10672 vect_finish_stmt_generation (vinfo, stmt_info,
10673 new_stmt, gsi);
10678 /* If we decided to apply a loop mask to the result of the vector
10679 comparison, AND the comparison with the mask now. Later passes
10680 	 should then be able to reuse the AND results between multiple
10681 vector statements.
10683 For example:
10684 for (int i = 0; i < 100; ++i)
10685 x[i] = y[i] ? z[i] : 10;
10687 	 results in the following optimized GIMPLE:
10689 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10690 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10691 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10692 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10693 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10694 vect_iftmp.11_47, { 10, ... }>;
10696 	 instead of using both masked and unmasked forms of
10697 vec != { 0, ... } (masked in the MASK_LOAD,
10698 unmasked in the VEC_COND_EXPR). */
10700 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10701 in cases where that's necessary. */
10703 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10705 if (!is_gimple_val (vec_compare))
10707 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10708 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10709 vec_compare);
10710 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10711 vec_compare = vec_compare_name;
10714 if (must_invert_cmp_result)
10716 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10717 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10718 BIT_NOT_EXPR,
10719 vec_compare);
10720 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10721 vec_compare = vec_compare_name;
10724 if (masks)
10726 tree loop_mask
10727 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10728 vectype, i);
10729 tree tmp2 = make_ssa_name (vec_cmp_type);
10730 gassign *g
10731 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10732 loop_mask);
10733 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10734 vec_compare = tmp2;
10738 gimple *new_stmt;
10739 if (reduction_type == EXTRACT_LAST_REDUCTION)
10741 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10742 tree lhs = gimple_get_lhs (old_stmt);
10743 new_stmt = gimple_build_call_internal
10744 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10745 vec_then_clause);
10746 gimple_call_set_lhs (new_stmt, lhs);
10747 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10748 if (old_stmt == gsi_stmt (*gsi))
10749 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10750 else
10752 /* In this case we're moving the definition to later in the
10753 block. That doesn't matter because the only uses of the
10754 lhs are in phi statements. */
10755 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10756 gsi_remove (&old_gsi, true);
10757 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10760 else
10762 new_temp = make_ssa_name (vec_dest);
10763 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10764 vec_then_clause, vec_else_clause);
10765 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10767 if (slp_node)
10768 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10769 else
10770 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10773 if (!slp_node)
10774 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10776 vec_oprnds0.release ();
10777 vec_oprnds1.release ();
10778 vec_oprnds2.release ();
10779 vec_oprnds3.release ();
10781 return true;
10784 /* vectorizable_comparison.
10786    Check if STMT_INFO is a comparison expression that can be vectorized.
10787 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10788 comparison, put it in VEC_STMT, and insert it at GSI.
10790 Return true if STMT_INFO is vectorizable in this way. */
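/* For example (a minimal illustrative sketch, not from any particular
   testcase), a scalar comparison feeding a mask

     flag_1 = a_2 < b_3;

   is vectorized into a single statement producing a VECTOR_BOOLEAN_TYPE_P
   mask, conceptually

     vect_flag = vect_a < vect_b;

   which can then feed VEC_COND_EXPRs or masked loads and stores.  */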
10792 static bool
10793 vectorizable_comparison (vec_info *vinfo,
10794 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10795 gimple **vec_stmt,
10796 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10798 tree lhs, rhs1, rhs2;
10799 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10800 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10801 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10802 tree new_temp;
10803 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10804 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10805 int ndts = 2;
10806 poly_uint64 nunits;
10807 int ncopies;
10808 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10809 int i;
10810 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10811 vec<tree> vec_oprnds0 = vNULL;
10812 vec<tree> vec_oprnds1 = vNULL;
10813 tree mask_type;
10814 tree mask;
10816 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10817 return false;
10819 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10820 return false;
10822 mask_type = vectype;
10823 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10825 if (slp_node)
10826 ncopies = 1;
10827 else
10828 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10830 gcc_assert (ncopies >= 1);
10831 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10832 return false;
10834 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10835 if (!stmt)
10836 return false;
10838 code = gimple_assign_rhs_code (stmt);
10840 if (TREE_CODE_CLASS (code) != tcc_comparison)
10841 return false;
10843 slp_tree slp_rhs1, slp_rhs2;
10844 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10845 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10846 return false;
10848 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10849 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10850 return false;
10852 if (vectype1 && vectype2
10853 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10854 TYPE_VECTOR_SUBPARTS (vectype2)))
10855 return false;
10857 vectype = vectype1 ? vectype1 : vectype2;
10859 /* Invariant comparison. */
10860 if (!vectype)
10862 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10863 vectype = mask_type;
10864 else
10865 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10866 slp_node);
10867 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10868 return false;
10870 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10871 return false;
10873 /* Can't compare mask and non-mask types. */
10874 if (vectype1 && vectype2
10875 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10876 return false;
10878 /* Boolean values may have another representation in vectors
10879 and therefore we prefer bit operations over comparison for
10880 them (which also works for scalar masks). We store opcodes
10881      to use in bitop1 and bitop2.  The statement is vectorized as
10882 BITOP2 (rhs1 BITOP1 rhs2) or
10883 rhs1 BITOP2 (BITOP1 rhs2)
10884 depending on bitop1 and bitop2 arity. */
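  /* As an illustration (assuming 0/1 boolean lanes), the mapping chosen
     below works out to:

       a > b   ->  a & ~b      (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR)
       a >= b  ->  a | ~b      (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR)
       a < b   ->  b & ~a      (as for >, with the operands swapped)
       a <= b  ->  b | ~a      (as for >=, with the operands swapped)
       a == b  ->  ~(a ^ b)    (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR)
       a != b  ->  a ^ b       (bitop1 = BIT_XOR_EXPR only)  */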
10885 bool swap_p = false;
10886 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10888 if (code == GT_EXPR)
10890 bitop1 = BIT_NOT_EXPR;
10891 bitop2 = BIT_AND_EXPR;
10893 else if (code == GE_EXPR)
10895 bitop1 = BIT_NOT_EXPR;
10896 bitop2 = BIT_IOR_EXPR;
10898 else if (code == LT_EXPR)
10900 bitop1 = BIT_NOT_EXPR;
10901 bitop2 = BIT_AND_EXPR;
10902 swap_p = true;
10904 else if (code == LE_EXPR)
10906 bitop1 = BIT_NOT_EXPR;
10907 bitop2 = BIT_IOR_EXPR;
10908 swap_p = true;
10910 else
10912 bitop1 = BIT_XOR_EXPR;
10913 if (code == EQ_EXPR)
10914 bitop2 = BIT_NOT_EXPR;
10918 if (!vec_stmt)
10920 if (bitop1 == NOP_EXPR)
10922 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10923 return false;
10925 else
10927 machine_mode mode = TYPE_MODE (vectype);
10928 optab optab;
10930 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10931 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10932 return false;
10934 if (bitop2 != NOP_EXPR)
10936 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10937 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10938 return false;
10942 /* Put types on constant and invariant SLP children. */
10943 if (slp_node
10944 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10945 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10947 if (dump_enabled_p ())
10948 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10949 "incompatible vector types for invariants\n");
10950 return false;
10953 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10954 vect_model_simple_cost (vinfo, stmt_info,
10955 ncopies * (1 + (bitop2 != NOP_EXPR)),
10956 dts, ndts, slp_node, cost_vec);
10957 return true;
10960 /* Transform. */
10962 /* Handle def. */
10963 lhs = gimple_assign_lhs (stmt);
10964 mask = vect_create_destination_var (lhs, mask_type);
10966 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10967 rhs1, &vec_oprnds0, vectype,
10968 rhs2, &vec_oprnds1, vectype);
10969 if (swap_p)
10970 std::swap (vec_oprnds0, vec_oprnds1);
10972 /* Arguments are ready. Create the new vector stmt. */
10973 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10975 gimple *new_stmt;
10976 vec_rhs2 = vec_oprnds1[i];
10978 new_temp = make_ssa_name (mask);
10979 if (bitop1 == NOP_EXPR)
10981 new_stmt = gimple_build_assign (new_temp, code,
10982 vec_rhs1, vec_rhs2);
10983 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10985 else
10987 if (bitop1 == BIT_NOT_EXPR)
10988 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10989 else
10990 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10991 vec_rhs2);
10992 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10993 if (bitop2 != NOP_EXPR)
10995 tree res = make_ssa_name (mask);
10996 if (bitop2 == BIT_NOT_EXPR)
10997 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10998 else
10999 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
11000 new_temp);
11001 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11004 if (slp_node)
11005 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11006 else
11007 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11010 if (!slp_node)
11011 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11013 vec_oprnds0.release ();
11014 vec_oprnds1.release ();
11016 return true;
11019 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
11020 can handle all live statements in the node. Otherwise return true
11021 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
11022 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
11024 static bool
11025 can_vectorize_live_stmts (vec_info *vinfo,
11026 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11027 slp_tree slp_node, slp_instance slp_node_instance,
11028 bool vec_stmt_p,
11029 stmt_vector_for_cost *cost_vec)
11031 if (slp_node)
11033 stmt_vec_info slp_stmt_info;
11034 unsigned int i;
11035 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
11037 if (STMT_VINFO_LIVE_P (slp_stmt_info)
11038 && !vectorizable_live_operation (vinfo,
11039 slp_stmt_info, gsi, slp_node,
11040 slp_node_instance, i,
11041 vec_stmt_p, cost_vec))
11042 return false;
11045 else if (STMT_VINFO_LIVE_P (stmt_info)
11046 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
11047 slp_node, slp_node_instance, -1,
11048 vec_stmt_p, cost_vec))
11049 return false;
11051 return true;
11054 /* Make sure the statement is vectorizable. */
11056 opt_result
11057 vect_analyze_stmt (vec_info *vinfo,
11058 stmt_vec_info stmt_info, bool *need_to_vectorize,
11059 slp_tree node, slp_instance node_instance,
11060 stmt_vector_for_cost *cost_vec)
11062 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11063 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
11064 bool ok;
11065 gimple_seq pattern_def_seq;
11067 if (dump_enabled_p ())
11068 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
11069 stmt_info->stmt);
11071 if (gimple_has_volatile_ops (stmt_info->stmt))
11072 return opt_result::failure_at (stmt_info->stmt,
11073 "not vectorized:"
11074 " stmt has volatile operands: %G\n",
11075 stmt_info->stmt);
11077 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11078 && node == NULL
11079 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
11081 gimple_stmt_iterator si;
11083 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
11085 stmt_vec_info pattern_def_stmt_info
11086 = vinfo->lookup_stmt (gsi_stmt (si));
11087 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
11088 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
11090 /* Analyze def stmt of STMT if it's a pattern stmt. */
11091 if (dump_enabled_p ())
11092 dump_printf_loc (MSG_NOTE, vect_location,
11093 "==> examining pattern def statement: %G",
11094 pattern_def_stmt_info->stmt);
11096 opt_result res
11097 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
11098 need_to_vectorize, node, node_instance,
11099 cost_vec);
11100 if (!res)
11101 return res;
11106 /* Skip stmts that do not need to be vectorized. In loops this is expected
11107 to include:
11108 - the COND_EXPR which is the loop exit condition
11109 - any LABEL_EXPRs in the loop
11110 - computations that are used only for array indexing or loop control.
11111 In basic blocks we only analyze statements that are a part of some SLP
11112 instance, therefore, all the statements are relevant.
11114 Pattern statement needs to be analyzed instead of the original statement
11115 if the original statement is not relevant. Otherwise, we analyze both
11116 statements. In basic blocks we are called from some SLP instance
11117      traversal; don't analyze pattern stmts instead, as the pattern stmts
11118      will already be part of an SLP instance.  */
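  /* As a hypothetical illustration, in

       for (i = 0; i < n; i++)
	 a[i] = b[i] + 1;

     the induction-variable increment and the exit test i < n are used only
     for loop control, so they are marked irrelevant and skipped here rather
     than vectorized as statements in their own right.  */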
11120 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
11121 if (!STMT_VINFO_RELEVANT_P (stmt_info)
11122 && !STMT_VINFO_LIVE_P (stmt_info))
11124 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11125 && pattern_stmt_info
11126 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11127 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11129 /* Analyze PATTERN_STMT instead of the original stmt. */
11130 stmt_info = pattern_stmt_info;
11131 if (dump_enabled_p ())
11132 dump_printf_loc (MSG_NOTE, vect_location,
11133 "==> examining pattern statement: %G",
11134 stmt_info->stmt);
11136 else
11138 if (dump_enabled_p ())
11139 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11141 return opt_result::success ();
11144 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11145 && node == NULL
11146 && pattern_stmt_info
11147 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11148 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11150 /* Analyze PATTERN_STMT too. */
11151 if (dump_enabled_p ())
11152 dump_printf_loc (MSG_NOTE, vect_location,
11153 "==> examining pattern statement: %G",
11154 pattern_stmt_info->stmt);
11156 opt_result res
11157 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11158 node_instance, cost_vec);
11159 if (!res)
11160 return res;
11163 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11165 case vect_internal_def:
11166 break;
11168 case vect_reduction_def:
11169 case vect_nested_cycle:
11170 gcc_assert (!bb_vinfo
11171 && (relevance == vect_used_in_outer
11172 || relevance == vect_used_in_outer_by_reduction
11173 || relevance == vect_used_by_reduction
11174 || relevance == vect_unused_in_scope
11175 || relevance == vect_used_only_live));
11176 break;
11178 case vect_induction_def:
11179 gcc_assert (!bb_vinfo);
11180 break;
11182 case vect_constant_def:
11183 case vect_external_def:
11184 case vect_unknown_def_type:
11185 default:
11186 gcc_unreachable ();
11189 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11190 if (node)
11191 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11193 if (STMT_VINFO_RELEVANT_P (stmt_info))
11195 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11196 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11197 || (call && gimple_call_lhs (call) == NULL_TREE));
11198 *need_to_vectorize = true;
11201 if (PURE_SLP_STMT (stmt_info) && !node)
11203 if (dump_enabled_p ())
11204 dump_printf_loc (MSG_NOTE, vect_location,
11205 "handled only by SLP analysis\n");
11206 return opt_result::success ();
11209 ok = true;
11210 if (!bb_vinfo
11211 && (STMT_VINFO_RELEVANT_P (stmt_info)
11212 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11213 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11214 -mveclibabi= takes preference over library functions with
11215 the simd attribute. */
11216 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11217 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11218 cost_vec)
11219 || vectorizable_conversion (vinfo, stmt_info,
11220 NULL, NULL, node, cost_vec)
11221 || vectorizable_operation (vinfo, stmt_info,
11222 NULL, NULL, node, cost_vec)
11223 || vectorizable_assignment (vinfo, stmt_info,
11224 NULL, NULL, node, cost_vec)
11225 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11226 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11227 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11228 node, node_instance, cost_vec)
11229 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11230 NULL, node, cost_vec)
11231 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11232 || vectorizable_condition (vinfo, stmt_info,
11233 NULL, NULL, node, cost_vec)
11234 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11235 cost_vec)
11236 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11237 stmt_info, NULL, node));
11238 else
11240 if (bb_vinfo)
11241 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11242 || vectorizable_simd_clone_call (vinfo, stmt_info,
11243 NULL, NULL, node, cost_vec)
11244 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11245 cost_vec)
11246 || vectorizable_shift (vinfo, stmt_info,
11247 NULL, NULL, node, cost_vec)
11248 || vectorizable_operation (vinfo, stmt_info,
11249 NULL, NULL, node, cost_vec)
11250 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11251 cost_vec)
11252 || vectorizable_load (vinfo, stmt_info,
11253 NULL, NULL, node, cost_vec)
11254 || vectorizable_store (vinfo, stmt_info,
11255 NULL, NULL, node, cost_vec)
11256 || vectorizable_condition (vinfo, stmt_info,
11257 NULL, NULL, node, cost_vec)
11258 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11259 cost_vec)
11260 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11263 if (node)
11264 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11266 if (!ok)
11267 return opt_result::failure_at (stmt_info->stmt,
11268 "not vectorized:"
11269 " relevant stmt not supported: %G",
11270 stmt_info->stmt);
11272   /* Stmts that are (also) "live" (i.e. that are used outside the loop)
11273 need extra handling, except for vectorizable reductions. */
11274 if (!bb_vinfo
11275 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11276 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11277 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11278 stmt_info, NULL, node, node_instance,
11279 false, cost_vec))
11280 return opt_result::failure_at (stmt_info->stmt,
11281 "not vectorized:"
11282 " live stmt not supported: %G",
11283 stmt_info->stmt);
11285 return opt_result::success ();
11289 /* Function vect_transform_stmt.
11291 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11293 bool
11294 vect_transform_stmt (vec_info *vinfo,
11295 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11296 slp_tree slp_node, slp_instance slp_node_instance)
11298 bool is_store = false;
11299 gimple *vec_stmt = NULL;
11300 bool done;
11302 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11304 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11305 if (slp_node)
11306 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11308 switch (STMT_VINFO_TYPE (stmt_info))
11310 case type_demotion_vec_info_type:
11311 case type_promotion_vec_info_type:
11312 case type_conversion_vec_info_type:
11313 done = vectorizable_conversion (vinfo, stmt_info,
11314 gsi, &vec_stmt, slp_node, NULL);
11315 gcc_assert (done);
11316 break;
11318 case induc_vec_info_type:
11319 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11320 stmt_info, &vec_stmt, slp_node,
11321 NULL);
11322 gcc_assert (done);
11323 break;
11325 case shift_vec_info_type:
11326 done = vectorizable_shift (vinfo, stmt_info,
11327 gsi, &vec_stmt, slp_node, NULL);
11328 gcc_assert (done);
11329 break;
11331 case op_vec_info_type:
11332 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11333 NULL);
11334 gcc_assert (done);
11335 break;
11337 case assignment_vec_info_type:
11338 done = vectorizable_assignment (vinfo, stmt_info,
11339 gsi, &vec_stmt, slp_node, NULL);
11340 gcc_assert (done);
11341 break;
11343 case load_vec_info_type:
11344 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11345 NULL);
11346 gcc_assert (done);
11347 break;
11349 case store_vec_info_type:
11350 done = vectorizable_store (vinfo, stmt_info,
11351 gsi, &vec_stmt, slp_node, NULL);
11352 gcc_assert (done);
11353 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11355 /* In case of interleaving, the whole chain is vectorized when the
11356 last store in the chain is reached. Store stmts before the last
11357 	   one are skipped, and their stmt_vec_info shouldn't be freed
11358 meanwhile. */
11359 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11360 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11361 is_store = true;
11363 else
11364 is_store = true;
11365 break;
11367 case condition_vec_info_type:
11368 done = vectorizable_condition (vinfo, stmt_info,
11369 gsi, &vec_stmt, slp_node, NULL);
11370 gcc_assert (done);
11371 break;
11373 case comparison_vec_info_type:
11374 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11375 slp_node, NULL);
11376 gcc_assert (done);
11377 break;
11379 case call_vec_info_type:
11380 done = vectorizable_call (vinfo, stmt_info,
11381 gsi, &vec_stmt, slp_node, NULL);
11382 break;
11384 case call_simd_clone_vec_info_type:
11385 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11386 slp_node, NULL);
11387 break;
11389 case reduc_vec_info_type:
11390 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11391 gsi, &vec_stmt, slp_node);
11392 gcc_assert (done);
11393 break;
11395 case cycle_phi_info_type:
11396 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11397 &vec_stmt, slp_node, slp_node_instance);
11398 gcc_assert (done);
11399 break;
11401 case lc_phi_info_type:
11402 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11403 stmt_info, &vec_stmt, slp_node);
11404 gcc_assert (done);
11405 break;
11407 case phi_info_type:
11408 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11409 gcc_assert (done);
11410 break;
11412 default:
11413 if (!STMT_VINFO_LIVE_P (stmt_info))
11415 if (dump_enabled_p ())
11416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11417 "stmt not supported.\n");
11418 gcc_unreachable ();
11420 done = true;
11423 if (!slp_node && vec_stmt)
11424 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11426 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11428 /* Handle stmts whose DEF is used outside the loop-nest that is
11429 being vectorized. */
11430 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11431 slp_node_instance, true, NULL);
11432 gcc_assert (done);
11435 if (slp_node)
11436 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11438 return is_store;
11442 /* Remove a group of stores (for SLP or interleaving), free their
11443 stmt_vec_info. */
11445 void
11446 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11448 stmt_vec_info next_stmt_info = first_stmt_info;
11450 while (next_stmt_info)
11452 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11453 next_stmt_info = vect_orig_stmt (next_stmt_info);
11454 /* Free the attached stmt_vec_info and remove the stmt. */
11455 vinfo->remove_stmt (next_stmt_info);
11456 next_stmt_info = tmp;
11460 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11461 elements of type SCALAR_TYPE, or null if the target doesn't support
11462 such a type.
11464 If NUNITS is zero, return a vector type that contains elements of
11465 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11467 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11468 for this vectorization region and want to "autodetect" the best choice.
11469 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11470 and we want the new type to be interoperable with it. PREVAILING_MODE
11471 in this case can be a scalar integer mode or a vector mode; when it
11472 is a vector mode, the function acts like a tree-level version of
11473 related_vector_mode. */
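/* For instance (a sketch, assuming a target with 128-bit vectors), calling
   this with PREVAILING_MODE == V16QImode, SCALAR_TYPE == int and
   NUNITS == 4 would return a vector(4) int type whose TYPE_MODE is
   V4SImode, whereas a request whose total size is not interoperable with
   V16QImode would return NULL_TREE.  */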
11475 tree
11476 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11477 tree scalar_type, poly_uint64 nunits)
11479 tree orig_scalar_type = scalar_type;
11480 scalar_mode inner_mode;
11481 machine_mode simd_mode;
11482 tree vectype;
11484 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11485 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11486 return NULL_TREE;
11488 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11490 /* Interoperability between modes requires one to be a constant multiple
11491 of the other, so that the number of vectors required for each operation
11492 is a compile-time constant. */
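  /* E.g. with a 16-byte PREVAILING_MODE, asking for 4 elements of an
     8-byte type (32 bytes, a 2x multiple) passes this check, whereas
     asking for 3 such elements (24 bytes) would be rejected.  */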
11493 if (prevailing_mode != VOIDmode
11494 && !constant_multiple_p (nunits * nbytes,
11495 GET_MODE_SIZE (prevailing_mode))
11496 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
11497 nunits * nbytes))
11498 return NULL_TREE;
11500 /* For vector types of elements whose mode precision doesn't
11501      match their type's precision we use an element type of mode
11502 precision. The vectorization routines will have to make sure
11503 they support the proper result truncation/extension.
11504 We also make sure to build vector types with INTEGER_TYPE
11505 component type only. */
11506 if (INTEGRAL_TYPE_P (scalar_type)
11507 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11508 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11509 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11510 TYPE_UNSIGNED (scalar_type));
11512 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11513 When the component mode passes the above test simply use a type
11514 corresponding to that mode. The theory is that any use that
11515 would cause problems with this will disable vectorization anyway. */
11516 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11517 && !INTEGRAL_TYPE_P (scalar_type))
11518 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11520 /* We can't build a vector type of elements with alignment bigger than
11521 their size. */
11522 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11523 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11524 TYPE_UNSIGNED (scalar_type));
11526   /* If we fell back to using the mode, fail if there was
11527      no scalar type for it.  */
11528 if (scalar_type == NULL_TREE)
11529 return NULL_TREE;
11531 /* If no prevailing mode was supplied, use the mode the target prefers.
11532 Otherwise lookup a vector mode based on the prevailing mode. */
11533 if (prevailing_mode == VOIDmode)
11535 gcc_assert (known_eq (nunits, 0U));
11536 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11537 if (SCALAR_INT_MODE_P (simd_mode))
11539 /* Traditional behavior is not to take the integer mode
11540 literally, but simply to use it as a way of determining
11541 the vector size. It is up to mode_for_vector to decide
11542 what the TYPE_MODE should be.
11544 Note that nunits == 1 is allowed in order to support single
11545 element vector types. */
11546 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11547 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11548 return NULL_TREE;
11551 else if (SCALAR_INT_MODE_P (prevailing_mode)
11552 || !related_vector_mode (prevailing_mode,
11553 inner_mode, nunits).exists (&simd_mode))
11555 /* Fall back to using mode_for_vector, mostly in the hope of being
11556 able to use an integer mode. */
11557 if (known_eq (nunits, 0U)
11558 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11559 return NULL_TREE;
11561 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11562 return NULL_TREE;
11565 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11567 /* In cases where the mode was chosen by mode_for_vector, check that
11568 the target actually supports the chosen mode, or that it at least
11569 allows the vector mode to be replaced by a like-sized integer. */
11570 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11571 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11572 return NULL_TREE;
11574 /* Re-attach the address-space qualifier if we canonicalized the scalar
11575 type. */
11576 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11577 return build_qualified_type
11578 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11580 return vectype;
11583 /* Function get_vectype_for_scalar_type.
11585 Returns the vector type corresponding to SCALAR_TYPE as supported
11586 by the target. If GROUP_SIZE is nonzero and we're performing BB
11587 vectorization, make sure that the number of elements in the vector
11588 is no bigger than GROUP_SIZE. */
11590 tree
11591 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11592 unsigned int group_size)
11594 /* For BB vectorization, we should always have a group size once we've
11595 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11596 are tentative requests during things like early data reference
11597 analysis and pattern recognition. */
11598 if (is_a <bb_vec_info> (vinfo))
11599 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11600 else
11601 group_size = 0;
11603 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11604 scalar_type);
11605 if (vectype && vinfo->vector_mode == VOIDmode)
11606 vinfo->vector_mode = TYPE_MODE (vectype);
11608 /* Register the natural choice of vector type, before the group size
11609 has been applied. */
11610 if (vectype)
11611 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11613 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11614 try again with an explicit number of elements. */
11615 if (vectype
11616 && group_size
11617 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11619 /* Start with the biggest number of units that fits within
11620 GROUP_SIZE and halve it until we find a valid vector type.
11621 Usually either the first attempt will succeed or all will
11622 fail (in the latter case because GROUP_SIZE is too small
11623 for the target), but it's possible that a target could have
11624 a hole between supported vector types.
11626 If GROUP_SIZE is not a power of 2, this has the effect of
11627 trying the largest power of 2 that fits within the group,
11628 even though the group is not a multiple of that vector size.
11629 The BB vectorizer will then try to carve up the group into
11630 smaller pieces. */
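      /* For example (purely illustrative), with GROUP_SIZE == 6 the loop
	 below starts at nunits == 4 (1 << floor_log2 (6)) and, if no
	 4-element vector type exists for SCALAR_TYPE, retries with
	 nunits == 2 before giving up.  */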
11631 unsigned int nunits = 1 << floor_log2 (group_size);
11634 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11635 scalar_type, nunits);
11636 nunits /= 2;
11638 while (nunits > 1 && !vectype);
11641 return vectype;
11644 /* Return the vector type corresponding to SCALAR_TYPE as supported
11645 by the target. NODE, if nonnull, is the SLP tree node that will
11646 use the returned vector type. */
11648 tree
11649 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11651 unsigned int group_size = 0;
11652 if (node)
11653 group_size = SLP_TREE_LANES (node);
11654 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11657 /* Function get_mask_type_for_scalar_type.
11659 Returns the mask type corresponding to a result of comparison
11660 of vectors of specified SCALAR_TYPE as supported by target.
11661 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11662 make sure that the number of elements in the vector is no bigger
11663 than GROUP_SIZE. */
11665 tree
11666 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11667 unsigned int group_size)
11669 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11671 if (!vectype)
11672 return NULL;
11674 return truth_type_for (vectype);
11677 /* Function get_same_sized_vectype
11679 Returns a vector type corresponding to SCALAR_TYPE of size
11680 VECTOR_TYPE if supported by the target. */
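/* A minimal illustration (assuming 128-bit vectors): for SCALAR_TYPE int
   and VECTOR_TYPE vector(4) float, NUNITS works out to 16 / 4 == 4 and
   the result is vector(4) int; for a scalar boolean SCALAR_TYPE the
   function instead returns the truth type of VECTOR_TYPE.  */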
11682 tree
11683 get_same_sized_vectype (tree scalar_type, tree vector_type)
11685 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11686 return truth_type_for (vector_type);
11688 poly_uint64 nunits;
11689 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11690 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11691 return NULL_TREE;
11693 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11694 scalar_type, nunits);
11697 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11698 would not change the chosen vector modes. */
11700 bool
11701 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11703 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11704 i != vinfo->used_vector_modes.end (); ++i)
11705 if (!VECTOR_MODE_P (*i)
11706 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11707 return false;
11708 return true;
11711 /* Function vect_is_simple_use.
11713 Input:
11714 VINFO - the vect info of the loop or basic block that is being vectorized.
11715 OPERAND - operand in the loop or bb.
11716 Output:
11717 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11718 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11719 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11720 the definition could be anywhere in the function
11721 DT - the type of definition
11723 Returns whether a stmt with OPERAND can be vectorized.
11724 For loops, supportable operands are constants, loop invariants, and operands
11725 that are defined by the current iteration of the loop. Unsupportable
11726 operands are those that are defined by a previous iteration of the loop (as
11727 is the case in reduction/induction computations).
11728 For basic blocks, supportable operands are constants and bb invariants.
11729 For now, operands defined outside the basic block are not supported. */
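/* As a hypothetical example, in

     for (i = 0; i < n; i++)
       a[i] = b[i] * c;

   the use of the value loaded from b[i] is vect_internal_def, the use of
   the loop-invariant c is vect_external_def, and a literal constant
   operand would be vect_constant_def.  */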
11731 bool
11732 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11733 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11735 if (def_stmt_info_out)
11736 *def_stmt_info_out = NULL;
11737 if (def_stmt_out)
11738 *def_stmt_out = NULL;
11739 *dt = vect_unknown_def_type;
11741 if (dump_enabled_p ())
11743 dump_printf_loc (MSG_NOTE, vect_location,
11744 "vect_is_simple_use: operand ");
11745 if (TREE_CODE (operand) == SSA_NAME
11746 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11747 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11748 else
11749 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11752 if (CONSTANT_CLASS_P (operand))
11753 *dt = vect_constant_def;
11754 else if (is_gimple_min_invariant (operand))
11755 *dt = vect_external_def;
11756 else if (TREE_CODE (operand) != SSA_NAME)
11757 *dt = vect_unknown_def_type;
11758 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11759 *dt = vect_external_def;
11760 else
11762 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11763 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11764 if (!stmt_vinfo)
11765 *dt = vect_external_def;
11766 else
11768 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11769 def_stmt = stmt_vinfo->stmt;
11770 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11771 if (def_stmt_info_out)
11772 *def_stmt_info_out = stmt_vinfo;
11774 if (def_stmt_out)
11775 *def_stmt_out = def_stmt;
11778 if (dump_enabled_p ())
11780 dump_printf (MSG_NOTE, ", type of def: ");
11781 switch (*dt)
11783 case vect_uninitialized_def:
11784 dump_printf (MSG_NOTE, "uninitialized\n");
11785 break;
11786 case vect_constant_def:
11787 dump_printf (MSG_NOTE, "constant\n");
11788 break;
11789 case vect_external_def:
11790 dump_printf (MSG_NOTE, "external\n");
11791 break;
11792 case vect_internal_def:
11793 dump_printf (MSG_NOTE, "internal\n");
11794 break;
11795 case vect_induction_def:
11796 dump_printf (MSG_NOTE, "induction\n");
11797 break;
11798 case vect_reduction_def:
11799 dump_printf (MSG_NOTE, "reduction\n");
11800 break;
11801 case vect_double_reduction_def:
11802 dump_printf (MSG_NOTE, "double reduction\n");
11803 break;
11804 case vect_nested_cycle:
11805 dump_printf (MSG_NOTE, "nested cycle\n");
11806 break;
11807 case vect_unknown_def_type:
11808 dump_printf (MSG_NOTE, "unknown\n");
11809 break;
11813 if (*dt == vect_unknown_def_type)
11815 if (dump_enabled_p ())
11816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11817 "Unsupported pattern.\n");
11818 return false;
11821 return true;
11824 /* Function vect_is_simple_use.
11826 Same as vect_is_simple_use but also determines the vector operand
11827 type of OPERAND and stores it to *VECTYPE. If the definition of
11828 OPERAND is vect_uninitialized_def, vect_constant_def or
11829 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11830 is responsible to compute the best suited vector type for the
11831 scalar operand. */
11833 bool
11834 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11835 tree *vectype, stmt_vec_info *def_stmt_info_out,
11836 gimple **def_stmt_out)
11838 stmt_vec_info def_stmt_info;
11839 gimple *def_stmt;
11840 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11841 return false;
11843 if (def_stmt_out)
11844 *def_stmt_out = def_stmt;
11845 if (def_stmt_info_out)
11846 *def_stmt_info_out = def_stmt_info;
11848 /* Now get a vector type if the def is internal, otherwise supply
11849 NULL_TREE and leave it up to the caller to figure out a proper
11850 type for the use stmt. */
11851 if (*dt == vect_internal_def
11852 || *dt == vect_induction_def
11853 || *dt == vect_reduction_def
11854 || *dt == vect_double_reduction_def
11855 || *dt == vect_nested_cycle)
11857 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11858 gcc_assert (*vectype != NULL_TREE);
11859 if (dump_enabled_p ())
11860 dump_printf_loc (MSG_NOTE, vect_location,
11861 "vect_is_simple_use: vectype %T\n", *vectype);
11863 else if (*dt == vect_uninitialized_def
11864 || *dt == vect_constant_def
11865 || *dt == vect_external_def)
11866 *vectype = NULL_TREE;
11867 else
11868 gcc_unreachable ();
11870 return true;
11873 /* Function vect_is_simple_use.
11875 Same as vect_is_simple_use but determines the operand by operand
11876 position OPERAND from either STMT or SLP_NODE, filling in *OP
11877 and *SLP_DEF (when SLP_NODE is not NULL). */
11879 bool
11880 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11881 unsigned operand, tree *op, slp_tree *slp_def,
11882 enum vect_def_type *dt,
11883 tree *vectype, stmt_vec_info *def_stmt_info_out)
11885 if (slp_node)
11887 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11888 *slp_def = child;
11889 *vectype = SLP_TREE_VECTYPE (child);
11890 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11892 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11893 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11895 else
11897 if (def_stmt_info_out)
11898 *def_stmt_info_out = NULL;
11899 *op = SLP_TREE_SCALAR_OPS (child)[0];
11900 *dt = SLP_TREE_DEF_TYPE (child);
11901 return true;
11904 else
11906 *slp_def = NULL;
11907 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11909 if (gimple_assign_rhs_code (ass) == COND_EXPR
11910 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11912 if (operand < 2)
11913 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11914 else
11915 *op = gimple_op (ass, operand);
11917 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11918 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11919 else
11920 *op = gimple_op (ass, operand + 1);
11922 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11923 *op = gimple_call_arg (call, operand);
11924 else
11925 gcc_unreachable ();
11926 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11930 /* If OP is not NULL and is external or constant update its vector
11931 type with VECTYPE. Returns true if successful or false if not,
11932 for example when conflicting vector types are present. */
11934 bool
11935 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11937 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11938 return true;
11939 if (SLP_TREE_VECTYPE (op))
11940 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11941 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
11942      should be handled by patterns.  Allow vect_constant_def for now.  */
11943 if (VECTOR_BOOLEAN_TYPE_P (vectype)
11944 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
11945 return false;
11946 SLP_TREE_VECTYPE (op) = vectype;
11947 return true;
11950 /* Function supportable_widening_operation
11952 Check whether an operation represented by the code CODE is a
11953 widening operation that is supported by the target platform in
11954 vector form (i.e., when operating on arguments of type VECTYPE_IN
11955 producing a result of type VECTYPE_OUT).
11957 Widening operations we currently support are NOP (CONVERT), FLOAT,
11958 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11959 are supported by the target platform either directly (via vector
11960 tree-codes), or via target builtins.
11962 Output:
11963 - CODE1 and CODE2 are codes of vector operations to be used when
11964 vectorizing the operation, if available.
11965 - MULTI_STEP_CVT determines the number of required intermediate steps in
11966 case of multi-step conversion (like char->short->int - in that case
11967 MULTI_STEP_CVT will be 1).
11968 - INTERM_TYPES contains the intermediate type required to perform the
11969 widening operation (short in the above example). */
11971 bool
11972 supportable_widening_operation (vec_info *vinfo,
11973 enum tree_code code, stmt_vec_info stmt_info,
11974 tree vectype_out, tree vectype_in,
11975 enum tree_code *code1, enum tree_code *code2,
11976 int *multi_step_cvt,
11977 vec<tree> *interm_types)
11979 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11980 class loop *vect_loop = NULL;
11981 machine_mode vec_mode;
11982 enum insn_code icode1, icode2;
11983 optab optab1, optab2;
11984 tree vectype = vectype_in;
11985 tree wide_vectype = vectype_out;
11986 enum tree_code c1, c2;
11987 int i;
11988 tree prev_type, intermediate_type;
11989 machine_mode intermediate_mode, prev_mode;
11990 optab optab3, optab4;
11992 *multi_step_cvt = 0;
11993 if (loop_info)
11994 vect_loop = LOOP_VINFO_LOOP (loop_info);
11996 switch (code)
11998 case WIDEN_MULT_EXPR:
11999 /* The result of a vectorized widening operation usually requires
12000 two vectors (because the widened results do not fit into one vector).
12001 The generated vector results would normally be expected to be
12002 generated in the same order as in the original scalar computation,
12003 i.e. if 8 results are generated in each vector iteration, they are
12004 to be organized as follows:
12005 vect1: [res1,res2,res3,res4],
12006 vect2: [res5,res6,res7,res8].
12008 However, in the special case that the result of the widening
12009 operation is used in a reduction computation only, the order doesn't
12010 matter (because when vectorizing a reduction we change the order of
12011 the computation). Some targets can take advantage of this and
12012 generate more efficient code. For example, targets like Altivec,
12013 that support widen_mult using a sequence of {mult_even,mult_odd}
12014 generate the following vectors:
12015 vect1: [res1,res3,res5,res7],
12016 vect2: [res2,res4,res6,res8].
12018 When vectorizing outer-loops, we execute the inner-loop sequentially
12019 (each vectorized inner-loop iteration contributes to VF outer-loop
12020 	 iterations in parallel).  We therefore don't allow changing the
12021 order of the computation in the inner-loop during outer-loop
12022 vectorization. */
12023 /* TODO: Another case in which order doesn't *really* matter is when we
12024 widen and then contract again, e.g. (short)((int)x * y >> 8).
12025 Normally, pack_trunc performs an even/odd permute, whereas the
12026 repack from an even/odd expansion would be an interleave, which
12027 would be significantly simpler for e.g. AVX2. */
12028 /* In any case, in order to avoid duplicating the code below, recurse
12029 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
12030 are properly set up for the caller. If we fail, we'll continue with
12031 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
12032 if (vect_loop
12033 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
12034 && !nested_in_vect_loop_p (vect_loop, stmt_info)
12035 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
12036 stmt_info, vectype_out,
12037 vectype_in, code1, code2,
12038 multi_step_cvt, interm_types))
12040 /* Elements in a vector with vect_used_by_reduction property cannot
12041 be reordered if the use chain with this property does not have the
12042 	     same operation.  One such example is s += a * b, where elements
12043 in a and b cannot be reordered. Here we check if the vector defined
12044 by STMT is only directly used in the reduction statement. */
12045 tree lhs = gimple_assign_lhs (stmt_info->stmt);
12046 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12047 if (use_stmt_info
12048 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12049 return true;
12051 c1 = VEC_WIDEN_MULT_LO_EXPR;
12052 c2 = VEC_WIDEN_MULT_HI_EXPR;
12053 break;
12055 case DOT_PROD_EXPR:
12056 c1 = DOT_PROD_EXPR;
12057 c2 = DOT_PROD_EXPR;
12058 break;
12060 case SAD_EXPR:
12061 c1 = SAD_EXPR;
12062 c2 = SAD_EXPR;
12063 break;
12065 case VEC_WIDEN_MULT_EVEN_EXPR:
12066 /* Support the recursion induced just above. */
12067 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12068 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12069 break;
12071 case WIDEN_LSHIFT_EXPR:
12072 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12073 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12074 break;
12076 case WIDEN_PLUS_EXPR:
12077 c1 = VEC_WIDEN_PLUS_LO_EXPR;
12078 c2 = VEC_WIDEN_PLUS_HI_EXPR;
12079 break;
12081 case WIDEN_MINUS_EXPR:
12082 c1 = VEC_WIDEN_MINUS_LO_EXPR;
12083 c2 = VEC_WIDEN_MINUS_HI_EXPR;
12084 break;
12086 CASE_CONVERT:
12087 c1 = VEC_UNPACK_LO_EXPR;
12088 c2 = VEC_UNPACK_HI_EXPR;
12089 break;
12091 case FLOAT_EXPR:
12092 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12093 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12094 break;
12096 case FIX_TRUNC_EXPR:
12097 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12098 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12099 break;
12101 default:
12102 gcc_unreachable ();
12105 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12106 std::swap (c1, c2);
12108 if (code == FIX_TRUNC_EXPR)
12110 /* The signedness is determined from output operand. */
12111 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12112 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12114 else if (CONVERT_EXPR_CODE_P (code)
12115 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12116 && VECTOR_BOOLEAN_TYPE_P (vectype)
12117 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12118 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12120 /* If the input and result modes are the same, a different optab
12121 is needed where we pass in the number of units in vectype. */
12122 optab1 = vec_unpacks_sbool_lo_optab;
12123 optab2 = vec_unpacks_sbool_hi_optab;
12125 else
12127 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12128 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12131 if (!optab1 || !optab2)
12132 return false;
12134 vec_mode = TYPE_MODE (vectype);
12135 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12136 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12137 return false;
12139 *code1 = c1;
12140 *code2 = c2;
12142 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12143 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12145 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12146 return true;
12147 /* For scalar masks we may have different boolean
12148 vector types having the same QImode. Thus we
12149 	 add an additional check on the number of elements.  */
12150 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12151 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12152 return true;
12155 /* Check if it's a multi-step conversion that can be done using intermediate
12156 types. */
12158 prev_type = vectype;
12159 prev_mode = vec_mode;
12161 if (!CONVERT_EXPR_CODE_P (code))
12162 return false;
12164 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12165 intermediate steps in promotion sequence. We try
12166 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
12167 not. */
12168 interm_types->create (MAX_INTERM_CVT_STEPS);
12169 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12171 intermediate_mode = insn_data[icode1].operand[0].mode;
12172 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12173 intermediate_type
12174 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12175 else
12176 intermediate_type
12177 = lang_hooks.types.type_for_mode (intermediate_mode,
12178 TYPE_UNSIGNED (prev_type));
12180 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12181 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12182 && intermediate_mode == prev_mode
12183 && SCALAR_INT_MODE_P (prev_mode))
12185 /* If the input and result modes are the same, a different optab
12186 is needed where we pass in the number of units in vectype. */
12187 optab3 = vec_unpacks_sbool_lo_optab;
12188 optab4 = vec_unpacks_sbool_hi_optab;
12190 else
12192 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12193 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12196 if (!optab3 || !optab4
12197 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12198 || insn_data[icode1].operand[0].mode != intermediate_mode
12199 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12200 || insn_data[icode2].operand[0].mode != intermediate_mode
12201 || ((icode1 = optab_handler (optab3, intermediate_mode))
12202 == CODE_FOR_nothing)
12203 || ((icode2 = optab_handler (optab4, intermediate_mode))
12204 == CODE_FOR_nothing))
12205 break;
12207 interm_types->quick_push (intermediate_type);
12208 (*multi_step_cvt)++;
12210 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12211 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12213 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12214 return true;
12215 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12216 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12217 return true;
12220 prev_type = intermediate_type;
12221 prev_mode = intermediate_mode;
12224 interm_types->release ();
12225 return false;
12229 /* Function supportable_narrowing_operation
12231 Check whether an operation represented by the code CODE is a
12232 narrowing operation that is supported by the target platform in
12233 vector form (i.e., when operating on arguments of type VECTYPE_IN
12234 and producing a result of type VECTYPE_OUT).
12236 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12237 and FLOAT. This function checks if these operations are supported by
12238 the target platform directly via vector tree-codes.
12240 Output:
12241 - CODE1 is the code of a vector operation to be used when
12242 vectorizing the operation, if available.
12243 - MULTI_STEP_CVT determines the number of required intermediate steps in
12244 case of multi-step conversion (like int->short->char - in that case
12245 MULTI_STEP_CVT will be 1).
12246 - INTERM_TYPES contains the intermediate type required to perform the
12247 narrowing operation (short in the above example). */
12249 bool
12250 supportable_narrowing_operation (enum tree_code code,
12251 tree vectype_out, tree vectype_in,
12252 enum tree_code *code1, int *multi_step_cvt,
12253 vec<tree> *interm_types)
12255 machine_mode vec_mode;
12256 enum insn_code icode1;
12257 optab optab1, interm_optab;
12258 tree vectype = vectype_in;
12259 tree narrow_vectype = vectype_out;
12260 enum tree_code c1;
12261 tree intermediate_type, prev_type;
12262 machine_mode intermediate_mode, prev_mode;
12263 int i;
12264 unsigned HOST_WIDE_INT n_elts;
12265 bool uns;
12267 *multi_step_cvt = 0;
12268 switch (code)
12270 CASE_CONVERT:
12271 c1 = VEC_PACK_TRUNC_EXPR;
12272 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12273 && VECTOR_BOOLEAN_TYPE_P (vectype)
12274 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
12275 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
12276 && n_elts < BITS_PER_UNIT)
12277 optab1 = vec_pack_sbool_trunc_optab;
12278 else
12279 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12280 break;
12282 case FIX_TRUNC_EXPR:
12283 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12284 /* The signedness is determined from output operand. */
12285 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12286 break;
12288 case FLOAT_EXPR:
12289 c1 = VEC_PACK_FLOAT_EXPR;
12290 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12291 break;
12293 default:
12294 gcc_unreachable ();
12297 if (!optab1)
12298 return false;
12300 vec_mode = TYPE_MODE (vectype);
12301 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12302 return false;
12304 *code1 = c1;
12306 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12308 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12309 return true;
12310 /* For scalar masks we may have different boolean
12311 vector types having the same QImode. Thus we
12312 	 add an additional check on the number of elements.  */
12313 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12314 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12315 return true;
12318 if (code == FLOAT_EXPR)
12319 return false;
12321 /* Check if it's a multi-step conversion that can be done using intermediate
12322 types. */
12323 prev_mode = vec_mode;
12324 prev_type = vectype;
12325 if (code == FIX_TRUNC_EXPR)
12326 uns = TYPE_UNSIGNED (vectype_out);
12327 else
12328 uns = TYPE_UNSIGNED (vectype);
12330 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12331 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12332 costly than signed. */
12333 if (code == FIX_TRUNC_EXPR && uns)
12335 enum insn_code icode2;
12337 intermediate_type
12338 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12339 interm_optab
12340 = optab_for_tree_code (c1, intermediate_type, optab_default);
12341 if (interm_optab != unknown_optab
12342 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12343 && insn_data[icode1].operand[0].mode
12344 == insn_data[icode2].operand[0].mode)
12346 uns = false;
12347 optab1 = interm_optab;
12348 icode1 = icode2;
12352 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12353 intermediate steps in the narrowing sequence. We try up to
12354 MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we cannot. */
12355 interm_types->create (MAX_INTERM_CVT_STEPS);
12356 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12358 intermediate_mode = insn_data[icode1].operand[0].mode;
12359 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12360 intermediate_type
12361 = vect_double_mask_nunits (prev_type, intermediate_mode);
12362 else
12363 intermediate_type
12364 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12365 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12366 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12367 && SCALAR_INT_MODE_P (prev_mode)
12368 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
12369 && n_elts < BITS_PER_UNIT)
12370 interm_optab = vec_pack_sbool_trunc_optab;
12371 else
12372 interm_optab
12373 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12374 optab_default);
12375 if (!interm_optab
12376 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12377 || insn_data[icode1].operand[0].mode != intermediate_mode
12378 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12379 == CODE_FOR_nothing))
12380 break;
12382 interm_types->quick_push (intermediate_type);
12383 (*multi_step_cvt)++;
12385 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12387 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12388 return true;
12389 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12390 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12391 return true;
12394 prev_mode = intermediate_mode;
12395 prev_type = intermediate_type;
12396 optab1 = interm_optab;
12399 interm_types->release ();
12400 return false;
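/* Illustrative sketch only, not used by the vectorizer: a scalar model of
   the int -> short -> char narrowing mentioned in the function comment
   above.  On a target that only provides single-width VEC_PACK_TRUNC
   steps, supportable_narrowing_operation would report MULTI_STEP_CVT == 1
   with the short vector type as the sole entry of INTERM_TYPES; every
   vector lane then undergoes the same pair of truncations as this helper,
   whose name is made up for illustration.  */

static inline signed char
narrow_int_to_char_example (int value)
{
  short intermediate = (short) value;	/* first VEC_PACK_TRUNC step */
  return (signed char) intermediate;	/* second VEC_PACK_TRUNC step */
}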
12403 /* Generate and return a vector mask of MASK_TYPE such that
12404 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12405 Add the statements to SEQ. */
12407 tree
12408 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12409 tree end_index, const char *name)
12411 tree cmp_type = TREE_TYPE (start_index);
12412 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12413 cmp_type, mask_type,
12414 OPTIMIZE_FOR_SPEED));
12415 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12416 start_index, end_index,
12417 build_zero_cst (mask_type));
12418 tree tmp;
12419 if (name)
12420 tmp = make_temp_ssa_name (mask_type, NULL, name);
12421 else
12422 tmp = make_ssa_name (mask_type);
12423 gimple_call_set_lhs (call, tmp);
12424 gimple_seq_add_stmt (seq, call);
12425 return tmp;
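/* Illustrative sketch only: a scalar model of one lane of the IFN_WHILE_ULT
   mask built by vect_gen_while.  Ignoring wraparound, "J + START_INDEX <
   END_INDEX for all J <= I" is equivalent to "I + START_INDEX < END_INDEX",
   which is the per-lane test below.  The helper and its parameter names are
   made up for illustration.  */

static inline bool
while_ult_lane_example (unsigned HOST_WIDE_INT start_index,
                        unsigned HOST_WIDE_INT end_index,
                        unsigned int lane)
{
  return start_index + lane < end_index;
}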
12428 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12429 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12431 tree
12432 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12433 tree end_index)
12435 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12436 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
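/* Illustrative counterpart of the sketch above for vect_gen_while_not:
   the BIT_NOT_EXPR simply activates the lanes that the IFN_WHILE_ULT mask
   deactivates, i.e. lane I is true iff I + START_INDEX >= END_INDEX.  */

static inline bool
while_ult_not_lane_example (unsigned HOST_WIDE_INT start_index,
                            unsigned HOST_WIDE_INT end_index,
                            unsigned int lane)
{
  return !while_ult_lane_example (start_index, end_index, lane);
}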
12439 /* Try to compute the vector types required to vectorize STMT_INFO,
12440 returning true on success and false if vectorization isn't possible.
12441 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12442 make sure that the number of elements in the vectors is no bigger
12443 than GROUP_SIZE.
12445 On success:
12447 - Set *STMT_VECTYPE_OUT to:
12448 - NULL_TREE if the statement doesn't need to be vectorized;
12449 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12451 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12452 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12453 statement does not help to determine the overall number of units. */
12455 opt_result
12456 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12457 tree *stmt_vectype_out,
12458 tree *nunits_vectype_out,
12459 unsigned int group_size)
12461 gimple *stmt = stmt_info->stmt;
12463 /* For BB vectorization, we should always have a group size once we've
12464 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12465 are tentative requests during things like early data reference
12466 analysis and pattern recognition. */
12467 if (is_a <bb_vec_info> (vinfo))
12468 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12469 else
12470 group_size = 0;
12472 *stmt_vectype_out = NULL_TREE;
12473 *nunits_vectype_out = NULL_TREE;
12475 if (gimple_get_lhs (stmt) == NULL_TREE
12476 /* MASK_STORE has no lhs, but is ok. */
12477 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12479 if (is_a <gcall *> (stmt))
12481 /* Ignore calls with no lhs. These must be calls to
12482 #pragma omp simd functions, and the vectorization factor
12483 they really need can't be determined until
12484 vectorizable_simd_clone_call. */
12485 if (dump_enabled_p ())
12486 dump_printf_loc (MSG_NOTE, vect_location,
12487 "defer to SIMD clone analysis.\n");
12488 return opt_result::success ();
12491 return opt_result::failure_at (stmt,
12492 "not vectorized: irregular stmt.%G", stmt);
12495 tree vectype;
12496 tree scalar_type = NULL_TREE;
12497 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12499 vectype = STMT_VINFO_VECTYPE (stmt_info);
12500 if (dump_enabled_p ())
12501 dump_printf_loc (MSG_NOTE, vect_location,
12502 "precomputed vectype: %T\n", vectype);
12504 else if (vect_use_mask_type_p (stmt_info))
12506 unsigned int precision = stmt_info->mask_precision;
12507 scalar_type = build_nonstandard_integer_type (precision, 1);
12508 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12509 if (!vectype)
12510 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12511 " data-type %T\n", scalar_type);
12512 if (dump_enabled_p ())
12513 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12515 else
12517 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12518 scalar_type = TREE_TYPE (DR_REF (dr));
12519 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12520 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12521 else
12522 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12524 if (dump_enabled_p ())
12526 if (group_size)
12527 dump_printf_loc (MSG_NOTE, vect_location,
12528 "get vectype for scalar type (group size %d):"
12529 " %T\n", group_size, scalar_type);
12530 else
12531 dump_printf_loc (MSG_NOTE, vect_location,
12532 "get vectype for scalar type: %T\n", scalar_type);
12534 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12535 if (!vectype)
12536 return opt_result::failure_at (stmt,
12537 "not vectorized:"
12538 " unsupported data-type %T\n",
12539 scalar_type);
12541 if (dump_enabled_p ())
12542 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12545 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12546 return opt_result::failure_at (stmt,
12547 "not vectorized: vector stmt in loop:%G",
12548 stmt);
12550 *stmt_vectype_out = vectype;
12552 /* Don't try to compute scalar types if the stmt produces a boolean
12553 vector; use the existing vector type instead. */
12554 tree nunits_vectype = vectype;
12555 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12557 /* The number of units is set according to the smallest scalar
12558 type (or the largest vector size, but we only support one
12559 vector size per vectorization). */
12560 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12561 TREE_TYPE (vectype));
12562 if (scalar_type != TREE_TYPE (vectype))
12564 if (dump_enabled_p ())
12565 dump_printf_loc (MSG_NOTE, vect_location,
12566 "get vectype for smallest scalar type: %T\n",
12567 scalar_type);
12568 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12569 group_size);
12570 if (!nunits_vectype)
12571 return opt_result::failure_at
12572 (stmt, "not vectorized: unsupported data-type %T\n",
12573 scalar_type);
12574 if (dump_enabled_p ())
12575 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12576 nunits_vectype);
12580 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12581 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12582 return opt_result::failure_at (stmt,
12583 "Not vectorized: Incompatible number "
12584 "of vector subparts between %T and %T\n",
12585 nunits_vectype, *stmt_vectype_out);
12587 if (dump_enabled_p ())
12589 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12590 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12591 dump_printf (MSG_NOTE, "\n");
12594 *nunits_vectype_out = nunits_vectype;
12595 return opt_result::success ();
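/* Illustrative sketch only: a fixed-width model of the relation the function
   above enforces between *STMT_VECTYPE_OUT and *NUNITS_VECTYPE_OUT.  With
   16-byte vectors, a statement whose own scalar type is int (4 lanes) but
   which also references short (8 lanes) passes the check below, mirroring
   the multiple_p test on TYPE_VECTOR_SUBPARTS.  The helper is made up for
   illustration and ignores variable-length vectors.  */

static inline bool
nunits_compatible_example (unsigned int stmt_scalar_bytes,
                           unsigned int smallest_scalar_bytes,
                           unsigned int vector_bytes)
{
  unsigned int stmt_lanes = vector_bytes / stmt_scalar_bytes;
  unsigned int nunits_lanes = vector_bytes / smallest_scalar_bytes;
  /* The nunits vectype must contain a whole multiple of the lanes of the
     statement's own vectype.  */
  return nunits_lanes % stmt_lanes == 0;
}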
12598 /* Generate and return a statement sequence that sets the vector length LEN to:
12600 min_of_start_and_end = min (START_INDEX, END_INDEX);
12601 left_len = END_INDEX - min_of_start_and_end;
12602 rhs = min (left_len, LEN_LIMIT);
12603 LEN = rhs;
12605 Note: the cost of the code generated by this function is modeled
12606 by vect_estimate_min_profitable_iters, so changes here may need
12607 corresponding changes there. */
12609 gimple_seq
12610 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12612 gimple_seq stmts = NULL;
12613 tree len_type = TREE_TYPE (len);
12614 gcc_assert (TREE_TYPE (start_index) == len_type);
12616 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12617 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12618 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12619 gimple* stmt = gimple_build_assign (len, rhs);
12620 gimple_seq_add_stmt (&stmts, stmt);
12622 return stmts;
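/* Illustrative sketch only: a scalar model of the value computed by
   vect_gen_len.  When START_INDEX is the number of scalar iterations
   already processed, END_INDEX the total iteration count and LEN_LIMIT the
   number of lanes per vector, the result is "remaining iterations, capped
   at one full vector".  The helper is made up for illustration.  */

static inline unsigned HOST_WIDE_INT
vect_gen_len_example (unsigned HOST_WIDE_INT start_index,
                      unsigned HOST_WIDE_INT end_index,
                      unsigned HOST_WIDE_INT len_limit)
{
  unsigned HOST_WIDE_INT min_of_start_and_end
    = start_index < end_index ? start_index : end_index;
  unsigned HOST_WIDE_INT left_len = end_index - min_of_start_and_end;
  return left_len < len_limit ? left_len : len_limit;
}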