gcc/tree-vect-stmts.cc
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 static unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind,
95 stmt_vec_info stmt_info, slp_tree node,
96 tree vectype, int misalign,
97 enum vect_cost_model_location where)
99 if ((kind == vector_load || kind == unaligned_load)
100 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
101 kind = vector_gather_load;
102 if ((kind == vector_store || kind == unaligned_store)
103 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
104 kind = vector_scatter_store;
106 stmt_info_for_cost si
107 = { count, kind, where, stmt_info, node, vectype, misalign };
108 body_cost_vec->safe_push (si);
110 return (unsigned)
111 (builtin_vectorization_cost (kind, vectype, misalign) * count);
114 unsigned
115 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
116 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
117 tree vectype, int misalign,
118 enum vect_cost_model_location where)
120 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
121 vectype, misalign, where);
124 unsigned
125 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
126 enum vect_cost_for_stmt kind, slp_tree node,
127 tree vectype, int misalign,
128 enum vect_cost_model_location where)
130 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
131 vectype, misalign, where);
134 unsigned
135 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
136 enum vect_cost_for_stmt kind,
137 enum vect_cost_model_location where)
139 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
140 || kind == scalar_stmt);
141 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
142 NULL_TREE, 0, where);
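/* These overloads all funnel into the static worker above, which
   canonicalizes gather/scatter kinds and pushes a stmt_info_for_cost
   entry for the target cost model to process later.  A typical
   body-cost call looks like
     inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
                                      stmt_info, 0, vect_body);
   as in vect_model_simple_cost below.  */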
145 /* Return a variable of type ELEM_TYPE[NELEMS]. */
147 static tree
148 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
150 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
151 "vect_array");
154 /* ARRAY is an array of vectors created by create_vector_array.
155 Return an SSA_NAME for the vector in index N. The reference
156 is part of the vectorization of STMT_INFO and the vector is associated
157 with scalar destination SCALAR_DEST. */
159 static tree
160 read_vector_array (vec_info *vinfo,
161 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
162 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
164 tree vect_type, vect, vect_name, array_ref;
165 gimple *new_stmt;
167 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
168 vect_type = TREE_TYPE (TREE_TYPE (array));
169 vect = vect_create_destination_var (scalar_dest, vect_type);
170 array_ref = build4 (ARRAY_REF, vect_type, array,
171 build_int_cst (size_type_node, n),
172 NULL_TREE, NULL_TREE);
174 new_stmt = gimple_build_assign (vect, array_ref);
175 vect_name = make_ssa_name (vect, new_stmt);
176 gimple_assign_set_lhs (new_stmt, vect_name);
177 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
179 return vect_name;
182 /* ARRAY is an array of vectors created by create_vector_array.
183 Emit code to store SSA_NAME VECT in index N of the array.
184 The store is part of the vectorization of STMT_INFO. */
186 static void
187 write_vector_array (vec_info *vinfo,
188 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
189 tree vect, tree array, unsigned HOST_WIDE_INT n)
191 tree array_ref;
192 gimple *new_stmt;
194 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
195 build_int_cst (size_type_node, n),
196 NULL_TREE, NULL_TREE);
198 new_stmt = gimple_build_assign (array_ref, vect);
199 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
202 /* PTR is a pointer to an array of type TYPE. Return a representation
203 of *PTR. The memory reference replaces those in FIRST_DR
204 (and its group). */
206 static tree
207 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
209 tree mem_ref;
211 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
212 /* Arrays have the same alignment as their type. */
213 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
214 return mem_ref;
217 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
218 Emit the clobber before *GSI. */
220 static void
221 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
222 gimple_stmt_iterator *gsi, tree var)
224 tree clobber = build_clobber (TREE_TYPE (var));
225 gimple *new_stmt = gimple_build_assign (var, clobber);
226 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
229 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
231 /* Function vect_mark_relevant.
233 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
235 static void
236 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
237 enum vect_relevant relevant, bool live_p)
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE, vect_location,
244 "mark relevant %d, live %d: %G", relevant, live_p,
245 stmt_info->stmt);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern, in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
253 /* This is the last stmt in a sequence that was detected as a
254 pattern that can potentially be vectorized. Don't mark the stmt
255 as relevant/live because it's not going to be vectorized.
256 Instead mark the pattern-stmt that replaces it. */
258 if (dump_enabled_p ())
259 dump_printf_loc (MSG_NOTE, vect_location,
260 "last stmt in pattern. don't mark"
261 " relevant/live.\n");
262 stmt_vec_info old_stmt_info = stmt_info;
263 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
264 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
265 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
266 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
269 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
270 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
271 STMT_VINFO_RELEVANT (stmt_info) = relevant;
273 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
274 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
276 if (dump_enabled_p ())
277 dump_printf_loc (MSG_NOTE, vect_location,
278 "already marked relevant/live.\n");
279 return;
282 worklist->safe_push (stmt_info);
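/* Note that the update above keeps the strongest marking seen so far:
   enum vect_relevant is ordered with vect_unused_in_scope lowest and
   vect_used_in_scope highest, so an already stronger relevance is never
   downgraded, and the early return skips re-pushing a stmt whose
   relevance and liveness did not change.  */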
286 /* Function is_simple_and_all_uses_invariant
288 Return true if STMT_INFO is simple and all uses of it are invariant. */
290 bool
291 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
292 loop_vec_info loop_vinfo)
294 tree op;
295 ssa_op_iter iter;
297 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
298 if (!stmt)
299 return false;
301 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
303 enum vect_def_type dt = vect_uninitialized_def;
305 if (!vect_is_simple_use (op, loop_vinfo, &dt))
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309 "use not simple.\n");
310 return false;
313 if (dt != vect_external_def && dt != vect_constant_def)
314 return false;
316 return true;
319 /* Function vect_stmt_relevant_p.
321 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
322 is "relevant for vectorization".
324 A stmt is considered "relevant for vectorization" if:
325 - it has uses outside the loop.
326 - it has vdefs (it alters memory).
327 - control stmts in the loop (except for the exit condition).
329 CHECKME: what other side effects would the vectorizer allow? */
331 static bool
332 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
333 enum vect_relevant *relevant, bool *live_p)
335 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
336 ssa_op_iter op_iter;
337 imm_use_iterator imm_iter;
338 use_operand_p use_p;
339 def_operand_p def_p;
341 *relevant = vect_unused_in_scope;
342 *live_p = false;
344 /* cond stmt other than loop exit cond. */
345 if (is_ctrl_stmt (stmt_info->stmt)
346 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
347 *relevant = vect_used_in_scope;
349 /* changing memory. */
350 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
351 if (gimple_vdef (stmt_info->stmt)
352 && !gimple_clobber_p (stmt_info->stmt))
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE, vect_location,
356 "vec_stmt_relevant_p: stmt has vdefs.\n");
357 *relevant = vect_used_in_scope;
360 /* uses outside the loop. */
361 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
363 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
365 basic_block bb = gimple_bb (USE_STMT (use_p));
366 if (!flow_bb_inside_loop_p (loop, bb))
368 if (is_gimple_debug (USE_STMT (use_p)))
369 continue;
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: used out of loop.\n");
375 /* We expect all such uses to be in the loop exit phis
376 (because of loop-closed SSA form). */
377 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
378 gcc_assert (bb == single_exit (loop)->dest);
380 *live_p = true;
385 if (*live_p && *relevant == vect_unused_in_scope
386 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: stmt live but not relevant.\n");
391 *relevant = vect_used_only_live;
394 return (*live_p || *relevant);
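/* For example, a stmt whose only consumers are the loop-closed PHIs
   after the loop (say, the final value of a reduction) sets *live_p
   here without becoming relevant; unless it is simple with all uses
   invariant, the code above then promotes it to vect_used_only_live.  */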
398 /* Function exist_non_indexing_operands_for_use_p
400 USE is one of the uses attached to STMT_INFO. Check if USE is
401 used in STMT_INFO for anything other than indexing an array. */
403 static bool
404 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
406 tree operand;
408 /* USE corresponds to some operand in STMT. If there is no data
409 reference in STMT, then any operand that corresponds to USE
410 is not indexing an array. */
411 if (!STMT_VINFO_DATA_REF (stmt_info))
412 return true;
414 /* STMT has a data_ref. FORNOW this means that it is of one of
415 the following forms:
416 -1- ARRAY_REF = var
417 -2- var = ARRAY_REF
418 (This should have been verified in analyze_data_refs).
420 'var' in the second case corresponds to a def, not a use,
421 so USE cannot correspond to any operands that are not used
422 for array indexing.
424 Therefore, all we need to check is if STMT falls into the
425 first case, and whether var corresponds to USE. */
427 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
428 if (!assign || !gimple_assign_copy_p (assign))
430 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
431 if (call && gimple_call_internal_p (call))
433 internal_fn ifn = gimple_call_internal_fn (call);
434 int mask_index = internal_fn_mask_index (ifn);
435 if (mask_index >= 0
436 && use == gimple_call_arg (call, mask_index))
437 return true;
438 int stored_value_index = internal_fn_stored_value_index (ifn);
439 if (stored_value_index >= 0
440 && use == gimple_call_arg (call, stored_value_index))
441 return true;
442 if (internal_gather_scatter_fn_p (ifn)
443 && use == gimple_call_arg (call, 1))
444 return true;
446 return false;
449 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
450 return false;
451 operand = gimple_assign_rhs1 (assign);
452 if (TREE_CODE (operand) != SSA_NAME)
453 return false;
455 if (operand == use)
456 return true;
458 return false;
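/* For instance, given a store *p_1 = x_2, the use x_2 matches the
   "operand == use" test above and the function returns true, whereas
   the use p_1 only feeds the address computation and the function
   returns false, letting process_use below skip it (its "case 1").  */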
463 /* Function process_use.
465 Inputs:
466 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
467 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
468 that defined USE. This is done by calling mark_relevant and passing it
469 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
470 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
471 be performed.
473 Outputs:
474 Generally, LIVE_P and RELEVANT are used to define the liveness and
475 relevance info of the DEF_STMT of this USE:
476 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
477 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
478 Exceptions:
479 - case 1: If USE is used only for address computations (e.g. array indexing),
480 which does not need to be directly vectorized, then the liveness/relevance
481 of the respective DEF_STMT is left unchanged.
482 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
483 we skip DEF_STMT because it has already been processed.
484 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
485 "relevant" will be modified accordingly.
487 Return true if everything is as expected. Return false otherwise. */
489 static opt_result
490 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
491 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
492 bool force)
494 stmt_vec_info dstmt_vinfo;
495 enum vect_def_type dt;
497 /* case 1: we are only interested in uses that need to be vectorized. Uses
498 that are used for address computation are not considered relevant. */
499 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
500 return opt_result::success ();
502 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
503 return opt_result::failure_at (stmt_vinfo->stmt,
504 "not vectorized:"
505 " unsupported use in stmt.\n");
507 if (!dstmt_vinfo)
508 return opt_result::success ();
510 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
511 basic_block bb = gimple_bb (stmt_vinfo->stmt);
513 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
514 We have to force the stmt live since the epilogue loop needs it to
515 continue computing the reduction. */
516 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
518 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
520 && bb->loop_father == def_bb->loop_father)
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE, vect_location,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
526 return opt_result::success ();
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
531 d = dstmt_vinfo
532 inner-loop:
533 stmt # use (d)
534 outer-loop-tail-bb:
535 ... */
536 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE, vect_location,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
542 switch (relevant)
544 case vect_unused_in_scope:
545 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
546 vect_used_in_scope : vect_unused_in_scope;
547 break;
549 case vect_used_in_outer_by_reduction:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551 relevant = vect_used_by_reduction;
552 break;
554 case vect_used_in_outer:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556 relevant = vect_used_in_scope;
557 break;
559 case vect_used_in_scope:
560 break;
562 default:
563 gcc_unreachable ();
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
570 inner-loop:
571 d = dstmt_vinfo
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
573 stmt # use (d) */
574 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE, vect_location,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
580 switch (relevant)
582 case vect_unused_in_scope:
583 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
585 vect_used_in_outer_by_reduction : vect_unused_in_scope;
586 break;
588 case vect_used_by_reduction:
589 case vect_used_only_live:
590 relevant = vect_used_in_outer_by_reduction;
591 break;
593 case vect_used_in_scope:
594 relevant = vect_used_in_outer;
595 break;
597 default:
598 gcc_unreachable ();
601 /* We are also not interested in uses on loop PHI backedges that are
602 inductions. Otherwise we'll needlessly vectorize the IV increment
603 and cause hybrid SLP for SLP inductions. Unless the PHI is live
604 of course. */
605 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
606 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
607 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
608 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
609 loop_latch_edge (bb->loop_father))
610 == use))
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_NOTE, vect_location,
614 "induction value on backedge.\n");
615 return opt_result::success ();
619 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
620 return opt_result::success ();
624 /* Function vect_mark_stmts_to_be_vectorized.
626 Not all stmts in the loop need to be vectorized. For example:
628 for i...
629 for j...
630 1. T0 = i + j
631 2. T1 = a[T0]
633 3. j = j + 1
635 Stmts 1 and 3 do not need to be vectorized, because loop control and
636 addressing of vectorized data-refs are handled differently.
638 This pass detects such stmts. */
640 opt_result
641 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
643 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
644 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
645 unsigned int nbbs = loop->num_nodes;
646 gimple_stmt_iterator si;
647 unsigned int i;
648 basic_block bb;
649 bool live_p;
650 enum vect_relevant relevant;
652 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
654 auto_vec<stmt_vec_info, 64> worklist;
656 /* 1. Init worklist. */
657 for (i = 0; i < nbbs; i++)
659 bb = bbs[i];
660 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
662 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
663 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
665 phi_info->stmt);
667 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
668 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
670 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
672 if (is_gimple_debug (gsi_stmt (si)))
673 continue;
674 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
675 if (dump_enabled_p ())
676 dump_printf_loc (MSG_NOTE, vect_location,
677 "init: stmt relevant? %G", stmt_info->stmt);
679 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
680 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
684 /* 2. Process_worklist */
685 while (worklist.length () > 0)
687 use_operand_p use_p;
688 ssa_op_iter iter;
690 stmt_vec_info stmt_vinfo = worklist.pop ();
691 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location,
693 "worklist: examine stmt: %G", stmt_vinfo->stmt);
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant according to the relevance property
697 of STMT. */
698 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
700 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
701 propagated as is to the DEF_STMTs of its USEs.
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the relevance to vect_used_by_reduction.
705 This is because we distinguish between two kinds of relevant stmts -
706 those that are used by a reduction computation, and those that are
707 (also) used by a regular computation. This allows us later on to
708 identify stmts that are used solely by a reduction, and therefore the
709 order of the results that they produce does not have to be kept. */
711 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
713 case vect_reduction_def:
714 gcc_assert (relevant != vect_unused_in_scope);
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_in_scope
717 && relevant != vect_used_by_reduction
718 && relevant != vect_used_only_live)
719 return opt_result::failure_at
720 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
721 break;
723 case vect_nested_cycle:
724 if (relevant != vect_unused_in_scope
725 && relevant != vect_used_in_outer_by_reduction
726 && relevant != vect_used_in_outer)
727 return opt_result::failure_at
728 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
729 break;
731 case vect_double_reduction_def:
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_by_reduction
734 && relevant != vect_used_only_live)
735 return opt_result::failure_at
736 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
737 break;
739 default:
740 break;
743 if (is_pattern_stmt_p (stmt_vinfo))
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
750 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
751 tree op = gimple_assign_rhs1 (assign);
753 i = 1;
754 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
756 opt_result res
757 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
758 loop_vinfo, relevant, &worklist, false);
759 if (!res)
760 return res;
761 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
762 loop_vinfo, relevant, &worklist, false);
763 if (!res)
764 return res;
765 i = 2;
767 for (; i < gimple_num_ops (assign); i++)
769 op = gimple_op (assign, i);
770 if (TREE_CODE (op) == SSA_NAME)
772 opt_result res
773 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
774 &worklist, false);
775 if (!res)
776 return res;
780 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
782 for (i = 0; i < gimple_call_num_args (call); i++)
784 tree arg = gimple_call_arg (call, i);
785 opt_result res
786 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
787 &worklist, false);
788 if (!res)
789 return res;
793 else
794 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
796 tree op = USE_FROM_PTR (use_p);
797 opt_result res
798 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
799 &worklist, false);
800 if (!res)
801 return res;
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 opt_result res
810 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
811 &worklist, true);
812 if (!res)
814 if (fatal)
815 *fatal = false;
816 return res;
819 } /* while worklist */
821 return opt_result::success ();
824 /* Function vect_model_simple_cost.
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
830 static void
831 vect_model_simple_cost (vec_info *,
832 stmt_vec_info stmt_info, int ncopies,
833 enum vect_def_type *dt,
834 int ndts,
835 slp_tree node,
836 stmt_vector_for_cost *cost_vec,
837 vect_cost_for_stmt kind = vector_stmt)
839 int inside_cost = 0, prologue_cost = 0;
841 gcc_assert (cost_vec != NULL);
843 /* ??? Somehow we need to fix this at the callers. */
844 if (node)
845 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
847 if (!node)
848 /* Cost the "broadcast" of a scalar operand in to a vector operand.
849 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
850 cost model. */
851 for (int i = 0; i < ndts; i++)
852 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
853 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
854 stmt_info, 0, vect_prologue);
856 /* Pass the inside-of-loop statements to the target-specific cost model. */
857 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
858 stmt_info, 0, vect_body);
860 if (dump_enabled_p ())
861 dump_printf_loc (MSG_NOTE, vect_location,
862 "vect_model_simple_cost: inside_cost = %d, "
863 "prologue_cost = %d .\n", inside_cost, prologue_cost);
867 /* Model cost for type demotion and promotion operations. PWR is
868 normally zero for single-step promotions and demotions. It will be
869 one if two-step promotion/demotion is required, and so on. NCOPIES
870 is the number of vector results (and thus number of instructions)
871 for the narrowest end of the operation chain. Each additional
872 step doubles the number of instructions required. If WIDEN_ARITH
873 is true the stmt is doing widening arithmetic. */
875 static void
876 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
877 enum vect_def_type *dt,
878 unsigned int ncopies, int pwr,
879 stmt_vector_for_cost *cost_vec,
880 bool widen_arith)
882 int i;
883 int inside_cost = 0, prologue_cost = 0;
885 for (i = 0; i < pwr + 1; i++)
887 inside_cost += record_stmt_cost (cost_vec, ncopies,
888 widen_arith
889 ? vector_stmt : vec_promote_demote,
890 stmt_info, 0, vect_body);
891 ncopies *= 2;
894 /* FORNOW: Assuming maximum 2 args per stmt. */
895 for (i = 0; i < 2; i++)
896 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
898 stmt_info, 0, vect_prologue);
900 if (dump_enabled_p ())
901 dump_printf_loc (MSG_NOTE, vect_location,
902 "vect_model_promotion_demotion_cost: inside_cost = %d, "
903 "prologue_cost = %d .\n", inside_cost, prologue_cost);
906 /* Returns true if the current function returns DECL. */
908 static bool
909 cfun_returns (tree decl)
911 edge_iterator ei;
912 edge e;
913 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
915 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
916 if (!ret)
917 continue;
918 if (gimple_return_retval (ret) == decl)
919 return true;
920 /* We often end up with an aggregate copy to the result decl,
921 handle that case as well. First skip intermediate clobbers
922 though. */
923 gimple *def = ret;
924 do
925 {
926 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
927 }
928 while (gimple_clobber_p (def));
929 if (is_a <gassign *> (def)
930 && gimple_assign_lhs (def) == gimple_return_retval (ret)
931 && gimple_assign_rhs1 (def) == decl)
932 return true;
934 return false;
937 /* Function vect_model_store_cost
939 Models cost for stores. In the case of grouped accesses, one access
940 has the overhead of the grouped access attributed to it. */
942 static void
943 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
944 vect_memory_access_type memory_access_type,
945 dr_alignment_support alignment_support_scheme,
946 int misalignment,
947 vec_load_store_type vls_type, slp_tree slp_node,
948 stmt_vector_for_cost *cost_vec)
950 unsigned int inside_cost = 0, prologue_cost = 0;
951 stmt_vec_info first_stmt_info = stmt_info;
952 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
954 /* ??? Somehow we need to fix this at the callers. */
955 if (slp_node)
956 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
958 if (vls_type == VLS_STORE_INVARIANT)
960 if (!slp_node)
961 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
962 stmt_info, 0, vect_prologue);
965 /* Grouped stores update all elements in the group at once,
966 so we want the DR for the first statement. */
967 if (!slp_node && grouped_access_p)
968 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
970 /* True if we should include any once-per-group costs as well as
971 the cost of the statement itself. For SLP we only get called
972 once per group anyhow. */
973 bool first_stmt_p = (first_stmt_info == stmt_info);
975 /* We assume that the cost of a single store-lanes instruction is
976 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
977 access is instead being provided by a permute-and-store operation,
978 include the cost of the permutes. */
979 if (first_stmt_p
980 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
982 /* Uses high and low interleave or shuffle operations for each
983 needed permute. */
984 int group_size = DR_GROUP_SIZE (first_stmt_info);
985 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
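/* E.g. NCOPIES == 1 and GROUP_SIZE == 4 gives 1 * ceil_log2 (4) * 4
   == 8 vec_perm statements for the interleaving scheme.  */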
986 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
987 stmt_info, 0, vect_body);
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_NOTE, vect_location,
991 "vect_model_store_cost: strided group_size = %d .\n",
992 group_size);
995 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
996 /* Costs of the stores. */
997 if (memory_access_type == VMAT_ELEMENTWISE
998 || memory_access_type == VMAT_GATHER_SCATTER)
1000 /* N scalar stores plus extracting the elements. */
1001 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1002 inside_cost += record_stmt_cost (cost_vec,
1003 ncopies * assumed_nunits,
1004 scalar_store, stmt_info, 0, vect_body);
1006 else
1007 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1008 misalignment, &inside_cost, cost_vec);
1010 if (memory_access_type == VMAT_ELEMENTWISE
1011 || memory_access_type == VMAT_STRIDED_SLP)
1013 /* N scalar stores plus extracting the elements. */
1014 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1015 inside_cost += record_stmt_cost (cost_vec,
1016 ncopies * assumed_nunits,
1017 vec_to_scalar, stmt_info, 0, vect_body);
1020 /* When vectorizing a store into the function result assign
1021 a penalty if the function returns in a multi-register location.
1022 In this case we assume we'll end up with having to spill the
1023 vector result and do piecewise loads as a conservative estimate. */
1024 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1025 if (base
1026 && (TREE_CODE (base) == RESULT_DECL
1027 || (DECL_P (base) && cfun_returns (base)))
1028 && !aggregate_value_p (base, cfun->decl))
1030 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1031 /* ??? Handle PARALLEL in some way. */
1032 if (REG_P (reg))
1034 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1035 /* Assume that a single reg-reg move is possible and cheap,
1036 do not account for vector to gp register move cost. */
1037 if (nregs > 1)
1039 /* Spill. */
1040 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1041 vector_store,
1042 stmt_info, 0, vect_epilogue);
1043 /* Loads. */
1044 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1045 scalar_load,
1046 stmt_info, 0, vect_epilogue);
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_NOTE, vect_location,
1053 "vect_model_store_cost: inside_cost = %d, "
1054 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1058 /* Calculate cost of DR's memory access. */
1059 void
1060 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1061 dr_alignment_support alignment_support_scheme,
1062 int misalignment,
1063 unsigned int *inside_cost,
1064 stmt_vector_for_cost *body_cost_vec)
1066 switch (alignment_support_scheme)
1068 case dr_aligned:
1070 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1071 vector_store, stmt_info, 0,
1072 vect_body);
1074 if (dump_enabled_p ())
1075 dump_printf_loc (MSG_NOTE, vect_location,
1076 "vect_model_store_cost: aligned.\n");
1077 break;
1080 case dr_unaligned_supported:
1082 /* Here, we assign an additional cost for the unaligned store. */
1083 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1084 unaligned_store, stmt_info,
1085 misalignment, vect_body);
1086 if (dump_enabled_p ())
1087 dump_printf_loc (MSG_NOTE, vect_location,
1088 "vect_model_store_cost: unaligned supported by "
1089 "hardware.\n");
1090 break;
1093 case dr_unaligned_unsupported:
1095 *inside_cost = VECT_MAX_COST;
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1099 "vect_model_store_cost: unsupported access.\n");
1100 break;
1103 default:
1104 gcc_unreachable ();
1109 /* Function vect_model_load_cost
1111 Models cost for loads. In the case of grouped accesses, one access has
1112 the overhead of the grouped access attributed to it. Since unaligned
1113 accesses are supported for loads, we also account for the costs of the
1114 access scheme chosen. */
1116 static void
1117 vect_model_load_cost (vec_info *vinfo,
1118 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1119 vect_memory_access_type memory_access_type,
1120 dr_alignment_support alignment_support_scheme,
1121 int misalignment,
1122 gather_scatter_info *gs_info,
1123 slp_tree slp_node,
1124 stmt_vector_for_cost *cost_vec)
1126 unsigned int inside_cost = 0, prologue_cost = 0;
1127 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1129 gcc_assert (cost_vec);
1131 /* ??? Somehow we need to fix this at the callers. */
1132 if (slp_node)
1133 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1135 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1137 /* If the load is permuted then the alignment is determined by
1138 the first group element not by the first scalar stmt DR. */
1139 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1140 /* Record the cost for the permutation. */
1141 unsigned n_perms, n_loads;
1142 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1143 vf, true, &n_perms, &n_loads);
1144 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1145 first_stmt_info, 0, vect_body);
1147 /* And adjust the number of loads performed. This handles
1148 redundancies as well as loads that are later dead. */
1149 ncopies = n_loads;
1152 /* Grouped loads read all elements in the group at once,
1153 so we want the DR for the first statement. */
1154 stmt_vec_info first_stmt_info = stmt_info;
1155 if (!slp_node && grouped_access_p)
1156 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1158 /* True if we should include any once-per-group costs as well as
1159 the cost of the statement itself. For SLP we only get called
1160 once per group anyhow. */
1161 bool first_stmt_p = (first_stmt_info == stmt_info);
1163 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1164 ones we actually need. Account for the cost of unused results. */
1165 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1167 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1168 stmt_vec_info next_stmt_info = first_stmt_info;
1169 do
1170 {
1171 gaps -= 1;
1172 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1173 }
1174 while (next_stmt_info);
1175 if (gaps)
1177 if (dump_enabled_p ())
1178 dump_printf_loc (MSG_NOTE, vect_location,
1179 "vect_model_load_cost: %d unused vectors.\n",
1180 gaps);
1181 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1182 alignment_support_scheme, misalignment, false,
1183 &inside_cost, &prologue_cost,
1184 cost_vec, cost_vec, true);
1188 /* We assume that the cost of a single load-lanes instruction is
1189 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1190 access is instead being provided by a load-and-permute operation,
1191 include the cost of the permutes. */
1192 if (first_stmt_p
1193 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1195 /* Uses even and odd extract operations or shuffle operations
1196 for each needed permute. */
1197 int group_size = DR_GROUP_SIZE (first_stmt_info);
1198 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1199 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1200 stmt_info, 0, vect_body);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: strided group_size = %d .\n",
1205 group_size);
1208 /* The loads themselves. */
1209 if (memory_access_type == VMAT_ELEMENTWISE
1210 || memory_access_type == VMAT_GATHER_SCATTER)
1212 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1213 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1214 if (memory_access_type == VMAT_GATHER_SCATTER
1215 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1216 /* For emulated gathers N offset vector element extracts
1217 (we assume the scalar scaling and ptr + offset add is consumed by
1218 the load). */
1219 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1220 vec_to_scalar, stmt_info, 0,
1221 vect_body);
1222 /* N scalar loads plus gathering them into a vector. */
1223 inside_cost += record_stmt_cost (cost_vec,
1224 ncopies * assumed_nunits,
1225 scalar_load, stmt_info, 0, vect_body);
1227 else if (memory_access_type == VMAT_INVARIANT)
1229 /* Invariant loads will ideally be hoisted and splat to a vector. */
1230 prologue_cost += record_stmt_cost (cost_vec, 1,
1231 scalar_load, stmt_info, 0,
1232 vect_prologue);
1233 prologue_cost += record_stmt_cost (cost_vec, 1,
1234 scalar_to_vec, stmt_info, 0,
1235 vect_prologue);
1237 else
1238 vect_get_load_cost (vinfo, stmt_info, ncopies,
1239 alignment_support_scheme, misalignment, first_stmt_p,
1240 &inside_cost, &prologue_cost,
1241 cost_vec, cost_vec, true);
1242 if (memory_access_type == VMAT_ELEMENTWISE
1243 || memory_access_type == VMAT_STRIDED_SLP
1244 || (memory_access_type == VMAT_GATHER_SCATTER
1245 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1246 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1247 stmt_info, 0, vect_body);
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: inside_cost = %d, "
1252 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1256 /* Calculate cost of DR's memory access. */
1257 void
1258 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1259 dr_alignment_support alignment_support_scheme,
1260 int misalignment,
1261 bool add_realign_cost, unsigned int *inside_cost,
1262 unsigned int *prologue_cost,
1263 stmt_vector_for_cost *prologue_cost_vec,
1264 stmt_vector_for_cost *body_cost_vec,
1265 bool record_prologue_costs)
1267 switch (alignment_support_scheme)
1269 case dr_aligned:
1271 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1272 stmt_info, 0, vect_body);
1274 if (dump_enabled_p ())
1275 dump_printf_loc (MSG_NOTE, vect_location,
1276 "vect_model_load_cost: aligned.\n");
1278 break;
1280 case dr_unaligned_supported:
1282 /* Here, we assign an additional cost for the unaligned load. */
1283 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1284 unaligned_load, stmt_info,
1285 misalignment, vect_body);
1287 if (dump_enabled_p ())
1288 dump_printf_loc (MSG_NOTE, vect_location,
1289 "vect_model_load_cost: unaligned supported by "
1290 "hardware.\n");
1292 break;
1294 case dr_explicit_realign:
1296 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1297 vector_load, stmt_info, 0, vect_body);
1298 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1299 vec_perm, stmt_info, 0, vect_body);
1301 /* FIXME: If the misalignment remains fixed across the iterations of
1302 the containing loop, the following cost should be added to the
1303 prologue costs. */
1304 if (targetm.vectorize.builtin_mask_for_load)
1305 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1306 stmt_info, 0, vect_body);
1308 if (dump_enabled_p ())
1309 dump_printf_loc (MSG_NOTE, vect_location,
1310 "vect_model_load_cost: explicit realign\n");
1312 break;
1314 case dr_explicit_realign_optimized:
1316 if (dump_enabled_p ())
1317 dump_printf_loc (MSG_NOTE, vect_location,
1318 "vect_model_load_cost: unaligned software "
1319 "pipelined.\n");
1321 /* Unaligned software pipeline has a load of an address, an initial
1322 load, and possibly a mask operation to "prime" the loop. However,
1323 if this is an access in a group of loads, which provide grouped
1324 access, then the above cost should only be considered for one
1325 access in the group. Inside the loop, there is a load op
1326 and a realignment op. */
1328 if (add_realign_cost && record_prologue_costs)
1330 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1331 vector_stmt, stmt_info,
1332 0, vect_prologue);
1333 if (targetm.vectorize.builtin_mask_for_load)
1334 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1335 vector_stmt, stmt_info,
1336 0, vect_prologue);
1339 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1340 stmt_info, 0, vect_body);
1341 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1342 stmt_info, 0, vect_body);
1344 if (dump_enabled_p ())
1345 dump_printf_loc (MSG_NOTE, vect_location,
1346 "vect_model_load_cost: explicit realign optimized"
1347 "\n");
1349 break;
1352 case dr_unaligned_unsupported:
1354 *inside_cost = VECT_MAX_COST;
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1358 "vect_model_load_cost: unsupported access.\n");
1359 break;
1362 default:
1363 gcc_unreachable ();
1367 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1368 the loop preheader for the vectorized stmt STMT_VINFO. */
1370 static void
1371 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1372 gimple_stmt_iterator *gsi)
1374 if (gsi)
1375 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1376 else
1377 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1379 if (dump_enabled_p ())
1380 dump_printf_loc (MSG_NOTE, vect_location,
1381 "created new init_stmt: %G", new_stmt);
1384 /* Function vect_init_vector.
1386 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1387 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1388 vector type a vector with all elements equal to VAL is created first.
1389 Place the initialization at GSI if it is not NULL. Otherwise, place the
1390 initialization at the loop preheader.
1391 Return the DEF of INIT_STMT.
1392 It will be used in the vectorization of STMT_INFO. */
1394 tree
1395 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1396 gimple_stmt_iterator *gsi)
1398 gimple *init_stmt;
1399 tree new_temp;
1401 /* We abuse this function to push something to an SSA name with initial 'val'. */
1402 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1404 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1405 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1407 /* A scalar boolean value should be transformed into an
1408 all-zeros or all-ones value before building a vector. */
1409 if (VECTOR_BOOLEAN_TYPE_P (type))
1411 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1412 tree false_val = build_zero_cst (TREE_TYPE (type));
1414 if (CONSTANT_CLASS_P (val))
1415 val = integer_zerop (val) ? false_val : true_val;
1416 else
1418 new_temp = make_ssa_name (TREE_TYPE (type));
1419 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1420 val, true_val, false_val);
1421 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1422 val = new_temp;
1425 else
1427 gimple_seq stmts = NULL;
1428 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1429 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1430 TREE_TYPE (type), val);
1431 else
1432 /* ??? Condition vectorization expects us to do
1433 promotion of invariant/external defs. */
1434 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1435 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1436 !gsi_end_p (gsi2); )
1438 init_stmt = gsi_stmt (gsi2);
1439 gsi_remove (&gsi2, false);
1440 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1444 val = build_vector_from_val (type, val);
1447 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1448 init_stmt = gimple_build_assign (new_temp, val);
1449 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1450 return new_temp;
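/* For instance, initializing a V4SI vector from the scalar constant 3
   goes through build_vector_from_val above and emits an init stmt of
   the form cst_N = { 3, 3, 3, 3 }, placed at GSI if given, otherwise
   inserted on entry via vinfo->insert_on_entry (the loop preheader
   when vectorizing a loop).  */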
1454 /* Function vect_get_vec_defs_for_operand.
1456 OP is an operand in STMT_VINFO. This function returns a vector of
1457 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1459 In the case that OP is an SSA_NAME which is defined in the loop, then
1460 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1462 In case OP is an invariant or constant, a new stmt that creates a vector def
1463 needs to be introduced. VECTYPE may be used to specify a required type for
1464 vector invariant. */
1466 void
1467 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1468 unsigned ncopies,
1469 tree op, vec<tree> *vec_oprnds, tree vectype)
1471 gimple *def_stmt;
1472 enum vect_def_type dt;
1473 bool is_simple_use;
1474 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1476 if (dump_enabled_p ())
1477 dump_printf_loc (MSG_NOTE, vect_location,
1478 "vect_get_vec_defs_for_operand: %T\n", op);
1480 stmt_vec_info def_stmt_info;
1481 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1482 &def_stmt_info, &def_stmt);
1483 gcc_assert (is_simple_use);
1484 if (def_stmt && dump_enabled_p ())
1485 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1487 vec_oprnds->create (ncopies);
1488 if (dt == vect_constant_def || dt == vect_external_def)
1490 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1491 tree vector_type;
1493 if (vectype)
1494 vector_type = vectype;
1495 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1496 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1497 vector_type = truth_type_for (stmt_vectype);
1498 else
1499 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1501 gcc_assert (vector_type);
1502 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1503 while (ncopies--)
1504 vec_oprnds->quick_push (vop);
1506 else
1508 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1509 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1510 for (unsigned i = 0; i < ncopies; ++i)
1511 vec_oprnds->quick_push (gimple_get_lhs
1512 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
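/* Thus for a constant or external OP the same splatted def (built by
   vect_init_vector above) is pushed NCOPIES times, while for an OP
   defined inside the loop the lhs of each of the NCOPIES vectorized
   copies of the defining stmt is used.  */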
1517 /* Get vectorized definitions for OP0 and OP1. */
1519 void
1520 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1521 unsigned ncopies,
1522 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1523 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1524 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1525 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1527 if (slp_node)
1529 if (op0)
1530 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1531 if (op1)
1532 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1533 if (op2)
1534 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1535 if (op3)
1536 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1538 else
1540 if (op0)
1541 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1542 op0, vec_oprnds0, vectype0);
1543 if (op1)
1544 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1545 op1, vec_oprnds1, vectype1);
1546 if (op2)
1547 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1548 op2, vec_oprnds2, vectype2);
1549 if (op3)
1550 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1551 op3, vec_oprnds3, vectype3);
1555 void
1556 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1557 unsigned ncopies,
1558 tree op0, vec<tree> *vec_oprnds0,
1559 tree op1, vec<tree> *vec_oprnds1,
1560 tree op2, vec<tree> *vec_oprnds2,
1561 tree op3, vec<tree> *vec_oprnds3)
1563 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1564 op0, vec_oprnds0, NULL_TREE,
1565 op1, vec_oprnds1, NULL_TREE,
1566 op2, vec_oprnds2, NULL_TREE,
1567 op3, vec_oprnds3, NULL_TREE);
1570 /* Helper function called by vect_finish_replace_stmt and
1571 vect_finish_stmt_generation. Set the location of the new
1572 statement and create and return a stmt_vec_info for it. */
1574 static void
1575 vect_finish_stmt_generation_1 (vec_info *,
1576 stmt_vec_info stmt_info, gimple *vec_stmt)
1578 if (dump_enabled_p ())
1579 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1581 if (stmt_info)
1583 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1585 /* While EH edges will generally prevent vectorization, stmt might
1586 e.g. be in a must-not-throw region. Ensure newly created stmts
1587 that could throw are part of the same region. */
1588 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1589 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1590 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1592 else
1593 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1596 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1597 which sets the same scalar result as STMT_INFO did. Create and return a
1598 stmt_vec_info for VEC_STMT. */
1600 void
1601 vect_finish_replace_stmt (vec_info *vinfo,
1602 stmt_vec_info stmt_info, gimple *vec_stmt)
1604 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1605 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1607 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1608 gsi_replace (&gsi, vec_stmt, true);
1610 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1613 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1614 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1616 void
1617 vect_finish_stmt_generation (vec_info *vinfo,
1618 stmt_vec_info stmt_info, gimple *vec_stmt,
1619 gimple_stmt_iterator *gsi)
1621 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1623 if (!gsi_end_p (*gsi)
1624 && gimple_has_mem_ops (vec_stmt))
1626 gimple *at_stmt = gsi_stmt (*gsi);
1627 tree vuse = gimple_vuse (at_stmt);
1628 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1630 tree vdef = gimple_vdef (at_stmt);
1631 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1632 gimple_set_modified (vec_stmt, true);
1633 /* If we have an SSA vuse and insert a store, update virtual
1634 SSA form to avoid triggering the renamer. Do so only
1635 if we can easily see all uses - which is what almost always
1636 happens with the way vectorized stmts are inserted. */
1637 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1638 && ((is_gimple_assign (vec_stmt)
1639 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1640 || (is_gimple_call (vec_stmt)
1641 && (!(gimple_call_flags (vec_stmt)
1642 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1643 || (gimple_call_lhs (vec_stmt)
1644 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1646 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1647 gimple_set_vdef (vec_stmt, new_vdef);
1648 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1652 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1653 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1656 /* We want to vectorize a call to combined function CFN with function
1657 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1658 as the types of all inputs. Check whether this is possible using
1659 an internal function, returning its code if so or IFN_LAST if not. */
1661 static internal_fn
1662 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1663 tree vectype_out, tree vectype_in)
1665 internal_fn ifn;
1666 if (internal_fn_p (cfn))
1667 ifn = as_internal_fn (cfn);
1668 else
1669 ifn = associated_internal_fn (fndecl);
1670 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1672 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1673 if (info.vectorizable)
1675 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1676 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1677 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1678 OPTIMIZE_FOR_SPEED))
1679 return ifn;
1682 return IFN_LAST;
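/* For example, a sqrt call with V2DF as both input and output type maps
   to IFN_SQRT and is returned here when the target implements the
   corresponding optab for that mode (e.g. sqrtv2df2); otherwise IFN_LAST
   tells the caller to try a target-specific builtin instead.  */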
1686 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1687 gimple_stmt_iterator *);
1689 /* Check whether a load or store statement in the loop described by
1690 LOOP_VINFO is possible in a loop using partial vectors. This is
1691 testing whether the vectorizer pass has the appropriate support,
1692 as well as whether the target does.
1694 VLS_TYPE says whether the statement is a load or store and VECTYPE
1695 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1696 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1697 says how the load or store is going to be implemented and GROUP_SIZE
1698 is the number of load or store statements in the containing group.
1699 If the access is a gather load or scatter store, GS_INFO describes
1700 its arguments. If the load or store is conditional, SCALAR_MASK is the
1701 condition under which it occurs.
1703 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1704 vectors is not supported, otherwise record the required rgroup control
1705 types. */
1707 static void
1708 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1709 slp_tree slp_node,
1710 vec_load_store_type vls_type,
1711 int group_size,
1712 vect_memory_access_type
1713 memory_access_type,
1714 gather_scatter_info *gs_info,
1715 tree scalar_mask)
1717 /* Invariant loads need no special support. */
1718 if (memory_access_type == VMAT_INVARIANT)
1719 return;
1721 unsigned int nvectors;
1722 if (slp_node)
1723 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1724 else
1725 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1727 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1728 machine_mode vecmode = TYPE_MODE (vectype);
1729 bool is_load = (vls_type == VLS_LOAD);
1730 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1732 if (is_load
1733 ? !vect_load_lanes_supported (vectype, group_size, true)
1734 : !vect_store_lanes_supported (vectype, group_size, true))
1736 if (dump_enabled_p ())
1737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1738 "can't operate on partial vectors because"
1739 " the target doesn't have an appropriate"
1740 " load/store-lanes instruction.\n");
1741 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1742 return;
1744 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1745 scalar_mask);
1746 return;
1749 if (memory_access_type == VMAT_GATHER_SCATTER)
1751 internal_fn ifn = (is_load
1752 ? IFN_MASK_GATHER_LOAD
1753 : IFN_MASK_SCATTER_STORE);
1754 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1755 gs_info->memory_type,
1756 gs_info->offset_vectype,
1757 gs_info->scale))
1759 if (dump_enabled_p ())
1760 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1761 "can't operate on partial vectors because"
1762 " the target doesn't have an appropriate"
1763 " gather load or scatter store instruction.\n");
1764 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1765 return;
1767 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1768 scalar_mask);
1769 return;
1772 if (memory_access_type != VMAT_CONTIGUOUS
1773 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1775 /* Element X of the data must come from iteration i * VF + X of the
1776 scalar loop. We need more work to support other mappings. */
1777 if (dump_enabled_p ())
1778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1779 "can't operate on partial vectors because an"
1780 " access isn't contiguous.\n");
1781 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1782 return;
1785 if (!VECTOR_MODE_P (vecmode))
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1789 "can't operate on partial vectors when emulating"
1790 " vector operations.\n");
1791 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1792 return;
1795 /* We might load more scalars than we need for permuting SLP loads.
1796 We checked in get_group_load_store_type that the extra elements
1797 don't leak into a new vector. */
1798 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1800 unsigned int nvectors;
1801 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1802 return nvectors;
1803 gcc_unreachable ();
1806 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1807 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1808 machine_mode mask_mode;
1809 bool using_partial_vectors_p = false;
1810 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1811 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1813 nvectors = group_memory_nvectors (group_size * vf, nunits);
1814 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1815 using_partial_vectors_p = true;
1818 machine_mode vmode;
1819 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1821 nvectors = group_memory_nvectors (group_size * vf, nunits);
1822 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1823 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1824 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1825 using_partial_vectors_p = true;
1828 if (!using_partial_vectors_p)
1830 if (dump_enabled_p ())
1831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1832 "can't operate on partial vectors because the"
1833 " target doesn't have the appropriate partial"
1834 " vectorization load or store.\n");
1835 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1839 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1840 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1841 that needs to be applied to all loads and stores in a vectorized loop.
1842 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1843 otherwise return VEC_MASK & LOOP_MASK.
1845 MASK_TYPE is the type of both masks. If new statements are needed,
1846 insert them before GSI. */
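/* For illustration only (the SSA names are invented), the statement emitted
   below has the form

     vec_mask_and_42 = vec_mask_36 & loop_mask_18;

   unless the pair has already been recorded in vec_cond_masked_set, in which
   case VEC_MASK already incorporates LOOP_MASK and is returned unchanged.  */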
1848 static tree
1849 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1850 tree vec_mask, gimple_stmt_iterator *gsi)
1852 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1853 if (!loop_mask)
1854 return vec_mask;
1856 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1858 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1859 return vec_mask;
1861 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1862 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1863 vec_mask, loop_mask);
1865 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1866 return and_res;
1869 /* Determine whether we can use a gather load or scatter store to vectorize
1870 strided load or store STMT_INFO by truncating the current offset to a
1871 smaller width. We need to be able to construct an offset vector:
1873 { 0, X, X*2, X*3, ... }
1875 without loss of precision, where X is STMT_INFO's DR_STEP.
1877 Return true if this is possible, describing the gather load or scatter
1878 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
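/* A hypothetical worked example: with DR_STEP == 20 bytes, 4-byte elements
   and at most 100 scalar iterations, trying SCALE == 4 gives X == 5, so the
   offsets { 0, 5, 10, ... } stay below 500 and a 16-bit offset type is wide
   enough; the gather or scatter is then usable provided the target supports
   that offset width.  */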
1880 static bool
1881 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1882 loop_vec_info loop_vinfo, bool masked_p,
1883 gather_scatter_info *gs_info)
1885 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1886 data_reference *dr = dr_info->dr;
1887 tree step = DR_STEP (dr);
1888 if (TREE_CODE (step) != INTEGER_CST)
1890 /* ??? Perhaps we could use range information here? */
1891 if (dump_enabled_p ())
1892 dump_printf_loc (MSG_NOTE, vect_location,
1893 "cannot truncate variable step.\n");
1894 return false;
1897 /* Get the number of bits in an element. */
1898 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1899 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1900 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1902 /* Set COUNT to the upper limit on the number of elements - 1.
1903 Start with the maximum vectorization factor. */
1904 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1906 /* Try lowering COUNT to the number of scalar latch iterations. */
1907 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1908 widest_int max_iters;
1909 if (max_loop_iterations (loop, &max_iters)
1910 && max_iters < count)
1911 count = max_iters.to_shwi ();
1913 /* Try scales of 1 and the element size. */
1914 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1915 wi::overflow_type overflow = wi::OVF_NONE;
1916 for (int i = 0; i < 2; ++i)
1918 int scale = scales[i];
1919 widest_int factor;
1920 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1921 continue;
1923 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1924 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1925 if (overflow)
1926 continue;
1927 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1928 unsigned int min_offset_bits = wi::min_precision (range, sign);
1930 /* Find the narrowest viable offset type. */
1931 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1932 tree offset_type = build_nonstandard_integer_type (offset_bits,
1933 sign == UNSIGNED);
1935 /* See whether the target supports the operation with an offset
1936 no narrower than OFFSET_TYPE. */
1937 tree memory_type = TREE_TYPE (DR_REF (dr));
1938 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1939 vectype, memory_type, offset_type, scale,
1940 &gs_info->ifn, &gs_info->offset_vectype)
1941 || gs_info->ifn == IFN_LAST)
1942 continue;
1944 gs_info->decl = NULL_TREE;
1945 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1946 but we don't need to store that here. */
1947 gs_info->base = NULL_TREE;
1948 gs_info->element_type = TREE_TYPE (vectype);
1949 gs_info->offset = fold_convert (offset_type, step);
1950 gs_info->offset_dt = vect_constant_def;
1951 gs_info->scale = scale;
1952 gs_info->memory_type = memory_type;
1953 return true;
1956 if (overflow && dump_enabled_p ())
1957 dump_printf_loc (MSG_NOTE, vect_location,
1958 "truncating gather/scatter offset to %d bits"
1959 " might change its value.\n", element_bits);
1961 return false;
1964 /* Return true if we can use gather/scatter internal functions to
1965 vectorize STMT_INFO, which is a grouped or strided load or store.
1966 MASKED_P is true if the load or store is conditional. When returning
1967 true, fill in GS_INFO with the information required to perform the
1968 operation. */
1970 static bool
1971 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1972 loop_vec_info loop_vinfo, bool masked_p,
1973 gather_scatter_info *gs_info)
1975 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1976 || gs_info->ifn == IFN_LAST)
1977 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1978 masked_p, gs_info);
1980 tree old_offset_type = TREE_TYPE (gs_info->offset);
1981 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1983 gcc_assert (TYPE_PRECISION (new_offset_type)
1984 >= TYPE_PRECISION (old_offset_type));
1985 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1987 if (dump_enabled_p ())
1988 dump_printf_loc (MSG_NOTE, vect_location,
1989 "using gather/scatter for strided/grouped access,"
1990 " scale = %d\n", gs_info->scale);
1992 return true;
1995 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1996 elements with a known constant step. Return -1 if that step
1997 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1999 static int
2000 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2002 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2003 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2004 size_zero_node);
2007 /* If the target supports a permute mask that reverses the elements in
2008 a vector of type VECTYPE, return that mask, otherwise return null. */
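/* For example, for V4SI the encoding below describes the indices
   { 3, 2, 1, 0 } and for V8HI it describes { 7, 6, 5, 4, 3, 2, 1, 0 };
   whether such a constant permutation is available is then up to the
   target.  */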
2010 static tree
2011 perm_mask_for_reverse (tree vectype)
2013 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2015 /* The encoding has a single stepped pattern. */
2016 vec_perm_builder sel (nunits, 1, 3);
2017 for (int i = 0; i < 3; ++i)
2018 sel.quick_push (nunits - 1 - i);
2020 vec_perm_indices indices (sel, 1, nunits);
2021 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
2022 indices))
2023 return NULL_TREE;
2024 return vect_gen_perm_mask_checked (vectype, indices);
2027 /* A subroutine of get_load_store_type, with a subset of the same
2028 arguments. Handle the case where STMT_INFO is a load or store that
2029 accesses consecutive elements with a negative step. Sets *POFFSET
2030 to the offset to be applied to the DR for the first access. */
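/* A small illustration: for a V4SI access with a negative step, the first
   vector covers the current element and the three elements below it in
   memory, so *POFFSET is set to (-4 + 1) * 4 == -12 bytes and the loaded or
   stored vector is reversed (VMAT_CONTIGUOUS_REVERSE), unless one of the
   special cases below (invariant stores, unsupported reversal) selects a
   different access type.  */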
2032 static vect_memory_access_type
2033 get_negative_load_store_type (vec_info *vinfo,
2034 stmt_vec_info stmt_info, tree vectype,
2035 vec_load_store_type vls_type,
2036 unsigned int ncopies, poly_int64 *poffset)
2038 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2039 dr_alignment_support alignment_support_scheme;
2041 if (ncopies > 1)
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2045 "multiple types with negative step.\n");
2046 return VMAT_ELEMENTWISE;
2049 /* For backward running DRs the first access in vectype actually is
2050 N-1 elements before the address of the DR. */
2051 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2052 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2054 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2055 alignment_support_scheme
2056 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2057 if (alignment_support_scheme != dr_aligned
2058 && alignment_support_scheme != dr_unaligned_supported)
2060 if (dump_enabled_p ())
2061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2062 "negative step but alignment required.\n");
2063 *poffset = 0;
2064 return VMAT_ELEMENTWISE;
2067 if (vls_type == VLS_STORE_INVARIANT)
2069 if (dump_enabled_p ())
2070 dump_printf_loc (MSG_NOTE, vect_location,
2071 "negative step with invariant source;"
2072 " no permute needed.\n");
2073 return VMAT_CONTIGUOUS_DOWN;
2076 if (!perm_mask_for_reverse (vectype))
2078 if (dump_enabled_p ())
2079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2080 "negative step and reversing not supported.\n");
2081 *poffset = 0;
2082 return VMAT_ELEMENTWISE;
2085 return VMAT_CONTIGUOUS_REVERSE;
2088 /* STMT_INFO is either a masked or unconditional store. Return the value
2089 being stored. */
2091 tree
2092 vect_get_store_rhs (stmt_vec_info stmt_info)
2094 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2096 gcc_assert (gimple_assign_single_p (assign));
2097 return gimple_assign_rhs1 (assign);
2099 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2101 internal_fn ifn = gimple_call_internal_fn (call);
2102 int index = internal_fn_stored_value_index (ifn);
2103 gcc_assert (index >= 0);
2104 return gimple_call_arg (call, index);
2106 gcc_unreachable ();
2109 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2111 This function returns a vector type which can be composed from NELTS pieces,
2112 whose type is recorded in PTYPE. VTYPE should be a vector type and has the
2113 same vector size as the return vector. It first checks whether the target
2114 supports a piece-sized vector mode for the construction; if not, it then
2115 checks whether a piece-sized scalar mode can be used instead. It returns
2116 NULL_TREE if no suitable composition can be found.
2118 For example, for (vtype=V16QI, nelts=4), we can probably get:
2119 - V16QI with PTYPE V4QI.
2120 - V4SI with PTYPE SI.
2121 - NULL_TREE. */
2123 static tree
2124 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2126 gcc_assert (VECTOR_TYPE_P (vtype));
2127 gcc_assert (known_gt (nelts, 0U));
2129 machine_mode vmode = TYPE_MODE (vtype);
2130 if (!VECTOR_MODE_P (vmode))
2131 return NULL_TREE;
2133 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2134 unsigned int pbsize;
2135 if (constant_multiple_p (vbsize, nelts, &pbsize))
2137 /* First check if vec_init optab supports construction from
2138 vector pieces directly. */
2139 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2140 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2141 machine_mode rmode;
2142 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2143 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2144 != CODE_FOR_nothing))
2146 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2147 return vtype;
2150 /* Otherwise check if exists an integer type of the same piece size and
2151 if vec_init optab supports construction from it directly. */
2152 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2153 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2154 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2155 != CODE_FOR_nothing))
2157 *ptype = build_nonstandard_integer_type (pbsize, 1);
2158 return build_vector_type (*ptype, nelts);
2162 return NULL_TREE;
2165 /* A subroutine of get_load_store_type, with a subset of the same
2166 arguments. Handle the case where STMT_INFO is part of a grouped load
2167 or store.
2169 For stores, the statements in the group are all consecutive
2170 and there is no gap at the end. For loads, the statements in the
2171 group might not be consecutive; there can be gaps between statements
2172 as well as at the end. */
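/* For example, in a hypothetical scalar loop such as

     for (int i = 0; i < n; ++i)
       sum += a[4*i] + a[4*i+1] + a[4*i+2];

   the three loads form a group with DR_GROUP_SIZE 4 and a trailing gap of
   one element, so a vectorized load of the group may also read the unused
   a[4*i+3]; that is the kind of overrun reasoned about below.  */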
2174 static bool
2175 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2176 tree vectype, slp_tree slp_node,
2177 bool masked_p, vec_load_store_type vls_type,
2178 vect_memory_access_type *memory_access_type,
2179 poly_int64 *poffset,
2180 dr_alignment_support *alignment_support_scheme,
2181 int *misalignment,
2182 gather_scatter_info *gs_info)
2184 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2185 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2186 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2187 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2188 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2189 bool single_element_p = (stmt_info == first_stmt_info
2190 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2191 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2192 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2194 /* True if the vectorized statements would access beyond the last
2195 statement in the group. */
2196 bool overrun_p = false;
2198 /* True if we can cope with such overrun by peeling for gaps, so that
2199 there is at least one final scalar iteration after the vector loop. */
2200 bool can_overrun_p = (!masked_p
2201 && vls_type == VLS_LOAD
2202 && loop_vinfo
2203 && !loop->inner);
2205 /* There can only be a gap at the end of the group if the stride is
2206 known at compile time. */
2207 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2209 /* Stores can't yet have gaps. */
2210 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2212 if (slp_node)
2214 /* For SLP vectorization we directly vectorize a subchain
2215 without permutation. */
2216 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2217 first_dr_info
2218 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2219 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2221 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2222 separated by the stride, until we have a complete vector.
2223 Fall back to scalar accesses if that isn't possible. */
2224 if (multiple_p (nunits, group_size))
2225 *memory_access_type = VMAT_STRIDED_SLP;
2226 else
2227 *memory_access_type = VMAT_ELEMENTWISE;
2229 else
2231 overrun_p = loop_vinfo && gap != 0;
2232 if (overrun_p && vls_type != VLS_LOAD)
2234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2235 "Grouped store with gaps requires"
2236 " non-consecutive accesses\n");
2237 return false;
2239 /* An overrun is fine if the trailing elements are smaller
2240 than the alignment boundary B. Every vector access will
2241 be a multiple of B and so we are guaranteed to access a
2242 non-gap element in the same B-sized block. */
2243 if (overrun_p
2244 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2245 vectype)
2246 / vect_get_scalar_dr_size (first_dr_info)))
2247 overrun_p = false;
2249 /* If the gap splits the vector in half and the target
2250 can do half-vector operations avoid the epilogue peeling
2251 by simply loading half of the vector only. Usually
2252 the construction with an upper zero half will be elided. */
2253 dr_alignment_support alss;
2254 int misalign = dr_misalignment (first_dr_info, vectype);
2255 tree half_vtype;
2256 if (overrun_p
2257 && !masked_p
2258 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2259 vectype, misalign)))
2260 == dr_aligned
2261 || alss == dr_unaligned_supported)
2262 && known_eq (nunits, (group_size - gap) * 2)
2263 && known_eq (nunits, group_size)
2264 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2265 != NULL_TREE))
2266 overrun_p = false;
2268 if (overrun_p && !can_overrun_p)
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2272 "Peeling for outer loop is not supported\n");
2273 return false;
2275 int cmp = compare_step_with_zero (vinfo, stmt_info);
2276 if (cmp < 0)
2278 if (single_element_p)
2279 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2280 only correct for single element "interleaving" SLP. */
2281 *memory_access_type = get_negative_load_store_type
2282 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2283 else
2285 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2286 separated by the stride, until we have a complete vector.
2287 Fall back to scalar accesses if that isn't possible. */
2288 if (multiple_p (nunits, group_size))
2289 *memory_access_type = VMAT_STRIDED_SLP;
2290 else
2291 *memory_access_type = VMAT_ELEMENTWISE;
2294 else
2296 gcc_assert (!loop_vinfo || cmp > 0);
2297 *memory_access_type = VMAT_CONTIGUOUS;
2300 /* When we have a contiguous access across loop iterations
2301 but the access in the loop doesn't cover the full vector
2302 we can end up with no gap recorded but still excess
2303 elements accessed, see PR103116. Make sure we peel for
2304 gaps if necessary and sufficient and give up if not. */
2305 if (loop_vinfo
2306 && *memory_access_type == VMAT_CONTIGUOUS
2307 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2308 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2309 nunits))
2311 unsigned HOST_WIDE_INT cnunits, cvf;
2312 if (!can_overrun_p
2313 || !nunits.is_constant (&cnunits)
2314 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2315 /* Peeling for gaps assumes that a single scalar iteration
2316 is enough to make sure the last vector iteration doesn't
2317 access excess elements.
2318 ??? Enhancements include peeling multiple iterations
2319 or using masked loads with a static mask. */
2320 || (group_size * cvf) % cnunits + group_size < cnunits)
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2324 "peeling for gaps insufficient for "
2325 "access\n");
2326 return false;
2328 overrun_p = true;
2332 else
2334 /* We can always handle this case using elementwise accesses,
2335 but see if something more efficient is available. */
2336 *memory_access_type = VMAT_ELEMENTWISE;
2338 /* If there is a gap at the end of the group then these optimizations
2339 would access excess elements in the last iteration. */
2340 bool would_overrun_p = (gap != 0);
2341 /* An overrun is fine if the trailing elements are smaller than the
2342 alignment boundary B. Every vector access will be a multiple of B
2343 and so we are guaranteed to access a non-gap element in the
2344 same B-sized block. */
2345 if (would_overrun_p
2346 && !masked_p
2347 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2348 / vect_get_scalar_dr_size (first_dr_info)))
2349 would_overrun_p = false;
2351 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2352 && (can_overrun_p || !would_overrun_p)
2353 && compare_step_with_zero (vinfo, stmt_info) > 0)
2355 /* First cope with the degenerate case of a single-element
2356 vector. */
2357 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2360 /* Otherwise try using LOAD/STORE_LANES. */
2361 else if (vls_type == VLS_LOAD
2362 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2363 : vect_store_lanes_supported (vectype, group_size,
2364 masked_p))
2366 *memory_access_type = VMAT_LOAD_STORE_LANES;
2367 overrun_p = would_overrun_p;
2370 /* If that fails, try using permuting loads. */
2371 else if (vls_type == VLS_LOAD
2372 ? vect_grouped_load_supported (vectype, single_element_p,
2373 group_size)
2374 : vect_grouped_store_supported (vectype, group_size))
2376 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2377 overrun_p = would_overrun_p;
2381 /* As a last resort, try using a gather load or scatter store.
2383 ??? Although the code can handle all group sizes correctly,
2384 it probably isn't a win to use separate strided accesses based
2385 on nearby locations. Or, even if it's a win over scalar code,
2386 it might not be a win over vectorizing at a lower VF, if that
2387 allows us to use contiguous accesses. */
2388 if (*memory_access_type == VMAT_ELEMENTWISE
2389 && single_element_p
2390 && loop_vinfo
2391 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2392 masked_p, gs_info))
2393 *memory_access_type = VMAT_GATHER_SCATTER;
2396 if (*memory_access_type == VMAT_GATHER_SCATTER
2397 || *memory_access_type == VMAT_ELEMENTWISE)
2399 *alignment_support_scheme = dr_unaligned_supported;
2400 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2402 else
2404 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2405 *alignment_support_scheme
2406 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2407 *misalignment);
2410 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2412 /* STMT is the leader of the group. Check the operands of all the
2413 stmts of the group. */
2414 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2415 while (next_stmt_info)
2417 tree op = vect_get_store_rhs (next_stmt_info);
2418 enum vect_def_type dt;
2419 if (!vect_is_simple_use (op, vinfo, &dt))
2421 if (dump_enabled_p ())
2422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2423 "use not simple.\n");
2424 return false;
2426 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2430 if (overrun_p)
2432 gcc_assert (can_overrun_p);
2433 if (dump_enabled_p ())
2434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2435 "Data access with gaps requires scalar "
2436 "epilogue loop\n");
2437 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2440 return true;
2443 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2444 if there is a memory access type that the vectorized form can use,
2445 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2446 or scatters, fill in GS_INFO accordingly. In addition
2447 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2448 the target does not support the alignment scheme. *MISALIGNMENT
2449 is set according to the alignment of the access (including
2450 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2452 SLP says whether we're performing SLP rather than loop vectorization.
2453 MASKED_P is true if the statement is conditional on a vectorized mask.
2454 VECTYPE is the vector type that the vectorized statements will use.
2455 NCOPIES is the number of vector statements that will be needed. */
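/* Some illustrative classifications (hypothetical accesses): a load of a[i]
   with unit step is VMAT_CONTIGUOUS, a load of a[n-i] can become
   VMAT_CONTIGUOUS_REVERSE when a reversing permutation is available, and a
   strided load of a[4*i] ends up as VMAT_ELEMENTWISE or, where the target
   allows it, VMAT_GATHER_SCATTER.  */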
2457 static bool
2458 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2459 tree vectype, slp_tree slp_node,
2460 bool masked_p, vec_load_store_type vls_type,
2461 unsigned int ncopies,
2462 vect_memory_access_type *memory_access_type,
2463 poly_int64 *poffset,
2464 dr_alignment_support *alignment_support_scheme,
2465 int *misalignment,
2466 gather_scatter_info *gs_info)
2468 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2469 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2470 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2471 *poffset = 0;
2472 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2474 *memory_access_type = VMAT_GATHER_SCATTER;
2475 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2476 gcc_unreachable ();
2477 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2478 &gs_info->offset_dt,
2479 &gs_info->offset_vectype))
2481 if (dump_enabled_p ())
2482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2483 "%s index use not simple.\n",
2484 vls_type == VLS_LOAD ? "gather" : "scatter");
2485 return false;
2487 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2489 if (vls_type != VLS_LOAD)
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2493 "unsupported emulated scatter.\n");
2494 return false;
2496 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2497 || !TYPE_VECTOR_SUBPARTS
2498 (gs_info->offset_vectype).is_constant ()
2499 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2500 (gs_info->offset_vectype),
2501 TYPE_VECTOR_SUBPARTS (vectype)))
2503 if (dump_enabled_p ())
2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2505 "unsupported vector types for emulated "
2506 "gather.\n");
2507 return false;
2510 /* Gather-scatter accesses perform only component accesses, alignment
2511 is irrelevant for them. */
2512 *alignment_support_scheme = dr_unaligned_supported;
2514 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2516 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2517 masked_p,
2518 vls_type, memory_access_type, poffset,
2519 alignment_support_scheme,
2520 misalignment, gs_info))
2521 return false;
2523 else if (STMT_VINFO_STRIDED_P (stmt_info))
2525 gcc_assert (!slp_node);
2526 if (loop_vinfo
2527 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2528 masked_p, gs_info))
2529 *memory_access_type = VMAT_GATHER_SCATTER;
2530 else
2531 *memory_access_type = VMAT_ELEMENTWISE;
2532 /* Alignment is irrelevant here. */
2533 *alignment_support_scheme = dr_unaligned_supported;
2535 else
2537 int cmp = compare_step_with_zero (vinfo, stmt_info);
2538 if (cmp == 0)
2540 gcc_assert (vls_type == VLS_LOAD);
2541 *memory_access_type = VMAT_INVARIANT;
2542 /* Invariant accesses perform only component accesses, alignment
2543 is irrelevant for them. */
2544 *alignment_support_scheme = dr_unaligned_supported;
2546 else
2548 if (cmp < 0)
2549 *memory_access_type = get_negative_load_store_type
2550 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2551 else
2552 *memory_access_type = VMAT_CONTIGUOUS;
2553 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2554 vectype, *poffset);
2555 *alignment_support_scheme
2556 = vect_supportable_dr_alignment (vinfo,
2557 STMT_VINFO_DR_INFO (stmt_info),
2558 vectype, *misalignment);
2562 if ((*memory_access_type == VMAT_ELEMENTWISE
2563 || *memory_access_type == VMAT_STRIDED_SLP)
2564 && !nunits.is_constant ())
2566 if (dump_enabled_p ())
2567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2568 "Not using elementwise accesses due to variable "
2569 "vectorization factor.\n");
2570 return false;
2573 if (*alignment_support_scheme == dr_unaligned_unsupported)
2575 if (dump_enabled_p ())
2576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2577 "unsupported unaligned access\n");
2578 return false;
2581 /* FIXME: At the moment the cost model seems to underestimate the
2582 cost of using elementwise accesses. This check preserves the
2583 traditional behavior until that can be fixed. */
2584 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2585 if (!first_stmt_info)
2586 first_stmt_info = stmt_info;
2587 if (*memory_access_type == VMAT_ELEMENTWISE
2588 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2589 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2590 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2591 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2593 if (dump_enabled_p ())
2594 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2595 "not falling back to elementwise accesses\n");
2596 return false;
2598 return true;
2601 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2602 conditional operation STMT_INFO. When returning true, store the mask
2603 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2604 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2605 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2607 static bool
2608 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2609 slp_tree slp_node, unsigned mask_index,
2610 tree *mask, slp_tree *mask_node,
2611 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2613 enum vect_def_type mask_dt;
2614 tree mask_vectype;
2615 slp_tree mask_node_1;
2616 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2617 mask, &mask_node_1, &mask_dt, &mask_vectype))
2619 if (dump_enabled_p ())
2620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2621 "mask use not simple.\n");
2622 return false;
2625 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2627 if (dump_enabled_p ())
2628 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2629 "mask argument is not a boolean.\n");
2630 return false;
2633 /* If the caller is not prepared for adjusting an external/constant
2634 SLP mask vector type, fail. */
2635 if (slp_node
2636 && !mask_node
2637 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2639 if (dump_enabled_p ())
2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2641 "SLP mask argument is not vectorized.\n");
2642 return false;
2645 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2646 if (!mask_vectype)
2647 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2649 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2651 if (dump_enabled_p ())
2652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2653 "could not find an appropriate vector mask type.\n");
2654 return false;
2657 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2658 TYPE_VECTOR_SUBPARTS (vectype)))
2660 if (dump_enabled_p ())
2661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2662 "vector mask type %T"
2663 " does not match vector data type %T.\n",
2664 mask_vectype, vectype);
2666 return false;
2669 *mask_dt_out = mask_dt;
2670 *mask_vectype_out = mask_vectype;
2671 if (mask_node)
2672 *mask_node = mask_node_1;
2673 return true;
2676 /* Return true if stored value RHS is suitable for vectorizing store
2677 statement STMT_INFO. When returning true, store the type of the
2678 definition in *RHS_DT_OUT, the type of the vectorized store value in
2679 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2681 static bool
2682 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2683 slp_tree slp_node, tree rhs,
2684 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2685 vec_load_store_type *vls_type_out)
2687 /* In the case this is a store from a constant make sure
2688 native_encode_expr can handle it. */
2689 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2691 if (dump_enabled_p ())
2692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2693 "cannot encode constant as a byte sequence.\n");
2694 return false;
2697 unsigned op_no = 0;
2698 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2700 if (gimple_call_internal_p (call)
2701 && internal_store_fn_p (gimple_call_internal_fn (call)))
2702 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2705 enum vect_def_type rhs_dt;
2706 tree rhs_vectype;
2707 slp_tree slp_op;
2708 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2709 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2711 if (dump_enabled_p ())
2712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2713 "use not simple.\n");
2714 return false;
2717 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2718 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2720 if (dump_enabled_p ())
2721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2722 "incompatible vector types.\n");
2723 return false;
2726 *rhs_dt_out = rhs_dt;
2727 *rhs_vectype_out = rhs_vectype;
2728 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2729 *vls_type_out = VLS_STORE_INVARIANT;
2730 else
2731 *vls_type_out = VLS_STORE;
2732 return true;
2735 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2736 Note that we support masks with floating-point type, in which case the
2737 floats are interpreted as a bitmask. */
2739 static tree
2740 vect_build_all_ones_mask (vec_info *vinfo,
2741 stmt_vec_info stmt_info, tree masktype)
2743 if (TREE_CODE (masktype) == INTEGER_TYPE)
2744 return build_int_cst (masktype, -1);
2745 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2747 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2748 mask = build_vector_from_val (masktype, mask);
2749 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2751 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2753 REAL_VALUE_TYPE r;
2754 long tmp[6];
2755 for (int j = 0; j < 6; ++j)
2756 tmp[j] = -1;
2757 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2758 tree mask = build_real (TREE_TYPE (masktype), r);
2759 mask = build_vector_from_val (masktype, mask);
2760 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2762 gcc_unreachable ();
2765 /* Build an all-zero merge value of type VECTYPE while vectorizing
2766 STMT_INFO as a gather load. */
2768 static tree
2769 vect_build_zero_merge_argument (vec_info *vinfo,
2770 stmt_vec_info stmt_info, tree vectype)
2772 tree merge;
2773 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2774 merge = build_int_cst (TREE_TYPE (vectype), 0);
2775 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2777 REAL_VALUE_TYPE r;
2778 long tmp[6];
2779 for (int j = 0; j < 6; ++j)
2780 tmp[j] = 0;
2781 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2782 merge = build_real (TREE_TYPE (vectype), r);
2784 else
2785 gcc_unreachable ();
2786 merge = build_vector_from_val (vectype, merge);
2787 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2790 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2791 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2792 the gather load operation. If the load is conditional, MASK is the
2793 unvectorized condition and MASK_DT is its definition type, otherwise
2794 MASK is null. */
2796 static void
2797 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2798 gimple_stmt_iterator *gsi,
2799 gimple **vec_stmt,
2800 gather_scatter_info *gs_info,
2801 tree mask)
2803 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2804 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2805 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2806 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2807 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2808 edge pe = loop_preheader_edge (loop);
2809 enum { NARROW, NONE, WIDEN } modifier;
2810 poly_uint64 gather_off_nunits
2811 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2813 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2814 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2815 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2816 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2817 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2818 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2819 tree scaletype = TREE_VALUE (arglist);
2820 tree real_masktype = masktype;
2821 gcc_checking_assert (types_compatible_p (srctype, rettype)
2822 && (!mask
2823 || TREE_CODE (masktype) == INTEGER_TYPE
2824 || types_compatible_p (srctype, masktype)));
2825 if (mask)
2826 masktype = truth_type_for (srctype);
2828 tree mask_halftype = masktype;
2829 tree perm_mask = NULL_TREE;
2830 tree mask_perm_mask = NULL_TREE;
2831 if (known_eq (nunits, gather_off_nunits))
2832 modifier = NONE;
2833 else if (known_eq (nunits * 2, gather_off_nunits))
2835 modifier = WIDEN;
2837 /* Currently widening gathers and scatters are only supported for
2838 fixed-length vectors. */
2839 int count = gather_off_nunits.to_constant ();
2840 vec_perm_builder sel (count, count, 1);
2841 for (int i = 0; i < count; ++i)
2842 sel.quick_push (i | (count / 2));
2844 vec_perm_indices indices (sel, 1, count);
2845 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2846 indices);
2848 else if (known_eq (nunits, gather_off_nunits * 2))
2850 modifier = NARROW;
2852 /* Currently narrowing gathers and scatters are only supported for
2853 fixed-length vectors. */
2854 int count = nunits.to_constant ();
2855 vec_perm_builder sel (count, count, 1);
2856 sel.quick_grow (count);
2857 for (int i = 0; i < count; ++i)
2858 sel[i] = i < count / 2 ? i : i + count / 2;
2859 vec_perm_indices indices (sel, 2, count);
2860 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2862 ncopies *= 2;
2864 if (mask && VECTOR_TYPE_P (real_masktype))
2866 for (int i = 0; i < count; ++i)
2867 sel[i] = i | (count / 2);
2868 indices.new_vector (sel, 2, count);
2869 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2871 else if (mask)
2872 mask_halftype = truth_type_for (gs_info->offset_vectype);
2874 else
2875 gcc_unreachable ();
2877 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2878 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2880 tree ptr = fold_convert (ptrtype, gs_info->base);
2881 if (!is_gimple_min_invariant (ptr))
2883 gimple_seq seq;
2884 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2885 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2886 gcc_assert (!new_bb);
2889 tree scale = build_int_cst (scaletype, gs_info->scale);
2891 tree vec_oprnd0 = NULL_TREE;
2892 tree vec_mask = NULL_TREE;
2893 tree src_op = NULL_TREE;
2894 tree mask_op = NULL_TREE;
2895 tree prev_res = NULL_TREE;
2897 if (!mask)
2899 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2900 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2903 auto_vec<tree> vec_oprnds0;
2904 auto_vec<tree> vec_masks;
2905 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2906 modifier == WIDEN ? ncopies / 2 : ncopies,
2907 gs_info->offset, &vec_oprnds0);
2908 if (mask)
2909 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2910 modifier == NARROW ? ncopies / 2 : ncopies,
2911 mask, &vec_masks, masktype);
2912 for (int j = 0; j < ncopies; ++j)
2914 tree op, var;
2915 if (modifier == WIDEN && (j & 1))
2916 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2917 perm_mask, stmt_info, gsi);
2918 else
2919 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2921 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2923 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2924 TYPE_VECTOR_SUBPARTS (idxtype)));
2925 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2926 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2927 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2928 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2929 op = var;
2932 if (mask)
2934 if (mask_perm_mask && (j & 1))
2935 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2936 mask_perm_mask, stmt_info, gsi);
2937 else
2939 if (modifier == NARROW)
2941 if ((j & 1) == 0)
2942 vec_mask = vec_masks[j / 2];
2944 else
2945 vec_mask = vec_masks[j];
2947 mask_op = vec_mask;
2948 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2950 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2951 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2952 gcc_assert (known_eq (sub1, sub2));
2953 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2954 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2955 gassign *new_stmt
2956 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2957 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2958 mask_op = var;
2961 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2963 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2964 gassign *new_stmt
2965 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2966 : VEC_UNPACK_LO_EXPR,
2967 mask_op);
2968 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2969 mask_op = var;
2971 src_op = mask_op;
2974 tree mask_arg = mask_op;
2975 if (masktype != real_masktype)
2977 tree utype, optype = TREE_TYPE (mask_op);
2978 if (VECTOR_TYPE_P (real_masktype)
2979 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2980 utype = real_masktype;
2981 else
2982 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2983 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2984 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2985 gassign *new_stmt
2986 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2987 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2988 mask_arg = var;
2989 if (!useless_type_conversion_p (real_masktype, utype))
2991 gcc_assert (TYPE_PRECISION (utype)
2992 <= TYPE_PRECISION (real_masktype));
2993 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2994 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2995 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2996 mask_arg = var;
2998 src_op = build_zero_cst (srctype);
3000 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
3001 mask_arg, scale);
3003 if (!useless_type_conversion_p (vectype, rettype))
3005 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3006 TYPE_VECTOR_SUBPARTS (rettype)));
3007 op = vect_get_new_ssa_name (rettype, vect_simple_var);
3008 gimple_call_set_lhs (new_stmt, op);
3009 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3010 var = make_ssa_name (vec_dest);
3011 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3012 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3013 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3015 else
3017 var = make_ssa_name (vec_dest, new_stmt);
3018 gimple_call_set_lhs (new_stmt, var);
3019 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3022 if (modifier == NARROW)
3024 if ((j & 1) == 0)
3026 prev_res = var;
3027 continue;
3029 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
3030 stmt_info, gsi);
3031 new_stmt = SSA_NAME_DEF_STMT (var);
3034 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3036 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3039 /* Prepare the base and offset in GS_INFO for vectorization.
3040 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3041 to the vectorized offset argument for the first copy of STMT_INFO.
3042 STMT_INFO is the statement described by GS_INFO and LOOP is the
3043 containing loop. */
3045 static void
3046 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3047 class loop *loop, stmt_vec_info stmt_info,
3048 slp_tree slp_node, gather_scatter_info *gs_info,
3049 tree *dataref_ptr, vec<tree> *vec_offset)
3051 gimple_seq stmts = NULL;
3052 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3053 if (stmts != NULL)
3055 basic_block new_bb;
3056 edge pe = loop_preheader_edge (loop);
3057 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3058 gcc_assert (!new_bb);
3060 if (slp_node)
3061 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3062 else
3064 unsigned ncopies
3065 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3066 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3067 gs_info->offset, vec_offset,
3068 gs_info->offset_vectype);
3072 /* Prepare to implement a grouped or strided load or store using
3073 the gather load or scatter store operation described by GS_INFO.
3074 STMT_INFO is the load or store statement.
3076 Set *DATAREF_BUMP to the amount that should be added to the base
3077 address after each copy of the vectorized statement. Set *VEC_OFFSET
3078 to an invariant offset vector in which element I has the value
3079 I * DR_STEP / SCALE. */
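/* A worked example with invented values: for a V4SI access whose DR_STEP is
   12 bytes and whose gather/scatter SCALE is 4, X is 3, so *VEC_OFFSET
   becomes { 0, 3, 6, 9 } and *DATAREF_BUMP is 12 * 4 == 48 bytes per copy of
   the vectorized statement.  */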
3081 static void
3082 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3083 loop_vec_info loop_vinfo,
3084 gather_scatter_info *gs_info,
3085 tree *dataref_bump, tree *vec_offset)
3087 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3088 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3090 tree bump = size_binop (MULT_EXPR,
3091 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3092 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3093 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3095 /* The offset given in GS_INFO can have pointer type, so use the element
3096 type of the vector instead. */
3097 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3099 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3100 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3101 ssize_int (gs_info->scale));
3102 step = fold_convert (offset_type, step);
3104 /* Create {0, X, X*2, X*3, ...}. */
3105 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3106 build_zero_cst (offset_type), step);
3107 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3110 /* Return the amount that should be added to a vector pointer to move
3111 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3112 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3113 vectorization. */
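/* For instance, when AGGR_TYPE is V4SI and the access is contiguous with a
   positive step, the increment is 16 bytes; with a negative step the same
   16 bytes are negated so that the pointer walks downwards.  */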
3115 static tree
3116 vect_get_data_ptr_increment (vec_info *vinfo,
3117 dr_vec_info *dr_info, tree aggr_type,
3118 vect_memory_access_type memory_access_type)
3120 if (memory_access_type == VMAT_INVARIANT)
3121 return size_zero_node;
3123 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3124 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3125 if (tree_int_cst_sgn (step) == -1)
3126 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3127 return iv_step;
3130 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
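/* As an illustration, vectorizing __builtin_bswap32 with a V4SI vector
   (16 bytes) builds the byte permutation
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e. the bytes
   of each 4-byte word are reversed in place; the whole operation is then a
   view-convert to V16QI, one VEC_PERM_EXPR and a view-convert back.  */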
3132 static bool
3133 vectorizable_bswap (vec_info *vinfo,
3134 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3135 gimple **vec_stmt, slp_tree slp_node,
3136 slp_tree *slp_op,
3137 tree vectype_in, stmt_vector_for_cost *cost_vec)
3139 tree op, vectype;
3140 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3141 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3142 unsigned ncopies;
3144 op = gimple_call_arg (stmt, 0);
3145 vectype = STMT_VINFO_VECTYPE (stmt_info);
3146 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3148 /* Multiple types in SLP are handled by creating the appropriate number of
3149 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3150 case of SLP. */
3151 if (slp_node)
3152 ncopies = 1;
3153 else
3154 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3156 gcc_assert (ncopies >= 1);
3158 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3159 if (! char_vectype)
3160 return false;
3162 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3163 unsigned word_bytes;
3164 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3165 return false;
3167 /* The encoding uses one stepped pattern for each byte in the word. */
3168 vec_perm_builder elts (num_bytes, word_bytes, 3);
3169 for (unsigned i = 0; i < 3; ++i)
3170 for (unsigned j = 0; j < word_bytes; ++j)
3171 elts.quick_push ((i + 1) * word_bytes - j - 1);
3173 vec_perm_indices indices (elts, 1, num_bytes);
3174 machine_mode vmode = TYPE_MODE (char_vectype);
3175 if (!can_vec_perm_const_p (vmode, vmode, indices))
3176 return false;
3178 if (! vec_stmt)
3180 if (slp_node
3181 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3185 "incompatible vector types for invariants\n");
3186 return false;
3189 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3190 DUMP_VECT_SCOPE ("vectorizable_bswap");
3191 record_stmt_cost (cost_vec,
3192 1, vector_stmt, stmt_info, 0, vect_prologue);
3193 record_stmt_cost (cost_vec,
3194 slp_node
3195 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3196 vec_perm, stmt_info, 0, vect_body);
3197 return true;
3200 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3202 /* Transform. */
3203 vec<tree> vec_oprnds = vNULL;
3204 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3205 op, &vec_oprnds);
3206 /* Arguments are ready. Create the new vector stmt. */
3207 unsigned i;
3208 tree vop;
3209 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3211 gimple *new_stmt;
3212 tree tem = make_ssa_name (char_vectype);
3213 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3214 char_vectype, vop));
3215 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3216 tree tem2 = make_ssa_name (char_vectype);
3217 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3218 tem, tem, bswap_vconst);
3219 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3220 tem = make_ssa_name (vectype);
3221 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3222 vectype, tem2));
3223 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3224 if (slp_node)
3225 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3226 else
3227 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3230 if (!slp_node)
3231 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3233 vec_oprnds.release ();
3234 return true;
3237 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3238 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3239 in a single step. On success, store the binary pack code in
3240 *CONVERT_CODE. */
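/* For example, narrowing V2DI call results to a V4SI output can be done in
   a single step with VEC_PACK_TRUNC_EXPR, which is then the code stored in
   *CONVERT_CODE; a conversion that would need more than one packing step
   makes this function return false.  */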
3242 static bool
3243 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3244 tree_code *convert_code)
3246 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3247 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3248 return false;
3250 tree_code code;
3251 int multi_step_cvt = 0;
3252 auto_vec <tree, 8> interm_types;
3253 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3254 &code, &multi_step_cvt, &interm_types)
3255 || multi_step_cvt)
3256 return false;
3258 *convert_code = code;
3259 return true;
3262 /* Function vectorizable_call.
3264 Check if STMT_INFO performs a function call that can be vectorized.
3265 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3266 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3267 Return true if STMT_INFO is vectorizable in this way. */
3269 static bool
3270 vectorizable_call (vec_info *vinfo,
3271 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3272 gimple **vec_stmt, slp_tree slp_node,
3273 stmt_vector_for_cost *cost_vec)
3275 gcall *stmt;
3276 tree vec_dest;
3277 tree scalar_dest;
3278 tree op;
3279 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3280 tree vectype_out, vectype_in;
3281 poly_uint64 nunits_in;
3282 poly_uint64 nunits_out;
3283 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3284 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3285 tree fndecl, new_temp, rhs_type;
3286 enum vect_def_type dt[4]
3287 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3288 vect_unknown_def_type };
3289 tree vectypes[ARRAY_SIZE (dt)] = {};
3290 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3291 int ndts = ARRAY_SIZE (dt);
3292 int ncopies, j;
3293 auto_vec<tree, 8> vargs;
3294 enum { NARROW, NONE, WIDEN } modifier;
3295 size_t i, nargs;
3296 tree lhs;
3298 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3299 return false;
3301 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3302 && ! vec_stmt)
3303 return false;
3305 /* Is STMT_INFO a vectorizable call? */
3306 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3307 if (!stmt)
3308 return false;
3310 if (gimple_call_internal_p (stmt)
3311 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3312 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3313 /* Handled by vectorizable_load and vectorizable_store. */
3314 return false;
3316 if (gimple_call_lhs (stmt) == NULL_TREE
3317 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3318 return false;
3320 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3322 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3324 /* Process function arguments. */
3325 rhs_type = NULL_TREE;
3326 vectype_in = NULL_TREE;
3327 nargs = gimple_call_num_args (stmt);
3329 /* Bail out if the function has more than four arguments; we do not have
3330 interesting builtin functions to vectorize with more than two arguments
3331 except for fma. Having no arguments is also not good. */
3332 if (nargs == 0 || nargs > 4)
3333 return false;
3335 /* Ignore the arguments of IFN_GOMP_SIMD_LANE; they are magic. */
3336 combined_fn cfn = gimple_call_combined_fn (stmt);
3337 if (cfn == CFN_GOMP_SIMD_LANE)
3339 nargs = 0;
3340 rhs_type = unsigned_type_node;
3343 int mask_opno = -1;
3344 if (internal_fn_p (cfn))
3345 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3347 for (i = 0; i < nargs; i++)
3349 if ((int) i == mask_opno)
3351 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3352 &op, &slp_op[i], &dt[i], &vectypes[i]))
3353 return false;
3354 continue;
3357 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3358 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3360 if (dump_enabled_p ())
3361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3362 "use not simple.\n");
3363 return false;
3366 /* We can only handle calls with arguments of the same type. */
3367 if (rhs_type
3368 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3370 if (dump_enabled_p ())
3371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3372 "argument types differ.\n");
3373 return false;
3375 if (!rhs_type)
3376 rhs_type = TREE_TYPE (op);
3378 if (!vectype_in)
3379 vectype_in = vectypes[i];
3380 else if (vectypes[i]
3381 && !types_compatible_p (vectypes[i], vectype_in))
3383 if (dump_enabled_p ())
3384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3385 "argument vector types differ.\n");
3386 return false;
3389 /* If all arguments are external or constant defs, infer the vector type
3390 from the scalar type. */
3391 if (!vectype_in)
3392 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3393 if (vec_stmt)
3394 gcc_assert (vectype_in);
3395 if (!vectype_in)
3397 if (dump_enabled_p ())
3398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3399 "no vectype for scalar type %T\n", rhs_type);
3401 return false;
3403 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3404 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3405 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3406 by a pack of the two vectors into an SI vector. We would need
3407 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3408 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3410 if (dump_enabled_p ())
3411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3412 "mismatched vector sizes %T and %T\n",
3413 vectype_in, vectype_out);
3414 return false;
3417 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3418 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3420 if (dump_enabled_p ())
3421 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3422 "mixed mask and nonmask vector types\n");
3423 return false;
3426 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3428 if (dump_enabled_p ())
3429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3430 "use emulated vector type for call\n");
3431 return false;
3434 /* FORNOW */
3435 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3436 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3437 if (known_eq (nunits_in * 2, nunits_out))
3438 modifier = NARROW;
3439 else if (known_eq (nunits_out, nunits_in))
3440 modifier = NONE;
3441 else if (known_eq (nunits_out * 2, nunits_in))
3442 modifier = WIDEN;
3443 else
3444 return false;
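/* For example (vector modes purely illustrative): since equal TYPE_SIZEs are
   guaranteed above, V2DI arguments with a V4SI result give NARROW, V4SI
   arguments with a V2DI result give WIDEN, and equal subparts give NONE. */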
3446 /* We only handle functions that do not read or clobber memory. */
3447 if (gimple_vuse (stmt))
3449 if (dump_enabled_p ())
3450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3451 "function reads from or writes to memory.\n");
3452 return false;
3455 /* For now, we only vectorize functions if a target specific builtin
3456 is available. TODO -- in some cases, it might be profitable to
3457 insert the calls for pieces of the vector, in order to be able
3458 to vectorize other operations in the loop. */
3459 fndecl = NULL_TREE;
3460 internal_fn ifn = IFN_LAST;
3461 tree callee = gimple_call_fndecl (stmt);
3463 /* First try using an internal function. */
3464 tree_code convert_code = ERROR_MARK;
3465 if (cfn != CFN_LAST
3466 && (modifier == NONE
3467 || (modifier == NARROW
3468 && simple_integer_narrowing (vectype_out, vectype_in,
3469 &convert_code))))
3470 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3471 vectype_in);
3473 /* If that fails, try asking for a target-specific built-in function. */
3474 if (ifn == IFN_LAST)
3476 if (cfn != CFN_LAST)
3477 fndecl = targetm.vectorize.builtin_vectorized_function
3478 (cfn, vectype_out, vectype_in);
3479 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3480 fndecl = targetm.vectorize.builtin_md_vectorized_function
3481 (callee, vectype_out, vectype_in);
3484 if (ifn == IFN_LAST && !fndecl)
3486 if (cfn == CFN_GOMP_SIMD_LANE
3487 && !slp_node
3488 && loop_vinfo
3489 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3490 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3491 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3492 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3494 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3495 { 0, 1, 2, ... vf - 1 } vector. */
3496 gcc_assert (nargs == 0);
3498 else if (modifier == NONE
3499 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3500 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3501 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3502 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3503 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3504 slp_op, vectype_in, cost_vec);
3505 else
3507 if (dump_enabled_p ())
3508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3509 "function is not vectorizable.\n");
3510 return false;
3514 if (slp_node)
3515 ncopies = 1;
3516 else if (modifier == NARROW && ifn == IFN_LAST)
3517 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3518 else
3519 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3521 /* Sanity check: make sure that at least one copy of the vectorized stmt
3522 needs to be generated. */
3523 gcc_assert (ncopies >= 1);
3525 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3526 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3527 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3528 if (!vec_stmt) /* transformation not required. */
3530 if (slp_node)
3531 for (i = 0; i < nargs; ++i)
3532 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3533 vectypes[i]
3534 ? vectypes[i] : vectype_in))
3536 if (dump_enabled_p ())
3537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3538 "incompatible vector types for invariants\n");
3539 return false;
3541 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3542 DUMP_VECT_SCOPE ("vectorizable_call");
3543 vect_model_simple_cost (vinfo, stmt_info,
3544 ncopies, dt, ndts, slp_node, cost_vec);
3545 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3546 record_stmt_cost (cost_vec, ncopies / 2,
3547 vec_promote_demote, stmt_info, 0, vect_body);
3549 if (loop_vinfo
3550 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3551 && (reduc_idx >= 0 || mask_opno >= 0))
3553 if (reduc_idx >= 0
3554 && (cond_fn == IFN_LAST
3555 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3556 OPTIMIZE_FOR_SPEED)))
3558 if (dump_enabled_p ())
3559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3560 "can't use a fully-masked loop because no"
3561 " conditional operation is available.\n");
3562 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3564 else
3566 unsigned int nvectors
3567 = (slp_node
3568 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3569 : ncopies);
3570 tree scalar_mask = NULL_TREE;
3571 if (mask_opno >= 0)
3572 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3573 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3574 vectype_out, scalar_mask);
3577 return true;
3580 /* Transform. */
3582 if (dump_enabled_p ())
3583 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3585 /* Handle def. */
3586 scalar_dest = gimple_call_lhs (stmt);
3587 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3589 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3590 unsigned int vect_nargs = nargs;
3591 if (masked_loop_p && reduc_idx >= 0)
3593 ifn = cond_fn;
3594 vect_nargs += 2;
3597 if (modifier == NONE || ifn != IFN_LAST)
3599 tree prev_res = NULL_TREE;
3600 vargs.safe_grow (vect_nargs, true);
3601 auto_vec<vec<tree> > vec_defs (nargs);
3602 for (j = 0; j < ncopies; ++j)
3604 /* Build argument list for the vectorized call. */
3605 if (slp_node)
3607 vec<tree> vec_oprnds0;
3609 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3610 vec_oprnds0 = vec_defs[0];
3612 /* Arguments are ready. Create the new vector stmt. */
3613 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3615 int varg = 0;
3616 if (masked_loop_p && reduc_idx >= 0)
3618 unsigned int vec_num = vec_oprnds0.length ();
3619 /* Always true for SLP. */
3620 gcc_assert (ncopies == 1);
3621 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3622 vectype_out, i);
3624 size_t k;
3625 for (k = 0; k < nargs; k++)
3627 vec<tree> vec_oprndsk = vec_defs[k];
3628 vargs[varg++] = vec_oprndsk[i];
3630 if (masked_loop_p && reduc_idx >= 0)
3631 vargs[varg++] = vargs[reduc_idx + 1];
3632 gimple *new_stmt;
3633 if (modifier == NARROW)
3635 /* We don't define any narrowing conditional functions
3636 at present. */
3637 gcc_assert (mask_opno < 0);
3638 tree half_res = make_ssa_name (vectype_in);
3639 gcall *call
3640 = gimple_build_call_internal_vec (ifn, vargs);
3641 gimple_call_set_lhs (call, half_res);
3642 gimple_call_set_nothrow (call, true);
3643 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3644 if ((i & 1) == 0)
3646 prev_res = half_res;
3647 continue;
3649 new_temp = make_ssa_name (vec_dest);
3650 new_stmt = gimple_build_assign (new_temp, convert_code,
3651 prev_res, half_res);
3652 vect_finish_stmt_generation (vinfo, stmt_info,
3653 new_stmt, gsi);
3655 else
3657 if (mask_opno >= 0 && masked_loop_p)
3659 unsigned int vec_num = vec_oprnds0.length ();
3660 /* Always true for SLP. */
3661 gcc_assert (ncopies == 1);
3662 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3663 vectype_out, i);
3664 vargs[mask_opno] = prepare_vec_mask
3665 (loop_vinfo, TREE_TYPE (mask), mask,
3666 vargs[mask_opno], gsi);
3669 gcall *call;
3670 if (ifn != IFN_LAST)
3671 call = gimple_build_call_internal_vec (ifn, vargs);
3672 else
3673 call = gimple_build_call_vec (fndecl, vargs);
3674 new_temp = make_ssa_name (vec_dest, call);
3675 gimple_call_set_lhs (call, new_temp);
3676 gimple_call_set_nothrow (call, true);
3677 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3678 new_stmt = call;
3680 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3682 continue;
3685 int varg = 0;
3686 if (masked_loop_p && reduc_idx >= 0)
3687 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3688 vectype_out, j);
3689 for (i = 0; i < nargs; i++)
3691 op = gimple_call_arg (stmt, i);
3692 if (j == 0)
3694 vec_defs.quick_push (vNULL);
3695 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3696 op, &vec_defs[i],
3697 vectypes[i]);
3699 vargs[varg++] = vec_defs[i][j];
3701 if (masked_loop_p && reduc_idx >= 0)
3702 vargs[varg++] = vargs[reduc_idx + 1];
3704 if (mask_opno >= 0 && masked_loop_p)
3706 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3707 vectype_out, j);
3708 vargs[mask_opno]
3709 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3710 vargs[mask_opno], gsi);
3713 gimple *new_stmt;
3714 if (cfn == CFN_GOMP_SIMD_LANE)
3716 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3717 tree new_var
3718 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3719 gimple *init_stmt = gimple_build_assign (new_var, cst);
3720 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3721 new_temp = make_ssa_name (vec_dest);
3722 new_stmt = gimple_build_assign (new_temp, new_var);
3723 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3725 else if (modifier == NARROW)
3727 /* We don't define any narrowing conditional functions at
3728 present. */
3729 gcc_assert (mask_opno < 0);
3730 tree half_res = make_ssa_name (vectype_in);
3731 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3732 gimple_call_set_lhs (call, half_res);
3733 gimple_call_set_nothrow (call, true);
3734 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3735 if ((j & 1) == 0)
3737 prev_res = half_res;
3738 continue;
3740 new_temp = make_ssa_name (vec_dest);
3741 new_stmt = gimple_build_assign (new_temp, convert_code,
3742 prev_res, half_res);
3743 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3745 else
3747 gcall *call;
3748 if (ifn != IFN_LAST)
3749 call = gimple_build_call_internal_vec (ifn, vargs);
3750 else
3751 call = gimple_build_call_vec (fndecl, vargs);
3752 new_temp = make_ssa_name (vec_dest, call);
3753 gimple_call_set_lhs (call, new_temp);
3754 gimple_call_set_nothrow (call, true);
3755 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3756 new_stmt = call;
3759 if (j == (modifier == NARROW ? 1 : 0))
3760 *vec_stmt = new_stmt;
3761 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3763 for (i = 0; i < nargs; i++)
3765 vec<tree> vec_oprndsi = vec_defs[i];
3766 vec_oprndsi.release ();
3769 else if (modifier == NARROW)
3771 auto_vec<vec<tree> > vec_defs (nargs);
3772 /* We don't define any narrowing conditional functions at present. */
3773 gcc_assert (mask_opno < 0);
3774 for (j = 0; j < ncopies; ++j)
3776 /* Build argument list for the vectorized call. */
3777 if (j == 0)
3778 vargs.create (nargs * 2);
3779 else
3780 vargs.truncate (0);
3782 if (slp_node)
3784 vec<tree> vec_oprnds0;
3786 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3787 vec_oprnds0 = vec_defs[0];
3789 /* Arguments are ready. Create the new vector stmt. */
3790 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3792 size_t k;
3793 vargs.truncate (0);
3794 for (k = 0; k < nargs; k++)
3796 vec<tree> vec_oprndsk = vec_defs[k];
3797 vargs.quick_push (vec_oprndsk[i]);
3798 vargs.quick_push (vec_oprndsk[i + 1]);
3800 gcall *call;
3801 if (ifn != IFN_LAST)
3802 call = gimple_build_call_internal_vec (ifn, vargs);
3803 else
3804 call = gimple_build_call_vec (fndecl, vargs);
3805 new_temp = make_ssa_name (vec_dest, call);
3806 gimple_call_set_lhs (call, new_temp);
3807 gimple_call_set_nothrow (call, true);
3808 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3809 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3811 continue;
3814 for (i = 0; i < nargs; i++)
3816 op = gimple_call_arg (stmt, i);
3817 if (j == 0)
3819 vec_defs.quick_push (vNULL);
3820 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3821 op, &vec_defs[i], vectypes[i]);
3823 vec_oprnd0 = vec_defs[i][2*j];
3824 vec_oprnd1 = vec_defs[i][2*j+1];
3826 vargs.quick_push (vec_oprnd0);
3827 vargs.quick_push (vec_oprnd1);
3830 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3831 new_temp = make_ssa_name (vec_dest, new_stmt);
3832 gimple_call_set_lhs (new_stmt, new_temp);
3833 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3835 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3838 if (!slp_node)
3839 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3841 for (i = 0; i < nargs; i++)
3843 vec<tree> vec_oprndsi = vec_defs[i];
3844 vec_oprndsi.release ();
3847 else
3848 /* No current target implements this case. */
3849 return false;
3851 vargs.release ();
3853 /* The call in STMT might prevent it from being removed in DCE.
3854 We however cannot remove it here, due to the way the SSA name
3855 it defines is mapped to the new definition. So just replace the
3856 rhs of the statement with something harmless. */
3858 if (slp_node)
3859 return true;
3861 stmt_info = vect_orig_stmt (stmt_info);
3862 lhs = gimple_get_lhs (stmt_info->stmt);
3864 gassign *new_stmt
3865 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3866 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3868 return true;
3872 struct simd_call_arg_info
3874 tree vectype;
3875 tree op;
3876 HOST_WIDE_INT linear_step;
3877 enum vect_def_type dt;
3878 unsigned int align;
3879 bool simd_lane_linear;
3882 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3883 is linear within a simd lane (but not within the whole loop), note it in
3884 *ARGINFO. */
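/* For instance (schematic GIMPLE, SSA names made up), in a simd loop

       _1 = .GOMP_SIMD_LANE (simduid.0_7);
       _2 = (sizetype) _1;
       _3 = _2 * 8;
       p_4 = &b[0] + _3;

   analyzing p_4 records base &b[0] and linear_step 8 in *ARGINFO. */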
3886 static void
3887 vect_simd_lane_linear (tree op, class loop *loop,
3888 struct simd_call_arg_info *arginfo)
3890 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3892 if (!is_gimple_assign (def_stmt)
3893 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3894 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3895 return;
3897 tree base = gimple_assign_rhs1 (def_stmt);
3898 HOST_WIDE_INT linear_step = 0;
3899 tree v = gimple_assign_rhs2 (def_stmt);
3900 while (TREE_CODE (v) == SSA_NAME)
3902 tree t;
3903 def_stmt = SSA_NAME_DEF_STMT (v);
3904 if (is_gimple_assign (def_stmt))
3905 switch (gimple_assign_rhs_code (def_stmt))
3907 case PLUS_EXPR:
3908 t = gimple_assign_rhs2 (def_stmt);
3909 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3910 return;
3911 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3912 v = gimple_assign_rhs1 (def_stmt);
3913 continue;
3914 case MULT_EXPR:
3915 t = gimple_assign_rhs2 (def_stmt);
3916 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3917 return;
3918 linear_step = tree_to_shwi (t);
3919 v = gimple_assign_rhs1 (def_stmt);
3920 continue;
3921 CASE_CONVERT:
3922 t = gimple_assign_rhs1 (def_stmt);
3923 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3924 || (TYPE_PRECISION (TREE_TYPE (v))
3925 < TYPE_PRECISION (TREE_TYPE (t))))
3926 return;
3927 if (!linear_step)
3928 linear_step = 1;
3929 v = t;
3930 continue;
3931 default:
3932 return;
3934 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3935 && loop->simduid
3936 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3937 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3938 == loop->simduid))
3940 if (!linear_step)
3941 linear_step = 1;
3942 arginfo->linear_step = linear_step;
3943 arginfo->op = base;
3944 arginfo->simd_lane_linear = true;
3945 return;
3950 /* Return the number of elements in vector type VECTYPE, which is associated
3951 with a SIMD clone. At present these vectors always have a constant
3952 length. */
3954 static unsigned HOST_WIDE_INT
3955 simd_clone_subparts (tree vectype)
3957 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3960 /* Function vectorizable_simd_clone_call.
3962 Check if STMT_INFO performs a function call that can be vectorized
3963 by calling a simd clone of the function.
3964 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3965 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3966 Return true if STMT_INFO is vectorizable in this way. */
3968 static bool
3969 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3970 gimple_stmt_iterator *gsi,
3971 gimple **vec_stmt, slp_tree slp_node,
3972 stmt_vector_for_cost *)
3974 tree vec_dest;
3975 tree scalar_dest;
3976 tree op, type;
3977 tree vec_oprnd0 = NULL_TREE;
3978 tree vectype;
3979 poly_uint64 nunits;
3980 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3981 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3982 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3983 tree fndecl, new_temp;
3984 int ncopies, j;
3985 auto_vec<simd_call_arg_info> arginfo;
3986 vec<tree> vargs = vNULL;
3987 size_t i, nargs;
3988 tree lhs, rtype, ratype;
3989 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3991 /* Is STMT a vectorizable call? */
3992 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3993 if (!stmt)
3994 return false;
3996 fndecl = gimple_call_fndecl (stmt);
3997 if (fndecl == NULL_TREE)
3998 return false;
4000 struct cgraph_node *node = cgraph_node::get (fndecl);
4001 if (node == NULL || node->simd_clones == NULL)
4002 return false;
4004 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4005 return false;
4007 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4008 && ! vec_stmt)
4009 return false;
4011 if (gimple_call_lhs (stmt)
4012 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4013 return false;
4015 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4017 vectype = STMT_VINFO_VECTYPE (stmt_info);
4019 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4020 return false;
4022 /* FORNOW */
4023 if (slp_node)
4024 return false;
4026 /* Process function arguments. */
4027 nargs = gimple_call_num_args (stmt);
4029 /* Bail out if the function has zero arguments. */
4030 if (nargs == 0)
4031 return false;
4033 arginfo.reserve (nargs, true);
4035 for (i = 0; i < nargs; i++)
4037 simd_call_arg_info thisarginfo;
4038 affine_iv iv;
4040 thisarginfo.linear_step = 0;
4041 thisarginfo.align = 0;
4042 thisarginfo.op = NULL_TREE;
4043 thisarginfo.simd_lane_linear = false;
4045 op = gimple_call_arg (stmt, i);
4046 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4047 &thisarginfo.vectype)
4048 || thisarginfo.dt == vect_uninitialized_def)
4050 if (dump_enabled_p ())
4051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4052 "use not simple.\n");
4053 return false;
4056 if (thisarginfo.dt == vect_constant_def
4057 || thisarginfo.dt == vect_external_def)
4058 gcc_assert (thisarginfo.vectype == NULL_TREE);
4059 else
4061 gcc_assert (thisarginfo.vectype != NULL_TREE);
4062 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
4064 if (dump_enabled_p ())
4065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4066 "vector mask arguments are not supported\n");
4067 return false;
4071 /* For linear arguments, the analyze phase should have saved
4072 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4073 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4074 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4076 gcc_assert (vec_stmt);
4077 thisarginfo.linear_step
4078 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4079 thisarginfo.op
4080 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4081 thisarginfo.simd_lane_linear
4082 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4083 == boolean_true_node);
4084 /* If loop has been peeled for alignment, we need to adjust it. */
4085 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4086 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4087 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4089 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4090 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4091 tree opt = TREE_TYPE (thisarginfo.op);
4092 bias = fold_convert (TREE_TYPE (step), bias);
4093 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4094 thisarginfo.op
4095 = fold_build2 (POINTER_TYPE_P (opt)
4096 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4097 thisarginfo.op, bias);
4100 else if (!vec_stmt
4101 && thisarginfo.dt != vect_constant_def
4102 && thisarginfo.dt != vect_external_def
4103 && loop_vinfo
4104 && TREE_CODE (op) == SSA_NAME
4105 && simple_iv (loop, loop_containing_stmt (stmt), op,
4106 &iv, false)
4107 && tree_fits_shwi_p (iv.step))
4109 thisarginfo.linear_step = tree_to_shwi (iv.step);
4110 thisarginfo.op = iv.base;
4112 else if ((thisarginfo.dt == vect_constant_def
4113 || thisarginfo.dt == vect_external_def)
4114 && POINTER_TYPE_P (TREE_TYPE (op)))
4115 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4116 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4117 linear too. */
4118 if (POINTER_TYPE_P (TREE_TYPE (op))
4119 && !thisarginfo.linear_step
4120 && !vec_stmt
4121 && thisarginfo.dt != vect_constant_def
4122 && thisarginfo.dt != vect_external_def
4123 && loop_vinfo
4124 && !slp_node
4125 && TREE_CODE (op) == SSA_NAME)
4126 vect_simd_lane_linear (op, loop, &thisarginfo);
4128 arginfo.quick_push (thisarginfo);
4131 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4132 if (!vf.is_constant ())
4134 if (dump_enabled_p ())
4135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4136 "not considering SIMD clones; not yet supported"
4137 " for variable-width vectors.\n");
4138 return false;
4141 unsigned int badness = 0;
4142 struct cgraph_node *bestn = NULL;
4143 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4144 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4145 else
4146 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4147 n = n->simdclone->next_clone)
4149 unsigned int this_badness = 0;
4150 unsigned int num_calls;
4151 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4152 || n->simdclone->nargs != nargs)
4153 continue;
4154 if (num_calls != 1)
4155 this_badness += exact_log2 (num_calls) * 4096;
4156 if (n->simdclone->inbranch)
4157 this_badness += 8192;
4158 int target_badness = targetm.simd_clone.usable (n);
4159 if (target_badness < 0)
4160 continue;
4161 this_badness += target_badness * 512;
4162 /* FORNOW: Code to pass the mask argument still needs to be added. */
4163 if (n->simdclone->inbranch)
4164 continue;
4165 for (i = 0; i < nargs; i++)
4167 switch (n->simdclone->args[i].arg_type)
4169 case SIMD_CLONE_ARG_TYPE_VECTOR:
4170 if (!useless_type_conversion_p
4171 (n->simdclone->args[i].orig_type,
4172 TREE_TYPE (gimple_call_arg (stmt, i))))
4173 i = -1;
4174 else if (arginfo[i].dt == vect_constant_def
4175 || arginfo[i].dt == vect_external_def
4176 || arginfo[i].linear_step)
4177 this_badness += 64;
4178 break;
4179 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4180 if (arginfo[i].dt != vect_constant_def
4181 && arginfo[i].dt != vect_external_def)
4182 i = -1;
4183 break;
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4185 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4186 if (arginfo[i].dt == vect_constant_def
4187 || arginfo[i].dt == vect_external_def
4188 || (arginfo[i].linear_step
4189 != n->simdclone->args[i].linear_step))
4190 i = -1;
4191 break;
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4194 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4195 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4196 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4197 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4198 /* FORNOW */
4199 i = -1;
4200 break;
4201 case SIMD_CLONE_ARG_TYPE_MASK:
4202 gcc_unreachable ();
4204 if (i == (size_t) -1)
4205 break;
4206 if (n->simdclone->args[i].alignment > arginfo[i].align)
4208 i = -1;
4209 break;
4211 if (arginfo[i].align)
4212 this_badness += (exact_log2 (arginfo[i].align)
4213 - exact_log2 (n->simdclone->args[i].alignment));
4215 if (i == (size_t) -1)
4216 continue;
4217 if (bestn == NULL || this_badness < badness)
4219 bestn = n;
4220 badness = this_badness;
4224 if (bestn == NULL)
4225 return false;
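/* A worked example of the scoring in the selection loop above (numbers are
   only illustrative): with vf == 8, a simdlen-4 clone needs num_calls == 2
   and starts at exact_log2 (2) * 4096 == 4096; an unusable target result
   rejects the clone outright, otherwise target_badness * 512 is added, and
   each vector argument that is invariant or linear adds another 64. The
   candidate with the lowest total badness is kept in BESTN. */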
4227 for (i = 0; i < nargs; i++)
4228 if ((arginfo[i].dt == vect_constant_def
4229 || arginfo[i].dt == vect_external_def)
4230 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4232 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4233 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4234 slp_node);
4235 if (arginfo[i].vectype == NULL
4236 || !constant_multiple_p (bestn->simdclone->simdlen,
4237 simd_clone_subparts (arginfo[i].vectype)))
4238 return false;
4241 fndecl = bestn->decl;
4242 nunits = bestn->simdclone->simdlen;
4243 ncopies = vector_unroll_factor (vf, nunits);
4245 /* If the function isn't const, only allow it in simd loops where the user
4246 has asserted that at least nunits consecutive iterations can be
4247 performed using SIMD instructions. */
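/* For example, a clone of a non-'const' function (gimple_vuse is set) is
   still acceptable when the loop was written with something like
   '#pragma omp simd safelen(N)' and N >= nunits, which is what gives
   loop->safelen a large enough value here. */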
4248 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4249 && gimple_vuse (stmt))
4250 return false;
4252 /* Sanity check: make sure that at least one copy of the vectorized stmt
4253 needs to be generated. */
4254 gcc_assert (ncopies >= 1);
4256 if (!vec_stmt) /* transformation not required. */
4258 /* When the original call is pure or const but the SIMD ABI dictates
4259 an aggregate return, we will have to use a virtual definition and
4260 in a loop eventually even need to add a virtual PHI. That's
4261 not straightforward, so allow fixing this up via renaming. */
4262 if (gimple_call_lhs (stmt)
4263 && !gimple_vdef (stmt)
4264 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4265 vinfo->any_known_not_updated_vssa = true;
4266 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4267 for (i = 0; i < nargs; i++)
4268 if ((bestn->simdclone->args[i].arg_type
4269 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4270 || (bestn->simdclone->args[i].arg_type
4271 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4273 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4274 + 1,
4275 true);
4276 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4277 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4278 ? size_type_node : TREE_TYPE (arginfo[i].op);
4279 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4280 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4281 tree sll = arginfo[i].simd_lane_linear
4282 ? boolean_true_node : boolean_false_node;
4283 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4285 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4286 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4287 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4288 dt, slp_node, cost_vec); */
4289 return true;
4292 /* Transform. */
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4297 /* Handle def. */
4298 scalar_dest = gimple_call_lhs (stmt);
4299 vec_dest = NULL_TREE;
4300 rtype = NULL_TREE;
4301 ratype = NULL_TREE;
4302 if (scalar_dest)
4304 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4305 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4306 if (TREE_CODE (rtype) == ARRAY_TYPE)
4308 ratype = rtype;
4309 rtype = TREE_TYPE (ratype);
4313 auto_vec<vec<tree> > vec_oprnds;
4314 auto_vec<unsigned> vec_oprnds_i;
4315 vec_oprnds.safe_grow_cleared (nargs, true);
4316 vec_oprnds_i.safe_grow_cleared (nargs, true);
4317 for (j = 0; j < ncopies; ++j)
4319 /* Build argument list for the vectorized call. */
4320 if (j == 0)
4321 vargs.create (nargs);
4322 else
4323 vargs.truncate (0);
4325 for (i = 0; i < nargs; i++)
4327 unsigned int k, l, m, o;
4328 tree atype;
4329 op = gimple_call_arg (stmt, i);
4330 switch (bestn->simdclone->args[i].arg_type)
4332 case SIMD_CLONE_ARG_TYPE_VECTOR:
4333 atype = bestn->simdclone->args[i].vector_type;
4334 o = vector_unroll_factor (nunits,
4335 simd_clone_subparts (atype));
4336 for (m = j * o; m < (j + 1) * o; m++)
4338 if (simd_clone_subparts (atype)
4339 < simd_clone_subparts (arginfo[i].vectype))
4341 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4342 k = (simd_clone_subparts (arginfo[i].vectype)
4343 / simd_clone_subparts (atype));
4344 gcc_assert ((k & (k - 1)) == 0);
4345 if (m == 0)
4347 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4348 ncopies * o / k, op,
4349 &vec_oprnds[i]);
4350 vec_oprnds_i[i] = 0;
4351 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4353 else
4355 vec_oprnd0 = arginfo[i].op;
4356 if ((m & (k - 1)) == 0)
4357 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4359 arginfo[i].op = vec_oprnd0;
4360 vec_oprnd0
4361 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4362 bitsize_int (prec),
4363 bitsize_int ((m & (k - 1)) * prec));
4364 gassign *new_stmt
4365 = gimple_build_assign (make_ssa_name (atype),
4366 vec_oprnd0);
4367 vect_finish_stmt_generation (vinfo, stmt_info,
4368 new_stmt, gsi);
4369 vargs.safe_push (gimple_assign_lhs (new_stmt));
4371 else
4373 k = (simd_clone_subparts (atype)
4374 / simd_clone_subparts (arginfo[i].vectype));
4375 gcc_assert ((k & (k - 1)) == 0);
4376 vec<constructor_elt, va_gc> *ctor_elts;
4377 if (k != 1)
4378 vec_alloc (ctor_elts, k);
4379 else
4380 ctor_elts = NULL;
4381 for (l = 0; l < k; l++)
4383 if (m == 0 && l == 0)
4385 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4386 k * o * ncopies,
4388 &vec_oprnds[i]);
4389 vec_oprnds_i[i] = 0;
4390 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4392 else
4393 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4394 arginfo[i].op = vec_oprnd0;
4395 if (k == 1)
4396 break;
4397 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4398 vec_oprnd0);
4400 if (k == 1)
4401 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4402 atype))
4404 vec_oprnd0
4405 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4406 gassign *new_stmt
4407 = gimple_build_assign (make_ssa_name (atype),
4408 vec_oprnd0);
4409 vect_finish_stmt_generation (vinfo, stmt_info,
4410 new_stmt, gsi);
4411 vargs.safe_push (gimple_assign_lhs (new_stmt));
4413 else
4414 vargs.safe_push (vec_oprnd0);
4415 else
4417 vec_oprnd0 = build_constructor (atype, ctor_elts);
4418 gassign *new_stmt
4419 = gimple_build_assign (make_ssa_name (atype),
4420 vec_oprnd0);
4421 vect_finish_stmt_generation (vinfo, stmt_info,
4422 new_stmt, gsi);
4423 vargs.safe_push (gimple_assign_lhs (new_stmt));
4427 break;
4428 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4429 vargs.safe_push (op);
4430 break;
4431 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4432 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4433 if (j == 0)
4435 gimple_seq stmts;
4436 arginfo[i].op
4437 = force_gimple_operand (unshare_expr (arginfo[i].op),
4438 &stmts, true, NULL_TREE);
4439 if (stmts != NULL)
4441 basic_block new_bb;
4442 edge pe = loop_preheader_edge (loop);
4443 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4444 gcc_assert (!new_bb);
4446 if (arginfo[i].simd_lane_linear)
4448 vargs.safe_push (arginfo[i].op);
4449 break;
4451 tree phi_res = copy_ssa_name (op);
4452 gphi *new_phi = create_phi_node (phi_res, loop->header);
4453 add_phi_arg (new_phi, arginfo[i].op,
4454 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4455 enum tree_code code
4456 = POINTER_TYPE_P (TREE_TYPE (op))
4457 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4458 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4459 ? sizetype : TREE_TYPE (op);
4460 poly_widest_int cst
4461 = wi::mul (bestn->simdclone->args[i].linear_step,
4462 ncopies * nunits);
4463 tree tcst = wide_int_to_tree (type, cst);
4464 tree phi_arg = copy_ssa_name (op);
4465 gassign *new_stmt
4466 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4467 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4468 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4469 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4470 UNKNOWN_LOCATION);
4471 arginfo[i].op = phi_res;
4472 vargs.safe_push (phi_res);
4474 else
4476 enum tree_code code
4477 = POINTER_TYPE_P (TREE_TYPE (op))
4478 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4479 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4480 ? sizetype : TREE_TYPE (op);
4481 poly_widest_int cst
4482 = wi::mul (bestn->simdclone->args[i].linear_step,
4483 j * nunits);
4484 tree tcst = wide_int_to_tree (type, cst);
4485 new_temp = make_ssa_name (TREE_TYPE (op));
4486 gassign *new_stmt
4487 = gimple_build_assign (new_temp, code,
4488 arginfo[i].op, tcst);
4489 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4490 vargs.safe_push (new_temp);
4492 break;
4493 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4494 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4495 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4496 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4497 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4498 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4499 default:
4500 gcc_unreachable ();
4504 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4505 if (vec_dest)
4507 gcc_assert (ratype
4508 || known_eq (simd_clone_subparts (rtype), nunits));
4509 if (ratype)
4510 new_temp = create_tmp_var (ratype);
4511 else if (useless_type_conversion_p (vectype, rtype))
4512 new_temp = make_ssa_name (vec_dest, new_call);
4513 else
4514 new_temp = make_ssa_name (rtype, new_call);
4515 gimple_call_set_lhs (new_call, new_temp);
4517 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4518 gimple *new_stmt = new_call;
4520 if (vec_dest)
4522 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4524 unsigned int k, l;
4525 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4526 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4527 k = vector_unroll_factor (nunits,
4528 simd_clone_subparts (vectype));
4529 gcc_assert ((k & (k - 1)) == 0);
4530 for (l = 0; l < k; l++)
4532 tree t;
4533 if (ratype)
4535 t = build_fold_addr_expr (new_temp);
4536 t = build2 (MEM_REF, vectype, t,
4537 build_int_cst (TREE_TYPE (t), l * bytes));
4539 else
4540 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4541 bitsize_int (prec), bitsize_int (l * prec));
4542 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4543 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4545 if (j == 0 && l == 0)
4546 *vec_stmt = new_stmt;
4547 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4550 if (ratype)
4551 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4552 continue;
4554 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4556 unsigned int k = (simd_clone_subparts (vectype)
4557 / simd_clone_subparts (rtype));
4558 gcc_assert ((k & (k - 1)) == 0);
4559 if ((j & (k - 1)) == 0)
4560 vec_alloc (ret_ctor_elts, k);
4561 if (ratype)
4563 unsigned int m, o;
4564 o = vector_unroll_factor (nunits,
4565 simd_clone_subparts (rtype));
4566 for (m = 0; m < o; m++)
4568 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4569 size_int (m), NULL_TREE, NULL_TREE);
4570 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4571 tem);
4572 vect_finish_stmt_generation (vinfo, stmt_info,
4573 new_stmt, gsi);
4574 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4575 gimple_assign_lhs (new_stmt));
4577 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4579 else
4580 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4581 if ((j & (k - 1)) != k - 1)
4582 continue;
4583 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4584 new_stmt
4585 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4586 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4588 if ((unsigned) j == k - 1)
4589 *vec_stmt = new_stmt;
4590 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4591 continue;
4593 else if (ratype)
4595 tree t = build_fold_addr_expr (new_temp);
4596 t = build2 (MEM_REF, vectype, t,
4597 build_int_cst (TREE_TYPE (t), 0));
4598 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4599 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4600 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4602 else if (!useless_type_conversion_p (vectype, rtype))
4604 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4605 new_stmt
4606 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4607 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4611 if (j == 0)
4612 *vec_stmt = new_stmt;
4613 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4616 for (i = 0; i < nargs; ++i)
4618 vec<tree> oprndsi = vec_oprnds[i];
4619 oprndsi.release ();
4621 vargs.release ();
4623 /* The call in STMT might prevent it from being removed in DCE.
4624 We however cannot remove it here, due to the way the SSA name
4625 it defines is mapped to the new definition. So just replace the
4626 rhs of the statement with something harmless. */
4628 if (slp_node)
4629 return true;
4631 gimple *new_stmt;
4632 if (scalar_dest)
4634 type = TREE_TYPE (scalar_dest);
4635 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4636 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4638 else
4639 new_stmt = gimple_build_nop ();
4640 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4641 unlink_stmt_vdef (stmt);
4643 return true;
4647 /* Function vect_gen_widened_results_half
4649 Create a vector stmt whose code, number of operands, and result
4650 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4651 VEC_OPRND0 and VEC_OPRND1 (VEC_OPRND1 is ignored unless OP_TYPE is
4652 binary_op). The new vector stmt is always a GIMPLE assignment and
4653 is inserted at GSI.
4654 STMT_INFO is the original scalar stmt that we are vectorizing. */
4656 static gimple *
4657 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4658 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4659 tree vec_dest, gimple_stmt_iterator *gsi,
4660 stmt_vec_info stmt_info)
4662 gimple *new_stmt;
4663 tree new_temp;
4665 /* Generate half of the widened result: */
4666 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4667 if (op_type != binary_op)
4668 vec_oprnd1 = NULL;
4669 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4670 new_temp = make_ssa_name (vec_dest, new_stmt);
4671 gimple_assign_set_lhs (new_stmt, new_temp);
4672 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4674 return new_stmt;
4678 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4679 For multi-step conversions store the resulting vectors and call the function
4680 recursively. */
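/* Schematically (names illustrative), one recursion level packs its inputs
   pairwise, e.g. four V4SI operands become two V8HI vectors:

       vect_d0 = VEC_PACK_TRUNC_EXPR <vop0, vop1>;
       vect_d1 = VEC_PACK_TRUNC_EXPR <vop2, vop3>;

   and for a multi-step conversion the recursive call below then packs those
   into a single V16QI vector. */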
4682 static void
4683 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4684 int multi_step_cvt,
4685 stmt_vec_info stmt_info,
4686 vec<tree> &vec_dsts,
4687 gimple_stmt_iterator *gsi,
4688 slp_tree slp_node, enum tree_code code)
4690 unsigned int i;
4691 tree vop0, vop1, new_tmp, vec_dest;
4693 vec_dest = vec_dsts.pop ();
4695 for (i = 0; i < vec_oprnds->length (); i += 2)
4697 /* Create demotion operation. */
4698 vop0 = (*vec_oprnds)[i];
4699 vop1 = (*vec_oprnds)[i + 1];
4700 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4701 new_tmp = make_ssa_name (vec_dest, new_stmt);
4702 gimple_assign_set_lhs (new_stmt, new_tmp);
4703 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4705 if (multi_step_cvt)
4706 /* Store the resulting vector for next recursive call. */
4707 (*vec_oprnds)[i/2] = new_tmp;
4708 else
4710 /* This is the last step of the conversion sequence. Store the
4711 vectors in SLP_NODE or in vector info of the scalar statement
4712 (or in STMT_VINFO_RELATED_STMT chain). */
4713 if (slp_node)
4714 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4715 else
4716 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4720 /* For multi-step demotion operations we first generate demotion operations
4721 from the source type to the intermediate types, and then combine the
4722 results (stored in VEC_OPRNDS) in a final demotion operation to the
4723 destination type. */
4724 if (multi_step_cvt)
4726 /* At each level of recursion we have half of the operands we had at the
4727 previous level. */
4728 vec_oprnds->truncate ((i+1)/2);
4729 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4730 multi_step_cvt - 1,
4731 stmt_info, vec_dsts, gsi,
4732 slp_node, VEC_PACK_TRUNC_EXPR);
4735 vec_dsts.quick_push (vec_dest);
4739 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4740 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4741 STMT_INFO. For multi-step conversions store the resulting vectors and
4742 call the function recursively. */
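/* Schematically (names illustrative), each input produces two widened result
   vectors via the lo/hi (or even/odd) codes chosen by
   supportable_widening_operation, e.g. for WIDEN_MULT_EXPR:

       vect_lo = VEC_WIDEN_MULT_LO_EXPR <vop0, vop1>;
       vect_hi = VEC_WIDEN_MULT_HI_EXPR <vop0, vop1>;

   so on return *VEC_OPRNDS0 holds twice as many vectors as before. */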
4744 static void
4745 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4746 vec<tree> *vec_oprnds0,
4747 vec<tree> *vec_oprnds1,
4748 stmt_vec_info stmt_info, tree vec_dest,
4749 gimple_stmt_iterator *gsi,
4750 enum tree_code code1,
4751 enum tree_code code2, int op_type)
4753 int i;
4754 tree vop0, vop1, new_tmp1, new_tmp2;
4755 gimple *new_stmt1, *new_stmt2;
4756 vec<tree> vec_tmp = vNULL;
4758 vec_tmp.create (vec_oprnds0->length () * 2);
4759 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4761 if (op_type == binary_op)
4762 vop1 = (*vec_oprnds1)[i];
4763 else
4764 vop1 = NULL_TREE;
4766 /* Generate the two halves of the promotion operation. */
4767 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4768 op_type, vec_dest, gsi,
4769 stmt_info);
4770 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4771 op_type, vec_dest, gsi,
4772 stmt_info);
4773 if (is_gimple_call (new_stmt1))
4775 new_tmp1 = gimple_call_lhs (new_stmt1);
4776 new_tmp2 = gimple_call_lhs (new_stmt2);
4778 else
4780 new_tmp1 = gimple_assign_lhs (new_stmt1);
4781 new_tmp2 = gimple_assign_lhs (new_stmt2);
4784 /* Store the results for the next step. */
4785 vec_tmp.quick_push (new_tmp1);
4786 vec_tmp.quick_push (new_tmp2);
4789 vec_oprnds0->release ();
4790 *vec_oprnds0 = vec_tmp;
4793 /* Create vectorized promotion stmts for widening stmts using only half the
4794 potential vector size for input. */
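/* Roughly (names illustrative), for the nunits_in == nunits_out case handled
   here each operand is first widened to the type of VEC_DEST with a NOP_EXPR
   and CODE1 (e.g. PLUS_EXPR for a scalar WIDEN_PLUS_EXPR) is then applied
   directly:

       vect_w0 = (out_type) vop0;
       vect_w1 = (out_type) vop1;
       vect_res = vect_w0 CODE1 vect_w1;

   with the second conversion skipped when VOP1 is not a vector. */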
4795 static void
4796 vect_create_half_widening_stmts (vec_info *vinfo,
4797 vec<tree> *vec_oprnds0,
4798 vec<tree> *vec_oprnds1,
4799 stmt_vec_info stmt_info, tree vec_dest,
4800 gimple_stmt_iterator *gsi,
4801 enum tree_code code1,
4802 int op_type)
4804 int i;
4805 tree vop0, vop1;
4806 gimple *new_stmt1;
4807 gimple *new_stmt2;
4808 gimple *new_stmt3;
4809 vec<tree> vec_tmp = vNULL;
4811 vec_tmp.create (vec_oprnds0->length ());
4812 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4814 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4816 gcc_assert (op_type == binary_op);
4817 vop1 = (*vec_oprnds1)[i];
4819 /* Widen the first vector input. */
4820 out_type = TREE_TYPE (vec_dest);
4821 new_tmp1 = make_ssa_name (out_type);
4822 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4823 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4824 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4826 /* Widen the second vector input. */
4827 new_tmp2 = make_ssa_name (out_type);
4828 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4829 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4830 /* Perform the operation, with both vector inputs widened. */
4831 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4833 else
4835 /* Perform the operation, with the single vector input widened. */
4836 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4839 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4840 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4841 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4843 /* Store the results for the next step. */
4844 vec_tmp.quick_push (new_tmp3);
4847 vec_oprnds0->release ();
4848 *vec_oprnds0 = vec_tmp;
4852 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4853 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4854 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4855 Return true if STMT_INFO is vectorizable in this way. */
4857 static bool
4858 vectorizable_conversion (vec_info *vinfo,
4859 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4860 gimple **vec_stmt, slp_tree slp_node,
4861 stmt_vector_for_cost *cost_vec)
4863 tree vec_dest;
4864 tree scalar_dest;
4865 tree op0, op1 = NULL_TREE;
4866 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4867 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4868 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4869 tree new_temp;
4870 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4871 int ndts = 2;
4872 poly_uint64 nunits_in;
4873 poly_uint64 nunits_out;
4874 tree vectype_out, vectype_in;
4875 int ncopies, i;
4876 tree lhs_type, rhs_type;
4877 enum { NARROW, NONE, WIDEN } modifier;
4878 vec<tree> vec_oprnds0 = vNULL;
4879 vec<tree> vec_oprnds1 = vNULL;
4880 tree vop0;
4881 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4882 int multi_step_cvt = 0;
4883 vec<tree> interm_types = vNULL;
4884 tree intermediate_type, cvt_type = NULL_TREE;
4885 int op_type;
4886 unsigned short fltsz;
4888 /* Is STMT a vectorizable conversion? */
4890 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4891 return false;
4893 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4894 && ! vec_stmt)
4895 return false;
4897 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4898 if (!stmt)
4899 return false;
4901 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4902 return false;
4904 code = gimple_assign_rhs_code (stmt);
4905 if (!CONVERT_EXPR_CODE_P (code)
4906 && code != FIX_TRUNC_EXPR
4907 && code != FLOAT_EXPR
4908 && code != WIDEN_PLUS_EXPR
4909 && code != WIDEN_MINUS_EXPR
4910 && code != WIDEN_MULT_EXPR
4911 && code != WIDEN_LSHIFT_EXPR)
4912 return false;
4914 bool widen_arith = (code == WIDEN_PLUS_EXPR
4915 || code == WIDEN_MINUS_EXPR
4916 || code == WIDEN_MULT_EXPR
4917 || code == WIDEN_LSHIFT_EXPR);
4918 op_type = TREE_CODE_LENGTH (code);
4920 /* Check types of lhs and rhs. */
4921 scalar_dest = gimple_assign_lhs (stmt);
4922 lhs_type = TREE_TYPE (scalar_dest);
4923 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4925 /* Check the operands of the operation. */
4926 slp_tree slp_op0, slp_op1 = NULL;
4927 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4928 0, &op0, &slp_op0, &dt[0], &vectype_in))
4930 if (dump_enabled_p ())
4931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4932 "use not simple.\n");
4933 return false;
4936 rhs_type = TREE_TYPE (op0);
4937 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4938 && !((INTEGRAL_TYPE_P (lhs_type)
4939 && INTEGRAL_TYPE_P (rhs_type))
4940 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4941 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4942 return false;
4944 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4945 && ((INTEGRAL_TYPE_P (lhs_type)
4946 && !type_has_mode_precision_p (lhs_type))
4947 || (INTEGRAL_TYPE_P (rhs_type)
4948 && !type_has_mode_precision_p (rhs_type))))
4950 if (dump_enabled_p ())
4951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4952 "type conversion to/from bit-precision unsupported."
4953 "\n");
4954 return false;
4957 if (op_type == binary_op)
4959 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4960 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4962 op1 = gimple_assign_rhs2 (stmt);
4963 tree vectype1_in;
4964 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4965 &op1, &slp_op1, &dt[1], &vectype1_in))
4967 if (dump_enabled_p ())
4968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4969 "use not simple.\n");
4970 return false;
4972 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4973 OP1. */
4974 if (!vectype_in)
4975 vectype_in = vectype1_in;
4978 /* If op0 is an external or constant def, infer the vector type
4979 from the scalar type. */
4980 if (!vectype_in)
4981 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4982 if (vec_stmt)
4983 gcc_assert (vectype_in);
4984 if (!vectype_in)
4986 if (dump_enabled_p ())
4987 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4988 "no vectype for scalar type %T\n", rhs_type);
4990 return false;
4993 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4994 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4996 if (dump_enabled_p ())
4997 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4998 "can't convert between boolean and non "
4999 "boolean vectors %T\n", rhs_type);
5001 return false;
5004 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5005 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5006 if (known_eq (nunits_out, nunits_in))
5007 if (widen_arith)
5008 modifier = WIDEN;
5009 else
5010 modifier = NONE;
5011 else if (multiple_p (nunits_out, nunits_in))
5012 modifier = NARROW;
5013 else
5015 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5016 modifier = WIDEN;
5019 /* Multiple types in SLP are handled by creating the appropriate number of
5020 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5021 case of SLP. */
5022 if (slp_node)
5023 ncopies = 1;
5024 else if (modifier == NARROW)
5025 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5026 else
5027 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5029 /* Sanity check: make sure that at least one copy of the vectorized stmt
5030 needs to be generated. */
5031 gcc_assert (ncopies >= 1);
5033 bool found_mode = false;
5034 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5035 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5036 opt_scalar_mode rhs_mode_iter;
5038 /* Supportable by target? */
5039 switch (modifier)
5041 case NONE:
5042 if (code != FIX_TRUNC_EXPR
5043 && code != FLOAT_EXPR
5044 && !CONVERT_EXPR_CODE_P (code))
5045 return false;
5046 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
5047 break;
5048 /* FALLTHRU */
5049 unsupported:
5050 if (dump_enabled_p ())
5051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5052 "conversion not supported by target.\n");
5053 return false;
5055 case WIDEN:
5056 if (known_eq (nunits_in, nunits_out))
5058 if (!supportable_half_widening_operation (code, vectype_out,
5059 vectype_in, &code1))
5060 goto unsupported;
5061 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5062 break;
5064 if (supportable_widening_operation (vinfo, code, stmt_info,
5065 vectype_out, vectype_in, &code1,
5066 &code2, &multi_step_cvt,
5067 &interm_types))
5069 /* Binary widening operation can only be supported directly by the
5070 architecture. */
5071 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5072 break;
5075 if (code != FLOAT_EXPR
5076 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5077 goto unsupported;
5079 fltsz = GET_MODE_SIZE (lhs_mode);
5080 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5082 rhs_mode = rhs_mode_iter.require ();
5083 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5084 break;
5086 cvt_type
5087 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5088 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5089 if (cvt_type == NULL_TREE)
5090 goto unsupported;
5092 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5094 if (!supportable_convert_operation (code, vectype_out,
5095 cvt_type, &codecvt1))
5096 goto unsupported;
5098 else if (!supportable_widening_operation (vinfo, code, stmt_info,
5099 vectype_out, cvt_type,
5100 &codecvt1, &codecvt2,
5101 &multi_step_cvt,
5102 &interm_types))
5103 continue;
5104 else
5105 gcc_assert (multi_step_cvt == 0);
5107 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5108 cvt_type,
5109 vectype_in, &code1, &code2,
5110 &multi_step_cvt, &interm_types))
5112 found_mode = true;
5113 break;
5117 if (!found_mode)
5118 goto unsupported;
5120 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5121 codecvt2 = ERROR_MARK;
5122 else
5124 multi_step_cvt++;
5125 interm_types.safe_push (cvt_type);
5126 cvt_type = NULL_TREE;
5128 break;
5130 case NARROW:
5131 gcc_assert (op_type == unary_op);
5132 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5133 &code1, &multi_step_cvt,
5134 &interm_types))
5135 break;
5137 if (code != FIX_TRUNC_EXPR
5138 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5139 goto unsupported;
5141 cvt_type
5142 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5143 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5144 if (cvt_type == NULL_TREE)
5145 goto unsupported;
5146 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5147 &codecvt1))
5148 goto unsupported;
5149 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5150 &code1, &multi_step_cvt,
5151 &interm_types))
5152 break;
5153 goto unsupported;
5155 default:
5156 gcc_unreachable ();
5159 if (!vec_stmt) /* transformation not required. */
5161 if (slp_node
5162 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5163 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5165 if (dump_enabled_p ())
5166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5167 "incompatible vector types for invariants\n");
5168 return false;
5170 DUMP_VECT_SCOPE ("vectorizable_conversion");
5171 if (modifier == NONE)
5173 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5174 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5175 cost_vec);
5177 else if (modifier == NARROW)
5179 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5180 /* The final packing step produces one vector result per copy. */
5181 unsigned int nvectors
5182 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5183 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5184 multi_step_cvt, cost_vec,
5185 widen_arith);
5187 else
5189 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5190 /* The initial unpacking step produces two vector results
5191 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5192 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5193 unsigned int nvectors
5194 = (slp_node
5195 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5196 : ncopies * 2);
5197 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5198 multi_step_cvt, cost_vec,
5199 widen_arith);
5201 interm_types.release ();
5202 return true;
5205 /* Transform. */
5206 if (dump_enabled_p ())
5207 dump_printf_loc (MSG_NOTE, vect_location,
5208 "transform conversion. ncopies = %d.\n", ncopies);
5210 if (op_type == binary_op)
5212 if (CONSTANT_CLASS_P (op0))
5213 op0 = fold_convert (TREE_TYPE (op1), op0);
5214 else if (CONSTANT_CLASS_P (op1))
5215 op1 = fold_convert (TREE_TYPE (op0), op1);
5218 /* In case of multi-step conversion, we first generate conversion operations
5219 to the intermediate types, and then from those types to the final one.
5220 We create vector destinations for the intermediate types (INTERM_TYPES) received
5221 from supportable_*_operation, and store them in the correct order
5222 for future use in vect_create_vectorized_*_stmts (). */
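/* For instance (modes purely illustrative), an int -> double FLOAT_EXPR for
   which the target has no direct V4SI -> double-vector conversion is handled
   above as WIDEN with cvt_type being a DImode vector type: the SImode
   vectors are first unpacked to DImode vectors via code1/code2, and those
   are then converted to double vectors with codecvt1 in the loop further
   below. */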
5223 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5224 vec_dest = vect_create_destination_var (scalar_dest,
5225 (cvt_type && modifier == WIDEN)
5226 ? cvt_type : vectype_out);
5227 vec_dsts.quick_push (vec_dest);
5229 if (multi_step_cvt)
5231 for (i = interm_types.length () - 1;
5232 interm_types.iterate (i, &intermediate_type); i--)
5234 vec_dest = vect_create_destination_var (scalar_dest,
5235 intermediate_type);
5236 vec_dsts.quick_push (vec_dest);
5240 if (cvt_type)
5241 vec_dest = vect_create_destination_var (scalar_dest,
5242 modifier == WIDEN
5243 ? vectype_out : cvt_type);
5245 int ninputs = 1;
5246 if (!slp_node)
5248 if (modifier == WIDEN)
5250 else if (modifier == NARROW)
5252 if (multi_step_cvt)
5253 ninputs = vect_pow2 (multi_step_cvt);
5254 ninputs *= 2;
5258 switch (modifier)
5260 case NONE:
5261 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5262 op0, &vec_oprnds0);
5263 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5265 /* Arguments are ready, create the new vector stmt. */
5266 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5267 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5268 new_temp = make_ssa_name (vec_dest, new_stmt);
5269 gimple_assign_set_lhs (new_stmt, new_temp);
5270 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5272 if (slp_node)
5273 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5274 else
5275 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5277 break;
5279 case WIDEN:
5280 /* In case the vectorization factor (VF) is bigger than the number
5281 of elements that we can fit in a vectype (nunits), we have to
5282 generate more than one vector stmt - i.e., we need to "unroll"
5283 the vector stmt by a factor VF/nunits. */
5284 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5285 op0, &vec_oprnds0,
5286 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5287 &vec_oprnds1);
5288 if (code == WIDEN_LSHIFT_EXPR)
5290 int oprnds_size = vec_oprnds0.length ();
5291 vec_oprnds1.create (oprnds_size);
5292 for (i = 0; i < oprnds_size; ++i)
5293 vec_oprnds1.quick_push (op1);
5295 /* Arguments are ready. Create the new vector stmts. */
5296 for (i = multi_step_cvt; i >= 0; i--)
5298 tree this_dest = vec_dsts[i];
5299 enum tree_code c1 = code1, c2 = code2;
5300 if (i == 0 && codecvt2 != ERROR_MARK)
5302 c1 = codecvt1;
5303 c2 = codecvt2;
5305 if (known_eq (nunits_out, nunits_in))
5306 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5307 &vec_oprnds1, stmt_info,
5308 this_dest, gsi,
5309 c1, op_type);
5310 else
5311 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5312 &vec_oprnds1, stmt_info,
5313 this_dest, gsi,
5314 c1, c2, op_type);
5317 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5319 gimple *new_stmt;
5320 if (cvt_type)
5322 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5323 new_temp = make_ssa_name (vec_dest);
5324 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5325 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5327 else
5328 new_stmt = SSA_NAME_DEF_STMT (vop0);
5330 if (slp_node)
5331 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5332 else
5333 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5335 break;
5337 case NARROW:
5338 /* In case the vectorization factor (VF) is bigger than the number
5339 of elements that we can fit in a vectype (nunits), we have to
5340 generate more than one vector stmt - i.e., we need to "unroll"
5341 the vector stmt by a factor VF/nunits. */
5342 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5343 op0, &vec_oprnds0);
5344 /* Arguments are ready. Create the new vector stmts. */
5345 if (cvt_type)
5346 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5348 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5349 new_temp = make_ssa_name (vec_dest);
5350 gassign *new_stmt
5351 = gimple_build_assign (new_temp, codecvt1, vop0);
5352 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5353 vec_oprnds0[i] = new_temp;
5356 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5357 multi_step_cvt,
5358 stmt_info, vec_dsts, gsi,
5359 slp_node, code1);
5360 break;
5362 if (!slp_node)
5363 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5365 vec_oprnds0.release ();
5366 vec_oprnds1.release ();
5367 interm_types.release ();
5369 return true;
5372 /* Return true if we can assume from the scalar form of STMT_INFO that
5373 neither the scalar nor the vector forms will generate code. STMT_INFO
5374 is known not to involve a data reference. */
5376 bool
5377 vect_nop_conversion_p (stmt_vec_info stmt_info)
5379 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5380 if (!stmt)
5381 return false;
5383 tree lhs = gimple_assign_lhs (stmt);
5384 tree_code code = gimple_assign_rhs_code (stmt);
5385 tree rhs = gimple_assign_rhs1 (stmt);
5387 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5388 return true;
5390 if (CONVERT_EXPR_CODE_P (code))
5391 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5393 return false;
5396 /* Function vectorizable_assignment.
5398 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5399 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5400 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5401 Return true if STMT_INFO is vectorizable in this way. */
5403 static bool
5404 vectorizable_assignment (vec_info *vinfo,
5405 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5406 gimple **vec_stmt, slp_tree slp_node,
5407 stmt_vector_for_cost *cost_vec)
5409 tree vec_dest;
5410 tree scalar_dest;
5411 tree op;
5412 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5413 tree new_temp;
5414 enum vect_def_type dt[1] = {vect_unknown_def_type};
5415 int ndts = 1;
5416 int ncopies;
5417 int i;
5418 vec<tree> vec_oprnds = vNULL;
5419 tree vop;
5420 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5421 enum tree_code code;
5422 tree vectype_in;
5424 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5425 return false;
5427 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5428 && ! vec_stmt)
5429 return false;
5431 /* Is vectorizable assignment? */
5432 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5433 if (!stmt)
5434 return false;
5436 scalar_dest = gimple_assign_lhs (stmt);
5437 if (TREE_CODE (scalar_dest) != SSA_NAME)
5438 return false;
5440 if (STMT_VINFO_DATA_REF (stmt_info))
5441 return false;
5443 code = gimple_assign_rhs_code (stmt);
5444 if (!(gimple_assign_single_p (stmt)
5445 || code == PAREN_EXPR
5446 || CONVERT_EXPR_CODE_P (code)))
5447 return false;
5449 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5450 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5452 /* Multiple types in SLP are handled by creating the appropriate number of
5453 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5454 case of SLP. */
5455 if (slp_node)
5456 ncopies = 1;
5457 else
5458 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5460 gcc_assert (ncopies >= 1);
5462 slp_tree slp_op;
5463 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5464 &dt[0], &vectype_in))
5466 if (dump_enabled_p ())
5467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5468 "use not simple.\n");
5469 return false;
5471 if (!vectype_in)
5472 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5474 /* We can handle NOP_EXPR conversions that do not change the number
5475 of elements or the vector size. */
5476 if ((CONVERT_EXPR_CODE_P (code)
5477 || code == VIEW_CONVERT_EXPR)
5478 && (!vectype_in
5479 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5480 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5481 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5482 return false;
5484 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5485 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5487 if (dump_enabled_p ())
5488 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5489 "can't convert between boolean and non "
5490 "boolean vectors %T\n", TREE_TYPE (op));
5492 return false;
5495 /* We do not handle bit-precision changes. */
5496 if ((CONVERT_EXPR_CODE_P (code)
5497 || code == VIEW_CONVERT_EXPR)
5498 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5499 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5500 || !type_has_mode_precision_p (TREE_TYPE (op)))
5501 /* But a conversion that does not change the bit-pattern is ok. */
5502 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5503 > TYPE_PRECISION (TREE_TYPE (op)))
5504 && TYPE_UNSIGNED (TREE_TYPE (op))))
5506 if (dump_enabled_p ())
5507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5508 "type conversion to/from bit-precision "
5509 "unsupported.\n");
5510 return false;
5513 if (!vec_stmt) /* transformation not required. */
5515 if (slp_node
5516 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5518 if (dump_enabled_p ())
5519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5520 "incompatible vector types for invariants\n");
5521 return false;
5523 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5524 DUMP_VECT_SCOPE ("vectorizable_assignment");
5525 if (!vect_nop_conversion_p (stmt_info))
5526 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5527 cost_vec);
5528 return true;
5531 /* Transform. */
5532 if (dump_enabled_p ())
5533 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5535 /* Handle def. */
5536 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5538 /* Handle use. */
5539 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5541 /* Arguments are ready. Create the new vector stmt. */
5542 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5544 if (CONVERT_EXPR_CODE_P (code)
5545 || code == VIEW_CONVERT_EXPR)
5546 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5547 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5548 new_temp = make_ssa_name (vec_dest, new_stmt);
5549 gimple_assign_set_lhs (new_stmt, new_temp);
5550 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5551 if (slp_node)
5552 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5553 else
5554 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5556 if (!slp_node)
5557 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5559 vec_oprnds.release ();
5560 return true;
5564 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5565 either as shift by a scalar or by a vector. */
5567 bool
5568 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5571 machine_mode vec_mode;
5572 optab optab;
5573 int icode;
5574 tree vectype;
5576 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5577 if (!vectype)
5578 return false;
5580 optab = optab_for_tree_code (code, vectype, optab_scalar);
5581 if (!optab
5582 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5584 optab = optab_for_tree_code (code, vectype, optab_vector);
5585 if (!optab
5586 || (optab_handler (optab, TYPE_MODE (vectype))
5587 == CODE_FOR_nothing))
5588 return false;
5591 vec_mode = TYPE_MODE (vectype);
5592 icode = (int) optab_handler (optab, vec_mode);
5593 if (icode == CODE_FOR_nothing)
5594 return false;
5596 return true;
5600 /* Function vectorizable_shift.
5602 Check if STMT_INFO performs a shift operation that can be vectorized.
5603 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5604 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5605 Return true if STMT_INFO is vectorizable in this way. */
5607 static bool
5608 vectorizable_shift (vec_info *vinfo,
5609 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5610 gimple **vec_stmt, slp_tree slp_node,
5611 stmt_vector_for_cost *cost_vec)
5613 tree vec_dest;
5614 tree scalar_dest;
5615 tree op0, op1 = NULL;
5616 tree vec_oprnd1 = NULL_TREE;
5617 tree vectype;
5618 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5619 enum tree_code code;
5620 machine_mode vec_mode;
5621 tree new_temp;
5622 optab optab;
5623 int icode;
5624 machine_mode optab_op2_mode;
5625 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5626 int ndts = 2;
5627 poly_uint64 nunits_in;
5628 poly_uint64 nunits_out;
5629 tree vectype_out;
5630 tree op1_vectype;
5631 int ncopies;
5632 int i;
5633 vec<tree> vec_oprnds0 = vNULL;
5634 vec<tree> vec_oprnds1 = vNULL;
5635 tree vop0, vop1;
5636 unsigned int k;
5637 bool scalar_shift_arg = true;
5638 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5639 bool incompatible_op1_vectype_p = false;
5641 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5642 return false;
5644 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5645 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5646 && ! vec_stmt)
5647 return false;
5649 /* Is STMT a vectorizable binary/unary operation? */
5650 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5651 if (!stmt)
5652 return false;
5654 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5655 return false;
5657 code = gimple_assign_rhs_code (stmt);
5659 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5660 || code == RROTATE_EXPR))
5661 return false;
5663 scalar_dest = gimple_assign_lhs (stmt);
5664 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5665 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5667 if (dump_enabled_p ())
5668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5669 "bit-precision shifts not supported.\n");
5670 return false;
5673 slp_tree slp_op0;
5674 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5675 0, &op0, &slp_op0, &dt[0], &vectype))
5677 if (dump_enabled_p ())
5678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5679 "use not simple.\n");
5680 return false;
5682 /* If op0 is an external or constant def, infer the vector type
5683 from the scalar type. */
5684 if (!vectype)
5685 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5686 if (vec_stmt)
5687 gcc_assert (vectype);
5688 if (!vectype)
5690 if (dump_enabled_p ())
5691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5692 "no vectype for scalar type\n");
5693 return false;
5696 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5697 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5698 if (maybe_ne (nunits_out, nunits_in))
5699 return false;
5701 stmt_vec_info op1_def_stmt_info;
5702 slp_tree slp_op1;
5703 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5704 &dt[1], &op1_vectype, &op1_def_stmt_info))
5706 if (dump_enabled_p ())
5707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5708 "use not simple.\n");
5709 return false;
5712 /* Multiple types in SLP are handled by creating the appropriate number of
5713 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5714 case of SLP. */
5715 if (slp_node)
5716 ncopies = 1;
5717 else
5718 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5720 gcc_assert (ncopies >= 1);
5722 /* Determine whether the shift amount is a vector or a scalar. If the
5723 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5725 if ((dt[1] == vect_internal_def
5726 || dt[1] == vect_induction_def
5727 || dt[1] == vect_nested_cycle)
5728 && !slp_node)
5729 scalar_shift_arg = false;
5730 else if (dt[1] == vect_constant_def
5731 || dt[1] == vect_external_def
5732 || dt[1] == vect_internal_def)
5734 /* In SLP we need to check whether the shift count is the same
5735 in all stmts; in loops, if it is a constant or invariant, it is
5736 always a scalar shift. */
5737 if (slp_node)
5739 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5740 stmt_vec_info slpstmt_info;
5742 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5744 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5745 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5746 scalar_shift_arg = false;
5749 /* For internal SLP defs we have to make sure we see scalar stmts
5750 for all vector elements.
5751 ??? For different vectors we could resort to a different
5752 scalar shift operand but code-generation below simply always
5753 takes the first. */
5754 if (dt[1] == vect_internal_def
5755 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5756 stmts.length ()))
5757 scalar_shift_arg = false;
5760 /* If the shift amount is computed by a pattern stmt we cannot
5761 use the scalar amount directly thus give up and use a vector
5762 shift. */
5763 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5764 scalar_shift_arg = false;
5766 else
5768 if (dump_enabled_p ())
5769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5770 "operand mode requires invariant argument.\n");
5771 return false;
5774 /* Vector shifted by vector. */
5775 bool was_scalar_shift_arg = scalar_shift_arg;
5776 if (!scalar_shift_arg)
5778 optab = optab_for_tree_code (code, vectype, optab_vector);
5779 if (dump_enabled_p ())
5780 dump_printf_loc (MSG_NOTE, vect_location,
5781 "vector/vector shift/rotate found.\n");
5783 if (!op1_vectype)
5784 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5785 slp_op1);
5786 incompatible_op1_vectype_p
5787 = (op1_vectype == NULL_TREE
5788 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5789 TYPE_VECTOR_SUBPARTS (vectype))
5790 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5791 if (incompatible_op1_vectype_p
5792 && (!slp_node
5793 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5794 || slp_op1->refcnt != 1))
5796 if (dump_enabled_p ())
5797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5798 "unusable type for last operand in"
5799 " vector/vector shift/rotate.\n");
5800 return false;
5803 /* See if the machine has a vector shifted by scalar insn and, if not,
5804 whether it has a vector shifted by vector insn. */
5805 else
5807 optab = optab_for_tree_code (code, vectype, optab_scalar);
5808 if (optab
5809 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5811 if (dump_enabled_p ())
5812 dump_printf_loc (MSG_NOTE, vect_location,
5813 "vector/scalar shift/rotate found.\n");
5815 else
5817 optab = optab_for_tree_code (code, vectype, optab_vector);
5818 if (optab
5819 && (optab_handler (optab, TYPE_MODE (vectype))
5820 != CODE_FOR_nothing))
5822 scalar_shift_arg = false;
5824 if (dump_enabled_p ())
5825 dump_printf_loc (MSG_NOTE, vect_location,
5826 "vector/vector shift/rotate found.\n");
5828 if (!op1_vectype)
5829 op1_vectype = get_vectype_for_scalar_type (vinfo,
5830 TREE_TYPE (op1),
5831 slp_op1);
5833 /* Unlike the other binary operators, shifts/rotates have
5834 the rhs being int, instead of the same type as the lhs,
5835 so make sure the scalar is the right type if we are
5836 dealing with vectors of long long/long/short/char. */
5837 incompatible_op1_vectype_p
5838 = (!op1_vectype
5839 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5840 TREE_TYPE (op1)));
5841 if (incompatible_op1_vectype_p
5842 && dt[1] == vect_internal_def)
5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5846 "unusable type for last operand in"
5847 " vector/vector shift/rotate.\n");
5848 return false;
5854 /* Supportable by target? */
5855 if (!optab)
5857 if (dump_enabled_p ())
5858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5859 "no optab.\n");
5860 return false;
5862 vec_mode = TYPE_MODE (vectype);
5863 icode = (int) optab_handler (optab, vec_mode);
5864 if (icode == CODE_FOR_nothing)
5866 if (dump_enabled_p ())
5867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5868 "op not supported by target.\n");
5869 return false;
5871 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5872 if (vect_emulated_vector_p (vectype))
5873 return false;
5875 if (!vec_stmt) /* transformation not required. */
5877 if (slp_node
5878 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5879 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5880 && (!incompatible_op1_vectype_p
5881 || dt[1] == vect_constant_def)
5882 && !vect_maybe_update_slp_op_vectype
5883 (slp_op1,
5884 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5886 if (dump_enabled_p ())
5887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5888 "incompatible vector types for invariants\n");
5889 return false;
5891 /* Now adjust the constant shift amount in place. */
5892 if (slp_node
5893 && incompatible_op1_vectype_p
5894 && dt[1] == vect_constant_def)
5896 for (unsigned i = 0;
5897 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5899 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5900 = fold_convert (TREE_TYPE (vectype),
5901 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5902 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5903 == INTEGER_CST));
5906 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5907 DUMP_VECT_SCOPE ("vectorizable_shift");
5908 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5909 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5910 return true;
5913 /* Transform. */
5915 if (dump_enabled_p ())
5916 dump_printf_loc (MSG_NOTE, vect_location,
5917 "transform binary/unary operation.\n");
5919 if (incompatible_op1_vectype_p && !slp_node)
5921 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5922 op1 = fold_convert (TREE_TYPE (vectype), op1);
5923 if (dt[1] != vect_constant_def)
5924 op1 = vect_init_vector (vinfo, stmt_info, op1,
5925 TREE_TYPE (vectype), NULL);
5928 /* Handle def. */
5929 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5931 if (scalar_shift_arg && dt[1] != vect_internal_def)
5933 /* Vector shl and shr insn patterns can be defined with scalar
5934 operand 2 (shift operand). In this case, use constant or loop
5935 invariant op1 directly, without extending it to vector mode
5936 first. */
5937 optab_op2_mode = insn_data[icode].operand[2].mode;
5938 if (!VECTOR_MODE_P (optab_op2_mode))
5940 if (dump_enabled_p ())
5941 dump_printf_loc (MSG_NOTE, vect_location,
5942 "operand 1 using scalar mode.\n");
5943 vec_oprnd1 = op1;
5944 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5945 vec_oprnds1.quick_push (vec_oprnd1);
5946 /* Store vec_oprnd1 for every vector stmt to be created.
5947 We check during the analysis that all the shift arguments
5948 are the same.
5949 TODO: Allow different constants for different vector
5950 stmts generated for an SLP instance. */
5951 for (k = 0;
5952 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5953 vec_oprnds1.quick_push (vec_oprnd1);
5956 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5958 if (was_scalar_shift_arg)
5960 /* If the argument was the same in all lanes create
5961 the correctly typed vector shift amount directly. */
5962 op1 = fold_convert (TREE_TYPE (vectype), op1);
5963 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5964 !loop_vinfo ? gsi : NULL);
5965 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5966 !loop_vinfo ? gsi : NULL);
5967 vec_oprnds1.create (slp_node->vec_stmts_size);
5968 for (k = 0; k < slp_node->vec_stmts_size; k++)
5969 vec_oprnds1.quick_push (vec_oprnd1);
5971 else if (dt[1] == vect_constant_def)
5972 /* The constant shift amount has been adjusted in place. */
5974 else
5975 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5978 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5979 (a special case for certain kinds of vector shifts); otherwise,
5980 operand 1 should be of a vector type (the usual case). */
5981 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5982 op0, &vec_oprnds0,
5983 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5985 /* Arguments are ready. Create the new vector stmt. */
5986 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5988 /* For internal defs where we need to use a scalar shift arg
5989 extract the first lane. */
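/* Sketch of the IL built below (illustrative only, assuming a V4SI shift
   amount vector): new_temp = BIT_FIELD_REF <vop1, 32, 0>, i.e. the scalar
   value of lane 0, which is then used as the common shift amount.  */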
5990 if (scalar_shift_arg && dt[1] == vect_internal_def)
5992 vop1 = vec_oprnds1[0];
5993 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5994 gassign *new_stmt
5995 = gimple_build_assign (new_temp,
5996 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5997 vop1,
5998 TYPE_SIZE (TREE_TYPE (new_temp)),
5999 bitsize_zero_node));
6000 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6001 vop1 = new_temp;
6003 else
6004 vop1 = vec_oprnds1[i];
6005 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6006 new_temp = make_ssa_name (vec_dest, new_stmt);
6007 gimple_assign_set_lhs (new_stmt, new_temp);
6008 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6009 if (slp_node)
6010 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6011 else
6012 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6015 if (!slp_node)
6016 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6018 vec_oprnds0.release ();
6019 vec_oprnds1.release ();
6021 return true;
6025 /* Function vectorizable_operation.
6027 Check if STMT_INFO performs a binary, unary or ternary operation that can
6028 be vectorized.
6029 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6030 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6031 Return true if STMT_INFO is vectorizable in this way. */
6033 static bool
6034 vectorizable_operation (vec_info *vinfo,
6035 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6036 gimple **vec_stmt, slp_tree slp_node,
6037 stmt_vector_for_cost *cost_vec)
6039 tree vec_dest;
6040 tree scalar_dest;
6041 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6042 tree vectype;
6043 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6044 enum tree_code code, orig_code;
6045 machine_mode vec_mode;
6046 tree new_temp;
6047 int op_type;
6048 optab optab;
6049 bool target_support_p;
6050 enum vect_def_type dt[3]
6051 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6052 int ndts = 3;
6053 poly_uint64 nunits_in;
6054 poly_uint64 nunits_out;
6055 tree vectype_out;
6056 int ncopies, vec_num;
6057 int i;
6058 vec<tree> vec_oprnds0 = vNULL;
6059 vec<tree> vec_oprnds1 = vNULL;
6060 vec<tree> vec_oprnds2 = vNULL;
6061 tree vop0, vop1, vop2;
6062 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6064 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6065 return false;
6067 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6068 && ! vec_stmt)
6069 return false;
6071 /* Is STMT a vectorizable binary/unary operation? */
6072 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6073 if (!stmt)
6074 return false;
6076 /* Loads and stores are handled in vectorizable_{load,store}. */
6077 if (STMT_VINFO_DATA_REF (stmt_info))
6078 return false;
6080 orig_code = code = gimple_assign_rhs_code (stmt);
6082 /* Shifts are handled in vectorizable_shift. */
6083 if (code == LSHIFT_EXPR
6084 || code == RSHIFT_EXPR
6085 || code == LROTATE_EXPR
6086 || code == RROTATE_EXPR)
6087 return false;
6089 /* Comparisons are handled in vectorizable_comparison. */
6090 if (TREE_CODE_CLASS (code) == tcc_comparison)
6091 return false;
6093 /* Conditions are handled in vectorizable_condition. */
6094 if (code == COND_EXPR)
6095 return false;
6097 /* For pointer addition and subtraction, we should use the normal
6098 plus and minus for the vector operation. */
6099 if (code == POINTER_PLUS_EXPR)
6100 code = PLUS_EXPR;
6101 if (code == POINTER_DIFF_EXPR)
6102 code = MINUS_EXPR;
6104 /* Support only unary or binary operations. */
6105 op_type = TREE_CODE_LENGTH (code);
6106 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6108 if (dump_enabled_p ())
6109 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6110 "num. args = %d (not unary/binary/ternary op).\n",
6111 op_type);
6112 return false;
6115 scalar_dest = gimple_assign_lhs (stmt);
6116 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6118 /* Most operations cannot handle bit-precision types without extra
6119 truncations. */
6120 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6121 if (!mask_op_p
6122 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6123 /* Exceptions are the bitwise binary operations. */
6124 && code != BIT_IOR_EXPR
6125 && code != BIT_XOR_EXPR
6126 && code != BIT_AND_EXPR)
6128 if (dump_enabled_p ())
6129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6130 "bit-precision arithmetic not supported.\n");
6131 return false;
6134 slp_tree slp_op0;
6135 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6136 0, &op0, &slp_op0, &dt[0], &vectype))
6138 if (dump_enabled_p ())
6139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6140 "use not simple.\n");
6141 return false;
6143 /* If op0 is an external or constant def, infer the vector type
6144 from the scalar type. */
6145 if (!vectype)
6147 /* For a boolean type we cannot determine the vectype from an
6148 invariant value (we don't know whether it is a vector
6149 of booleans or a vector of integers). Use the output
6150 vectype because operations on booleans don't change
6151 the type. */
6152 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6154 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6156 if (dump_enabled_p ())
6157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6158 "not supported operation on bool value.\n");
6159 return false;
6161 vectype = vectype_out;
6163 else
6164 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6165 slp_node);
6167 if (vec_stmt)
6168 gcc_assert (vectype);
6169 if (!vectype)
6171 if (dump_enabled_p ())
6172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6173 "no vectype for scalar type %T\n",
6174 TREE_TYPE (op0));
6176 return false;
6179 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6180 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6181 if (maybe_ne (nunits_out, nunits_in))
6182 return false;
6184 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6185 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6186 if (op_type == binary_op || op_type == ternary_op)
6188 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6189 1, &op1, &slp_op1, &dt[1], &vectype2))
6191 if (dump_enabled_p ())
6192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6193 "use not simple.\n");
6194 return false;
6196 if (vectype2
6197 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6198 return false;
6200 if (op_type == ternary_op)
6202 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6203 2, &op2, &slp_op2, &dt[2], &vectype3))
6205 if (dump_enabled_p ())
6206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6207 "use not simple.\n");
6208 return false;
6210 if (vectype3
6211 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6212 return false;
6215 /* Multiple types in SLP are handled by creating the appropriate number of
6216 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6217 case of SLP. */
6218 if (slp_node)
6220 ncopies = 1;
6221 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6223 else
6225 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6226 vec_num = 1;
6229 gcc_assert (ncopies >= 1);
6231 /* Reject attempts to combine mask types with nonmask types, e.g. if
6232 we have an AND between a (nonmask) boolean loaded from memory and
6233 a (mask) boolean result of a comparison.
6235 TODO: We could easily fix these cases up using pattern statements. */
6236 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6237 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6238 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6240 if (dump_enabled_p ())
6241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6242 "mixed mask and nonmask vector types\n");
6243 return false;
6246 /* Supportable by target? */
6248 vec_mode = TYPE_MODE (vectype);
6249 if (code == MULT_HIGHPART_EXPR)
6250 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6251 else
6253 optab = optab_for_tree_code (code, vectype, optab_default);
6254 if (!optab)
6256 if (dump_enabled_p ())
6257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6258 "no optab.\n");
6259 return false;
6261 target_support_p = (optab_handler (optab, vec_mode)
6262 != CODE_FOR_nothing);
6263 tree cst;
6264 if (!target_support_p
6265 && op1
6266 && (cst = uniform_integer_cst_p (op1)))
6267 target_support_p
6268 = targetm.vectorize.can_special_div_by_const (code, vectype,
6269 wi::to_wide (cst),
6270 NULL, NULL_RTX,
6271 NULL_RTX);
6274 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6275 if (!target_support_p)
6277 if (dump_enabled_p ())
6278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6279 "op not supported by target.\n");
6280 /* Check only during analysis. */
6281 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6282 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6283 return false;
6284 if (dump_enabled_p ())
6285 dump_printf_loc (MSG_NOTE, vect_location,
6286 "proceeding using word mode.\n");
6287 using_emulated_vectors_p = true;
6290 if (using_emulated_vectors_p
6291 && !vect_can_vectorize_without_simd_p (code))
6293 if (dump_enabled_p ())
6294 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6295 return false;
6298 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6299 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6300 internal_fn cond_fn = get_conditional_internal_fn (code);
6302 if (!vec_stmt) /* transformation not required. */
6304 /* If this operation is part of a reduction, a fully-masked loop
6305 should only change the active lanes of the reduction chain,
6306 keeping the inactive lanes as-is. */
6307 if (loop_vinfo
6308 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6309 && reduc_idx >= 0)
6311 if (cond_fn == IFN_LAST
6312 || !direct_internal_fn_supported_p (cond_fn, vectype,
6313 OPTIMIZE_FOR_SPEED))
6315 if (dump_enabled_p ())
6316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6317 "can't use a fully-masked loop because no"
6318 " conditional operation is available.\n");
6319 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6321 else
6322 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6323 vectype, NULL);
6326 /* Put types on constant and invariant SLP children. */
6327 if (slp_node
6328 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6329 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6330 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6332 if (dump_enabled_p ())
6333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6334 "incompatible vector types for invariants\n");
6335 return false;
6338 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6339 DUMP_VECT_SCOPE ("vectorizable_operation");
6340 vect_model_simple_cost (vinfo, stmt_info,
6341 ncopies, dt, ndts, slp_node, cost_vec);
6342 if (using_emulated_vectors_p)
6344 /* The above vect_model_simple_cost call handles constants
6345 in the prologue and (mis-)costs one of the stmts as
6346 vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6347 for the actual lowering that will be applied. */
6348 unsigned n
6349 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6350 switch (code)
6352 case PLUS_EXPR:
6353 n *= 5;
6354 break;
6355 case MINUS_EXPR:
6356 n *= 6;
6357 break;
6358 case NEGATE_EXPR:
6359 n *= 4;
6360 break;
6361 default:;
6363 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6365 return true;
6368 /* Transform. */
6370 if (dump_enabled_p ())
6371 dump_printf_loc (MSG_NOTE, vect_location,
6372 "transform binary/unary operation.\n");
6374 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6376 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6377 vectors with unsigned elements, but the result is signed. So, we
6378 need to compute the MINUS_EXPR into vectype temporary and
6379 VIEW_CONVERT_EXPR it into the final vectype_out result. */
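/* Illustrative example, not part of the original source: for a
   POINTER_DIFF_EXPR the IL generated below has the shape
     tmp_1 = MINUS_EXPR <vop0, vop1>;                (in the unsigned VECTYPE)
     res_2 = VIEW_CONVERT_EXPR <vectype_out> (tmp_1);
   so the subtraction is carried out on unsigned elements and only the
   final view-convert switches to the signed result type.  */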
6380 tree vec_cvt_dest = NULL_TREE;
6381 if (orig_code == POINTER_DIFF_EXPR)
6383 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6384 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6386 /* Handle def. */
6387 else
6388 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6390 /* In case the vectorization factor (VF) is bigger than the number
6391 of elements that we can fit in a vectype (nunits), we have to generate
6392 more than one vector stmt - i.e., we need to "unroll" the
6393 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6394 from one copy of the vector stmt to the next, in the field
6395 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6396 stages to find the correct vector defs to be used when vectorizing
6397 stmts that use the defs of the current stmt. The example below
6398 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6399 we need to create 4 vectorized stmts):
6401 before vectorization:
6402 RELATED_STMT VEC_STMT
6403 S1: x = memref - -
6404 S2: z = x + 1 - -
6406 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6407 there):
6408 RELATED_STMT VEC_STMT
6409 VS1_0: vx0 = memref0 VS1_1 -
6410 VS1_1: vx1 = memref1 VS1_2 -
6411 VS1_2: vx2 = memref2 VS1_3 -
6412 VS1_3: vx3 = memref3 - -
6413 S1: x = load - VS1_0
6414 S2: z = x + 1 - -
6416 step2: vectorize stmt S2 (done here):
6417 To vectorize stmt S2 we first need to find the relevant vector
6418 def for the first operand 'x'. This is, as usual, obtained from
6419 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6420 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6421 relevant vector def 'vx0'. Having found 'vx0' we can generate
6422 the vector stmt VS2_0, and as usual, record it in the
6423 STMT_VINFO_VEC_STMT of stmt S2.
6424 When creating the second copy (VS2_1), we obtain the relevant vector
6425 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6426 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6427 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6428 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6429 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6430 chain of stmts and pointers:
6431 RELATED_STMT VEC_STMT
6432 VS1_0: vx0 = memref0 VS1_1 -
6433 VS1_1: vx1 = memref1 VS1_2 -
6434 VS1_2: vx2 = memref2 VS1_3 -
6435 VS1_3: vx3 = memref3 - -
6436 S1: x = load - VS1_0
6437 VS2_0: vz0 = vx0 + v1 VS2_1 -
6438 VS2_1: vz1 = vx1 + v1 VS2_2 -
6439 VS2_2: vz2 = vx2 + v1 VS2_3 -
6440 VS2_3: vz3 = vx3 + v1 - -
6441 S2: z = x + 1 - VS2_0 */
6443 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6444 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6445 /* Arguments are ready. Create the new vector stmt. */
6446 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6448 gimple *new_stmt = NULL;
6449 vop1 = ((op_type == binary_op || op_type == ternary_op)
6450 ? vec_oprnds1[i] : NULL_TREE);
6451 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6452 if (masked_loop_p && reduc_idx >= 0)
6454 /* Perform the operation on active elements only and take
6455 inactive elements from the reduction chain input. */
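/* Illustrative sketch, assuming code == PLUS_EXPR so that cond_fn is
   IFN_COND_ADD: the call built below has the form
     new_temp = .COND_ADD (loop_mask, vop0, vop1, reduction input);
   where the last operand supplies the values of the inactive lanes.  */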
6456 gcc_assert (!vop2);
6457 vop2 = reduc_idx == 1 ? vop1 : vop0;
6458 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6459 vectype, i);
6460 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6461 vop0, vop1, vop2);
6462 new_temp = make_ssa_name (vec_dest, call);
6463 gimple_call_set_lhs (call, new_temp);
6464 gimple_call_set_nothrow (call, true);
6465 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6466 new_stmt = call;
6468 else
6470 tree mask = NULL_TREE;
6471 /* When combining two masks, check whether either of them is elsewhere
6472 combined with a loop mask; if so, we can mark the new combined
6473 mask as not needing to be combined with a loop mask again. */
6474 if (masked_loop_p
6475 && code == BIT_AND_EXPR
6476 && VECTOR_BOOLEAN_TYPE_P (vectype))
6478 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6479 ncopies}))
6481 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6482 vectype, i);
6484 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6485 vop0, gsi);
6488 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6489 ncopies }))
6491 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6492 vectype, i);
6494 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6495 vop1, gsi);
6499 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6500 new_temp = make_ssa_name (vec_dest, new_stmt);
6501 gimple_assign_set_lhs (new_stmt, new_temp);
6502 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6503 if (using_emulated_vectors_p)
6504 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
6506 /* Enter the combined value into the vector cond hash so we don't
6507 AND it with a loop mask again. */
6508 if (mask)
6509 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6511 if (vec_cvt_dest)
6513 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6514 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6515 new_temp);
6516 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6517 gimple_assign_set_lhs (new_stmt, new_temp);
6518 vect_finish_stmt_generation (vinfo, stmt_info,
6519 new_stmt, gsi);
6522 if (slp_node)
6523 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6524 else
6525 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6528 if (!slp_node)
6529 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6531 vec_oprnds0.release ();
6532 vec_oprnds1.release ();
6533 vec_oprnds2.release ();
6535 return true;
6538 /* A helper function to ensure data reference DR_INFO's base alignment. */
6540 static void
6541 ensure_base_align (dr_vec_info *dr_info)
6543 /* Alignment is only analyzed for the first element of a DR group;
6544 use that to determine the base alignment we need to enforce. */
6545 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6546 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6548 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6550 if (dr_info->base_misaligned)
6552 tree base_decl = dr_info->base_decl;
6554 // We should only be able to increase the alignment of a base object if
6555 // we know what its new alignment should be at compile time.
6556 unsigned HOST_WIDE_INT align_base_to =
6557 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6559 if (decl_in_symtab_p (base_decl))
6560 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6561 else if (DECL_ALIGN (base_decl) < align_base_to)
6563 SET_DECL_ALIGN (base_decl, align_base_to);
6564 DECL_USER_ALIGN (base_decl) = 1;
6566 dr_info->base_misaligned = false;
6571 /* Function get_group_alias_ptr_type.
6573 Return the alias type for the group starting at FIRST_STMT_INFO. */
6575 static tree
6576 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6578 struct data_reference *first_dr, *next_dr;
6580 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6581 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6582 while (next_stmt_info)
6584 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6585 if (get_alias_set (DR_REF (first_dr))
6586 != get_alias_set (DR_REF (next_dr)))
6588 if (dump_enabled_p ())
6589 dump_printf_loc (MSG_NOTE, vect_location,
6590 "conflicting alias set types.\n");
6591 return ptr_type_node;
6593 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6595 return reference_alias_ptr_type (DR_REF (first_dr));
6599 /* Function scan_operand_equal_p.
6601 Helper function for check_scan_store. Compare two references
6602 with .GOMP_SIMD_LANE bases. */
6604 static bool
6605 scan_operand_equal_p (tree ref1, tree ref2)
6607 tree ref[2] = { ref1, ref2 };
6608 poly_int64 bitsize[2], bitpos[2];
6609 tree offset[2], base[2];
6610 for (int i = 0; i < 2; ++i)
6612 machine_mode mode;
6613 int unsignedp, reversep, volatilep = 0;
6614 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6615 &offset[i], &mode, &unsignedp,
6616 &reversep, &volatilep);
6617 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6618 return false;
6619 if (TREE_CODE (base[i]) == MEM_REF
6620 && offset[i] == NULL_TREE
6621 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6623 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6624 if (is_gimple_assign (def_stmt)
6625 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6626 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6627 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6629 if (maybe_ne (mem_ref_offset (base[i]), 0))
6630 return false;
6631 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6632 offset[i] = gimple_assign_rhs2 (def_stmt);
6637 if (!operand_equal_p (base[0], base[1], 0))
6638 return false;
6639 if (maybe_ne (bitsize[0], bitsize[1]))
6640 return false;
6641 if (offset[0] != offset[1])
6643 if (!offset[0] || !offset[1])
6644 return false;
6645 if (!operand_equal_p (offset[0], offset[1], 0))
6647 tree step[2];
6648 for (int i = 0; i < 2; ++i)
6650 step[i] = integer_one_node;
6651 if (TREE_CODE (offset[i]) == SSA_NAME)
6653 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6654 if (is_gimple_assign (def_stmt)
6655 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6656 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6657 == INTEGER_CST))
6659 step[i] = gimple_assign_rhs2 (def_stmt);
6660 offset[i] = gimple_assign_rhs1 (def_stmt);
6663 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6665 step[i] = TREE_OPERAND (offset[i], 1);
6666 offset[i] = TREE_OPERAND (offset[i], 0);
6668 tree rhs1 = NULL_TREE;
6669 if (TREE_CODE (offset[i]) == SSA_NAME)
6671 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6672 if (gimple_assign_cast_p (def_stmt))
6673 rhs1 = gimple_assign_rhs1 (def_stmt);
6675 else if (CONVERT_EXPR_P (offset[i]))
6676 rhs1 = TREE_OPERAND (offset[i], 0);
6677 if (rhs1
6678 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6679 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6680 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6681 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6682 offset[i] = rhs1;
6684 if (!operand_equal_p (offset[0], offset[1], 0)
6685 || !operand_equal_p (step[0], step[1], 0))
6686 return false;
6689 return true;
6693 enum scan_store_kind {
6694 /* Normal permutation. */
6695 scan_store_kind_perm,
6697 /* Whole vector left shift permutation with zero init. */
6698 scan_store_kind_lshift_zero,
6700 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6701 scan_store_kind_lshift_cond
6704 /* Function scan_store_can_perm_p.
6706 Verify whether we can perform the needed permutations or whole vector shifts.
6707 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6708 USE_WHOLE_VECTOR is a vector of enum scan_store_kind recording which
6709 operation to do at each step. */
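/* Illustrative note, not part of the original source: for a V8SI vectype,
   nunits == 8 and exact_log2 (8) == 3, so on success the function returns 3;
   USE_WHOLE_VECTOR, when supplied, is filled with one scan_store_kind per
   step starting from the first step that needs a whole-vector shift.  */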
6711 static int
6712 scan_store_can_perm_p (tree vectype, tree init,
6713 vec<enum scan_store_kind> *use_whole_vector = NULL)
6715 enum machine_mode vec_mode = TYPE_MODE (vectype);
6716 unsigned HOST_WIDE_INT nunits;
6717 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6718 return -1;
6719 int units_log2 = exact_log2 (nunits);
6720 if (units_log2 <= 0)
6721 return -1;
6723 int i;
6724 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6725 for (i = 0; i <= units_log2; ++i)
6727 unsigned HOST_WIDE_INT j, k;
6728 enum scan_store_kind kind = scan_store_kind_perm;
6729 vec_perm_builder sel (nunits, nunits, 1);
6730 sel.quick_grow (nunits);
6731 if (i == units_log2)
6733 for (j = 0; j < nunits; ++j)
6734 sel[j] = nunits - 1;
6736 else
6738 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6739 sel[j] = j;
6740 for (k = 0; j < nunits; ++j, ++k)
6741 sel[j] = nunits + k;
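/* Illustrative selector values, not part of the original source, for
   nunits == 8: step i == 1 builds { 0, 1, 8, 9, 10, 11, 12, 13 } and the
   final i == units_log2 step builds { 7, 7, 7, 7, 7, 7, 7, 7 }, matching
   the VEC_PERM_EXPRs shown in the check_scan_store comment below.  */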
6743 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6744 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
6746 if (i == units_log2)
6747 return -1;
6749 if (whole_vector_shift_kind == scan_store_kind_perm)
6751 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6752 return -1;
6753 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6754 /* Whole vector shifts shift in zeros, so if init is an all-zero
6755 constant, there is no need to do anything further. */
6756 if ((TREE_CODE (init) != INTEGER_CST
6757 && TREE_CODE (init) != REAL_CST)
6758 || !initializer_zerop (init))
6760 tree masktype = truth_type_for (vectype);
6761 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6762 return -1;
6763 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6766 kind = whole_vector_shift_kind;
6768 if (use_whole_vector)
6770 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6771 use_whole_vector->safe_grow_cleared (i, true);
6772 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6773 use_whole_vector->safe_push (kind);
6777 return units_log2;
6781 /* Function check_scan_store.
6783 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6785 static bool
6786 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6787 enum vect_def_type rhs_dt, bool slp, tree mask,
6788 vect_memory_access_type memory_access_type)
6790 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6791 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6792 tree ref_type;
6794 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6795 if (slp
6796 || mask
6797 || memory_access_type != VMAT_CONTIGUOUS
6798 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6799 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6800 || loop_vinfo == NULL
6801 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6802 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6803 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6804 || !integer_zerop (DR_INIT (dr_info->dr))
6805 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6806 || !alias_sets_conflict_p (get_alias_set (vectype),
6807 get_alias_set (TREE_TYPE (ref_type))))
6809 if (dump_enabled_p ())
6810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6811 "unsupported OpenMP scan store.\n");
6812 return false;
6815 /* We need to pattern match code built by OpenMP lowering and simplified
6816 by subsequent optimizations into something we can handle.
6817 #pragma omp simd reduction(inscan,+:r)
6818 for (...)
6820 r += something ();
6821 #pragma omp scan inclusive (r)
6822 use (r);
6824 shall have body with:
6825 // Initialization for input phase, store the reduction initializer:
6826 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6827 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6828 D.2042[_21] = 0;
6829 // Actual input phase:
6831 r.0_5 = D.2042[_20];
6832 _6 = _4 + r.0_5;
6833 D.2042[_20] = _6;
6834 // Initialization for scan phase:
6835 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6836 _26 = D.2043[_25];
6837 _27 = D.2042[_25];
6838 _28 = _26 + _27;
6839 D.2043[_25] = _28;
6840 D.2042[_25] = _28;
6841 // Actual scan phase:
6843 r.1_8 = D.2042[_20];
6845 The "omp simd array" variable D.2042 holds the privatized copy used
6846 inside the loop and D.2043 is another one that holds copies of
6847 the current original list item. The separate GOMP_SIMD_LANE ifn
6848 kinds are there in order to allow optimizing the initializer store
6849 and combiner sequence, e.g. if it is originally some C++ish user
6850 defined reduction, but still allow the vectorizer to pattern recognize
6851 it and turn it into the appropriate vectorized scan.
6853 For exclusive scan, this is slightly different:
6854 #pragma omp simd reduction(inscan,+:r)
6855 for (...)
6857 use (r);
6858 #pragma omp scan exclusive (r)
6859 r += something ();
6861 shall have body with:
6862 // Initialization for input phase, store the reduction initializer:
6863 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6864 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6865 D.2042[_21] = 0;
6866 // Actual input phase:
6868 r.0_5 = D.2042[_20];
6869 _6 = _4 + r.0_5;
6870 D.2042[_20] = _6;
6871 // Initialization for scan phase:
6872 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6873 _26 = D.2043[_25];
6874 D.2044[_25] = _26;
6875 _27 = D.2042[_25];
6876 _28 = _26 + _27;
6877 D.2043[_25] = _28;
6878 // Actual scan phase:
6880 r.1_8 = D.2044[_20];
6881 ... */
6883 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6885 /* Match the D.2042[_21] = 0; store above. Just require that
6886 it is a constant or external definition store. */
6887 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6889 fail_init:
6890 if (dump_enabled_p ())
6891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6892 "unsupported OpenMP scan initializer store.\n");
6893 return false;
6896 if (! loop_vinfo->scan_map)
6897 loop_vinfo->scan_map = new hash_map<tree, tree>;
6898 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6899 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6900 if (cached)
6901 goto fail_init;
6902 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6904 /* These stores can be vectorized normally. */
6905 return true;
6908 if (rhs_dt != vect_internal_def)
6910 fail:
6911 if (dump_enabled_p ())
6912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6913 "unsupported OpenMP scan combiner pattern.\n");
6914 return false;
6917 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6918 tree rhs = gimple_assign_rhs1 (stmt);
6919 if (TREE_CODE (rhs) != SSA_NAME)
6920 goto fail;
6922 gimple *other_store_stmt = NULL;
6923 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6924 bool inscan_var_store
6925 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6927 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6929 if (!inscan_var_store)
6931 use_operand_p use_p;
6932 imm_use_iterator iter;
6933 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6935 gimple *use_stmt = USE_STMT (use_p);
6936 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6937 continue;
6938 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6939 || !is_gimple_assign (use_stmt)
6940 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6941 || other_store_stmt
6942 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6943 goto fail;
6944 other_store_stmt = use_stmt;
6946 if (other_store_stmt == NULL)
6947 goto fail;
6948 rhs = gimple_assign_lhs (other_store_stmt);
6949 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6950 goto fail;
6953 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6955 use_operand_p use_p;
6956 imm_use_iterator iter;
6957 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6959 gimple *use_stmt = USE_STMT (use_p);
6960 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6961 continue;
6962 if (other_store_stmt)
6963 goto fail;
6964 other_store_stmt = use_stmt;
6967 else
6968 goto fail;
6970 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6971 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6972 || !is_gimple_assign (def_stmt)
6973 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6974 goto fail;
6976 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6977 /* For pointer addition, we should use the normal plus for the vector
6978 operation. */
6979 switch (code)
6981 case POINTER_PLUS_EXPR:
6982 code = PLUS_EXPR;
6983 break;
6984 case MULT_HIGHPART_EXPR:
6985 goto fail;
6986 default:
6987 break;
6989 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6990 goto fail;
6992 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6993 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6994 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6995 goto fail;
6997 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6998 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6999 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7000 || !gimple_assign_load_p (load1_stmt)
7001 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7002 || !gimple_assign_load_p (load2_stmt))
7003 goto fail;
7005 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7006 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7007 if (load1_stmt_info == NULL
7008 || load2_stmt_info == NULL
7009 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7010 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7011 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7012 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7013 goto fail;
7015 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7017 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7018 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7019 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7020 goto fail;
7021 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7022 tree lrhs;
7023 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7024 lrhs = rhs1;
7025 else
7026 lrhs = rhs2;
7027 use_operand_p use_p;
7028 imm_use_iterator iter;
7029 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7031 gimple *use_stmt = USE_STMT (use_p);
7032 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7033 continue;
7034 if (other_store_stmt)
7035 goto fail;
7036 other_store_stmt = use_stmt;
7040 if (other_store_stmt == NULL)
7041 goto fail;
7042 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7043 || !gimple_store_p (other_store_stmt))
7044 goto fail;
7046 stmt_vec_info other_store_stmt_info
7047 = loop_vinfo->lookup_stmt (other_store_stmt);
7048 if (other_store_stmt_info == NULL
7049 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7050 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7051 goto fail;
7053 gimple *stmt1 = stmt;
7054 gimple *stmt2 = other_store_stmt;
7055 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7056 std::swap (stmt1, stmt2);
7057 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7058 gimple_assign_rhs1 (load2_stmt)))
7060 std::swap (rhs1, rhs2);
7061 std::swap (load1_stmt, load2_stmt);
7062 std::swap (load1_stmt_info, load2_stmt_info);
7064 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7065 gimple_assign_rhs1 (load1_stmt)))
7066 goto fail;
7068 tree var3 = NULL_TREE;
7069 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7070 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7071 gimple_assign_rhs1 (load2_stmt)))
7072 goto fail;
7073 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7075 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7076 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7077 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7078 goto fail;
7079 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7080 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7081 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7082 || lookup_attribute ("omp simd inscan exclusive",
7083 DECL_ATTRIBUTES (var3)))
7084 goto fail;
7087 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7088 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7089 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7090 goto fail;
7092 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7093 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7094 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7095 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7096 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7097 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7098 goto fail;
7100 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7101 std::swap (var1, var2);
7103 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7105 if (!lookup_attribute ("omp simd inscan exclusive",
7106 DECL_ATTRIBUTES (var1)))
7107 goto fail;
7108 var1 = var3;
7111 if (loop_vinfo->scan_map == NULL)
7112 goto fail;
7113 tree *init = loop_vinfo->scan_map->get (var1);
7114 if (init == NULL)
7115 goto fail;
7117 	 /* The IL is as expected; now check if we can actually vectorize it.
7118 Inclusive scan:
7119 _26 = D.2043[_25];
7120 _27 = D.2042[_25];
7121 _28 = _26 + _27;
7122 D.2043[_25] = _28;
7123 D.2042[_25] = _28;
7124 should be vectorized as (where _40 is the vectorized rhs
7125 from the D.2042[_21] = 0; store):
7126 _30 = MEM <vector(8) int> [(int *)&D.2043];
7127 _31 = MEM <vector(8) int> [(int *)&D.2042];
7128 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7129 _33 = _31 + _32;
7130 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7131 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7132 _35 = _33 + _34;
7133 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7134 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7135 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7136 _37 = _35 + _36;
7137 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7138 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7139 _38 = _30 + _37;
7140 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7141 MEM <vector(8) int> [(int *)&D.2043] = _39;
7142 MEM <vector(8) int> [(int *)&D.2042] = _38;
7143 Exclusive scan:
7144 _26 = D.2043[_25];
7145 D.2044[_25] = _26;
7146 _27 = D.2042[_25];
7147 _28 = _26 + _27;
7148 D.2043[_25] = _28;
7149 should be vectorized as (where _40 is the vectorized rhs
7150 from the D.2042[_21] = 0; store):
7151 _30 = MEM <vector(8) int> [(int *)&D.2043];
7152 _31 = MEM <vector(8) int> [(int *)&D.2042];
7153 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7154 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7155 _34 = _32 + _33;
7156 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7157 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7158 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7159 _36 = _34 + _35;
7160 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7161 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7162 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7163 _38 = _36 + _37;
7164 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7165 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7166 _39 = _30 + _38;
7167 _50 = _31 + _39;
7168 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7169 MEM <vector(8) int> [(int *)&D.2044] = _39;
7170 MEM <vector(8) int> [(int *)&D.2042] = _51; */
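   /* For reference, source loops that typically lead to the IL checked
      above look roughly like the following sketches (the names n, a, b
      and sum are illustrative and not taken from the IL above).
      Inclusive scan:

	int sum = 0;
	#pragma omp simd reduction (inscan, +:sum)
	for (int i = 0; i < n; i++)
	  {
	    sum += a[i];
	    #pragma omp scan inclusive (sum)
	    b[i] = sum;
	  }

      The exclusive-scan variant swaps the two statements around the
      scan directive:

	int sum = 0;
	#pragma omp simd reduction (inscan, +:sum)
	for (int i = 0; i < n; i++)
	  {
	    b[i] = sum;
	    #pragma omp scan exclusive (sum)
	    sum += a[i];
	  }  */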
7171 enum machine_mode vec_mode = TYPE_MODE (vectype);
7172 optab optab = optab_for_tree_code (code, vectype, optab_default);
7173 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7174 goto fail;
7176 int units_log2 = scan_store_can_perm_p (vectype, *init);
7177 if (units_log2 == -1)
7178 goto fail;
7180 return true;
7184 /* Function vectorizable_scan_store.
7186    Helper of vectorizable_store, arguments as for vectorizable_store.
7187    Handle only the transformation; checking is done in check_scan_store.  */
7189 static bool
7190 vectorizable_scan_store (vec_info *vinfo,
7191 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7192 gimple **vec_stmt, int ncopies)
7194 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7195 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7196 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7197 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7199 if (dump_enabled_p ())
7200 dump_printf_loc (MSG_NOTE, vect_location,
7201 "transform scan store. ncopies = %d\n", ncopies);
7203 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7204 tree rhs = gimple_assign_rhs1 (stmt);
7205 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7207 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7208 bool inscan_var_store
7209 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7211 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7213 use_operand_p use_p;
7214 imm_use_iterator iter;
7215 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7217 gimple *use_stmt = USE_STMT (use_p);
7218 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7219 continue;
7220 rhs = gimple_assign_lhs (use_stmt);
7221 break;
7225 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7226 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7227 if (code == POINTER_PLUS_EXPR)
7228 code = PLUS_EXPR;
7229 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7230 && commutative_tree_code (code));
7231 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7232 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7233 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7234 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7235 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7236 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7237 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7238 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7239 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7240 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7241 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7243 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7245 std::swap (rhs1, rhs2);
7246 std::swap (var1, var2);
7247 std::swap (load1_dr_info, load2_dr_info);
7250 tree *init = loop_vinfo->scan_map->get (var1);
7251 gcc_assert (init);
7253 unsigned HOST_WIDE_INT nunits;
7254 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7255 gcc_unreachable ();
7256 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7257 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7258 gcc_assert (units_log2 > 0);
7259 auto_vec<tree, 16> perms;
7260 perms.quick_grow (units_log2 + 1);
7261 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7262 for (int i = 0; i <= units_log2; ++i)
7264 unsigned HOST_WIDE_INT j, k;
7265 vec_perm_builder sel (nunits, nunits, 1);
7266 sel.quick_grow (nunits);
7267 if (i == units_log2)
7268 for (j = 0; j < nunits; ++j)
7269 sel[j] = nunits - 1;
7270 else
7272 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7273 sel[j] = j;
7274 for (k = 0; j < nunits; ++j, ++k)
7275 sel[j] = nunits + k;
7277 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7278 if (!use_whole_vector.is_empty ()
7279 && use_whole_vector[i] != scan_store_kind_perm)
7281 if (zero_vec == NULL_TREE)
7282 zero_vec = build_zero_cst (vectype);
7283 if (masktype == NULL_TREE
7284 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7285 masktype = truth_type_for (vectype);
7286 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7288 else
7289 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
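   /* A concrete sketch of what the loop above builds, assuming
      nunits == 8 (these match the VEC_PERM_EXPR masks in the IL comment
      in check_scan_store):
	i == 0:           { 0, 8, 9, 10, 11, 12, 13, 14 }
	i == 1:           { 0, 1, 8, 9, 10, 11, 12, 13 }
	i == 2:           { 0, 1, 2, 3, 8, 9, 10, 11 }
	i == units_log2:  { 7, 7, 7, 7, 7, 7, 7, 7 }  */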
7292 tree vec_oprnd1 = NULL_TREE;
7293 tree vec_oprnd2 = NULL_TREE;
7294 tree vec_oprnd3 = NULL_TREE;
7295 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7296 tree dataref_offset = build_int_cst (ref_type, 0);
7297 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7298 vectype, VMAT_CONTIGUOUS);
7299 tree ldataref_ptr = NULL_TREE;
7300 tree orig = NULL_TREE;
7301 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7302 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7303 auto_vec<tree> vec_oprnds1;
7304 auto_vec<tree> vec_oprnds2;
7305 auto_vec<tree> vec_oprnds3;
7306 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7307 *init, &vec_oprnds1,
7308 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7309 rhs2, &vec_oprnds3);
7310 for (int j = 0; j < ncopies; j++)
7312 vec_oprnd1 = vec_oprnds1[j];
7313 if (ldataref_ptr == NULL)
7314 vec_oprnd2 = vec_oprnds2[j];
7315 vec_oprnd3 = vec_oprnds3[j];
7316 if (j == 0)
7317 orig = vec_oprnd3;
7318 else if (!inscan_var_store)
7319 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7321 if (ldataref_ptr)
7323 vec_oprnd2 = make_ssa_name (vectype);
7324 tree data_ref = fold_build2 (MEM_REF, vectype,
7325 unshare_expr (ldataref_ptr),
7326 dataref_offset);
7327 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7328 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7329 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7330 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7331 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7334 tree v = vec_oprnd2;
7335 for (int i = 0; i < units_log2; ++i)
7337 tree new_temp = make_ssa_name (vectype);
7338 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7339 (zero_vec
7340 && (use_whole_vector[i]
7341 != scan_store_kind_perm))
7342 ? zero_vec : vec_oprnd1, v,
7343 perms[i]);
7344 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7345 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7346 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7348 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7350 	     /* The whole-vector shift shifted in zero bits, but if *init
7351 		is not initializer_zerop, we need to replace those elements
7352 		with elements from vec_oprnd1.  */
7353 tree_vector_builder vb (masktype, nunits, 1);
7354 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7355 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7356 ? boolean_false_node : boolean_true_node);
7358 tree new_temp2 = make_ssa_name (vectype);
7359 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7360 new_temp, vec_oprnd1);
7361 vect_finish_stmt_generation (vinfo, stmt_info,
7362 g, gsi);
7363 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7364 new_temp = new_temp2;
7367 /* For exclusive scan, perform the perms[i] permutation once
7368 more. */
7369 if (i == 0
7370 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7371 && v == vec_oprnd2)
7373 v = new_temp;
7374 --i;
7375 continue;
7378 tree new_temp2 = make_ssa_name (vectype);
7379 g = gimple_build_assign (new_temp2, code, v, new_temp);
7380 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7381 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7383 v = new_temp2;
7386 tree new_temp = make_ssa_name (vectype);
7387 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7388 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7389 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7391 tree last_perm_arg = new_temp;
7392 /* For exclusive scan, new_temp computed above is the exclusive scan
7393      prefix sum.  Turn it into an inclusive prefix sum for the broadcast
7394 of the last element into orig. */
7395 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7397 last_perm_arg = make_ssa_name (vectype);
7398 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7399 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7400 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7403 orig = make_ssa_name (vectype);
7404 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7405 last_perm_arg, perms[units_log2]);
7406 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7407 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7409 if (!inscan_var_store)
7411 tree data_ref = fold_build2 (MEM_REF, vectype,
7412 unshare_expr (dataref_ptr),
7413 dataref_offset);
7414 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7415 g = gimple_build_assign (data_ref, new_temp);
7416 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7417 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7421 if (inscan_var_store)
7422 for (int j = 0; j < ncopies; j++)
7424 if (j != 0)
7425 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7427 tree data_ref = fold_build2 (MEM_REF, vectype,
7428 unshare_expr (dataref_ptr),
7429 dataref_offset);
7430 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7431 gimple *g = gimple_build_assign (data_ref, orig);
7432 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7433 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7435 return true;
7439 /* Function vectorizable_store.
7441 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7442 that can be vectorized.
7443 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7444 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7445 Return true if STMT_INFO is vectorizable in this way. */
7447 static bool
7448 vectorizable_store (vec_info *vinfo,
7449 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7450 gimple **vec_stmt, slp_tree slp_node,
7451 stmt_vector_for_cost *cost_vec)
7453 tree data_ref;
7454 tree op;
7455 tree vec_oprnd = NULL_TREE;
7456 tree elem_type;
7457 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7458 class loop *loop = NULL;
7459 machine_mode vec_mode;
7460 tree dummy;
7461 enum vect_def_type rhs_dt = vect_unknown_def_type;
7462 enum vect_def_type mask_dt = vect_unknown_def_type;
7463 tree dataref_ptr = NULL_TREE;
7464 tree dataref_offset = NULL_TREE;
7465 gimple *ptr_incr = NULL;
7466 int ncopies;
7467 int j;
7468 stmt_vec_info first_stmt_info;
7469 bool grouped_store;
7470 unsigned int group_size, i;
7471 vec<tree> oprnds = vNULL;
7472 vec<tree> result_chain = vNULL;
7473 vec<tree> vec_oprnds = vNULL;
7474 bool slp = (slp_node != NULL);
7475 unsigned int vec_num;
7476 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7477 tree aggr_type;
7478 gather_scatter_info gs_info;
7479 poly_uint64 vf;
7480 vec_load_store_type vls_type;
7481 tree ref_type;
7483 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7484 return false;
7486 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7487 && ! vec_stmt)
7488 return false;
7490 /* Is vectorizable store? */
7492 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7493 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7495 tree scalar_dest = gimple_assign_lhs (assign);
7496 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7497 && is_pattern_stmt_p (stmt_info))
7498 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7499 if (TREE_CODE (scalar_dest) != ARRAY_REF
7500 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7501 && TREE_CODE (scalar_dest) != INDIRECT_REF
7502 && TREE_CODE (scalar_dest) != COMPONENT_REF
7503 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7504 && TREE_CODE (scalar_dest) != REALPART_EXPR
7505 && TREE_CODE (scalar_dest) != MEM_REF)
7506 return false;
7508 else
7510 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7511 if (!call || !gimple_call_internal_p (call))
7512 return false;
7514 internal_fn ifn = gimple_call_internal_fn (call);
7515 if (!internal_store_fn_p (ifn))
7516 return false;
7518 if (slp_node != NULL)
7520 if (dump_enabled_p ())
7521 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7522 "SLP of masked stores not supported.\n");
7523 return false;
7526 int mask_index = internal_fn_mask_index (ifn);
7527 if (mask_index >= 0
7528 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7529 &mask, NULL, &mask_dt, &mask_vectype))
7530 return false;
7533 op = vect_get_store_rhs (stmt_info);
7535 /* Cannot have hybrid store SLP -- that would mean storing to the
7536 same location twice. */
7537 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7539 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7540 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7542 if (loop_vinfo)
7544 loop = LOOP_VINFO_LOOP (loop_vinfo);
7545 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7547 else
7548 vf = 1;
7550 /* Multiple types in SLP are handled by creating the appropriate number of
7551 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7552 case of SLP. */
7553 if (slp)
7554 ncopies = 1;
7555 else
7556 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7558 gcc_assert (ncopies >= 1);
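  /* For instance (illustrative numbers only): with a vectorization factor
     of 8 and a V4SI vectype, vect_get_num_copies returns 2, so the non-SLP
     paths below emit two vector stores per scalar store.  */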
7560 /* FORNOW. This restriction should be relaxed. */
7561 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7563 if (dump_enabled_p ())
7564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7565 "multiple types in nested loop.\n");
7566 return false;
7569 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7570 op, &rhs_dt, &rhs_vectype, &vls_type))
7571 return false;
7573 elem_type = TREE_TYPE (vectype);
7574 vec_mode = TYPE_MODE (vectype);
7576 if (!STMT_VINFO_DATA_REF (stmt_info))
7577 return false;
7579 vect_memory_access_type memory_access_type;
7580 enum dr_alignment_support alignment_support_scheme;
7581 int misalignment;
7582 poly_int64 poffset;
7583 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7584 ncopies, &memory_access_type, &poffset,
7585 &alignment_support_scheme, &misalignment, &gs_info))
7586 return false;
7588 if (mask)
7590 if (memory_access_type == VMAT_CONTIGUOUS)
7592 if (!VECTOR_MODE_P (vec_mode)
7593 || !can_vec_mask_load_store_p (vec_mode,
7594 TYPE_MODE (mask_vectype), false))
7595 return false;
7597 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7598 && (memory_access_type != VMAT_GATHER_SCATTER
7599 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7601 if (dump_enabled_p ())
7602 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7603 "unsupported access type for masked store.\n");
7604 return false;
7607 else
7609 /* FORNOW. In some cases can vectorize even if data-type not supported
7610 (e.g. - array initialization with 0). */
7611 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7612 return false;
7615 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7616 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7617 && memory_access_type != VMAT_GATHER_SCATTER
7618 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7619 if (grouped_store)
7621 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7622 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7623 group_size = DR_GROUP_SIZE (first_stmt_info);
7625 else
7627 first_stmt_info = stmt_info;
7628 first_dr_info = dr_info;
7629 group_size = vec_num = 1;
7632 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7634 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7635 memory_access_type))
7636 return false;
7639 if (!vec_stmt) /* transformation not required. */
7641 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7643 if (loop_vinfo
7644 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7645 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
7646 vls_type, group_size,
7647 memory_access_type, &gs_info,
7648 mask);
7650 if (slp_node
7651 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7652 vectype))
7654 if (dump_enabled_p ())
7655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7656 "incompatible vector types for invariants\n");
7657 return false;
7660 if (dump_enabled_p ()
7661 && memory_access_type != VMAT_ELEMENTWISE
7662 && memory_access_type != VMAT_GATHER_SCATTER
7663 && alignment_support_scheme != dr_aligned)
7664 dump_printf_loc (MSG_NOTE, vect_location,
7665 "Vectorizing an unaligned access.\n");
7667 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7668 vect_model_store_cost (vinfo, stmt_info, ncopies,
7669 memory_access_type, alignment_support_scheme,
7670 misalignment, vls_type, slp_node, cost_vec);
7671 return true;
7673 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7675 /* Transform. */
7677 ensure_base_align (dr_info);
7679 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7681 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7682 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7683 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7684 tree ptr, var, scale, vec_mask;
7685 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7686 tree mask_halfvectype = mask_vectype;
7687 edge pe = loop_preheader_edge (loop);
7688 gimple_seq seq;
7689 basic_block new_bb;
7690 enum { NARROW, NONE, WIDEN } modifier;
7691 poly_uint64 scatter_off_nunits
7692 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7694 if (known_eq (nunits, scatter_off_nunits))
7695 modifier = NONE;
7696 else if (known_eq (nunits * 2, scatter_off_nunits))
7698 modifier = WIDEN;
7700 /* Currently gathers and scatters are only supported for
7701 fixed-length vectors. */
7702 unsigned int count = scatter_off_nunits.to_constant ();
7703 vec_perm_builder sel (count, count, 1);
7704 for (i = 0; i < (unsigned int) count; ++i)
7705 sel.quick_push (i | (count / 2));
7707 vec_perm_indices indices (sel, 1, count);
7708 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7709 indices);
7710 gcc_assert (perm_mask != NULL_TREE);
7712 else if (known_eq (nunits, scatter_off_nunits * 2))
7714 modifier = NARROW;
7716 /* Currently gathers and scatters are only supported for
7717 fixed-length vectors. */
7718 unsigned int count = nunits.to_constant ();
7719 vec_perm_builder sel (count, count, 1);
7720 for (i = 0; i < (unsigned int) count; ++i)
7721 sel.quick_push (i | (count / 2));
7723 vec_perm_indices indices (sel, 2, count);
7724 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7725 gcc_assert (perm_mask != NULL_TREE);
7726 ncopies *= 2;
7728 if (mask)
7729 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7731 else
7732 gcc_unreachable ();
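      /* A sketch of the permutation built above, assuming a count of 8:
	 sel is { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the upper half of the
	 vector is moved into the lower half.  For WIDEN it is applied to
	 the offset vector, so odd-numbered copies of the data vector use
	 the second half of the offsets; for NARROW it is applied to the
	 data vector, so odd-numbered copies scatter its second half.  */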
7734 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7735 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7736 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7737 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7738 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7739 scaletype = TREE_VALUE (arglist);
7741 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7742 && TREE_CODE (rettype) == VOID_TYPE);
7744 ptr = fold_convert (ptrtype, gs_info.base);
7745 if (!is_gimple_min_invariant (ptr))
7747 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7748 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7749 gcc_assert (!new_bb);
7752 if (mask == NULL_TREE)
7754 mask_arg = build_int_cst (masktype, -1);
7755 mask_arg = vect_init_vector (vinfo, stmt_info,
7756 mask_arg, masktype, NULL);
7759 scale = build_int_cst (scaletype, gs_info.scale);
7761 auto_vec<tree> vec_oprnds0;
7762 auto_vec<tree> vec_oprnds1;
7763 auto_vec<tree> vec_masks;
7764 if (mask)
7766 tree mask_vectype = truth_type_for (vectype);
7767 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7768 modifier == NARROW
7769 ? ncopies / 2 : ncopies,
7770 mask, &vec_masks, mask_vectype);
7772 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7773 modifier == WIDEN
7774 ? ncopies / 2 : ncopies,
7775 gs_info.offset, &vec_oprnds0);
7776 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7777 modifier == NARROW
7778 ? ncopies / 2 : ncopies,
7779 op, &vec_oprnds1);
7780 for (j = 0; j < ncopies; ++j)
7782 if (modifier == WIDEN)
7784 if (j & 1)
7785 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7786 perm_mask, stmt_info, gsi);
7787 else
7788 op = vec_oprnd0 = vec_oprnds0[j / 2];
7789 src = vec_oprnd1 = vec_oprnds1[j];
7790 if (mask)
7791 mask_op = vec_mask = vec_masks[j];
7793 else if (modifier == NARROW)
7795 if (j & 1)
7796 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7797 perm_mask, stmt_info, gsi);
7798 else
7799 src = vec_oprnd1 = vec_oprnds1[j / 2];
7800 op = vec_oprnd0 = vec_oprnds0[j];
7801 if (mask)
7802 mask_op = vec_mask = vec_masks[j / 2];
7804 else
7806 op = vec_oprnd0 = vec_oprnds0[j];
7807 src = vec_oprnd1 = vec_oprnds1[j];
7808 if (mask)
7809 mask_op = vec_mask = vec_masks[j];
7812 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7814 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7815 TYPE_VECTOR_SUBPARTS (srctype)));
7816 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7817 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7818 gassign *new_stmt
7819 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7820 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7821 src = var;
7824 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7826 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7827 TYPE_VECTOR_SUBPARTS (idxtype)));
7828 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7829 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7830 gassign *new_stmt
7831 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7832 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7833 op = var;
7836 if (mask)
7838 tree utype;
7839 mask_arg = mask_op;
7840 if (modifier == NARROW)
7842 var = vect_get_new_ssa_name (mask_halfvectype,
7843 vect_simple_var);
7844 gassign *new_stmt
7845 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7846 : VEC_UNPACK_LO_EXPR,
7847 mask_op);
7848 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7849 mask_arg = var;
7851 tree optype = TREE_TYPE (mask_arg);
7852 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7853 utype = masktype;
7854 else
7855 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7856 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7857 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7858 gassign *new_stmt
7859 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7860 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7861 mask_arg = var;
7862 if (!useless_type_conversion_p (masktype, utype))
7864 gcc_assert (TYPE_PRECISION (utype)
7865 <= TYPE_PRECISION (masktype));
7866 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7867 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7868 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7869 mask_arg = var;
7873 gcall *new_stmt
7874 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7875 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7877 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7879 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7880 return true;
7882 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7883 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7885 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7886 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7888 if (grouped_store)
7890 /* FORNOW */
7891 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7893 /* We vectorize all the stmts of the interleaving group when we
7894 reach the last stmt in the group. */
7895 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7896 < DR_GROUP_SIZE (first_stmt_info)
7897 && !slp)
7899 *vec_stmt = NULL;
7900 return true;
7903 if (slp)
7905 grouped_store = false;
7906 /* VEC_NUM is the number of vect stmts to be created for this
7907 group. */
7908 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7909 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7910 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7911 == first_stmt_info);
7912 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7913 op = vect_get_store_rhs (first_stmt_info);
7915 else
7916 /* VEC_NUM is the number of vect stmts to be created for this
7917 group. */
7918 vec_num = group_size;
7920 ref_type = get_group_alias_ptr_type (first_stmt_info);
7922 else
7923 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7925 if (dump_enabled_p ())
7926 dump_printf_loc (MSG_NOTE, vect_location,
7927 "transform store. ncopies = %d\n", ncopies);
7929 if (memory_access_type == VMAT_ELEMENTWISE
7930 || memory_access_type == VMAT_STRIDED_SLP)
7932 gimple_stmt_iterator incr_gsi;
7933 bool insert_after;
7934 gimple *incr;
7935 tree offvar;
7936 tree ivstep;
7937 tree running_off;
7938 tree stride_base, stride_step, alias_off;
7939 tree vec_oprnd;
7940 tree dr_offset;
7941 unsigned int g;
7942 /* Checked by get_load_store_type. */
7943 unsigned int const_nunits = nunits.to_constant ();
7945 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7946 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7948 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7949 stride_base
7950 = fold_build_pointer_plus
7951 (DR_BASE_ADDRESS (first_dr_info->dr),
7952 size_binop (PLUS_EXPR,
7953 convert_to_ptrofftype (dr_offset),
7954 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7955 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7957 /* For a store with loop-invariant (but other than power-of-2)
7958 stride (i.e. not a grouped access) like so:
7960 for (i = 0; i < n; i += stride)
7961 array[i] = ...;
7963 we generate a new induction variable and new stores from
7964 the components of the (vectorized) rhs:
7966 for (j = 0; ; j += VF*stride)
7967 vectemp = ...;
7968 tmp1 = vectemp[0];
7969 array[j] = tmp1;
7970 tmp2 = vectemp[1];
7971 array[j + stride] = tmp2;
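	 /* A concrete sketch of the above (illustrative only): with a V4SI
	    vectype, a vectorization factor of 4 and the scalar loop

	      for (i = 0; i < n; i += s)
		a[i] = x;

	    with loop-invariant s, each vector iteration extracts the four
	    lanes of the vectorized rhs with BIT_FIELD_REFs and stores them
	    to a[j], a[j+s], a[j+2*s] and a[j+3*s], with j advanced by 4*s
	    per vector iteration.  */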
7975 unsigned nstores = const_nunits;
7976 unsigned lnel = 1;
7977 tree ltype = elem_type;
7978 tree lvectype = vectype;
7979 if (slp)
7981 if (group_size < const_nunits
7982 && const_nunits % group_size == 0)
7984 nstores = const_nunits / group_size;
7985 lnel = group_size;
7986 ltype = build_vector_type (elem_type, group_size);
7987 lvectype = vectype;
7989 	      /* First check if the vec_extract optab doesn't support
7990 		 extraction of vector elts directly.  */
7991 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7992 machine_mode vmode;
7993 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7994 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7995 group_size).exists (&vmode)
7996 || (convert_optab_handler (vec_extract_optab,
7997 TYPE_MODE (vectype), vmode)
7998 == CODE_FOR_nothing))
8000 /* Try to avoid emitting an extract of vector elements
8001 by performing the extracts using an integer type of the
8002 same size, extracting from a vector of those and then
8003 re-interpreting it as the original vector type if
8004 supported. */
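	      /* E.g. (a sketch, sizes are illustrative): for groups of two
		 SFmode elements stored out of a V8SF rhs, rather than
		 extracting eight SFmode elements we would view the rhs as
		 V4DI and extract four DImode chunks, each chunk covering
		 one group of two floats.  */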
8005 unsigned lsize
8006 = group_size * GET_MODE_BITSIZE (elmode);
8007 unsigned int lnunits = const_nunits / group_size;
8008 /* If we can't construct such a vector fall back to
8009 element extracts from the original vector type and
8010 element size stores. */
8011 if (int_mode_for_size (lsize, 0).exists (&elmode)
8012 && VECTOR_MODE_P (TYPE_MODE (vectype))
8013 && related_vector_mode (TYPE_MODE (vectype), elmode,
8014 lnunits).exists (&vmode)
8015 && (convert_optab_handler (vec_extract_optab,
8016 vmode, elmode)
8017 != CODE_FOR_nothing))
8019 nstores = lnunits;
8020 lnel = group_size;
8021 ltype = build_nonstandard_integer_type (lsize, 1);
8022 lvectype = build_vector_type (ltype, nstores);
8024 /* Else fall back to vector extraction anyway.
8025 Fewer stores are more important than avoiding spilling
8026 of the vector we extract from. Compared to the
8027 		 construction case in vectorizable_load, no store-forwarding
8028 issue exists here for reasonable archs. */
8031 else if (group_size >= const_nunits
8032 && group_size % const_nunits == 0)
8034 nstores = 1;
8035 lnel = const_nunits;
8036 ltype = vectype;
8037 lvectype = vectype;
8039 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8040 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8043 ivstep = stride_step;
8044 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8045 build_int_cst (TREE_TYPE (ivstep), vf));
8047 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8049 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8050 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8051 create_iv (stride_base, ivstep, NULL,
8052 loop, &incr_gsi, insert_after,
8053 &offvar, NULL);
8054 incr = gsi_stmt (incr_gsi);
8056 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8058 alias_off = build_int_cst (ref_type, 0);
8059 stmt_vec_info next_stmt_info = first_stmt_info;
8060 for (g = 0; g < group_size; g++)
8062 running_off = offvar;
8063 if (g)
8065 tree size = TYPE_SIZE_UNIT (ltype);
8066 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8067 size);
8068 tree newoff = copy_ssa_name (running_off, NULL);
8069 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8070 running_off, pos);
8071 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8072 running_off = newoff;
8074 if (!slp)
8075 op = vect_get_store_rhs (next_stmt_info);
8076 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8077 op, &vec_oprnds);
8078 unsigned int group_el = 0;
8079 unsigned HOST_WIDE_INT
8080 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8081 for (j = 0; j < ncopies; j++)
8083 vec_oprnd = vec_oprnds[j];
8084 /* Pun the vector to extract from if necessary. */
8085 if (lvectype != vectype)
8087 tree tem = make_ssa_name (lvectype);
8088 gimple *pun
8089 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8090 lvectype, vec_oprnd));
8091 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8092 vec_oprnd = tem;
8094 for (i = 0; i < nstores; i++)
8096 tree newref, newoff;
8097 gimple *incr, *assign;
8098 tree size = TYPE_SIZE (ltype);
8099 /* Extract the i'th component. */
8100 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8101 bitsize_int (i), size);
8102 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8103 size, pos);
8105 elem = force_gimple_operand_gsi (gsi, elem, true,
8106 NULL_TREE, true,
8107 GSI_SAME_STMT);
8109 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8110 group_el * elsz);
8111 newref = build2 (MEM_REF, ltype,
8112 running_off, this_off);
8113 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8115 /* And store it to *running_off. */
8116 assign = gimple_build_assign (newref, elem);
8117 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8119 group_el += lnel;
8120 if (! slp
8121 || group_el == group_size)
8123 newoff = copy_ssa_name (running_off, NULL);
8124 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8125 running_off, stride_step);
8126 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8128 running_off = newoff;
8129 group_el = 0;
8131 if (g == group_size - 1
8132 && !slp)
8134 if (j == 0 && i == 0)
8135 *vec_stmt = assign;
8136 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8140 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8141 vec_oprnds.release ();
8142 if (slp)
8143 break;
8146 return true;
8149 auto_vec<tree> dr_chain (group_size);
8150 oprnds.create (group_size);
8152 gcc_assert (alignment_support_scheme);
8153 vec_loop_masks *loop_masks
8154 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8155 ? &LOOP_VINFO_MASKS (loop_vinfo)
8156 : NULL);
8157 vec_loop_lens *loop_lens
8158 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8159 ? &LOOP_VINFO_LENS (loop_vinfo)
8160 : NULL);
8162 /* Shouldn't go with length-based approach if fully masked. */
8163 gcc_assert (!loop_lens || !loop_masks);
8165 /* Targets with store-lane instructions must not require explicit
8166 realignment. vect_supportable_dr_alignment always returns either
8167 dr_aligned or dr_unaligned_supported for masked operations. */
8168 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8169 && !mask
8170 && !loop_masks)
8171 || alignment_support_scheme == dr_aligned
8172 || alignment_support_scheme == dr_unaligned_supported);
8174 tree offset = NULL_TREE;
8175 if (!known_eq (poffset, 0))
8176 offset = size_int (poffset);
8178 tree bump;
8179 tree vec_offset = NULL_TREE;
8180 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8182 aggr_type = NULL_TREE;
8183 bump = NULL_TREE;
8185 else if (memory_access_type == VMAT_GATHER_SCATTER)
8187 aggr_type = elem_type;
8188 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8189 &bump, &vec_offset);
8191 else
8193 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8194 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8195 else
8196 aggr_type = vectype;
8197 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8198 memory_access_type);
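  /* E.g. (a sketch with illustrative numbers): for a non-SLP group of
     three V4SI stores handled with store-lanes, AGGR_TYPE is a 12-element
     int array, and the IFN_STORE_LANES call emitted below is expected to
     map to an st3-style instruction on targets such as AArch64.  */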
8201 if (mask)
8202 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8204 /* In case the vectorization factor (VF) is bigger than the number
8205 of elements that we can fit in a vectype (nunits), we have to generate
8206 more than one vector stmt - i.e - we need to "unroll" the
8207 vector stmt by a factor VF/nunits. */
8209 /* In case of interleaving (non-unit grouped access):
8211 S1: &base + 2 = x2
8212 S2: &base = x0
8213 S3: &base + 1 = x1
8214 S4: &base + 3 = x3
8216 We create vectorized stores starting from base address (the access of the
8217 first stmt in the chain (S2 in the above example), when the last store stmt
8218 of the chain (S4) is reached:
8220 VS1: &base = vx2
8221 VS2: &base + vec_size*1 = vx0
8222 VS3: &base + vec_size*2 = vx1
8223 VS4: &base + vec_size*3 = vx3
8225 Then permutation statements are generated:
8227 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8228 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8231 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8232 (the order of the data-refs in the output of vect_permute_store_chain
8233 corresponds to the order of scalar stmts in the interleaving chain - see
8234 the documentation of vect_permute_store_chain()).
8236 In case of both multiple types and interleaving, above vector stores and
8237 permutation stmts are created for every copy. The result vector stmts are
8238 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8239 STMT_VINFO_RELATED_STMT for the next copies.
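  /* A source-level loop that gives rise to such an interleaving group
     might look like (illustrative only):

       struct pix { unsigned char r, g, b, a; };

       void
       fill (struct pix *p, int n, unsigned char v)
       {
	 for (int i = 0; i < n; i++)
	   {
	     p[i].r = v;
	     p[i].g = v;
	     p[i].b = v;
	     p[i].a = v;
	   }
       }

     The four scalar stores form one interleaving chain of group size 4;
     as described above, they are all code generated when the last store
     of the chain is reached.  */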
8242 auto_vec<tree> vec_masks;
8243 tree vec_mask = NULL;
8244 auto_vec<tree> vec_offsets;
8245 auto_vec<vec<tree> > gvec_oprnds;
8246 gvec_oprnds.safe_grow_cleared (group_size, true);
8247 for (j = 0; j < ncopies; j++)
8249 gimple *new_stmt;
8250 if (j == 0)
8252 if (slp)
8254 /* Get vectorized arguments for SLP_NODE. */
8255 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8256 op, &vec_oprnds);
8257 vec_oprnd = vec_oprnds[0];
8259 else
8261 /* For interleaved stores we collect vectorized defs for all the
8262 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8263 used as an input to vect_permute_store_chain().
8265 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8266 and OPRNDS are of size 1. */
8267 stmt_vec_info next_stmt_info = first_stmt_info;
8268 for (i = 0; i < group_size; i++)
8270 /* Since gaps are not supported for interleaved stores,
8271 DR_GROUP_SIZE is the exact number of stmts in the chain.
8272 		 Therefore, NEXT_STMT_INFO can't be NULL_TREE.  If there
8273 		 is no interleaving, DR_GROUP_SIZE is 1,
8274 and only one iteration of the loop will be executed. */
8275 op = vect_get_store_rhs (next_stmt_info);
8276 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8277 ncopies, op, &gvec_oprnds[i]);
8278 vec_oprnd = gvec_oprnds[i][0];
8279 dr_chain.quick_push (gvec_oprnds[i][0]);
8280 oprnds.quick_push (gvec_oprnds[i][0]);
8281 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8283 if (mask)
8285 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8286 mask, &vec_masks, mask_vectype);
8287 vec_mask = vec_masks[0];
8291 /* We should have catched mismatched types earlier. */
8292 gcc_assert (useless_type_conversion_p (vectype,
8293 TREE_TYPE (vec_oprnd)));
8294 bool simd_lane_access_p
8295 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8296 if (simd_lane_access_p
8297 && !loop_masks
8298 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8299 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8300 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8301 && integer_zerop (DR_INIT (first_dr_info->dr))
8302 && alias_sets_conflict_p (get_alias_set (aggr_type),
8303 get_alias_set (TREE_TYPE (ref_type))))
8305 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8306 dataref_offset = build_int_cst (ref_type, 0);
8308 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8310 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8311 slp_node, &gs_info, &dataref_ptr,
8312 &vec_offsets);
8313 vec_offset = vec_offsets[0];
8315 else
8316 dataref_ptr
8317 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8318 simd_lane_access_p ? loop : NULL,
8319 offset, &dummy, gsi, &ptr_incr,
8320 simd_lane_access_p, bump);
8322 else
8324 /* For interleaved stores we created vectorized defs for all the
8325 defs stored in OPRNDS in the previous iteration (previous copy).
8326 DR_CHAIN is then used as an input to vect_permute_store_chain().
8327 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8328 OPRNDS are of size 1. */
8329 for (i = 0; i < group_size; i++)
8331 vec_oprnd = gvec_oprnds[i][j];
8332 dr_chain[i] = gvec_oprnds[i][j];
8333 oprnds[i] = gvec_oprnds[i][j];
8335 if (mask)
8336 vec_mask = vec_masks[j];
8337 if (dataref_offset)
8338 dataref_offset
8339 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8340 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8341 vec_offset = vec_offsets[j];
8342 else
8343 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8344 stmt_info, bump);
8347 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8349 tree vec_array;
8351 /* Get an array into which we can store the individual vectors. */
8352 vec_array = create_vector_array (vectype, vec_num);
8354 /* Invalidate the current contents of VEC_ARRAY. This should
8355 become an RTL clobber too, which prevents the vector registers
8356 from being upward-exposed. */
8357 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8359 /* Store the individual vectors into the array. */
8360 for (i = 0; i < vec_num; i++)
8362 vec_oprnd = dr_chain[i];
8363 write_vector_array (vinfo, stmt_info,
8364 gsi, vec_oprnd, vec_array, i);
8367 tree final_mask = NULL;
8368 if (loop_masks)
8369 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8370 vectype, j);
8371 if (vec_mask)
8372 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8373 final_mask, vec_mask, gsi);
8375 gcall *call;
8376 if (final_mask)
8378 /* Emit:
8379 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8380 VEC_ARRAY). */
8381 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8382 tree alias_ptr = build_int_cst (ref_type, align);
8383 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8384 dataref_ptr, alias_ptr,
8385 final_mask, vec_array);
8387 else
8389 /* Emit:
8390 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8391 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8392 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8393 vec_array);
8394 gimple_call_set_lhs (call, data_ref);
8396 gimple_call_set_nothrow (call, true);
8397 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8398 new_stmt = call;
8400 /* Record that VEC_ARRAY is now dead. */
8401 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8403 else
8405 new_stmt = NULL;
8406 if (grouped_store)
8408 if (j == 0)
8409 result_chain.create (group_size);
8410 /* Permute. */
8411 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8412 gsi, &result_chain);
8415 stmt_vec_info next_stmt_info = first_stmt_info;
8416 for (i = 0; i < vec_num; i++)
8418 unsigned misalign;
8419 unsigned HOST_WIDE_INT align;
8421 tree final_mask = NULL_TREE;
8422 if (loop_masks)
8423 final_mask = vect_get_loop_mask (gsi, loop_masks,
8424 vec_num * ncopies,
8425 vectype, vec_num * j + i);
8426 if (vec_mask)
8427 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8428 final_mask, vec_mask, gsi);
8430 if (memory_access_type == VMAT_GATHER_SCATTER)
8432 tree scale = size_int (gs_info.scale);
8433 gcall *call;
8434 if (final_mask)
8435 call = gimple_build_call_internal
8436 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8437 scale, vec_oprnd, final_mask);
8438 else
8439 call = gimple_build_call_internal
8440 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8441 scale, vec_oprnd);
8442 gimple_call_set_nothrow (call, true);
8443 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8444 new_stmt = call;
8445 break;
8448 if (i > 0)
8449 /* Bump the vector pointer. */
8450 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8451 gsi, stmt_info, bump);
8453 if (slp)
8454 vec_oprnd = vec_oprnds[i];
8455 else if (grouped_store)
8456 /* For grouped stores vectorized defs are interleaved in
8457 vect_permute_store_chain(). */
8458 vec_oprnd = result_chain[i];
8460 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8461 if (alignment_support_scheme == dr_aligned)
8462 misalign = 0;
8463 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8465 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8466 misalign = 0;
8468 else
8469 misalign = misalignment;
8470 if (dataref_offset == NULL_TREE
8471 && TREE_CODE (dataref_ptr) == SSA_NAME)
8472 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8473 misalign);
8474 align = least_bit_hwi (misalign | align);
8476 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8478 tree perm_mask = perm_mask_for_reverse (vectype);
8479 tree perm_dest = vect_create_destination_var
8480 (vect_get_store_rhs (stmt_info), vectype);
8481 tree new_temp = make_ssa_name (perm_dest);
8483 /* Generate the permute statement. */
8484 gimple *perm_stmt
8485 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8486 vec_oprnd, perm_mask);
8487 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8489 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8490 vec_oprnd = new_temp;
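	      /* This path handles negative-step stores such as
		 (illustrative):

		   for (i = n - 1; i >= 0; i--)
		     a[i] = x;

		 where the VEC_PERM_EXPR built above reverses the lanes of
		 the rhs before the contiguous store.  */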
8493 /* Arguments are ready. Create the new vector stmt. */
8494 if (final_mask)
8496 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8497 gcall *call
8498 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8499 dataref_ptr, ptr,
8500 final_mask, vec_oprnd);
8501 gimple_call_set_nothrow (call, true);
8502 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8503 new_stmt = call;
8505 else if (loop_lens)
8507 tree final_len
8508 = vect_get_loop_len (loop_vinfo, loop_lens,
8509 vec_num * ncopies, vec_num * j + i);
8510 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8511 machine_mode vmode = TYPE_MODE (vectype);
8512 opt_machine_mode new_ovmode
8513 = get_len_load_store_mode (vmode, false);
8514 machine_mode new_vmode = new_ovmode.require ();
8515 /* Need conversion if it's wrapped with VnQI. */
8516 if (vmode != new_vmode)
8518 tree new_vtype
8519 = build_vector_type_for_mode (unsigned_intQI_type_node,
8520 new_vmode);
8521 tree var
8522 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8523 vec_oprnd
8524 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8525 gassign *new_stmt
8526 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8527 vec_oprnd);
8528 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8529 gsi);
8530 vec_oprnd = var;
8533 signed char biasval =
8534 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8536 tree bias = build_int_cst (intQI_type_node, biasval);
8537 gcall *call
8538 = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
8539 ptr, final_len, vec_oprnd,
8540 bias);
8541 gimple_call_set_nothrow (call, true);
8542 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8543 new_stmt = call;
8545 else
8547 data_ref = fold_build2 (MEM_REF, vectype,
8548 dataref_ptr,
8549 dataref_offset
8550 ? dataref_offset
8551 : build_int_cst (ref_type, 0));
8552 if (alignment_support_scheme == dr_aligned)
8554 else
8555 TREE_TYPE (data_ref)
8556 = build_aligned_type (TREE_TYPE (data_ref),
8557 align * BITS_PER_UNIT);
8558 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8559 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8560 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8563 if (slp)
8564 continue;
8566 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8567 if (!next_stmt_info)
8568 break;
8571 if (!slp)
8573 if (j == 0)
8574 *vec_stmt = new_stmt;
8575 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8579 for (i = 0; i < group_size; ++i)
8581 vec<tree> oprndsi = gvec_oprnds[i];
8582 oprndsi.release ();
8584 oprnds.release ();
8585 result_chain.release ();
8586 vec_oprnds.release ();
8588 return true;
8591 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8592 VECTOR_CST mask. No checks are made that the target platform supports the
8593 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8594 vect_gen_perm_mask_checked. */
8596 tree
8597 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8599 tree mask_type;
8601 poly_uint64 nunits = sel.length ();
8602 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8604 mask_type = build_vector_type (ssizetype, nunits);
8605 return vec_perm_indices_to_tree (mask_type, sel);
8608 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8609 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8611 tree
8612 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8614 machine_mode vmode = TYPE_MODE (vectype);
8615 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
8616 return vect_gen_perm_mask_any (vectype, sel);
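/* A usage sketch (illustrative; VECTYPE is assumed to be some 4-element
   vector type): to build a mask that reverses the lanes of a vector one
   could do

     vec_perm_builder sel (4, 4, 1);
     for (int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   vect_gen_perm_mask_checked asserts that the target can perform the
   permutation; vect_gen_perm_mask_any leaves that check to the caller.  */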
8619 /* Given vector variables X and Y that were generated for the scalar
8620    STMT_INFO, generate instructions to permute the vector elements of X and Y
8621 using permutation mask MASK_VEC, insert them at *GSI and return the
8622 permuted vector variable. */
8624 static tree
8625 permute_vec_elements (vec_info *vinfo,
8626 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8627 gimple_stmt_iterator *gsi)
8629 tree vectype = TREE_TYPE (x);
8630 tree perm_dest, data_ref;
8631 gimple *perm_stmt;
8633 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8634 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8635 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8636 else
8637 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8638 data_ref = make_ssa_name (perm_dest);
8640 /* Generate the permute statement. */
8641 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8642 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8644 return data_ref;
8647 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8648    inserting them on the loop's preheader edge.  Returns true if we
8649    were successful in doing so (and thus STMT_INFO can then be moved),
8650 otherwise returns false. */
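/* A sketch of the situation this handles (illustrative): when a
   loop-invariant load such as

     for (i = 0; i < n; i++)
       a[i] = *q;

   is to be hoisted to the preheader but the computation of q (from
   loop-invariant operands) was materialized inside the loop body, the
   defining statements of the load's uses are moved out first so that the
   load itself can then be placed on the preheader edge.  */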
8652 static bool
8653 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8655 ssa_op_iter i;
8656 tree op;
8657 bool any = false;
8659 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8661 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8662 if (!gimple_nop_p (def_stmt)
8663 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8665 /* Make sure we don't need to recurse. While we could do
8666 so in simple cases when there are more complex use webs
8667 we don't have an easy way to preserve stmt order to fulfil
8668 dependencies within them. */
8669 tree op2;
8670 ssa_op_iter i2;
8671 if (gimple_code (def_stmt) == GIMPLE_PHI)
8672 return false;
8673 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8675 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8676 if (!gimple_nop_p (def_stmt2)
8677 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8678 return false;
8680 any = true;
8684 if (!any)
8685 return true;
8687 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8689 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8690 if (!gimple_nop_p (def_stmt)
8691 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8693 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8694 gsi_remove (&gsi, false);
8695 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8699 return true;
8702 /* vectorizable_load.
8704 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8705 that can be vectorized.
8706 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8707 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8708 Return true if STMT_INFO is vectorizable in this way. */
8710 static bool
8711 vectorizable_load (vec_info *vinfo,
8712 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8713 gimple **vec_stmt, slp_tree slp_node,
8714 stmt_vector_for_cost *cost_vec)
8716 tree scalar_dest;
8717 tree vec_dest = NULL;
8718 tree data_ref = NULL;
8719 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8720 class loop *loop = NULL;
8721 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8722 bool nested_in_vect_loop = false;
8723 tree elem_type;
8724 tree new_temp;
8725 machine_mode mode;
8726 tree dummy;
8727 tree dataref_ptr = NULL_TREE;
8728 tree dataref_offset = NULL_TREE;
8729 gimple *ptr_incr = NULL;
8730 int ncopies;
8731 int i, j;
8732 unsigned int group_size;
8733 poly_uint64 group_gap_adj;
8734 tree msq = NULL_TREE, lsq;
8735 tree realignment_token = NULL_TREE;
8736 gphi *phi = NULL;
8737 vec<tree> dr_chain = vNULL;
8738 bool grouped_load = false;
8739 stmt_vec_info first_stmt_info;
8740 stmt_vec_info first_stmt_info_for_drptr = NULL;
8741 bool compute_in_loop = false;
8742 class loop *at_loop;
8743 int vec_num;
8744 bool slp = (slp_node != NULL);
8745 bool slp_perm = false;
8746 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8747 poly_uint64 vf;
8748 tree aggr_type;
8749 gather_scatter_info gs_info;
8750 tree ref_type;
8751 enum vect_def_type mask_dt = vect_unknown_def_type;
8753 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8754 return false;
8756 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8757 && ! vec_stmt)
8758 return false;
8760 if (!STMT_VINFO_DATA_REF (stmt_info))
8761 return false;
8763 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8764 int mask_index = -1;
8765 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8767 scalar_dest = gimple_assign_lhs (assign);
8768 if (TREE_CODE (scalar_dest) != SSA_NAME)
8769 return false;
8771 tree_code code = gimple_assign_rhs_code (assign);
8772 if (code != ARRAY_REF
8773 && code != BIT_FIELD_REF
8774 && code != INDIRECT_REF
8775 && code != COMPONENT_REF
8776 && code != IMAGPART_EXPR
8777 && code != REALPART_EXPR
8778 && code != MEM_REF
8779 && TREE_CODE_CLASS (code) != tcc_declaration)
8780 return false;
8782 else
8784 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8785 if (!call || !gimple_call_internal_p (call))
8786 return false;
8788 internal_fn ifn = gimple_call_internal_fn (call);
8789 if (!internal_load_fn_p (ifn))
8790 return false;
8792 scalar_dest = gimple_call_lhs (call);
8793 if (!scalar_dest)
8794 return false;
8796 mask_index = internal_fn_mask_index (ifn);
8797 /* ??? For SLP the mask operand is always last. */
8798 if (mask_index >= 0 && slp_node)
8799 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8800 if (mask_index >= 0
8801 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8802 &mask, NULL, &mask_dt, &mask_vectype))
8803 return false;
8806 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8807 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8809 if (loop_vinfo)
8811 loop = LOOP_VINFO_LOOP (loop_vinfo);
8812 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8813 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8815 else
8816 vf = 1;
8818 /* Multiple types in SLP are handled by creating the appropriate number of
8819 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8820 case of SLP. */
8821 if (slp)
8822 ncopies = 1;
8823 else
8824 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8826 gcc_assert (ncopies >= 1);
8828 /* FORNOW. This restriction should be relaxed. */
8829 if (nested_in_vect_loop && ncopies > 1)
8831 if (dump_enabled_p ())
8832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8833 "multiple types in nested loop.\n");
8834 return false;
8837 /* Invalidate assumptions made by dependence analysis when vectorization
8838 on the unrolled body effectively re-orders stmts. */
8839 if (ncopies > 1
8840 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8841 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8842 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8844 if (dump_enabled_p ())
8845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8846 "cannot perform implicit CSE when unrolling "
8847 "with negative dependence distance\n");
8848 return false;
8851 elem_type = TREE_TYPE (vectype);
8852 mode = TYPE_MODE (vectype);
8854 /* FORNOW. In some cases we can vectorize even if the data-type is not
8855 supported (e.g. data copies). */
8856 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8858 if (dump_enabled_p ())
8859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8860 "Aligned load, but unsupported type.\n");
8861 return false;
8864 /* Check if the load is a part of an interleaving chain. */
8865 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8867 grouped_load = true;
8868 /* FORNOW */
8869 gcc_assert (!nested_in_vect_loop);
8870 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8872 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8873 group_size = DR_GROUP_SIZE (first_stmt_info);
8875 /* Refuse non-SLP vectorization of SLP-only groups. */
8876 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8878 if (dump_enabled_p ())
8879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8880 "cannot vectorize load in non-SLP mode.\n");
8881 return false;
8884 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8886 slp_perm = true;
8888 if (!loop_vinfo)
8890 /* In BB vectorization we may not actually use a loaded vector
8891 accessing elements in excess of DR_GROUP_SIZE. */
8892 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8893 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8894 unsigned HOST_WIDE_INT nunits;
8895 unsigned j, k, maxk = 0;
8896 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8897 if (k > maxk)
8898 maxk = k;
8899 tree vectype = SLP_TREE_VECTYPE (slp_node);
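/* Bail out if the number of lanes is not constant or if the maximal
   permutation index reaches into the trailing, partially filled vector
   of the group, since loading that vector would touch the gap at the
   end of the group.  */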
8900 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8901 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8903 if (dump_enabled_p ())
8904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8905 "BB vectorization with gaps at the end of "
8906 "a load is not supported\n");
8907 return false;
8911 auto_vec<tree> tem;
8912 unsigned n_perms;
8913 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8914 true, &n_perms))
8916 if (dump_enabled_p ())
8917 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8918 vect_location,
8919 "unsupported load permutation\n");
8920 return false;
8924 /* Invalidate assumptions made by dependence analysis when vectorization
8925 on the unrolled body effectively re-orders stmts. */
8926 if (!PURE_SLP_STMT (stmt_info)
8927 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8928 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8929 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8931 if (dump_enabled_p ())
8932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8933 "cannot perform implicit CSE when performing "
8934 "group loads with negative dependence distance\n");
8935 return false;
8938 else
8939 group_size = 1;
8941 vect_memory_access_type memory_access_type;
8942 enum dr_alignment_support alignment_support_scheme;
8943 int misalignment;
8944 poly_int64 poffset;
8945 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8946 ncopies, &memory_access_type, &poffset,
8947 &alignment_support_scheme, &misalignment, &gs_info))
8948 return false;
8950 if (mask)
8952 if (memory_access_type == VMAT_CONTIGUOUS)
8954 machine_mode vec_mode = TYPE_MODE (vectype);
8955 if (!VECTOR_MODE_P (vec_mode)
8956 || !can_vec_mask_load_store_p (vec_mode,
8957 TYPE_MODE (mask_vectype), true))
8958 return false;
8960 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8961 && memory_access_type != VMAT_GATHER_SCATTER)
8963 if (dump_enabled_p ())
8964 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8965 "unsupported access type for masked load.\n");
8966 return false;
8968 else if (memory_access_type == VMAT_GATHER_SCATTER
8969 && gs_info.ifn == IFN_LAST
8970 && !gs_info.decl)
8972 if (dump_enabled_p ())
8973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8974 "unsupported masked emulated gather.\n");
8975 return false;
8979 if (!vec_stmt) /* transformation not required. */
8981 if (slp_node
8982 && mask
8983 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8984 mask_vectype))
8986 if (dump_enabled_p ())
8987 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8988 "incompatible vector types for invariants\n");
8989 return false;
8992 if (!slp)
8993 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8995 if (loop_vinfo
8996 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8997 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8998 VLS_LOAD, group_size,
8999 memory_access_type, &gs_info,
9000 mask);
9002 if (dump_enabled_p ()
9003 && memory_access_type != VMAT_ELEMENTWISE
9004 && memory_access_type != VMAT_GATHER_SCATTER
9005 && alignment_support_scheme != dr_aligned)
9006 dump_printf_loc (MSG_NOTE, vect_location,
9007 "Vectorizing an unaligned access.\n");
9009 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9010 vinfo->any_known_not_updated_vssa = true;
9012 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9013 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
9014 alignment_support_scheme, misalignment,
9015 &gs_info, slp_node, cost_vec);
9016 return true;
9019 if (!slp)
9020 gcc_assert (memory_access_type
9021 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9023 if (dump_enabled_p ())
9024 dump_printf_loc (MSG_NOTE, vect_location,
9025 "transform load. ncopies = %d\n", ncopies);
9027 /* Transform. */
9029 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9030 ensure_base_align (dr_info);
9032 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9034 vect_build_gather_load_calls (vinfo,
9035 stmt_info, gsi, vec_stmt, &gs_info, mask);
9036 return true;
9039 if (memory_access_type == VMAT_INVARIANT)
9041 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9042 /* If we have versioned for aliasing or the loop doesn't
9043 have any data dependencies that would preclude this,
9044 then we are sure this is a loop invariant load and
9045 thus we can insert it on the preheader edge. */
9046 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9047 && !nested_in_vect_loop
9048 && hoist_defs_of_uses (stmt_info, loop));
9049 if (hoist_p)
9051 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9052 if (dump_enabled_p ())
9053 dump_printf_loc (MSG_NOTE, vect_location,
9054 "hoisting out of the vectorized loop: %G",
9055 (gimple *) stmt);
9056 scalar_dest = copy_ssa_name (scalar_dest);
9057 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9058 edge pe = loop_preheader_edge (loop);
9059 gphi *vphi = get_virtual_phi (loop->header);
9060 tree vuse;
9061 if (vphi)
9062 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
9063 else
9064 vuse = gimple_vuse (gsi_stmt (*gsi));
9065 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
9066 gimple_set_vuse (new_stmt, vuse);
9067 gsi_insert_on_edge_immediate (pe, new_stmt);
9069 /* These copies are all equivalent, but currently the representation
9070 requires a separate STMT_VINFO_VEC_STMT for each one. */
9071 gimple_stmt_iterator gsi2 = *gsi;
9072 gsi_next (&gsi2);
9073 for (j = 0; j < ncopies; j++)
9075 if (hoist_p)
9076 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9077 vectype, NULL);
9078 else
9079 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9080 vectype, &gsi2);
9081 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9082 if (slp)
9083 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9084 else
9086 if (j == 0)
9087 *vec_stmt = new_stmt;
9088 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9091 return true;
9094 if (memory_access_type == VMAT_ELEMENTWISE
9095 || memory_access_type == VMAT_STRIDED_SLP)
9097 gimple_stmt_iterator incr_gsi;
9098 bool insert_after;
9099 tree offvar;
9100 tree ivstep;
9101 tree running_off;
9102 vec<constructor_elt, va_gc> *v = NULL;
9103 tree stride_base, stride_step, alias_off;
9104 /* Checked by get_load_store_type. */
9105 unsigned int const_nunits = nunits.to_constant ();
9106 unsigned HOST_WIDE_INT cst_offset = 0;
9107 tree dr_offset;
9109 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9110 gcc_assert (!nested_in_vect_loop);
9112 if (grouped_load)
9114 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9115 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9117 else
9119 first_stmt_info = stmt_info;
9120 first_dr_info = dr_info;
9122 if (slp && grouped_load)
9124 group_size = DR_GROUP_SIZE (first_stmt_info);
9125 ref_type = get_group_alias_ptr_type (first_stmt_info);
9127 else
9129 if (grouped_load)
9130 cst_offset
9131 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9132 * vect_get_place_in_interleaving_chain (stmt_info,
9133 first_stmt_info));
9134 group_size = 1;
9135 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9138 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9139 stride_base
9140 = fold_build_pointer_plus
9141 (DR_BASE_ADDRESS (first_dr_info->dr),
9142 size_binop (PLUS_EXPR,
9143 convert_to_ptrofftype (dr_offset),
9144 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9145 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9147 /* For a load with loop-invariant (but other than power-of-2)
9148 stride (i.e. not a grouped access) like so:
9150 for (i = 0; i < n; i += stride)
9151 ... = array[i];
9153 we generate a new induction variable and new accesses to
9154 form a new vector (or vectors, depending on ncopies):
9156 for (j = 0; ; j += VF*stride)
9157 tmp1 = array[j];
9158 tmp2 = array[j + stride];
9160 vectemp = {tmp1, tmp2, ...}
9163 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9164 build_int_cst (TREE_TYPE (stride_step), vf));
9166 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9168 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9169 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9170 create_iv (stride_base, ivstep, NULL,
9171 loop, &incr_gsi, insert_after,
9172 &offvar, NULL);
9174 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9176 running_off = offvar;
9177 alias_off = build_int_cst (ref_type, 0);
9178 int nloads = const_nunits;
9179 int lnel = 1;
9180 tree ltype = TREE_TYPE (vectype);
9181 tree lvectype = vectype;
9182 auto_vec<tree> dr_chain;
9183 if (memory_access_type == VMAT_STRIDED_SLP)
9185 if (group_size < const_nunits)
9187 /* First check if vec_init optab supports construction from vector
9188 elts directly. Otherwise avoid emitting a constructor of
9189 vector elements by performing the loads using an integer type
9190 of the same size, constructing a vector of those and then
9191 re-interpreting it as the original vector type. This avoids a
9192 huge runtime penalty due to the general inability to perform
9193 store forwarding from smaller stores to a larger load. */
9194 tree ptype;
9195 tree vtype
9196 = vector_vector_composition_type (vectype,
9197 const_nunits / group_size,
9198 &ptype);
9199 if (vtype != NULL_TREE)
9201 nloads = const_nunits / group_size;
9202 lnel = group_size;
9203 lvectype = vtype;
9204 ltype = ptype;
9207 else
9209 nloads = 1;
9210 lnel = const_nunits;
9211 ltype = vectype;
9213 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9215 /* Load vector(1) scalar_type directly if the vectype has just a single element. */
9216 else if (nloads == 1)
9217 ltype = vectype;
9219 if (slp)
9221 /* For SLP permutation support we need to load the whole group,
9222 not only the number of vector stmts the permutation result
9223 fits in. */
9224 if (slp_perm)
9226 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9227 variable VF. */
9228 unsigned int const_vf = vf.to_constant ();
9229 ncopies = CEIL (group_size * const_vf, const_nunits);
9230 dr_chain.create (ncopies);
9232 else
9233 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9235 unsigned int group_el = 0;
9236 unsigned HOST_WIDE_INT
9237 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9238 for (j = 0; j < ncopies; j++)
9240 if (nloads > 1)
9241 vec_alloc (v, nloads);
9242 gimple *new_stmt = NULL;
9243 for (i = 0; i < nloads; i++)
9245 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9246 group_el * elsz + cst_offset);
9247 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9248 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9249 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9250 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9251 if (nloads > 1)
9252 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9253 gimple_assign_lhs (new_stmt));
9255 group_el += lnel;
9256 if (! slp
9257 || group_el == group_size)
9259 tree newoff = copy_ssa_name (running_off);
9260 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9261 running_off, stride_step);
9262 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9264 running_off = newoff;
9265 group_el = 0;
9268 if (nloads > 1)
9270 tree vec_inv = build_constructor (lvectype, v);
9271 new_temp = vect_init_vector (vinfo, stmt_info,
9272 vec_inv, lvectype, gsi);
9273 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9274 if (lvectype != vectype)
9276 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9277 VIEW_CONVERT_EXPR,
9278 build1 (VIEW_CONVERT_EXPR,
9279 vectype, new_temp));
9280 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9284 if (slp)
9286 if (slp_perm)
9287 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9288 else
9289 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9291 else
9293 if (j == 0)
9294 *vec_stmt = new_stmt;
9295 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9298 if (slp_perm)
9300 unsigned n_perms;
9301 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9302 false, &n_perms);
9304 return true;
9307 if (memory_access_type == VMAT_GATHER_SCATTER
9308 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9309 grouped_load = false;
9311 if (grouped_load)
9313 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9314 group_size = DR_GROUP_SIZE (first_stmt_info);
9315 /* For SLP vectorization we directly vectorize a subchain
9316 without permutation. */
9317 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9318 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9319 /* For BB vectorization always use the first stmt to base
9320 the data ref pointer on. */
9321 if (bb_vinfo)
9322 first_stmt_info_for_drptr
9323 = vect_find_first_scalar_stmt_in_slp (slp_node);
9325 /* Check if the chain of loads is already vectorized. */
9326 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9327 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9328 ??? But we can only do so if there is exactly one
9329 as we have no way to get at the rest. Leave the CSE
9330 opportunity alone.
9331 ??? With the group load eventually participating
9332 in multiple different permutations (having multiple
9333 slp nodes which refer to the same group) the CSE
9334 is even wrong code. See PR56270. */
9335 && !slp)
9337 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9338 return true;
9340 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9341 group_gap_adj = 0;
9343 /* VEC_NUM is the number of vect stmts to be created for this group. */
9344 if (slp)
9346 grouped_load = false;
9347 /* If an SLP permutation is from N elements to N elements,
9348 and if one vector holds a whole number of N, we can load
9349 the inputs to the permutation in the same way as an
9350 unpermuted sequence. In other cases we need to load the
9351 whole group, not only the number of vector stmts the
9352 permutation result fits in. */
9353 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9354 if (slp_perm
9355 && (group_size != scalar_lanes
9356 || !multiple_p (nunits, group_size)))
9358 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9359 variable VF; see vect_transform_slp_perm_load. */
9360 unsigned int const_vf = vf.to_constant ();
9361 unsigned int const_nunits = nunits.to_constant ();
9362 vec_num = CEIL (group_size * const_vf, const_nunits);
9363 group_gap_adj = vf * group_size - nunits * vec_num;
9365 else
9367 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9368 group_gap_adj
9369 = group_size - scalar_lanes;
9372 else
9373 vec_num = group_size;
9375 ref_type = get_group_alias_ptr_type (first_stmt_info);
9377 else
9379 first_stmt_info = stmt_info;
9380 first_dr_info = dr_info;
9381 group_size = vec_num = 1;
9382 group_gap_adj = 0;
9383 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9384 if (slp)
9385 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9388 gcc_assert (alignment_support_scheme);
9389 vec_loop_masks *loop_masks
9390 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9391 ? &LOOP_VINFO_MASKS (loop_vinfo)
9392 : NULL);
9393 vec_loop_lens *loop_lens
9394 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9395 ? &LOOP_VINFO_LENS (loop_vinfo)
9396 : NULL);
9398 /* Shouldn't go with length-based approach if fully masked. */
9399 gcc_assert (!loop_lens || !loop_masks);
9401 /* Targets with store-lane instructions must not require explicit
9402 realignment. vect_supportable_dr_alignment always returns either
9403 dr_aligned or dr_unaligned_supported for masked operations. */
9404 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9405 && !mask
9406 && !loop_masks)
9407 || alignment_support_scheme == dr_aligned
9408 || alignment_support_scheme == dr_unaligned_supported);
9410 /* In case the vectorization factor (VF) is bigger than the number
9411 of elements that we can fit in a vectype (nunits), we have to generate
9412 more than one vector stmt - i.e - we need to "unroll" the
9413 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9414 from one copy of the vector stmt to the next, in the field
9415 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9416 stages to find the correct vector defs to be used when vectorizing
9417 stmts that use the defs of the current stmt. The example below
9418 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9419 need to create 4 vectorized stmts):
9421 before vectorization:
9422 RELATED_STMT VEC_STMT
9423 S1: x = memref - -
9424 S2: z = x + 1 - -
9426 step 1: vectorize stmt S1:
9427 We first create the vector stmt VS1_0, and, as usual, record a
9428 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9429 Next, we create the vector stmt VS1_1, and record a pointer to
9430 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9431 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9432 stmts and pointers:
9433 RELATED_STMT VEC_STMT
9434 VS1_0: vx0 = memref0 VS1_1 -
9435 VS1_1: vx1 = memref1 VS1_2 -
9436 VS1_2: vx2 = memref2 VS1_3 -
9437 VS1_3: vx3 = memref3 - -
9438 S1: x = load - VS1_0
9439 S2: z = x + 1 - -
9442 /* In case of interleaving (non-unit grouped access):
9444 S1: x2 = &base + 2
9445 S2: x0 = &base
9446 S3: x1 = &base + 1
9447 S4: x3 = &base + 3
9449 Vectorized loads are created in the order of memory accesses
9450 starting from the access of the first stmt of the chain:
9452 VS1: vx0 = &base
9453 VS2: vx1 = &base + vec_size*1
9454 VS3: vx3 = &base + vec_size*2
9455 VS4: vx4 = &base + vec_size*3
9457 Then permutation statements are generated:
9459 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9460 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9463 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9464 (the order of the data-refs in the output of vect_permute_load_chain
9465 corresponds to the order of scalar stmts in the interleaving chain - see
9466 the documentation of vect_permute_load_chain()).
9467 The generation of permutation stmts and recording them in
9468 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9470 In case of both multiple types and interleaving, the vector loads and
9471 permutation stmts above are created for every copy. The result vector
9472 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9473 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9475 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9476 on a target that supports unaligned accesses (dr_unaligned_supported)
9477 we generate the following code:
9478 p = initial_addr;
9479 indx = 0;
9480 loop {
9481 p = p + indx * vectype_size;
9482 vec_dest = *(p);
9483 indx = indx + 1;
9486 Otherwise, the data reference is potentially unaligned on a target that
9487 does not support unaligned accesses (dr_explicit_realign_optimized) -
9488 then generate the following code, in which the data in each iteration is
9489 obtained by two vector loads, one from the previous iteration, and one
9490 from the current iteration:
9491 p1 = initial_addr;
9492 msq_init = *(floor(p1))
9493 p2 = initial_addr + VS - 1;
9494 realignment_token = call target_builtin;
9495 indx = 0;
9496 loop {
9497 p2 = p2 + indx * vectype_size
9498 lsq = *(floor(p2))
9499 vec_dest = realign_load (msq, lsq, realignment_token)
9500 indx = indx + 1;
9501 msq = lsq;
9502 } */
9504 /* If the misalignment remains the same throughout the execution of the
9505 loop, we can create the init_addr and permutation mask at the loop
9506 preheader. Otherwise, it needs to be created inside the loop.
9507 This can only occur when vectorizing memory accesses in the inner-loop
9508 nested within an outer-loop that is being vectorized. */
9510 if (nested_in_vect_loop
9511 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9512 GET_MODE_SIZE (TYPE_MODE (vectype))))
9514 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9515 compute_in_loop = true;
9518 bool diff_first_stmt_info
9519 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9521 tree offset = NULL_TREE;
9522 if ((alignment_support_scheme == dr_explicit_realign_optimized
9523 || alignment_support_scheme == dr_explicit_realign)
9524 && !compute_in_loop)
9526 /* If we have a different first_stmt_info, we can't set up realignment
9527 here, since we can't guarantee that the first_stmt_info DR has been
9528 initialized yet; instead use the first_stmt_info_for_drptr DR below,
9529 bumping the pointer by its distance from the first_stmt_info DR. */
9530 if (!diff_first_stmt_info)
9531 msq = vect_setup_realignment (vinfo,
9532 first_stmt_info, gsi, &realignment_token,
9533 alignment_support_scheme, NULL_TREE,
9534 &at_loop);
9535 if (alignment_support_scheme == dr_explicit_realign_optimized)
9537 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9538 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9539 size_one_node);
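/* Offsetting the data-ref pointer by VS - 1 bytes makes the floor-aligned
   load inside the loop fetch the following vector, i.e. the LSQ of the
   realignment scheme described above.  */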
9540 gcc_assert (!first_stmt_info_for_drptr);
9543 else
9544 at_loop = loop;
9546 if (!known_eq (poffset, 0))
9547 offset = (offset
9548 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9549 : size_int (poffset));
9551 tree bump;
9552 tree vec_offset = NULL_TREE;
9553 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9555 aggr_type = NULL_TREE;
9556 bump = NULL_TREE;
9558 else if (memory_access_type == VMAT_GATHER_SCATTER)
9560 aggr_type = elem_type;
9561 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9562 &bump, &vec_offset);
9564 else
9566 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9567 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9568 else
9569 aggr_type = vectype;
9570 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9571 memory_access_type);
9574 auto_vec<tree> vec_offsets;
9575 auto_vec<tree> vec_masks;
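/* Compute all vector defs of the mask operand up front; VEC_MASK is
   (re)set from them at the start of each copy below.  */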
9576 if (mask)
9578 if (slp_node)
9579 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9580 &vec_masks);
9581 else
9582 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9583 &vec_masks, mask_vectype);
9585 tree vec_mask = NULL_TREE;
9586 poly_uint64 group_elt = 0;
9587 for (j = 0; j < ncopies; j++)
9589 /* 1. Create the vector or array pointer update chain. */
9590 if (j == 0)
9592 bool simd_lane_access_p
9593 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9594 if (simd_lane_access_p
9595 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9596 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9597 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9598 && integer_zerop (DR_INIT (first_dr_info->dr))
9599 && alias_sets_conflict_p (get_alias_set (aggr_type),
9600 get_alias_set (TREE_TYPE (ref_type)))
9601 && (alignment_support_scheme == dr_aligned
9602 || alignment_support_scheme == dr_unaligned_supported))
9604 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9605 dataref_offset = build_int_cst (ref_type, 0);
9607 else if (diff_first_stmt_info)
9609 dataref_ptr
9610 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9611 aggr_type, at_loop, offset, &dummy,
9612 gsi, &ptr_incr, simd_lane_access_p,
9613 bump);
9614 /* Adjust the pointer by the difference to first_stmt. */
9615 data_reference_p ptrdr
9616 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9617 tree diff
9618 = fold_convert (sizetype,
9619 size_binop (MINUS_EXPR,
9620 DR_INIT (first_dr_info->dr),
9621 DR_INIT (ptrdr)));
9622 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9623 stmt_info, diff);
9624 if (alignment_support_scheme == dr_explicit_realign)
9626 msq = vect_setup_realignment (vinfo,
9627 first_stmt_info_for_drptr, gsi,
9628 &realignment_token,
9629 alignment_support_scheme,
9630 dataref_ptr, &at_loop);
9631 gcc_assert (!compute_in_loop);
9634 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9636 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9637 slp_node, &gs_info, &dataref_ptr,
9638 &vec_offsets);
9640 else
9641 dataref_ptr
9642 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9643 at_loop,
9644 offset, &dummy, gsi, &ptr_incr,
9645 simd_lane_access_p, bump);
9646 if (mask)
9647 vec_mask = vec_masks[0];
9649 else
9651 if (dataref_offset)
9652 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9653 bump);
9654 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9655 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9656 stmt_info, bump);
9657 if (mask)
9658 vec_mask = vec_masks[j];
9661 if (grouped_load || slp_perm)
9662 dr_chain.create (vec_num);
9664 gimple *new_stmt = NULL;
9665 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9667 tree vec_array;
9669 vec_array = create_vector_array (vectype, vec_num);
9671 tree final_mask = NULL_TREE;
9672 if (loop_masks)
9673 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9674 vectype, j);
9675 if (vec_mask)
9676 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9677 final_mask, vec_mask, gsi);
9679 gcall *call;
9680 if (final_mask)
9682 /* Emit:
9683 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9684 VEC_MASK). */
9685 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9686 tree alias_ptr = build_int_cst (ref_type, align);
9687 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9688 dataref_ptr, alias_ptr,
9689 final_mask);
9691 else
9693 /* Emit:
9694 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9695 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9696 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9698 gimple_call_set_lhs (call, vec_array);
9699 gimple_call_set_nothrow (call, true);
9700 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9701 new_stmt = call;
9703 /* Extract each vector into an SSA_NAME. */
9704 for (i = 0; i < vec_num; i++)
9706 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9707 vec_array, i);
9708 dr_chain.quick_push (new_temp);
9711 /* Record the mapping between SSA_NAMEs and statements. */
9712 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9714 /* Record that VEC_ARRAY is now dead. */
9715 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9717 else
9719 for (i = 0; i < vec_num; i++)
9721 tree final_mask = NULL_TREE;
9722 if (loop_masks
9723 && memory_access_type != VMAT_INVARIANT)
9724 final_mask = vect_get_loop_mask (gsi, loop_masks,
9725 vec_num * ncopies,
9726 vectype, vec_num * j + i);
9727 if (vec_mask)
9728 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9729 final_mask, vec_mask, gsi);
9731 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9732 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9733 gsi, stmt_info, bump);
9735 /* 2. Create the vector-load in the loop. */
9736 switch (alignment_support_scheme)
9738 case dr_aligned:
9739 case dr_unaligned_supported:
9741 unsigned int misalign;
9742 unsigned HOST_WIDE_INT align;
9744 if (memory_access_type == VMAT_GATHER_SCATTER
9745 && gs_info.ifn != IFN_LAST)
9747 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9748 vec_offset = vec_offsets[vec_num * j + i];
9749 tree zero = build_zero_cst (vectype);
9750 tree scale = size_int (gs_info.scale);
9751 gcall *call;
9752 if (final_mask)
9753 call = gimple_build_call_internal
9754 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9755 vec_offset, scale, zero, final_mask);
9756 else
9757 call = gimple_build_call_internal
9758 (IFN_GATHER_LOAD, 4, dataref_ptr,
9759 vec_offset, scale, zero);
9760 gimple_call_set_nothrow (call, true);
9761 new_stmt = call;
9762 data_ref = NULL_TREE;
9763 break;
9765 else if (memory_access_type == VMAT_GATHER_SCATTER)
9767 /* Emulated gather-scatter. */
9768 gcc_assert (!final_mask);
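/* Open-code the gather: for each lane extract the offset with a
   BIT_FIELD_REF, scale it, add it to the base pointer, load the scalar
   element and finally collect the elements into a vector CONSTRUCTOR.  */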
9769 unsigned HOST_WIDE_INT const_nunits
9770 = nunits.to_constant ();
9771 unsigned HOST_WIDE_INT const_offset_nunits
9772 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9773 .to_constant ();
9774 vec<constructor_elt, va_gc> *ctor_elts;
9775 vec_alloc (ctor_elts, const_nunits);
9776 gimple_seq stmts = NULL;
9777 /* We support offset vectors with more elements
9778 than the data vector for now. */
9779 unsigned HOST_WIDE_INT factor
9780 = const_offset_nunits / const_nunits;
9781 vec_offset = vec_offsets[j / factor];
9782 unsigned elt_offset = (j % factor) * const_nunits;
9783 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9784 tree scale = size_int (gs_info.scale);
9785 align
9786 = get_object_alignment (DR_REF (first_dr_info->dr));
9787 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9788 align);
9789 for (unsigned k = 0; k < const_nunits; ++k)
9791 tree boff = size_binop (MULT_EXPR,
9792 TYPE_SIZE (idx_type),
9793 bitsize_int
9794 (k + elt_offset));
9795 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9796 idx_type, vec_offset,
9797 TYPE_SIZE (idx_type),
9798 boff);
9799 idx = gimple_convert (&stmts, sizetype, idx);
9800 idx = gimple_build (&stmts, MULT_EXPR,
9801 sizetype, idx, scale);
9802 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9803 TREE_TYPE (dataref_ptr),
9804 dataref_ptr, idx);
9805 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9806 tree elt = make_ssa_name (TREE_TYPE (vectype));
9807 tree ref = build2 (MEM_REF, ltype, ptr,
9808 build_int_cst (ref_type, 0));
9809 new_stmt = gimple_build_assign (elt, ref);
9810 gimple_set_vuse (new_stmt,
9811 gimple_vuse (gsi_stmt (*gsi)));
9812 gimple_seq_add_stmt (&stmts, new_stmt);
9813 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9815 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9816 new_stmt = gimple_build_assign (NULL_TREE,
9817 build_constructor
9818 (vectype, ctor_elts));
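/* The LHS of the constructor assignment is filled in with the vector
   destination by the common code after the switch.  */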
9819 data_ref = NULL_TREE;
9820 break;
9823 align =
9824 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9825 if (alignment_support_scheme == dr_aligned)
9826 misalign = 0;
9827 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9829 align = dr_alignment
9830 (vect_dr_behavior (vinfo, first_dr_info));
9831 misalign = 0;
9833 else
9834 misalign = misalignment;
9835 if (dataref_offset == NULL_TREE
9836 && TREE_CODE (dataref_ptr) == SSA_NAME)
9837 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9838 align, misalign);
9839 align = least_bit_hwi (misalign | align);
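/* ALIGN now holds the alignment in bytes the access is guaranteed to
   have, taking the known misalignment into account.  */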
9841 if (final_mask)
9843 tree ptr = build_int_cst (ref_type,
9844 align * BITS_PER_UNIT);
9845 gcall *call
9846 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9847 dataref_ptr, ptr,
9848 final_mask);
9849 gimple_call_set_nothrow (call, true);
9850 new_stmt = call;
9851 data_ref = NULL_TREE;
9853 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9855 tree final_len
9856 = vect_get_loop_len (loop_vinfo, loop_lens,
9857 vec_num * ncopies,
9858 vec_num * j + i);
9859 tree ptr = build_int_cst (ref_type,
9860 align * BITS_PER_UNIT);
9862 machine_mode vmode = TYPE_MODE (vectype);
9863 opt_machine_mode new_ovmode
9864 = get_len_load_store_mode (vmode, true);
9865 machine_mode new_vmode = new_ovmode.require ();
9866 tree qi_type = unsigned_intQI_type_node;
9868 signed char biasval =
9869 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9871 tree bias = build_int_cst (intQI_type_node, biasval);
9873 gcall *call
9874 = gimple_build_call_internal (IFN_LEN_LOAD, 4,
9875 dataref_ptr, ptr,
9876 final_len, bias);
9877 gimple_call_set_nothrow (call, true);
9878 new_stmt = call;
9879 data_ref = NULL_TREE;
9881 /* Need conversion if it's wrapped with VnQI. */
9882 if (vmode != new_vmode)
9884 tree new_vtype
9885 = build_vector_type_for_mode (qi_type, new_vmode);
9886 tree var = vect_get_new_ssa_name (new_vtype,
9887 vect_simple_var);
9888 gimple_set_lhs (call, var);
9889 vect_finish_stmt_generation (vinfo, stmt_info, call,
9890 gsi);
9891 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9892 new_stmt
9893 = gimple_build_assign (vec_dest,
9894 VIEW_CONVERT_EXPR, op);
9897 else
9899 tree ltype = vectype;
9900 tree new_vtype = NULL_TREE;
9901 unsigned HOST_WIDE_INT gap
9902 = DR_GROUP_GAP (first_stmt_info);
9903 unsigned int vect_align
9904 = vect_known_alignment_in_bytes (first_dr_info,
9905 vectype);
9906 unsigned int scalar_dr_size
9907 = vect_get_scalar_dr_size (first_dr_info);
9908 /* If there's no peeling for gaps but we have a gap
9909 with slp loads then load the lower half of the
9910 vector only. See get_group_load_store_type for
9911 when we apply this optimization. */
9912 if (slp
9913 && loop_vinfo
9914 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9915 && gap != 0
9916 && known_eq (nunits, (group_size - gap) * 2)
9917 && known_eq (nunits, group_size)
9918 && gap >= (vect_align / scalar_dr_size))
9920 tree half_vtype;
9921 new_vtype
9922 = vector_vector_composition_type (vectype, 2,
9923 &half_vtype);
9924 if (new_vtype != NULL_TREE)
9925 ltype = half_vtype;
9927 tree offset
9928 = (dataref_offset ? dataref_offset
9929 : build_int_cst (ref_type, 0));
9930 if (ltype != vectype
9931 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9933 unsigned HOST_WIDE_INT gap_offset
9934 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9935 tree gapcst = build_int_cst (ref_type, gap_offset);
9936 offset = size_binop (PLUS_EXPR, offset, gapcst);
9938 data_ref
9939 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9940 if (alignment_support_scheme == dr_aligned)
9942 else
9943 TREE_TYPE (data_ref)
9944 = build_aligned_type (TREE_TYPE (data_ref),
9945 align * BITS_PER_UNIT);
9946 if (ltype != vectype)
9948 vect_copy_ref_info (data_ref,
9949 DR_REF (first_dr_info->dr));
9950 tree tem = make_ssa_name (ltype);
9951 new_stmt = gimple_build_assign (tem, data_ref);
9952 vect_finish_stmt_generation (vinfo, stmt_info,
9953 new_stmt, gsi);
9954 data_ref = NULL;
9955 vec<constructor_elt, va_gc> *v;
9956 vec_alloc (v, 2);
9957 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9959 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9960 build_zero_cst (ltype));
9961 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9963 else
9965 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9966 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9967 build_zero_cst (ltype));
9969 gcc_assert (new_vtype != NULL_TREE);
9970 if (new_vtype == vectype)
9971 new_stmt = gimple_build_assign (
9972 vec_dest, build_constructor (vectype, v));
9973 else
9975 tree new_vname = make_ssa_name (new_vtype);
9976 new_stmt = gimple_build_assign (
9977 new_vname, build_constructor (new_vtype, v));
9978 vect_finish_stmt_generation (vinfo, stmt_info,
9979 new_stmt, gsi);
9980 new_stmt = gimple_build_assign (
9981 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9982 new_vname));
9986 break;
9988 case dr_explicit_realign:
9990 tree ptr, bump;
9992 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9994 if (compute_in_loop)
9995 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9996 &realignment_token,
9997 dr_explicit_realign,
9998 dataref_ptr, NULL);
10000 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10001 ptr = copy_ssa_name (dataref_ptr);
10002 else
10003 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
10004 // For explicit realign the target alignment should be
10005 // known at compile time.
10006 unsigned HOST_WIDE_INT align =
10007 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10008 new_stmt = gimple_build_assign
10009 (ptr, BIT_AND_EXPR, dataref_ptr,
10010 build_int_cst
10011 (TREE_TYPE (dataref_ptr),
10012 -(HOST_WIDE_INT) align));
10013 vect_finish_stmt_generation (vinfo, stmt_info,
10014 new_stmt, gsi);
10015 data_ref
10016 = build2 (MEM_REF, vectype, ptr,
10017 build_int_cst (ref_type, 0));
10018 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10019 vec_dest = vect_create_destination_var (scalar_dest,
10020 vectype);
10021 new_stmt = gimple_build_assign (vec_dest, data_ref);
10022 new_temp = make_ssa_name (vec_dest, new_stmt);
10023 gimple_assign_set_lhs (new_stmt, new_temp);
10024 gimple_move_vops (new_stmt, stmt_info->stmt);
10025 vect_finish_stmt_generation (vinfo, stmt_info,
10026 new_stmt, gsi);
10027 msq = new_temp;
10029 bump = size_binop (MULT_EXPR, vs,
10030 TYPE_SIZE_UNIT (elem_type));
10031 bump = size_binop (MINUS_EXPR, bump, size_one_node);
10032 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
10033 stmt_info, bump);
10034 new_stmt = gimple_build_assign
10035 (NULL_TREE, BIT_AND_EXPR, ptr,
10036 build_int_cst
10037 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
10038 if (TREE_CODE (ptr) == SSA_NAME)
10039 ptr = copy_ssa_name (ptr, new_stmt);
10040 else
10041 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
10042 gimple_assign_set_lhs (new_stmt, ptr);
10043 vect_finish_stmt_generation (vinfo, stmt_info,
10044 new_stmt, gsi);
10045 data_ref
10046 = build2 (MEM_REF, vectype, ptr,
10047 build_int_cst (ref_type, 0));
10048 break;
10050 case dr_explicit_realign_optimized:
10052 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10053 new_temp = copy_ssa_name (dataref_ptr);
10054 else
10055 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
10056 // We should only be doing this if we know the target
10057 // alignment at compile time.
10058 unsigned HOST_WIDE_INT align =
10059 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10060 new_stmt = gimple_build_assign
10061 (new_temp, BIT_AND_EXPR, dataref_ptr,
10062 build_int_cst (TREE_TYPE (dataref_ptr),
10063 -(HOST_WIDE_INT) align));
10064 vect_finish_stmt_generation (vinfo, stmt_info,
10065 new_stmt, gsi);
10066 data_ref
10067 = build2 (MEM_REF, vectype, new_temp,
10068 build_int_cst (ref_type, 0));
10069 break;
10071 default:
10072 gcc_unreachable ();
10074 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10075 /* DATA_REF is null if we've already built the statement. */
10076 if (data_ref)
10078 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10079 new_stmt = gimple_build_assign (vec_dest, data_ref);
10081 new_temp = make_ssa_name (vec_dest, new_stmt);
10082 gimple_set_lhs (new_stmt, new_temp);
10083 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10085 /* 3. Handle explicit realignment if necessary/supported.
10086 Create in loop:
10087 vec_dest = realign_load (msq, lsq, realignment_token) */
10088 if (alignment_support_scheme == dr_explicit_realign_optimized
10089 || alignment_support_scheme == dr_explicit_realign)
10091 lsq = gimple_assign_lhs (new_stmt);
10092 if (!realignment_token)
10093 realignment_token = dataref_ptr;
10094 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10095 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10096 msq, lsq, realignment_token);
10097 new_temp = make_ssa_name (vec_dest, new_stmt);
10098 gimple_assign_set_lhs (new_stmt, new_temp);
10099 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10101 if (alignment_support_scheme == dr_explicit_realign_optimized)
10103 gcc_assert (phi);
10104 if (i == vec_num - 1 && j == ncopies - 1)
10105 add_phi_arg (phi, lsq,
10106 loop_latch_edge (containing_loop),
10107 UNKNOWN_LOCATION);
10108 msq = lsq;
10112 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10114 tree perm_mask = perm_mask_for_reverse (vectype);
10115 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
10116 perm_mask, stmt_info, gsi);
10117 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10120 /* Collect vector loads and later create their permutation in
10121 vect_transform_grouped_load (). */
10122 if (grouped_load || slp_perm)
10123 dr_chain.quick_push (new_temp);
10125 /* Store vector loads in the corresponding SLP_NODE. */
10126 if (slp && !slp_perm)
10127 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10129 /* With an SLP permutation we load the gaps as well; without one we
10130 need to skip the gaps once we have fully loaded all elements.
10131 group_gap_adj is DR_GROUP_SIZE here. */
10132 group_elt += nunits;
10133 if (maybe_ne (group_gap_adj, 0U)
10134 && !slp_perm
10135 && known_eq (group_elt, group_size - group_gap_adj))
10137 poly_wide_int bump_val
10138 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10139 * group_gap_adj);
10140 if (tree_int_cst_sgn
10141 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10142 bump_val = -bump_val;
10143 tree bump = wide_int_to_tree (sizetype, bump_val);
10144 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10145 gsi, stmt_info, bump);
10146 group_elt = 0;
10149 /* Bump the vector pointer to account for a gap or for excess
10150 elements loaded for a permuted SLP load. */
10151 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10153 poly_wide_int bump_val
10154 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10155 * group_gap_adj);
10156 if (tree_int_cst_sgn
10157 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10158 bump_val = -bump_val;
10159 tree bump = wide_int_to_tree (sizetype, bump_val);
10160 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10161 stmt_info, bump);
10165 if (slp && !slp_perm)
10166 continue;
10168 if (slp_perm)
10170 unsigned n_perms;
10171 /* For SLP we know we've seen all possible uses of dr_chain so
10172 direct vect_transform_slp_perm_load to DCE the unused parts.
10173 ??? This is a hack to prevent compile-time issues as seen
10174 in PR101120 and friends. */
10175 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10176 gsi, vf, false, &n_perms,
10177 nullptr, true);
10178 gcc_assert (ok);
10180 else
10182 if (grouped_load)
10184 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10185 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10186 group_size, gsi);
10187 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10189 else
10191 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10194 dr_chain.release ();
10196 if (!slp)
10197 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10199 return true;
10202 /* Function vect_is_simple_cond.
10204 Input:
10205 VINFO - the vec_info of the loop or basic block that is being vectorized.
10206 COND - Condition that is checked for simple use.
10208 Output:
10209 *COMP_VECTYPE - the vector type for the comparison.
10210 *DTS - The def types for the arguments of the comparison
10212 Returns whether a COND can be vectorized. Checks whether
10213 condition operands are supportable using vect_is_simple_use. */
10215 static bool
10216 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10217 slp_tree slp_node, tree *comp_vectype,
10218 enum vect_def_type *dts, tree vectype)
10220 tree lhs, rhs;
10221 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10222 slp_tree slp_op;
10224 /* Mask case. */
10225 if (TREE_CODE (cond) == SSA_NAME
10226 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10228 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10229 &slp_op, &dts[0], comp_vectype)
10230 || !*comp_vectype
10231 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10232 return false;
10233 return true;
10236 if (!COMPARISON_CLASS_P (cond))
10237 return false;
10239 lhs = TREE_OPERAND (cond, 0);
10240 rhs = TREE_OPERAND (cond, 1);
10242 if (TREE_CODE (lhs) == SSA_NAME)
10244 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10245 &lhs, &slp_op, &dts[0], &vectype1))
10246 return false;
10248 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10249 || TREE_CODE (lhs) == FIXED_CST)
10250 dts[0] = vect_constant_def;
10251 else
10252 return false;
10254 if (TREE_CODE (rhs) == SSA_NAME)
10256 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10257 &rhs, &slp_op, &dts[1], &vectype2))
10258 return false;
10260 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10261 || TREE_CODE (rhs) == FIXED_CST)
10262 dts[1] = vect_constant_def;
10263 else
10264 return false;
10266 if (vectype1 && vectype2
10267 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10268 TYPE_VECTOR_SUBPARTS (vectype2)))
10269 return false;
10271 *comp_vectype = vectype1 ? vectype1 : vectype2;
10272 /* Invariant comparison. */
10273 if (! *comp_vectype)
10275 tree scalar_type = TREE_TYPE (lhs);
10276 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10277 *comp_vectype = truth_type_for (vectype);
10278 else
10280 /* If we can widen the comparison to match vectype do so. */
10281 if (INTEGRAL_TYPE_P (scalar_type)
10282 && !slp_node
10283 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10284 TYPE_SIZE (TREE_TYPE (vectype))))
10285 scalar_type = build_nonstandard_integer_type
10286 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10287 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10288 slp_node);
10292 return true;
10295 /* vectorizable_condition.
10297 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10298 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10299 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10300 at GSI.
10302 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10304 Return true if STMT_INFO is vectorizable in this way. */
10306 static bool
10307 vectorizable_condition (vec_info *vinfo,
10308 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10309 gimple **vec_stmt,
10310 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10312 tree scalar_dest = NULL_TREE;
10313 tree vec_dest = NULL_TREE;
10314 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10315 tree then_clause, else_clause;
10316 tree comp_vectype = NULL_TREE;
10317 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10318 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10319 tree vec_compare;
10320 tree new_temp;
10321 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10322 enum vect_def_type dts[4]
10323 = {vect_unknown_def_type, vect_unknown_def_type,
10324 vect_unknown_def_type, vect_unknown_def_type};
10325 int ndts = 4;
10326 int ncopies;
10327 int vec_num;
10328 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10329 int i;
10330 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10331 vec<tree> vec_oprnds0 = vNULL;
10332 vec<tree> vec_oprnds1 = vNULL;
10333 vec<tree> vec_oprnds2 = vNULL;
10334 vec<tree> vec_oprnds3 = vNULL;
10335 tree vec_cmp_type;
10336 bool masked = false;
10338 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10339 return false;
10341 /* Is vectorizable conditional operation? */
10342 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10343 if (!stmt)
10344 return false;
10346 code = gimple_assign_rhs_code (stmt);
10347 if (code != COND_EXPR)
10348 return false;
10350 stmt_vec_info reduc_info = NULL;
10351 int reduc_index = -1;
10352 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10353 bool for_reduction
10354 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10355 if (for_reduction)
10357 if (STMT_SLP_TYPE (stmt_info))
10358 return false;
10359 reduc_info = info_for_reduction (vinfo, stmt_info);
10360 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10361 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10362 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10363 || reduc_index != -1);
10365 else
10367 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10368 return false;
10371 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10372 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10374 if (slp_node)
10376 ncopies = 1;
10377 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10379 else
10381 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10382 vec_num = 1;
10385 gcc_assert (ncopies >= 1);
10386 if (for_reduction && ncopies > 1)
10387 return false; /* FORNOW */
10389 cond_expr = gimple_assign_rhs1 (stmt);
10391 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10392 &comp_vectype, &dts[0], vectype)
10393 || !comp_vectype)
10394 return false;
10396 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10397 slp_tree then_slp_node, else_slp_node;
10398 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10399 &then_clause, &then_slp_node, &dts[2], &vectype1))
10400 return false;
10401 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10402 &else_clause, &else_slp_node, &dts[3], &vectype2))
10403 return false;
10405 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10406 return false;
10408 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10409 return false;
10411 masked = !COMPARISON_CLASS_P (cond_expr);
10412 vec_cmp_type = truth_type_for (comp_vectype);
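/* VEC_CMP_TYPE is the boolean vector type produced when comparing two
   COMP_VECTYPE vectors.  */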
10414 if (vec_cmp_type == NULL_TREE)
10415 return false;
10417 cond_code = TREE_CODE (cond_expr);
10418 if (!masked)
10420 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10421 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10424 /* For conditional reductions, the "then" value needs to be the candidate
10425 value calculated by this iteration while the "else" value needs to be
10426 the result carried over from previous iterations. If the COND_EXPR
10427 is the other way around, we need to swap it. */
10428 bool must_invert_cmp_result = false;
10429 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10431 if (masked)
10432 must_invert_cmp_result = true;
10433 else
10435 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10436 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10437 if (new_code == ERROR_MARK)
10438 must_invert_cmp_result = true;
10439 else
10441 cond_code = new_code;
10442 /* Make sure we don't accidentally use the old condition. */
10443 cond_expr = NULL_TREE;
10446 std::swap (then_clause, else_clause);
10449 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10451 /* Boolean values may have another representation in vectors
10452 and therefore we prefer bit operations over comparison for
10453 them (which also works for scalar masks). We store opcodes
10454 to use in bitop1 and bitop2. Statement is vectorized as
10455 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10456 depending on bitop1 and bitop2 arity. */
10457 switch (cond_code)
10459 case GT_EXPR:
10460 bitop1 = BIT_NOT_EXPR;
10461 bitop2 = BIT_AND_EXPR;
10462 break;
10463 case GE_EXPR:
10464 bitop1 = BIT_NOT_EXPR;
10465 bitop2 = BIT_IOR_EXPR;
10466 break;
10467 case LT_EXPR:
10468 bitop1 = BIT_NOT_EXPR;
10469 bitop2 = BIT_AND_EXPR;
10470 std::swap (cond_expr0, cond_expr1);
10471 break;
10472 case LE_EXPR:
10473 bitop1 = BIT_NOT_EXPR;
10474 bitop2 = BIT_IOR_EXPR;
10475 std::swap (cond_expr0, cond_expr1);
10476 break;
10477 case NE_EXPR:
10478 bitop1 = BIT_XOR_EXPR;
10479 break;
10480 case EQ_EXPR:
10481 bitop1 = BIT_XOR_EXPR;
10482 bitop2 = BIT_NOT_EXPR;
10483 break;
10484 default:
10485 return false;
10487 cond_code = SSA_NAME;
10490 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10491 && reduction_type == EXTRACT_LAST_REDUCTION
10492 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10494 if (dump_enabled_p ())
10495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10496 "reduction comparison operation not supported.\n");
10497 return false;
10500 if (!vec_stmt)
10502 if (bitop1 != NOP_EXPR)
10504 machine_mode mode = TYPE_MODE (comp_vectype);
10505 optab optab;
10507 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10508 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10509 return false;
10511 if (bitop2 != NOP_EXPR)
10513 optab = optab_for_tree_code (bitop2, comp_vectype,
10514 optab_default);
10515 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10516 return false;
10520 vect_cost_for_stmt kind = vector_stmt;
10521 if (reduction_type == EXTRACT_LAST_REDUCTION)
10522 /* Count one reduction-like operation per vector. */
10523 kind = vec_to_scalar;
10524 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10525 return false;
10527 if (slp_node
10528 && (!vect_maybe_update_slp_op_vectype
10529 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10530 || (op_adjust == 1
10531 && !vect_maybe_update_slp_op_vectype
10532 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10533 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10534 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10536 if (dump_enabled_p ())
10537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10538 "incompatible vector types for invariants\n");
10539 return false;
10542 if (loop_vinfo && for_reduction
10543 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10545 if (reduction_type == EXTRACT_LAST_REDUCTION)
10546 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10547 ncopies * vec_num, vectype, NULL);
10548 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10549 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10551 if (dump_enabled_p ())
10552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10553 "conditional reduction prevents the use"
10554 " of partial vectors.\n");
10555 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10559 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10560 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10561 cost_vec, kind);
10562 return true;
10565 /* Transform. */
10567 /* Handle def. */
10568 scalar_dest = gimple_assign_lhs (stmt);
10569 if (reduction_type != EXTRACT_LAST_REDUCTION)
10570 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10572 bool swap_cond_operands = false;
10574 /* See whether another part of the vectorized code applies a loop
10575 mask to the condition, or to its inverse. */
10577 vec_loop_masks *masks = NULL;
10578 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10580 if (reduction_type == EXTRACT_LAST_REDUCTION)
10581 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10582 else
10584 scalar_cond_masked_key cond (cond_expr, ncopies);
10585 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10586 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10587 else
10589 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10590 tree_code orig_code = cond.code;
10591 cond.code = invert_tree_comparison (cond.code, honor_nans);
10592 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
10594 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10595 cond_code = cond.code;
10596 swap_cond_operands = true;
10598 else
10600 /* Try the inverse of the current mask. We check if the
10601 inverse mask is live and if so we generate a negate of
10602 the current mask such that we still honor NaNs. */
10603 cond.inverted_p = true;
10604 cond.code = orig_code;
10605 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10607 bitop1 = orig_code;
10608 bitop2 = BIT_NOT_EXPR;
10609 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10610 cond_code = cond.code;
10611 swap_cond_operands = true;
10618 /* Handle cond expr. */
10619 if (masked)
10620 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10621 cond_expr, &vec_oprnds0, comp_vectype,
10622 then_clause, &vec_oprnds2, vectype,
10623 reduction_type != EXTRACT_LAST_REDUCTION
10624 ? else_clause : NULL, &vec_oprnds3, vectype);
10625 else
10626 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10627 cond_expr0, &vec_oprnds0, comp_vectype,
10628 cond_expr1, &vec_oprnds1, comp_vectype,
10629 then_clause, &vec_oprnds2, vectype,
10630 reduction_type != EXTRACT_LAST_REDUCTION
10631 ? else_clause : NULL, &vec_oprnds3, vectype);
10633 /* Arguments are ready. Create the new vector stmt. */
10634 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10636 vec_then_clause = vec_oprnds2[i];
10637 if (reduction_type != EXTRACT_LAST_REDUCTION)
10638 vec_else_clause = vec_oprnds3[i];
10640 if (swap_cond_operands)
10641 std::swap (vec_then_clause, vec_else_clause);
10643 if (masked)
10644 vec_compare = vec_cond_lhs;
10645 else
10647 vec_cond_rhs = vec_oprnds1[i];
10648 if (bitop1 == NOP_EXPR)
10650 gimple_seq stmts = NULL;
10651 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10652 vec_cond_lhs, vec_cond_rhs);
10653 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10655 else
10657 new_temp = make_ssa_name (vec_cmp_type);
10658 gassign *new_stmt;
10659 if (bitop1 == BIT_NOT_EXPR)
10660 new_stmt = gimple_build_assign (new_temp, bitop1,
10661 vec_cond_rhs);
10662 else
10663 new_stmt
10664 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10665 vec_cond_rhs);
10666 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10667 if (bitop2 == NOP_EXPR)
10668 vec_compare = new_temp;
10669 else if (bitop2 == BIT_NOT_EXPR)
10671 /* Instead of doing ~x ? y : z do x ? z : y. */
10672 vec_compare = new_temp;
10673 std::swap (vec_then_clause, vec_else_clause);
10675 else
10677 vec_compare = make_ssa_name (vec_cmp_type);
10678 new_stmt
10679 = gimple_build_assign (vec_compare, bitop2,
10680 vec_cond_lhs, new_temp);
10681 vect_finish_stmt_generation (vinfo, stmt_info,
10682 new_stmt, gsi);
10687 /* If we decided to apply a loop mask to the result of the vector
10688 comparison, AND the comparison with the mask now. Later passes
10689      should then be able to reuse the AND results between multiple
10690 vector statements.
10692 For example:
10693 for (int i = 0; i < 100; ++i)
10694 x[i] = y[i] ? z[i] : 10;
10696 results in following optimized GIMPLE:
10698 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10699 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10700 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10701 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10702 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10703 vect_iftmp.11_47, { 10, ... }>;
10705      instead of using masked and unmasked forms of
10706 vec != { 0, ... } (masked in the MASK_LOAD,
10707 unmasked in the VEC_COND_EXPR). */
10709 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10710 in cases where that's necessary. */
10712 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10714 if (!is_gimple_val (vec_compare))
10716 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10717 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10718 vec_compare);
10719 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10720 vec_compare = vec_compare_name;
10723 if (must_invert_cmp_result)
10725 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10726 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10727 BIT_NOT_EXPR,
10728 vec_compare);
10729 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10730 vec_compare = vec_compare_name;
10733 if (masks)
10735 tree loop_mask
10736 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10737 vectype, i);
10738 tree tmp2 = make_ssa_name (vec_cmp_type);
10739 gassign *g
10740 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10741 loop_mask);
10742 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10743 vec_compare = tmp2;
10747 gimple *new_stmt;
10748 if (reduction_type == EXTRACT_LAST_REDUCTION)
10750 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10751 tree lhs = gimple_get_lhs (old_stmt);
10752 new_stmt = gimple_build_call_internal
10753 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10754 vec_then_clause);
10755 gimple_call_set_lhs (new_stmt, lhs);
10756 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10757 if (old_stmt == gsi_stmt (*gsi))
10758 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10759 else
10761 /* In this case we're moving the definition to later in the
10762 block. That doesn't matter because the only uses of the
10763 lhs are in phi statements. */
10764 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10765 gsi_remove (&old_gsi, true);
10766 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10769 else
10771 new_temp = make_ssa_name (vec_dest);
10772 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10773 vec_then_clause, vec_else_clause);
10774 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10776 if (slp_node)
10777 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10778 else
10779 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10782 if (!slp_node)
10783 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10785 vec_oprnds0.release ();
10786 vec_oprnds1.release ();
10787 vec_oprnds2.release ();
10788 vec_oprnds3.release ();
10790 return true;
10793 /* vectorizable_comparison.
10795    Check if STMT_INFO is a comparison expression that can be vectorized.
10796 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10797 comparison, put it in VEC_STMT, and insert it at GSI.
10799 Return true if STMT_INFO is vectorizable in this way. */
10801 static bool
10802 vectorizable_comparison (vec_info *vinfo,
10803 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10804 gimple **vec_stmt,
10805 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10807 tree lhs, rhs1, rhs2;
10808 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10809 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10810 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10811 tree new_temp;
10812 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10813 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10814 int ndts = 2;
10815 poly_uint64 nunits;
10816 int ncopies;
10817 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10818 int i;
10819 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10820 vec<tree> vec_oprnds0 = vNULL;
10821 vec<tree> vec_oprnds1 = vNULL;
10822 tree mask_type;
10823 tree mask;
10825 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10826 return false;
10828 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10829 return false;
10831 mask_type = vectype;
10832 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10834 if (slp_node)
10835 ncopies = 1;
10836 else
10837 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10839 gcc_assert (ncopies >= 1);
10840 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10841 return false;
10843 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10844 if (!stmt)
10845 return false;
10847 code = gimple_assign_rhs_code (stmt);
10849 if (TREE_CODE_CLASS (code) != tcc_comparison)
10850 return false;
10852 slp_tree slp_rhs1, slp_rhs2;
10853 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10854 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10855 return false;
10857 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10858 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10859 return false;
10861 if (vectype1 && vectype2
10862 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10863 TYPE_VECTOR_SUBPARTS (vectype2)))
10864 return false;
10866 vectype = vectype1 ? vectype1 : vectype2;
10868 /* Invariant comparison. */
10869 if (!vectype)
10871 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10872 vectype = mask_type;
10873 else
10874 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10875 slp_node);
10876 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10877 return false;
10879 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10880 return false;
10882 /* Can't compare mask and non-mask types. */
10883 if (vectype1 && vectype2
10884 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10885 return false;
10887 /* Boolean values may have another representation in vectors
10888 and therefore we prefer bit operations over comparison for
10889 them (which also works for scalar masks). We store opcodes
10890      to use in bitop1 and bitop2.  The statement is vectorized as
10891 BITOP2 (rhs1 BITOP1 rhs2) or
10892 rhs1 BITOP2 (BITOP1 rhs2)
10893 depending on bitop1 and bitop2 arity. */
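     /* Illustrative note (added here, not part of the original sources):
	with the mapping below, a boolean-operand GT_EXPR a > b is emitted
	as a & ~b (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR), while
	LT_EXPR additionally sets swap_p and so becomes b & ~a.  */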
10894 bool swap_p = false;
10895 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10897 if (code == GT_EXPR)
10899 bitop1 = BIT_NOT_EXPR;
10900 bitop2 = BIT_AND_EXPR;
10902 else if (code == GE_EXPR)
10904 bitop1 = BIT_NOT_EXPR;
10905 bitop2 = BIT_IOR_EXPR;
10907 else if (code == LT_EXPR)
10909 bitop1 = BIT_NOT_EXPR;
10910 bitop2 = BIT_AND_EXPR;
10911 swap_p = true;
10913 else if (code == LE_EXPR)
10915 bitop1 = BIT_NOT_EXPR;
10916 bitop2 = BIT_IOR_EXPR;
10917 swap_p = true;
10919 else
10921 bitop1 = BIT_XOR_EXPR;
10922 if (code == EQ_EXPR)
10923 bitop2 = BIT_NOT_EXPR;
10927 if (!vec_stmt)
10929 if (bitop1 == NOP_EXPR)
10931 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10932 return false;
10934 else
10936 machine_mode mode = TYPE_MODE (vectype);
10937 optab optab;
10939 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10940 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10941 return false;
10943 if (bitop2 != NOP_EXPR)
10945 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10946 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10947 return false;
10951 /* Put types on constant and invariant SLP children. */
10952 if (slp_node
10953 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10954 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10956 if (dump_enabled_p ())
10957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10958 "incompatible vector types for invariants\n");
10959 return false;
10962 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10963 vect_model_simple_cost (vinfo, stmt_info,
10964 ncopies * (1 + (bitop2 != NOP_EXPR)),
10965 dts, ndts, slp_node, cost_vec);
10966 return true;
10969 /* Transform. */
10971 /* Handle def. */
10972 lhs = gimple_assign_lhs (stmt);
10973 mask = vect_create_destination_var (lhs, mask_type);
10975 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10976 rhs1, &vec_oprnds0, vectype,
10977 rhs2, &vec_oprnds1, vectype);
10978 if (swap_p)
10979 std::swap (vec_oprnds0, vec_oprnds1);
10981 /* Arguments are ready. Create the new vector stmt. */
10982 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10984 gimple *new_stmt;
10985 vec_rhs2 = vec_oprnds1[i];
10987 new_temp = make_ssa_name (mask);
10988 if (bitop1 == NOP_EXPR)
10990 new_stmt = gimple_build_assign (new_temp, code,
10991 vec_rhs1, vec_rhs2);
10992 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10994 else
10996 if (bitop1 == BIT_NOT_EXPR)
10997 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10998 else
10999 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
11000 vec_rhs2);
11001 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11002 if (bitop2 != NOP_EXPR)
11004 tree res = make_ssa_name (mask);
11005 if (bitop2 == BIT_NOT_EXPR)
11006 new_stmt = gimple_build_assign (res, bitop2, new_temp);
11007 else
11008 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
11009 new_temp);
11010 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11013 if (slp_node)
11014 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11015 else
11016 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11019 if (!slp_node)
11020 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11022 vec_oprnds0.release ();
11023 vec_oprnds1.release ();
11025 return true;
11028 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
11029 can handle all live statements in the node. Otherwise return true
11030 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
11031 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
11033 static bool
11034 can_vectorize_live_stmts (vec_info *vinfo,
11035 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11036 slp_tree slp_node, slp_instance slp_node_instance,
11037 bool vec_stmt_p,
11038 stmt_vector_for_cost *cost_vec)
11040 if (slp_node)
11042 stmt_vec_info slp_stmt_info;
11043 unsigned int i;
11044 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
11046 if (STMT_VINFO_LIVE_P (slp_stmt_info)
11047 && !vectorizable_live_operation (vinfo,
11048 slp_stmt_info, gsi, slp_node,
11049 slp_node_instance, i,
11050 vec_stmt_p, cost_vec))
11051 return false;
11054 else if (STMT_VINFO_LIVE_P (stmt_info)
11055 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
11056 slp_node, slp_node_instance, -1,
11057 vec_stmt_p, cost_vec))
11058 return false;
11060 return true;
11063 /* Make sure the statement is vectorizable. */
11065 opt_result
11066 vect_analyze_stmt (vec_info *vinfo,
11067 stmt_vec_info stmt_info, bool *need_to_vectorize,
11068 slp_tree node, slp_instance node_instance,
11069 stmt_vector_for_cost *cost_vec)
11071 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11072 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
11073 bool ok;
11074 gimple_seq pattern_def_seq;
11076 if (dump_enabled_p ())
11077 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
11078 stmt_info->stmt);
11080 if (gimple_has_volatile_ops (stmt_info->stmt))
11081 return opt_result::failure_at (stmt_info->stmt,
11082 "not vectorized:"
11083 " stmt has volatile operands: %G\n",
11084 stmt_info->stmt);
11086 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11087 && node == NULL
11088 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
11090 gimple_stmt_iterator si;
11092 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
11094 stmt_vec_info pattern_def_stmt_info
11095 = vinfo->lookup_stmt (gsi_stmt (si));
11096 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
11097 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
11099 /* Analyze def stmt of STMT if it's a pattern stmt. */
11100 if (dump_enabled_p ())
11101 dump_printf_loc (MSG_NOTE, vect_location,
11102 "==> examining pattern def statement: %G",
11103 pattern_def_stmt_info->stmt);
11105 opt_result res
11106 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
11107 need_to_vectorize, node, node_instance,
11108 cost_vec);
11109 if (!res)
11110 return res;
11115 /* Skip stmts that do not need to be vectorized. In loops this is expected
11116 to include:
11117 - the COND_EXPR which is the loop exit condition
11118 - any LABEL_EXPRs in the loop
11119 - computations that are used only for array indexing or loop control.
11120 In basic blocks we only analyze statements that are a part of some SLP
11121     instance; therefore, all the statements are relevant.
11123     A pattern statement needs to be analyzed instead of the original statement
11124     if the original statement is not relevant.  Otherwise, we analyze both
11125     statements.  In basic blocks we are called from some SLP instance
11126     traversal; don't analyze pattern stmts instead, as the pattern stmts
11127     will already be part of an SLP instance.  */
11129 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
11130 if (!STMT_VINFO_RELEVANT_P (stmt_info)
11131 && !STMT_VINFO_LIVE_P (stmt_info))
11133 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11134 && pattern_stmt_info
11135 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11136 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11138 /* Analyze PATTERN_STMT instead of the original stmt. */
11139 stmt_info = pattern_stmt_info;
11140 if (dump_enabled_p ())
11141 dump_printf_loc (MSG_NOTE, vect_location,
11142 "==> examining pattern statement: %G",
11143 stmt_info->stmt);
11145 else
11147 if (dump_enabled_p ())
11148 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11150 return opt_result::success ();
11153 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11154 && node == NULL
11155 && pattern_stmt_info
11156 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11157 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11159 /* Analyze PATTERN_STMT too. */
11160 if (dump_enabled_p ())
11161 dump_printf_loc (MSG_NOTE, vect_location,
11162 "==> examining pattern statement: %G",
11163 pattern_stmt_info->stmt);
11165 opt_result res
11166 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11167 node_instance, cost_vec);
11168 if (!res)
11169 return res;
11172 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11174 case vect_internal_def:
11175 break;
11177 case vect_reduction_def:
11178 case vect_nested_cycle:
11179 gcc_assert (!bb_vinfo
11180 && (relevance == vect_used_in_outer
11181 || relevance == vect_used_in_outer_by_reduction
11182 || relevance == vect_used_by_reduction
11183 || relevance == vect_unused_in_scope
11184 || relevance == vect_used_only_live));
11185 break;
11187 case vect_induction_def:
11188 case vect_first_order_recurrence:
11189 gcc_assert (!bb_vinfo);
11190 break;
11192 case vect_constant_def:
11193 case vect_external_def:
11194 case vect_unknown_def_type:
11195 default:
11196 gcc_unreachable ();
11199 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11200 if (node)
11201 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11203 if (STMT_VINFO_RELEVANT_P (stmt_info))
11205 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11206 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11207 || (call && gimple_call_lhs (call) == NULL_TREE));
11208 *need_to_vectorize = true;
11211 if (PURE_SLP_STMT (stmt_info) && !node)
11213 if (dump_enabled_p ())
11214 dump_printf_loc (MSG_NOTE, vect_location,
11215 "handled only by SLP analysis\n");
11216 return opt_result::success ();
11219 ok = true;
11220 if (!bb_vinfo
11221 && (STMT_VINFO_RELEVANT_P (stmt_info)
11222 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11223 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11224 -mveclibabi= takes preference over library functions with
11225 the simd attribute. */
11226 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11227 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11228 cost_vec)
11229 || vectorizable_conversion (vinfo, stmt_info,
11230 NULL, NULL, node, cost_vec)
11231 || vectorizable_operation (vinfo, stmt_info,
11232 NULL, NULL, node, cost_vec)
11233 || vectorizable_assignment (vinfo, stmt_info,
11234 NULL, NULL, node, cost_vec)
11235 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11236 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11237 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11238 node, node_instance, cost_vec)
11239 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11240 NULL, node, cost_vec)
11241 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11242 || vectorizable_condition (vinfo, stmt_info,
11243 NULL, NULL, node, cost_vec)
11244 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11245 cost_vec)
11246 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11247 stmt_info, NULL, node)
11248 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
11249 stmt_info, NULL, node, cost_vec));
11250 else
11252 if (bb_vinfo)
11253 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11254 || vectorizable_simd_clone_call (vinfo, stmt_info,
11255 NULL, NULL, node, cost_vec)
11256 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11257 cost_vec)
11258 || vectorizable_shift (vinfo, stmt_info,
11259 NULL, NULL, node, cost_vec)
11260 || vectorizable_operation (vinfo, stmt_info,
11261 NULL, NULL, node, cost_vec)
11262 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11263 cost_vec)
11264 || vectorizable_load (vinfo, stmt_info,
11265 NULL, NULL, node, cost_vec)
11266 || vectorizable_store (vinfo, stmt_info,
11267 NULL, NULL, node, cost_vec)
11268 || vectorizable_condition (vinfo, stmt_info,
11269 NULL, NULL, node, cost_vec)
11270 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11271 cost_vec)
11272 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11275 if (node)
11276 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11278 if (!ok)
11279 return opt_result::failure_at (stmt_info->stmt,
11280 "not vectorized:"
11281 " relevant stmt not supported: %G",
11282 stmt_info->stmt);
11284   /* Stmts that are (also) "live" (i.e. used outside of the loop)
11285 need extra handling, except for vectorizable reductions. */
11286 if (!bb_vinfo
11287 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11288 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11289 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11290 stmt_info, NULL, node, node_instance,
11291 false, cost_vec))
11292 return opt_result::failure_at (stmt_info->stmt,
11293 "not vectorized:"
11294 " live stmt not supported: %G",
11295 stmt_info->stmt);
11297 return opt_result::success ();
11301 /* Function vect_transform_stmt.
11303 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11305 bool
11306 vect_transform_stmt (vec_info *vinfo,
11307 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11308 slp_tree slp_node, slp_instance slp_node_instance)
11310 bool is_store = false;
11311 gimple *vec_stmt = NULL;
11312 bool done;
11314 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11316 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11317 if (slp_node)
11318 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11320 switch (STMT_VINFO_TYPE (stmt_info))
11322 case type_demotion_vec_info_type:
11323 case type_promotion_vec_info_type:
11324 case type_conversion_vec_info_type:
11325 done = vectorizable_conversion (vinfo, stmt_info,
11326 gsi, &vec_stmt, slp_node, NULL);
11327 gcc_assert (done);
11328 break;
11330 case induc_vec_info_type:
11331 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11332 stmt_info, &vec_stmt, slp_node,
11333 NULL);
11334 gcc_assert (done);
11335 break;
11337 case shift_vec_info_type:
11338 done = vectorizable_shift (vinfo, stmt_info,
11339 gsi, &vec_stmt, slp_node, NULL);
11340 gcc_assert (done);
11341 break;
11343 case op_vec_info_type:
11344 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11345 NULL);
11346 gcc_assert (done);
11347 break;
11349 case assignment_vec_info_type:
11350 done = vectorizable_assignment (vinfo, stmt_info,
11351 gsi, &vec_stmt, slp_node, NULL);
11352 gcc_assert (done);
11353 break;
11355 case load_vec_info_type:
11356 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11357 NULL);
11358 gcc_assert (done);
11359 break;
11361 case store_vec_info_type:
11362 done = vectorizable_store (vinfo, stmt_info,
11363 gsi, &vec_stmt, slp_node, NULL);
11364 gcc_assert (done);
11365 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11367 /* In case of interleaving, the whole chain is vectorized when the
11368 last store in the chain is reached. Store stmts before the last
11369        one are skipped, and their vec_stmt_info shouldn't be freed
11370 meanwhile. */
11371 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11372 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11373 is_store = true;
11375 else
11376 is_store = true;
11377 break;
11379 case condition_vec_info_type:
11380 done = vectorizable_condition (vinfo, stmt_info,
11381 gsi, &vec_stmt, slp_node, NULL);
11382 gcc_assert (done);
11383 break;
11385 case comparison_vec_info_type:
11386 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11387 slp_node, NULL);
11388 gcc_assert (done);
11389 break;
11391 case call_vec_info_type:
11392 done = vectorizable_call (vinfo, stmt_info,
11393 gsi, &vec_stmt, slp_node, NULL);
11394 break;
11396 case call_simd_clone_vec_info_type:
11397 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11398 slp_node, NULL);
11399 break;
11401 case reduc_vec_info_type:
11402 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11403 gsi, &vec_stmt, slp_node);
11404 gcc_assert (done);
11405 break;
11407 case cycle_phi_info_type:
11408 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11409 &vec_stmt, slp_node, slp_node_instance);
11410 gcc_assert (done);
11411 break;
11413 case lc_phi_info_type:
11414 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11415 stmt_info, &vec_stmt, slp_node);
11416 gcc_assert (done);
11417 break;
11419 case recurr_info_type:
11420 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
11421 stmt_info, &vec_stmt, slp_node, NULL);
11422 gcc_assert (done);
11423 break;
11425 case phi_info_type:
11426 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11427 gcc_assert (done);
11428 break;
11430 default:
11431 if (!STMT_VINFO_LIVE_P (stmt_info))
11433 if (dump_enabled_p ())
11434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11435 "stmt not supported.\n");
11436 gcc_unreachable ();
11438 done = true;
11441 if (!slp_node && vec_stmt)
11442 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11444 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11446 /* Handle stmts whose DEF is used outside the loop-nest that is
11447 being vectorized. */
11448 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11449 slp_node_instance, true, NULL);
11450 gcc_assert (done);
11453 if (slp_node)
11454 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11456 return is_store;
11460 /* Remove a group of stores (for SLP or interleaving), free their
11461 stmt_vec_info. */
11463 void
11464 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11466 stmt_vec_info next_stmt_info = first_stmt_info;
11468 while (next_stmt_info)
11470 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11471 next_stmt_info = vect_orig_stmt (next_stmt_info);
11472 /* Free the attached stmt_vec_info and remove the stmt. */
11473 vinfo->remove_stmt (next_stmt_info);
11474 next_stmt_info = tmp;
11478 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11479 elements of type SCALAR_TYPE, or null if the target doesn't support
11480 such a type.
11482 If NUNITS is zero, return a vector type that contains elements of
11483 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11485 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11486 for this vectorization region and want to "autodetect" the best choice.
11487 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11488 and we want the new type to be interoperable with it. PREVAILING_MODE
11489 in this case can be a scalar integer mode or a vector mode; when it
11490 is a vector mode, the function acts like a tree-level version of
11491 related_vector_mode. */
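/* Illustrative example (hypothetical target, not from the original sources):
   with PREVAILING_MODE == VOIDmode, SCALAR_TYPE "int" and NUNITS 0, a target
   whose preferred SIMD mode for SImode is V4SImode would make this function
   return a "vector(4) int" type.  */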
11493 tree
11494 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11495 tree scalar_type, poly_uint64 nunits)
11497 tree orig_scalar_type = scalar_type;
11498 scalar_mode inner_mode;
11499 machine_mode simd_mode;
11500 tree vectype;
11502 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11503 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11504 return NULL_TREE;
11506 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11508 /* Interoperability between modes requires one to be a constant multiple
11509 of the other, so that the number of vectors required for each operation
11510 is a compile-time constant. */
11511 if (prevailing_mode != VOIDmode
11512 && !constant_multiple_p (nunits * nbytes,
11513 GET_MODE_SIZE (prevailing_mode))
11514 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
11515 nunits * nbytes))
11516 return NULL_TREE;
11518 /* For vector types of elements whose mode precision doesn't
11519      match their type's precision we use an element type of mode
11520 precision. The vectorization routines will have to make sure
11521 they support the proper result truncation/extension.
11522 We also make sure to build vector types with INTEGER_TYPE
11523 component type only. */
11524 if (INTEGRAL_TYPE_P (scalar_type)
11525 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11526 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11527 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11528 TYPE_UNSIGNED (scalar_type));
11530 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11531 When the component mode passes the above test simply use a type
11532 corresponding to that mode. The theory is that any use that
11533 would cause problems with this will disable vectorization anyway. */
11534 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11535 && !INTEGRAL_TYPE_P (scalar_type))
11536 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11538 /* We can't build a vector type of elements with alignment bigger than
11539 their size. */
11540 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11541 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11542 TYPE_UNSIGNED (scalar_type));
11544   /* If we fell back to using the mode, fail if there was
11545 no scalar type for it. */
11546 if (scalar_type == NULL_TREE)
11547 return NULL_TREE;
11549 /* If no prevailing mode was supplied, use the mode the target prefers.
11550 Otherwise lookup a vector mode based on the prevailing mode. */
11551 if (prevailing_mode == VOIDmode)
11553 gcc_assert (known_eq (nunits, 0U));
11554 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11555 if (SCALAR_INT_MODE_P (simd_mode))
11557 /* Traditional behavior is not to take the integer mode
11558 literally, but simply to use it as a way of determining
11559 the vector size. It is up to mode_for_vector to decide
11560 what the TYPE_MODE should be.
11562 Note that nunits == 1 is allowed in order to support single
11563 element vector types. */
11564 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11565 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11566 return NULL_TREE;
11569 else if (SCALAR_INT_MODE_P (prevailing_mode)
11570 || !related_vector_mode (prevailing_mode,
11571 inner_mode, nunits).exists (&simd_mode))
11573 /* Fall back to using mode_for_vector, mostly in the hope of being
11574 able to use an integer mode. */
11575 if (known_eq (nunits, 0U)
11576 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11577 return NULL_TREE;
11579 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11580 return NULL_TREE;
11583 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11585 /* In cases where the mode was chosen by mode_for_vector, check that
11586 the target actually supports the chosen mode, or that it at least
11587 allows the vector mode to be replaced by a like-sized integer. */
11588 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11589 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11590 return NULL_TREE;
11592 /* Re-attach the address-space qualifier if we canonicalized the scalar
11593 type. */
11594 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11595 return build_qualified_type
11596 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11598 return vectype;
11601 /* Function get_vectype_for_scalar_type.
11603 Returns the vector type corresponding to SCALAR_TYPE as supported
11604 by the target. If GROUP_SIZE is nonzero and we're performing BB
11605 vectorization, make sure that the number of elements in the vector
11606 is no bigger than GROUP_SIZE. */
11608 tree
11609 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11610 unsigned int group_size)
11612 /* For BB vectorization, we should always have a group size once we've
11613 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11614 are tentative requests during things like early data reference
11615 analysis and pattern recognition. */
11616 if (is_a <bb_vec_info> (vinfo))
11617 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11618 else
11619 group_size = 0;
11621 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11622 scalar_type);
11623 if (vectype && vinfo->vector_mode == VOIDmode)
11624 vinfo->vector_mode = TYPE_MODE (vectype);
11626 /* Register the natural choice of vector type, before the group size
11627 has been applied. */
11628 if (vectype)
11629 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11631 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11632 try again with an explicit number of elements. */
11633 if (vectype
11634 && group_size
11635 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11637 /* Start with the biggest number of units that fits within
11638 GROUP_SIZE and halve it until we find a valid vector type.
11639 Usually either the first attempt will succeed or all will
11640 fail (in the latter case because GROUP_SIZE is too small
11641 for the target), but it's possible that a target could have
11642 a hole between supported vector types.
11644 If GROUP_SIZE is not a power of 2, this has the effect of
11645 trying the largest power of 2 that fits within the group,
11646 even though the group is not a multiple of that vector size.
11647 The BB vectorizer will then try to carve up the group into
11648 smaller pieces. */
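      /* Illustrative note (added here, not from the original sources):
	 for a GROUP_SIZE of 7, the loop below starts with nunits = 4
	 and halves it to 2 if no 4-element vector type is available.  */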
11649 unsigned int nunits = 1 << floor_log2 (group_size);
11652 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11653 scalar_type, nunits);
11654 nunits /= 2;
11656 while (nunits > 1 && !vectype);
11659 return vectype;
11662 /* Return the vector type corresponding to SCALAR_TYPE as supported
11663 by the target. NODE, if nonnull, is the SLP tree node that will
11664 use the returned vector type. */
11666 tree
11667 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11669 unsigned int group_size = 0;
11670 if (node)
11671 group_size = SLP_TREE_LANES (node);
11672 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11675 /* Function get_mask_type_for_scalar_type.
11677 Returns the mask type corresponding to a result of comparison
11678 of vectors of specified SCALAR_TYPE as supported by target.
11679 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11680 make sure that the number of elements in the vector is no bigger
11681 than GROUP_SIZE. */
11683 tree
11684 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11685 unsigned int group_size)
11687 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11689 if (!vectype)
11690 return NULL;
11692 return truth_type_for (vectype);
11695 /* Function get_same_sized_vectype
11697    Returns a vector type corresponding to SCALAR_TYPE with the same
11698    size as VECTOR_TYPE, if supported by the target.  */
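/* Illustrative example (hypothetical modes, not from the original sources):
   for SCALAR_TYPE "int" and a 16-byte VECTOR_TYPE such as a V4SFmode vector,
   the size calculation yields 4 units, so the result would be a V4SImode
   vector type if the target provides one.  */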
11700 tree
11701 get_same_sized_vectype (tree scalar_type, tree vector_type)
11703 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11704 return truth_type_for (vector_type);
11706 poly_uint64 nunits;
11707 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11708 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11709 return NULL_TREE;
11711 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11712 scalar_type, nunits);
11715 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11716 would not change the chosen vector modes. */
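/* Illustrative note (hypothetical modes, not from the original sources):
   if the region has so far used V8HImode and V4SImode, the check below
   requires related_vector_mode (VECTOR_MODE, HImode, 0) and
   related_vector_mode (VECTOR_MODE, SImode, 0) to yield exactly those
   two modes again.  */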
11718 bool
11719 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11721 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11722 i != vinfo->used_vector_modes.end (); ++i)
11723 if (!VECTOR_MODE_P (*i)
11724 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11725 return false;
11726 return true;
11729 /* Function vect_is_simple_use.
11731 Input:
11732 VINFO - the vect info of the loop or basic block that is being vectorized.
11733 OPERAND - operand in the loop or bb.
11734 Output:
11735 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11736 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11737 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11738 the definition could be anywhere in the function
11739 DT - the type of definition
11741 Returns whether a stmt with OPERAND can be vectorized.
11742 For loops, supportable operands are constants, loop invariants, and operands
11743 that are defined by the current iteration of the loop. Unsupportable
11744 operands are those that are defined by a previous iteration of the loop (as
11745 is the case in reduction/induction computations).
11746 For basic blocks, supportable operands are constants and bb invariants.
11747 For now, operands defined outside the basic block are not supported. */
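/* Illustrative example (added here, not from the original sources): for a
   loop statement x_1 = a_2 * 3, the SSA operand a_2 defined by a statement
   inside the vectorized region would typically be classified as
   vect_internal_def, while the literal 3 would be vect_constant_def.  */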
11749 bool
11750 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11751 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11753 if (def_stmt_info_out)
11754 *def_stmt_info_out = NULL;
11755 if (def_stmt_out)
11756 *def_stmt_out = NULL;
11757 *dt = vect_unknown_def_type;
11759 if (dump_enabled_p ())
11761 dump_printf_loc (MSG_NOTE, vect_location,
11762 "vect_is_simple_use: operand ");
11763 if (TREE_CODE (operand) == SSA_NAME
11764 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11765 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11766 else
11767 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11770 if (CONSTANT_CLASS_P (operand))
11771 *dt = vect_constant_def;
11772 else if (is_gimple_min_invariant (operand))
11773 *dt = vect_external_def;
11774 else if (TREE_CODE (operand) != SSA_NAME)
11775 *dt = vect_unknown_def_type;
11776 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11777 *dt = vect_external_def;
11778 else
11780 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11781 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11782 if (!stmt_vinfo)
11783 *dt = vect_external_def;
11784 else
11786 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11787 def_stmt = stmt_vinfo->stmt;
11788 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11789 if (def_stmt_info_out)
11790 *def_stmt_info_out = stmt_vinfo;
11792 if (def_stmt_out)
11793 *def_stmt_out = def_stmt;
11796 if (dump_enabled_p ())
11798 dump_printf (MSG_NOTE, ", type of def: ");
11799 switch (*dt)
11801 case vect_uninitialized_def:
11802 dump_printf (MSG_NOTE, "uninitialized\n");
11803 break;
11804 case vect_constant_def:
11805 dump_printf (MSG_NOTE, "constant\n");
11806 break;
11807 case vect_external_def:
11808 dump_printf (MSG_NOTE, "external\n");
11809 break;
11810 case vect_internal_def:
11811 dump_printf (MSG_NOTE, "internal\n");
11812 break;
11813 case vect_induction_def:
11814 dump_printf (MSG_NOTE, "induction\n");
11815 break;
11816 case vect_reduction_def:
11817 dump_printf (MSG_NOTE, "reduction\n");
11818 break;
11819 case vect_double_reduction_def:
11820 dump_printf (MSG_NOTE, "double reduction\n");
11821 break;
11822 case vect_nested_cycle:
11823 dump_printf (MSG_NOTE, "nested cycle\n");
11824 break;
11825 case vect_first_order_recurrence:
11826 dump_printf (MSG_NOTE, "first order recurrence\n");
11827 break;
11828 case vect_unknown_def_type:
11829 dump_printf (MSG_NOTE, "unknown\n");
11830 break;
11834 if (*dt == vect_unknown_def_type)
11836 if (dump_enabled_p ())
11837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11838 "Unsupported pattern.\n");
11839 return false;
11842 return true;
11845 /* Function vect_is_simple_use.
11847 Same as vect_is_simple_use but also determines the vector operand
11848 type of OPERAND and stores it to *VECTYPE. If the definition of
11849 OPERAND is vect_uninitialized_def, vect_constant_def or
11850 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11851    is responsible for computing the best suited vector type for the
11852 scalar operand. */
11854 bool
11855 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11856 tree *vectype, stmt_vec_info *def_stmt_info_out,
11857 gimple **def_stmt_out)
11859 stmt_vec_info def_stmt_info;
11860 gimple *def_stmt;
11861 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11862 return false;
11864 if (def_stmt_out)
11865 *def_stmt_out = def_stmt;
11866 if (def_stmt_info_out)
11867 *def_stmt_info_out = def_stmt_info;
11869 /* Now get a vector type if the def is internal, otherwise supply
11870 NULL_TREE and leave it up to the caller to figure out a proper
11871 type for the use stmt. */
11872 if (*dt == vect_internal_def
11873 || *dt == vect_induction_def
11874 || *dt == vect_reduction_def
11875 || *dt == vect_double_reduction_def
11876 || *dt == vect_nested_cycle
11877 || *dt == vect_first_order_recurrence)
11879 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11880 gcc_assert (*vectype != NULL_TREE);
11881 if (dump_enabled_p ())
11882 dump_printf_loc (MSG_NOTE, vect_location,
11883 "vect_is_simple_use: vectype %T\n", *vectype);
11885 else if (*dt == vect_uninitialized_def
11886 || *dt == vect_constant_def
11887 || *dt == vect_external_def)
11888 *vectype = NULL_TREE;
11889 else
11890 gcc_unreachable ();
11892 return true;
11895 /* Function vect_is_simple_use.
11897 Same as vect_is_simple_use but determines the operand by operand
11898 position OPERAND from either STMT or SLP_NODE, filling in *OP
11899 and *SLP_DEF (when SLP_NODE is not NULL). */
11901 bool
11902 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11903 unsigned operand, tree *op, slp_tree *slp_def,
11904 enum vect_def_type *dt,
11905 tree *vectype, stmt_vec_info *def_stmt_info_out)
11907 if (slp_node)
11909 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11910 *slp_def = child;
11911 *vectype = SLP_TREE_VECTYPE (child);
11912 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11914 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11915 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11917 else
11919 if (def_stmt_info_out)
11920 *def_stmt_info_out = NULL;
11921 *op = SLP_TREE_SCALAR_OPS (child)[0];
11922 *dt = SLP_TREE_DEF_TYPE (child);
11923 return true;
11926 else
11928 *slp_def = NULL;
11929 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11931 if (gimple_assign_rhs_code (ass) == COND_EXPR
11932 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11934 if (operand < 2)
11935 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11936 else
11937 *op = gimple_op (ass, operand);
11939 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11940 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11941 else
11942 *op = gimple_op (ass, operand + 1);
11944 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11945 *op = gimple_call_arg (call, operand);
11946 else
11947 gcc_unreachable ();
11948 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11952 /* If OP is not NULL and is external or constant update its vector
11953 type with VECTYPE. Returns true if successful or false if not,
11954 for example when conflicting vector types are present. */
11956 bool
11957 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11959 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11960 return true;
11961 if (SLP_TREE_VECTYPE (op))
11962 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11963 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
11964      should be handled by patterns.  Allow vect_constant_def for now.  */
11965 if (VECTOR_BOOLEAN_TYPE_P (vectype)
11966 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
11967 return false;
11968 SLP_TREE_VECTYPE (op) = vectype;
11969 return true;
11972 /* Function supportable_widening_operation
11974 Check whether an operation represented by the code CODE is a
11975 widening operation that is supported by the target platform in
11976 vector form (i.e., when operating on arguments of type VECTYPE_IN
11977 producing a result of type VECTYPE_OUT).
11979 Widening operations we currently support are NOP (CONVERT), FLOAT,
11980 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11981 are supported by the target platform either directly (via vector
11982 tree-codes), or via target builtins.
11984 Output:
11985 - CODE1 and CODE2 are codes of vector operations to be used when
11986 vectorizing the operation, if available.
11987 - MULTI_STEP_CVT determines the number of required intermediate steps in
11988 case of multi-step conversion (like char->short->int - in that case
11989 MULTI_STEP_CVT will be 1).
11990 - INTERM_TYPES contains the intermediate type required to perform the
11991 widening operation (short in the above example). */
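/* Illustrative note (hypothetical modes, not from the original sources):
   for a char->int conversion on a target that only provides V16QI->V8HI
   and V8HI->V4SI unpacks, the multi-step path below would record the
   short vector type in INTERM_TYPES and set *MULTI_STEP_CVT to 1,
   matching the char->short->int example above.  */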
11993 bool
11994 supportable_widening_operation (vec_info *vinfo,
11995 enum tree_code code, stmt_vec_info stmt_info,
11996 tree vectype_out, tree vectype_in,
11997 enum tree_code *code1, enum tree_code *code2,
11998 int *multi_step_cvt,
11999 vec<tree> *interm_types)
12001 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
12002 class loop *vect_loop = NULL;
12003 machine_mode vec_mode;
12004 enum insn_code icode1, icode2;
12005 optab optab1, optab2;
12006 tree vectype = vectype_in;
12007 tree wide_vectype = vectype_out;
12008 enum tree_code c1, c2;
12009 int i;
12010 tree prev_type, intermediate_type;
12011 machine_mode intermediate_mode, prev_mode;
12012 optab optab3, optab4;
12014 *multi_step_cvt = 0;
12015 if (loop_info)
12016 vect_loop = LOOP_VINFO_LOOP (loop_info);
12018 switch (code)
12020 case WIDEN_MULT_EXPR:
12021 /* The result of a vectorized widening operation usually requires
12022 two vectors (because the widened results do not fit into one vector).
12023 The generated vector results would normally be expected to be
12024 generated in the same order as in the original scalar computation,
12025 i.e. if 8 results are generated in each vector iteration, they are
12026 to be organized as follows:
12027 vect1: [res1,res2,res3,res4],
12028 vect2: [res5,res6,res7,res8].
12030 However, in the special case that the result of the widening
12031 operation is used in a reduction computation only, the order doesn't
12032 matter (because when vectorizing a reduction we change the order of
12033 the computation). Some targets can take advantage of this and
12034 generate more efficient code. For example, targets like Altivec,
12035 that support widen_mult using a sequence of {mult_even,mult_odd}
12036 generate the following vectors:
12037 vect1: [res1,res3,res5,res7],
12038 vect2: [res2,res4,res6,res8].
12040 When vectorizing outer-loops, we execute the inner-loop sequentially
12041 (each vectorized inner-loop iteration contributes to VF outer-loop
12042        iterations in parallel).  We therefore don't allow changing the
12043 order of the computation in the inner-loop during outer-loop
12044 vectorization. */
12045 /* TODO: Another case in which order doesn't *really* matter is when we
12046 widen and then contract again, e.g. (short)((int)x * y >> 8).
12047 Normally, pack_trunc performs an even/odd permute, whereas the
12048 repack from an even/odd expansion would be an interleave, which
12049 would be significantly simpler for e.g. AVX2. */
12050 /* In any case, in order to avoid duplicating the code below, recurse
12051 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
12052 are properly set up for the caller. If we fail, we'll continue with
12053 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
12054 if (vect_loop
12055 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
12056 && !nested_in_vect_loop_p (vect_loop, stmt_info)
12057 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
12058 stmt_info, vectype_out,
12059 vectype_in, code1, code2,
12060 multi_step_cvt, interm_types))
12062 /* Elements in a vector with vect_used_by_reduction property cannot
12063 be reordered if the use chain with this property does not have the
12064          same operation.  One such example is s += a * b, where elements
12065 in a and b cannot be reordered. Here we check if the vector defined
12066 by STMT is only directly used in the reduction statement. */
12067 tree lhs = gimple_assign_lhs (stmt_info->stmt);
12068 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12069 if (use_stmt_info
12070 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12071 return true;
12073 c1 = VEC_WIDEN_MULT_LO_EXPR;
12074 c2 = VEC_WIDEN_MULT_HI_EXPR;
12075 break;
12077 case DOT_PROD_EXPR:
12078 c1 = DOT_PROD_EXPR;
12079 c2 = DOT_PROD_EXPR;
12080 break;
12082 case SAD_EXPR:
12083 c1 = SAD_EXPR;
12084 c2 = SAD_EXPR;
12085 break;
12087 case VEC_WIDEN_MULT_EVEN_EXPR:
12088 /* Support the recursion induced just above. */
12089 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12090 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12091 break;
12093 case WIDEN_LSHIFT_EXPR:
12094 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12095 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12096 break;
12098 case WIDEN_PLUS_EXPR:
12099 c1 = VEC_WIDEN_PLUS_LO_EXPR;
12100 c2 = VEC_WIDEN_PLUS_HI_EXPR;
12101 break;
12103 case WIDEN_MINUS_EXPR:
12104 c1 = VEC_WIDEN_MINUS_LO_EXPR;
12105 c2 = VEC_WIDEN_MINUS_HI_EXPR;
12106 break;
12108 CASE_CONVERT:
12109 c1 = VEC_UNPACK_LO_EXPR;
12110 c2 = VEC_UNPACK_HI_EXPR;
12111 break;
12113 case FLOAT_EXPR:
12114 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12115 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12116 break;
12118 case FIX_TRUNC_EXPR:
12119 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12120 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12121 break;
12123 default:
12124 gcc_unreachable ();
12127 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12128 std::swap (c1, c2);
12130 if (code == FIX_TRUNC_EXPR)
12132       /* The signedness is determined from the output operand.  */
12133 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12134 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12136 else if (CONVERT_EXPR_CODE_P (code)
12137 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12138 && VECTOR_BOOLEAN_TYPE_P (vectype)
12139 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12140 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12142 /* If the input and result modes are the same, a different optab
12143 is needed where we pass in the number of units in vectype. */
12144 optab1 = vec_unpacks_sbool_lo_optab;
12145 optab2 = vec_unpacks_sbool_hi_optab;
12147 else
12149 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12150 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12153 if (!optab1 || !optab2)
12154 return false;
12156 vec_mode = TYPE_MODE (vectype);
12157 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12158 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12159 return false;
12161 *code1 = c1;
12162 *code2 = c2;
12164 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12165 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12167 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12168 return true;
12169 /* For scalar masks we may have different boolean
12170 vector types having the same QImode. Thus we
12171          add an additional check for the number of elements.  */
12172 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12173 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12174 return true;
12177 /* Check if it's a multi-step conversion that can be done using intermediate
12178 types. */
12180 prev_type = vectype;
12181 prev_mode = vec_mode;
12183 if (!CONVERT_EXPR_CODE_P (code))
12184 return false;
12186 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12187      intermediate steps in the promotion sequence.  We try
12188      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12189 not. */
12190 interm_types->create (MAX_INTERM_CVT_STEPS);
12191 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12193 intermediate_mode = insn_data[icode1].operand[0].mode;
12194 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12195 intermediate_type
12196 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12197 else if (VECTOR_MODE_P (intermediate_mode))
12199 tree intermediate_element_type
12200 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
12201 TYPE_UNSIGNED (prev_type));
12202 intermediate_type
12203 = build_vector_type_for_mode (intermediate_element_type,
12204 intermediate_mode);
12206 else
12207 intermediate_type
12208 = lang_hooks.types.type_for_mode (intermediate_mode,
12209 TYPE_UNSIGNED (prev_type));
12211 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12212 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12213 && intermediate_mode == prev_mode
12214 && SCALAR_INT_MODE_P (prev_mode))
12216 /* If the input and result modes are the same, a different optab
12217 is needed where we pass in the number of units in vectype. */
12218 optab3 = vec_unpacks_sbool_lo_optab;
12219 optab4 = vec_unpacks_sbool_hi_optab;
12221 else
12223 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12224 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12227 if (!optab3 || !optab4
12228 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12229 || insn_data[icode1].operand[0].mode != intermediate_mode
12230 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12231 || insn_data[icode2].operand[0].mode != intermediate_mode
12232 || ((icode1 = optab_handler (optab3, intermediate_mode))
12233 == CODE_FOR_nothing)
12234 || ((icode2 = optab_handler (optab4, intermediate_mode))
12235 == CODE_FOR_nothing))
12236 break;
12238 interm_types->quick_push (intermediate_type);
12239 (*multi_step_cvt)++;
12241 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12242 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12244 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12245 return true;
12246 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12247 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12248 return true;
12251 prev_type = intermediate_type;
12252 prev_mode = intermediate_mode;
12255 interm_types->release ();
12256 return false;
12260 /* Function supportable_narrowing_operation
12262 Check whether an operation represented by the code CODE is a
12263 narrowing operation that is supported by the target platform in
12264 vector form (i.e., when operating on arguments of type VECTYPE_IN
12265 and producing a result of type VECTYPE_OUT).
12267 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12268 and FLOAT. This function checks if these operations are supported by
12269 the target platform directly via vector tree-codes.
12271 Output:
12272 - CODE1 is the code of a vector operation to be used when
12273 vectorizing the operation, if available.
12274 - MULTI_STEP_CVT determines the number of required intermediate steps in
12275 case of multi-step conversion (like int->short->char - in that case
12276 MULTI_STEP_CVT will be 1).
12277 - INTERM_TYPES contains the intermediate type required to perform the
12278 narrowing operation (short in the above example). */
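/* Illustrative note (hypothetical modes, not from the original sources):
   for an int->char conversion on a target that only provides V4SI->V8HI
   and V8HI->V16QI packs, the multi-step path below would record the
   short vector type in INTERM_TYPES and set *MULTI_STEP_CVT to 1,
   matching the int->short->char example above.  */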
12280 bool
12281 supportable_narrowing_operation (enum tree_code code,
12282 tree vectype_out, tree vectype_in,
12283 enum tree_code *code1, int *multi_step_cvt,
12284 vec<tree> *interm_types)
12286 machine_mode vec_mode;
12287 enum insn_code icode1;
12288 optab optab1, interm_optab;
12289 tree vectype = vectype_in;
12290 tree narrow_vectype = vectype_out;
12291 enum tree_code c1;
12292 tree intermediate_type, prev_type;
12293 machine_mode intermediate_mode, prev_mode;
12294 int i;
12295 unsigned HOST_WIDE_INT n_elts;
12296 bool uns;
12298 *multi_step_cvt = 0;
12299 switch (code)
12301 CASE_CONVERT:
12302 c1 = VEC_PACK_TRUNC_EXPR;
12303 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12304 && VECTOR_BOOLEAN_TYPE_P (vectype)
12305 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
12306 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
12307 && n_elts < BITS_PER_UNIT)
12308 optab1 = vec_pack_sbool_trunc_optab;
12309 else
12310 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12311 break;
12313 case FIX_TRUNC_EXPR:
12314 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12315       /* The signedness is determined from the output operand.  */
12316 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12317 break;
12319 case FLOAT_EXPR:
12320 c1 = VEC_PACK_FLOAT_EXPR;
12321 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12322 break;
12324 default:
12325 gcc_unreachable ();
12328 if (!optab1)
12329 return false;
12331 vec_mode = TYPE_MODE (vectype);
12332 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12333 return false;
12335 *code1 = c1;
12337 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12339 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12340 return true;
12341 /* For scalar masks we may have different boolean
12342 vector types having the same QImode. Thus we
12343          add an additional check for the number of elements.  */
12344 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12345 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12346 return true;
12349 if (code == FLOAT_EXPR)
12350 return false;
12352 /* Check if it's a multi-step conversion that can be done using intermediate
12353 types. */
12354 prev_mode = vec_mode;
12355 prev_type = vectype;
12356 if (code == FIX_TRUNC_EXPR)
12357 uns = TYPE_UNSIGNED (vectype_out);
12358 else
12359 uns = TYPE_UNSIGNED (vectype);
12361 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12362 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12363 costly than signed. */
12364 if (code == FIX_TRUNC_EXPR && uns)
12366 enum insn_code icode2;
12368 intermediate_type
12369 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12370 interm_optab
12371 = optab_for_tree_code (c1, intermediate_type, optab_default);
12372 if (interm_optab != unknown_optab
12373 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12374 && insn_data[icode1].operand[0].mode
12375 == insn_data[icode2].operand[0].mode)
12377 uns = false;
12378 optab1 = interm_optab;
12379 icode1 = icode2;
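/* For example, when VECTYPE_OUT is a vector of unsigned short, the block
   above considers the signed short vector type with the same mode; when the
   signed pack optab is usable and produces the same vector mode, the
   remaining narrowing steps are generated as signed.  */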
12383 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12384 intermediate steps in the narrowing sequence. We try
12385 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
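/* For example, with 128-bit vectors a V4SI -> V16QI narrowing needs two
   VEC_PACK_TRUNC steps: V4SI pairs pack to V8HI (the short vector type is
   pushed onto INTERM_TYPES and *MULTI_STEP_CVT becomes 1), and V8HI pairs
   pack to V16QI.  */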
12386 interm_types->create (MAX_INTERM_CVT_STEPS);
12387 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12389 intermediate_mode = insn_data[icode1].operand[0].mode;
12390 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12391 intermediate_type
12392 = vect_double_mask_nunits (prev_type, intermediate_mode);
12393 else
12394 intermediate_type
12395 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12396 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12397 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12398 && SCALAR_INT_MODE_P (prev_mode)
12399 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
12400 && n_elts < BITS_PER_UNIT)
12401 interm_optab = vec_pack_sbool_trunc_optab;
12402 else
12403 interm_optab
12404 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12405 optab_default);
12406 if (!interm_optab
12407 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12408 || insn_data[icode1].operand[0].mode != intermediate_mode
12409 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12410 == CODE_FOR_nothing))
12411 break;
12413 interm_types->quick_push (intermediate_type);
12414 (*multi_step_cvt)++;
12416 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12418 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12419 return true;
12420 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12421 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12422 return true;
12425 prev_mode = intermediate_mode;
12426 prev_type = intermediate_type;
12427 optab1 = interm_optab;
12430 interm_types->release ();
12431 return false;
12434 /* Generate and return a vector mask of MASK_TYPE such that
12435 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12436 Add the statements to SEQ. */
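/* For example, with START_INDEX 2 and END_INDEX 5 the first three elements
   of the returned mask are true and all remaining elements are false.  */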
12438 tree
12439 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12440 tree end_index, const char *name)
12442 tree cmp_type = TREE_TYPE (start_index);
12443 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12444 cmp_type, mask_type,
12445 OPTIMIZE_FOR_SPEED));
12446 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12447 start_index, end_index,
12448 build_zero_cst (mask_type));
12449 tree tmp;
12450 if (name)
12451 tmp = make_temp_ssa_name (mask_type, NULL, name);
12452 else
12453 tmp = make_ssa_name (mask_type);
12454 gimple_call_set_lhs (call, tmp);
12455 gimple_seq_add_stmt (seq, call);
12456 return tmp;
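/* The statement added to SEQ above is a single internal-function call of
   roughly the form

     mask = .WHILE_ULT (START_INDEX, END_INDEX, { 0, ... });

   (dump syntax shown only as an illustration).  */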
12459 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12460 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12462 tree
12463 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12464 tree end_index)
12466 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12467 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
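/* Continuing the example above, with START_INDEX 2 and END_INDEX 5 the
   returned mask has its first three elements false and the rest true.  */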
12470 /* Try to compute the vector types required to vectorize STMT_INFO,
12471 returning true on success and false if vectorization isn't possible.
12472 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12473 make sure that the number of elements in the vectors is no bigger
12474 than GROUP_SIZE.
12476 On success:
12478 - Set *STMT_VECTYPE_OUT to:
12479 - NULL_TREE if the statement doesn't need to be vectorized;
12480 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12482 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12483 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12484 statement does not help to determine the overall number of units. */
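/* Illustrative example, assuming 128-bit vectors: for a widening assignment
   such as int_dest = (int) short_src, *STMT_VECTYPE_OUT would be the
   4-element int vector type, while *NUNITS_VECTYPE_OUT would be the
   8-element short vector type derived from the smallest scalar type, so the
   vectorization factor for this statement is driven by 8 units.  */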
12486 opt_result
12487 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12488 tree *stmt_vectype_out,
12489 tree *nunits_vectype_out,
12490 unsigned int group_size)
12492 gimple *stmt = stmt_info->stmt;
12494 /* For BB vectorization, we should always have a group size once we've
12495 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12496 are tentative requests during things like early data reference
12497 analysis and pattern recognition. */
12498 if (is_a <bb_vec_info> (vinfo))
12499 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12500 else
12501 group_size = 0;
12503 *stmt_vectype_out = NULL_TREE;
12504 *nunits_vectype_out = NULL_TREE;
12506 if (gimple_get_lhs (stmt) == NULL_TREE
12507 /* MASK_STORE has no lhs, but is ok. */
12508 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12510 if (is_a <gcall *> (stmt))
12512 /* Ignore calls with no lhs. These must be calls to
12513 #pragma omp simd functions, and the vectorization factor
12514 they really need can't be determined until
12515 vectorizable_simd_clone_call. */
12516 if (dump_enabled_p ())
12517 dump_printf_loc (MSG_NOTE, vect_location,
12518 "defer to SIMD clone analysis.\n");
12519 return opt_result::success ();
12522 return opt_result::failure_at (stmt,
12523 "not vectorized: irregular stmt.%G", stmt);
12526 tree vectype;
12527 tree scalar_type = NULL_TREE;
12528 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12530 vectype = STMT_VINFO_VECTYPE (stmt_info);
12531 if (dump_enabled_p ())
12532 dump_printf_loc (MSG_NOTE, vect_location,
12533 "precomputed vectype: %T\n", vectype);
12535 else if (vect_use_mask_type_p (stmt_info))
12537 unsigned int precision = stmt_info->mask_precision;
12538 scalar_type = build_nonstandard_integer_type (precision, 1);
12539 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12540 if (!vectype)
12541 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12542 " data-type %T\n", scalar_type);
12543 if (dump_enabled_p ())
12544 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12546 else
12548 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12549 scalar_type = TREE_TYPE (DR_REF (dr));
12550 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12551 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12552 else
12553 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12555 if (dump_enabled_p ())
12557 if (group_size)
12558 dump_printf_loc (MSG_NOTE, vect_location,
12559 "get vectype for scalar type (group size %d):"
12560 " %T\n", group_size, scalar_type);
12561 else
12562 dump_printf_loc (MSG_NOTE, vect_location,
12563 "get vectype for scalar type: %T\n", scalar_type);
12565 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12566 if (!vectype)
12567 return opt_result::failure_at (stmt,
12568 "not vectorized:"
12569 " unsupported data-type %T\n",
12570 scalar_type);
12572 if (dump_enabled_p ())
12573 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12576 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12577 return opt_result::failure_at (stmt,
12578 "not vectorized: vector stmt in loop:%G",
12579 stmt);
12581 *stmt_vectype_out = vectype;
12583 /* Don't try to compute scalar types if the stmt produces a boolean
12584 vector; use the existing vector type instead. */
12585 tree nunits_vectype = vectype;
12586 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12588 /* The number of units is set according to the smallest scalar
12589 type (or the largest vector size, but we only support one
12590 vector size per vectorization). */
12591 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12592 TREE_TYPE (vectype));
12593 if (scalar_type != TREE_TYPE (vectype))
12595 if (dump_enabled_p ())
12596 dump_printf_loc (MSG_NOTE, vect_location,
12597 "get vectype for smallest scalar type: %T\n",
12598 scalar_type);
12599 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12600 group_size);
12601 if (!nunits_vectype)
12602 return opt_result::failure_at
12603 (stmt, "not vectorized: unsupported data-type %T\n",
12604 scalar_type);
12605 if (dump_enabled_p ())
12606 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12607 nunits_vectype);
12611 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12612 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12613 return opt_result::failure_at (stmt,
12614 "Not vectorized: Incompatible number "
12615 "of vector subparts between %T and %T\n",
12616 nunits_vectype, *stmt_vectype_out);
12618 if (dump_enabled_p ())
12620 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12621 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12622 dump_printf (MSG_NOTE, "\n");
12625 *nunits_vectype_out = nunits_vectype;
12626 return opt_result::success ();
12629 /* Generate and return statement sequence that sets vector length LEN that is:
12631 min_of_start_and_end = min (START_INDEX, END_INDEX);
12632 left_len = END_INDEX - min_of_start_and_end;
12633 rhs = min (left_len, LEN_LIMIT);
12634 LEN = rhs;
12636 Note: the cost of the code generated by this function is modeled
12637 by vect_estimate_min_profitable_iters, so changes here may need
12638 corresponding changes there. */
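/* For instance, with START_INDEX 3, END_INDEX 10 and LEN_LIMIT 4 the
   generated sequence computes min (3, 10) = 3, left_len = 7 and
   LEN = min (7, 4) = 4; with START_INDEX 8 it computes left_len = 2
   and LEN = 2.  */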
12640 gimple_seq
12641 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12643 gimple_seq stmts = NULL;
12644 tree len_type = TREE_TYPE (len);
12645 gcc_assert (TREE_TYPE (start_index) == len_type);
12647 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12648 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12649 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12650 gimple* stmt = gimple_build_assign (len, rhs);
12651 gimple_seq_add_stmt (&stmts, stmt);
12653 return stmts;