1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
57 #include "regs.h"
58 #include "attribs.h"
60 /* For lang_hooks.types.type_for_mode. */
61 #include "langhooks.h"
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (class _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
79 class loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
93 static unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind,
96 stmt_vec_info stmt_info, slp_tree node,
97 tree vectype, int misalign,
98 enum vect_cost_model_location where)
100 if ((kind == vector_load || kind == unaligned_load)
101 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
102 kind = vector_gather_load;
103 if ((kind == vector_store || kind == unaligned_store)
104 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
105 kind = vector_scatter_store;
107 stmt_info_for_cost si
108 = { count, kind, where, stmt_info, node, vectype, misalign };
109 body_cost_vec->safe_push (si);
111 return (unsigned)
112 (builtin_vectorization_cost (kind, vectype, misalign) * count);
115 unsigned
116 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
117 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
118 tree vectype, int misalign,
119 enum vect_cost_model_location where)
121 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
122 vectype, misalign, where);
125 unsigned
126 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
127 enum vect_cost_for_stmt kind, slp_tree node,
128 tree vectype, int misalign,
129 enum vect_cost_model_location where)
131 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
132 vectype, misalign, where);
135 unsigned
136 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
137 enum vect_cost_for_stmt kind,
138 enum vect_cost_model_location where)
140 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
141 || kind == scalar_stmt);
142 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
143 NULL_TREE, 0, where);
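/* Illustrative usage only (not part of the vectorizer itself), assuming the
   caller has COST_VEC, STMT_INFO, VECTYPE and MISALIGN in scope: costing two
   copies of an unaligned vector load in the loop body would look like
     record_stmt_cost (cost_vec, 2, unaligned_load, stmt_info, vectype,
		       misalign, vect_body);
   which queues the entry in COST_VEC and returns a preliminary estimate via
   builtin_vectorization_cost.  Note that the base overload above retargets
   (unaligned) loads and stores of gather/scatter statements to
   vector_gather_load / vector_scatter_store.  */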
146 /* Return a variable of type ELEM_TYPE[NELEMS]. */
148 static tree
149 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
151 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
152 "vect_array");
155 /* ARRAY is an array of vectors created by create_vector_array.
156 Return an SSA_NAME for the vector in index N. The reference
157 is part of the vectorization of STMT_INFO and the vector is associated
158 with scalar destination SCALAR_DEST. */
160 static tree
161 read_vector_array (vec_info *vinfo,
162 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
163 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
165 tree vect_type, vect, vect_name, array_ref;
166 gimple *new_stmt;
168 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
169 vect_type = TREE_TYPE (TREE_TYPE (array));
170 vect = vect_create_destination_var (scalar_dest, vect_type);
171 array_ref = build4 (ARRAY_REF, vect_type, array,
172 build_int_cst (size_type_node, n),
173 NULL_TREE, NULL_TREE);
175 new_stmt = gimple_build_assign (vect, array_ref);
176 vect_name = make_ssa_name (vect, new_stmt);
177 gimple_assign_set_lhs (new_stmt, vect_name);
178 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
180 return vect_name;
183 /* ARRAY is an array of vectors created by create_vector_array.
184 Emit code to store SSA_NAME VECT in index N of the array.
185 The store is part of the vectorization of STMT_INFO. */
187 static void
188 write_vector_array (vec_info *vinfo,
189 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
190 tree vect, tree array, unsigned HOST_WIDE_INT n)
192 tree array_ref;
193 gimple *new_stmt;
195 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
196 build_int_cst (size_type_node, n),
197 NULL_TREE, NULL_TREE);
199 new_stmt = gimple_build_assign (array_ref, vect);
200 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
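/* Illustrative sketch of the GIMPLE these two helpers emit (SSA names are
   made up for the example): for N == 2, read_vector_array produces roughly
     vect_x.7_12 = vect_array[2];
   and write_vector_array the mirror image
     vect_array[2] = vect_y_15;
   i.e. a plain ARRAY_REF load or store on the temporary array created by
   create_vector_array.  */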
203 /* PTR is a pointer to an array of type TYPE. Return a representation
204 of *PTR. The memory reference replaces those in FIRST_DR
205 (and its group). */
207 static tree
208 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
210 tree mem_ref;
212 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
213 /* Arrays have the same alignment as their type. */
214 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
215 return mem_ref;
218 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
219 Emit the clobber before *GSI. */
221 static void
222 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
223 gimple_stmt_iterator *gsi, tree var)
225 tree clobber = build_clobber (TREE_TYPE (var));
226 gimple *new_stmt = gimple_build_assign (var, clobber);
227 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
230 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
232 /* Function vect_mark_relevant.
234 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
236 static void
237 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
238 enum vect_relevant relevant, bool live_p)
240 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
241 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "mark relevant %d, live %d: %G", relevant, live_p,
246 stmt_info->stmt);
248 /* If this stmt is an original stmt in a pattern, we might need to mark its
249 related pattern stmt instead of the original stmt. However, such stmts
250 may have their own uses that are not in any pattern; in such cases the
251 stmt itself should be marked. */
252 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
254 /* This is the last stmt in a sequence that was detected as a
255 pattern that can potentially be vectorized. Don't mark the stmt
256 as relevant/live because it's not going to be vectorized.
257 Instead mark the pattern-stmt that replaces it. */
259 if (dump_enabled_p ())
260 dump_printf_loc (MSG_NOTE, vect_location,
261 "last stmt in pattern. don't mark"
262 " relevant/live.\n");
263 stmt_vec_info old_stmt_info = stmt_info;
264 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
265 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
266 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
267 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
270 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
271 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
272 STMT_VINFO_RELEVANT (stmt_info) = relevant;
274 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
275 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
277 if (dump_enabled_p ())
278 dump_printf_loc (MSG_NOTE, vect_location,
279 "already marked relevant/live.\n");
280 return;
283 worklist->safe_push (stmt_info);
287 /* Function is_simple_and_all_uses_invariant
289 Return true if STMT_INFO is simple and all uses of it are invariant. */
291 bool
292 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
293 loop_vec_info loop_vinfo)
295 tree op;
296 ssa_op_iter iter;
298 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
299 if (!stmt)
300 return false;
302 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
304 enum vect_def_type dt = vect_uninitialized_def;
306 if (!vect_is_simple_use (op, loop_vinfo, &dt))
308 if (dump_enabled_p ())
309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
310 "use not simple.\n");
311 return false;
314 if (dt != vect_external_def && dt != vect_constant_def)
315 return false;
317 return true;
320 /* Function vect_stmt_relevant_p.
322 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
323 is "relevant for vectorization".
325 A stmt is considered "relevant for vectorization" if:
326 - it has uses outside the loop.
327 - it has vdefs (it alters memory).
328 - it is a control stmt in the loop (other than the exit condition).
330 CHECKME: what other side effects would the vectorizer allow? */
332 static bool
333 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
334 enum vect_relevant *relevant, bool *live_p)
336 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
337 ssa_op_iter op_iter;
338 imm_use_iterator imm_iter;
339 use_operand_p use_p;
340 def_operand_p def_p;
342 *relevant = vect_unused_in_scope;
343 *live_p = false;
345 /* cond stmt other than loop exit cond. */
346 if (is_ctrl_stmt (stmt_info->stmt)
347 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
348 *relevant = vect_used_in_scope;
350 /* changing memory. */
351 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
352 if (gimple_vdef (stmt_info->stmt)
353 && !gimple_clobber_p (stmt_info->stmt))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt has vdefs.\n");
358 *relevant = vect_used_in_scope;
361 /* uses outside the loop. */
362 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
364 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
366 basic_block bb = gimple_bb (USE_STMT (use_p));
367 if (!flow_bb_inside_loop_p (loop, bb))
369 if (is_gimple_debug (USE_STMT (use_p)))
370 continue;
372 if (dump_enabled_p ())
373 dump_printf_loc (MSG_NOTE, vect_location,
374 "vec_stmt_relevant_p: used out of loop.\n");
376 /* We expect all such uses to be in the loop exit phis
377 (because of loop closed form) */
378 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
379 gcc_assert (bb == single_exit (loop)->dest);
381 *live_p = true;
386 if (*live_p && *relevant == vect_unused_in_scope
387 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
389 if (dump_enabled_p ())
390 dump_printf_loc (MSG_NOTE, vect_location,
391 "vec_stmt_relevant_p: stmt live but not relevant.\n");
392 *relevant = vect_used_only_live;
395 return (*live_p || *relevant);
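/* Illustrative example (hypothetical loop): in
     for (i = 0; i < n; i++)
       { a[i] = b[i] + 1; s = b[i]; }
   the store to a[i] has a vdef and is therefore marked vect_used_in_scope,
   while the load feeding s, whose value is used after the loop through the
   loop-closed exit PHI, is marked live (*LIVE_P) and, not being invariant,
   ends up vect_used_only_live.  */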
399 /* Function exist_non_indexing_operands_for_use_p
401 USE is one of the uses attached to STMT_INFO. Check if USE is
402 used in STMT_INFO for anything other than indexing an array. */
404 static bool
405 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
407 tree operand;
409 /* USE corresponds to some operand in STMT. If there is no data
410 reference in STMT, then any operand that corresponds to USE
411 is not indexing an array. */
412 if (!STMT_VINFO_DATA_REF (stmt_info))
413 return true;
415 /* STMT has a data_ref. FORNOW this means that it is of one of
416 the following forms:
417 -1- ARRAY_REF = var
418 -2- var = ARRAY_REF
419 (This should have been verified in analyze_data_refs).
421 'var' in the second case corresponds to a def, not a use,
422 so USE cannot correspond to any operands that are not used
423 for array indexing.
425 Therefore, all we need to check is if STMT falls into the
426 first case, and whether var corresponds to USE. */
428 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
429 if (!assign || !gimple_assign_copy_p (assign))
431 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
432 if (call && gimple_call_internal_p (call))
434 internal_fn ifn = gimple_call_internal_fn (call);
435 int mask_index = internal_fn_mask_index (ifn);
436 if (mask_index >= 0
437 && use == gimple_call_arg (call, mask_index))
438 return true;
439 int stored_value_index = internal_fn_stored_value_index (ifn);
440 if (stored_value_index >= 0
441 && use == gimple_call_arg (call, stored_value_index))
442 return true;
443 if (internal_gather_scatter_fn_p (ifn)
444 && use == gimple_call_arg (call, 1))
445 return true;
447 return false;
450 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
451 return false;
452 operand = gimple_assign_rhs1 (assign);
453 if (TREE_CODE (operand) != SSA_NAME)
454 return false;
456 if (operand == use)
457 return true;
459 return false;
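/* Illustrative example: for a store a[i_7] = x_5, the use of i_7 appears
   only inside the ARRAY_REF, so this function returns false for it, whereas
   the stored value x_5 is a non-indexing operand and the function returns
   true.  */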
464 /* Function process_use.
466 Inputs:
467 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
468 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
469 that defined USE. This is done by calling mark_relevant and passing it
470 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
471 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
472 be performed.
474 Outputs:
475 Generally, LIVE_P and RELEVANT are used to define the liveness and
476 relevance info of the DEF_STMT of this USE:
477 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
478 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
479 Exceptions:
480 - case 1: If USE is used only for address computations (e.g. array indexing),
481 which does not need to be directly vectorized, then the liveness/relevance
482 of the respective DEF_STMT is left unchanged.
483 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
484 we skip DEF_STMT because it has already been processed.
485 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
486 "relevant" will be modified accordingly.
488 Return true if everything is as expected. Return false otherwise. */
490 static opt_result
491 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
492 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
493 bool force)
495 stmt_vec_info dstmt_vinfo;
496 enum vect_def_type dt;
498 /* case 1: we are only interested in uses that need to be vectorized. Uses
499 that are used for address computation are not considered relevant. */
500 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
501 return opt_result::success ();
503 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
504 return opt_result::failure_at (stmt_vinfo->stmt,
505 "not vectorized:"
506 " unsupported use in stmt.\n");
508 if (!dstmt_vinfo)
509 return opt_result::success ();
511 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
512 basic_block bb = gimple_bb (stmt_vinfo->stmt);
514 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
515 We have to force the stmt live since the epilogue loop needs it to
516 continue computing the reduction. */
517 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
518 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
519 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
520 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
521 && bb->loop_father == def_bb->loop_father)
523 if (dump_enabled_p ())
524 dump_printf_loc (MSG_NOTE, vect_location,
525 "reduc-stmt defining reduc-phi in the same nest.\n");
526 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
527 return opt_result::success ();
530 /* case 3a: outer-loop stmt defining an inner-loop stmt:
531 outer-loop-header-bb:
532 d = dstmt_vinfo
533 inner-loop:
534 stmt # use (d)
535 outer-loop-tail-bb:
536 ... */
537 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "outer-loop def-stmt defining inner-loop stmt.\n");
543 switch (relevant)
545 case vect_unused_in_scope:
546 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
547 vect_used_in_scope : vect_unused_in_scope;
548 break;
550 case vect_used_in_outer_by_reduction:
551 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
552 relevant = vect_used_by_reduction;
553 break;
555 case vect_used_in_outer:
556 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
557 relevant = vect_used_in_scope;
558 break;
560 case vect_used_in_scope:
561 break;
563 default:
564 gcc_unreachable ();
568 /* case 3b: inner-loop stmt defining an outer-loop stmt:
569 outer-loop-header-bb:
571 inner-loop:
572 d = dstmt_vinfo
573 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
574 stmt # use (d) */
575 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
577 if (dump_enabled_p ())
578 dump_printf_loc (MSG_NOTE, vect_location,
579 "inner-loop def-stmt defining outer-loop stmt.\n");
581 switch (relevant)
583 case vect_unused_in_scope:
584 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
585 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
586 vect_used_in_outer_by_reduction : vect_unused_in_scope;
587 break;
589 case vect_used_by_reduction:
590 case vect_used_only_live:
591 relevant = vect_used_in_outer_by_reduction;
592 break;
594 case vect_used_in_scope:
595 relevant = vect_used_in_outer;
596 break;
598 default:
599 gcc_unreachable ();
602 /* We are also not interested in uses on loop PHI backedges that are
603 inductions. Otherwise we'll needlessly vectorize the IV increment
604 and cause hybrid SLP for SLP inductions. Unless the PHI is live
605 of course. */
606 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
607 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
608 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
609 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
610 loop_latch_edge (bb->loop_father))
611 == use))
613 if (dump_enabled_p ())
614 dump_printf_loc (MSG_NOTE, vect_location,
615 "induction value on backedge.\n");
616 return opt_result::success ();
620 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
621 return opt_result::success ();
625 /* Function vect_mark_stmts_to_be_vectorized.
627 Not all stmts in the loop need to be vectorized. For example:
629 for i...
630 for j...
631 1. T0 = i + j
632 2. T1 = a[T0]
634 3. j = j + 1
636 Stmts 1 and 3 do not need to be vectorized, because loop control and
637 addressing of vectorized data-refs are handled differently.
639 This pass detects such stmts. */
641 opt_result
642 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
644 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
645 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
646 unsigned int nbbs = loop->num_nodes;
647 gimple_stmt_iterator si;
648 unsigned int i;
649 basic_block bb;
650 bool live_p;
651 enum vect_relevant relevant;
653 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
655 auto_vec<stmt_vec_info, 64> worklist;
657 /* 1. Init worklist. */
658 for (i = 0; i < nbbs; i++)
660 bb = bbs[i];
661 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
663 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
664 if (dump_enabled_p ())
665 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
666 phi_info->stmt);
668 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
669 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
671 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
673 if (is_gimple_debug (gsi_stmt (si)))
674 continue;
675 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
676 if (dump_enabled_p ())
677 dump_printf_loc (MSG_NOTE, vect_location,
678 "init: stmt relevant? %G", stmt_info->stmt);
680 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
681 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
685 /* 2. Process_worklist */
686 while (worklist.length () > 0)
688 use_operand_p use_p;
689 ssa_op_iter iter;
691 stmt_vec_info stmt_vinfo = worklist.pop ();
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "worklist: examine stmt: %G", stmt_vinfo->stmt);
696 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
697 (DEF_STMT) as relevant/irrelevant according to the relevance property
698 of STMT. */
699 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
701 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
702 propagated as is to the DEF_STMTs of its USEs.
704 One exception is when STMT has been identified as defining a reduction
705 variable; in this case we set the relevance to vect_used_by_reduction.
706 This is because we distinguish between two kinds of relevant stmts -
707 those that are used by a reduction computation, and those that are
708 (also) used by a regular computation. This allows us later on to
709 identify stmts that are used solely by a reduction, and therefore the
710 order of the results that they produce does not have to be kept. */
712 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
714 case vect_reduction_def:
715 gcc_assert (relevant != vect_unused_in_scope);
716 if (relevant != vect_unused_in_scope
717 && relevant != vect_used_in_scope
718 && relevant != vect_used_by_reduction
719 && relevant != vect_used_only_live)
720 return opt_result::failure_at
721 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
722 break;
724 case vect_nested_cycle:
725 if (relevant != vect_unused_in_scope
726 && relevant != vect_used_in_outer_by_reduction
727 && relevant != vect_used_in_outer)
728 return opt_result::failure_at
729 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
730 break;
732 case vect_double_reduction_def:
733 if (relevant != vect_unused_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
736 return opt_result::failure_at
737 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
738 break;
740 default:
741 break;
744 if (is_pattern_stmt_p (stmt_vinfo))
746 /* Pattern statements are not inserted into the code, so
747 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
748 have to scan the RHS or function arguments instead. */
749 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
751 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
752 tree op = gimple_assign_rhs1 (assign);
754 i = 1;
755 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
757 opt_result res
758 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
759 loop_vinfo, relevant, &worklist, false);
760 if (!res)
761 return res;
762 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
763 loop_vinfo, relevant, &worklist, false);
764 if (!res)
765 return res;
766 i = 2;
768 for (; i < gimple_num_ops (assign); i++)
770 op = gimple_op (assign, i);
771 if (TREE_CODE (op) == SSA_NAME)
773 opt_result res
774 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
775 &worklist, false);
776 if (!res)
777 return res;
781 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
783 for (i = 0; i < gimple_call_num_args (call); i++)
785 tree arg = gimple_call_arg (call, i);
786 opt_result res
787 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
788 &worklist, false);
789 if (!res)
790 return res;
794 else
795 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
797 tree op = USE_FROM_PTR (use_p);
798 opt_result res
799 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
800 &worklist, false);
801 if (!res)
802 return res;
805 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
807 gather_scatter_info gs_info;
808 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
809 gcc_unreachable ();
810 opt_result res
811 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
812 &worklist, true);
813 if (!res)
815 if (fatal)
816 *fatal = false;
817 return res;
820 } /* while worklist */
822 return opt_result::success ();
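/* Informal walk-through using the example in the comment above: suppose the
   load T1 = a[T0] is relevant (say T1 feeds a store).  When its uses are
   examined, exist_non_indexing_operands_for_use_p reports that T0 only
   serves as an array index, so process_use never marks stmt 1; and since
   stmt 1 stays unmarked and induction uses on the PHI backedge are skipped,
   the increment in stmt 3 is not marked either.  This is how stmts 1 and 3
   end up not being vectorized.  */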
825 /* Function vect_model_simple_cost.
827 Models cost for simple operations, i.e. those that only emit ncopies of a
828 single op. Right now, this does not account for multiple insns that could
829 be generated for the single vector op. We will handle that shortly. */
831 static void
832 vect_model_simple_cost (vec_info *,
833 stmt_vec_info stmt_info, int ncopies,
834 enum vect_def_type *dt,
835 int ndts,
836 slp_tree node,
837 stmt_vector_for_cost *cost_vec,
838 vect_cost_for_stmt kind = vector_stmt)
840 int inside_cost = 0, prologue_cost = 0;
842 gcc_assert (cost_vec != NULL);
844 /* ??? Somehow we need to fix this at the callers. */
845 if (node)
846 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
848 if (!node)
849 /* Cost the "broadcast" of a scalar operand into a vector operand.
850 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
851 cost model. */
852 for (int i = 0; i < ndts; i++)
853 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
854 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
855 stmt_info, 0, vect_prologue);
857 /* Pass the inside-of-loop statements to the target-specific cost model. */
858 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
859 stmt_info, 0, vect_body);
861 if (dump_enabled_p ())
862 dump_printf_loc (MSG_NOTE, vect_location,
863 "vect_model_simple_cost: inside_cost = %d, "
864 "prologue_cost = %d .\n", inside_cost, prologue_cost);
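/* Worked example (illustrative numbers): for NCOPIES == 2 and two operands,
   one external and one defined inside the loop, the non-SLP path above
   records one scalar_to_vec in the prologue for the external operand and
   two copies of KIND (by default vector_stmt) in the loop body.  */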
868 /* Model cost for type demotion and promotion operations. PWR is
869 normally zero for single-step promotions and demotions. It will be
870 one if two-step promotion/demotion is required, and so on. NCOPIES
871 is the number of vector results (and thus number of instructions)
872 for the narrowest end of the operation chain. Each additional
873 step doubles the number of instructions required. If WIDEN_ARITH
874 is true the stmt is doing widening arithmetic. */
876 static void
877 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
878 enum vect_def_type *dt,
879 unsigned int ncopies, int pwr,
880 stmt_vector_for_cost *cost_vec,
881 bool widen_arith)
883 int i;
884 int inside_cost = 0, prologue_cost = 0;
886 for (i = 0; i < pwr + 1; i++)
888 inside_cost += record_stmt_cost (cost_vec, ncopies,
889 widen_arith
890 ? vector_stmt : vec_promote_demote,
891 stmt_info, 0, vect_body);
892 ncopies *= 2;
895 /* FORNOW: Assuming maximum 2 args per stmt. */
896 for (i = 0; i < 2; i++)
897 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
898 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
899 stmt_info, 0, vect_prologue);
901 if (dump_enabled_p ())
902 dump_printf_loc (MSG_NOTE, vect_location,
903 "vect_model_promotion_demotion_cost: inside_cost = %d, "
904 "prologue_cost = %d .\n", inside_cost, prologue_cost);
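/* Worked example (illustrative numbers): for a two-step promotion
   (PWR == 1) with NCOPIES == 2 at the narrow end, the loop above records
   2 + 4 = 6 statements in the body, since each additional step doubles the
   statement count.  */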
907 /* Returns true if the current function returns DECL. */
909 static bool
910 cfun_returns (tree decl)
912 edge_iterator ei;
913 edge e;
914 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
916 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
917 if (!ret)
918 continue;
919 if (gimple_return_retval (ret) == decl)
920 return true;
921 /* We often end up with an aggregate copy to the result decl,
922 handle that case as well. First skip intermediate clobbers
923 though. */
924 gimple *def = ret;
927 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
929 while (gimple_clobber_p (def));
930 if (is_a <gassign *> (def)
931 && gimple_assign_lhs (def) == gimple_return_retval (ret)
932 && gimple_assign_rhs1 (def) == decl)
933 return true;
935 return false;
938 /* Function vect_model_store_cost
940 Models cost for stores. In the case of grouped accesses, one access
941 has the overhead of the grouped access attributed to it. */
943 static void
944 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
945 vect_memory_access_type memory_access_type,
946 gather_scatter_info *gs_info,
947 dr_alignment_support alignment_support_scheme,
948 int misalignment,
949 vec_load_store_type vls_type, slp_tree slp_node,
950 stmt_vector_for_cost *cost_vec)
952 unsigned int inside_cost = 0, prologue_cost = 0;
953 stmt_vec_info first_stmt_info = stmt_info;
954 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
956 /* ??? Somehow we need to fix this at the callers. */
957 if (slp_node)
958 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
960 if (vls_type == VLS_STORE_INVARIANT)
962 if (!slp_node)
963 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
964 stmt_info, 0, vect_prologue);
967 /* Grouped stores update all elements in the group at once,
968 so we want the DR for the first statement. */
969 if (!slp_node && grouped_access_p)
970 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
972 /* True if we should include any once-per-group costs as well as
973 the cost of the statement itself. For SLP we only get called
974 once per group anyhow. */
975 bool first_stmt_p = (first_stmt_info == stmt_info);
977 /* We assume that the cost of a single store-lanes instruction is
978 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
979 access is instead being provided by a permute-and-store operation,
980 include the cost of the permutes. */
981 if (first_stmt_p
982 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
984 /* Uses high and low interleave or shuffle operations for each
985 needed permute. */
986 int group_size = DR_GROUP_SIZE (first_stmt_info);
987 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
988 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
989 stmt_info, 0, vect_body);
991 if (dump_enabled_p ())
992 dump_printf_loc (MSG_NOTE, vect_location,
993 "vect_model_store_cost: strided group_size = %d .\n",
994 group_size);
997 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
998 /* Costs of the stores. */
999 if (memory_access_type == VMAT_ELEMENTWISE
1000 || memory_access_type == VMAT_GATHER_SCATTER)
1002 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1003 if (memory_access_type == VMAT_GATHER_SCATTER
1004 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1005 /* For emulated scatter N offset vector element extracts
1006 (we assume the scalar scaling and ptr + offset add is consumed by
1007 the store).
1008 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1009 vec_to_scalar, stmt_info, 0,
1010 vect_body);
1011 /* N scalar stores plus extracting the elements. */
1012 inside_cost += record_stmt_cost (cost_vec,
1013 ncopies * assumed_nunits,
1014 scalar_store, stmt_info, 0, vect_body);
1016 else
1017 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1018 misalignment, &inside_cost, cost_vec);
1020 if (memory_access_type == VMAT_ELEMENTWISE
1021 || memory_access_type == VMAT_STRIDED_SLP
1022 || (memory_access_type == VMAT_GATHER_SCATTER
1023 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1025 /* N scalar stores plus extracting the elements. */
1026 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1027 inside_cost += record_stmt_cost (cost_vec,
1028 ncopies * assumed_nunits,
1029 vec_to_scalar, stmt_info, 0, vect_body);
1032 /* When vectorizing a store into the function result assign
1033 a penalty if the function returns in a multi-register location.
1034 In this case we assume we'll end up with having to spill the
1035 vector result and do piecewise loads as a conservative estimate. */
1036 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1037 if (base
1038 && (TREE_CODE (base) == RESULT_DECL
1039 || (DECL_P (base) && cfun_returns (base)))
1040 && !aggregate_value_p (base, cfun->decl))
1042 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1043 /* ??? Handle PARALLEL in some way. */
1044 if (REG_P (reg))
1046 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1047 /* Assume that a single reg-reg move is possible and cheap,
1048 do not account for vector to gp register move cost. */
1049 if (nregs > 1)
1051 /* Spill. */
1052 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1053 vector_store,
1054 stmt_info, 0, vect_epilogue);
1055 /* Loads. */
1056 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1057 scalar_load,
1058 stmt_info, 0, vect_epilogue);
1063 if (dump_enabled_p ())
1064 dump_printf_loc (MSG_NOTE, vect_location,
1065 "vect_model_store_cost: inside_cost = %d, "
1066 "prologue_cost = %d .\n", inside_cost, prologue_cost);
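/* Worked example (illustrative numbers): a contiguous-permute store group
   of size 4 with ncopies == 1 is charged ceil_log2 (4) * 4 = 8 vec_perm
   statements on top of the stores themselves, matching the interleaving
   scheme costed above.  */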
1070 /* Calculate cost of DR's memory access. */
1071 void
1072 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1073 dr_alignment_support alignment_support_scheme,
1074 int misalignment,
1075 unsigned int *inside_cost,
1076 stmt_vector_for_cost *body_cost_vec)
1078 switch (alignment_support_scheme)
1080 case dr_aligned:
1082 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1083 vector_store, stmt_info, 0,
1084 vect_body);
1086 if (dump_enabled_p ())
1087 dump_printf_loc (MSG_NOTE, vect_location,
1088 "vect_model_store_cost: aligned.\n");
1089 break;
1092 case dr_unaligned_supported:
1094 /* Here, we assign an additional cost for the unaligned store. */
1095 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1096 unaligned_store, stmt_info,
1097 misalignment, vect_body);
1098 if (dump_enabled_p ())
1099 dump_printf_loc (MSG_NOTE, vect_location,
1100 "vect_model_store_cost: unaligned supported by "
1101 "hardware.\n");
1102 break;
1105 case dr_unaligned_unsupported:
1107 *inside_cost = VECT_MAX_COST;
1109 if (dump_enabled_p ())
1110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1111 "vect_model_store_cost: unsupported access.\n");
1112 break;
1115 default:
1116 gcc_unreachable ();
1121 /* Function vect_model_load_cost
1123 Models cost for loads. In the case of grouped accesses, one access has
1124 the overhead of the grouped access attributed to it. Since unaligned
1125 accesses are supported for loads, we also account for the costs of the
1126 access scheme chosen. */
1128 static void
1129 vect_model_load_cost (vec_info *vinfo,
1130 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1131 vect_memory_access_type memory_access_type,
1132 dr_alignment_support alignment_support_scheme,
1133 int misalignment,
1134 gather_scatter_info *gs_info,
1135 slp_tree slp_node,
1136 stmt_vector_for_cost *cost_vec)
1138 unsigned int inside_cost = 0, prologue_cost = 0;
1139 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1141 gcc_assert (cost_vec);
1143 /* ??? Somehow we need to fix this at the callers. */
1144 if (slp_node)
1145 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1147 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1149 /* If the load is permuted then the alignment is determined by
1150 the first group element not by the first scalar stmt DR. */
1151 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1152 /* Record the cost for the permutation. */
1153 unsigned n_perms, n_loads;
1154 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1155 vf, true, &n_perms, &n_loads);
1156 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1157 first_stmt_info, 0, vect_body);
1159 /* And adjust the number of loads performed. This handles
1160 redundancies as well as loads that are later dead. */
1161 ncopies = n_loads;
1164 /* Grouped loads read all elements in the group at once,
1165 so we want the DR for the first statement. */
1166 stmt_vec_info first_stmt_info = stmt_info;
1167 if (!slp_node && grouped_access_p)
1168 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1170 /* True if we should include any once-per-group costs as well as
1171 the cost of the statement itself. For SLP we only get called
1172 once per group anyhow. */
1173 bool first_stmt_p = (first_stmt_info == stmt_info);
1175 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1176 ones we actually need. Account for the cost of unused results. */
1177 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1179 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1180 stmt_vec_info next_stmt_info = first_stmt_info;
1183 gaps -= 1;
1184 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1186 while (next_stmt_info);
1187 if (gaps)
1189 if (dump_enabled_p ())
1190 dump_printf_loc (MSG_NOTE, vect_location,
1191 "vect_model_load_cost: %d unused vectors.\n",
1192 gaps);
1193 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1194 alignment_support_scheme, misalignment, false,
1195 &inside_cost, &prologue_cost,
1196 cost_vec, cost_vec, true);
1200 /* We assume that the cost of a single load-lanes instruction is
1201 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1202 access is instead being provided by a load-and-permute operation,
1203 include the cost of the permutes. */
1204 if (first_stmt_p
1205 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1207 /* Uses even and odd extract operations or shuffle operations
1208 for each needed permute. */
1209 int group_size = DR_GROUP_SIZE (first_stmt_info);
1210 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1211 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1212 stmt_info, 0, vect_body);
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_NOTE, vect_location,
1216 "vect_model_load_cost: strided group_size = %d .\n",
1217 group_size);
1220 /* The loads themselves. */
1221 if (memory_access_type == VMAT_ELEMENTWISE
1222 || memory_access_type == VMAT_GATHER_SCATTER)
1224 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1225 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1226 if (memory_access_type == VMAT_GATHER_SCATTER
1227 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1228 /* For emulated gathers N offset vector element extracts
1229 (we assume the scalar scaling and ptr + offset add is consumed by
1230 the load). */
1231 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1232 vec_to_scalar, stmt_info, 0,
1233 vect_body);
1234 /* N scalar loads plus gathering them into a vector. */
1235 inside_cost += record_stmt_cost (cost_vec,
1236 ncopies * assumed_nunits,
1237 scalar_load, stmt_info, 0, vect_body);
1239 else if (memory_access_type == VMAT_INVARIANT)
1241 /* Invariant loads will ideally be hoisted and splat to a vector. */
1242 prologue_cost += record_stmt_cost (cost_vec, 1,
1243 scalar_load, stmt_info, 0,
1244 vect_prologue);
1245 prologue_cost += record_stmt_cost (cost_vec, 1,
1246 scalar_to_vec, stmt_info, 0,
1247 vect_prologue);
1249 else
1250 vect_get_load_cost (vinfo, stmt_info, ncopies,
1251 alignment_support_scheme, misalignment, first_stmt_p,
1252 &inside_cost, &prologue_cost,
1253 cost_vec, cost_vec, true);
1254 if (memory_access_type == VMAT_ELEMENTWISE
1255 || memory_access_type == VMAT_STRIDED_SLP
1256 || (memory_access_type == VMAT_GATHER_SCATTER
1257 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1258 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1259 stmt_info, 0, vect_body);
1261 if (dump_enabled_p ())
1262 dump_printf_loc (MSG_NOTE, vect_location,
1263 "vect_model_load_cost: inside_cost = %d, "
1264 "prologue_cost = %d .\n", inside_cost, prologue_cost);
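/* Worked example (illustrative numbers): an emulated gather (IFN_LAST and
   no builtin decl) with ncopies == 1 and 4 elements per vector is costed as
   4 vec_to_scalar offset extracts, 4 scalar_load operations and one
   vec_construct to assemble the result, per the VMAT_GATHER_SCATTER and
   strided handling above.  */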
1268 /* Calculate cost of DR's memory access. */
1269 void
1270 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1271 dr_alignment_support alignment_support_scheme,
1272 int misalignment,
1273 bool add_realign_cost, unsigned int *inside_cost,
1274 unsigned int *prologue_cost,
1275 stmt_vector_for_cost *prologue_cost_vec,
1276 stmt_vector_for_cost *body_cost_vec,
1277 bool record_prologue_costs)
1279 switch (alignment_support_scheme)
1281 case dr_aligned:
1283 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1284 stmt_info, 0, vect_body);
1286 if (dump_enabled_p ())
1287 dump_printf_loc (MSG_NOTE, vect_location,
1288 "vect_model_load_cost: aligned.\n");
1290 break;
1292 case dr_unaligned_supported:
1294 /* Here, we assign an additional cost for the unaligned load. */
1295 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1296 unaligned_load, stmt_info,
1297 misalignment, vect_body);
1299 if (dump_enabled_p ())
1300 dump_printf_loc (MSG_NOTE, vect_location,
1301 "vect_model_load_cost: unaligned supported by "
1302 "hardware.\n");
1304 break;
1306 case dr_explicit_realign:
1308 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1309 vector_load, stmt_info, 0, vect_body);
1310 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1311 vec_perm, stmt_info, 0, vect_body);
1313 /* FIXME: If the misalignment remains fixed across the iterations of
1314 the containing loop, the following cost should be added to the
1315 prologue costs. */
1316 if (targetm.vectorize.builtin_mask_for_load)
1317 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1318 stmt_info, 0, vect_body);
1320 if (dump_enabled_p ())
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "vect_model_load_cost: explicit realign\n");
1324 break;
1326 case dr_explicit_realign_optimized:
1328 if (dump_enabled_p ())
1329 dump_printf_loc (MSG_NOTE, vect_location,
1330 "vect_model_load_cost: unaligned software "
1331 "pipelined.\n");
1333 /* Unaligned software pipeline has a load of an address, an initial
1334 load, and possibly a mask operation to "prime" the loop. However,
1335 if this is an access in a group of loads, which provide grouped
1336 access, then the above cost should only be considered for one
1337 access in the group. Inside the loop, there is a load op
1338 and a realignment op. */
1340 if (add_realign_cost && record_prologue_costs)
1342 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1343 vector_stmt, stmt_info,
1344 0, vect_prologue);
1345 if (targetm.vectorize.builtin_mask_for_load)
1346 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1347 vector_stmt, stmt_info,
1348 0, vect_prologue);
1351 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1352 stmt_info, 0, vect_body);
1353 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1354 stmt_info, 0, vect_body);
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_NOTE, vect_location,
1358 "vect_model_load_cost: explicit realign optimized"
1359 "\n");
1361 break;
1364 case dr_unaligned_unsupported:
1366 *inside_cost = VECT_MAX_COST;
1368 if (dump_enabled_p ())
1369 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1370 "vect_model_load_cost: unsupported access.\n");
1371 break;
1374 default:
1375 gcc_unreachable ();
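/* Summary of the costs recorded above (per copy unless noted):
   dr_aligned -> one vector_load; dr_unaligned_supported -> one
   unaligned_load; dr_explicit_realign -> two vector_loads plus a vec_perm
   (plus a single vector_stmt for the mask when the target has a
   mask-for-load builtin); dr_explicit_realign_optimized -> one vector_load
   plus a vec_perm, with a two-to-three statement prologue when
   ADD_REALIGN_COST and RECORD_PROLOGUE_COSTS hold; dr_unaligned_unsupported
   pins *INSIDE_COST at VECT_MAX_COST to veto the access.  */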
1379 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1380 the loop preheader for the vectorized stmt STMT_VINFO. */
1382 static void
1383 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1384 gimple_stmt_iterator *gsi)
1386 if (gsi)
1387 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1388 else
1389 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1391 if (dump_enabled_p ())
1392 dump_printf_loc (MSG_NOTE, vect_location,
1393 "created new init_stmt: %G", new_stmt);
1396 /* Function vect_init_vector.
1398 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1399 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1400 vector type, a vector with all elements equal to VAL is created first.
1401 Place the initialization at GSI if it is not NULL. Otherwise, place the
1402 initialization at the loop preheader.
1403 Return the DEF of INIT_STMT.
1404 It will be used in the vectorization of STMT_INFO. */
1406 tree
1407 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1408 gimple_stmt_iterator *gsi)
1410 gimple *init_stmt;
1411 tree new_temp;
1413 /* We abuse this function to push something into an SSA name with initial value 'val'. */
1414 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1416 gcc_assert (VECTOR_TYPE_P (type));
1417 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1419 /* A scalar boolean value should be transformed into an
1420 all-zeros or all-ones value before building a vector. */
1421 if (VECTOR_BOOLEAN_TYPE_P (type))
1423 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1424 tree false_val = build_zero_cst (TREE_TYPE (type));
1426 if (CONSTANT_CLASS_P (val))
1427 val = integer_zerop (val) ? false_val : true_val;
1428 else
1430 new_temp = make_ssa_name (TREE_TYPE (type));
1431 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1432 val, true_val, false_val);
1433 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1434 val = new_temp;
1437 else
1439 gimple_seq stmts = NULL;
1440 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1441 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1442 TREE_TYPE (type), val);
1443 else
1444 /* ??? Condition vectorization expects us to do
1445 promotion of invariant/external defs. */
1446 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1447 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1448 !gsi_end_p (gsi2); )
1450 init_stmt = gsi_stmt (gsi2);
1451 gsi_remove (&gsi2, false);
1452 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1456 val = build_vector_from_val (type, val);
1459 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1460 init_stmt = gimple_build_assign (new_temp, val);
1461 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1462 return new_temp;
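/* Illustrative sketch of the emitted GIMPLE (names are made up): splatting
   the scalar constant 3 to a four-element integer vector type produces an
   init statement like
     cst__4 = { 3, 3, 3, 3 };
   placed either before *GSI or on loop entry, with the fresh "cst_"-prefixed
   SSA name returned to the caller.  */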
1466 /* Function vect_get_vec_defs_for_operand.
1468 OP is an operand in STMT_VINFO. This function returns a vector of
1469 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1471 In the case that OP is an SSA_NAME which is defined in the loop, then
1472 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1474 In case OP is an invariant or constant, a new stmt that creates a vector def
1475 needs to be introduced. VECTYPE may be used to specify a required type for
1476 vector invariant. */
1478 void
1479 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1480 unsigned ncopies,
1481 tree op, vec<tree> *vec_oprnds, tree vectype)
1483 gimple *def_stmt;
1484 enum vect_def_type dt;
1485 bool is_simple_use;
1486 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1488 if (dump_enabled_p ())
1489 dump_printf_loc (MSG_NOTE, vect_location,
1490 "vect_get_vec_defs_for_operand: %T\n", op);
1492 stmt_vec_info def_stmt_info;
1493 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1494 &def_stmt_info, &def_stmt);
1495 gcc_assert (is_simple_use);
1496 if (def_stmt && dump_enabled_p ())
1497 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1499 vec_oprnds->create (ncopies);
1500 if (dt == vect_constant_def || dt == vect_external_def)
1502 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1503 tree vector_type;
1505 if (vectype)
1506 vector_type = vectype;
1507 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1508 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1509 vector_type = truth_type_for (stmt_vectype);
1510 else
1511 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1513 gcc_assert (vector_type);
1514 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1515 while (ncopies--)
1516 vec_oprnds->quick_push (vop);
1518 else
1520 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1521 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1522 for (unsigned i = 0; i < ncopies; ++i)
1523 vec_oprnds->quick_push (gimple_get_lhs
1524 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
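/* For example (informal): with NCOPIES == 2, a constant or external operand
   yields the same splatted vector def pushed twice, while an operand defined
   by a vectorized statement in the loop yields the LHS of each of that
   statement's two vector statements.  */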
1529 /* Get vectorized definitions for OP0 and OP1. */
1531 void
1532 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1533 unsigned ncopies,
1534 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1535 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1536 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1537 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1539 if (slp_node)
1541 if (op0)
1542 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1543 if (op1)
1544 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1545 if (op2)
1546 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1547 if (op3)
1548 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1550 else
1552 if (op0)
1553 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1554 op0, vec_oprnds0, vectype0);
1555 if (op1)
1556 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1557 op1, vec_oprnds1, vectype1);
1558 if (op2)
1559 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1560 op2, vec_oprnds2, vectype2);
1561 if (op3)
1562 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1563 op3, vec_oprnds3, vectype3);
1567 void
1568 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1569 unsigned ncopies,
1570 tree op0, vec<tree> *vec_oprnds0,
1571 tree op1, vec<tree> *vec_oprnds1,
1572 tree op2, vec<tree> *vec_oprnds2,
1573 tree op3, vec<tree> *vec_oprnds3)
1575 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1576 op0, vec_oprnds0, NULL_TREE,
1577 op1, vec_oprnds1, NULL_TREE,
1578 op2, vec_oprnds2, NULL_TREE,
1579 op3, vec_oprnds3, NULL_TREE);
1582 /* Helper function called by vect_finish_replace_stmt and
1583 vect_finish_stmt_generation. Set the location of the new
1584 statement and create and return a stmt_vec_info for it. */
1586 static void
1587 vect_finish_stmt_generation_1 (vec_info *,
1588 stmt_vec_info stmt_info, gimple *vec_stmt)
1590 if (dump_enabled_p ())
1591 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1593 if (stmt_info)
1595 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1597 /* While EH edges will generally prevent vectorization, stmt might
1598 e.g. be in a must-not-throw region. Ensure newly created stmts
1599 that could throw are part of the same region. */
1600 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1601 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1602 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1604 else
1605 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1608 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1609 which sets the same scalar result as STMT_INFO did. Create and return a
1610 stmt_vec_info for VEC_STMT. */
1612 void
1613 vect_finish_replace_stmt (vec_info *vinfo,
1614 stmt_vec_info stmt_info, gimple *vec_stmt)
1616 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1617 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1619 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1620 gsi_replace (&gsi, vec_stmt, true);
1622 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1625 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1626 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1628 void
1629 vect_finish_stmt_generation (vec_info *vinfo,
1630 stmt_vec_info stmt_info, gimple *vec_stmt,
1631 gimple_stmt_iterator *gsi)
1633 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1635 if (!gsi_end_p (*gsi)
1636 && gimple_has_mem_ops (vec_stmt))
1638 gimple *at_stmt = gsi_stmt (*gsi);
1639 tree vuse = gimple_vuse (at_stmt);
1640 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1642 tree vdef = gimple_vdef (at_stmt);
1643 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1644 gimple_set_modified (vec_stmt, true);
1645 /* If we have an SSA vuse and insert a store, update virtual
1646 SSA form to avoid triggering the renamer. Do so only
1647 if we can easily see all uses - which is what almost always
1648 happens with the way vectorized stmts are inserted. */
1649 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1650 && ((is_gimple_assign (vec_stmt)
1651 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1652 || (is_gimple_call (vec_stmt)
1653 && (!(gimple_call_flags (vec_stmt)
1654 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1655 || (gimple_call_lhs (vec_stmt)
1656 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1658 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1659 gimple_set_vdef (vec_stmt, new_vdef);
1660 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1664 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1665 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1668 /* We want to vectorize a call to combined function CFN with function
1669 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1670 as the types of all inputs. Check whether this is possible using
1671 an internal function, returning its code if so or IFN_LAST if not. */
1673 static internal_fn
1674 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1675 tree vectype_out, tree vectype_in)
1677 internal_fn ifn;
1678 if (internal_fn_p (cfn))
1679 ifn = as_internal_fn (cfn);
1680 else
1681 ifn = associated_internal_fn (fndecl);
1682 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1684 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1685 if (info.vectorizable)
1687 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1688 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1689 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1690 OPTIMIZE_FOR_SPEED))
1691 return ifn;
1694 return IFN_LAST;
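/* Illustrative example: for a call to the sqrtf builtin, the associated
   internal function is IFN_SQRT, and IFN_SQRT is returned here when
   direct_internal_fn_supported_p reports that the target implements it for
   the vector types involved (e.g. V4SF); otherwise IFN_LAST is returned and
   the caller must fall back to another strategy.  */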
1698 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1699 gimple_stmt_iterator *);
1701 /* Check whether a load or store statement in the loop described by
1702 LOOP_VINFO is possible in a loop using partial vectors. This is
1703 testing whether the vectorizer pass has the appropriate support,
1704 as well as whether the target does.
1706 VLS_TYPE says whether the statement is a load or store and VECTYPE
1707 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1708 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1709 says how the load or store is going to be implemented and GROUP_SIZE
1710 is the number of load or store statements in the containing group.
1711 If the access is a gather load or scatter store, GS_INFO describes
1712 its arguments. If the load or store is conditional, SCALAR_MASK is the
1713 condition under which it occurs.
1715 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1716 vectors is not supported, otherwise record the required rgroup control
1717 types. */
1719 static void
1720 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1721 slp_tree slp_node,
1722 vec_load_store_type vls_type,
1723 int group_size,
1724 vect_memory_access_type
1725 memory_access_type,
1726 gather_scatter_info *gs_info,
1727 tree scalar_mask)
1729 /* Invariant loads need no special support. */
1730 if (memory_access_type == VMAT_INVARIANT)
1731 return;
1733 unsigned int nvectors;
1734 if (slp_node)
1735 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1736 else
1737 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1739 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1740 machine_mode vecmode = TYPE_MODE (vectype);
1741 bool is_load = (vls_type == VLS_LOAD);
1742 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1744 if (is_load
1745 ? !vect_load_lanes_supported (vectype, group_size, true)
1746 : !vect_store_lanes_supported (vectype, group_size, true))
1748 if (dump_enabled_p ())
1749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1750 "can't operate on partial vectors because"
1751 " the target doesn't have an appropriate"
1752 " load/store-lanes instruction.\n");
1753 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1754 return;
1756 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1757 scalar_mask);
1758 return;
1761 if (memory_access_type == VMAT_GATHER_SCATTER)
1763 internal_fn ifn = (is_load
1764 ? IFN_MASK_GATHER_LOAD
1765 : IFN_MASK_SCATTER_STORE);
1766 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1767 gs_info->memory_type,
1768 gs_info->offset_vectype,
1769 gs_info->scale))
1771 if (dump_enabled_p ())
1772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1773 "can't operate on partial vectors because"
1774 " the target doesn't have an appropriate"
1775 " gather load or scatter store instruction.\n");
1776 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1777 return;
1779 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1780 scalar_mask);
1781 return;
1784 if (memory_access_type != VMAT_CONTIGUOUS
1785 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1787 /* Element X of the data must come from iteration i * VF + X of the
1788 scalar loop. We need more work to support other mappings. */
1789 if (dump_enabled_p ())
1790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1791 "can't operate on partial vectors because an"
1792 " access isn't contiguous.\n");
1793 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1794 return;
1797 if (!VECTOR_MODE_P (vecmode))
1799 if (dump_enabled_p ())
1800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1801 "can't operate on partial vectors when emulating"
1802 " vector operations.\n");
1803 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1804 return;
1807 /* We might load more scalars than we need for permuting SLP loads.
1808 We checked in get_group_load_store_type that the extra elements
1809 don't leak into a new vector. */
1810 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1812 unsigned int nvectors;
1813 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1814 return nvectors;
1815 gcc_unreachable ();
1818 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1819 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1820 machine_mode mask_mode;
1821 bool using_partial_vectors_p = false;
1822 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1823 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1825 nvectors = group_memory_nvectors (group_size * vf, nunits);
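/* That is, the number of vector accesses needed for the group:
   (GROUP_SIZE * VF) / NUNITS, rounded away from zero by the lambda
   above. */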
1826 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1827 using_partial_vectors_p = true;
1830 machine_mode vmode;
1831 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1833 nvectors = group_memory_nvectors (group_size * vf, nunits);
1834 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1835 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1836 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1837 using_partial_vectors_p = true;
1840 if (!using_partial_vectors_p)
1842 if (dump_enabled_p ())
1843 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1844 "can't operate on partial vectors because the"
1845 " target doesn't have the appropriate partial"
1846 " vectorization load or store.\n");
1847 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1851 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1852 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1853 that needs to be applied to all loads and stores in a vectorized loop.
1854 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1855 otherwise return VEC_MASK & LOOP_MASK.
1857 MASK_TYPE is the type of both masks. If new statements are needed,
1858 insert them before GSI. */
1860 static tree
1861 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1862 tree vec_mask, gimple_stmt_iterator *gsi)
1864 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1865 if (!loop_mask)
1866 return vec_mask;
1868 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1870 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1871 return vec_mask;
1873 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1874 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1875 vec_mask, loop_mask);
1877 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1878 return and_res;
1881 /* Determine whether we can use a gather load or scatter store to vectorize
1882 strided load or store STMT_INFO by truncating the current offset to a
1883 smaller width. We need to be able to construct an offset vector:
1885 { 0, X, X*2, X*3, ... }
1887 without loss of precision, where X is STMT_INFO's DR_STEP.
1889 Return true if this is possible, describing the gather load or scatter
1890 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1892 static bool
1893 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1894 loop_vec_info loop_vinfo, bool masked_p,
1895 gather_scatter_info *gs_info)
1897 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1898 data_reference *dr = dr_info->dr;
1899 tree step = DR_STEP (dr);
1900 if (TREE_CODE (step) != INTEGER_CST)
1902 /* ??? Perhaps we could use range information here? */
1903 if (dump_enabled_p ())
1904 dump_printf_loc (MSG_NOTE, vect_location,
1905 "cannot truncate variable step.\n");
1906 return false;
1909 /* Get the number of bits in an element. */
1910 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1911 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1912 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1914 /* Set COUNT to the upper limit on the number of elements - 1.
1915 Start with the maximum vectorization factor. */
1916 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1918 /* Try lowering COUNT to the number of scalar latch iterations. */
1919 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1920 widest_int max_iters;
1921 if (max_loop_iterations (loop, &max_iters)
1922 && max_iters < count)
1923 count = max_iters.to_shwi ();
1925 /* Try scales of 1 and the element size. */
1926 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1927 wi::overflow_type overflow = wi::OVF_NONE;
1928 for (int i = 0; i < 2; ++i)
1930 int scale = scales[i];
1931 widest_int factor;
1932 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1933 continue;
1935 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1936 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1937 if (overflow)
1938 continue;
1939 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1940 unsigned int min_offset_bits = wi::min_precision (range, sign);
1942 /* Find the narrowest viable offset type. */
1943 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1944 tree offset_type = build_nonstandard_integer_type (offset_bits,
1945 sign == UNSIGNED);
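/* Worked example with illustrative numbers: if COUNT is 15 and
   STEP / SCALE is 4, RANGE is 60, MIN_OFFSET_BITS is 6 (unsigned) and
   OFFSET_BITS rounds up to 8, so an 8-bit unsigned offset type is tried
   for this scale. */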
1947 /* See whether the target supports the operation with an offset
1948 no narrower than OFFSET_TYPE. */
1949 tree memory_type = TREE_TYPE (DR_REF (dr));
1950 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1951 vectype, memory_type, offset_type, scale,
1952 &gs_info->ifn, &gs_info->offset_vectype)
1953 || gs_info->ifn == IFN_LAST)
1954 continue;
1956 gs_info->decl = NULL_TREE;
1957 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1958 but we don't need to store that here. */
1959 gs_info->base = NULL_TREE;
1960 gs_info->element_type = TREE_TYPE (vectype);
1961 gs_info->offset = fold_convert (offset_type, step);
1962 gs_info->offset_dt = vect_constant_def;
1963 gs_info->scale = scale;
1964 gs_info->memory_type = memory_type;
1965 return true;
1968 if (overflow && dump_enabled_p ())
1969 dump_printf_loc (MSG_NOTE, vect_location,
1970 "truncating gather/scatter offset to %d bits"
1971 " might change its value.\n", element_bits);
1973 return false;
1976 /* Return true if we can use gather/scatter internal functions to
1977 vectorize STMT_INFO, which is a grouped or strided load or store.
1978 MASKED_P is true if load or store is conditional. When returning
1979 true, fill in GS_INFO with the information required to perform the
1980 operation. */
1982 static bool
1983 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1984 loop_vec_info loop_vinfo, bool masked_p,
1985 gather_scatter_info *gs_info)
1987 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1988 || gs_info->ifn == IFN_LAST)
1989 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1990 masked_p, gs_info);
1992 tree old_offset_type = TREE_TYPE (gs_info->offset);
1993 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1995 gcc_assert (TYPE_PRECISION (new_offset_type)
1996 >= TYPE_PRECISION (old_offset_type));
1997 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1999 if (dump_enabled_p ())
2000 dump_printf_loc (MSG_NOTE, vect_location,
2001 "using gather/scatter for strided/grouped access,"
2002 " scale = %d\n", gs_info->scale);
2004 return true;
2007 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2008 elements with a known constant step. Return -1 if that step
2009 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2011 static int
2012 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2014 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2015 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2016 size_zero_node);
2019 /* If the target supports a permute mask that reverses the elements in
2020 a vector of type VECTYPE, return that mask, otherwise return null. */
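/* For example (illustrative): for a 4-element vector the permutation
   checked below is { 3, 2, 1, 0 }; the builder encodes it as a single
   stepped pattern of length 3. */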
2022 static tree
2023 perm_mask_for_reverse (tree vectype)
2025 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2027 /* The encoding has a single stepped pattern. */
2028 vec_perm_builder sel (nunits, 1, 3);
2029 for (int i = 0; i < 3; ++i)
2030 sel.quick_push (nunits - 1 - i);
2032 vec_perm_indices indices (sel, 1, nunits);
2033 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
2034 indices))
2035 return NULL_TREE;
2036 return vect_gen_perm_mask_checked (vectype, indices);
2039 /* A subroutine of get_load_store_type, with a subset of the same
2040 arguments. Handle the case where STMT_INFO is a load or store that
2041 accesses consecutive elements with a negative step. Sets *POFFSET
2042 to the offset to be applied to the DR for the first access. */
2044 static vect_memory_access_type
2045 get_negative_load_store_type (vec_info *vinfo,
2046 stmt_vec_info stmt_info, tree vectype,
2047 vec_load_store_type vls_type,
2048 unsigned int ncopies, poly_int64 *poffset)
2050 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2051 dr_alignment_support alignment_support_scheme;
2053 if (ncopies > 1)
2055 if (dump_enabled_p ())
2056 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2057 "multiple types with negative step.\n");
2058 return VMAT_ELEMENTWISE;
2061 /* For backward running DRs the first access in vectype actually is
2062 N-1 elements before the address of the DR. */
2063 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2064 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
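/* E.g. (illustrative) for a 4-element vector of 4-byte elements this
   yields an offset of (-4 + 1) * 4 == -12 bytes. */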
2066 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2067 alignment_support_scheme
2068 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2069 if (alignment_support_scheme != dr_aligned
2070 && alignment_support_scheme != dr_unaligned_supported)
2072 if (dump_enabled_p ())
2073 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2074 "negative step but alignment required.\n");
2075 *poffset = 0;
2076 return VMAT_ELEMENTWISE;
2079 if (vls_type == VLS_STORE_INVARIANT)
2081 if (dump_enabled_p ())
2082 dump_printf_loc (MSG_NOTE, vect_location,
2083 "negative step with invariant source;"
2084 " no permute needed.\n");
2085 return VMAT_CONTIGUOUS_DOWN;
2088 if (!perm_mask_for_reverse (vectype))
2090 if (dump_enabled_p ())
2091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2092 "negative step and reversing not supported.\n");
2093 *poffset = 0;
2094 return VMAT_ELEMENTWISE;
2097 return VMAT_CONTIGUOUS_REVERSE;
2100 /* STMT_INFO is either a masked or unconditional store. Return the value
2101 being stored. */
2103 tree
2104 vect_get_store_rhs (stmt_vec_info stmt_info)
2106 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2108 gcc_assert (gimple_assign_single_p (assign));
2109 return gimple_assign_rhs1 (assign);
2111 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2113 internal_fn ifn = gimple_call_internal_fn (call);
2114 int index = internal_fn_stored_value_index (ifn);
2115 gcc_assert (index >= 0);
2116 return gimple_call_arg (call, index);
2118 gcc_unreachable ();
2121 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2123 This function returns a vector type which can be composed of NELTS pieces,
2124 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2125 same vector size as the return vector. It first checks whether the target
2126 supports a pieces-size vector mode for the construction; if it does not,
2127 it then checks a pieces-size scalar mode. It returns NULL_TREE if it
2128 fails to find an available composition.
2130 For example, for (vtype=V16QI, nelts=4), we can probably get:
2131 - V16QI with PTYPE V4QI.
2132 - V4SI with PTYPE SI.
2133 - NULL_TREE. */
2135 static tree
2136 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2138 gcc_assert (VECTOR_TYPE_P (vtype));
2139 gcc_assert (known_gt (nelts, 0U));
2141 machine_mode vmode = TYPE_MODE (vtype);
2142 if (!VECTOR_MODE_P (vmode))
2143 return NULL_TREE;
2145 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2146 unsigned int pbsize;
2147 if (constant_multiple_p (vbsize, nelts, &pbsize))
2149 /* First check if vec_init optab supports construction from
2150 vector pieces directly. */
2151 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2152 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2153 machine_mode rmode;
2154 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2155 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2156 != CODE_FOR_nothing))
2158 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2159 return vtype;
2162 /* Otherwise check whether an integer type of the same piece size exists
2163 and whether the vec_init optab supports construction from it directly. */
2164 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2165 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2166 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2167 != CODE_FOR_nothing))
2169 *ptype = build_nonstandard_integer_type (pbsize, 1);
2170 return build_vector_type (*ptype, nelts);
2174 return NULL_TREE;
2177 /* A subroutine of get_load_store_type, with a subset of the same
2178 arguments. Handle the case where STMT_INFO is part of a grouped load
2179 or store.
2181 For stores, the statements in the group are all consecutive
2182 and there is no gap at the end. For loads, the statements in the
2183 group might not be consecutive; there can be gaps between statements
2184 as well as at the end. */
2186 static bool
2187 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2188 tree vectype, slp_tree slp_node,
2189 bool masked_p, vec_load_store_type vls_type,
2190 vect_memory_access_type *memory_access_type,
2191 poly_int64 *poffset,
2192 dr_alignment_support *alignment_support_scheme,
2193 int *misalignment,
2194 gather_scatter_info *gs_info)
2196 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2197 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2198 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2199 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2200 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2201 bool single_element_p = (stmt_info == first_stmt_info
2202 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2203 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2204 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2206 /* True if the vectorized statements would access beyond the last
2207 statement in the group. */
2208 bool overrun_p = false;
2210 /* True if we can cope with such overrun by peeling for gaps, so that
2211 there is at least one final scalar iteration after the vector loop. */
2212 bool can_overrun_p = (!masked_p
2213 && vls_type == VLS_LOAD
2214 && loop_vinfo
2215 && !loop->inner);
2217 /* There can only be a gap at the end of the group if the stride is
2218 known at compile time. */
2219 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2221 /* Stores can't yet have gaps. */
2222 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2224 if (slp_node)
2226 /* For SLP vectorization we directly vectorize a subchain
2227 without permutation. */
2228 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2229 first_dr_info
2230 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2231 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2233 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2234 separated by the stride, until we have a complete vector.
2235 Fall back to scalar accesses if that isn't possible. */
2236 if (multiple_p (nunits, group_size))
2237 *memory_access_type = VMAT_STRIDED_SLP;
2238 else
2239 *memory_access_type = VMAT_ELEMENTWISE;
2241 else
2243 overrun_p = loop_vinfo && gap != 0;
2244 if (overrun_p && vls_type != VLS_LOAD)
2246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2247 "Grouped store with gaps requires"
2248 " non-consecutive accesses\n");
2249 return false;
2251 /* An overrun is fine if the trailing elements are smaller
2252 than the alignment boundary B. Every vector access will
2253 be a multiple of B and so we are guaranteed to access a
2254 non-gap element in the same B-sized block. */
2255 if (overrun_p
2256 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2257 vectype)
2258 / vect_get_scalar_dr_size (first_dr_info)))
2259 overrun_p = false;
2261 /* If the gap splits the vector in half and the target
2262 can do half-vector operations avoid the epilogue peeling
2263 by simply loading half of the vector only. Usually
2264 the construction with an upper zero half will be elided. */
2265 dr_alignment_support alss;
2266 int misalign = dr_misalignment (first_dr_info, vectype);
2267 tree half_vtype;
2268 if (overrun_p
2269 && !masked_p
2270 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2271 vectype, misalign)))
2272 == dr_aligned
2273 || alss == dr_unaligned_supported)
2274 && known_eq (nunits, (group_size - gap) * 2)
2275 && known_eq (nunits, group_size)
2276 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2277 != NULL_TREE))
2278 overrun_p = false;
2280 if (overrun_p && !can_overrun_p)
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2284 "Peeling for outer loop is not supported\n");
2285 return false;
2287 int cmp = compare_step_with_zero (vinfo, stmt_info);
2288 if (cmp < 0)
2290 if (single_element_p)
2291 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2292 only correct for single element "interleaving" SLP. */
2293 *memory_access_type = get_negative_load_store_type
2294 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2295 else
2297 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2298 separated by the stride, until we have a complete vector.
2299 Fall back to scalar accesses if that isn't possible. */
2300 if (multiple_p (nunits, group_size))
2301 *memory_access_type = VMAT_STRIDED_SLP;
2302 else
2303 *memory_access_type = VMAT_ELEMENTWISE;
2306 else
2308 gcc_assert (!loop_vinfo || cmp > 0);
2309 *memory_access_type = VMAT_CONTIGUOUS;
2312 /* When we have a contiguous access across loop iterations
2313 but the access in the loop doesn't cover the full vector
2314 we can end up with no gap recorded but still excess
2315 elements accessed, see PR103116. Make sure we peel for
2316 gaps if necessary and sufficient and give up if not. */
2317 if (loop_vinfo
2318 && *memory_access_type == VMAT_CONTIGUOUS
2319 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2320 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2321 nunits))
2323 unsigned HOST_WIDE_INT cnunits, cvf;
2324 if (!can_overrun_p
2325 || !nunits.is_constant (&cnunits)
2326 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2327 /* Peeling for gaps assumes that a single scalar iteration
2328 is enough to make sure the last vector iteration doesn't
2329 access excess elements.
2330 ??? Enhancements include peeling multiple iterations
2331 or using masked loads with a static mask. */
2332 || (group_size * cvf) % cnunits + group_size < cnunits)
2334 if (dump_enabled_p ())
2335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2336 "peeling for gaps insufficient for "
2337 "access\n");
2338 return false;
2340 overrun_p = true;
2344 else
2346 /* We can always handle this case using elementwise accesses,
2347 but see if something more efficient is available. */
2348 *memory_access_type = VMAT_ELEMENTWISE;
2350 /* If there is a gap at the end of the group then these optimizations
2351 would access excess elements in the last iteration. */
2352 bool would_overrun_p = (gap != 0);
2353 /* An overrun is fine if the trailing elements are smaller than the
2354 alignment boundary B. Every vector access will be a multiple of B
2355 and so we are guaranteed to access a non-gap element in the
2356 same B-sized block. */
2357 if (would_overrun_p
2358 && !masked_p
2359 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2360 / vect_get_scalar_dr_size (first_dr_info)))
2361 would_overrun_p = false;
2363 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2364 && (can_overrun_p || !would_overrun_p)
2365 && compare_step_with_zero (vinfo, stmt_info) > 0)
2367 /* First cope with the degenerate case of a single-element
2368 vector. */
2369 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2372 /* Otherwise try using LOAD/STORE_LANES. */
2373 else if (vls_type == VLS_LOAD
2374 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2375 : vect_store_lanes_supported (vectype, group_size,
2376 masked_p))
2378 *memory_access_type = VMAT_LOAD_STORE_LANES;
2379 overrun_p = would_overrun_p;
2382 /* If that fails, try using permuting loads. */
2383 else if (vls_type == VLS_LOAD
2384 ? vect_grouped_load_supported (vectype, single_element_p,
2385 group_size)
2386 : vect_grouped_store_supported (vectype, group_size))
2388 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2389 overrun_p = would_overrun_p;
2393 /* As a last resort, try using a gather load or scatter store.
2395 ??? Although the code can handle all group sizes correctly,
2396 it probably isn't a win to use separate strided accesses based
2397 on nearby locations. Or, even if it's a win over scalar code,
2398 it might not be a win over vectorizing at a lower VF, if that
2399 allows us to use contiguous accesses. */
2400 if (*memory_access_type == VMAT_ELEMENTWISE
2401 && single_element_p
2402 && loop_vinfo
2403 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2404 masked_p, gs_info))
2405 *memory_access_type = VMAT_GATHER_SCATTER;
2408 if (*memory_access_type == VMAT_GATHER_SCATTER
2409 || *memory_access_type == VMAT_ELEMENTWISE)
2411 *alignment_support_scheme = dr_unaligned_supported;
2412 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2414 else
2416 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2417 *alignment_support_scheme
2418 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2419 *misalignment);
2422 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2424 /* STMT is the leader of the group. Check the operands of all the
2425 stmts of the group. */
2426 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2427 while (next_stmt_info)
2429 tree op = vect_get_store_rhs (next_stmt_info);
2430 enum vect_def_type dt;
2431 if (!vect_is_simple_use (op, vinfo, &dt))
2433 if (dump_enabled_p ())
2434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2435 "use not simple.\n");
2436 return false;
2438 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2442 if (overrun_p)
2444 gcc_assert (can_overrun_p);
2445 if (dump_enabled_p ())
2446 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2447 "Data access with gaps requires scalar "
2448 "epilogue loop\n");
2449 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2452 return true;
2455 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2456 if there is a memory access type that the vectorized form can use,
2457 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2458 or scatters, fill in GS_INFO accordingly. In addition
2459 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2460 the target does not support the alignment scheme. *MISALIGNMENT
2461 is set according to the alignment of the access (including
2462 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2464 SLP says whether we're performing SLP rather than loop vectorization.
2465 MASKED_P is true if the statement is conditional on a vectorized mask.
2466 VECTYPE is the vector type that the vectorized statements will use.
2467 NCOPIES is the number of vector statements that will be needed. */
2469 static bool
2470 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2471 tree vectype, slp_tree slp_node,
2472 bool masked_p, vec_load_store_type vls_type,
2473 unsigned int ncopies,
2474 vect_memory_access_type *memory_access_type,
2475 poly_int64 *poffset,
2476 dr_alignment_support *alignment_support_scheme,
2477 int *misalignment,
2478 gather_scatter_info *gs_info)
2480 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2481 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2482 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2483 *poffset = 0;
2484 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2486 *memory_access_type = VMAT_GATHER_SCATTER;
2487 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2488 gcc_unreachable ();
2489 /* When using internal functions, we rely on pattern recognition
2490 to convert the type of the offset to the type that the target
2491 requires, with the result being a call to an internal function.
2492 If that failed for some reason (e.g. because another pattern
2493 took priority), just handle cases in which the offset already
2494 has the right type. */
2495 else if (gs_info->ifn != IFN_LAST
2496 && !is_gimple_call (stmt_info->stmt)
2497 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2498 TREE_TYPE (gs_info->offset_vectype)))
2500 if (dump_enabled_p ())
2501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2502 "%s offset requires a conversion\n",
2503 vls_type == VLS_LOAD ? "gather" : "scatter");
2504 return false;
2506 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2507 &gs_info->offset_dt,
2508 &gs_info->offset_vectype))
2510 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2512 "%s index use not simple.\n",
2513 vls_type == VLS_LOAD ? "gather" : "scatter");
2514 return false;
2516 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2518 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2519 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2520 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2521 (gs_info->offset_vectype),
2522 TYPE_VECTOR_SUBPARTS (vectype)))
2524 if (dump_enabled_p ())
2525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2526 "unsupported vector types for emulated "
2527 "gather.\n");
2528 return false;
2531 /* Gather-scatter accesses perform only component accesses, alignment
2532 is irrelevant for them. */
2533 *alignment_support_scheme = dr_unaligned_supported;
2535 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2537 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2538 masked_p,
2539 vls_type, memory_access_type, poffset,
2540 alignment_support_scheme,
2541 misalignment, gs_info))
2542 return false;
2544 else if (STMT_VINFO_STRIDED_P (stmt_info))
2546 gcc_assert (!slp_node);
2547 if (loop_vinfo
2548 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2549 masked_p, gs_info))
2550 *memory_access_type = VMAT_GATHER_SCATTER;
2551 else
2552 *memory_access_type = VMAT_ELEMENTWISE;
2553 /* Alignment is irrelevant here. */
2554 *alignment_support_scheme = dr_unaligned_supported;
2556 else
2558 int cmp = compare_step_with_zero (vinfo, stmt_info);
2559 if (cmp == 0)
2561 gcc_assert (vls_type == VLS_LOAD);
2562 *memory_access_type = VMAT_INVARIANT;
2563 /* Invariant accesses perform only component accesses, alignment
2564 is irrelevant for them. */
2565 *alignment_support_scheme = dr_unaligned_supported;
2567 else
2569 if (cmp < 0)
2570 *memory_access_type = get_negative_load_store_type
2571 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2572 else
2573 *memory_access_type = VMAT_CONTIGUOUS;
2574 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2575 vectype, *poffset);
2576 *alignment_support_scheme
2577 = vect_supportable_dr_alignment (vinfo,
2578 STMT_VINFO_DR_INFO (stmt_info),
2579 vectype, *misalignment);
2583 if ((*memory_access_type == VMAT_ELEMENTWISE
2584 || *memory_access_type == VMAT_STRIDED_SLP)
2585 && !nunits.is_constant ())
2587 if (dump_enabled_p ())
2588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2589 "Not using elementwise accesses due to variable "
2590 "vectorization factor.\n");
2591 return false;
2594 if (*alignment_support_scheme == dr_unaligned_unsupported)
2596 if (dump_enabled_p ())
2597 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2598 "unsupported unaligned access\n");
2599 return false;
2602 /* FIXME: At the moment the cost model seems to underestimate the
2603 cost of using elementwise accesses. This check preserves the
2604 traditional behavior until that can be fixed. */
2605 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2606 if (!first_stmt_info)
2607 first_stmt_info = stmt_info;
2608 if (*memory_access_type == VMAT_ELEMENTWISE
2609 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2610 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2611 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2612 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2614 if (dump_enabled_p ())
2615 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2616 "not falling back to elementwise accesses\n");
2617 return false;
2619 return true;
2622 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2623 conditional operation STMT_INFO. When returning true, store the mask
2624 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2625 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2626 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2628 static bool
2629 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2630 slp_tree slp_node, unsigned mask_index,
2631 tree *mask, slp_tree *mask_node,
2632 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2634 enum vect_def_type mask_dt;
2635 tree mask_vectype;
2636 slp_tree mask_node_1;
2637 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2638 mask, &mask_node_1, &mask_dt, &mask_vectype))
2640 if (dump_enabled_p ())
2641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2642 "mask use not simple.\n");
2643 return false;
2646 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2648 if (dump_enabled_p ())
2649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2650 "mask argument is not a boolean.\n");
2651 return false;
2654 /* If the caller is not prepared for adjusting an external/constant
2655 SLP mask vector type fail. */
2656 if (slp_node
2657 && !mask_node
2658 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2660 if (dump_enabled_p ())
2661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2662 "SLP mask argument is not vectorized.\n");
2663 return false;
2666 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2667 if (!mask_vectype)
2668 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2670 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2672 if (dump_enabled_p ())
2673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2674 "could not find an appropriate vector mask type.\n");
2675 return false;
2678 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2679 TYPE_VECTOR_SUBPARTS (vectype)))
2681 if (dump_enabled_p ())
2682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2683 "vector mask type %T"
2684 " does not match vector data type %T.\n",
2685 mask_vectype, vectype);
2687 return false;
2690 *mask_dt_out = mask_dt;
2691 *mask_vectype_out = mask_vectype;
2692 if (mask_node)
2693 *mask_node = mask_node_1;
2694 return true;
2697 /* Return true if stored value RHS is suitable for vectorizing store
2698 statement STMT_INFO. When returning true, store the type of the
2699 definition in *RHS_DT_OUT, the type of the vectorized store value in
2700 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2702 static bool
2703 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2704 slp_tree slp_node, tree rhs,
2705 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2706 vec_load_store_type *vls_type_out)
2708 /* If this is a store of a constant, make sure
2709 native_encode_expr can handle it. */
2710 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2712 if (dump_enabled_p ())
2713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2714 "cannot encode constant as a byte sequence.\n");
2715 return false;
2718 unsigned op_no = 0;
2719 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2721 if (gimple_call_internal_p (call)
2722 && internal_store_fn_p (gimple_call_internal_fn (call)))
2723 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2726 enum vect_def_type rhs_dt;
2727 tree rhs_vectype;
2728 slp_tree slp_op;
2729 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2730 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2732 if (dump_enabled_p ())
2733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2734 "use not simple.\n");
2735 return false;
2738 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2739 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2741 if (dump_enabled_p ())
2742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2743 "incompatible vector types.\n");
2744 return false;
2747 *rhs_dt_out = rhs_dt;
2748 *rhs_vectype_out = rhs_vectype;
2749 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2750 *vls_type_out = VLS_STORE_INVARIANT;
2751 else
2752 *vls_type_out = VLS_STORE;
2753 return true;
2756 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2757 Note that we support masks with floating-point type, in which case the
2758 floats are interpreted as a bitmask. */
2760 static tree
2761 vect_build_all_ones_mask (vec_info *vinfo,
2762 stmt_vec_info stmt_info, tree masktype)
2764 if (TREE_CODE (masktype) == INTEGER_TYPE)
2765 return build_int_cst (masktype, -1);
2766 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2768 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2769 mask = build_vector_from_val (masktype, mask);
2770 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2772 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2774 REAL_VALUE_TYPE r;
2775 long tmp[6];
2776 for (int j = 0; j < 6; ++j)
2777 tmp[j] = -1;
2778 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2779 tree mask = build_real (TREE_TYPE (masktype), r);
2780 mask = build_vector_from_val (masktype, mask);
2781 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2783 gcc_unreachable ();
2786 /* Build an all-zero merge value of type VECTYPE while vectorizing
2787 STMT_INFO as a gather load. */
2789 static tree
2790 vect_build_zero_merge_argument (vec_info *vinfo,
2791 stmt_vec_info stmt_info, tree vectype)
2793 tree merge;
2794 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2795 merge = build_int_cst (TREE_TYPE (vectype), 0);
2796 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2798 REAL_VALUE_TYPE r;
2799 long tmp[6];
2800 for (int j = 0; j < 6; ++j)
2801 tmp[j] = 0;
2802 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2803 merge = build_real (TREE_TYPE (vectype), r);
2805 else
2806 gcc_unreachable ();
2807 merge = build_vector_from_val (vectype, merge);
2808 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2811 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2812 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2813 the gather load operation. If the load is conditional, MASK is the
2814 unvectorized condition and MASK_DT is its definition type, otherwise
2815 MASK is null. */
2817 static void
2818 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2819 gimple_stmt_iterator *gsi,
2820 gimple **vec_stmt,
2821 gather_scatter_info *gs_info,
2822 tree mask)
2824 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2825 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2826 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2827 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2828 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2829 edge pe = loop_preheader_edge (loop);
2830 enum { NARROW, NONE, WIDEN } modifier;
2831 poly_uint64 gather_off_nunits
2832 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2834 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2835 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2836 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2837 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2838 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2839 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2840 tree scaletype = TREE_VALUE (arglist);
2841 tree real_masktype = masktype;
2842 gcc_checking_assert (types_compatible_p (srctype, rettype)
2843 && (!mask
2844 || TREE_CODE (masktype) == INTEGER_TYPE
2845 || types_compatible_p (srctype, masktype)));
2846 if (mask)
2847 masktype = truth_type_for (srctype);
2849 tree mask_halftype = masktype;
2850 tree perm_mask = NULL_TREE;
2851 tree mask_perm_mask = NULL_TREE;
2852 if (known_eq (nunits, gather_off_nunits))
2853 modifier = NONE;
2854 else if (known_eq (nunits * 2, gather_off_nunits))
2856 modifier = WIDEN;
2858 /* Currently widening gathers and scatters are only supported for
2859 fixed-length vectors. */
2860 int count = gather_off_nunits.to_constant ();
2861 vec_perm_builder sel (count, count, 1);
2862 for (int i = 0; i < count; ++i)
2863 sel.quick_push (i | (count / 2));
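/* E.g. (illustrative) for COUNT == 8 the selector is
   { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. the upper half of the offset vector
   moved into the lower half, used for the odd-numbered copies below. */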
2865 vec_perm_indices indices (sel, 1, count);
2866 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2867 indices);
2869 else if (known_eq (nunits, gather_off_nunits * 2))
2871 modifier = NARROW;
2873 /* Currently narrowing gathers and scatters are only supported for
2874 fixed-length vectors. */
2875 int count = nunits.to_constant ();
2876 vec_perm_builder sel (count, count, 1);
2877 sel.quick_grow (count);
2878 for (int i = 0; i < count; ++i)
2879 sel[i] = i < count / 2 ? i : i + count / 2;
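/* E.g. (illustrative) for COUNT == 8 the selector is
   { 0, 1, 2, 3, 8, 9, 10, 11 }, i.e. elements 0-3 of the first input
   followed by elements 0-3 of the second, used below to combine two
   gather results into one full vector. */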
2880 vec_perm_indices indices (sel, 2, count);
2881 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2883 ncopies *= 2;
2885 if (mask && VECTOR_TYPE_P (real_masktype))
2887 for (int i = 0; i < count; ++i)
2888 sel[i] = i | (count / 2);
2889 indices.new_vector (sel, 2, count);
2890 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2892 else if (mask)
2893 mask_halftype = truth_type_for (gs_info->offset_vectype);
2895 else
2896 gcc_unreachable ();
2898 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2899 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2901 tree ptr = fold_convert (ptrtype, gs_info->base);
2902 if (!is_gimple_min_invariant (ptr))
2904 gimple_seq seq;
2905 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2906 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2907 gcc_assert (!new_bb);
2910 tree scale = build_int_cst (scaletype, gs_info->scale);
2912 tree vec_oprnd0 = NULL_TREE;
2913 tree vec_mask = NULL_TREE;
2914 tree src_op = NULL_TREE;
2915 tree mask_op = NULL_TREE;
2916 tree prev_res = NULL_TREE;
2918 if (!mask)
2920 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2921 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2924 auto_vec<tree> vec_oprnds0;
2925 auto_vec<tree> vec_masks;
2926 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2927 modifier == WIDEN ? ncopies / 2 : ncopies,
2928 gs_info->offset, &vec_oprnds0);
2929 if (mask)
2930 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2931 modifier == NARROW ? ncopies / 2 : ncopies,
2932 mask, &vec_masks, masktype);
2933 for (int j = 0; j < ncopies; ++j)
2935 tree op, var;
2936 if (modifier == WIDEN && (j & 1))
2937 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2938 perm_mask, stmt_info, gsi);
2939 else
2940 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2942 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2944 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2945 TYPE_VECTOR_SUBPARTS (idxtype)));
2946 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2947 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2948 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2949 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2950 op = var;
2953 if (mask)
2955 if (mask_perm_mask && (j & 1))
2956 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2957 mask_perm_mask, stmt_info, gsi);
2958 else
2960 if (modifier == NARROW)
2962 if ((j & 1) == 0)
2963 vec_mask = vec_masks[j / 2];
2965 else
2966 vec_mask = vec_masks[j];
2968 mask_op = vec_mask;
2969 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2971 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2972 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2973 gcc_assert (known_eq (sub1, sub2));
2974 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2975 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2976 gassign *new_stmt
2977 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2978 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2979 mask_op = var;
2982 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2984 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2985 gassign *new_stmt
2986 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2987 : VEC_UNPACK_LO_EXPR,
2988 mask_op);
2989 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2990 mask_op = var;
2992 src_op = mask_op;
2995 tree mask_arg = mask_op;
2996 if (masktype != real_masktype)
2998 tree utype, optype = TREE_TYPE (mask_op);
2999 if (VECTOR_TYPE_P (real_masktype)
3000 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
3001 utype = real_masktype;
3002 else
3003 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
3004 var = vect_get_new_ssa_name (utype, vect_scalar_var);
3005 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
3006 gassign *new_stmt
3007 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
3008 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3009 mask_arg = var;
3010 if (!useless_type_conversion_p (real_masktype, utype))
3012 gcc_assert (TYPE_PRECISION (utype)
3013 <= TYPE_PRECISION (real_masktype));
3014 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
3015 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
3016 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3017 mask_arg = var;
3019 src_op = build_zero_cst (srctype);
3021 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
3022 mask_arg, scale);
3024 if (!useless_type_conversion_p (vectype, rettype))
3026 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3027 TYPE_VECTOR_SUBPARTS (rettype)));
3028 op = vect_get_new_ssa_name (rettype, vect_simple_var);
3029 gimple_call_set_lhs (new_stmt, op);
3030 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3031 var = make_ssa_name (vec_dest);
3032 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3033 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3034 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3036 else
3038 var = make_ssa_name (vec_dest, new_stmt);
3039 gimple_call_set_lhs (new_stmt, var);
3040 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3043 if (modifier == NARROW)
3045 if ((j & 1) == 0)
3047 prev_res = var;
3048 continue;
3050 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
3051 stmt_info, gsi);
3052 new_stmt = SSA_NAME_DEF_STMT (var);
3055 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3057 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3060 /* Prepare the base and offset in GS_INFO for vectorization.
3061 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3062 to the vectorized offset argument for the first copy of STMT_INFO.
3063 STMT_INFO is the statement described by GS_INFO and LOOP is the
3064 containing loop. */
3066 static void
3067 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3068 class loop *loop, stmt_vec_info stmt_info,
3069 slp_tree slp_node, gather_scatter_info *gs_info,
3070 tree *dataref_ptr, vec<tree> *vec_offset)
3072 gimple_seq stmts = NULL;
3073 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3074 if (stmts != NULL)
3076 basic_block new_bb;
3077 edge pe = loop_preheader_edge (loop);
3078 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3079 gcc_assert (!new_bb);
3081 if (slp_node)
3082 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3083 else
3085 unsigned ncopies
3086 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3087 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3088 gs_info->offset, vec_offset,
3089 gs_info->offset_vectype);
3093 /* Prepare to implement a grouped or strided load or store using
3094 the gather load or scatter store operation described by GS_INFO.
3095 STMT_INFO is the load or store statement.
3097 Set *DATAREF_BUMP to the amount that should be added to the base
3098 address after each copy of the vectorized statement. Set *VEC_OFFSET
3099 to an invariant offset vector in which element I has the value
3100 I * DR_STEP / SCALE. */
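/* Illustrative example: with DR_STEP == 16, SCALE == 4 and a 4-element
   vector, *VEC_OFFSET becomes { 0, 4, 8, 12 } and *DATAREF_BUMP becomes
   64 (16 bytes times 4 elements). */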
3102 static void
3103 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3104 loop_vec_info loop_vinfo,
3105 gather_scatter_info *gs_info,
3106 tree *dataref_bump, tree *vec_offset)
3108 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3109 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3111 tree bump = size_binop (MULT_EXPR,
3112 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3113 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3114 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3116 /* The offset given in GS_INFO can have pointer type, so use the element
3117 type of the vector instead. */
3118 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3120 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3121 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3122 ssize_int (gs_info->scale));
3123 step = fold_convert (offset_type, step);
3125 /* Create {0, X, X*2, X*3, ...}. */
3126 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3127 build_zero_cst (offset_type), step);
3128 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3131 /* Return the amount that should be added to a vector pointer to move
3132 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3133 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3134 vectorization. */
3136 static tree
3137 vect_get_data_ptr_increment (vec_info *vinfo,
3138 dr_vec_info *dr_info, tree aggr_type,
3139 vect_memory_access_type memory_access_type)
3141 if (memory_access_type == VMAT_INVARIANT)
3142 return size_zero_node;
3144 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3145 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3146 if (tree_int_cst_sgn (step) == -1)
3147 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3148 return iv_step;
3151 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3153 static bool
3154 vectorizable_bswap (vec_info *vinfo,
3155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3156 gimple **vec_stmt, slp_tree slp_node,
3157 slp_tree *slp_op,
3158 tree vectype_in, stmt_vector_for_cost *cost_vec)
3160 tree op, vectype;
3161 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3162 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3163 unsigned ncopies;
3165 op = gimple_call_arg (stmt, 0);
3166 vectype = STMT_VINFO_VECTYPE (stmt_info);
3167 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3169 /* Multiple types in SLP are handled by creating the appropriate number of
3170 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3171 case of SLP. */
3172 if (slp_node)
3173 ncopies = 1;
3174 else
3175 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3177 gcc_assert (ncopies >= 1);
3179 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3180 if (! char_vectype)
3181 return false;
3183 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3184 unsigned word_bytes;
3185 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3186 return false;
3188 /* The encoding uses one stepped pattern for each byte in the word. */
3189 vec_perm_builder elts (num_bytes, word_bytes, 3);
3190 for (unsigned i = 0; i < 3; ++i)
3191 for (unsigned j = 0; j < word_bytes; ++j)
3192 elts.quick_push ((i + 1) * word_bytes - j - 1);
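/* E.g. (illustrative) for a 32-bit bswap on a 16-byte vector,
   WORD_BYTES == 4 and the selector extends to
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 },
   reversing the bytes within each word. */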
3194 vec_perm_indices indices (elts, 1, num_bytes);
3195 machine_mode vmode = TYPE_MODE (char_vectype);
3196 if (!can_vec_perm_const_p (vmode, vmode, indices))
3197 return false;
3199 if (! vec_stmt)
3201 if (slp_node
3202 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3204 if (dump_enabled_p ())
3205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3206 "incompatible vector types for invariants\n");
3207 return false;
3210 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3211 DUMP_VECT_SCOPE ("vectorizable_bswap");
3212 record_stmt_cost (cost_vec,
3213 1, vector_stmt, stmt_info, 0, vect_prologue);
3214 record_stmt_cost (cost_vec,
3215 slp_node
3216 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3217 vec_perm, stmt_info, 0, vect_body);
3218 return true;
3221 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3223 /* Transform. */
3224 vec<tree> vec_oprnds = vNULL;
3225 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3226 op, &vec_oprnds);
3227 /* Arguments are ready. Create the new vector stmt. */
3228 unsigned i;
3229 tree vop;
3230 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3232 gimple *new_stmt;
3233 tree tem = make_ssa_name (char_vectype);
3234 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3235 char_vectype, vop));
3236 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3237 tree tem2 = make_ssa_name (char_vectype);
3238 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3239 tem, tem, bswap_vconst);
3240 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3241 tem = make_ssa_name (vectype);
3242 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3243 vectype, tem2));
3244 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3245 if (slp_node)
3246 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3247 else
3248 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3251 if (!slp_node)
3252 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3254 vec_oprnds.release ();
3255 return true;
3258 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3259 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3260 in a single step. On success, store the binary pack code in
3261 *CONVERT_CODE. */
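/* A hedged sketch of a typical case: with VECTYPE_IN == V2DI and
   VECTYPE_OUT == V4SI the narrowing is a single step and *CONVERT_CODE
   would be VEC_PACK_TRUNC_EXPR, packing two V2DI vectors into one
   V4SI. */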
3263 static bool
3264 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3265 tree_code *convert_code)
3267 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3268 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3269 return false;
3271 tree_code code;
3272 int multi_step_cvt = 0;
3273 auto_vec <tree, 8> interm_types;
3274 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3275 &code, &multi_step_cvt, &interm_types)
3276 || multi_step_cvt)
3277 return false;
3279 *convert_code = code;
3280 return true;
3283 /* Function vectorizable_call.
3285 Check if STMT_INFO performs a function call that can be vectorized.
3286 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3287 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3288 Return true if STMT_INFO is vectorizable in this way. */
3290 static bool
3291 vectorizable_call (vec_info *vinfo,
3292 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3293 gimple **vec_stmt, slp_tree slp_node,
3294 stmt_vector_for_cost *cost_vec)
3296 gcall *stmt;
3297 tree vec_dest;
3298 tree scalar_dest;
3299 tree op;
3300 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3301 tree vectype_out, vectype_in;
3302 poly_uint64 nunits_in;
3303 poly_uint64 nunits_out;
3304 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3305 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3306 tree fndecl, new_temp, rhs_type;
3307 enum vect_def_type dt[4]
3308 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3309 vect_unknown_def_type };
3310 tree vectypes[ARRAY_SIZE (dt)] = {};
3311 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3312 int ndts = ARRAY_SIZE (dt);
3313 int ncopies, j;
3314 auto_vec<tree, 8> vargs;
3315 enum { NARROW, NONE, WIDEN } modifier;
3316 size_t i, nargs;
3317 tree lhs;
3319 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3320 return false;
3322 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3323 && ! vec_stmt)
3324 return false;
3326 /* Is STMT_INFO a vectorizable call? */
3327 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3328 if (!stmt)
3329 return false;
3331 if (gimple_call_internal_p (stmt)
3332 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3333 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3334 /* Handled by vectorizable_load and vectorizable_store. */
3335 return false;
3337 if (gimple_call_lhs (stmt) == NULL_TREE
3338 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3339 return false;
3341 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3343 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3345 /* Process function arguments. */
3346 rhs_type = NULL_TREE;
3347 vectype_in = NULL_TREE;
3348 nargs = gimple_call_num_args (stmt);
3350 /* Bail out if the function has more than four arguments; we do not have
3351 interesting builtin functions to vectorize with more than two arguments
3352 except for fma. Having no arguments is not good either. */
3353 if (nargs == 0 || nargs > 4)
3354 return false;
3356 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3357 combined_fn cfn = gimple_call_combined_fn (stmt);
3358 if (cfn == CFN_GOMP_SIMD_LANE)
3360 nargs = 0;
3361 rhs_type = unsigned_type_node;
3364 int mask_opno = -1;
3365 if (internal_fn_p (cfn))
3366 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3368 for (i = 0; i < nargs; i++)
3370 if ((int) i == mask_opno)
3372 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3373 &op, &slp_op[i], &dt[i], &vectypes[i]))
3374 return false;
3375 continue;
3378 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3379 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3381 if (dump_enabled_p ())
3382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3383 "use not simple.\n");
3384 return false;
3387 /* We can only handle calls with arguments of the same type. */
3388 if (rhs_type
3389 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3391 if (dump_enabled_p ())
3392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3393 "argument types differ.\n");
3394 return false;
3396 if (!rhs_type)
3397 rhs_type = TREE_TYPE (op);
3399 if (!vectype_in)
3400 vectype_in = vectypes[i];
3401 else if (vectypes[i]
3402 && !types_compatible_p (vectypes[i], vectype_in))
3404 if (dump_enabled_p ())
3405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3406 "argument vector types differ.\n");
3407 return false;
3410 /* If all arguments are external or constant defs, infer the vector type
3411 from the scalar type. */
3412 if (!vectype_in)
3413 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3414 if (vec_stmt)
3415 gcc_assert (vectype_in);
3416 if (!vectype_in)
3418 if (dump_enabled_p ())
3419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3420 "no vectype for scalar type %T\n", rhs_type);
3422 return false;
3424 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3425 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3426 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3427 by a pack of the two vectors into an SI vector. We would need
3428 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3429 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3431 if (dump_enabled_p ())
3432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3433 "mismatched vector sizes %T and %T\n",
3434 vectype_in, vectype_out);
3435 return false;
3438 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3439 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3441 if (dump_enabled_p ())
3442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3443 "mixed mask and nonmask vector types\n");
3444 return false;
3447 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3449 if (dump_enabled_p ())
3450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3451 "use emulated vector type for call\n");
3452 return false;
3455 /* FORNOW */
3456 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3457 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3458 if (known_eq (nunits_in * 2, nunits_out))
3459 modifier = NARROW;
3460 else if (known_eq (nunits_out, nunits_in))
3461 modifier = NONE;
3462 else if (known_eq (nunits_out * 2, nunits_in))
3463 modifier = WIDEN;
3464 else
3465 return false;
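/* Illustrative sketch of the modifier choice above (hypothetical loop,
   assuming 128-bit vectors and a target that supports IFN_CTZ on V2DI):

     int out[N]; unsigned long long in[N];
     for (int i = 0; i < N; i++)
       out[i] = __builtin_ctzll (in[i]);

   Here vectype_in is V2DI (two inputs) and vectype_out is V4SI (four
   results), so known_eq (nunits_in * 2, nunits_out) holds and the modifier
   is NARROW: two V2DI IFN_CTZ calls are emitted and their results packed
   into one V4SI, as described in the FORNOW comment above.  Equal element
   counts give NONE; twice as many input as output elements give WIDEN.  */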
3467 /* We only handle functions that do not read or clobber memory. */
3468 if (gimple_vuse (stmt))
3470 if (dump_enabled_p ())
3471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3472 "function reads from or writes to memory.\n");
3473 return false;
3476 /* For now, we only vectorize functions if a target-specific builtin
3477 is available. TODO -- in some cases, it might be profitable to
3478 insert the calls for pieces of the vector, in order to be able
3479 to vectorize other operations in the loop. */
3480 fndecl = NULL_TREE;
3481 internal_fn ifn = IFN_LAST;
3482 tree callee = gimple_call_fndecl (stmt);
3484 /* First try using an internal function. */
3485 tree_code convert_code = ERROR_MARK;
3486 if (cfn != CFN_LAST
3487 && (modifier == NONE
3488 || (modifier == NARROW
3489 && simple_integer_narrowing (vectype_out, vectype_in,
3490 &convert_code))))
3491 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3492 vectype_in);
3494 /* If that fails, try asking for a target-specific built-in function. */
3495 if (ifn == IFN_LAST)
3497 if (cfn != CFN_LAST)
3498 fndecl = targetm.vectorize.builtin_vectorized_function
3499 (cfn, vectype_out, vectype_in);
3500 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3501 fndecl = targetm.vectorize.builtin_md_vectorized_function
3502 (callee, vectype_out, vectype_in);
3505 if (ifn == IFN_LAST && !fndecl)
3507 if (cfn == CFN_GOMP_SIMD_LANE
3508 && !slp_node
3509 && loop_vinfo
3510 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3511 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3512 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3513 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3515 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3516 { 0, 1, 2, ... vf - 1 } vector. */
3517 gcc_assert (nargs == 0);
3519 else if (modifier == NONE
3520 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3521 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3522 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3523 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3524 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3525 slp_op, vectype_in, cost_vec);
3526 else
3528 if (dump_enabled_p ())
3529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3530 "function is not vectorizable.\n");
3531 return false;
3535 if (slp_node)
3536 ncopies = 1;
3537 else if (modifier == NARROW && ifn == IFN_LAST)
3538 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3539 else
3540 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3542 /* Sanity check: make sure that at least one copy of the vectorized stmt
3543 needs to be generated. */
3544 gcc_assert (ncopies >= 1);
3546 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3547 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3548 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3549 if (!vec_stmt) /* transformation not required. */
3551 if (slp_node)
3552 for (i = 0; i < nargs; ++i)
3553 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3554 vectypes[i]
3555 ? vectypes[i] : vectype_in))
3557 if (dump_enabled_p ())
3558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3559 "incompatible vector types for invariants\n");
3560 return false;
3562 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3563 DUMP_VECT_SCOPE ("vectorizable_call");
3564 vect_model_simple_cost (vinfo, stmt_info,
3565 ncopies, dt, ndts, slp_node, cost_vec);
3566 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3567 record_stmt_cost (cost_vec, ncopies / 2,
3568 vec_promote_demote, stmt_info, 0, vect_body);
3570 if (loop_vinfo
3571 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3572 && (reduc_idx >= 0 || mask_opno >= 0))
3574 if (reduc_idx >= 0
3575 && (cond_fn == IFN_LAST
3576 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3577 OPTIMIZE_FOR_SPEED)))
3579 if (dump_enabled_p ())
3580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3581 "can't use a fully-masked loop because no"
3582 " conditional operation is available.\n");
3583 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3585 else
3587 unsigned int nvectors
3588 = (slp_node
3589 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3590 : ncopies);
3591 tree scalar_mask = NULL_TREE;
3592 if (mask_opno >= 0)
3593 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3594 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3595 vectype_out, scalar_mask);
3598 return true;
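/* Illustrative sketch (hypothetical, assuming the target supports
   IFN_COND_FMA): for a call that is part of a reduction in a loop using
   partial vectors, e.g.

     for (int i = 0; i < n; i++)
       acc = __builtin_fma (a[i], b[i], acc);

   the transform below switches to the conditional internal function and
   appends a loop mask and a fallback value, so the vector statement is,
   roughly, .COND_FMA (loop_mask, a, b, acc, acc) rather than
   .FMA (a, b, acc).  */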
3601 /* Transform. */
3603 if (dump_enabled_p ())
3604 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3606 /* Handle def. */
3607 scalar_dest = gimple_call_lhs (stmt);
3608 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3610 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3611 unsigned int vect_nargs = nargs;
3612 if (masked_loop_p && reduc_idx >= 0)
3614 ifn = cond_fn;
3615 vect_nargs += 2;
3618 if (modifier == NONE || ifn != IFN_LAST)
3620 tree prev_res = NULL_TREE;
3621 vargs.safe_grow (vect_nargs, true);
3622 auto_vec<vec<tree> > vec_defs (nargs);
3623 for (j = 0; j < ncopies; ++j)
3625 /* Build argument list for the vectorized call. */
3626 if (slp_node)
3628 vec<tree> vec_oprnds0;
3630 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3631 vec_oprnds0 = vec_defs[0];
3633 /* Arguments are ready. Create the new vector stmt. */
3634 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3636 int varg = 0;
3637 if (masked_loop_p && reduc_idx >= 0)
3639 unsigned int vec_num = vec_oprnds0.length ();
3640 /* Always true for SLP. */
3641 gcc_assert (ncopies == 1);
3642 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3643 vectype_out, i);
3645 size_t k;
3646 for (k = 0; k < nargs; k++)
3648 vec<tree> vec_oprndsk = vec_defs[k];
3649 vargs[varg++] = vec_oprndsk[i];
3651 if (masked_loop_p && reduc_idx >= 0)
3652 vargs[varg++] = vargs[reduc_idx + 1];
3653 gimple *new_stmt;
3654 if (modifier == NARROW)
3656 /* We don't define any narrowing conditional functions
3657 at present. */
3658 gcc_assert (mask_opno < 0);
3659 tree half_res = make_ssa_name (vectype_in);
3660 gcall *call
3661 = gimple_build_call_internal_vec (ifn, vargs);
3662 gimple_call_set_lhs (call, half_res);
3663 gimple_call_set_nothrow (call, true);
3664 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3665 if ((i & 1) == 0)
3667 prev_res = half_res;
3668 continue;
3670 new_temp = make_ssa_name (vec_dest);
3671 new_stmt = gimple_build_assign (new_temp, convert_code,
3672 prev_res, half_res);
3673 vect_finish_stmt_generation (vinfo, stmt_info,
3674 new_stmt, gsi);
3676 else
3678 if (mask_opno >= 0 && masked_loop_p)
3680 unsigned int vec_num = vec_oprnds0.length ();
3681 /* Always true for SLP. */
3682 gcc_assert (ncopies == 1);
3683 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3684 vectype_out, i);
3685 vargs[mask_opno] = prepare_vec_mask
3686 (loop_vinfo, TREE_TYPE (mask), mask,
3687 vargs[mask_opno], gsi);
3690 gcall *call;
3691 if (ifn != IFN_LAST)
3692 call = gimple_build_call_internal_vec (ifn, vargs);
3693 else
3694 call = gimple_build_call_vec (fndecl, vargs);
3695 new_temp = make_ssa_name (vec_dest, call);
3696 gimple_call_set_lhs (call, new_temp);
3697 gimple_call_set_nothrow (call, true);
3698 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3699 new_stmt = call;
3701 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3703 continue;
3706 int varg = 0;
3707 if (masked_loop_p && reduc_idx >= 0)
3708 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3709 vectype_out, j);
3710 for (i = 0; i < nargs; i++)
3712 op = gimple_call_arg (stmt, i);
3713 if (j == 0)
3715 vec_defs.quick_push (vNULL);
3716 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3717 op, &vec_defs[i],
3718 vectypes[i]);
3720 vargs[varg++] = vec_defs[i][j];
3722 if (masked_loop_p && reduc_idx >= 0)
3723 vargs[varg++] = vargs[reduc_idx + 1];
3725 if (mask_opno >= 0 && masked_loop_p)
3727 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3728 vectype_out, j);
3729 vargs[mask_opno]
3730 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3731 vargs[mask_opno], gsi);
3734 gimple *new_stmt;
3735 if (cfn == CFN_GOMP_SIMD_LANE)
3737 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3738 tree new_var
3739 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3740 gimple *init_stmt = gimple_build_assign (new_var, cst);
3741 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3742 new_temp = make_ssa_name (vec_dest);
3743 new_stmt = gimple_build_assign (new_temp, new_var);
3744 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3746 else if (modifier == NARROW)
3748 /* We don't define any narrowing conditional functions at
3749 present. */
3750 gcc_assert (mask_opno < 0);
3751 tree half_res = make_ssa_name (vectype_in);
3752 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3753 gimple_call_set_lhs (call, half_res);
3754 gimple_call_set_nothrow (call, true);
3755 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3756 if ((j & 1) == 0)
3758 prev_res = half_res;
3759 continue;
3761 new_temp = make_ssa_name (vec_dest);
3762 new_stmt = gimple_build_assign (new_temp, convert_code,
3763 prev_res, half_res);
3764 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3766 else
3768 gcall *call;
3769 if (ifn != IFN_LAST)
3770 call = gimple_build_call_internal_vec (ifn, vargs);
3771 else
3772 call = gimple_build_call_vec (fndecl, vargs);
3773 new_temp = make_ssa_name (vec_dest, call);
3774 gimple_call_set_lhs (call, new_temp);
3775 gimple_call_set_nothrow (call, true);
3776 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3777 new_stmt = call;
3780 if (j == (modifier == NARROW ? 1 : 0))
3781 *vec_stmt = new_stmt;
3782 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3784 for (i = 0; i < nargs; i++)
3786 vec<tree> vec_oprndsi = vec_defs[i];
3787 vec_oprndsi.release ();
3790 else if (modifier == NARROW)
3792 auto_vec<vec<tree> > vec_defs (nargs);
3793 /* We don't define any narrowing conditional functions at present. */
3794 gcc_assert (mask_opno < 0);
3795 for (j = 0; j < ncopies; ++j)
3797 /* Build argument list for the vectorized call. */
3798 if (j == 0)
3799 vargs.create (nargs * 2);
3800 else
3801 vargs.truncate (0);
3803 if (slp_node)
3805 vec<tree> vec_oprnds0;
3807 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3808 vec_oprnds0 = vec_defs[0];
3810 /* Arguments are ready. Create the new vector stmt. */
3811 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3813 size_t k;
3814 vargs.truncate (0);
3815 for (k = 0; k < nargs; k++)
3817 vec<tree> vec_oprndsk = vec_defs[k];
3818 vargs.quick_push (vec_oprndsk[i]);
3819 vargs.quick_push (vec_oprndsk[i + 1]);
3821 gcall *call;
3822 if (ifn != IFN_LAST)
3823 call = gimple_build_call_internal_vec (ifn, vargs);
3824 else
3825 call = gimple_build_call_vec (fndecl, vargs);
3826 new_temp = make_ssa_name (vec_dest, call);
3827 gimple_call_set_lhs (call, new_temp);
3828 gimple_call_set_nothrow (call, true);
3829 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3830 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3832 continue;
3835 for (i = 0; i < nargs; i++)
3837 op = gimple_call_arg (stmt, i);
3838 if (j == 0)
3840 vec_defs.quick_push (vNULL);
3841 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3842 op, &vec_defs[i], vectypes[i]);
3844 vec_oprnd0 = vec_defs[i][2*j];
3845 vec_oprnd1 = vec_defs[i][2*j+1];
3847 vargs.quick_push (vec_oprnd0);
3848 vargs.quick_push (vec_oprnd1);
3851 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3852 new_temp = make_ssa_name (vec_dest, new_stmt);
3853 gimple_call_set_lhs (new_stmt, new_temp);
3854 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3856 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3859 if (!slp_node)
3860 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3862 for (i = 0; i < nargs; i++)
3864 vec<tree> vec_oprndsi = vec_defs[i];
3865 vec_oprndsi.release ();
3868 else
3869 /* No current target implements this case. */
3870 return false;
3872 vargs.release ();
3874 /* The call in STMT might prevent it from being removed in DCE.
3875 We however cannot remove it here, due to the way the SSA name
3876 it defines is mapped to the new definition. So just replace
3877 the RHS of the statement with something harmless. */
3879 if (slp_node)
3880 return true;
3882 stmt_info = vect_orig_stmt (stmt_info);
3883 lhs = gimple_get_lhs (stmt_info->stmt);
3885 gassign *new_stmt
3886 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3887 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3889 return true;
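/* Illustrative sketch of the replacement above (hypothetical SSA names):
   once the vector statements exist, a scalar call such as

     _5 = __builtin_sqrtf (x_3);

   is rewritten into the harmless assignment _5 = 0.0; the now-dead
   assignment is removed by a later DCE pass once nothing uses the scalar
   result.  */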
3893 struct simd_call_arg_info
3895 tree vectype;
3896 tree op;
3897 HOST_WIDE_INT linear_step;
3898 enum vect_def_type dt;
3899 unsigned int align;
3900 bool simd_lane_linear;
3903 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3904 is linear within a simd lane (but not within the whole loop), note it in
3905 *ARGINFO. */
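/* Illustrative sketch of the pattern matched below (schematic gimple,
   hypothetical names):

     lane_1 = .GOMP_SIMD_LANE (simduid.0);
     off_2 = lane_1 * 16;            <-- MULT_EXPR by a constant step
     op_3 = &base + off_2;           <-- POINTER_PLUS_EXPR, invariant base

   This arises for addresses of simd-lane-private storage in an OpenMP simd
   loop; the address is then linear in the lane number, with the constant
   step recorded in *ARGINFO.  */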
3907 static void
3908 vect_simd_lane_linear (tree op, class loop *loop,
3909 struct simd_call_arg_info *arginfo)
3911 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3913 if (!is_gimple_assign (def_stmt)
3914 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3915 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3916 return;
3918 tree base = gimple_assign_rhs1 (def_stmt);
3919 HOST_WIDE_INT linear_step = 0;
3920 tree v = gimple_assign_rhs2 (def_stmt);
3921 while (TREE_CODE (v) == SSA_NAME)
3923 tree t;
3924 def_stmt = SSA_NAME_DEF_STMT (v);
3925 if (is_gimple_assign (def_stmt))
3926 switch (gimple_assign_rhs_code (def_stmt))
3928 case PLUS_EXPR:
3929 t = gimple_assign_rhs2 (def_stmt);
3930 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3931 return;
3932 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3933 v = gimple_assign_rhs1 (def_stmt);
3934 continue;
3935 case MULT_EXPR:
3936 t = gimple_assign_rhs2 (def_stmt);
3937 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3938 return;
3939 linear_step = tree_to_shwi (t);
3940 v = gimple_assign_rhs1 (def_stmt);
3941 continue;
3942 CASE_CONVERT:
3943 t = gimple_assign_rhs1 (def_stmt);
3944 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3945 || (TYPE_PRECISION (TREE_TYPE (v))
3946 < TYPE_PRECISION (TREE_TYPE (t))))
3947 return;
3948 if (!linear_step)
3949 linear_step = 1;
3950 v = t;
3951 continue;
3952 default:
3953 return;
3955 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3956 && loop->simduid
3957 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3958 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3959 == loop->simduid))
3961 if (!linear_step)
3962 linear_step = 1;
3963 arginfo->linear_step = linear_step;
3964 arginfo->op = base;
3965 arginfo->simd_lane_linear = true;
3966 return;
3971 /* Return the number of elements in vector type VECTYPE, which is associated
3972 with a SIMD clone. At present these vectors always have a constant
3973 length. */
3975 static unsigned HOST_WIDE_INT
3976 simd_clone_subparts (tree vectype)
3978 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3981 /* Function vectorizable_simd_clone_call.
3983 Check if STMT_INFO performs a function call that can be vectorized
3984 by calling a simd clone of the function.
3985 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3986 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3987 Return true if STMT_INFO is vectorizable in this way. */
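/* Illustrative sketch (hypothetical function name): simd clones are created
   for functions declared with "#pragma omp declare simd" or
   __attribute__ ((simd)), e.g.

     #pragma omp declare simd notinbranch
     float foo (float x, float y);

     for (int i = 0; i < n; i++)
       out[i] = foo (a[i], b[i]);

   For such a loop the scalar call is replaced below by a call to one of the
   vector clones recorded in node->simd_clones, operating on whole vectors
   of arguments.  */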
3989 static bool
3990 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3991 gimple_stmt_iterator *gsi,
3992 gimple **vec_stmt, slp_tree slp_node,
3993 stmt_vector_for_cost *)
3995 tree vec_dest;
3996 tree scalar_dest;
3997 tree op, type;
3998 tree vec_oprnd0 = NULL_TREE;
3999 tree vectype;
4000 poly_uint64 nunits;
4001 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4002 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4003 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
4004 tree fndecl, new_temp;
4005 int ncopies, j;
4006 auto_vec<simd_call_arg_info> arginfo;
4007 vec<tree> vargs = vNULL;
4008 size_t i, nargs;
4009 tree lhs, rtype, ratype;
4010 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
4011 int arg_offset = 0;
4013 /* Is STMT a vectorizable call? */
4014 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4015 if (!stmt)
4016 return false;
4018 fndecl = gimple_call_fndecl (stmt);
4019 if (fndecl == NULL_TREE
4020 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4022 fndecl = gimple_call_arg (stmt, 0);
4023 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4024 fndecl = TREE_OPERAND (fndecl, 0);
4025 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4026 arg_offset = 1;
4028 if (fndecl == NULL_TREE)
4029 return false;
4031 struct cgraph_node *node = cgraph_node::get (fndecl);
4032 if (node == NULL || node->simd_clones == NULL)
4033 return false;
4035 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4036 return false;
4038 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4039 && ! vec_stmt)
4040 return false;
4042 if (gimple_call_lhs (stmt)
4043 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4044 return false;
4046 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4048 vectype = STMT_VINFO_VECTYPE (stmt_info);
4050 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4051 return false;
4053 /* FORNOW */
4054 if (slp_node)
4055 return false;
4057 /* Process function arguments. */
4058 nargs = gimple_call_num_args (stmt) - arg_offset;
4060 /* Bail out if the function has zero arguments. */
4061 if (nargs == 0)
4062 return false;
4064 arginfo.reserve (nargs, true);
4066 for (i = 0; i < nargs; i++)
4068 simd_call_arg_info thisarginfo;
4069 affine_iv iv;
4071 thisarginfo.linear_step = 0;
4072 thisarginfo.align = 0;
4073 thisarginfo.op = NULL_TREE;
4074 thisarginfo.simd_lane_linear = false;
4076 op = gimple_call_arg (stmt, i + arg_offset);
4077 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4078 &thisarginfo.vectype)
4079 || thisarginfo.dt == vect_uninitialized_def)
4081 if (dump_enabled_p ())
4082 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4083 "use not simple.\n");
4084 return false;
4087 if (thisarginfo.dt == vect_constant_def
4088 || thisarginfo.dt == vect_external_def)
4089 gcc_assert (thisarginfo.vectype == NULL_TREE);
4090 else
4091 gcc_assert (thisarginfo.vectype != NULL_TREE);
4093 /* For linear arguments, the analysis phase should have saved
4094 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4095 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4096 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4098 gcc_assert (vec_stmt);
4099 thisarginfo.linear_step
4100 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4101 thisarginfo.op
4102 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4103 thisarginfo.simd_lane_linear
4104 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4105 == boolean_true_node);
4106 /* If loop has been peeled for alignment, we need to adjust it. */
4107 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4108 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4109 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4111 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4112 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4113 tree opt = TREE_TYPE (thisarginfo.op);
4114 bias = fold_convert (TREE_TYPE (step), bias);
4115 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4116 thisarginfo.op
4117 = fold_build2 (POINTER_TYPE_P (opt)
4118 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4119 thisarginfo.op, bias);
4122 else if (!vec_stmt
4123 && thisarginfo.dt != vect_constant_def
4124 && thisarginfo.dt != vect_external_def
4125 && loop_vinfo
4126 && TREE_CODE (op) == SSA_NAME
4127 && simple_iv (loop, loop_containing_stmt (stmt), op,
4128 &iv, false)
4129 && tree_fits_shwi_p (iv.step))
4131 thisarginfo.linear_step = tree_to_shwi (iv.step);
4132 thisarginfo.op = iv.base;
4134 else if ((thisarginfo.dt == vect_constant_def
4135 || thisarginfo.dt == vect_external_def)
4136 && POINTER_TYPE_P (TREE_TYPE (op)))
4137 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4138 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4139 linear too. */
4140 if (POINTER_TYPE_P (TREE_TYPE (op))
4141 && !thisarginfo.linear_step
4142 && !vec_stmt
4143 && thisarginfo.dt != vect_constant_def
4144 && thisarginfo.dt != vect_external_def
4145 && loop_vinfo
4146 && !slp_node
4147 && TREE_CODE (op) == SSA_NAME)
4148 vect_simd_lane_linear (op, loop, &thisarginfo);
4150 arginfo.quick_push (thisarginfo);
4153 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4154 if (!vf.is_constant ())
4156 if (dump_enabled_p ())
4157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4158 "not considering SIMD clones; not yet supported"
4159 " for variable-width vectors.\n");
4160 return false;
4163 unsigned int badness = 0;
4164 struct cgraph_node *bestn = NULL;
4165 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4166 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4167 else
4168 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4169 n = n->simdclone->next_clone)
4171 unsigned int this_badness = 0;
4172 unsigned int num_calls;
4173 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4174 || n->simdclone->nargs != nargs)
4175 continue;
4176 if (num_calls != 1)
4177 this_badness += exact_log2 (num_calls) * 4096;
4178 if (n->simdclone->inbranch)
4179 this_badness += 8192;
4180 int target_badness = targetm.simd_clone.usable (n);
4181 if (target_badness < 0)
4182 continue;
4183 this_badness += target_badness * 512;
4184 for (i = 0; i < nargs; i++)
4186 switch (n->simdclone->args[i].arg_type)
4188 case SIMD_CLONE_ARG_TYPE_VECTOR:
4189 if (!useless_type_conversion_p
4190 (n->simdclone->args[i].orig_type,
4191 TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
4192 i = -1;
4193 else if (arginfo[i].dt == vect_constant_def
4194 || arginfo[i].dt == vect_external_def
4195 || arginfo[i].linear_step)
4196 this_badness += 64;
4197 break;
4198 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4199 if (arginfo[i].dt != vect_constant_def
4200 && arginfo[i].dt != vect_external_def)
4201 i = -1;
4202 break;
4203 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4204 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4205 if (arginfo[i].dt == vect_constant_def
4206 || arginfo[i].dt == vect_external_def
4207 || (arginfo[i].linear_step
4208 != n->simdclone->args[i].linear_step))
4209 i = -1;
4210 break;
4211 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4212 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4213 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4214 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4215 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4216 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4217 /* FORNOW */
4218 i = -1;
4219 break;
4220 case SIMD_CLONE_ARG_TYPE_MASK:
4221 break;
4223 if (i == (size_t) -1)
4224 break;
4225 if (n->simdclone->args[i].alignment > arginfo[i].align)
4227 i = -1;
4228 break;
4230 if (arginfo[i].align)
4231 this_badness += (exact_log2 (arginfo[i].align)
4232 - exact_log2 (n->simdclone->args[i].alignment));
4234 if (i == (size_t) -1)
4235 continue;
4236 if (bestn == NULL || this_badness < badness)
4238 bestn = n;
4239 badness = this_badness;
4243 if (bestn == NULL)
4244 return false;
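/* Illustrative worked example of the badness scoring above, assuming
   vf == 8: a clone with simdlen 4 needs num_calls == 2 and is charged
   exact_log2 (2) * 4096 == 4096, and an inbranch clone is charged a
   further 8192, so a usable notinbranch clone with simdlen 8 is preferred
   when one exists.  */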
4246 for (i = 0; i < nargs; i++)
4248 if ((arginfo[i].dt == vect_constant_def
4249 || arginfo[i].dt == vect_external_def)
4250 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4252 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
4253 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4254 slp_node);
4255 if (arginfo[i].vectype == NULL
4256 || !constant_multiple_p (bestn->simdclone->simdlen,
4257 simd_clone_subparts (arginfo[i].vectype)))
4258 return false;
4261 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4262 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4264 if (dump_enabled_p ())
4265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4266 "vector mask arguments are not supported.\n");
4267 return false;
4270 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4271 && bestn->simdclone->mask_mode == VOIDmode
4272 && (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
4273 != simd_clone_subparts (arginfo[i].vectype)))
4275 /* FORNOW we only have partial support for vector-type masks that
4276 can't hold all of simdlen. */
4277 if (dump_enabled_p ())
4278 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4279 vect_location,
4280 "in-branch vector clones are not yet"
4281 " supported for mismatched vector sizes.\n");
4282 return false;
4284 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4285 && bestn->simdclone->mask_mode != VOIDmode)
4287 /* FORNOW don't support integer-type masks. */
4288 if (dump_enabled_p ())
4289 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4290 vect_location,
4291 "in-branch vector clones are not yet"
4292 " supported for integer mask modes.\n");
4293 return false;
4297 fndecl = bestn->decl;
4298 nunits = bestn->simdclone->simdlen;
4299 ncopies = vector_unroll_factor (vf, nunits);
4301 /* If the function isn't const, only allow it in simd loops where the user
4302 has asserted that at least nunits consecutive iterations can be
4303 performed using SIMD instructions. */
4304 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4305 && gimple_vuse (stmt))
4306 return false;
4308 /* Sanity check: make sure that at least one copy of the vectorized stmt
4309 needs to be generated. */
4310 gcc_assert (ncopies >= 1);
4312 if (!vec_stmt) /* transformation not required. */
4314 /* When the original call is pure or const but the SIMD ABI dictates
4315 an aggregate return, we will have to use a virtual definition and,
4316 in a loop, eventually even need to add a virtual PHI. That's
4317 not straightforward, so allow this to be fixed up via renaming. */
4318 if (gimple_call_lhs (stmt)
4319 && !gimple_vdef (stmt)
4320 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4321 vinfo->any_known_not_updated_vssa = true;
4322 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4323 for (i = 0; i < nargs; i++)
4324 if ((bestn->simdclone->args[i].arg_type
4325 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4326 || (bestn->simdclone->args[i].arg_type
4327 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4329 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4330 + 1,
4331 true);
4332 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4333 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4334 ? size_type_node : TREE_TYPE (arginfo[i].op);
4335 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4336 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4337 tree sll = arginfo[i].simd_lane_linear
4338 ? boolean_true_node : boolean_false_node;
4339 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4341 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4342 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4343 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4344 dt, slp_node, cost_vec); */
4345 return true;
4348 /* Transform. */
4350 if (dump_enabled_p ())
4351 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4353 /* Handle def. */
4354 scalar_dest = gimple_call_lhs (stmt);
4355 vec_dest = NULL_TREE;
4356 rtype = NULL_TREE;
4357 ratype = NULL_TREE;
4358 if (scalar_dest)
4360 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4361 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4362 if (TREE_CODE (rtype) == ARRAY_TYPE)
4364 ratype = rtype;
4365 rtype = TREE_TYPE (ratype);
4369 auto_vec<vec<tree> > vec_oprnds;
4370 auto_vec<unsigned> vec_oprnds_i;
4371 vec_oprnds.safe_grow_cleared (nargs, true);
4372 vec_oprnds_i.safe_grow_cleared (nargs, true);
4373 for (j = 0; j < ncopies; ++j)
4375 /* Build argument list for the vectorized call. */
4376 if (j == 0)
4377 vargs.create (nargs);
4378 else
4379 vargs.truncate (0);
4381 for (i = 0; i < nargs; i++)
4383 unsigned int k, l, m, o;
4384 tree atype;
4385 op = gimple_call_arg (stmt, i + arg_offset);
4386 switch (bestn->simdclone->args[i].arg_type)
4388 case SIMD_CLONE_ARG_TYPE_VECTOR:
4389 atype = bestn->simdclone->args[i].vector_type;
4390 o = vector_unroll_factor (nunits,
4391 simd_clone_subparts (atype));
4392 for (m = j * o; m < (j + 1) * o; m++)
4394 if (simd_clone_subparts (atype)
4395 < simd_clone_subparts (arginfo[i].vectype))
4397 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4398 k = (simd_clone_subparts (arginfo[i].vectype)
4399 / simd_clone_subparts (atype));
4400 gcc_assert ((k & (k - 1)) == 0);
4401 if (m == 0)
4403 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4404 ncopies * o / k, op,
4405 &vec_oprnds[i]);
4406 vec_oprnds_i[i] = 0;
4407 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4409 else
4411 vec_oprnd0 = arginfo[i].op;
4412 if ((m & (k - 1)) == 0)
4413 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4415 arginfo[i].op = vec_oprnd0;
4416 vec_oprnd0
4417 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4418 bitsize_int (prec),
4419 bitsize_int ((m & (k - 1)) * prec));
4420 gassign *new_stmt
4421 = gimple_build_assign (make_ssa_name (atype),
4422 vec_oprnd0);
4423 vect_finish_stmt_generation (vinfo, stmt_info,
4424 new_stmt, gsi);
4425 vargs.safe_push (gimple_assign_lhs (new_stmt));
4427 else
4429 k = (simd_clone_subparts (atype)
4430 / simd_clone_subparts (arginfo[i].vectype));
4431 gcc_assert ((k & (k - 1)) == 0);
4432 vec<constructor_elt, va_gc> *ctor_elts;
4433 if (k != 1)
4434 vec_alloc (ctor_elts, k);
4435 else
4436 ctor_elts = NULL;
4437 for (l = 0; l < k; l++)
4439 if (m == 0 && l == 0)
4441 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4442 k * o * ncopies,
4444 &vec_oprnds[i]);
4445 vec_oprnds_i[i] = 0;
4446 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4448 else
4449 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4450 arginfo[i].op = vec_oprnd0;
4451 if (k == 1)
4452 break;
4453 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4454 vec_oprnd0);
4456 if (k == 1)
4457 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4458 atype))
4460 vec_oprnd0
4461 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4462 gassign *new_stmt
4463 = gimple_build_assign (make_ssa_name (atype),
4464 vec_oprnd0);
4465 vect_finish_stmt_generation (vinfo, stmt_info,
4466 new_stmt, gsi);
4467 vargs.safe_push (gimple_assign_lhs (new_stmt));
4469 else
4470 vargs.safe_push (vec_oprnd0);
4471 else
4473 vec_oprnd0 = build_constructor (atype, ctor_elts);
4474 gassign *new_stmt
4475 = gimple_build_assign (make_ssa_name (atype),
4476 vec_oprnd0);
4477 vect_finish_stmt_generation (vinfo, stmt_info,
4478 new_stmt, gsi);
4479 vargs.safe_push (gimple_assign_lhs (new_stmt));
4483 break;
4484 case SIMD_CLONE_ARG_TYPE_MASK:
4485 atype = bestn->simdclone->args[i].vector_type;
4486 if (bestn->simdclone->mask_mode != VOIDmode)
4488 /* FORNOW: this is disabled above. */
4489 gcc_unreachable ();
4491 else
4493 tree elt_type = TREE_TYPE (atype);
4494 tree one = fold_convert (elt_type, integer_one_node);
4495 tree zero = fold_convert (elt_type, integer_zero_node);
4496 o = vector_unroll_factor (nunits,
4497 simd_clone_subparts (atype));
4498 for (m = j * o; m < (j + 1) * o; m++)
4500 if (simd_clone_subparts (atype)
4501 < simd_clone_subparts (arginfo[i].vectype))
4503 /* The mask type has fewer elements than simdlen. */
4505 /* FORNOW */
4506 gcc_unreachable ();
4508 else if (simd_clone_subparts (atype)
4509 == simd_clone_subparts (arginfo[i].vectype))
4511 /* The SIMD clone function has the same number of
4512 elements as the current function. */
4513 if (m == 0)
4515 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4516 o * ncopies,
4518 &vec_oprnds[i]);
4519 vec_oprnds_i[i] = 0;
4521 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4522 vec_oprnd0
4523 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4524 build_vector_from_val (atype, one),
4525 build_vector_from_val (atype, zero));
4526 gassign *new_stmt
4527 = gimple_build_assign (make_ssa_name (atype),
4528 vec_oprnd0);
4529 vect_finish_stmt_generation (vinfo, stmt_info,
4530 new_stmt, gsi);
4531 vargs.safe_push (gimple_assign_lhs (new_stmt));
4533 else
4535 /* The mask type has more elements than simdlen. */
4537 /* FORNOW */
4538 gcc_unreachable ();
4542 break;
4543 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4544 vargs.safe_push (op);
4545 break;
4546 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4547 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4548 if (j == 0)
4550 gimple_seq stmts;
4551 arginfo[i].op
4552 = force_gimple_operand (unshare_expr (arginfo[i].op),
4553 &stmts, true, NULL_TREE);
4554 if (stmts != NULL)
4556 basic_block new_bb;
4557 edge pe = loop_preheader_edge (loop);
4558 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4559 gcc_assert (!new_bb);
4561 if (arginfo[i].simd_lane_linear)
4563 vargs.safe_push (arginfo[i].op);
4564 break;
4566 tree phi_res = copy_ssa_name (op);
4567 gphi *new_phi = create_phi_node (phi_res, loop->header);
4568 add_phi_arg (new_phi, arginfo[i].op,
4569 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4570 enum tree_code code
4571 = POINTER_TYPE_P (TREE_TYPE (op))
4572 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4573 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4574 ? sizetype : TREE_TYPE (op);
4575 poly_widest_int cst
4576 = wi::mul (bestn->simdclone->args[i].linear_step,
4577 ncopies * nunits);
4578 tree tcst = wide_int_to_tree (type, cst);
4579 tree phi_arg = copy_ssa_name (op);
4580 gassign *new_stmt
4581 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4582 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4583 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4584 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4585 UNKNOWN_LOCATION);
4586 arginfo[i].op = phi_res;
4587 vargs.safe_push (phi_res);
4589 else
4591 enum tree_code code
4592 = POINTER_TYPE_P (TREE_TYPE (op))
4593 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4594 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4595 ? sizetype : TREE_TYPE (op);
4596 poly_widest_int cst
4597 = wi::mul (bestn->simdclone->args[i].linear_step,
4598 j * nunits);
4599 tree tcst = wide_int_to_tree (type, cst);
4600 new_temp = make_ssa_name (TREE_TYPE (op));
4601 gassign *new_stmt
4602 = gimple_build_assign (new_temp, code,
4603 arginfo[i].op, tcst);
4604 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4605 vargs.safe_push (new_temp);
4607 break;
4608 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4609 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4610 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4611 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4612 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4613 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4614 default:
4615 gcc_unreachable ();
4619 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4620 if (vec_dest)
4622 gcc_assert (ratype
4623 || known_eq (simd_clone_subparts (rtype), nunits));
4624 if (ratype)
4625 new_temp = create_tmp_var (ratype);
4626 else if (useless_type_conversion_p (vectype, rtype))
4627 new_temp = make_ssa_name (vec_dest, new_call);
4628 else
4629 new_temp = make_ssa_name (rtype, new_call);
4630 gimple_call_set_lhs (new_call, new_temp);
4632 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4633 gimple *new_stmt = new_call;
4635 if (vec_dest)
4637 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4639 unsigned int k, l;
4640 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4641 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4642 k = vector_unroll_factor (nunits,
4643 simd_clone_subparts (vectype));
4644 gcc_assert ((k & (k - 1)) == 0);
4645 for (l = 0; l < k; l++)
4647 tree t;
4648 if (ratype)
4650 t = build_fold_addr_expr (new_temp);
4651 t = build2 (MEM_REF, vectype, t,
4652 build_int_cst (TREE_TYPE (t), l * bytes));
4654 else
4655 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4656 bitsize_int (prec), bitsize_int (l * prec));
4657 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4658 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4660 if (j == 0 && l == 0)
4661 *vec_stmt = new_stmt;
4662 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4665 if (ratype)
4666 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4667 continue;
4669 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4671 unsigned int k = (simd_clone_subparts (vectype)
4672 / simd_clone_subparts (rtype));
4673 gcc_assert ((k & (k - 1)) == 0);
4674 if ((j & (k - 1)) == 0)
4675 vec_alloc (ret_ctor_elts, k);
4676 if (ratype)
4678 unsigned int m, o;
4679 o = vector_unroll_factor (nunits,
4680 simd_clone_subparts (rtype));
4681 for (m = 0; m < o; m++)
4683 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4684 size_int (m), NULL_TREE, NULL_TREE);
4685 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4686 tem);
4687 vect_finish_stmt_generation (vinfo, stmt_info,
4688 new_stmt, gsi);
4689 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4690 gimple_assign_lhs (new_stmt));
4692 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4694 else
4695 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4696 if ((j & (k - 1)) != k - 1)
4697 continue;
4698 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4699 new_stmt
4700 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4701 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4703 if ((unsigned) j == k - 1)
4704 *vec_stmt = new_stmt;
4705 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4706 continue;
4708 else if (ratype)
4710 tree t = build_fold_addr_expr (new_temp);
4711 t = build2 (MEM_REF, vectype, t,
4712 build_int_cst (TREE_TYPE (t), 0));
4713 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4714 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4715 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4717 else if (!useless_type_conversion_p (vectype, rtype))
4719 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4720 new_stmt
4721 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4722 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4726 if (j == 0)
4727 *vec_stmt = new_stmt;
4728 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4731 for (i = 0; i < nargs; ++i)
4733 vec<tree> oprndsi = vec_oprnds[i];
4734 oprndsi.release ();
4736 vargs.release ();
4738 /* Mark the clone as no longer being a candidate for GC. */
4739 bestn->gc_candidate = false;
4741 /* The call in STMT might prevent it from being removed in DCE.
4742 We however cannot remove it here, due to the way the SSA name
4743 it defines is mapped to the new definition. So just replace
4744 the RHS of the statement with something harmless. */
4746 if (slp_node)
4747 return true;
4749 gimple *new_stmt;
4750 if (scalar_dest)
4752 type = TREE_TYPE (scalar_dest);
4753 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4754 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4756 else
4757 new_stmt = gimple_build_nop ();
4758 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4759 unlink_stmt_vdef (stmt);
4761 return true;
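/* Illustrative sketch (hypothetical declaration) of the linear-argument
   handling above: for a clone such as

     #pragma omp declare simd linear (p) notinbranch
     int bar (int *p);

   the LINEAR_*_CONSTANT_STEP case passes the clone a scalar starting value
   of p for each call rather than a vector of pointers, using a loop-header
   PHI whose latch increment is linear_step * ncopies * nunits per iteration
   of the vectorized loop.  */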
4765 /* Function vect_gen_widened_results_half
4767 Create a vector stmt whose code, number of operands, and result
4768 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
4769 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4772 STMT_INFO is the original scalar stmt that we are vectorizing. */
4774 static gimple *
4775 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4776 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4777 tree vec_dest, gimple_stmt_iterator *gsi,
4778 stmt_vec_info stmt_info)
4780 gimple *new_stmt;
4781 tree new_temp;
4783 /* Generate half of the widened result: */
4784 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4785 if (op_type != binary_op)
4786 vec_oprnd1 = NULL;
4787 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4788 new_temp = make_ssa_name (vec_dest, new_stmt);
4789 gimple_assign_set_lhs (new_stmt, new_temp);
4790 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4792 return new_stmt;
4796 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4797 For multi-step conversions store the resulting vectors and call the function
4798 recursively. When NARROW_SRC_P is true, there is still a conversion after
4799 narrowing, so don't store the vectors in the SLP_NODE or in the vector info
4800 of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
4802 static void
4803 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4804 int multi_step_cvt,
4805 stmt_vec_info stmt_info,
4806 vec<tree> &vec_dsts,
4807 gimple_stmt_iterator *gsi,
4808 slp_tree slp_node, enum tree_code code,
4809 bool narrow_src_p)
4811 unsigned int i;
4812 tree vop0, vop1, new_tmp, vec_dest;
4814 vec_dest = vec_dsts.pop ();
4816 for (i = 0; i < vec_oprnds->length (); i += 2)
4818 /* Create demotion operation. */
4819 vop0 = (*vec_oprnds)[i];
4820 vop1 = (*vec_oprnds)[i + 1];
4821 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4822 new_tmp = make_ssa_name (vec_dest, new_stmt);
4823 gimple_assign_set_lhs (new_stmt, new_tmp);
4824 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4825 if (multi_step_cvt || narrow_src_p)
4826 /* Store the resulting vector for the next recursive call,
4827 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
4828 (*vec_oprnds)[i/2] = new_tmp;
4829 else
4831 /* This is the last step of the conversion sequence. Store the
4832 vectors in SLP_NODE or in vector info of the scalar statement
4833 (or in STMT_VINFO_RELATED_STMT chain). */
4834 if (slp_node)
4835 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4836 else
4837 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4841 /* For multi-step demotion operations we first generate demotion operations
4842 from the source type to the intermediate types, and then combine the
4843 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4844 type. */
4845 if (multi_step_cvt)
4847 /* At each level of recursion we have half of the operands we had at the
4848 previous level. */
4849 vec_oprnds->truncate ((i+1)/2);
4850 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4851 multi_step_cvt - 1,
4852 stmt_info, vec_dsts, gsi,
4853 slp_node, VEC_PACK_TRUNC_EXPR,
4854 narrow_src_p);
4857 vec_dsts.quick_push (vec_dest);
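/* Illustrative sketch of a two-step demotion, e.g. int -> signed char with
   128-bit vectors: the first level packs four V4SI operands pairwise into
   two V8HI vectors, and the recursive call above then packs those into a
   single V16QI result using VEC_PACK_TRUNC_EXPR.  */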
4861 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4862 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4863 STMT_INFO. For multi-step conversions store the resulting vectors and
4864 call the function recursively. */
4866 static void
4867 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4868 vec<tree> *vec_oprnds0,
4869 vec<tree> *vec_oprnds1,
4870 stmt_vec_info stmt_info, tree vec_dest,
4871 gimple_stmt_iterator *gsi,
4872 enum tree_code code1,
4873 enum tree_code code2, int op_type)
4875 int i;
4876 tree vop0, vop1, new_tmp1, new_tmp2;
4877 gimple *new_stmt1, *new_stmt2;
4878 vec<tree> vec_tmp = vNULL;
4880 vec_tmp.create (vec_oprnds0->length () * 2);
4881 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4883 if (op_type == binary_op)
4884 vop1 = (*vec_oprnds1)[i];
4885 else
4886 vop1 = NULL_TREE;
4888 /* Generate the two halves of the promotion operation. */
4889 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4890 op_type, vec_dest, gsi,
4891 stmt_info);
4892 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4893 op_type, vec_dest, gsi,
4894 stmt_info);
4895 if (is_gimple_call (new_stmt1))
4897 new_tmp1 = gimple_call_lhs (new_stmt1);
4898 new_tmp2 = gimple_call_lhs (new_stmt2);
4900 else
4902 new_tmp1 = gimple_assign_lhs (new_stmt1);
4903 new_tmp2 = gimple_assign_lhs (new_stmt2);
4906 /* Store the results for the next step. */
4907 vec_tmp.quick_push (new_tmp1);
4908 vec_tmp.quick_push (new_tmp2);
4911 vec_oprnds0->release ();
4912 *vec_oprnds0 = vec_tmp;
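/* Illustrative sketch: for a widening conversion such as short -> int with
   128-bit vectors, each V8HI operand is expanded by the two halves generated
   above (typically VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR as CODE1 / CODE2)
   into two V4SI results, so on return *VEC_OPRNDS0 is twice as long as it
   was on entry.  */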
4915 /* Create vectorized promotion stmts for widening stmts using only half the
4916 potential vector size for input. */
4917 static void
4918 vect_create_half_widening_stmts (vec_info *vinfo,
4919 vec<tree> *vec_oprnds0,
4920 vec<tree> *vec_oprnds1,
4921 stmt_vec_info stmt_info, tree vec_dest,
4922 gimple_stmt_iterator *gsi,
4923 enum tree_code code1,
4924 int op_type)
4926 int i;
4927 tree vop0, vop1;
4928 gimple *new_stmt1;
4929 gimple *new_stmt2;
4930 gimple *new_stmt3;
4931 vec<tree> vec_tmp = vNULL;
4933 vec_tmp.create (vec_oprnds0->length ());
4934 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4936 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4938 gcc_assert (op_type == binary_op);
4939 vop1 = (*vec_oprnds1)[i];
4941 /* Widen the first vector input. */
4942 out_type = TREE_TYPE (vec_dest);
4943 new_tmp1 = make_ssa_name (out_type);
4944 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4945 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4946 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4948 /* Widen the second vector input. */
4949 new_tmp2 = make_ssa_name (out_type);
4950 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4951 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4952 /* Perform the operation, with both vector inputs widened. */
4953 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4955 else
4957 /* Perform the operation, with the single vector input widened. */
4958 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4961 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4962 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4963 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4965 /* Store the results for the next step. */
4966 vec_tmp.quick_push (new_tmp3);
4969 vec_oprnds0->release ();
4970 *vec_oprnds0 = vec_tmp;
4974 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4975 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4976 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4977 Return true if STMT_INFO is vectorizable in this way. */
4979 static bool
4980 vectorizable_conversion (vec_info *vinfo,
4981 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4982 gimple **vec_stmt, slp_tree slp_node,
4983 stmt_vector_for_cost *cost_vec)
4985 tree vec_dest;
4986 tree scalar_dest;
4987 tree op0, op1 = NULL_TREE;
4988 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4989 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4990 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4991 tree new_temp;
4992 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4993 int ndts = 2;
4994 poly_uint64 nunits_in;
4995 poly_uint64 nunits_out;
4996 tree vectype_out, vectype_in;
4997 int ncopies, i;
4998 tree lhs_type, rhs_type;
4999 /* For conversions between floating point and integer, there are two NARROW
5000 cases. NARROW_SRC is for FLOAT_EXPR and means
5001 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5002 This is safe when the range of the source integer can fit into the lower
5003 precision. NARROW_DST is for FIX_TRUNC_EXPR and means
5004 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5005 For other conversions, when there is narrowing, NARROW_DST is used by
5006 default. */
5007 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5008 vec<tree> vec_oprnds0 = vNULL;
5009 vec<tree> vec_oprnds1 = vNULL;
5010 tree vop0;
5011 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5012 int multi_step_cvt = 0;
5013 vec<tree> interm_types = vNULL;
5014 tree intermediate_type, cvt_type = NULL_TREE;
5015 int op_type;
5016 unsigned short fltsz;
5018 /* Is STMT a vectorizable conversion? */
5020 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5021 return false;
5023 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5024 && ! vec_stmt)
5025 return false;
5027 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5028 if (!stmt)
5029 return false;
5031 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5032 return false;
5034 code = gimple_assign_rhs_code (stmt);
5035 if (!CONVERT_EXPR_CODE_P (code)
5036 && code != FIX_TRUNC_EXPR
5037 && code != FLOAT_EXPR
5038 && code != WIDEN_PLUS_EXPR
5039 && code != WIDEN_MINUS_EXPR
5040 && code != WIDEN_MULT_EXPR
5041 && code != WIDEN_LSHIFT_EXPR)
5042 return false;
5044 bool widen_arith = (code == WIDEN_PLUS_EXPR
5045 || code == WIDEN_MINUS_EXPR
5046 || code == WIDEN_MULT_EXPR
5047 || code == WIDEN_LSHIFT_EXPR);
5048 op_type = TREE_CODE_LENGTH (code);
5050 /* Check types of lhs and rhs. */
5051 scalar_dest = gimple_assign_lhs (stmt);
5052 lhs_type = TREE_TYPE (scalar_dest);
5053 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5055 /* Check the operands of the operation. */
5056 slp_tree slp_op0, slp_op1 = NULL;
5057 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5058 0, &op0, &slp_op0, &dt[0], &vectype_in))
5060 if (dump_enabled_p ())
5061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5062 "use not simple.\n");
5063 return false;
5066 rhs_type = TREE_TYPE (op0);
5067 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5068 && !((INTEGRAL_TYPE_P (lhs_type)
5069 && INTEGRAL_TYPE_P (rhs_type))
5070 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5071 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5072 return false;
5074 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5075 && ((INTEGRAL_TYPE_P (lhs_type)
5076 && !type_has_mode_precision_p (lhs_type))
5077 || (INTEGRAL_TYPE_P (rhs_type)
5078 && !type_has_mode_precision_p (rhs_type))))
5080 if (dump_enabled_p ())
5081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5082 "type conversion to/from bit-precision unsupported."
5083 "\n");
5084 return false;
5087 if (op_type == binary_op)
5089 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
5090 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
5092 op1 = gimple_assign_rhs2 (stmt);
5093 tree vectype1_in;
5094 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5095 &op1, &slp_op1, &dt[1], &vectype1_in))
5097 if (dump_enabled_p ())
5098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5099 "use not simple.\n");
5100 return false;
5102 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5103 OP1. */
5104 if (!vectype_in)
5105 vectype_in = vectype1_in;
5108 /* If op0 is an external or constant def, infer the vector type
5109 from the scalar type. */
5110 if (!vectype_in)
5111 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5112 if (vec_stmt)
5113 gcc_assert (vectype_in);
5114 if (!vectype_in)
5116 if (dump_enabled_p ())
5117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5118 "no vectype for scalar type %T\n", rhs_type);
5120 return false;
5123 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5124 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5126 if (dump_enabled_p ())
5127 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5128 "can't convert between boolean and non "
5129 "boolean vectors %T\n", rhs_type);
5131 return false;
5134 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5135 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5136 if (known_eq (nunits_out, nunits_in))
5137 if (widen_arith)
5138 modifier = WIDEN;
5139 else
5140 modifier = NONE;
5141 else if (multiple_p (nunits_out, nunits_in))
5142 modifier = NARROW_DST;
5143 else
5145 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5146 modifier = WIDEN;
5149 /* Multiple types in SLP are handled by creating the appropriate number of
5150 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5151 case of SLP. */
5152 if (slp_node)
5153 ncopies = 1;
5154 else if (modifier == NARROW_DST)
5155 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5156 else
5157 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5159 /* Sanity check: make sure that at least one copy of the vectorized stmt
5160 needs to be generated. */
5161 gcc_assert (ncopies >= 1);
5163 bool found_mode = false;
5164 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5165 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5166 opt_scalar_mode rhs_mode_iter;
5168 /* Supportable by target? */
5169 switch (modifier)
5171 case NONE:
5172 if (code != FIX_TRUNC_EXPR
5173 && code != FLOAT_EXPR
5174 && !CONVERT_EXPR_CODE_P (code))
5175 return false;
5176 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
5177 break;
5178 /* FALLTHRU */
5179 unsupported:
5180 if (dump_enabled_p ())
5181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5182 "conversion not supported by target.\n");
5183 return false;
5185 case WIDEN:
5186 if (known_eq (nunits_in, nunits_out))
5188 if (!supportable_half_widening_operation (code, vectype_out,
5189 vectype_in, &code1))
5190 goto unsupported;
5191 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5192 break;
5194 if (supportable_widening_operation (vinfo, code, stmt_info,
5195 vectype_out, vectype_in, &code1,
5196 &code2, &multi_step_cvt,
5197 &interm_types))
5199 /* Binary widening operation can only be supported directly by the
5200 architecture. */
5201 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5202 break;
5205 if (code != FLOAT_EXPR
5206 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5207 goto unsupported;
5209 fltsz = GET_MODE_SIZE (lhs_mode);
5210 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5212 rhs_mode = rhs_mode_iter.require ();
5213 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5214 break;
5216 cvt_type
5217 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5218 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5219 if (cvt_type == NULL_TREE)
5220 goto unsupported;
5222 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5224 if (!supportable_convert_operation (code, vectype_out,
5225 cvt_type, &codecvt1))
5226 goto unsupported;
5228 else if (!supportable_widening_operation (vinfo, code, stmt_info,
5229 vectype_out, cvt_type,
5230 &codecvt1, &codecvt2,
5231 &multi_step_cvt,
5232 &interm_types))
5233 continue;
5234 else
5235 gcc_assert (multi_step_cvt == 0);
5237 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5238 cvt_type,
5239 vectype_in, &code1, &code2,
5240 &multi_step_cvt, &interm_types))
5242 found_mode = true;
5243 break;
5247 if (!found_mode)
5248 goto unsupported;
5250 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5251 codecvt2 = ERROR_MARK;
5252 else
5254 multi_step_cvt++;
5255 interm_types.safe_push (cvt_type);
5256 cvt_type = NULL_TREE;
5258 break;
5260 case NARROW_DST:
5261 gcc_assert (op_type == unary_op);
5262 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5263 &code1, &multi_step_cvt,
5264 &interm_types))
5265 break;
5267 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5268 goto unsupported;
5270 if (code == FIX_TRUNC_EXPR)
5272 cvt_type
5273 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5274 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5275 if (cvt_type == NULL_TREE)
5276 goto unsupported;
5277 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5278 &codecvt1))
5279 goto unsupported;
5280 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5281 &code1, &multi_step_cvt,
5282 &interm_types))
5283 break;
5285 /* If op0 can be represented with a low-precision integer,
5286 truncate it to cvt_type and then do the FLOAT_EXPR. */
5287 else if (code == FLOAT_EXPR)
5289 wide_int op_min_value, op_max_value;
5290 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5291 goto unsupported;
5293 cvt_type
5294 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5295 if (cvt_type == NULL_TREE
5296 || (wi::min_precision (op_max_value, SIGNED)
5297 > TYPE_PRECISION (cvt_type))
5298 || (wi::min_precision (op_min_value, SIGNED)
5299 > TYPE_PRECISION (cvt_type)))
5300 goto unsupported;
5302 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5303 if (cvt_type == NULL_TREE)
5304 goto unsupported;
5305 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5306 &code1, &multi_step_cvt,
5307 &interm_types))
5308 goto unsupported;
5309 if (supportable_convert_operation (code, vectype_out,
5310 cvt_type, &codecvt1))
5312 modifier = NARROW_SRC;
5313 break;
5317 goto unsupported;
5319 default:
5320 gcc_unreachable ();
5323 if (!vec_stmt) /* transformation not required. */
5325 if (slp_node
5326 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5327 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5329 if (dump_enabled_p ())
5330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5331 "incompatible vector types for invariants\n");
5332 return false;
5334 DUMP_VECT_SCOPE ("vectorizable_conversion");
5335 if (modifier == NONE)
5337 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5338 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5339 cost_vec);
5341 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5343 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5344 /* The final packing step produces one vector result per copy. */
5345 unsigned int nvectors
5346 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5347 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5348 multi_step_cvt, cost_vec,
5349 widen_arith);
5351 else
5353 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5354 /* The initial unpacking step produces two vector results
5355 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5356 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
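/* A hypothetical worked example (editor's note): with 8 SLP vector
   stmts and a two-step promotion (MULTI_STEP_CVT == 1), nvectors below
   becomes 8 >> 1 == 4.  */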
5357 unsigned int nvectors
5358 = (slp_node
5359 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5360 : ncopies * 2);
5361 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5362 multi_step_cvt, cost_vec,
5363 widen_arith);
5365 interm_types.release ();
5366 return true;
5369 /* Transform. */
5370 if (dump_enabled_p ())
5371 dump_printf_loc (MSG_NOTE, vect_location,
5372 "transform conversion. ncopies = %d.\n", ncopies);
5374 if (op_type == binary_op)
5376 if (CONSTANT_CLASS_P (op0))
5377 op0 = fold_convert (TREE_TYPE (op1), op0);
5378 else if (CONSTANT_CLASS_P (op1))
5379 op1 = fold_convert (TREE_TYPE (op0), op1);
5382 /* In case of multi-step conversion, we first generate conversion operations
5383 to the intermediate types, and then from those types to the final one.
5384 We create vector destinations for the intermediate type (TYPES) received
5385 from supportable_*_operation, and store them in the correct order
5386 for future use in vect_create_vectorized_*_stmts (). */
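/* Editor's sketch of the resulting order (an inference from the loops
   below, not a statement by the original authors): for a two-step
   widening V16QI -> V8HI -> V4SI, interm_types holds the V8HI vector
   type, so vec_dsts ends up as { V4SI dest, V8HI dest } and the
   transform walks it from the back (the first conversion step) to the
   front (the final destination).  */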
5387 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5388 bool widen_or_narrow_float_p
5389 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5390 vec_dest = vect_create_destination_var (scalar_dest,
5391 widen_or_narrow_float_p
5392 ? cvt_type : vectype_out);
5393 vec_dsts.quick_push (vec_dest);
5395 if (multi_step_cvt)
5397 for (i = interm_types.length () - 1;
5398 interm_types.iterate (i, &intermediate_type); i--)
5400 vec_dest = vect_create_destination_var (scalar_dest,
5401 intermediate_type);
5402 vec_dsts.quick_push (vec_dest);
5406 if (cvt_type)
5407 vec_dest = vect_create_destination_var (scalar_dest,
5408 widen_or_narrow_float_p
5409 ? vectype_out : cvt_type);
5411 int ninputs = 1;
5412 if (!slp_node)
5414 if (modifier == WIDEN)
5416 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5418 if (multi_step_cvt)
5419 ninputs = vect_pow2 (multi_step_cvt);
5420 ninputs *= 2;
5424 switch (modifier)
5426 case NONE:
5427 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5428 op0, &vec_oprnds0);
5429 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5431 /* Arguments are ready, create the new vector stmt. */
5432 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5433 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5434 new_temp = make_ssa_name (vec_dest, new_stmt);
5435 gimple_assign_set_lhs (new_stmt, new_temp);
5436 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5438 if (slp_node)
5439 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5440 else
5441 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5443 break;
5445 case WIDEN:
5446 /* In case the vectorization factor (VF) is bigger than the number
5447 of elements that we can fit in a vectype (nunits), we have to
5448 generate more than one vector stmt - i.e., we need to "unroll"
5449 the vector stmt by a factor VF/nunits. */
5450 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5451 op0, &vec_oprnds0,
5452 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5453 &vec_oprnds1);
5454 if (code == WIDEN_LSHIFT_EXPR)
5456 int oprnds_size = vec_oprnds0.length ();
5457 vec_oprnds1.create (oprnds_size);
5458 for (i = 0; i < oprnds_size; ++i)
5459 vec_oprnds1.quick_push (op1);
5461 /* Arguments are ready. Create the new vector stmts. */
5462 for (i = multi_step_cvt; i >= 0; i--)
5464 tree this_dest = vec_dsts[i];
5465 enum tree_code c1 = code1, c2 = code2;
5466 if (i == 0 && codecvt2 != ERROR_MARK)
5468 c1 = codecvt1;
5469 c2 = codecvt2;
5471 if (known_eq (nunits_out, nunits_in))
5472 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5473 &vec_oprnds1, stmt_info,
5474 this_dest, gsi,
5475 c1, op_type);
5476 else
5477 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5478 &vec_oprnds1, stmt_info,
5479 this_dest, gsi,
5480 c1, c2, op_type);
5483 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5485 gimple *new_stmt;
5486 if (cvt_type)
5488 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5489 new_temp = make_ssa_name (vec_dest);
5490 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5491 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5493 else
5494 new_stmt = SSA_NAME_DEF_STMT (vop0);
5496 if (slp_node)
5497 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5498 else
5499 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5501 break;
5503 case NARROW_SRC:
5504 case NARROW_DST:
5505 /* In case the vectorization factor (VF) is bigger than the number
5506 of elements that we can fit in a vectype (nunits), we have to
5507 generate more than one vector stmt - i.e., we need to "unroll"
5508 the vector stmt by a factor VF/nunits. */
5509 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5510 op0, &vec_oprnds0);
5511 /* Arguments are ready. Create the new vector stmts. */
5512 if (cvt_type && modifier == NARROW_DST)
5513 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5515 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5516 new_temp = make_ssa_name (vec_dest);
5517 gassign *new_stmt
5518 = gimple_build_assign (new_temp, codecvt1, vop0);
5519 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5520 vec_oprnds0[i] = new_temp;
5523 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5524 multi_step_cvt,
5525 stmt_info, vec_dsts, gsi,
5526 slp_node, code1,
5527 modifier == NARROW_SRC);
5528 /* After demoting op0 to cvt_type, convert it to dest. */
5529 if (cvt_type && code == FLOAT_EXPR)
5531 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5533 /* Arguments are ready, create the new vector stmt. */
5534 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5535 gassign *new_stmt
5536 = gimple_build_assign (vec_dest, codecvt1, vec_oprnds0[i]);
5537 new_temp = make_ssa_name (vec_dest, new_stmt);
5538 gimple_assign_set_lhs (new_stmt, new_temp);
5539 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5541 /* This is the last step of the conversion sequence. Store the
5542 vectors in SLP_NODE or in vector info of the scalar statement
5543 (or in STMT_VINFO_RELATED_STMT chain). */
5544 if (slp_node)
5545 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5546 else
5547 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5550 break;
5552 if (!slp_node)
5553 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5555 vec_oprnds0.release ();
5556 vec_oprnds1.release ();
5557 interm_types.release ();
5559 return true;
5562 /* Return true if we can assume from the scalar form of STMT_INFO that
5563 neither the scalar nor the vector forms will generate code. STMT_INFO
5564 is known not to involve a data reference. */
5566 bool
5567 vect_nop_conversion_p (stmt_vec_info stmt_info)
5569 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5570 if (!stmt)
5571 return false;
5573 tree lhs = gimple_assign_lhs (stmt);
5574 tree_code code = gimple_assign_rhs_code (stmt);
5575 tree rhs = gimple_assign_rhs1 (stmt);
5577 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5578 return true;
5580 if (CONVERT_EXPR_CODE_P (code))
5581 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5583 return false;
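/* For example (editor's illustration, not part of the original code):
   a cast between 'int' and 'unsigned int' satisfies
   tree_nop_conversion_p and generates no code in either scalar or
   vector form, whereas a truncation from 'int' to 'short' does not.  */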
5586 /* Function vectorizable_assignment.
5588 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5589 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5590 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5591 Return true if STMT_INFO is vectorizable in this way. */
5593 static bool
5594 vectorizable_assignment (vec_info *vinfo,
5595 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5596 gimple **vec_stmt, slp_tree slp_node,
5597 stmt_vector_for_cost *cost_vec)
5599 tree vec_dest;
5600 tree scalar_dest;
5601 tree op;
5602 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5603 tree new_temp;
5604 enum vect_def_type dt[1] = {vect_unknown_def_type};
5605 int ndts = 1;
5606 int ncopies;
5607 int i;
5608 vec<tree> vec_oprnds = vNULL;
5609 tree vop;
5610 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5611 enum tree_code code;
5612 tree vectype_in;
5614 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5615 return false;
5617 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5618 && ! vec_stmt)
5619 return false;
5621 /* Is vectorizable assignment? */
5622 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5623 if (!stmt)
5624 return false;
5626 scalar_dest = gimple_assign_lhs (stmt);
5627 if (TREE_CODE (scalar_dest) != SSA_NAME)
5628 return false;
5630 if (STMT_VINFO_DATA_REF (stmt_info))
5631 return false;
5633 code = gimple_assign_rhs_code (stmt);
5634 if (!(gimple_assign_single_p (stmt)
5635 || code == PAREN_EXPR
5636 || CONVERT_EXPR_CODE_P (code)))
5637 return false;
5639 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5640 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5642 /* Multiple types in SLP are handled by creating the appropriate number of
5643 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5644 case of SLP. */
5645 if (slp_node)
5646 ncopies = 1;
5647 else
5648 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5650 gcc_assert (ncopies >= 1);
5652 slp_tree slp_op;
5653 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5654 &dt[0], &vectype_in))
5656 if (dump_enabled_p ())
5657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5658 "use not simple.\n");
5659 return false;
5661 if (!vectype_in)
5662 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5664 /* We can handle NOP_EXPR conversions that do not change the number
5665 of elements or the vector size. */
5666 if ((CONVERT_EXPR_CODE_P (code)
5667 || code == VIEW_CONVERT_EXPR)
5668 && (!vectype_in
5669 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5670 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5671 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5672 return false;
5674 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5676 if (dump_enabled_p ())
5677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5678 "can't convert between boolean and non "
5679 "boolean vectors %T\n", TREE_TYPE (op));
5681 return false;
5684 /* We do not handle bit-precision changes. */
5685 if ((CONVERT_EXPR_CODE_P (code)
5686 || code == VIEW_CONVERT_EXPR)
5687 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5688 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5689 || !type_has_mode_precision_p (TREE_TYPE (op)))
5690 /* But a conversion that does not change the bit-pattern is ok. */
5691 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5692 > TYPE_PRECISION (TREE_TYPE (op)))
5693 && TYPE_UNSIGNED (TREE_TYPE (op))))
5695 if (dump_enabled_p ())
5696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5697 "type conversion to/from bit-precision "
5698 "unsupported.\n");
5699 return false;
5702 if (!vec_stmt) /* transformation not required. */
5704 if (slp_node
5705 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5707 if (dump_enabled_p ())
5708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5709 "incompatible vector types for invariants\n");
5710 return false;
5712 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5713 DUMP_VECT_SCOPE ("vectorizable_assignment");
5714 if (!vect_nop_conversion_p (stmt_info))
5715 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5716 cost_vec);
5717 return true;
5720 /* Transform. */
5721 if (dump_enabled_p ())
5722 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5724 /* Handle def. */
5725 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5727 /* Handle use. */
5728 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5730 /* Arguments are ready. Create the new vector stmt. */
5731 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5733 if (CONVERT_EXPR_CODE_P (code)
5734 || code == VIEW_CONVERT_EXPR)
5735 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5736 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5737 new_temp = make_ssa_name (vec_dest, new_stmt);
5738 gimple_assign_set_lhs (new_stmt, new_temp);
5739 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5740 if (slp_node)
5741 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5742 else
5743 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5745 if (!slp_node)
5746 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5748 vec_oprnds.release ();
5749 return true;
5753 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5754 either as shift by a scalar or by a vector. */
5756 bool
5757 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5760 machine_mode vec_mode;
5761 optab optab;
5762 int icode;
5763 tree vectype;
5765 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5766 if (!vectype)
5767 return false;
5769 optab = optab_for_tree_code (code, vectype, optab_scalar);
5770 if (!optab
5771 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5773 optab = optab_for_tree_code (code, vectype, optab_vector);
5774 if (!optab
5775 || (optab_handler (optab, TYPE_MODE (vectype))
5776 == CODE_FOR_nothing))
5777 return false;
5780 vec_mode = TYPE_MODE (vectype);
5781 icode = (int) optab_handler (optab, vec_mode);
5782 if (icode == CODE_FOR_nothing)
5783 return false;
5785 return true;
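/* Editor's usage sketch (a hypothetical caller, not from the original
   source): pattern recognition can ask whether e.g. RSHIFT_EXPR on
   'short' is vectorizable at all, without caring which form the target
   provides, since both the vector-by-scalar and the vector-by-vector
   optab are tried above:

     if (vect_supportable_shift (vinfo, RSHIFT_EXPR,
                                 short_integer_type_node))
       ... use the shift in a pattern ...  */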
5789 /* Function vectorizable_shift.
5791 Check if STMT_INFO performs a shift operation that can be vectorized.
5792 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5793 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5794 Return true if STMT_INFO is vectorizable in this way. */
5796 static bool
5797 vectorizable_shift (vec_info *vinfo,
5798 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5799 gimple **vec_stmt, slp_tree slp_node,
5800 stmt_vector_for_cost *cost_vec)
5802 tree vec_dest;
5803 tree scalar_dest;
5804 tree op0, op1 = NULL;
5805 tree vec_oprnd1 = NULL_TREE;
5806 tree vectype;
5807 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5808 enum tree_code code;
5809 machine_mode vec_mode;
5810 tree new_temp;
5811 optab optab;
5812 int icode;
5813 machine_mode optab_op2_mode;
5814 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5815 int ndts = 2;
5816 poly_uint64 nunits_in;
5817 poly_uint64 nunits_out;
5818 tree vectype_out;
5819 tree op1_vectype;
5820 int ncopies;
5821 int i;
5822 vec<tree> vec_oprnds0 = vNULL;
5823 vec<tree> vec_oprnds1 = vNULL;
5824 tree vop0, vop1;
5825 unsigned int k;
5826 bool scalar_shift_arg = true;
5827 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5828 bool incompatible_op1_vectype_p = false;
5830 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5831 return false;
5833 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5834 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5835 && ! vec_stmt)
5836 return false;
5838 /* Is STMT a vectorizable binary/unary operation? */
5839 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5840 if (!stmt)
5841 return false;
5843 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5844 return false;
5846 code = gimple_assign_rhs_code (stmt);
5848 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5849 || code == RROTATE_EXPR))
5850 return false;
5852 scalar_dest = gimple_assign_lhs (stmt);
5853 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5854 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5856 if (dump_enabled_p ())
5857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5858 "bit-precision shifts not supported.\n");
5859 return false;
5862 slp_tree slp_op0;
5863 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5864 0, &op0, &slp_op0, &dt[0], &vectype))
5866 if (dump_enabled_p ())
5867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5868 "use not simple.\n");
5869 return false;
5871 /* If op0 is an external or constant def, infer the vector type
5872 from the scalar type. */
5873 if (!vectype)
5874 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5875 if (vec_stmt)
5876 gcc_assert (vectype);
5877 if (!vectype)
5879 if (dump_enabled_p ())
5880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5881 "no vectype for scalar type\n");
5882 return false;
5885 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5886 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5887 if (maybe_ne (nunits_out, nunits_in))
5888 return false;
5890 stmt_vec_info op1_def_stmt_info;
5891 slp_tree slp_op1;
5892 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5893 &dt[1], &op1_vectype, &op1_def_stmt_info))
5895 if (dump_enabled_p ())
5896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5897 "use not simple.\n");
5898 return false;
5901 /* Multiple types in SLP are handled by creating the appropriate number of
5902 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5903 case of SLP. */
5904 if (slp_node)
5905 ncopies = 1;
5906 else
5907 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5909 gcc_assert (ncopies >= 1);
5911 /* Determine whether the shift amount is a vector or a scalar. If the
5912 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5914 if ((dt[1] == vect_internal_def
5915 || dt[1] == vect_induction_def
5916 || dt[1] == vect_nested_cycle)
5917 && !slp_node)
5918 scalar_shift_arg = false;
5919 else if (dt[1] == vect_constant_def
5920 || dt[1] == vect_external_def
5921 || dt[1] == vect_internal_def)
5923 /* In SLP, we need to check whether the shift count is the same in
5924 all stmts; in loops, if it is a constant or invariant, it is
5925 always a scalar shift. */
5926 if (slp_node)
5928 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5929 stmt_vec_info slpstmt_info;
5931 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5933 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5934 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5935 scalar_shift_arg = false;
5938 /* For internal SLP defs we have to make sure we see scalar stmts
5939 for all vector elements.
5940 ??? For different vectors we could resort to a different
5941 scalar shift operand but code-generation below simply always
5942 takes the first. */
5943 if (dt[1] == vect_internal_def
5944 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5945 stmts.length ()))
5946 scalar_shift_arg = false;
5949 /* If the shift amount is computed by a pattern stmt we cannot
5950 use the scalar amount directly; thus give up and use a vector
5951 shift. */
5952 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5953 scalar_shift_arg = false;
5955 else
5957 if (dump_enabled_p ())
5958 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5959 "operand mode requires invariant argument.\n");
5960 return false;
5963 /* Vector shifted by vector. */
5964 bool was_scalar_shift_arg = scalar_shift_arg;
5965 if (!scalar_shift_arg)
5967 optab = optab_for_tree_code (code, vectype, optab_vector);
5968 if (dump_enabled_p ())
5969 dump_printf_loc (MSG_NOTE, vect_location,
5970 "vector/vector shift/rotate found.\n");
5972 if (!op1_vectype)
5973 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5974 slp_op1);
5975 incompatible_op1_vectype_p
5976 = (op1_vectype == NULL_TREE
5977 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5978 TYPE_VECTOR_SUBPARTS (vectype))
5979 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5980 if (incompatible_op1_vectype_p
5981 && (!slp_node
5982 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5983 || slp_op1->refcnt != 1))
5985 if (dump_enabled_p ())
5986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5987 "unusable type for last operand in"
5988 " vector/vector shift/rotate.\n");
5989 return false;
5992 /* See if the machine has a vector-shifted-by-scalar insn and, if not,
5993 see if it has a vector-shifted-by-vector insn. */
5994 else
5996 optab = optab_for_tree_code (code, vectype, optab_scalar);
5997 if (optab
5998 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6000 if (dump_enabled_p ())
6001 dump_printf_loc (MSG_NOTE, vect_location,
6002 "vector/scalar shift/rotate found.\n");
6004 else
6006 optab = optab_for_tree_code (code, vectype, optab_vector);
6007 if (optab
6008 && (optab_handler (optab, TYPE_MODE (vectype))
6009 != CODE_FOR_nothing))
6011 scalar_shift_arg = false;
6013 if (dump_enabled_p ())
6014 dump_printf_loc (MSG_NOTE, vect_location,
6015 "vector/vector shift/rotate found.\n");
6017 if (!op1_vectype)
6018 op1_vectype = get_vectype_for_scalar_type (vinfo,
6019 TREE_TYPE (op1),
6020 slp_op1);
6022 /* Unlike the other binary operators, shifts/rotates have
6023 the rhs being int, instead of the same type as the lhs,
6024 so make sure the scalar is the right type if we are
6025 dealing with vectors of long long/long/short/char. */
6026 incompatible_op1_vectype_p
6027 = (!op1_vectype
6028 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6029 TREE_TYPE (op1)));
6030 if (incompatible_op1_vectype_p
6031 && dt[1] == vect_internal_def)
6033 if (dump_enabled_p ())
6034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6035 "unusable type for last operand in"
6036 " vector/vector shift/rotate.\n");
6037 return false;
6043 /* Supportable by target? */
6044 if (!optab)
6046 if (dump_enabled_p ())
6047 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6048 "no optab.\n");
6049 return false;
6051 vec_mode = TYPE_MODE (vectype);
6052 icode = (int) optab_handler (optab, vec_mode);
6053 if (icode == CODE_FOR_nothing)
6055 if (dump_enabled_p ())
6056 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6057 "op not supported by target.\n");
6058 return false;
6060 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
6061 if (vect_emulated_vector_p (vectype))
6062 return false;
6064 if (!vec_stmt) /* transformation not required. */
6066 if (slp_node
6067 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6068 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6069 && (!incompatible_op1_vectype_p
6070 || dt[1] == vect_constant_def)
6071 && !vect_maybe_update_slp_op_vectype
6072 (slp_op1,
6073 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6075 if (dump_enabled_p ())
6076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6077 "incompatible vector types for invariants\n");
6078 return false;
6080 /* Now adjust the constant shift amount in place. */
6081 if (slp_node
6082 && incompatible_op1_vectype_p
6083 && dt[1] == vect_constant_def)
6085 for (unsigned i = 0;
6086 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6088 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6089 = fold_convert (TREE_TYPE (vectype),
6090 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6091 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6092 == INTEGER_CST));
6095 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6096 DUMP_VECT_SCOPE ("vectorizable_shift");
6097 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6098 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6099 return true;
6102 /* Transform. */
6104 if (dump_enabled_p ())
6105 dump_printf_loc (MSG_NOTE, vect_location,
6106 "transform binary/unary operation.\n");
6108 if (incompatible_op1_vectype_p && !slp_node)
6110 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6111 op1 = fold_convert (TREE_TYPE (vectype), op1);
6112 if (dt[1] != vect_constant_def)
6113 op1 = vect_init_vector (vinfo, stmt_info, op1,
6114 TREE_TYPE (vectype), NULL);
6117 /* Handle def. */
6118 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6120 if (scalar_shift_arg && dt[1] != vect_internal_def)
6122 /* Vector shl and shr insn patterns can be defined with scalar
6123 operand 2 (shift operand). In this case, use constant or loop
6124 invariant op1 directly, without extending it to vector mode
6125 first. */
6126 optab_op2_mode = insn_data[icode].operand[2].mode;
6127 if (!VECTOR_MODE_P (optab_op2_mode))
6129 if (dump_enabled_p ())
6130 dump_printf_loc (MSG_NOTE, vect_location,
6131 "operand 1 using scalar mode.\n");
6132 vec_oprnd1 = op1;
6133 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6134 vec_oprnds1.quick_push (vec_oprnd1);
6135 /* Store vec_oprnd1 for every vector stmt to be created.
6136 We check during the analysis that all the shift arguments
6137 are the same.
6138 TODO: Allow different constants for different vector
6139 stmts generated for an SLP instance. */
6140 for (k = 0;
6141 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6142 vec_oprnds1.quick_push (vec_oprnd1);
6145 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6147 if (was_scalar_shift_arg)
6149 /* If the argument was the same in all lanes, create
6150 the correctly typed vector shift amount directly. */
6151 op1 = fold_convert (TREE_TYPE (vectype), op1);
6152 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6153 !loop_vinfo ? gsi : NULL);
6154 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6155 !loop_vinfo ? gsi : NULL);
6156 vec_oprnds1.create (slp_node->vec_stmts_size);
6157 for (k = 0; k < slp_node->vec_stmts_size; k++)
6158 vec_oprnds1.quick_push (vec_oprnd1);
6160 else if (dt[1] == vect_constant_def)
6161 /* The constant shift amount has been adjusted in place. */
6163 else
6164 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6167 /* vec_oprnd1 is available if operand 1 should be of a scalar type
6168 (a special case for certain kinds of vector shifts); otherwise,
6169 operand 1 should be of a vector type (the usual case). */
6170 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6171 op0, &vec_oprnds0,
6172 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6174 /* Arguments are ready. Create the new vector stmt. */
6175 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6177 /* For internal defs where we need to use a scalar shift arg,
6178 extract the first lane. */
6179 if (scalar_shift_arg && dt[1] == vect_internal_def)
6181 vop1 = vec_oprnds1[0];
6182 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6183 gassign *new_stmt
6184 = gimple_build_assign (new_temp,
6185 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6186 vop1,
6187 TYPE_SIZE (TREE_TYPE (new_temp)),
6188 bitsize_zero_node));
6189 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6190 vop1 = new_temp;
6192 else
6193 vop1 = vec_oprnds1[i];
6194 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6195 new_temp = make_ssa_name (vec_dest, new_stmt);
6196 gimple_assign_set_lhs (new_stmt, new_temp);
6197 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6198 if (slp_node)
6199 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6200 else
6201 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6204 if (!slp_node)
6205 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6207 vec_oprnds0.release ();
6208 vec_oprnds1.release ();
6210 return true;
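/* Editor's illustration (not part of the original code): for

     for (i = 0; i < n; i++) a[i] = a[i] << s;

   the shift amount 's' is loop-invariant, so the vector/scalar form is
   used when the target provides it; for

     for (i = 0; i < n; i++) a[i] = a[i] << b[i];

   the amount varies per element and the vector/vector form is needed.  */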
6213 /* Function vectorizable_operation.
6215 Check if STMT_INFO performs a binary, unary or ternary operation that can
6216 be vectorized.
6217 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6218 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6219 Return true if STMT_INFO is vectorizable in this way. */
6221 static bool
6222 vectorizable_operation (vec_info *vinfo,
6223 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6224 gimple **vec_stmt, slp_tree slp_node,
6225 stmt_vector_for_cost *cost_vec)
6227 tree vec_dest;
6228 tree scalar_dest;
6229 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6230 tree vectype;
6231 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6232 enum tree_code code, orig_code;
6233 machine_mode vec_mode;
6234 tree new_temp;
6235 int op_type;
6236 optab optab;
6237 bool target_support_p;
6238 enum vect_def_type dt[3]
6239 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6240 int ndts = 3;
6241 poly_uint64 nunits_in;
6242 poly_uint64 nunits_out;
6243 tree vectype_out;
6244 int ncopies, vec_num;
6245 int i;
6246 vec<tree> vec_oprnds0 = vNULL;
6247 vec<tree> vec_oprnds1 = vNULL;
6248 vec<tree> vec_oprnds2 = vNULL;
6249 tree vop0, vop1, vop2;
6250 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6252 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6253 return false;
6255 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6256 && ! vec_stmt)
6257 return false;
6259 /* Is STMT a vectorizable binary/unary operation? */
6260 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6261 if (!stmt)
6262 return false;
6264 /* Loads and stores are handled in vectorizable_{load,store}. */
6265 if (STMT_VINFO_DATA_REF (stmt_info))
6266 return false;
6268 orig_code = code = gimple_assign_rhs_code (stmt);
6270 /* Shifts are handled in vectorizable_shift. */
6271 if (code == LSHIFT_EXPR
6272 || code == RSHIFT_EXPR
6273 || code == LROTATE_EXPR
6274 || code == RROTATE_EXPR)
6275 return false;
6277 /* Comparisons are handled in vectorizable_comparison. */
6278 if (TREE_CODE_CLASS (code) == tcc_comparison)
6279 return false;
6281 /* Conditions are handled in vectorizable_condition. */
6282 if (code == COND_EXPR)
6283 return false;
6285 /* For pointer addition and subtraction, we should use the normal
6286 plus and minus for the vector operation. */
6287 if (code == POINTER_PLUS_EXPR)
6288 code = PLUS_EXPR;
6289 if (code == POINTER_DIFF_EXPR)
6290 code = MINUS_EXPR;
6292 /* Support only unary or binary operations. */
6293 op_type = TREE_CODE_LENGTH (code);
6294 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6296 if (dump_enabled_p ())
6297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6298 "num. args = %d (not unary/binary/ternary op).\n",
6299 op_type);
6300 return false;
6303 scalar_dest = gimple_assign_lhs (stmt);
6304 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6306 /* Most operations cannot handle bit-precision types without extra
6307 truncations. */
6308 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6309 if (!mask_op_p
6310 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6311 /* Exceptions are the bitwise binary operations. */
6312 && code != BIT_IOR_EXPR
6313 && code != BIT_XOR_EXPR
6314 && code != BIT_AND_EXPR)
6316 if (dump_enabled_p ())
6317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6318 "bit-precision arithmetic not supported.\n");
6319 return false;
6322 slp_tree slp_op0;
6323 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6324 0, &op0, &slp_op0, &dt[0], &vectype))
6326 if (dump_enabled_p ())
6327 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6328 "use not simple.\n");
6329 return false;
6331 bool is_invariant = (dt[0] == vect_external_def
6332 || dt[0] == vect_constant_def);
6333 /* If op0 is an external or constant def, infer the vector type
6334 from the scalar type. */
6335 if (!vectype)
6337 /* For a boolean type we cannot determine the vectype from an
6338 invariant value (we don't know whether it is a vector
6339 of booleans or a vector of integers). We use the output
6340 vectype because operations on booleans don't change
6341 the type. */
6342 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6344 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6346 if (dump_enabled_p ())
6347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6348 "not supported operation on bool value.\n");
6349 return false;
6351 vectype = vectype_out;
6353 else
6354 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6355 slp_node);
6357 if (vec_stmt)
6358 gcc_assert (vectype);
6359 if (!vectype)
6361 if (dump_enabled_p ())
6362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6363 "no vectype for scalar type %T\n",
6364 TREE_TYPE (op0));
6366 return false;
6369 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6370 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6371 if (maybe_ne (nunits_out, nunits_in))
6372 return false;
6374 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6375 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6376 if (op_type == binary_op || op_type == ternary_op)
6378 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6379 1, &op1, &slp_op1, &dt[1], &vectype2))
6381 if (dump_enabled_p ())
6382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6383 "use not simple.\n");
6384 return false;
6386 is_invariant &= (dt[1] == vect_external_def
6387 || dt[1] == vect_constant_def);
6388 if (vectype2
6389 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6390 return false;
6392 if (op_type == ternary_op)
6394 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6395 2, &op2, &slp_op2, &dt[2], &vectype3))
6397 if (dump_enabled_p ())
6398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6399 "use not simple.\n");
6400 return false;
6402 is_invariant &= (dt[2] == vect_external_def
6403 || dt[2] == vect_constant_def);
6404 if (vectype3
6405 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6406 return false;
6409 /* Multiple types in SLP are handled by creating the appropriate number of
6410 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6411 case of SLP. */
6412 if (slp_node)
6414 ncopies = 1;
6415 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6417 else
6419 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6420 vec_num = 1;
6423 gcc_assert (ncopies >= 1);
6425 /* Reject attempts to combine mask types with nonmask types, e.g. if
6426 we have an AND between a (nonmask) boolean loaded from memory and
6427 a (mask) boolean result of a comparison.
6429 TODO: We could easily fix these cases up using pattern statements. */
6430 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6431 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6432 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6434 if (dump_enabled_p ())
6435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6436 "mixed mask and nonmask vector types\n");
6437 return false;
6440 /* Supportable by target? */
6442 vec_mode = TYPE_MODE (vectype);
6443 if (code == MULT_HIGHPART_EXPR)
6444 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6445 else
6447 optab = optab_for_tree_code (code, vectype, optab_default);
6448 if (!optab)
6450 if (dump_enabled_p ())
6451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6452 "no optab.\n");
6453 return false;
6455 target_support_p = (optab_handler (optab, vec_mode)
6456 != CODE_FOR_nothing);
6459 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6460 if (!target_support_p || using_emulated_vectors_p)
6462 if (dump_enabled_p ())
6463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6464 "op not supported by target.\n");
6465 /* When vec_mode is not a vector mode and we have verified that the
6466 ops we do not have to lower (like AND) are natively supported,
6467 let those through even when the mode isn't word_mode. For
6468 the ops we do have to lower, the lowering code assumes we are
6469 dealing with word_mode. */
6470 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6471 || !target_support_p)
6472 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6473 /* Check only during analysis. */
6474 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6476 if (dump_enabled_p ())
6477 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6478 return false;
6480 if (dump_enabled_p ())
6481 dump_printf_loc (MSG_NOTE, vect_location,
6482 "proceeding using word mode.\n");
6483 using_emulated_vectors_p = true;
6486 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6487 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6488 internal_fn cond_fn = get_conditional_internal_fn (code);
6490 /* If operating on inactive elements could generate spurious traps,
6491 we need to restrict the operation to active lanes. Note that this
6492 specifically doesn't apply to unhoisted invariants, since they
6493 operate on the same value for every lane.
6495 Similarly, if this operation is part of a reduction, a fully-masked
6496 loop should only change the active lanes of the reduction chain,
6497 keeping the inactive lanes as-is. */
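  /* Editor's example (an assumption about a typical case, not from the
     original authors): an integer division whose divisor could be zero in
     an inactive lane may trap, so with partial vectors it has to be
     emitted through the conditional internal function (e.g. IFN_COND_DIV)
     so that only the active lanes perform the operation.  */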
6498 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6499 || reduc_idx >= 0);
6501 if (!vec_stmt) /* transformation not required. */
6503 if (loop_vinfo
6504 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6505 && mask_out_inactive)
6507 if (cond_fn == IFN_LAST
6508 || !direct_internal_fn_supported_p (cond_fn, vectype,
6509 OPTIMIZE_FOR_SPEED))
6511 if (dump_enabled_p ())
6512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6513 "can't use a fully-masked loop because no"
6514 " conditional operation is available.\n");
6515 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6517 else
6518 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6519 vectype, NULL);
6522 /* Put types on constant and invariant SLP children. */
6523 if (slp_node
6524 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6525 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6526 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6528 if (dump_enabled_p ())
6529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6530 "incompatible vector types for invariants\n");
6531 return false;
6534 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6535 DUMP_VECT_SCOPE ("vectorizable_operation");
6536 vect_model_simple_cost (vinfo, stmt_info,
6537 ncopies, dt, ndts, slp_node, cost_vec);
6538 if (using_emulated_vectors_p)
6540 /* The above vect_model_simple_cost call handles constants
6541 in the prologue and (mis-)costs one of the stmts as
6542 vector stmt. See below for the actual lowering that will
6543 be applied. */
6544 unsigned n
6545 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6546 switch (code)
6548 case PLUS_EXPR:
6549 n *= 5;
6550 break;
6551 case MINUS_EXPR:
6552 n *= 6;
6553 break;
6554 case NEGATE_EXPR:
6555 n *= 4;
6556 break;
6557 default:
6558 /* Bit operations do not have extra cost and are accounted
6559 as vector stmt by vect_model_simple_cost. */
6560 n = 0;
6561 break;
6563 if (n != 0)
6565 /* We also need to materialize two large constants. */
6566 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6567 0, vect_prologue);
6568 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6569 0, vect_body);
6572 return true;
6575 /* Transform. */
6577 if (dump_enabled_p ())
6578 dump_printf_loc (MSG_NOTE, vect_location,
6579 "transform binary/unary operation.\n");
6581 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6583 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6584 vectors with unsigned elements, but the result is signed. So, we
6585 need to compute the MINUS_EXPR into vectype temporary and
6586 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6587 tree vec_cvt_dest = NULL_TREE;
6588 if (orig_code == POINTER_DIFF_EXPR)
6590 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6591 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6593 /* Handle def. */
6594 else
6595 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6597 /* In case the vectorization factor (VF) is bigger than the number
6598 of elements that we can fit in a vectype (nunits), we have to generate
6599 more than one vector stmt - i.e., we need to "unroll" the
6600 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6601 from one copy of the vector stmt to the next, in the field
6602 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6603 stages to find the correct vector defs to be used when vectorizing
6604 stmts that use the defs of the current stmt. The example below
6605 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6606 we need to create 4 vectorized stmts):
6608 before vectorization:
6609 RELATED_STMT VEC_STMT
6610 S1: x = memref - -
6611 S2: z = x + 1 - -
6613 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6614 there):
6615 RELATED_STMT VEC_STMT
6616 VS1_0: vx0 = memref0 VS1_1 -
6617 VS1_1: vx1 = memref1 VS1_2 -
6618 VS1_2: vx2 = memref2 VS1_3 -
6619 VS1_3: vx3 = memref3 - -
6620 S1: x = load - VS1_0
6621 S2: z = x + 1 - -
6623 step2: vectorize stmt S2 (done here):
6624 To vectorize stmt S2 we first need to find the relevant vector
6625 def for the first operand 'x'. This is, as usual, obtained from
6626 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6627 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6628 relevant vector def 'vx0'. Having found 'vx0' we can generate
6629 the vector stmt VS2_0, and as usual, record it in the
6630 STMT_VINFO_VEC_STMT of stmt S2.
6631 When creating the second copy (VS2_1), we obtain the relevant vector
6632 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6633 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6634 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6635 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6636 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6637 chain of stmts and pointers:
6638 RELATED_STMT VEC_STMT
6639 VS1_0: vx0 = memref0 VS1_1 -
6640 VS1_1: vx1 = memref1 VS1_2 -
6641 VS1_2: vx2 = memref2 VS1_3 -
6642 VS1_3: vx3 = memref3 - -
6643 S1: x = load - VS1_0
6644 VS2_0: vz0 = vx0 + v1 VS2_1 -
6645 VS2_1: vz1 = vx1 + v1 VS2_2 -
6646 VS2_2: vz2 = vx2 + v1 VS2_3 -
6647 VS2_3: vz3 = vx3 + v1 - -
6648 S2: z = x + 1 - VS2_0 */
6650 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6651 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6652 /* Arguments are ready. Create the new vector stmt. */
6653 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6655 gimple *new_stmt = NULL;
6656 vop1 = ((op_type == binary_op || op_type == ternary_op)
6657 ? vec_oprnds1[i] : NULL_TREE);
6658 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6659 if (using_emulated_vectors_p
6660 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6662 /* Lower the operation. This follows vector lowering. */
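/* Editor's note summarizing the scheme below (an inference from the
   generated statements, not wording by the original authors): for
   element-wise addition on a word packing several elements,

     low    = (a & low_bits) + (b & low_bits)    -- carries stay in-lane
     result = low ^ ((a ^ b) & high_bits)        -- fix up each lane's MSB

   where low_bits clears each element's most significant bit and
   high_bits keeps only those bits; subtraction and negation use the
   analogous identities.  */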
6663 unsigned int width = vector_element_bits (vectype);
6664 tree inner_type = TREE_TYPE (vectype);
6665 tree word_type
6666 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
6667 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6668 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
6669 tree high_bits
6670 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
6671 tree wvop0 = make_ssa_name (word_type);
6672 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6673 build1 (VIEW_CONVERT_EXPR,
6674 word_type, vop0));
6675 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6676 tree result_low, signs;
6677 if (code == PLUS_EXPR || code == MINUS_EXPR)
6679 tree wvop1 = make_ssa_name (word_type);
6680 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6681 build1 (VIEW_CONVERT_EXPR,
6682 word_type, vop1));
6683 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6684 signs = make_ssa_name (word_type);
6685 new_stmt = gimple_build_assign (signs,
6686 BIT_XOR_EXPR, wvop0, wvop1);
6687 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6688 tree b_low = make_ssa_name (word_type);
6689 new_stmt = gimple_build_assign (b_low,
6690 BIT_AND_EXPR, wvop1, low_bits);
6691 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6692 tree a_low = make_ssa_name (word_type);
6693 if (code == PLUS_EXPR)
6694 new_stmt = gimple_build_assign (a_low,
6695 BIT_AND_EXPR, wvop0, low_bits);
6696 else
6697 new_stmt = gimple_build_assign (a_low,
6698 BIT_IOR_EXPR, wvop0, high_bits);
6699 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6700 if (code == MINUS_EXPR)
6702 new_stmt = gimple_build_assign (NULL_TREE,
6703 BIT_NOT_EXPR, signs);
6704 signs = make_ssa_name (word_type);
6705 gimple_assign_set_lhs (new_stmt, signs);
6706 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6708 new_stmt = gimple_build_assign (NULL_TREE,
6709 BIT_AND_EXPR, signs, high_bits);
6710 signs = make_ssa_name (word_type);
6711 gimple_assign_set_lhs (new_stmt, signs);
6712 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6713 result_low = make_ssa_name (word_type);
6714 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
6715 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6717 else
6719 tree a_low = make_ssa_name (word_type);
6720 new_stmt = gimple_build_assign (a_low,
6721 BIT_AND_EXPR, wvop0, low_bits);
6722 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6723 signs = make_ssa_name (word_type);
6724 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
6725 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6726 new_stmt = gimple_build_assign (NULL_TREE,
6727 BIT_AND_EXPR, signs, high_bits);
6728 signs = make_ssa_name (word_type);
6729 gimple_assign_set_lhs (new_stmt, signs);
6730 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6731 result_low = make_ssa_name (word_type);
6732 new_stmt = gimple_build_assign (result_low,
6733 MINUS_EXPR, high_bits, a_low);
6734 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6736 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
6737 signs);
6738 result_low = make_ssa_name (word_type);
6739 gimple_assign_set_lhs (new_stmt, result_low);
6740 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6741 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
6742 build1 (VIEW_CONVERT_EXPR,
6743 vectype, result_low));
6744 new_temp = make_ssa_name (vectype);
6745 gimple_assign_set_lhs (new_stmt, new_temp);
6746 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6748 else if (masked_loop_p && mask_out_inactive)
6750 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6751 vectype, i);
6752 auto_vec<tree> vops (5);
6753 vops.quick_push (mask);
6754 vops.quick_push (vop0);
6755 if (vop1)
6756 vops.quick_push (vop1);
6757 if (vop2)
6758 vops.quick_push (vop2);
6759 if (reduc_idx >= 0)
6761 /* Perform the operation on active elements only and take
6762 inactive elements from the reduction chain input. */
6763 gcc_assert (!vop2);
6764 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
6766 else
6768 auto else_value = targetm.preferred_else_value
6769 (cond_fn, vectype, vops.length () - 1, &vops[1]);
6770 vops.quick_push (else_value);
6772 gcall *call = gimple_build_call_internal_vec (cond_fn, vops);
6773 new_temp = make_ssa_name (vec_dest, call);
6774 gimple_call_set_lhs (call, new_temp);
6775 gimple_call_set_nothrow (call, true);
6776 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6777 new_stmt = call;
6779 else
6781 tree mask = NULL_TREE;
6782 /* When combining two masks, check whether either of them is elsewhere
6783 combined with a loop mask; if so, we can mark that the
6784 new combined mask doesn't need to be combined with a loop mask again. */
6785 if (masked_loop_p
6786 && code == BIT_AND_EXPR
6787 && VECTOR_BOOLEAN_TYPE_P (vectype))
6789 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6790 ncopies}))
6792 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6793 vectype, i);
6795 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6796 vop0, gsi);
6799 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6800 ncopies }))
6802 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6803 vectype, i);
6805 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6806 vop1, gsi);
6810 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6811 new_temp = make_ssa_name (vec_dest, new_stmt);
6812 gimple_assign_set_lhs (new_stmt, new_temp);
6813 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6814 if (using_emulated_vectors_p)
6815 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
6817 /* Enter the combined value into the vector cond hash so we don't
6818 AND it with a loop mask again. */
6819 if (mask)
6820 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6823 if (vec_cvt_dest)
6825 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6826 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6827 new_temp);
6828 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6829 gimple_assign_set_lhs (new_stmt, new_temp);
6830 vect_finish_stmt_generation (vinfo, stmt_info,
6831 new_stmt, gsi);
6834 if (slp_node)
6835 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6836 else
6837 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6840 if (!slp_node)
6841 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6843 vec_oprnds0.release ();
6844 vec_oprnds1.release ();
6845 vec_oprnds2.release ();
6847 return true;
6850 /* A helper function to ensure data reference DR_INFO's base alignment. */
6852 static void
6853 ensure_base_align (dr_vec_info *dr_info)
6855 /* Alignment is only analyzed for the first element of a DR group;
6856 use that to determine the base alignment we need to enforce. */
6857 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6858 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6860 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6862 if (dr_info->base_misaligned)
6864 tree base_decl = dr_info->base_decl;
6866 // We should only be able to increase the alignment of a base object if
6867 // we know what its new alignment should be at compile time.
6868 unsigned HOST_WIDE_INT align_base_to =
6869 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6871 if (decl_in_symtab_p (base_decl))
6872 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6873 else if (DECL_ALIGN (base_decl) < align_base_to)
6875 SET_DECL_ALIGN (base_decl, align_base_to);
6876 DECL_USER_ALIGN (base_decl) = 1;
6878 dr_info->base_misaligned = false;
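/* Editor's illustration (hypothetical, not from the original source):
   if a data reference's base is 'static int a[1024];' with only the
   default 4-byte alignment but DR_TARGET_ALIGNMENT asks for 16 bytes,
   the code above raises the declaration's (or symtab node's) alignment
   to 128 bits so the vectorized accesses can be emitted as aligned.  */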
6883 /* Function get_group_alias_ptr_type.
6885 Return the alias type for the group starting at FIRST_STMT_INFO. */
6887 static tree
6888 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6890 struct data_reference *first_dr, *next_dr;
6892 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6893 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6894 while (next_stmt_info)
6896 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6897 if (get_alias_set (DR_REF (first_dr))
6898 != get_alias_set (DR_REF (next_dr)))
6900 if (dump_enabled_p ())
6901 dump_printf_loc (MSG_NOTE, vect_location,
6902 "conflicting alias set types.\n");
6903 return ptr_type_node;
6905 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6907 return reference_alias_ptr_type (DR_REF (first_dr));
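/* Editor's note (an illustration, not from the original source): when
   the members of an interleaved group do not all share one alias set,
   e.g. because adjacent fields of a structure are accessed through
   differently typed references, the conservative ptr_type_node is
   returned so that the generated accesses are assumed to alias with
   everything.  */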
6911 /* Function scan_operand_equal_p.
6913 Helper function for check_scan_store. Compare two references
6914 with .GOMP_SIMD_LANE bases. */
6916 static bool
6917 scan_operand_equal_p (tree ref1, tree ref2)
6919 tree ref[2] = { ref1, ref2 };
6920 poly_int64 bitsize[2], bitpos[2];
6921 tree offset[2], base[2];
6922 for (int i = 0; i < 2; ++i)
6924 machine_mode mode;
6925 int unsignedp, reversep, volatilep = 0;
6926 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6927 &offset[i], &mode, &unsignedp,
6928 &reversep, &volatilep);
6929 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6930 return false;
6931 if (TREE_CODE (base[i]) == MEM_REF
6932 && offset[i] == NULL_TREE
6933 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6935 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6936 if (is_gimple_assign (def_stmt)
6937 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6938 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6939 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6941 if (maybe_ne (mem_ref_offset (base[i]), 0))
6942 return false;
6943 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6944 offset[i] = gimple_assign_rhs2 (def_stmt);
6949 if (!operand_equal_p (base[0], base[1], 0))
6950 return false;
6951 if (maybe_ne (bitsize[0], bitsize[1]))
6952 return false;
6953 if (offset[0] != offset[1])
6955 if (!offset[0] || !offset[1])
6956 return false;
6957 if (!operand_equal_p (offset[0], offset[1], 0))
6959 tree step[2];
6960 for (int i = 0; i < 2; ++i)
6962 step[i] = integer_one_node;
6963 if (TREE_CODE (offset[i]) == SSA_NAME)
6965 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6966 if (is_gimple_assign (def_stmt)
6967 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6968 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6969 == INTEGER_CST))
6971 step[i] = gimple_assign_rhs2 (def_stmt);
6972 offset[i] = gimple_assign_rhs1 (def_stmt);
6975 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6977 step[i] = TREE_OPERAND (offset[i], 1);
6978 offset[i] = TREE_OPERAND (offset[i], 0);
6980 tree rhs1 = NULL_TREE;
6981 if (TREE_CODE (offset[i]) == SSA_NAME)
6983 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6984 if (gimple_assign_cast_p (def_stmt))
6985 rhs1 = gimple_assign_rhs1 (def_stmt);
6987 else if (CONVERT_EXPR_P (offset[i]))
6988 rhs1 = TREE_OPERAND (offset[i], 0);
6989 if (rhs1
6990 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6991 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6992 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6993 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6994 offset[i] = rhs1;
6996 if (!operand_equal_p (offset[0], offset[1], 0)
6997 || !operand_equal_p (step[0], step[1], 0))
6998 return false;
7001 return true;
7005 enum scan_store_kind {
7006 /* Normal permutation. */
7007 scan_store_kind_perm,
7009 /* Whole vector left shift permutation with zero init. */
7010 scan_store_kind_lshift_zero,
7012 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7013 scan_store_kind_lshift_cond
7016 /* Function scan_store_can_perm_p.
7018 Verify if we can perform the needed permutations or whole vector shifts.
7019 Return -1 on failure, otherwise exact log2 of vectype's nunits.
7020 USE_WHOLE_VECTOR is a vector of enum scan_store_kind saying which
7021 operation to do at each step. */
7023 static int
7024 scan_store_can_perm_p (tree vectype, tree init,
7025 vec<enum scan_store_kind> *use_whole_vector = NULL)
7027 enum machine_mode vec_mode = TYPE_MODE (vectype);
7028 unsigned HOST_WIDE_INT nunits;
7029 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7030 return -1;
7031 int units_log2 = exact_log2 (nunits);
7032 if (units_log2 <= 0)
7033 return -1;
7035 int i;
7036 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7037 for (i = 0; i <= units_log2; ++i)
7039 unsigned HOST_WIDE_INT j, k;
7040 enum scan_store_kind kind = scan_store_kind_perm;
7041 vec_perm_builder sel (nunits, nunits, 1);
7042 sel.quick_grow (nunits);
7043 if (i == units_log2)
7045 for (j = 0; j < nunits; ++j)
7046 sel[j] = nunits - 1;
7048 else
7050 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7051 sel[j] = j;
7052 for (k = 0; j < nunits; ++j, ++k)
7053 sel[j] = nunits + k;
7055 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7056 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7058 if (i == units_log2)
7059 return -1;
7061 if (whole_vector_shift_kind == scan_store_kind_perm)
7063 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7064 return -1;
7065 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7066 /* Whole vector shifts shift in zeros, so if init is all zero
7067 constant, there is no need to do anything further. */
7068 if ((TREE_CODE (init) != INTEGER_CST
7069 && TREE_CODE (init) != REAL_CST)
7070 || !initializer_zerop (init))
7072 tree masktype = truth_type_for (vectype);
7073 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7074 return -1;
7075 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7078 kind = whole_vector_shift_kind;
7080 if (use_whole_vector)
7082 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7083 use_whole_vector->safe_grow_cleared (i, true);
7084 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7085 use_whole_vector->safe_push (kind);
7089 return units_log2;
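  /* As a concrete example, for nunits == 8 the selectors tested above are
       i == 0: { 0, 8, 9, 10, 11, 12, 13, 14 }
       i == 1: { 0, 1, 8, 9, 10, 11, 12, 13 }
       i == 2: { 0, 1, 2, 3, 8, 9, 10, 11 }
       i == 3: { 7, 7, 7, 7, 7, 7, 7, 7 }
     i.e. exactly the VEC_PERM_EXPR masks used in the vectorized scan
     sequences shown in the comment inside check_scan_store below.  */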
7093 /* Function check_scan_store.
7095 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7097 static bool
7098 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7099 enum vect_def_type rhs_dt, bool slp, tree mask,
7100 vect_memory_access_type memory_access_type)
7102 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7103 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7104 tree ref_type;
7106 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7107 if (slp
7108 || mask
7109 || memory_access_type != VMAT_CONTIGUOUS
7110 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7111 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7112 || loop_vinfo == NULL
7113 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7114 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7115 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7116 || !integer_zerop (DR_INIT (dr_info->dr))
7117 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7118 || !alias_sets_conflict_p (get_alias_set (vectype),
7119 get_alias_set (TREE_TYPE (ref_type))))
7121 if (dump_enabled_p ())
7122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7123 "unsupported OpenMP scan store.\n");
7124 return false;
7127 /* We need to pattern match code built by OpenMP lowering and simplified
7128 by subsequent optimizations into something we can handle.
7129 #pragma omp simd reduction(inscan,+:r)
7130 for (...)
7132 r += something ();
7133 #pragma omp scan inclusive (r)
7134 use (r);
7136 shall have body with:
7137 // Initialization for input phase, store the reduction initializer:
7138 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7139 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7140 D.2042[_21] = 0;
7141 // Actual input phase:
7143 r.0_5 = D.2042[_20];
7144 _6 = _4 + r.0_5;
7145 D.2042[_20] = _6;
7146 // Initialization for scan phase:
7147 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7148 _26 = D.2043[_25];
7149 _27 = D.2042[_25];
7150 _28 = _26 + _27;
7151 D.2043[_25] = _28;
7152 D.2042[_25] = _28;
7153 // Actual scan phase:
7155 r.1_8 = D.2042[_20];
7157 The "omp simd array" variable D.2042 holds the privatized copy used
7158 inside of the loop and D.2043 is another one that holds copies of
7159 the current original list item. The separate GOMP_SIMD_LANE ifn
7160 kinds are there in order to allow optimizing the initializer store
7161 and combiner sequence, e.g. if it is originally some C++ish user
7162 defined reduction, but allow the vectorizer to pattern recognize it
7163 and turn it into the appropriate vectorized scan.
7165 For exclusive scan, this is slightly different:
7166 #pragma omp simd reduction(inscan,+:r)
7167 for (...)
7169 use (r);
7170 #pragma omp scan exclusive (r)
7171 r += something ();
7173 shall have body with:
7174 // Initialization for input phase, store the reduction initializer:
7175 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7176 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7177 D.2042[_21] = 0;
7178 // Actual input phase:
7180 r.0_5 = D.2042[_20];
7181 _6 = _4 + r.0_5;
7182 D.2042[_20] = _6;
7183 // Initialization for scan phase:
7184 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7185 _26 = D.2043[_25];
7186 D.2044[_25] = _26;
7187 _27 = D.2042[_25];
7188 _28 = _26 + _27;
7189 D.2043[_25] = _28;
7190 // Actual scan phase:
7192 r.1_8 = D.2044[_20];
7193 ... */
7195 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7197 /* Match the D.2042[_21] = 0; store above. Just require that
7198 it is a constant or external definition store. */
7199 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7201 fail_init:
7202 if (dump_enabled_p ())
7203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7204 "unsupported OpenMP scan initializer store.\n");
7205 return false;
7208 if (! loop_vinfo->scan_map)
7209 loop_vinfo->scan_map = new hash_map<tree, tree>;
7210 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7211 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7212 if (cached)
7213 goto fail_init;
7214 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7216 /* These stores can be vectorized normally. */
7217 return true;
7220 if (rhs_dt != vect_internal_def)
7222 fail:
7223 if (dump_enabled_p ())
7224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7225 "unsupported OpenMP scan combiner pattern.\n");
7226 return false;
7229 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7230 tree rhs = gimple_assign_rhs1 (stmt);
7231 if (TREE_CODE (rhs) != SSA_NAME)
7232 goto fail;
7234 gimple *other_store_stmt = NULL;
7235 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7236 bool inscan_var_store
7237 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7239 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7241 if (!inscan_var_store)
7243 use_operand_p use_p;
7244 imm_use_iterator iter;
7245 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7247 gimple *use_stmt = USE_STMT (use_p);
7248 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7249 continue;
7250 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7251 || !is_gimple_assign (use_stmt)
7252 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7253 || other_store_stmt
7254 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7255 goto fail;
7256 other_store_stmt = use_stmt;
7258 if (other_store_stmt == NULL)
7259 goto fail;
7260 rhs = gimple_assign_lhs (other_store_stmt);
7261 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7262 goto fail;
7265 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7267 use_operand_p use_p;
7268 imm_use_iterator iter;
7269 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7271 gimple *use_stmt = USE_STMT (use_p);
7272 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7273 continue;
7274 if (other_store_stmt)
7275 goto fail;
7276 other_store_stmt = use_stmt;
7279 else
7280 goto fail;
7282 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7283 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7284 || !is_gimple_assign (def_stmt)
7285 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7286 goto fail;
7288 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7289 /* For pointer addition, we should use the normal plus for the vector
7290 operation. */
7291 switch (code)
7293 case POINTER_PLUS_EXPR:
7294 code = PLUS_EXPR;
7295 break;
7296 case MULT_HIGHPART_EXPR:
7297 goto fail;
7298 default:
7299 break;
7301 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7302 goto fail;
7304 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7305 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7306 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7307 goto fail;
7309 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7310 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7311 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7312 || !gimple_assign_load_p (load1_stmt)
7313 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7314 || !gimple_assign_load_p (load2_stmt))
7315 goto fail;
7317 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7318 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7319 if (load1_stmt_info == NULL
7320 || load2_stmt_info == NULL
7321 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7322 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7323 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7324 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7325 goto fail;
7327 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7329 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7330 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7331 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7332 goto fail;
7333 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7334 tree lrhs;
7335 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7336 lrhs = rhs1;
7337 else
7338 lrhs = rhs2;
7339 use_operand_p use_p;
7340 imm_use_iterator iter;
7341 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7343 gimple *use_stmt = USE_STMT (use_p);
7344 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7345 continue;
7346 if (other_store_stmt)
7347 goto fail;
7348 other_store_stmt = use_stmt;
7352 if (other_store_stmt == NULL)
7353 goto fail;
7354 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7355 || !gimple_store_p (other_store_stmt))
7356 goto fail;
7358 stmt_vec_info other_store_stmt_info
7359 = loop_vinfo->lookup_stmt (other_store_stmt);
7360 if (other_store_stmt_info == NULL
7361 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7362 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7363 goto fail;
7365 gimple *stmt1 = stmt;
7366 gimple *stmt2 = other_store_stmt;
7367 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7368 std::swap (stmt1, stmt2);
7369 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7370 gimple_assign_rhs1 (load2_stmt)))
7372 std::swap (rhs1, rhs2);
7373 std::swap (load1_stmt, load2_stmt);
7374 std::swap (load1_stmt_info, load2_stmt_info);
7376 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7377 gimple_assign_rhs1 (load1_stmt)))
7378 goto fail;
7380 tree var3 = NULL_TREE;
7381 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7382 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7383 gimple_assign_rhs1 (load2_stmt)))
7384 goto fail;
7385 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7387 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7388 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7389 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7390 goto fail;
7391 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7392 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7393 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7394 || lookup_attribute ("omp simd inscan exclusive",
7395 DECL_ATTRIBUTES (var3)))
7396 goto fail;
7399 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7400 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7401 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7402 goto fail;
7404 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7405 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7406 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7407 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7408 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7409 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7410 goto fail;
7412 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7413 std::swap (var1, var2);
7415 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7417 if (!lookup_attribute ("omp simd inscan exclusive",
7418 DECL_ATTRIBUTES (var1)))
7419 goto fail;
7420 var1 = var3;
7423 if (loop_vinfo->scan_map == NULL)
7424 goto fail;
7425 tree *init = loop_vinfo->scan_map->get (var1);
7426 if (init == NULL)
7427 goto fail;
7429 /* The IL is as expected, now check if we can actually vectorize it.
7430 Inclusive scan:
7431 _26 = D.2043[_25];
7432 _27 = D.2042[_25];
7433 _28 = _26 + _27;
7434 D.2043[_25] = _28;
7435 D.2042[_25] = _28;
7436 should be vectorized as (where _40 is the vectorized rhs
7437 from the D.2042[_21] = 0; store):
7438 _30 = MEM <vector(8) int> [(int *)&D.2043];
7439 _31 = MEM <vector(8) int> [(int *)&D.2042];
7440 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7441 _33 = _31 + _32;
7442 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7443 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7444 _35 = _33 + _34;
7445 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7446 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7447 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7448 _37 = _35 + _36;
7449 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7450 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7451 _38 = _30 + _37;
7452 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7453 MEM <vector(8) int> [(int *)&D.2043] = _39;
7454 MEM <vector(8) int> [(int *)&D.2042] = _38;
7455 Exclusive scan:
7456 _26 = D.2043[_25];
7457 D.2044[_25] = _26;
7458 _27 = D.2042[_25];
7459 _28 = _26 + _27;
7460 D.2043[_25] = _28;
7461 should be vectorized as (where _40 is the vectorized rhs
7462 from the D.2042[_21] = 0; store):
7463 _30 = MEM <vector(8) int> [(int *)&D.2043];
7464 _31 = MEM <vector(8) int> [(int *)&D.2042];
7465 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7466 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7467 _34 = _32 + _33;
7468 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7469 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7470 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7471 _36 = _34 + _35;
7472 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7473 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7474 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7475 _38 = _36 + _37;
7476 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7477 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7478 _39 = _30 + _38;
7479 _50 = _31 + _39;
7480 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7481 MEM <vector(8) int> [(int *)&D.2044] = _39;
7482 MEM <vector(8) int> [(int *)&D.2042] = _51; */
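  /* A minimal scalar sketch of what the log2 (nunits) add steps above
     compute for the inclusive case (variable names are illustrative
     only): with the reduction initializer being 0 as in the example,
     the lanes v[0..7] of the loaded vector are turned into an inclusive
     prefix sum by
       for (int step = 1; step < 8; step *= 2)
	 for (int k = 7; k >= step; --k)
	   v[k] += v[k - step];
     where each VEC_PERM_EXPR supplies the v[k - step] operand shifted
     into place and the vectorized initializer _40 fills the vacated
     low lanes.  */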
7483 enum machine_mode vec_mode = TYPE_MODE (vectype);
7484 optab optab = optab_for_tree_code (code, vectype, optab_default);
7485 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7486 goto fail;
7488 int units_log2 = scan_store_can_perm_p (vectype, *init);
7489 if (units_log2 == -1)
7490 goto fail;
7492 return true;
7496 /* Function vectorizable_scan_store.
7498 Helper of vectorizable_store; takes the same arguments as vectorizable_store.
7499 Handle only the transformation; checking is done in check_scan_store. */
7501 static bool
7502 vectorizable_scan_store (vec_info *vinfo,
7503 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7504 gimple **vec_stmt, int ncopies)
7506 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7507 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7508 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7509 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7511 if (dump_enabled_p ())
7512 dump_printf_loc (MSG_NOTE, vect_location,
7513 "transform scan store. ncopies = %d\n", ncopies);
7515 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7516 tree rhs = gimple_assign_rhs1 (stmt);
7517 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7519 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7520 bool inscan_var_store
7521 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7523 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7525 use_operand_p use_p;
7526 imm_use_iterator iter;
7527 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7529 gimple *use_stmt = USE_STMT (use_p);
7530 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7531 continue;
7532 rhs = gimple_assign_lhs (use_stmt);
7533 break;
7537 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7538 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7539 if (code == POINTER_PLUS_EXPR)
7540 code = PLUS_EXPR;
7541 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7542 && commutative_tree_code (code));
7543 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7544 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7545 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7546 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7547 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7548 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7549 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7550 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7551 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7552 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7553 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7555 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7557 std::swap (rhs1, rhs2);
7558 std::swap (var1, var2);
7559 std::swap (load1_dr_info, load2_dr_info);
7562 tree *init = loop_vinfo->scan_map->get (var1);
7563 gcc_assert (init);
7565 unsigned HOST_WIDE_INT nunits;
7566 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7567 gcc_unreachable ();
7568 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7569 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7570 gcc_assert (units_log2 > 0);
7571 auto_vec<tree, 16> perms;
7572 perms.quick_grow (units_log2 + 1);
7573 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7574 for (int i = 0; i <= units_log2; ++i)
7576 unsigned HOST_WIDE_INT j, k;
7577 vec_perm_builder sel (nunits, nunits, 1);
7578 sel.quick_grow (nunits);
7579 if (i == units_log2)
7580 for (j = 0; j < nunits; ++j)
7581 sel[j] = nunits - 1;
7582 else
7584 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7585 sel[j] = j;
7586 for (k = 0; j < nunits; ++j, ++k)
7587 sel[j] = nunits + k;
7589 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7590 if (!use_whole_vector.is_empty ()
7591 && use_whole_vector[i] != scan_store_kind_perm)
7593 if (zero_vec == NULL_TREE)
7594 zero_vec = build_zero_cst (vectype);
7595 if (masktype == NULL_TREE
7596 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7597 masktype = truth_type_for (vectype);
7598 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7600 else
7601 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7604 tree vec_oprnd1 = NULL_TREE;
7605 tree vec_oprnd2 = NULL_TREE;
7606 tree vec_oprnd3 = NULL_TREE;
7607 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7608 tree dataref_offset = build_int_cst (ref_type, 0);
7609 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7610 vectype, VMAT_CONTIGUOUS);
7611 tree ldataref_ptr = NULL_TREE;
7612 tree orig = NULL_TREE;
7613 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7614 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7615 auto_vec<tree> vec_oprnds1;
7616 auto_vec<tree> vec_oprnds2;
7617 auto_vec<tree> vec_oprnds3;
7618 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7619 *init, &vec_oprnds1,
7620 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7621 rhs2, &vec_oprnds3);
7622 for (int j = 0; j < ncopies; j++)
7624 vec_oprnd1 = vec_oprnds1[j];
7625 if (ldataref_ptr == NULL)
7626 vec_oprnd2 = vec_oprnds2[j];
7627 vec_oprnd3 = vec_oprnds3[j];
7628 if (j == 0)
7629 orig = vec_oprnd3;
7630 else if (!inscan_var_store)
7631 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7633 if (ldataref_ptr)
7635 vec_oprnd2 = make_ssa_name (vectype);
7636 tree data_ref = fold_build2 (MEM_REF, vectype,
7637 unshare_expr (ldataref_ptr),
7638 dataref_offset);
7639 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7640 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7641 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7642 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7643 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7646 tree v = vec_oprnd2;
7647 for (int i = 0; i < units_log2; ++i)
7649 tree new_temp = make_ssa_name (vectype);
7650 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7651 (zero_vec
7652 && (use_whole_vector[i]
7653 != scan_store_kind_perm))
7654 ? zero_vec : vec_oprnd1, v,
7655 perms[i]);
7656 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7657 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7658 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7660 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7662 /* The whole vector shift shifted in zeros, but if *init
7663 is not initializer_zerop, we need to replace those elements
7664 with elements from vec_oprnd1. */
7665 tree_vector_builder vb (masktype, nunits, 1);
7666 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7667 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7668 ? boolean_false_node : boolean_true_node);
7670 tree new_temp2 = make_ssa_name (vectype);
7671 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7672 new_temp, vec_oprnd1);
7673 vect_finish_stmt_generation (vinfo, stmt_info,
7674 g, gsi);
7675 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7676 new_temp = new_temp2;
7679 /* For exclusive scan, perform the perms[i] permutation once
7680 more. */
7681 if (i == 0
7682 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7683 && v == vec_oprnd2)
7685 v = new_temp;
7686 --i;
7687 continue;
7690 tree new_temp2 = make_ssa_name (vectype);
7691 g = gimple_build_assign (new_temp2, code, v, new_temp);
7692 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7693 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7695 v = new_temp2;
7698 tree new_temp = make_ssa_name (vectype);
7699 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7700 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7701 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7703 tree last_perm_arg = new_temp;
7704 /* For exclusive scan, new_temp computed above is the exclusive scan
7705 prefix sum. Turn it into inclusive prefix sum for the broadcast
7706 of the last element into orig. */
7707 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7709 last_perm_arg = make_ssa_name (vectype);
7710 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7711 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7712 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7715 orig = make_ssa_name (vectype);
7716 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7717 last_perm_arg, perms[units_log2]);
7718 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7719 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7721 if (!inscan_var_store)
7723 tree data_ref = fold_build2 (MEM_REF, vectype,
7724 unshare_expr (dataref_ptr),
7725 dataref_offset);
7726 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7727 g = gimple_build_assign (data_ref, new_temp);
7728 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7729 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7733 if (inscan_var_store)
7734 for (int j = 0; j < ncopies; j++)
7736 if (j != 0)
7737 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7739 tree data_ref = fold_build2 (MEM_REF, vectype,
7740 unshare_expr (dataref_ptr),
7741 dataref_offset);
7742 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7743 gimple *g = gimple_build_assign (data_ref, orig);
7744 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7745 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7747 return true;
7751 /* Function vectorizable_store.
7753 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7754 that can be vectorized.
7755 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7756 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7757 Return true if STMT_INFO is vectorizable in this way. */
7759 static bool
7760 vectorizable_store (vec_info *vinfo,
7761 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7762 gimple **vec_stmt, slp_tree slp_node,
7763 stmt_vector_for_cost *cost_vec)
7765 tree data_ref;
7766 tree op;
7767 tree vec_oprnd = NULL_TREE;
7768 tree elem_type;
7769 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7770 class loop *loop = NULL;
7771 machine_mode vec_mode;
7772 tree dummy;
7773 enum vect_def_type rhs_dt = vect_unknown_def_type;
7774 enum vect_def_type mask_dt = vect_unknown_def_type;
7775 tree dataref_ptr = NULL_TREE;
7776 tree dataref_offset = NULL_TREE;
7777 gimple *ptr_incr = NULL;
7778 int ncopies;
7779 int j;
7780 stmt_vec_info first_stmt_info;
7781 bool grouped_store;
7782 unsigned int group_size, i;
7783 vec<tree> oprnds = vNULL;
7784 vec<tree> result_chain = vNULL;
7785 vec<tree> vec_oprnds = vNULL;
7786 bool slp = (slp_node != NULL);
7787 unsigned int vec_num;
7788 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7789 tree aggr_type;
7790 gather_scatter_info gs_info;
7791 poly_uint64 vf;
7792 vec_load_store_type vls_type;
7793 tree ref_type;
7795 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7796 return false;
7798 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7799 && ! vec_stmt)
7800 return false;
7802 /* Is vectorizable store? */
7804 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7805 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7807 tree scalar_dest = gimple_assign_lhs (assign);
7808 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7809 && is_pattern_stmt_p (stmt_info))
7810 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7811 if (TREE_CODE (scalar_dest) != ARRAY_REF
7812 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7813 && TREE_CODE (scalar_dest) != INDIRECT_REF
7814 && TREE_CODE (scalar_dest) != COMPONENT_REF
7815 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7816 && TREE_CODE (scalar_dest) != REALPART_EXPR
7817 && TREE_CODE (scalar_dest) != MEM_REF)
7818 return false;
7820 else
7822 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7823 if (!call || !gimple_call_internal_p (call))
7824 return false;
7826 internal_fn ifn = gimple_call_internal_fn (call);
7827 if (!internal_store_fn_p (ifn))
7828 return false;
7830 if (slp_node != NULL)
7832 if (dump_enabled_p ())
7833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7834 "SLP of masked stores not supported.\n");
7835 return false;
7838 int mask_index = internal_fn_mask_index (ifn);
7839 if (mask_index >= 0
7840 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7841 &mask, NULL, &mask_dt, &mask_vectype))
7842 return false;
7845 op = vect_get_store_rhs (stmt_info);
7847 /* Cannot have hybrid store SLP -- that would mean storing to the
7848 same location twice. */
7849 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7851 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7852 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7854 if (loop_vinfo)
7856 loop = LOOP_VINFO_LOOP (loop_vinfo);
7857 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7859 else
7860 vf = 1;
7862 /* Multiple types in SLP are handled by creating the appropriate number of
7863 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7864 case of SLP. */
7865 if (slp)
7866 ncopies = 1;
7867 else
7868 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7870 gcc_assert (ncopies >= 1);
7872 /* FORNOW. This restriction should be relaxed. */
7873 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7875 if (dump_enabled_p ())
7876 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7877 "multiple types in nested loop.\n");
7878 return false;
7881 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7882 op, &rhs_dt, &rhs_vectype, &vls_type))
7883 return false;
7885 elem_type = TREE_TYPE (vectype);
7886 vec_mode = TYPE_MODE (vectype);
7888 if (!STMT_VINFO_DATA_REF (stmt_info))
7889 return false;
7891 vect_memory_access_type memory_access_type;
7892 enum dr_alignment_support alignment_support_scheme;
7893 int misalignment;
7894 poly_int64 poffset;
7895 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7896 ncopies, &memory_access_type, &poffset,
7897 &alignment_support_scheme, &misalignment, &gs_info))
7898 return false;
7900 if (mask)
7902 if (memory_access_type == VMAT_CONTIGUOUS)
7904 if (!VECTOR_MODE_P (vec_mode)
7905 || !can_vec_mask_load_store_p (vec_mode,
7906 TYPE_MODE (mask_vectype), false))
7907 return false;
7909 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7910 && (memory_access_type != VMAT_GATHER_SCATTER
7911 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7913 if (dump_enabled_p ())
7914 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7915 "unsupported access type for masked store.\n");
7916 return false;
7918 else if (memory_access_type == VMAT_GATHER_SCATTER
7919 && gs_info.ifn == IFN_LAST
7920 && !gs_info.decl)
7922 if (dump_enabled_p ())
7923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7924 "unsupported masked emulated scatter.\n");
7925 return false;
7928 else
7930 /* FORNOW. In some cases can vectorize even if data-type not supported
7931 (e.g. array initialization with 0). */
7932 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7933 return false;
7936 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7937 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7938 && memory_access_type != VMAT_GATHER_SCATTER
7939 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7940 if (grouped_store)
7942 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7943 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7944 group_size = DR_GROUP_SIZE (first_stmt_info);
7946 else
7948 first_stmt_info = stmt_info;
7949 first_dr_info = dr_info;
7950 group_size = vec_num = 1;
7953 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7955 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7956 memory_access_type))
7957 return false;
7960 if (!vec_stmt) /* transformation not required. */
7962 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7964 if (loop_vinfo
7965 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7966 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
7967 vls_type, group_size,
7968 memory_access_type, &gs_info,
7969 mask);
7971 if (slp_node
7972 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7973 vectype))
7975 if (dump_enabled_p ())
7976 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7977 "incompatible vector types for invariants\n");
7978 return false;
7981 if (dump_enabled_p ()
7982 && memory_access_type != VMAT_ELEMENTWISE
7983 && memory_access_type != VMAT_GATHER_SCATTER
7984 && alignment_support_scheme != dr_aligned)
7985 dump_printf_loc (MSG_NOTE, vect_location,
7986 "Vectorizing an unaligned access.\n");
7988 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7989 vect_model_store_cost (vinfo, stmt_info, ncopies,
7990 memory_access_type, &gs_info,
7991 alignment_support_scheme,
7992 misalignment, vls_type, slp_node, cost_vec);
7993 return true;
7995 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7997 /* Transform. */
7999 ensure_base_align (dr_info);
8001 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8003 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
8004 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
8005 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
8006 tree ptr, var, scale, vec_mask;
8007 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
8008 tree mask_halfvectype = mask_vectype;
8009 edge pe = loop_preheader_edge (loop);
8010 gimple_seq seq;
8011 basic_block new_bb;
8012 enum { NARROW, NONE, WIDEN } modifier;
8013 poly_uint64 scatter_off_nunits
8014 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
8016 if (known_eq (nunits, scatter_off_nunits))
8017 modifier = NONE;
8018 else if (known_eq (nunits * 2, scatter_off_nunits))
8020 modifier = WIDEN;
8022 /* Currently gathers and scatters are only supported for
8023 fixed-length vectors. */
8024 unsigned int count = scatter_off_nunits.to_constant ();
8025 vec_perm_builder sel (count, count, 1);
8026 for (i = 0; i < (unsigned int) count; ++i)
8027 sel.quick_push (i | (count / 2));
8029 vec_perm_indices indices (sel, 1, count);
8030 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
8031 indices);
8032 gcc_assert (perm_mask != NULL_TREE);
8034 else if (known_eq (nunits, scatter_off_nunits * 2))
8036 modifier = NARROW;
8038 /* Currently gathers and scatters are only supported for
8039 fixed-length vectors. */
8040 unsigned int count = nunits.to_constant ();
8041 vec_perm_builder sel (count, count, 1);
8042 for (i = 0; i < (unsigned int) count; ++i)
8043 sel.quick_push (i | (count / 2));
8045 vec_perm_indices indices (sel, 2, count);
8046 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
8047 gcc_assert (perm_mask != NULL_TREE);
8048 ncopies *= 2;
8050 if (mask)
8051 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
8053 else
8054 gcc_unreachable ();
8056 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
8057 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
8058 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
8059 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
8060 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
8061 scaletype = TREE_VALUE (arglist);
8063 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
8064 && TREE_CODE (rettype) == VOID_TYPE);
8066 ptr = fold_convert (ptrtype, gs_info.base);
8067 if (!is_gimple_min_invariant (ptr))
8069 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
8070 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
8071 gcc_assert (!new_bb);
8074 if (mask == NULL_TREE)
8076 mask_arg = build_int_cst (masktype, -1);
8077 mask_arg = vect_init_vector (vinfo, stmt_info,
8078 mask_arg, masktype, NULL);
8081 scale = build_int_cst (scaletype, gs_info.scale);
8083 auto_vec<tree> vec_oprnds0;
8084 auto_vec<tree> vec_oprnds1;
8085 auto_vec<tree> vec_masks;
8086 if (mask)
8088 tree mask_vectype = truth_type_for (vectype);
8089 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8090 modifier == NARROW
8091 ? ncopies / 2 : ncopies,
8092 mask, &vec_masks, mask_vectype);
8094 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8095 modifier == WIDEN
8096 ? ncopies / 2 : ncopies,
8097 gs_info.offset, &vec_oprnds0);
8098 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8099 modifier == NARROW
8100 ? ncopies / 2 : ncopies,
8101 op, &vec_oprnds1);
8102 for (j = 0; j < ncopies; ++j)
8104 if (modifier == WIDEN)
8106 if (j & 1)
8107 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
8108 perm_mask, stmt_info, gsi);
8109 else
8110 op = vec_oprnd0 = vec_oprnds0[j / 2];
8111 src = vec_oprnd1 = vec_oprnds1[j];
8112 if (mask)
8113 mask_op = vec_mask = vec_masks[j];
8115 else if (modifier == NARROW)
8117 if (j & 1)
8118 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
8119 perm_mask, stmt_info, gsi);
8120 else
8121 src = vec_oprnd1 = vec_oprnds1[j / 2];
8122 op = vec_oprnd0 = vec_oprnds0[j];
8123 if (mask)
8124 mask_op = vec_mask = vec_masks[j / 2];
8126 else
8128 op = vec_oprnd0 = vec_oprnds0[j];
8129 src = vec_oprnd1 = vec_oprnds1[j];
8130 if (mask)
8131 mask_op = vec_mask = vec_masks[j];
8134 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
8136 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
8137 TYPE_VECTOR_SUBPARTS (srctype)));
8138 var = vect_get_new_ssa_name (srctype, vect_simple_var);
8139 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
8140 gassign *new_stmt
8141 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
8142 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8143 src = var;
8146 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
8148 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
8149 TYPE_VECTOR_SUBPARTS (idxtype)));
8150 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
8151 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
8152 gassign *new_stmt
8153 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
8154 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8155 op = var;
8158 if (mask)
8160 tree utype;
8161 mask_arg = mask_op;
8162 if (modifier == NARROW)
8164 var = vect_get_new_ssa_name (mask_halfvectype,
8165 vect_simple_var);
8166 gassign *new_stmt
8167 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
8168 : VEC_UNPACK_LO_EXPR,
8169 mask_op);
8170 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8171 mask_arg = var;
8173 tree optype = TREE_TYPE (mask_arg);
8174 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
8175 utype = masktype;
8176 else
8177 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
8178 var = vect_get_new_ssa_name (utype, vect_scalar_var);
8179 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
8180 gassign *new_stmt
8181 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
8182 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8183 mask_arg = var;
8184 if (!useless_type_conversion_p (masktype, utype))
8186 gcc_assert (TYPE_PRECISION (utype)
8187 <= TYPE_PRECISION (masktype));
8188 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
8189 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
8190 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8191 mask_arg = var;
8195 gcall *new_stmt
8196 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
8197 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8199 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8201 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8202 return true;
8204 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8205 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8207 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8208 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
8210 if (grouped_store)
8212 /* FORNOW */
8213 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8215 /* We vectorize all the stmts of the interleaving group when we
8216 reach the last stmt in the group. */
8217 if (DR_GROUP_STORE_COUNT (first_stmt_info)
8218 < DR_GROUP_SIZE (first_stmt_info)
8219 && !slp)
8221 *vec_stmt = NULL;
8222 return true;
8225 if (slp)
8227 grouped_store = false;
8228 /* VEC_NUM is the number of vect stmts to be created for this
8229 group. */
8230 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8231 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8232 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8233 == first_stmt_info);
8234 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8235 op = vect_get_store_rhs (first_stmt_info);
8237 else
8238 /* VEC_NUM is the number of vect stmts to be created for this
8239 group. */
8240 vec_num = group_size;
8242 ref_type = get_group_alias_ptr_type (first_stmt_info);
8244 else
8245 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8247 if (dump_enabled_p ())
8248 dump_printf_loc (MSG_NOTE, vect_location,
8249 "transform store. ncopies = %d\n", ncopies);
8251 if (memory_access_type == VMAT_ELEMENTWISE
8252 || memory_access_type == VMAT_STRIDED_SLP)
8254 gimple_stmt_iterator incr_gsi;
8255 bool insert_after;
8256 gimple *incr;
8257 tree offvar;
8258 tree ivstep;
8259 tree running_off;
8260 tree stride_base, stride_step, alias_off;
8261 tree vec_oprnd;
8262 tree dr_offset;
8263 unsigned int g;
8264 /* Checked by get_load_store_type. */
8265 unsigned int const_nunits = nunits.to_constant ();
8267 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8268 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8270 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8271 stride_base
8272 = fold_build_pointer_plus
8273 (DR_BASE_ADDRESS (first_dr_info->dr),
8274 size_binop (PLUS_EXPR,
8275 convert_to_ptrofftype (dr_offset),
8276 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8277 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8279 /* For a store with loop-invariant (but other than power-of-2)
8280 stride (i.e. not a grouped access) like so:
8282 for (i = 0; i < n; i += stride)
8283 array[i] = ...;
8285 we generate a new induction variable and new stores from
8286 the components of the (vectorized) rhs:
8288 for (j = 0; ; j += VF*stride)
8289 vectemp = ...;
8290 tmp1 = vectemp[0];
8291 array[j] = tmp1;
8292 tmp2 = vectemp[1];
8293 array[j + stride] = tmp2;
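     For instance (an illustrative sketch with VF == 4 and int elements),
     one copy of the vectorized store expands to
       tmp0 = vectemp[0];  array[j] = tmp0;
       tmp1 = vectemp[1];  array[j + stride] = tmp1;
       tmp2 = vectemp[2];  array[j + 2*stride] = tmp2;
       tmp3 = vectemp[3];  array[j + 3*stride] = tmp3;
     and the new induction variable j is then bumped by 4*stride.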
8297 unsigned nstores = const_nunits;
8298 unsigned lnel = 1;
8299 tree ltype = elem_type;
8300 tree lvectype = vectype;
8301 if (slp)
8303 if (group_size < const_nunits
8304 && const_nunits % group_size == 0)
8306 nstores = const_nunits / group_size;
8307 lnel = group_size;
8308 ltype = build_vector_type (elem_type, group_size);
8309 lvectype = vectype;
8311 /* First check if vec_extract optab doesn't support extraction
8312 of vector elts directly. */
8313 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8314 machine_mode vmode;
8315 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8316 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8317 group_size).exists (&vmode)
8318 || (convert_optab_handler (vec_extract_optab,
8319 TYPE_MODE (vectype), vmode)
8320 == CODE_FOR_nothing))
8322 /* Try to avoid emitting an extract of vector elements
8323 by performing the extracts using an integer type of the
8324 same size, extracting from a vector of those and then
8325 re-interpreting it as the original vector type if
8326 supported. */
8327 unsigned lsize
8328 = group_size * GET_MODE_BITSIZE (elmode);
8329 unsigned int lnunits = const_nunits / group_size;
8330 /* If we can't construct such a vector fall back to
8331 element extracts from the original vector type and
8332 element size stores. */
8333 if (int_mode_for_size (lsize, 0).exists (&elmode)
8334 && VECTOR_MODE_P (TYPE_MODE (vectype))
8335 && related_vector_mode (TYPE_MODE (vectype), elmode,
8336 lnunits).exists (&vmode)
8337 && (convert_optab_handler (vec_extract_optab,
8338 vmode, elmode)
8339 != CODE_FOR_nothing))
8341 nstores = lnunits;
8342 lnel = group_size;
8343 ltype = build_nonstandard_integer_type (lsize, 1);
8344 lvectype = build_vector_type (ltype, nstores);
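  /* For instance (a sketch, assuming a group of 2 ints stored from a
     V8SI vector and no direct support for extracting V2SI subvectors):
     lsize is 64, so ltype becomes a 64-bit integer type and lvectype a
     4-element vector of those; the rhs is later view-converted to that
     vector and each generated store writes one 64-bit chunk covering
     both elements of the group.  */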
8346 /* Else fall back to vector extraction anyway.
8347 Fewer stores are more important than avoiding spilling
8348 of the vector we extract from. Compared to the
8349 construction case in vectorizable_load no store-forwarding
8350 issue exists here for reasonable archs. */
8353 else if (group_size >= const_nunits
8354 && group_size % const_nunits == 0)
8356 nstores = 1;
8357 lnel = const_nunits;
8358 ltype = vectype;
8359 lvectype = vectype;
8361 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8362 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8365 ivstep = stride_step;
8366 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8367 build_int_cst (TREE_TYPE (ivstep), vf));
8369 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8371 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8372 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8373 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
8374 loop, &incr_gsi, insert_after,
8375 &offvar, NULL);
8376 incr = gsi_stmt (incr_gsi);
8378 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8380 alias_off = build_int_cst (ref_type, 0);
8381 stmt_vec_info next_stmt_info = first_stmt_info;
8382 for (g = 0; g < group_size; g++)
8384 running_off = offvar;
8385 if (g)
8387 tree size = TYPE_SIZE_UNIT (ltype);
8388 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8389 size);
8390 tree newoff = copy_ssa_name (running_off, NULL);
8391 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8392 running_off, pos);
8393 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8394 running_off = newoff;
8396 if (!slp)
8397 op = vect_get_store_rhs (next_stmt_info);
8398 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8399 op, &vec_oprnds);
8400 unsigned int group_el = 0;
8401 unsigned HOST_WIDE_INT
8402 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8403 for (j = 0; j < ncopies; j++)
8405 vec_oprnd = vec_oprnds[j];
8406 /* Pun the vector to extract from if necessary. */
8407 if (lvectype != vectype)
8409 tree tem = make_ssa_name (lvectype);
8410 gimple *pun
8411 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8412 lvectype, vec_oprnd));
8413 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8414 vec_oprnd = tem;
8416 for (i = 0; i < nstores; i++)
8418 tree newref, newoff;
8419 gimple *incr, *assign;
8420 tree size = TYPE_SIZE (ltype);
8421 /* Extract the i'th component. */
8422 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8423 bitsize_int (i), size);
8424 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8425 size, pos);
8427 elem = force_gimple_operand_gsi (gsi, elem, true,
8428 NULL_TREE, true,
8429 GSI_SAME_STMT);
8431 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8432 group_el * elsz);
8433 newref = build2 (MEM_REF, ltype,
8434 running_off, this_off);
8435 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8437 /* And store it to *running_off. */
8438 assign = gimple_build_assign (newref, elem);
8439 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8441 group_el += lnel;
8442 if (! slp
8443 || group_el == group_size)
8445 newoff = copy_ssa_name (running_off, NULL);
8446 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8447 running_off, stride_step);
8448 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8450 running_off = newoff;
8451 group_el = 0;
8453 if (g == group_size - 1
8454 && !slp)
8456 if (j == 0 && i == 0)
8457 *vec_stmt = assign;
8458 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8462 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8463 vec_oprnds.release ();
8464 if (slp)
8465 break;
8468 return true;
8471 auto_vec<tree> dr_chain (group_size);
8472 oprnds.create (group_size);
8474 gcc_assert (alignment_support_scheme);
8475 vec_loop_masks *loop_masks
8476 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8477 ? &LOOP_VINFO_MASKS (loop_vinfo)
8478 : NULL);
8479 vec_loop_lens *loop_lens
8480 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8481 ? &LOOP_VINFO_LENS (loop_vinfo)
8482 : NULL);
8484 /* Shouldn't go with length-based approach if fully masked. */
8485 gcc_assert (!loop_lens || !loop_masks);
8487 /* Targets with store-lane instructions must not require explicit
8488 realignment. vect_supportable_dr_alignment always returns either
8489 dr_aligned or dr_unaligned_supported for masked operations. */
8490 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8491 && !mask
8492 && !loop_masks)
8493 || alignment_support_scheme == dr_aligned
8494 || alignment_support_scheme == dr_unaligned_supported);
8496 tree offset = NULL_TREE;
8497 if (!known_eq (poffset, 0))
8498 offset = size_int (poffset);
8500 tree bump;
8501 tree vec_offset = NULL_TREE;
8502 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8504 aggr_type = NULL_TREE;
8505 bump = NULL_TREE;
8507 else if (memory_access_type == VMAT_GATHER_SCATTER)
8509 aggr_type = elem_type;
8510 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8511 &bump, &vec_offset);
8513 else
8515 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8516 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8517 else
8518 aggr_type = vectype;
8519 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8520 memory_access_type);
8523 if (mask)
8524 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8526 /* In case the vectorization factor (VF) is bigger than the number
8527 of elements that we can fit in a vectype (nunits), we have to generate
8528 more than one vector stmt, i.e. we need to "unroll" the
8529 vector stmt by a factor VF/nunits. */
8531 /* In case of interleaving (non-unit grouped access):
8533 S1: &base + 2 = x2
8534 S2: &base = x0
8535 S3: &base + 1 = x1
8536 S4: &base + 3 = x3
8538 We create vectorized stores starting from base address (the access of the
8539 first stmt in the chain (S2 in the above example), when the last store stmt
8540 of the chain (S4) is reached:
8542 VS1: &base = vx2
8543 VS2: &base + vec_size*1 = vx0
8544 VS3: &base + vec_size*2 = vx1
8545 VS4: &base + vec_size*3 = vx3
8547 Then permutation statements are generated:
8549 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8550 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8553 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8554 (the order of the data-refs in the output of vect_permute_store_chain
8555 corresponds to the order of scalar stmts in the interleaving chain - see
8556 the documentation of vect_permute_store_chain()).
8558 In case of both multiple types and interleaving, above vector stores and
8559 permutation stmts are created for every copy. The result vector stmts are
8560 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8561 STMT_VINFO_RELATED_STMT for the next copies.
8564 auto_vec<tree> vec_masks;
8565 tree vec_mask = NULL;
8566 auto_vec<tree> vec_offsets;
8567 auto_vec<vec<tree> > gvec_oprnds;
8568 gvec_oprnds.safe_grow_cleared (group_size, true);
8569 for (j = 0; j < ncopies; j++)
8571 gimple *new_stmt;
8572 if (j == 0)
8574 if (slp)
8576 /* Get vectorized arguments for SLP_NODE. */
8577 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8578 op, &vec_oprnds);
8579 vec_oprnd = vec_oprnds[0];
8581 else
8583 /* For interleaved stores we collect vectorized defs for all the
8584 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8585 used as an input to vect_permute_store_chain().
8587 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8588 and OPRNDS are of size 1. */
8589 stmt_vec_info next_stmt_info = first_stmt_info;
8590 for (i = 0; i < group_size; i++)
8592 /* Since gaps are not supported for interleaved stores,
8593 DR_GROUP_SIZE is the exact number of stmts in the chain.
8594 Therefore, NEXT_STMT_INFO can't be NULL. If there
8595 is no interleaving, DR_GROUP_SIZE is 1,
8596 and only one iteration of the loop will be executed. */
8597 op = vect_get_store_rhs (next_stmt_info);
8598 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8599 ncopies, op, &gvec_oprnds[i]);
8600 vec_oprnd = gvec_oprnds[i][0];
8601 dr_chain.quick_push (gvec_oprnds[i][0]);
8602 oprnds.quick_push (gvec_oprnds[i][0]);
8603 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8605 if (mask)
8607 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8608 mask, &vec_masks, mask_vectype);
8609 vec_mask = vec_masks[0];
8613 /* We should have caught mismatched types earlier. */
8614 gcc_assert (useless_type_conversion_p (vectype,
8615 TREE_TYPE (vec_oprnd)));
8616 bool simd_lane_access_p
8617 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8618 if (simd_lane_access_p
8619 && !loop_masks
8620 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8621 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8622 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8623 && integer_zerop (DR_INIT (first_dr_info->dr))
8624 && alias_sets_conflict_p (get_alias_set (aggr_type),
8625 get_alias_set (TREE_TYPE (ref_type))))
8627 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8628 dataref_offset = build_int_cst (ref_type, 0);
8630 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8631 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8632 slp_node, &gs_info, &dataref_ptr,
8633 &vec_offsets);
8634 else
8635 dataref_ptr
8636 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8637 simd_lane_access_p ? loop : NULL,
8638 offset, &dummy, gsi, &ptr_incr,
8639 simd_lane_access_p, bump);
8641 else
8643 /* For interleaved stores we created vectorized defs for all the
8644 defs stored in OPRNDS in the previous iteration (previous copy).
8645 DR_CHAIN is then used as an input to vect_permute_store_chain().
8646 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8647 OPRNDS are of size 1. */
8648 for (i = 0; i < group_size; i++)
8650 vec_oprnd = gvec_oprnds[i][j];
8651 dr_chain[i] = gvec_oprnds[i][j];
8652 oprnds[i] = gvec_oprnds[i][j];
8654 if (mask)
8655 vec_mask = vec_masks[j];
8656 if (dataref_offset)
8657 dataref_offset
8658 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8659 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8660 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8661 stmt_info, bump);
8664 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8666 tree vec_array;
8668 /* Get an array into which we can store the individual vectors. */
8669 vec_array = create_vector_array (vectype, vec_num);
8671 /* Invalidate the current contents of VEC_ARRAY. This should
8672 become an RTL clobber too, which prevents the vector registers
8673 from being upward-exposed. */
8674 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8676 /* Store the individual vectors into the array. */
8677 for (i = 0; i < vec_num; i++)
8679 vec_oprnd = dr_chain[i];
8680 write_vector_array (vinfo, stmt_info,
8681 gsi, vec_oprnd, vec_array, i);
8684 tree final_mask = NULL;
8685 if (loop_masks)
8686 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8687 vectype, j);
8688 if (vec_mask)
8689 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8690 final_mask, vec_mask, gsi);
8692 gcall *call;
8693 if (final_mask)
8695 /* Emit:
8696 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8697 VEC_ARRAY). */
8698 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8699 tree alias_ptr = build_int_cst (ref_type, align);
8700 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8701 dataref_ptr, alias_ptr,
8702 final_mask, vec_array);
8704 else
8706 /* Emit:
8707 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8708 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8709 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8710 vec_array);
8711 gimple_call_set_lhs (call, data_ref);
8713 gimple_call_set_nothrow (call, true);
8714 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8715 new_stmt = call;
8717 /* Record that VEC_ARRAY is now dead. */
8718 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
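  /* Illustrative note: the IFN_STORE_LANES / IFN_MASK_STORE_LANES calls
     built above correspond to structure-store instructions such as
     AArch64's st2/st3/st4, which store VEC_NUM interleaved vectors in a
     single instruction, so no separate permute statements are needed.  */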
8720 else
8722 new_stmt = NULL;
8723 if (grouped_store)
8725 if (j == 0)
8726 result_chain.create (group_size);
8727 /* Permute. */
8728 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8729 gsi, &result_chain);
8732 stmt_vec_info next_stmt_info = first_stmt_info;
8733 for (i = 0; i < vec_num; i++)
8735 unsigned misalign;
8736 unsigned HOST_WIDE_INT align;
8738 tree final_mask = NULL_TREE;
8739 if (loop_masks)
8740 final_mask = vect_get_loop_mask (gsi, loop_masks,
8741 vec_num * ncopies,
8742 vectype, vec_num * j + i);
8743 if (vec_mask)
8744 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8745 final_mask, vec_mask, gsi);
8747 if (memory_access_type == VMAT_GATHER_SCATTER
8748 && gs_info.ifn != IFN_LAST)
8750 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8751 vec_offset = vec_offsets[vec_num * j + i];
8752 tree scale = size_int (gs_info.scale);
8753 gcall *call;
8754 if (final_mask)
8755 call = gimple_build_call_internal
8756 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8757 scale, vec_oprnd, final_mask);
8758 else
8759 call = gimple_build_call_internal
8760 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8761 scale, vec_oprnd);
8762 gimple_call_set_nothrow (call, true);
8763 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8764 new_stmt = call;
8765 break;
8767 else if (memory_access_type == VMAT_GATHER_SCATTER)
8769 /* Emulated scatter. */
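/* A minimal sketch of the per-element expansion emitted below
   (illustrative, not a dump of real output): for each element K

     off_K = (sizetype) BIT_FIELD_REF <vec_offset, idx_bits, K * idx_bits>;
     ptr_K = dataref_ptr + off_K * scale;
     elt_K = BIT_FIELD_REF <vec_oprnd, elt_bits, K * elt_bits>;
     MEM[(elt_type *) ptr_K] = elt_K;  */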
8770 gcc_assert (!final_mask);
8771 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
8772 unsigned HOST_WIDE_INT const_offset_nunits
8773 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
8774 .to_constant ();
8775 vec<constructor_elt, va_gc> *ctor_elts;
8776 vec_alloc (ctor_elts, const_nunits);
8777 gimple_seq stmts = NULL;
8778 tree elt_type = TREE_TYPE (vectype);
8779 unsigned HOST_WIDE_INT elt_size
8780 = tree_to_uhwi (TYPE_SIZE (elt_type));
8781 /* We support offset vectors with more elements
8782 than the data vector for now. */
8783 unsigned HOST_WIDE_INT factor
8784 = const_offset_nunits / const_nunits;
8785 vec_offset = vec_offsets[j / factor];
8786 unsigned elt_offset = (j % factor) * const_nunits;
8787 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
8788 tree scale = size_int (gs_info.scale);
8789 align = get_object_alignment (DR_REF (first_dr_info->dr));
8790 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
8791 for (unsigned k = 0; k < const_nunits; ++k)
8793 /* Compute the offsetted pointer. */
8794 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
8795 bitsize_int (k + elt_offset));
8796 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
8797 idx_type, vec_offset,
8798 TYPE_SIZE (idx_type), boff);
8799 idx = gimple_convert (&stmts, sizetype, idx);
8800 idx = gimple_build (&stmts, MULT_EXPR,
8801 sizetype, idx, scale);
8802 tree ptr = gimple_build (&stmts, PLUS_EXPR,
8803 TREE_TYPE (dataref_ptr),
8804 dataref_ptr, idx);
8805 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
8806 /* Extract the element to be stored. */
8807 tree elt = gimple_build (&stmts, BIT_FIELD_REF,
8808 TREE_TYPE (vectype), vec_oprnd,
8809 TYPE_SIZE (elt_type),
8810 bitsize_int (k * elt_size));
8811 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8812 stmts = NULL;
8813 tree ref = build2 (MEM_REF, ltype, ptr,
8814 build_int_cst (ref_type, 0));
8815 new_stmt = gimple_build_assign (ref, elt);
8816 vect_finish_stmt_generation (vinfo, stmt_info,
8817 new_stmt, gsi);
8819 break;
8822 if (i > 0)
8823 /* Bump the vector pointer. */
8824 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8825 gsi, stmt_info, bump);
8827 if (slp)
8828 vec_oprnd = vec_oprnds[i];
8829 else if (grouped_store)
8830 /* For grouped stores vectorized defs are interleaved in
8831 vect_permute_store_chain(). */
8832 vec_oprnd = result_chain[i];
8834 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8835 if (alignment_support_scheme == dr_aligned)
8836 misalign = 0;
8837 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8839 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8840 misalign = 0;
8842 else
8843 misalign = misalignment;
8844 if (dataref_offset == NULL_TREE
8845 && TREE_CODE (dataref_ptr) == SSA_NAME)
8846 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8847 misalign);
8848 align = least_bit_hwi (misalign | align);
8850 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8852 tree perm_mask = perm_mask_for_reverse (vectype);
8853 tree perm_dest = vect_create_destination_var
8854 (vect_get_store_rhs (stmt_info), vectype);
8855 tree new_temp = make_ssa_name (perm_dest);
8857 /* Generate the permute statement. */
8858 gimple *perm_stmt
8859 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8860 vec_oprnd, perm_mask);
8861 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8863 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8864 vec_oprnd = new_temp;
8867 /* Arguments are ready. Create the new vector stmt. */
8868 if (final_mask)
8870 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8871 gcall *call
8872 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8873 dataref_ptr, ptr,
8874 final_mask, vec_oprnd);
8875 gimple_call_set_nothrow (call, true);
8876 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8877 new_stmt = call;
8879 else if (loop_lens)
8881 machine_mode vmode = TYPE_MODE (vectype);
8882 opt_machine_mode new_ovmode
8883 = get_len_load_store_mode (vmode, false);
8884 machine_mode new_vmode = new_ovmode.require ();
8885 unsigned factor
8886 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
8887 tree final_len
8888 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8889 vec_num * ncopies, vectype,
8890 vec_num * j + i, factor);
8891 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8892 /* Need conversion if it's wrapped with VnQI. */
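/* Illustrative sketch only: if the target only provides byte-length
   stores, a V4SF store gets rewrapped as V16QI and FINAL_LEN is
   measured in bytes (FACTOR == 4), roughly

     tmp = VIEW_CONVERT_EXPR<vector(16) unsigned char>(vec_oprnd);
     .LEN_STORE (dataref_ptr, align_ptr, final_len, tmp, bias);

   where ALIGN_PTR stands for the alignment-carrying pointer constant
   built above.  */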
8893 if (vmode != new_vmode)
8895 tree new_vtype
8896 = build_vector_type_for_mode (unsigned_intQI_type_node,
8897 new_vmode);
8898 tree var
8899 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8900 vec_oprnd
8901 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8902 gassign *new_stmt
8903 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8904 vec_oprnd);
8905 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8906 gsi);
8907 vec_oprnd = var;
8910 signed char biasval =
8911 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8913 tree bias = build_int_cst (intQI_type_node, biasval);
8914 gcall *call
8915 = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
8916 ptr, final_len, vec_oprnd,
8917 bias);
8918 gimple_call_set_nothrow (call, true);
8919 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8920 new_stmt = call;
8922 else
8924 data_ref = fold_build2 (MEM_REF, vectype,
8925 dataref_ptr,
8926 dataref_offset
8927 ? dataref_offset
8928 : build_int_cst (ref_type, 0));
8929 if (alignment_support_scheme == dr_aligned)
8931 else
8932 TREE_TYPE (data_ref)
8933 = build_aligned_type (TREE_TYPE (data_ref),
8934 align * BITS_PER_UNIT);
8935 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8936 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8937 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8940 if (slp)
8941 continue;
8943 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8944 if (!next_stmt_info)
8945 break;
8948 if (!slp)
8950 if (j == 0)
8951 *vec_stmt = new_stmt;
8952 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8956 for (i = 0; i < group_size; ++i)
8958 vec<tree> oprndsi = gvec_oprnds[i];
8959 oprndsi.release ();
8961 oprnds.release ();
8962 result_chain.release ();
8963 vec_oprnds.release ();
8965 return true;
8968 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8969 VECTOR_CST mask. No checks are made that the target platform supports the
8970 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8971 vect_gen_perm_mask_checked. */
8973 tree
8974 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8976 tree mask_type;
8978 poly_uint64 nunits = sel.length ();
8979 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8981 mask_type = build_vector_type (ssizetype, nunits);
8982 return vec_perm_indices_to_tree (mask_type, sel);
8985 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8986 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8988 tree
8989 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8991 machine_mode vmode = TYPE_MODE (vectype);
8992 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
8993 return vect_gen_perm_mask_any (vectype, sel);
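/* Example use (a sketch mirroring perm_mask_for_reverse elsewhere in
   the vectorizer; the variable names are illustrative):

     poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     if (can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
			       indices))
       mask = vect_gen_perm_mask_checked (vectype, indices);  */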
8996 /* Given vector variables X and Y that were generated for the scalar
8997 STMT_INFO, generate instructions to permute the vector elements of X and Y
8998 using permutation mask MASK_VEC, insert them at *GSI and return the
8999 permuted vector variable. */
9001 static tree
9002 permute_vec_elements (vec_info *vinfo,
9003 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9004 gimple_stmt_iterator *gsi)
9006 tree vectype = TREE_TYPE (x);
9007 tree perm_dest, data_ref;
9008 gimple *perm_stmt;
9010 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9011 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9012 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9013 else
9014 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9015 data_ref = make_ssa_name (perm_dest);
9017 /* Generate the permute statement. */
9018 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9019 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9021 return data_ref;
9024 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9025 inserting them on the loop's preheader edge. Returns true if we
9026 were successful in doing so (and thus STMT_INFO can be moved),
9027 otherwise returns false. */
9029 static bool
9030 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
9032 ssa_op_iter i;
9033 tree op;
9034 bool any = false;
9036 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9038 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9039 if (!gimple_nop_p (def_stmt)
9040 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9042 /* Make sure we don't need to recurse. While we could do
9043 so in simple cases, for more complex use webs we don't
9044 have an easy way to preserve stmt order to fulfil
9045 dependencies within them. */
9046 tree op2;
9047 ssa_op_iter i2;
9048 if (gimple_code (def_stmt) == GIMPLE_PHI)
9049 return false;
9050 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9052 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9053 if (!gimple_nop_p (def_stmt2)
9054 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9055 return false;
9057 any = true;
9061 if (!any)
9062 return true;
9064 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9066 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9067 if (!gimple_nop_p (def_stmt)
9068 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9070 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9071 gsi_remove (&gsi, false);
9072 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9076 return true;
9079 /* vectorizable_load.
9081 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9082 that can be vectorized.
9083 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9084 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9085 Return true if STMT_INFO is vectorizable in this way. */
9087 static bool
9088 vectorizable_load (vec_info *vinfo,
9089 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9090 gimple **vec_stmt, slp_tree slp_node,
9091 stmt_vector_for_cost *cost_vec)
9093 tree scalar_dest;
9094 tree vec_dest = NULL;
9095 tree data_ref = NULL;
9096 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9097 class loop *loop = NULL;
9098 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9099 bool nested_in_vect_loop = false;
9100 tree elem_type;
9101 tree new_temp;
9102 machine_mode mode;
9103 tree dummy;
9104 tree dataref_ptr = NULL_TREE;
9105 tree dataref_offset = NULL_TREE;
9106 gimple *ptr_incr = NULL;
9107 int ncopies;
9108 int i, j;
9109 unsigned int group_size;
9110 poly_uint64 group_gap_adj;
9111 tree msq = NULL_TREE, lsq;
9112 tree realignment_token = NULL_TREE;
9113 gphi *phi = NULL;
9114 vec<tree> dr_chain = vNULL;
9115 bool grouped_load = false;
9116 stmt_vec_info first_stmt_info;
9117 stmt_vec_info first_stmt_info_for_drptr = NULL;
9118 bool compute_in_loop = false;
9119 class loop *at_loop;
9120 int vec_num;
9121 bool slp = (slp_node != NULL);
9122 bool slp_perm = false;
9123 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9124 poly_uint64 vf;
9125 tree aggr_type;
9126 gather_scatter_info gs_info;
9127 tree ref_type;
9128 enum vect_def_type mask_dt = vect_unknown_def_type;
9130 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9131 return false;
9133 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9134 && ! vec_stmt)
9135 return false;
9137 if (!STMT_VINFO_DATA_REF (stmt_info))
9138 return false;
9140 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9141 int mask_index = -1;
9142 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9144 scalar_dest = gimple_assign_lhs (assign);
9145 if (TREE_CODE (scalar_dest) != SSA_NAME)
9146 return false;
9148 tree_code code = gimple_assign_rhs_code (assign);
9149 if (code != ARRAY_REF
9150 && code != BIT_FIELD_REF
9151 && code != INDIRECT_REF
9152 && code != COMPONENT_REF
9153 && code != IMAGPART_EXPR
9154 && code != REALPART_EXPR
9155 && code != MEM_REF
9156 && TREE_CODE_CLASS (code) != tcc_declaration)
9157 return false;
9159 else
9161 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9162 if (!call || !gimple_call_internal_p (call))
9163 return false;
9165 internal_fn ifn = gimple_call_internal_fn (call);
9166 if (!internal_load_fn_p (ifn))
9167 return false;
9169 scalar_dest = gimple_call_lhs (call);
9170 if (!scalar_dest)
9171 return false;
9173 mask_index = internal_fn_mask_index (ifn);
9174 /* ??? For SLP the mask operand is always last. */
9175 if (mask_index >= 0 && slp_node)
9176 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
9177 if (mask_index >= 0
9178 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9179 &mask, NULL, &mask_dt, &mask_vectype))
9180 return false;
9183 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9184 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9186 if (loop_vinfo)
9188 loop = LOOP_VINFO_LOOP (loop_vinfo);
9189 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9190 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9192 else
9193 vf = 1;
9195 /* Multiple types in SLP are handled by creating the appropriate number of
9196 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9197 case of SLP. */
9198 if (slp)
9199 ncopies = 1;
9200 else
9201 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9203 gcc_assert (ncopies >= 1);
9205 /* FORNOW. This restriction should be relaxed. */
9206 if (nested_in_vect_loop && ncopies > 1)
9208 if (dump_enabled_p ())
9209 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9210 "multiple types in nested loop.\n");
9211 return false;
9214 /* Invalidate assumptions made by dependence analysis when vectorization
9215 on the unrolled body effectively re-orders stmts. */
9216 if (ncopies > 1
9217 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9218 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9219 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9221 if (dump_enabled_p ())
9222 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9223 "cannot perform implicit CSE when unrolling "
9224 "with negative dependence distance\n");
9225 return false;
9228 elem_type = TREE_TYPE (vectype);
9229 mode = TYPE_MODE (vectype);
9231 /* FORNOW. In some cases we can vectorize even if the data type is not
9232 supported (e.g. data copies). */
9233 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9235 if (dump_enabled_p ())
9236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9237 "Aligned load, but unsupported type.\n");
9238 return false;
9241 /* Check if the load is a part of an interleaving chain. */
9242 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9244 grouped_load = true;
9245 /* FORNOW */
9246 gcc_assert (!nested_in_vect_loop);
9247 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9249 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9250 group_size = DR_GROUP_SIZE (first_stmt_info);
9252 /* Refuse non-SLP vectorization of SLP-only groups. */
9253 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9255 if (dump_enabled_p ())
9256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9257 "cannot vectorize load in non-SLP mode.\n");
9258 return false;
9261 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9263 slp_perm = true;
9265 if (!loop_vinfo)
9267 /* In BB vectorization we may not actually use a loaded vector
9268 accessing elements in excess of DR_GROUP_SIZE. */
9269 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9270 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9271 unsigned HOST_WIDE_INT nunits;
9272 unsigned j, k, maxk = 0;
9273 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9274 if (k > maxk)
9275 maxk = k;
9276 tree vectype = SLP_TREE_VECTYPE (slp_node);
9277 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9278 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9280 if (dump_enabled_p ())
9281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9282 "BB vectorization with gaps at the end of "
9283 "a load is not supported\n");
9284 return false;
9288 auto_vec<tree> tem;
9289 unsigned n_perms;
9290 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9291 true, &n_perms))
9293 if (dump_enabled_p ())
9294 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9295 vect_location,
9296 "unsupported load permutation\n");
9297 return false;
9301 /* Invalidate assumptions made by dependence analysis when vectorization
9302 on the unrolled body effectively re-orders stmts. */
9303 if (!PURE_SLP_STMT (stmt_info)
9304 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9305 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9306 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9308 if (dump_enabled_p ())
9309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9310 "cannot perform implicit CSE when performing "
9311 "group loads with negative dependence distance\n");
9312 return false;
9315 else
9316 group_size = 1;
9318 vect_memory_access_type memory_access_type;
9319 enum dr_alignment_support alignment_support_scheme;
9320 int misalignment;
9321 poly_int64 poffset;
9322 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
9323 ncopies, &memory_access_type, &poffset,
9324 &alignment_support_scheme, &misalignment, &gs_info))
9325 return false;
9327 if (mask)
9329 if (memory_access_type == VMAT_CONTIGUOUS)
9331 machine_mode vec_mode = TYPE_MODE (vectype);
9332 if (!VECTOR_MODE_P (vec_mode)
9333 || !can_vec_mask_load_store_p (vec_mode,
9334 TYPE_MODE (mask_vectype), true))
9335 return false;
9337 else if (memory_access_type != VMAT_LOAD_STORE_LANES
9338 && memory_access_type != VMAT_GATHER_SCATTER)
9340 if (dump_enabled_p ())
9341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9342 "unsupported access type for masked load.\n");
9343 return false;
9345 else if (memory_access_type == VMAT_GATHER_SCATTER
9346 && gs_info.ifn == IFN_LAST
9347 && !gs_info.decl)
9349 if (dump_enabled_p ())
9350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9351 "unsupported masked emulated gather.\n");
9352 return false;
9356 if (!vec_stmt) /* transformation not required. */
9358 if (slp_node
9359 && mask
9360 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
9361 mask_vectype))
9363 if (dump_enabled_p ())
9364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9365 "incompatible vector types for invariants\n");
9366 return false;
9369 if (!slp)
9370 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
9372 if (loop_vinfo
9373 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9374 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9375 VLS_LOAD, group_size,
9376 memory_access_type, &gs_info,
9377 mask);
9379 if (dump_enabled_p ()
9380 && memory_access_type != VMAT_ELEMENTWISE
9381 && memory_access_type != VMAT_GATHER_SCATTER
9382 && alignment_support_scheme != dr_aligned)
9383 dump_printf_loc (MSG_NOTE, vect_location,
9384 "Vectorizing an unaligned access.\n");
9386 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9387 vinfo->any_known_not_updated_vssa = true;
9389 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9390 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
9391 alignment_support_scheme, misalignment,
9392 &gs_info, slp_node, cost_vec);
9393 return true;
9396 if (!slp)
9397 gcc_assert (memory_access_type
9398 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9400 if (dump_enabled_p ())
9401 dump_printf_loc (MSG_NOTE, vect_location,
9402 "transform load. ncopies = %d\n", ncopies);
9404 /* Transform. */
9406 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9407 ensure_base_align (dr_info);
9409 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9411 vect_build_gather_load_calls (vinfo,
9412 stmt_info, gsi, vec_stmt, &gs_info, mask);
9413 return true;
9416 if (memory_access_type == VMAT_INVARIANT)
9418 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9419 /* If we have versioned for aliasing or the loop doesn't
9420 have any data dependencies that would preclude this,
9421 then we are sure this is a loop invariant load and
9422 thus we can insert it on the preheader edge. */
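/* A rough sketch of the intent (illustrative):

     for (i = 0; i < n; ++i)
       ... = *q;   // *q invariant and safe to hoist

   becomes

     x_h = *q;                      // emitted on the preheader edge
     vx = {x_h, x_h, x_h, x_h};     // splat feeding the vectorized uses  */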
9423 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9424 && !nested_in_vect_loop
9425 && hoist_defs_of_uses (stmt_info, loop));
9426 if (hoist_p)
9428 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9429 if (dump_enabled_p ())
9430 dump_printf_loc (MSG_NOTE, vect_location,
9431 "hoisting out of the vectorized loop: %G",
9432 (gimple *) stmt);
9433 scalar_dest = copy_ssa_name (scalar_dest);
9434 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9435 edge pe = loop_preheader_edge (loop);
9436 gphi *vphi = get_virtual_phi (loop->header);
9437 tree vuse;
9438 if (vphi)
9439 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
9440 else
9441 vuse = gimple_vuse (gsi_stmt (*gsi));
9442 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
9443 gimple_set_vuse (new_stmt, vuse);
9444 gsi_insert_on_edge_immediate (pe, new_stmt);
9446 /* These copies are all equivalent, but currently the representation
9447 requires a separate STMT_VINFO_VEC_STMT for each one. */
9448 gimple_stmt_iterator gsi2 = *gsi;
9449 gsi_next (&gsi2);
9450 for (j = 0; j < ncopies; j++)
9452 if (hoist_p)
9453 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9454 vectype, NULL);
9455 else
9456 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9457 vectype, &gsi2);
9458 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9459 if (slp)
9460 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9461 else
9463 if (j == 0)
9464 *vec_stmt = new_stmt;
9465 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9468 return true;
9471 if (memory_access_type == VMAT_ELEMENTWISE
9472 || memory_access_type == VMAT_STRIDED_SLP)
9474 gimple_stmt_iterator incr_gsi;
9475 bool insert_after;
9476 tree offvar;
9477 tree ivstep;
9478 tree running_off;
9479 vec<constructor_elt, va_gc> *v = NULL;
9480 tree stride_base, stride_step, alias_off;
9481 /* Checked by get_load_store_type. */
9482 unsigned int const_nunits = nunits.to_constant ();
9483 unsigned HOST_WIDE_INT cst_offset = 0;
9484 tree dr_offset;
9486 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9487 gcc_assert (!nested_in_vect_loop);
9489 if (grouped_load)
9491 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9492 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9494 else
9496 first_stmt_info = stmt_info;
9497 first_dr_info = dr_info;
9499 if (slp && grouped_load)
9501 group_size = DR_GROUP_SIZE (first_stmt_info);
9502 ref_type = get_group_alias_ptr_type (first_stmt_info);
9504 else
9506 if (grouped_load)
9507 cst_offset
9508 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9509 * vect_get_place_in_interleaving_chain (stmt_info,
9510 first_stmt_info));
9511 group_size = 1;
9512 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9515 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9516 stride_base
9517 = fold_build_pointer_plus
9518 (DR_BASE_ADDRESS (first_dr_info->dr),
9519 size_binop (PLUS_EXPR,
9520 convert_to_ptrofftype (dr_offset),
9521 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9522 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9524 /* For a load with loop-invariant (but other than power-of-2)
9525 stride (i.e. not a grouped access) like so:
9527 for (i = 0; i < n; i += stride)
9528 ... = array[i];
9530 we generate a new induction variable and new accesses to
9531 form a new vector (or vectors, depending on ncopies):
9533 for (j = 0; ; j += VF*stride)
9534 tmp1 = array[j];
9535 tmp2 = array[j + stride];
9537 vectemp = {tmp1, tmp2, ...}
9540 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9541 build_int_cst (TREE_TYPE (stride_step), vf));
9543 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9545 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9546 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9547 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
9548 loop, &incr_gsi, insert_after,
9549 &offvar, NULL);
9551 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9553 running_off = offvar;
9554 alias_off = build_int_cst (ref_type, 0);
9555 int nloads = const_nunits;
9556 int lnel = 1;
9557 tree ltype = TREE_TYPE (vectype);
9558 tree lvectype = vectype;
9559 auto_vec<tree> dr_chain;
9560 if (memory_access_type == VMAT_STRIDED_SLP)
9562 if (group_size < const_nunits)
9564 /* First check if vec_init optab supports construction from vector
9565 elts directly. Otherwise avoid emitting a constructor of
9566 vector elements by performing the loads using an integer type
9567 of the same size, constructing a vector of those and then
9568 re-interpreting it as the original vector type. This avoids a
9569 huge runtime penalty due to the general inability to perform
9570 store forwarding from smaller stores to a larger load. */
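/* Hypothetical numbers for illustration: with a V8HI vectype and
   group_size == 2, vector_vector_composition_type can return V4SI
   with PTYPE a 32-bit integer type, so we emit four 32-bit loads,
   each covering one two-element group:

     tmp_0 = MEM <unsigned int> [off_0];
     ...
     tmp_3 = MEM <unsigned int> [off_3];
     vectemp_si = {tmp_0, tmp_1, tmp_2, tmp_3};
     vectemp = VIEW_CONVERT_EXPR<vector(8) short int>(vectemp_si);

   instead of eight 16-bit loads feeding a CONSTRUCTOR.  */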
9571 tree ptype;
9572 tree vtype
9573 = vector_vector_composition_type (vectype,
9574 const_nunits / group_size,
9575 &ptype);
9576 if (vtype != NULL_TREE)
9578 nloads = const_nunits / group_size;
9579 lnel = group_size;
9580 lvectype = vtype;
9581 ltype = ptype;
9584 else
9586 nloads = 1;
9587 lnel = const_nunits;
9588 ltype = vectype;
9590 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9592 /* With a single-element vectype (vector(1) scalar_type) one load covers the whole vector. */
9593 else if (nloads == 1)
9594 ltype = vectype;
9596 if (slp)
9598 /* For SLP permutation support we need to load the whole group,
9599 not only the number of vector stmts the permutation result
9600 fits in. */
9601 if (slp_perm)
9603 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9604 variable VF. */
9605 unsigned int const_vf = vf.to_constant ();
9606 ncopies = CEIL (group_size * const_vf, const_nunits);
9607 dr_chain.create (ncopies);
9609 else
9610 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9612 unsigned int group_el = 0;
9613 unsigned HOST_WIDE_INT
9614 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9615 unsigned int n_groups = 0;
9616 for (j = 0; j < ncopies; j++)
9618 if (nloads > 1)
9619 vec_alloc (v, nloads);
9620 gimple *new_stmt = NULL;
9621 for (i = 0; i < nloads; i++)
9623 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9624 group_el * elsz + cst_offset);
9625 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9626 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9627 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9628 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9629 if (nloads > 1)
9630 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9631 gimple_assign_lhs (new_stmt));
9633 group_el += lnel;
9634 if (! slp
9635 || group_el == group_size)
9637 n_groups++;
9638 /* When doing SLP make sure not to load elements from
9639 the next vector iteration; those will not be accessed,
9640 so just use the last element again. See PR107451. */
9641 if (!slp || known_lt (n_groups, vf))
9643 tree newoff = copy_ssa_name (running_off);
9644 gimple *incr
9645 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9646 running_off, stride_step);
9647 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9648 running_off = newoff;
9650 group_el = 0;
9653 if (nloads > 1)
9655 tree vec_inv = build_constructor (lvectype, v);
9656 new_temp = vect_init_vector (vinfo, stmt_info,
9657 vec_inv, lvectype, gsi);
9658 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9659 if (lvectype != vectype)
9661 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9662 VIEW_CONVERT_EXPR,
9663 build1 (VIEW_CONVERT_EXPR,
9664 vectype, new_temp));
9665 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9669 if (slp)
9671 if (slp_perm)
9672 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9673 else
9674 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9676 else
9678 if (j == 0)
9679 *vec_stmt = new_stmt;
9680 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9683 if (slp_perm)
9685 unsigned n_perms;
9686 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9687 false, &n_perms);
9689 return true;
9692 if (memory_access_type == VMAT_GATHER_SCATTER
9693 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9694 grouped_load = false;
9696 if (grouped_load)
9698 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9699 group_size = DR_GROUP_SIZE (first_stmt_info);
9700 /* For SLP vectorization we directly vectorize a subchain
9701 without permutation. */
9702 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9703 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9704 /* For BB vectorization always use the first stmt to base
9705 the data ref pointer on. */
9706 if (bb_vinfo)
9707 first_stmt_info_for_drptr
9708 = vect_find_first_scalar_stmt_in_slp (slp_node);
9710 /* Check if the chain of loads is already vectorized. */
9711 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9712 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9713 ??? But we can only do so if there is exactly one
9714 as we have no way to get at the rest. Leave the CSE
9715 opportunity alone.
9716 ??? With the group load eventually participating
9717 in multiple different permutations (having multiple
9718 slp nodes which refer to the same group) the CSE
9719 is even wrong code. See PR56270. */
9720 && !slp)
9722 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9723 return true;
9725 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9726 group_gap_adj = 0;
9728 /* VEC_NUM is the number of vect stmts to be created for this group. */
9729 if (slp)
9731 grouped_load = false;
9732 /* If an SLP permutation is from N elements to N elements,
9733 and if one vector holds a whole number of N, we can load
9734 the inputs to the permutation in the same way as an
9735 unpermuted sequence. In other cases we need to load the
9736 whole group, not only the number of vector stmts the
9737 permutation result fits in. */
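/* Worked example (hypothetical numbers): a load permutation over a
   group of 3 with nunits == 4 and VF == 4 fails multiple_p (4, 3),
   so we load the whole group: vec_num = CEIL (3 * 4, 4) = 3 vectors.  */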
9738 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9739 if (slp_perm
9740 && (group_size != scalar_lanes
9741 || !multiple_p (nunits, group_size)))
9743 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9744 variable VF; see vect_transform_slp_perm_load. */
9745 unsigned int const_vf = vf.to_constant ();
9746 unsigned int const_nunits = nunits.to_constant ();
9747 vec_num = CEIL (group_size * const_vf, const_nunits);
9748 group_gap_adj = vf * group_size - nunits * vec_num;
9750 else
9752 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9753 group_gap_adj
9754 = group_size - scalar_lanes;
9757 else
9758 vec_num = group_size;
9760 ref_type = get_group_alias_ptr_type (first_stmt_info);
9762 else
9764 first_stmt_info = stmt_info;
9765 first_dr_info = dr_info;
9766 group_size = vec_num = 1;
9767 group_gap_adj = 0;
9768 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9769 if (slp)
9770 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9773 gcc_assert (alignment_support_scheme);
9774 vec_loop_masks *loop_masks
9775 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9776 ? &LOOP_VINFO_MASKS (loop_vinfo)
9777 : NULL);
9778 vec_loop_lens *loop_lens
9779 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9780 ? &LOOP_VINFO_LENS (loop_vinfo)
9781 : NULL);
9783 /* We shouldn't use the length-based approach if the loop is fully masked. */
9784 gcc_assert (!loop_lens || !loop_masks);
9786 /* Targets with store-lane instructions must not require explicit
9787 realignment. vect_supportable_dr_alignment always returns either
9788 dr_aligned or dr_unaligned_supported for masked operations. */
9789 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9790 && !mask
9791 && !loop_masks)
9792 || alignment_support_scheme == dr_aligned
9793 || alignment_support_scheme == dr_unaligned_supported);
9795 /* In case the vectorization factor (VF) is bigger than the number
9796 of elements that we can fit in a vectype (nunits), we have to generate
9797 more than one vector stmt - i.e. we need to "unroll" the
9798 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9799 from one copy of the vector stmt to the next, in the field
9800 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9801 stages to find the correct vector defs to be used when vectorizing
9802 stmts that use the defs of the current stmt. The example below
9803 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9804 need to create 4 vectorized stmts):
9806 before vectorization:
9807 RELATED_STMT VEC_STMT
9808 S1: x = memref - -
9809 S2: z = x + 1 - -
9811 step 1: vectorize stmt S1:
9812 We first create the vector stmt VS1_0, and, as usual, record a
9813 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9814 Next, we create the vector stmt VS1_1, and record a pointer to
9815 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9816 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9817 stmts and pointers:
9818 RELATED_STMT VEC_STMT
9819 VS1_0: vx0 = memref0 VS1_1 -
9820 VS1_1: vx1 = memref1 VS1_2 -
9821 VS1_2: vx2 = memref2 VS1_3 -
9822 VS1_3: vx3 = memref3 - -
9823 S1: x = load - VS1_0
9824 S2: z = x + 1 - -
9827 /* In case of interleaving (non-unit grouped access):
9829 S1: x2 = &base + 2
9830 S2: x0 = &base
9831 S3: x1 = &base + 1
9832 S4: x3 = &base + 3
9834 Vectorized loads are created in the order of memory accesses
9835 starting from the access of the first stmt of the chain:
9837 VS1: vx0 = &base
9838 VS2: vx1 = &base + vec_size*1
9839 VS3: vx3 = &base + vec_size*2
9840 VS4: vx4 = &base + vec_size*3
9842 Then permutation statements are generated:
9844 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9845 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9848 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9849 (the order of the data-refs in the output of vect_permute_load_chain
9850 corresponds to the order of scalar stmts in the interleaving chain - see
9851 the documentation of vect_permute_load_chain()).
9852 The generation of permutation stmts and recording them in
9853 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9855 In case of both multiple types and interleaving, the vector loads and
9856 permutation stmts above are created for every copy. The result vector
9857 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9858 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9860 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9861 on a target that supports unaligned accesses (dr_unaligned_supported)
9862 we generate the following code:
9863 p = initial_addr;
9864 indx = 0;
9865 loop {
9866 p = p + indx * vectype_size;
9867 vec_dest = *(p);
9868 indx = indx + 1;
9871 Otherwise, the data reference is potentially unaligned on a target that
9872 does not support unaligned accesses (dr_explicit_realign_optimized);
9873 we then generate the following code, in which the data in each iteration is
9874 obtained by two vector loads, one from the previous iteration, and one
9875 from the current iteration:
9876 p1 = initial_addr;
9877 msq_init = *(floor(p1))
9878 p2 = initial_addr + VS - 1;
9879 realignment_token = call target_builtin;
9880 indx = 0;
9881 loop {
9882 p2 = p2 + indx * vectype_size
9883 lsq = *(floor(p2))
9884 vec_dest = realign_load (msq, lsq, realignment_token)
9885 indx = indx + 1;
9886 msq = lsq;
9887 } */
9889 /* If the misalignment remains the same throughout the execution of the
9890 loop, we can create the init_addr and permutation mask at the loop
9891 preheader. Otherwise, it needs to be created inside the loop.
9892 This can only occur when vectorizing memory accesses in the inner-loop
9893 nested within an outer-loop that is being vectorized. */
9895 if (nested_in_vect_loop
9896 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9897 GET_MODE_SIZE (TYPE_MODE (vectype))))
9899 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9900 compute_in_loop = true;
9903 bool diff_first_stmt_info
9904 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9906 tree offset = NULL_TREE;
9907 if ((alignment_support_scheme == dr_explicit_realign_optimized
9908 || alignment_support_scheme == dr_explicit_realign)
9909 && !compute_in_loop)
9911 /* If we have a different first_stmt_info, we can't set up realignment
9912 here, since we can't guarantee that first_stmt_info's DR has been
9913 initialized yet; use first_stmt_info_for_drptr's DR by bumping the
9914 distance from first_stmt_info's DR instead, as below. */
9915 if (!diff_first_stmt_info)
9916 msq = vect_setup_realignment (vinfo,
9917 first_stmt_info, gsi, &realignment_token,
9918 alignment_support_scheme, NULL_TREE,
9919 &at_loop);
9920 if (alignment_support_scheme == dr_explicit_realign_optimized)
9922 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9923 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9924 size_one_node);
9925 gcc_assert (!first_stmt_info_for_drptr);
9928 else
9929 at_loop = loop;
9931 if (!known_eq (poffset, 0))
9932 offset = (offset
9933 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9934 : size_int (poffset));
9936 tree bump;
9937 tree vec_offset = NULL_TREE;
9938 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9940 aggr_type = NULL_TREE;
9941 bump = NULL_TREE;
9943 else if (memory_access_type == VMAT_GATHER_SCATTER)
9945 aggr_type = elem_type;
9946 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9947 &bump, &vec_offset);
9949 else
9951 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9952 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9953 else
9954 aggr_type = vectype;
9955 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9956 memory_access_type);
9959 auto_vec<tree> vec_offsets;
9960 auto_vec<tree> vec_masks;
9961 if (mask)
9963 if (slp_node)
9964 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9965 &vec_masks);
9966 else
9967 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9968 &vec_masks, mask_vectype);
9970 tree vec_mask = NULL_TREE;
9971 poly_uint64 group_elt = 0;
9972 for (j = 0; j < ncopies; j++)
9974 /* 1. Create the vector or array pointer update chain. */
9975 if (j == 0)
9977 bool simd_lane_access_p
9978 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9979 if (simd_lane_access_p
9980 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9981 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9982 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9983 && integer_zerop (DR_INIT (first_dr_info->dr))
9984 && alias_sets_conflict_p (get_alias_set (aggr_type),
9985 get_alias_set (TREE_TYPE (ref_type)))
9986 && (alignment_support_scheme == dr_aligned
9987 || alignment_support_scheme == dr_unaligned_supported))
9989 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9990 dataref_offset = build_int_cst (ref_type, 0);
9992 else if (diff_first_stmt_info)
9994 dataref_ptr
9995 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9996 aggr_type, at_loop, offset, &dummy,
9997 gsi, &ptr_incr, simd_lane_access_p,
9998 bump);
9999 /* Adjust the pointer by the difference to first_stmt. */
10000 data_reference_p ptrdr
10001 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
10002 tree diff
10003 = fold_convert (sizetype,
10004 size_binop (MINUS_EXPR,
10005 DR_INIT (first_dr_info->dr),
10006 DR_INIT (ptrdr)));
10007 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10008 stmt_info, diff);
10009 if (alignment_support_scheme == dr_explicit_realign)
10011 msq = vect_setup_realignment (vinfo,
10012 first_stmt_info_for_drptr, gsi,
10013 &realignment_token,
10014 alignment_support_scheme,
10015 dataref_ptr, &at_loop);
10016 gcc_assert (!compute_in_loop);
10019 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10021 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10022 slp_node, &gs_info, &dataref_ptr,
10023 &vec_offsets);
10025 else
10026 dataref_ptr
10027 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10028 at_loop,
10029 offset, &dummy, gsi, &ptr_incr,
10030 simd_lane_access_p, bump);
10031 if (mask)
10032 vec_mask = vec_masks[0];
10034 else
10036 if (dataref_offset)
10037 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
10038 bump);
10039 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10040 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10041 stmt_info, bump);
10042 if (mask)
10043 vec_mask = vec_masks[j];
10046 if (grouped_load || slp_perm)
10047 dr_chain.create (vec_num);
10049 gimple *new_stmt = NULL;
10050 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10052 tree vec_array;
10054 vec_array = create_vector_array (vectype, vec_num);
10056 tree final_mask = NULL_TREE;
10057 if (loop_masks)
10058 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
10059 vectype, j);
10060 if (vec_mask)
10061 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10062 final_mask, vec_mask, gsi);
10064 gcall *call;
10065 if (final_mask)
10067 /* Emit:
10068 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10069 VEC_MASK). */
10070 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10071 tree alias_ptr = build_int_cst (ref_type, align);
10072 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10073 dataref_ptr, alias_ptr,
10074 final_mask);
10076 else
10078 /* Emit:
10079 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10080 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10081 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10083 gimple_call_set_lhs (call, vec_array);
10084 gimple_call_set_nothrow (call, true);
10085 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10086 new_stmt = call;
10088 /* Extract each vector into an SSA_NAME. */
10089 for (i = 0; i < vec_num; i++)
10091 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10092 vec_array, i);
10093 dr_chain.quick_push (new_temp);
10096 /* Record the mapping between SSA_NAMEs and statements. */
10097 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10099 /* Record that VEC_ARRAY is now dead. */
10100 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10102 else
10104 for (i = 0; i < vec_num; i++)
10106 tree final_mask = NULL_TREE;
10107 if (loop_masks
10108 && memory_access_type != VMAT_INVARIANT)
10109 final_mask = vect_get_loop_mask (gsi, loop_masks,
10110 vec_num * ncopies,
10111 vectype, vec_num * j + i);
10112 if (vec_mask)
10113 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10114 final_mask, vec_mask, gsi);
10116 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10117 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10118 gsi, stmt_info, bump);
10120 /* 2. Create the vector-load in the loop. */
10121 switch (alignment_support_scheme)
10123 case dr_aligned:
10124 case dr_unaligned_supported:
10126 unsigned int misalign;
10127 unsigned HOST_WIDE_INT align;
10129 if (memory_access_type == VMAT_GATHER_SCATTER
10130 && gs_info.ifn != IFN_LAST)
10132 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10133 vec_offset = vec_offsets[vec_num * j + i];
10134 tree zero = build_zero_cst (vectype);
10135 tree scale = size_int (gs_info.scale);
10136 gcall *call;
10137 if (final_mask)
10138 call = gimple_build_call_internal
10139 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
10140 vec_offset, scale, zero, final_mask);
10141 else
10142 call = gimple_build_call_internal
10143 (IFN_GATHER_LOAD, 4, dataref_ptr,
10144 vec_offset, scale, zero);
10145 gimple_call_set_nothrow (call, true);
10146 new_stmt = call;
10147 data_ref = NULL_TREE;
10148 break;
10150 else if (memory_access_type == VMAT_GATHER_SCATTER)
10152 /* Emulated gather-scatter. */
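/* A minimal sketch of the per-element expansion emitted below
   (illustrative): for each element K

     off_K = (sizetype) BIT_FIELD_REF <vec_offset, idx_bits, K * idx_bits>;
     ptr_K = dataref_ptr + off_K * scale;
     elt_K = MEM[(elt_type *) ptr_K];

   and the ELT_K values are collected into a CONSTRUCTOR forming the
   vector result.  */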
10153 gcc_assert (!final_mask);
10154 unsigned HOST_WIDE_INT const_nunits
10155 = nunits.to_constant ();
10156 unsigned HOST_WIDE_INT const_offset_nunits
10157 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
10158 .to_constant ();
10159 vec<constructor_elt, va_gc> *ctor_elts;
10160 vec_alloc (ctor_elts, const_nunits);
10161 gimple_seq stmts = NULL;
10162 /* We support offset vectors with more elements
10163 than the data vector for now. */
10164 unsigned HOST_WIDE_INT factor
10165 = const_offset_nunits / const_nunits;
10166 vec_offset = vec_offsets[j / factor];
10167 unsigned elt_offset = (j % factor) * const_nunits;
10168 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
10169 tree scale = size_int (gs_info.scale);
10170 align
10171 = get_object_alignment (DR_REF (first_dr_info->dr));
10172 tree ltype = build_aligned_type (TREE_TYPE (vectype),
10173 align);
10174 for (unsigned k = 0; k < const_nunits; ++k)
10176 tree boff = size_binop (MULT_EXPR,
10177 TYPE_SIZE (idx_type),
10178 bitsize_int
10179 (k + elt_offset));
10180 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
10181 idx_type, vec_offset,
10182 TYPE_SIZE (idx_type),
10183 boff);
10184 idx = gimple_convert (&stmts, sizetype, idx);
10185 idx = gimple_build (&stmts, MULT_EXPR,
10186 sizetype, idx, scale);
10187 tree ptr = gimple_build (&stmts, PLUS_EXPR,
10188 TREE_TYPE (dataref_ptr),
10189 dataref_ptr, idx);
10190 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
10191 tree elt = make_ssa_name (TREE_TYPE (vectype));
10192 tree ref = build2 (MEM_REF, ltype, ptr,
10193 build_int_cst (ref_type, 0));
10194 new_stmt = gimple_build_assign (elt, ref);
10195 gimple_set_vuse (new_stmt,
10196 gimple_vuse (gsi_stmt (*gsi)));
10197 gimple_seq_add_stmt (&stmts, new_stmt);
10198 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
10200 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
10201 new_stmt = gimple_build_assign (NULL_TREE,
10202 build_constructor
10203 (vectype, ctor_elts));
10204 data_ref = NULL_TREE;
10205 break;
10208 align =
10209 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
10210 if (alignment_support_scheme == dr_aligned)
10211 misalign = 0;
10212 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
10214 align = dr_alignment
10215 (vect_dr_behavior (vinfo, first_dr_info));
10216 misalign = 0;
10218 else
10219 misalign = misalignment;
10220 if (dataref_offset == NULL_TREE
10221 && TREE_CODE (dataref_ptr) == SSA_NAME)
10222 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
10223 align, misalign);
10224 align = least_bit_hwi (misalign | align);
10226 if (final_mask)
10228 tree ptr = build_int_cst (ref_type,
10229 align * BITS_PER_UNIT);
10230 gcall *call
10231 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
10232 dataref_ptr, ptr,
10233 final_mask);
10234 gimple_call_set_nothrow (call, true);
10235 new_stmt = call;
10236 data_ref = NULL_TREE;
10238 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
10240 machine_mode vmode = TYPE_MODE (vectype);
10241 opt_machine_mode new_ovmode
10242 = get_len_load_store_mode (vmode, true);
10243 machine_mode new_vmode = new_ovmode.require ();
10244 unsigned factor = (new_ovmode == vmode)
10245 ? 1
10246 : GET_MODE_UNIT_SIZE (vmode);
10247 tree final_len
10248 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10249 vec_num * ncopies, vectype,
10250 vec_num * j + i, factor);
10251 tree ptr
10252 = build_int_cst (ref_type, align * BITS_PER_UNIT);
10254 tree qi_type = unsigned_intQI_type_node;
10256 signed char biasval =
10257 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10259 tree bias = build_int_cst (intQI_type_node, biasval);
10261 gcall *call
10262 = gimple_build_call_internal (IFN_LEN_LOAD, 4,
10263 dataref_ptr, ptr,
10264 final_len, bias);
10265 gimple_call_set_nothrow (call, true);
10266 new_stmt = call;
10267 data_ref = NULL_TREE;
10269 /* Need conversion if it's wrapped with VnQI. */
10270 if (vmode != new_vmode)
10272 tree new_vtype
10273 = build_vector_type_for_mode (qi_type, new_vmode);
10274 tree var = vect_get_new_ssa_name (new_vtype,
10275 vect_simple_var);
10276 gimple_set_lhs (call, var);
10277 vect_finish_stmt_generation (vinfo, stmt_info, call,
10278 gsi);
10279 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
10280 new_stmt
10281 = gimple_build_assign (vec_dest,
10282 VIEW_CONVERT_EXPR, op);
10285 else
10287 tree ltype = vectype;
10288 tree new_vtype = NULL_TREE;
10289 unsigned HOST_WIDE_INT gap
10290 = DR_GROUP_GAP (first_stmt_info);
10291 unsigned int vect_align
10292 = vect_known_alignment_in_bytes (first_dr_info,
10293 vectype);
10294 unsigned int scalar_dr_size
10295 = vect_get_scalar_dr_size (first_dr_info);
10296 /* If there's no peeling for gaps but we have a gap
10297 with SLP loads, then load the lower half of the
10298 vector only. See get_group_load_store_type for
10299 when we apply this optimization. */
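/* Illustrative sketch: with group_size == 4, gap == 2 and nunits == 4
   we load only the low half via LTYPE and widen it with a zero half:

     tem = MEM <half_vtype> [dataref_ptr];
     vx = {tem, { 0, 0 }};          // or {{ 0, 0 }, tem} when reversed  */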
10300 if (slp
10301 && loop_vinfo
10302 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
10303 && gap != 0
10304 && known_eq (nunits, (group_size - gap) * 2)
10305 && known_eq (nunits, group_size)
10306 && gap >= (vect_align / scalar_dr_size))
10308 tree half_vtype;
10309 new_vtype
10310 = vector_vector_composition_type (vectype, 2,
10311 &half_vtype);
10312 if (new_vtype != NULL_TREE)
10313 ltype = half_vtype;
10315 tree offset
10316 = (dataref_offset ? dataref_offset
10317 : build_int_cst (ref_type, 0));
10318 if (ltype != vectype
10319 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10321 unsigned HOST_WIDE_INT gap_offset
10322 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
10323 tree gapcst = build_int_cst (ref_type, gap_offset);
10324 offset = size_binop (PLUS_EXPR, offset, gapcst);
10326 data_ref
10327 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
10328 if (alignment_support_scheme == dr_aligned)
10330 else
10331 TREE_TYPE (data_ref)
10332 = build_aligned_type (TREE_TYPE (data_ref),
10333 align * BITS_PER_UNIT);
10334 if (ltype != vectype)
10336 vect_copy_ref_info (data_ref,
10337 DR_REF (first_dr_info->dr));
10338 tree tem = make_ssa_name (ltype);
10339 new_stmt = gimple_build_assign (tem, data_ref);
10340 vect_finish_stmt_generation (vinfo, stmt_info,
10341 new_stmt, gsi);
10342 data_ref = NULL;
10343 vec<constructor_elt, va_gc> *v;
10344 vec_alloc (v, 2);
10345 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10347 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10348 build_zero_cst (ltype));
10349 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
10351 else
10353 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
10354 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10355 build_zero_cst (ltype));
10357 gcc_assert (new_vtype != NULL_TREE);
10358 if (new_vtype == vectype)
10359 new_stmt = gimple_build_assign (
10360 vec_dest, build_constructor (vectype, v));
10361 else
10363 tree new_vname = make_ssa_name (new_vtype);
10364 new_stmt = gimple_build_assign (
10365 new_vname, build_constructor (new_vtype, v));
10366 vect_finish_stmt_generation (vinfo, stmt_info,
10367 new_stmt, gsi);
10368 new_stmt = gimple_build_assign (
10369 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
10370 new_vname));
10374 break;
10376 case dr_explicit_realign:
10378 tree ptr, bump;
10380 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10382 if (compute_in_loop)
10383 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10384 &realignment_token,
10385 dr_explicit_realign,
10386 dataref_ptr, NULL);
10388 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10389 ptr = copy_ssa_name (dataref_ptr);
10390 else
10391 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
10392 // For explicit realign the target alignment should be
10393 // known at compile time.
10394 unsigned HOST_WIDE_INT align =
10395 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10396 new_stmt = gimple_build_assign
10397 (ptr, BIT_AND_EXPR, dataref_ptr,
10398 build_int_cst
10399 (TREE_TYPE (dataref_ptr),
10400 -(HOST_WIDE_INT) align));
10401 vect_finish_stmt_generation (vinfo, stmt_info,
10402 new_stmt, gsi);
10403 data_ref
10404 = build2 (MEM_REF, vectype, ptr,
10405 build_int_cst (ref_type, 0));
10406 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10407 vec_dest = vect_create_destination_var (scalar_dest,
10408 vectype);
10409 new_stmt = gimple_build_assign (vec_dest, data_ref);
10410 new_temp = make_ssa_name (vec_dest, new_stmt);
10411 gimple_assign_set_lhs (new_stmt, new_temp);
10412 gimple_move_vops (new_stmt, stmt_info->stmt);
10413 vect_finish_stmt_generation (vinfo, stmt_info,
10414 new_stmt, gsi);
10415 msq = new_temp;
10417 bump = size_binop (MULT_EXPR, vs,
10418 TYPE_SIZE_UNIT (elem_type));
10419 bump = size_binop (MINUS_EXPR, bump, size_one_node);
10420 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
10421 stmt_info, bump);
10422 new_stmt = gimple_build_assign
10423 (NULL_TREE, BIT_AND_EXPR, ptr,
10424 build_int_cst
10425 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
10426 if (TREE_CODE (ptr) == SSA_NAME)
10427 ptr = copy_ssa_name (ptr, new_stmt);
10428 else
10429 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
10430 gimple_assign_set_lhs (new_stmt, ptr);
10431 vect_finish_stmt_generation (vinfo, stmt_info,
10432 new_stmt, gsi);
10433 data_ref
10434 = build2 (MEM_REF, vectype, ptr,
10435 build_int_cst (ref_type, 0));
10436 break;
10438 case dr_explicit_realign_optimized:
10440 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10441 new_temp = copy_ssa_name (dataref_ptr);
10442 else
10443 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
10444 // We should only be doing this if we know the target
10445 // alignment at compile time.
10446 unsigned HOST_WIDE_INT align =
10447 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10448 new_stmt = gimple_build_assign
10449 (new_temp, BIT_AND_EXPR, dataref_ptr,
10450 build_int_cst (TREE_TYPE (dataref_ptr),
10451 -(HOST_WIDE_INT) align));
10452 vect_finish_stmt_generation (vinfo, stmt_info,
10453 new_stmt, gsi);
10454 data_ref
10455 = build2 (MEM_REF, vectype, new_temp,
10456 build_int_cst (ref_type, 0));
10457 break;
10459 default:
10460 gcc_unreachable ();
10462 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10463 /* DATA_REF is null if we've already built the statement. */
10464 if (data_ref)
10466 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10467 new_stmt = gimple_build_assign (vec_dest, data_ref);
10469 new_temp = make_ssa_name (vec_dest, new_stmt);
10470 gimple_set_lhs (new_stmt, new_temp);
10471 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10473 /* 3. Handle explicit realignment if necessary/supported.
10474 Create in loop:
10475 vec_dest = realign_load (msq, lsq, realignment_token) */
10476 if (alignment_support_scheme == dr_explicit_realign_optimized
10477 || alignment_support_scheme == dr_explicit_realign)
10479 lsq = gimple_assign_lhs (new_stmt);
10480 if (!realignment_token)
10481 realignment_token = dataref_ptr;
10482 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10483 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10484 msq, lsq, realignment_token);
10485 new_temp = make_ssa_name (vec_dest, new_stmt);
10486 gimple_assign_set_lhs (new_stmt, new_temp);
10487 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10489 if (alignment_support_scheme == dr_explicit_realign_optimized)
10491 gcc_assert (phi);
10492 if (i == vec_num - 1 && j == ncopies - 1)
10493 add_phi_arg (phi, lsq,
10494 loop_latch_edge (containing_loop),
10495 UNKNOWN_LOCATION);
10496 msq = lsq;
10500 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10502 tree perm_mask = perm_mask_for_reverse (vectype);
10503 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
10504 perm_mask, stmt_info, gsi);
10505 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10508 /* Collect vector loads and later create their permutation in
10509 vect_transform_grouped_load (). */
10510 if (grouped_load || slp_perm)
10511 dr_chain.quick_push (new_temp);
10513 /* Store vector loads in the corresponding SLP_NODE. */
10514 if (slp && !slp_perm)
10515 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10517 /* With an SLP permutation we load the gaps as well; without
10518 one we need to skip the gaps once we have fully loaded
10519 all elements.  group_gap_adj is DR_GROUP_SIZE here. */
10520 group_elt += nunits;
10521 if (maybe_ne (group_gap_adj, 0U)
10522 && !slp_perm
10523 && known_eq (group_elt, group_size - group_gap_adj))
10525 poly_wide_int bump_val
10526 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10527 * group_gap_adj);
10528 if (tree_int_cst_sgn
10529 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10530 bump_val = -bump_val;
10531 tree bump = wide_int_to_tree (sizetype, bump_val);
10532 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10533 gsi, stmt_info, bump);
10534 group_elt = 0;
10537 /* Bump the vector pointer to account for a gap or for excess
10538 elements loaded for a permuted SLP load. */
10539 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10541 poly_wide_int bump_val
10542 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10543 * group_gap_adj);
10544 if (tree_int_cst_sgn
10545 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10546 bump_val = -bump_val;
10547 tree bump = wide_int_to_tree (sizetype, bump_val);
10548 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10549 stmt_info, bump);
10553 if (slp && !slp_perm)
10554 continue;
10556 if (slp_perm)
10558 unsigned n_perms;
10559 /* For SLP we know we've seen all possible uses of dr_chain so
10560 direct vect_transform_slp_perm_load to DCE the unused parts.
10561 ??? This is a hack to prevent compile-time issues as seen
10562 in PR101120 and friends. */
10563 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10564 gsi, vf, false, &n_perms,
10565 nullptr, true);
10566 gcc_assert (ok);
10568 else
10570 if (grouped_load)
10572 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10573 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10574 group_size, gsi);
10575 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10577 else
10579 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10582 dr_chain.release ();
10584 if (!slp)
10585 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10587 return true;
10590 /* Function vect_is_simple_cond.
10592 Input:
10593 LOOP - the loop that is being vectorized.
10594 COND - Condition that is checked for simple use.
10596 Output:
10597 *COMP_VECTYPE - the vector type for the comparison.
10598 *DTS - The def types for the arguments of the comparison
10600 Returns whether a COND can be vectorized. Checks whether
10601 condition operands are supportable using vect_is_simple_use.  */
10603 static bool
10604 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10605 slp_tree slp_node, tree *comp_vectype,
10606 enum vect_def_type *dts, tree vectype)
10608 tree lhs, rhs;
10609 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10610 slp_tree slp_op;
10612 /* Mask case. */
10613 if (TREE_CODE (cond) == SSA_NAME
10614 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10616 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10617 &slp_op, &dts[0], comp_vectype)
10618 || !*comp_vectype
10619 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10620 return false;
10621 return true;
10624 if (!COMPARISON_CLASS_P (cond))
10625 return false;
10627 lhs = TREE_OPERAND (cond, 0);
10628 rhs = TREE_OPERAND (cond, 1);
10630 if (TREE_CODE (lhs) == SSA_NAME)
10632 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10633 &lhs, &slp_op, &dts[0], &vectype1))
10634 return false;
10636 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10637 || TREE_CODE (lhs) == FIXED_CST)
10638 dts[0] = vect_constant_def;
10639 else
10640 return false;
10642 if (TREE_CODE (rhs) == SSA_NAME)
10644 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10645 &rhs, &slp_op, &dts[1], &vectype2))
10646 return false;
10648 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10649 || TREE_CODE (rhs) == FIXED_CST)
10650 dts[1] = vect_constant_def;
10651 else
10652 return false;
10654 if (vectype1 && vectype2
10655 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10656 TYPE_VECTOR_SUBPARTS (vectype2)))
10657 return false;
10659 *comp_vectype = vectype1 ? vectype1 : vectype2;
10660 /* Invariant comparison. */
10661 if (! *comp_vectype)
10663 tree scalar_type = TREE_TYPE (lhs);
10664 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10665 *comp_vectype = truth_type_for (vectype);
10666 else
10668 /* If we can widen the comparison to match vectype do so. */
10669 if (INTEGRAL_TYPE_P (scalar_type)
10670 && !slp_node
10671 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10672 TYPE_SIZE (TREE_TYPE (vectype))))
10673 scalar_type = build_nonstandard_integer_type
10674 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10675 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10676 slp_node);
10680 return true;
10683 /* vectorizable_condition.
10685 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10686 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10687 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10688 at GSI.
10690 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10692 Return true if STMT_INFO is vectorizable in this way. */
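/* Illustrative example only (names invented): a scalar statement such as

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced by a VEC_COND_EXPR operating on whole vectors,

     vx_1 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;

   possibly with the comparison emitted as a separate mask statement when
   the target requires it.  */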
10694 static bool
10695 vectorizable_condition (vec_info *vinfo,
10696 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10697 gimple **vec_stmt,
10698 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10700 tree scalar_dest = NULL_TREE;
10701 tree vec_dest = NULL_TREE;
10702 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10703 tree then_clause, else_clause;
10704 tree comp_vectype = NULL_TREE;
10705 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10706 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10707 tree vec_compare;
10708 tree new_temp;
10709 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10710 enum vect_def_type dts[4]
10711 = {vect_unknown_def_type, vect_unknown_def_type,
10712 vect_unknown_def_type, vect_unknown_def_type};
10713 int ndts = 4;
10714 int ncopies;
10715 int vec_num;
10716 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10717 int i;
10718 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10719 vec<tree> vec_oprnds0 = vNULL;
10720 vec<tree> vec_oprnds1 = vNULL;
10721 vec<tree> vec_oprnds2 = vNULL;
10722 vec<tree> vec_oprnds3 = vNULL;
10723 tree vec_cmp_type;
10724 bool masked = false;
10726 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10727 return false;
10729 /* Is this a vectorizable conditional operation?  */
10730 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10731 if (!stmt)
10732 return false;
10734 code = gimple_assign_rhs_code (stmt);
10735 if (code != COND_EXPR)
10736 return false;
10738 stmt_vec_info reduc_info = NULL;
10739 int reduc_index = -1;
10740 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10741 bool for_reduction
10742 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10743 if (for_reduction)
10745 if (slp_node)
10746 return false;
10747 reduc_info = info_for_reduction (vinfo, stmt_info);
10748 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10749 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10750 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10751 || reduc_index != -1);
10753 else
10755 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10756 return false;
10759 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10760 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10762 if (slp_node)
10764 ncopies = 1;
10765 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10767 else
10769 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10770 vec_num = 1;
10773 gcc_assert (ncopies >= 1);
10774 if (for_reduction && ncopies > 1)
10775 return false; /* FORNOW */
10777 cond_expr = gimple_assign_rhs1 (stmt);
10779 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10780 &comp_vectype, &dts[0], vectype)
10781 || !comp_vectype)
10782 return false;
10784 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10785 slp_tree then_slp_node, else_slp_node;
10786 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10787 &then_clause, &then_slp_node, &dts[2], &vectype1))
10788 return false;
10789 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10790 &else_clause, &else_slp_node, &dts[3], &vectype2))
10791 return false;
10793 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10794 return false;
10796 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10797 return false;
10799 masked = !COMPARISON_CLASS_P (cond_expr);
10800 vec_cmp_type = truth_type_for (comp_vectype);
10802 if (vec_cmp_type == NULL_TREE)
10803 return false;
10805 cond_code = TREE_CODE (cond_expr);
10806 if (!masked)
10808 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10809 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10812 /* For conditional reductions, the "then" value needs to be the candidate
10813 value calculated by this iteration while the "else" value needs to be
10814 the result carried over from previous iterations. If the COND_EXPR
10815 is the other way around, we need to swap it. */
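/* Illustrative example only: for a scalar reduction written as

     res_1 = cond_2 ? res_3 : new_val_4;

   the candidate value sits in the "else" arm, so the comparison is
   inverted (or its result negated) and the arms are swapped before the
   IFN_FOLD_EXTRACT_LAST call is emitted further below.  */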
10816 bool must_invert_cmp_result = false;
10817 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10819 if (masked)
10820 must_invert_cmp_result = true;
10821 else
10823 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10824 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10825 if (new_code == ERROR_MARK)
10826 must_invert_cmp_result = true;
10827 else
10829 cond_code = new_code;
10830 /* Make sure we don't accidentally use the old condition. */
10831 cond_expr = NULL_TREE;
10834 std::swap (then_clause, else_clause);
10837 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10839 /* Boolean values may have another representation in vectors
10840 and therefore we prefer bit operations over comparison for
10841 them (which also works for scalar masks). We store opcodes
10842 to use in bitop1 and bitop2. Statement is vectorized as
10843 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10844 depending on bitop1 and bitop2 arity. */
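/* For example (illustrative only), on boolean masks A and B the mapping
   below relies on the identities

     A >  B  <=>   A & ~B
     A >= B  <=>   A | ~B
     A != B  <=>   A ^  B
     A == B  <=>  ~(A ^  B)

   with LT/LE obtained by swapping the operands of GT/GE.  */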
10845 switch (cond_code)
10847 case GT_EXPR:
10848 bitop1 = BIT_NOT_EXPR;
10849 bitop2 = BIT_AND_EXPR;
10850 break;
10851 case GE_EXPR:
10852 bitop1 = BIT_NOT_EXPR;
10853 bitop2 = BIT_IOR_EXPR;
10854 break;
10855 case LT_EXPR:
10856 bitop1 = BIT_NOT_EXPR;
10857 bitop2 = BIT_AND_EXPR;
10858 std::swap (cond_expr0, cond_expr1);
10859 break;
10860 case LE_EXPR:
10861 bitop1 = BIT_NOT_EXPR;
10862 bitop2 = BIT_IOR_EXPR;
10863 std::swap (cond_expr0, cond_expr1);
10864 break;
10865 case NE_EXPR:
10866 bitop1 = BIT_XOR_EXPR;
10867 break;
10868 case EQ_EXPR:
10869 bitop1 = BIT_XOR_EXPR;
10870 bitop2 = BIT_NOT_EXPR;
10871 break;
10872 default:
10873 return false;
10875 cond_code = SSA_NAME;
10878 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10879 && reduction_type == EXTRACT_LAST_REDUCTION
10880 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10882 if (dump_enabled_p ())
10883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10884 "reduction comparison operation not supported.\n");
10885 return false;
10888 if (!vec_stmt)
10890 if (bitop1 != NOP_EXPR)
10892 machine_mode mode = TYPE_MODE (comp_vectype);
10893 optab optab;
10895 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10896 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10897 return false;
10899 if (bitop2 != NOP_EXPR)
10901 optab = optab_for_tree_code (bitop2, comp_vectype,
10902 optab_default);
10903 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10904 return false;
10908 vect_cost_for_stmt kind = vector_stmt;
10909 if (reduction_type == EXTRACT_LAST_REDUCTION)
10910 /* Count one reduction-like operation per vector. */
10911 kind = vec_to_scalar;
10912 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
10913 && (masked
10914 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
10915 cond_code)
10916 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
10917 ERROR_MARK))))
10918 return false;
10920 if (slp_node
10921 && (!vect_maybe_update_slp_op_vectype
10922 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10923 || (op_adjust == 1
10924 && !vect_maybe_update_slp_op_vectype
10925 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10926 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10927 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10929 if (dump_enabled_p ())
10930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10931 "incompatible vector types for invariants\n");
10932 return false;
10935 if (loop_vinfo && for_reduction
10936 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10938 if (reduction_type == EXTRACT_LAST_REDUCTION)
10939 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10940 ncopies * vec_num, vectype, NULL);
10941 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10942 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10944 if (dump_enabled_p ())
10945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10946 "conditional reduction prevents the use"
10947 " of partial vectors.\n");
10948 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10952 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10953 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10954 cost_vec, kind);
10955 return true;
10958 /* Transform. */
10960 /* Handle def. */
10961 scalar_dest = gimple_assign_lhs (stmt);
10962 if (reduction_type != EXTRACT_LAST_REDUCTION)
10963 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10965 bool swap_cond_operands = false;
10967 /* See whether another part of the vectorized code applies a loop
10968 mask to the condition, or to its inverse. */
10970 vec_loop_masks *masks = NULL;
10971 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10973 if (reduction_type == EXTRACT_LAST_REDUCTION)
10974 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10975 else
10977 scalar_cond_masked_key cond (cond_expr, ncopies);
10978 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10979 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10980 else
10982 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10983 tree_code orig_code = cond.code;
10984 cond.code = invert_tree_comparison (cond.code, honor_nans);
10985 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
10987 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10988 cond_code = cond.code;
10989 swap_cond_operands = true;
10991 else
10993 /* Try the inverse of the current mask. We check if the
10994 inverse mask is live and if so we generate a negate of
10995 the current mask such that we still honor NaNs. */
10996 cond.inverted_p = true;
10997 cond.code = orig_code;
10998 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
11000 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11001 cond_code = cond.code;
11002 swap_cond_operands = true;
11003 must_invert_cmp_result = true;
11010 /* Handle cond expr. */
11011 if (masked)
11012 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11013 cond_expr, &vec_oprnds0, comp_vectype,
11014 then_clause, &vec_oprnds2, vectype,
11015 reduction_type != EXTRACT_LAST_REDUCTION
11016 ? else_clause : NULL, &vec_oprnds3, vectype);
11017 else
11018 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11019 cond_expr0, &vec_oprnds0, comp_vectype,
11020 cond_expr1, &vec_oprnds1, comp_vectype,
11021 then_clause, &vec_oprnds2, vectype,
11022 reduction_type != EXTRACT_LAST_REDUCTION
11023 ? else_clause : NULL, &vec_oprnds3, vectype);
11025 /* Arguments are ready. Create the new vector stmt. */
11026 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
11028 vec_then_clause = vec_oprnds2[i];
11029 if (reduction_type != EXTRACT_LAST_REDUCTION)
11030 vec_else_clause = vec_oprnds3[i];
11032 if (swap_cond_operands)
11033 std::swap (vec_then_clause, vec_else_clause);
11035 if (masked)
11036 vec_compare = vec_cond_lhs;
11037 else
11039 vec_cond_rhs = vec_oprnds1[i];
11040 if (bitop1 == NOP_EXPR)
11042 gimple_seq stmts = NULL;
11043 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
11044 vec_cond_lhs, vec_cond_rhs);
11045 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
11047 else
11049 new_temp = make_ssa_name (vec_cmp_type);
11050 gassign *new_stmt;
11051 if (bitop1 == BIT_NOT_EXPR)
11052 new_stmt = gimple_build_assign (new_temp, bitop1,
11053 vec_cond_rhs);
11054 else
11055 new_stmt
11056 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
11057 vec_cond_rhs);
11058 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11059 if (bitop2 == NOP_EXPR)
11060 vec_compare = new_temp;
11061 else if (bitop2 == BIT_NOT_EXPR
11062 && reduction_type != EXTRACT_LAST_REDUCTION)
11064 /* Instead of doing ~x ? y : z do x ? z : y. */
11065 vec_compare = new_temp;
11066 std::swap (vec_then_clause, vec_else_clause);
11068 else
11070 vec_compare = make_ssa_name (vec_cmp_type);
11071 if (bitop2 == BIT_NOT_EXPR)
11072 new_stmt
11073 = gimple_build_assign (vec_compare, bitop2, new_temp);
11074 else
11075 new_stmt
11076 = gimple_build_assign (vec_compare, bitop2,
11077 vec_cond_lhs, new_temp);
11078 vect_finish_stmt_generation (vinfo, stmt_info,
11079 new_stmt, gsi);
11084 /* If we decided to apply a loop mask to the result of the vector
11085 comparison, AND the comparison with the mask now. Later passes
11086 should then be able to reuse the AND results between multiple
11087 vector statements.
11089 For example:
11090 for (int i = 0; i < 100; ++i)
11091 x[i] = y[i] ? z[i] : 10;
11093 results in following optimized GIMPLE:
11095 mask__35.8_43 = vect__4.7_41 != { 0, ... };
11096 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
11097 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
11098 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
11099 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
11100 vect_iftmp.11_47, { 10, ... }>;
11102 instead of using masked and unmasked forms of
11103 vec != { 0, ... } (masked in the MASK_LOAD,
11104 unmasked in the VEC_COND_EXPR). */
11106 /* Force vec_compare to be an SSA_NAME rather than a comparison,
11107 in cases where that's necessary. */
11109 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
11111 if (!is_gimple_val (vec_compare))
11113 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11114 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11115 vec_compare);
11116 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11117 vec_compare = vec_compare_name;
11120 if (must_invert_cmp_result)
11122 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11123 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11124 BIT_NOT_EXPR,
11125 vec_compare);
11126 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11127 vec_compare = vec_compare_name;
11130 if (masks)
11132 tree loop_mask
11133 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
11134 vectype, i);
11135 tree tmp2 = make_ssa_name (vec_cmp_type);
11136 gassign *g
11137 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
11138 loop_mask);
11139 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
11140 vec_compare = tmp2;
11144 gimple *new_stmt;
11145 if (reduction_type == EXTRACT_LAST_REDUCTION)
11147 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
11148 tree lhs = gimple_get_lhs (old_stmt);
11149 new_stmt = gimple_build_call_internal
11150 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
11151 vec_then_clause);
11152 gimple_call_set_lhs (new_stmt, lhs);
11153 SSA_NAME_DEF_STMT (lhs) = new_stmt;
11154 if (old_stmt == gsi_stmt (*gsi))
11155 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
11156 else
11158 /* In this case we're moving the definition to later in the
11159 block. That doesn't matter because the only uses of the
11160 lhs are in phi statements. */
11161 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
11162 gsi_remove (&old_gsi, true);
11163 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11166 else
11168 new_temp = make_ssa_name (vec_dest);
11169 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
11170 vec_then_clause, vec_else_clause);
11171 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11173 if (slp_node)
11174 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11175 else
11176 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11179 if (!slp_node)
11180 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11182 vec_oprnds0.release ();
11183 vec_oprnds1.release ();
11184 vec_oprnds2.release ();
11185 vec_oprnds3.release ();
11187 return true;
11190 /* vectorizable_comparison.
11192 Check if STMT_INFO is a comparison expression that can be vectorized.
11193 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11194 comparison, put it in VEC_STMT, and insert it at GSI.
11196 Return true if STMT_INFO is vectorizable in this way. */
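/* Illustrative example only (names invented): a scalar mask definition
   such as

     b_1 = x_2 < y_3;

   is vectorized into a comparison producing a vector boolean (mask)
   value, either directly as

     vb_1 = vx_2 < vy_3;

   or via the bit operations selected below when the operands are
   themselves masks.  */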
11198 static bool
11199 vectorizable_comparison (vec_info *vinfo,
11200 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11201 gimple **vec_stmt,
11202 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11204 tree lhs, rhs1, rhs2;
11205 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11206 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11207 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
11208 tree new_temp;
11209 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11210 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
11211 int ndts = 2;
11212 poly_uint64 nunits;
11213 int ncopies;
11214 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11215 int i;
11216 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11217 vec<tree> vec_oprnds0 = vNULL;
11218 vec<tree> vec_oprnds1 = vNULL;
11219 tree mask_type;
11220 tree mask;
11222 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11223 return false;
11225 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
11226 return false;
11228 mask_type = vectype;
11229 nunits = TYPE_VECTOR_SUBPARTS (vectype);
11231 if (slp_node)
11232 ncopies = 1;
11233 else
11234 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11236 gcc_assert (ncopies >= 1);
11237 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11238 return false;
11240 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11241 if (!stmt)
11242 return false;
11244 code = gimple_assign_rhs_code (stmt);
11246 if (TREE_CODE_CLASS (code) != tcc_comparison)
11247 return false;
11249 slp_tree slp_rhs1, slp_rhs2;
11250 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11251 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
11252 return false;
11254 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11255 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
11256 return false;
11258 if (vectype1 && vectype2
11259 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11260 TYPE_VECTOR_SUBPARTS (vectype2)))
11261 return false;
11263 vectype = vectype1 ? vectype1 : vectype2;
11265 /* Invariant comparison. */
11266 if (!vectype)
11268 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
11269 vectype = mask_type;
11270 else
11271 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
11272 slp_node);
11273 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
11274 return false;
11276 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
11277 return false;
11279 /* Can't compare mask and non-mask types. */
11280 if (vectype1 && vectype2
11281 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
11282 return false;
11284 /* Boolean values may have another representation in vectors
11285 and therefore we prefer bit operations over comparison for
11286 them (which also works for scalar masks). We store opcodes
11287 to use in bitop1 and bitop2. Statement is vectorized as
11288 BITOP2 (rhs1 BITOP1 rhs2) or
11289 rhs1 BITOP2 (BITOP1 rhs2)
11290 depending on bitop1 and bitop2 arity. */
11291 bool swap_p = false;
11292 if (VECTOR_BOOLEAN_TYPE_P (vectype))
11294 if (code == GT_EXPR)
11296 bitop1 = BIT_NOT_EXPR;
11297 bitop2 = BIT_AND_EXPR;
11299 else if (code == GE_EXPR)
11301 bitop1 = BIT_NOT_EXPR;
11302 bitop2 = BIT_IOR_EXPR;
11304 else if (code == LT_EXPR)
11306 bitop1 = BIT_NOT_EXPR;
11307 bitop2 = BIT_AND_EXPR;
11308 swap_p = true;
11310 else if (code == LE_EXPR)
11312 bitop1 = BIT_NOT_EXPR;
11313 bitop2 = BIT_IOR_EXPR;
11314 swap_p = true;
11316 else
11318 bitop1 = BIT_XOR_EXPR;
11319 if (code == EQ_EXPR)
11320 bitop2 = BIT_NOT_EXPR;
11324 if (!vec_stmt)
11326 if (bitop1 == NOP_EXPR)
11328 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
11329 return false;
11331 else
11333 machine_mode mode = TYPE_MODE (vectype);
11334 optab optab;
11336 optab = optab_for_tree_code (bitop1, vectype, optab_default);
11337 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11338 return false;
11340 if (bitop2 != NOP_EXPR)
11342 optab = optab_for_tree_code (bitop2, vectype, optab_default);
11343 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11344 return false;
11348 /* Put types on constant and invariant SLP children. */
11349 if (slp_node
11350 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
11351 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
11353 if (dump_enabled_p ())
11354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11355 "incompatible vector types for invariants\n");
11356 return false;
11359 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
11360 vect_model_simple_cost (vinfo, stmt_info,
11361 ncopies * (1 + (bitop2 != NOP_EXPR)),
11362 dts, ndts, slp_node, cost_vec);
11363 return true;
11366 /* Transform. */
11368 /* Handle def. */
11369 lhs = gimple_assign_lhs (stmt);
11370 mask = vect_create_destination_var (lhs, mask_type);
11372 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11373 rhs1, &vec_oprnds0, vectype,
11374 rhs2, &vec_oprnds1, vectype);
11375 if (swap_p)
11376 std::swap (vec_oprnds0, vec_oprnds1);
11378 /* Arguments are ready. Create the new vector stmt. */
11379 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
11381 gimple *new_stmt;
11382 vec_rhs2 = vec_oprnds1[i];
11384 new_temp = make_ssa_name (mask);
11385 if (bitop1 == NOP_EXPR)
11387 new_stmt = gimple_build_assign (new_temp, code,
11388 vec_rhs1, vec_rhs2);
11389 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11391 else
11393 if (bitop1 == BIT_NOT_EXPR)
11394 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
11395 else
11396 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
11397 vec_rhs2);
11398 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11399 if (bitop2 != NOP_EXPR)
11401 tree res = make_ssa_name (mask);
11402 if (bitop2 == BIT_NOT_EXPR)
11403 new_stmt = gimple_build_assign (res, bitop2, new_temp);
11404 else
11405 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
11406 new_temp);
11407 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11410 if (slp_node)
11411 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
11412 else
11413 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11416 if (!slp_node)
11417 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11419 vec_oprnds0.release ();
11420 vec_oprnds1.release ();
11422 return true;
11425 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
11426 can handle all live statements in the node. Otherwise return true
11427 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
11428 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
11430 static bool
11431 can_vectorize_live_stmts (vec_info *vinfo,
11432 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11433 slp_tree slp_node, slp_instance slp_node_instance,
11434 bool vec_stmt_p,
11435 stmt_vector_for_cost *cost_vec)
11437 if (slp_node)
11439 stmt_vec_info slp_stmt_info;
11440 unsigned int i;
11441 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
11443 if (STMT_VINFO_LIVE_P (slp_stmt_info)
11444 && !vectorizable_live_operation (vinfo,
11445 slp_stmt_info, gsi, slp_node,
11446 slp_node_instance, i,
11447 vec_stmt_p, cost_vec))
11448 return false;
11451 else if (STMT_VINFO_LIVE_P (stmt_info)
11452 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
11453 slp_node, slp_node_instance, -1,
11454 vec_stmt_p, cost_vec))
11455 return false;
11457 return true;
11460 /* Make sure the statement is vectorizable. */
11462 opt_result
11463 vect_analyze_stmt (vec_info *vinfo,
11464 stmt_vec_info stmt_info, bool *need_to_vectorize,
11465 slp_tree node, slp_instance node_instance,
11466 stmt_vector_for_cost *cost_vec)
11468 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11469 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
11470 bool ok;
11471 gimple_seq pattern_def_seq;
11473 if (dump_enabled_p ())
11474 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
11475 stmt_info->stmt);
11477 if (gimple_has_volatile_ops (stmt_info->stmt))
11478 return opt_result::failure_at (stmt_info->stmt,
11479 "not vectorized:"
11480 " stmt has volatile operands: %G\n",
11481 stmt_info->stmt);
11483 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11484 && node == NULL
11485 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
11487 gimple_stmt_iterator si;
11489 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
11491 stmt_vec_info pattern_def_stmt_info
11492 = vinfo->lookup_stmt (gsi_stmt (si));
11493 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
11494 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
11496 /* Analyze def stmt of STMT if it's a pattern stmt. */
11497 if (dump_enabled_p ())
11498 dump_printf_loc (MSG_NOTE, vect_location,
11499 "==> examining pattern def statement: %G",
11500 pattern_def_stmt_info->stmt);
11502 opt_result res
11503 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
11504 need_to_vectorize, node, node_instance,
11505 cost_vec);
11506 if (!res)
11507 return res;
11512 /* Skip stmts that do not need to be vectorized. In loops this is expected
11513 to include:
11514 - the COND_EXPR which is the loop exit condition
11515 - any LABEL_EXPRs in the loop
11516 - computations that are used only for array indexing or loop control.
11517 In basic blocks we only analyze statements that are a part of some SLP
11518 instance, therefore, all the statements are relevant.
11520 A pattern statement needs to be analyzed instead of the original statement
11521 if the original statement is not relevant.  Otherwise, we analyze both
11522 statements.  In basic blocks we are called from some SLP instance
11523 traversal; there we don't analyze pattern stmts instead of the original
11524 ones, since the pattern stmts will already be part of an SLP instance.  */
11526 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
11527 if (!STMT_VINFO_RELEVANT_P (stmt_info)
11528 && !STMT_VINFO_LIVE_P (stmt_info))
11530 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11531 && pattern_stmt_info
11532 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11533 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11535 /* Analyze PATTERN_STMT instead of the original stmt. */
11536 stmt_info = pattern_stmt_info;
11537 if (dump_enabled_p ())
11538 dump_printf_loc (MSG_NOTE, vect_location,
11539 "==> examining pattern statement: %G",
11540 stmt_info->stmt);
11542 else
11544 if (dump_enabled_p ())
11545 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11547 return opt_result::success ();
11550 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11551 && node == NULL
11552 && pattern_stmt_info
11553 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11554 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11556 /* Analyze PATTERN_STMT too. */
11557 if (dump_enabled_p ())
11558 dump_printf_loc (MSG_NOTE, vect_location,
11559 "==> examining pattern statement: %G",
11560 pattern_stmt_info->stmt);
11562 opt_result res
11563 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11564 node_instance, cost_vec);
11565 if (!res)
11566 return res;
11569 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11571 case vect_internal_def:
11572 break;
11574 case vect_reduction_def:
11575 case vect_nested_cycle:
11576 gcc_assert (!bb_vinfo
11577 && (relevance == vect_used_in_outer
11578 || relevance == vect_used_in_outer_by_reduction
11579 || relevance == vect_used_by_reduction
11580 || relevance == vect_unused_in_scope
11581 || relevance == vect_used_only_live));
11582 break;
11584 case vect_induction_def:
11585 case vect_first_order_recurrence:
11586 gcc_assert (!bb_vinfo);
11587 break;
11589 case vect_constant_def:
11590 case vect_external_def:
11591 case vect_unknown_def_type:
11592 default:
11593 gcc_unreachable ();
11596 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11597 if (node)
11598 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11600 if (STMT_VINFO_RELEVANT_P (stmt_info))
11602 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11603 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11604 || (call && gimple_call_lhs (call) == NULL_TREE));
11605 *need_to_vectorize = true;
11608 if (PURE_SLP_STMT (stmt_info) && !node)
11610 if (dump_enabled_p ())
11611 dump_printf_loc (MSG_NOTE, vect_location,
11612 "handled only by SLP analysis\n");
11613 return opt_result::success ();
11616 ok = true;
11617 if (!bb_vinfo
11618 && (STMT_VINFO_RELEVANT_P (stmt_info)
11619 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11620 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11621 -mveclibabi= takes preference over library functions with
11622 the simd attribute. */
11623 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11624 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11625 cost_vec)
11626 || vectorizable_conversion (vinfo, stmt_info,
11627 NULL, NULL, node, cost_vec)
11628 || vectorizable_operation (vinfo, stmt_info,
11629 NULL, NULL, node, cost_vec)
11630 || vectorizable_assignment (vinfo, stmt_info,
11631 NULL, NULL, node, cost_vec)
11632 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11633 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11634 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11635 node, node_instance, cost_vec)
11636 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11637 NULL, node, cost_vec)
11638 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11639 || vectorizable_condition (vinfo, stmt_info,
11640 NULL, NULL, node, cost_vec)
11641 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11642 cost_vec)
11643 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11644 stmt_info, NULL, node)
11645 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
11646 stmt_info, NULL, node, cost_vec));
11647 else
11649 if (bb_vinfo)
11650 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11651 || vectorizable_simd_clone_call (vinfo, stmt_info,
11652 NULL, NULL, node, cost_vec)
11653 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11654 cost_vec)
11655 || vectorizable_shift (vinfo, stmt_info,
11656 NULL, NULL, node, cost_vec)
11657 || vectorizable_operation (vinfo, stmt_info,
11658 NULL, NULL, node, cost_vec)
11659 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11660 cost_vec)
11661 || vectorizable_load (vinfo, stmt_info,
11662 NULL, NULL, node, cost_vec)
11663 || vectorizable_store (vinfo, stmt_info,
11664 NULL, NULL, node, cost_vec)
11665 || vectorizable_condition (vinfo, stmt_info,
11666 NULL, NULL, node, cost_vec)
11667 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11668 cost_vec)
11669 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11672 if (node)
11673 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11675 if (!ok)
11676 return opt_result::failure_at (stmt_info->stmt,
11677 "not vectorized:"
11678 " relevant stmt not supported: %G",
11679 stmt_info->stmt);
11681 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11682 need extra handling, except for vectorizable reductions. */
11683 if (!bb_vinfo
11684 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11685 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11686 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11687 stmt_info, NULL, node, node_instance,
11688 false, cost_vec))
11689 return opt_result::failure_at (stmt_info->stmt,
11690 "not vectorized:"
11691 " live stmt not supported: %G",
11692 stmt_info->stmt);
11694 return opt_result::success ();
11698 /* Function vect_transform_stmt.
11700 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11702 bool
11703 vect_transform_stmt (vec_info *vinfo,
11704 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11705 slp_tree slp_node, slp_instance slp_node_instance)
11707 bool is_store = false;
11708 gimple *vec_stmt = NULL;
11709 bool done;
11711 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11713 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11714 if (slp_node)
11715 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11717 switch (STMT_VINFO_TYPE (stmt_info))
11719 case type_demotion_vec_info_type:
11720 case type_promotion_vec_info_type:
11721 case type_conversion_vec_info_type:
11722 done = vectorizable_conversion (vinfo, stmt_info,
11723 gsi, &vec_stmt, slp_node, NULL);
11724 gcc_assert (done);
11725 break;
11727 case induc_vec_info_type:
11728 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11729 stmt_info, &vec_stmt, slp_node,
11730 NULL);
11731 gcc_assert (done);
11732 break;
11734 case shift_vec_info_type:
11735 done = vectorizable_shift (vinfo, stmt_info,
11736 gsi, &vec_stmt, slp_node, NULL);
11737 gcc_assert (done);
11738 break;
11740 case op_vec_info_type:
11741 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11742 NULL);
11743 gcc_assert (done);
11744 break;
11746 case assignment_vec_info_type:
11747 done = vectorizable_assignment (vinfo, stmt_info,
11748 gsi, &vec_stmt, slp_node, NULL);
11749 gcc_assert (done);
11750 break;
11752 case load_vec_info_type:
11753 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11754 NULL);
11755 gcc_assert (done);
11756 break;
11758 case store_vec_info_type:
11759 done = vectorizable_store (vinfo, stmt_info,
11760 gsi, &vec_stmt, slp_node, NULL);
11761 gcc_assert (done);
11762 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11764 /* In case of interleaving, the whole chain is vectorized when the
11765 last store in the chain is reached. Store stmts before the last
11766 one are skipped, and their vec_stmt_info shouldn't be freed
11767 meanwhile. */
11768 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11769 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11770 is_store = true;
11772 else
11773 is_store = true;
11774 break;
11776 case condition_vec_info_type:
11777 done = vectorizable_condition (vinfo, stmt_info,
11778 gsi, &vec_stmt, slp_node, NULL);
11779 gcc_assert (done);
11780 break;
11782 case comparison_vec_info_type:
11783 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11784 slp_node, NULL);
11785 gcc_assert (done);
11786 break;
11788 case call_vec_info_type:
11789 done = vectorizable_call (vinfo, stmt_info,
11790 gsi, &vec_stmt, slp_node, NULL);
11791 break;
11793 case call_simd_clone_vec_info_type:
11794 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11795 slp_node, NULL);
11796 break;
11798 case reduc_vec_info_type:
11799 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11800 gsi, &vec_stmt, slp_node);
11801 gcc_assert (done);
11802 break;
11804 case cycle_phi_info_type:
11805 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11806 &vec_stmt, slp_node, slp_node_instance);
11807 gcc_assert (done);
11808 break;
11810 case lc_phi_info_type:
11811 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11812 stmt_info, &vec_stmt, slp_node);
11813 gcc_assert (done);
11814 break;
11816 case recurr_info_type:
11817 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
11818 stmt_info, &vec_stmt, slp_node, NULL);
11819 gcc_assert (done);
11820 break;
11822 case phi_info_type:
11823 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11824 gcc_assert (done);
11825 break;
11827 default:
11828 if (!STMT_VINFO_LIVE_P (stmt_info))
11830 if (dump_enabled_p ())
11831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11832 "stmt not supported.\n");
11833 gcc_unreachable ();
11835 done = true;
11838 if (!slp_node && vec_stmt)
11839 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11841 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11843 /* Handle stmts whose DEF is used outside the loop-nest that is
11844 being vectorized. */
11845 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11846 slp_node_instance, true, NULL);
11847 gcc_assert (done);
11850 if (slp_node)
11851 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11853 return is_store;
11857 /* Remove a group of stores (for SLP or interleaving), free their
11858 stmt_vec_info. */
11860 void
11861 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11863 stmt_vec_info next_stmt_info = first_stmt_info;
11865 while (next_stmt_info)
11867 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11868 next_stmt_info = vect_orig_stmt (next_stmt_info);
11869 /* Free the attached stmt_vec_info and remove the stmt. */
11870 vinfo->remove_stmt (next_stmt_info);
11871 next_stmt_info = tmp;
11875 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11876 elements of type SCALAR_TYPE, or null if the target doesn't support
11877 such a type.
11879 If NUNITS is zero, return a vector type that contains elements of
11880 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11882 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11883 for this vectorization region and want to "autodetect" the best choice.
11884 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11885 and we want the new type to be interoperable with it. PREVAILING_MODE
11886 in this case can be a scalar integer mode or a vector mode; when it
11887 is a vector mode, the function acts like a tree-level version of
11888 related_vector_mode. */
11890 tree
11891 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11892 tree scalar_type, poly_uint64 nunits)
11894 tree orig_scalar_type = scalar_type;
11895 scalar_mode inner_mode;
11896 machine_mode simd_mode;
11897 tree vectype;
11899 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11900 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11901 return NULL_TREE;
11903 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11905 /* Interoperability between modes requires one to be a constant multiple
11906 of the other, so that the number of vectors required for each operation
11907 is a compile-time constant. */
11908 if (prevailing_mode != VOIDmode
11909 && !constant_multiple_p (nunits * nbytes,
11910 GET_MODE_SIZE (prevailing_mode))
11911 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
11912 nunits * nbytes))
11913 return NULL_TREE;
11915 /* For vector types of elements whose mode precision doesn't
11916 match their type's precision we use an element type of mode
11917 precision. The vectorization routines will have to make sure
11918 they support the proper result truncation/extension.
11919 We also make sure to build vector types with INTEGER_TYPE
11920 component type only. */
11921 if (INTEGRAL_TYPE_P (scalar_type)
11922 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11923 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11924 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11925 TYPE_UNSIGNED (scalar_type));
11927 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11928 When the component mode passes the above test simply use a type
11929 corresponding to that mode. The theory is that any use that
11930 would cause problems with this will disable vectorization anyway. */
11931 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11932 && !INTEGRAL_TYPE_P (scalar_type))
11933 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11935 /* We can't build a vector type of elements with alignment bigger than
11936 their size. */
11937 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11938 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11939 TYPE_UNSIGNED (scalar_type));
11941 /* If we fell back to using the mode, fail if there was
11942 no scalar type for it.  */
11943 if (scalar_type == NULL_TREE)
11944 return NULL_TREE;
11946 /* If no prevailing mode was supplied, use the mode the target prefers.
11947 Otherwise look up a vector mode based on the prevailing mode.  */
11948 if (prevailing_mode == VOIDmode)
11950 gcc_assert (known_eq (nunits, 0U));
11951 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11952 if (SCALAR_INT_MODE_P (simd_mode))
11954 /* Traditional behavior is not to take the integer mode
11955 literally, but simply to use it as a way of determining
11956 the vector size. It is up to mode_for_vector to decide
11957 what the TYPE_MODE should be.
11959 Note that nunits == 1 is allowed in order to support single
11960 element vector types. */
11961 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11962 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11963 return NULL_TREE;
11966 else if (SCALAR_INT_MODE_P (prevailing_mode)
11967 || !related_vector_mode (prevailing_mode,
11968 inner_mode, nunits).exists (&simd_mode))
11970 /* Fall back to using mode_for_vector, mostly in the hope of being
11971 able to use an integer mode. */
11972 if (known_eq (nunits, 0U)
11973 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11974 return NULL_TREE;
11976 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11977 return NULL_TREE;
11980 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11982 /* In cases where the mode was chosen by mode_for_vector, check that
11983 the target actually supports the chosen mode, or that it at least
11984 allows the vector mode to be replaced by a like-sized integer. */
11985 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11986 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11987 return NULL_TREE;
11989 /* Re-attach the address-space qualifier if we canonicalized the scalar
11990 type. */
11991 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11992 return build_qualified_type
11993 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11995 return vectype;
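/* A minimal usage sketch of the function above (illustrative only; the
   arguments are assumptions for the example):

     tree v = get_related_vectype_for_scalar_type (VOIDmode,
						    integer_type_node, 0);

   asks for the target's preferred vector of ints, while passing a
   previously chosen vector mode together with a nonzero NUNITS requests
   an interoperable type with exactly that many elements.  */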
11998 /* Function get_vectype_for_scalar_type.
12000 Returns the vector type corresponding to SCALAR_TYPE as supported
12001 by the target. If GROUP_SIZE is nonzero and we're performing BB
12002 vectorization, make sure that the number of elements in the vector
12003 is no bigger than GROUP_SIZE. */
12005 tree
12006 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
12007 unsigned int group_size)
12009 /* For BB vectorization, we should always have a group size once we've
12010 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12011 are tentative requests during things like early data reference
12012 analysis and pattern recognition. */
12013 if (is_a <bb_vec_info> (vinfo))
12014 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12015 else
12016 group_size = 0;
12018 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
12019 scalar_type);
12020 if (vectype && vinfo->vector_mode == VOIDmode)
12021 vinfo->vector_mode = TYPE_MODE (vectype);
12023 /* Register the natural choice of vector type, before the group size
12024 has been applied. */
12025 if (vectype)
12026 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
12028 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
12029 try again with an explicit number of elements. */
12030 if (vectype
12031 && group_size
12032 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
12034 /* Start with the biggest number of units that fits within
12035 GROUP_SIZE and halve it until we find a valid vector type.
12036 Usually either the first attempt will succeed or all will
12037 fail (in the latter case because GROUP_SIZE is too small
12038 for the target), but it's possible that a target could have
12039 a hole between supported vector types.
12041 If GROUP_SIZE is not a power of 2, this has the effect of
12042 trying the largest power of 2 that fits within the group,
12043 even though the group is not a multiple of that vector size.
12044 The BB vectorizer will then try to carve up the group into
12045 smaller pieces. */
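/* For example (illustrative only), with GROUP_SIZE == 6 the loop below
   starts with a 4-element vector type and, if that is not supported,
   retries with a 2-element one before giving up.  */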
12046 unsigned int nunits = 1 << floor_log2 (group_size);
12049 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
12050 scalar_type, nunits);
12051 nunits /= 2;
12053 while (nunits > 1 && !vectype);
12056 return vectype;
12059 /* Return the vector type corresponding to SCALAR_TYPE as supported
12060 by the target. NODE, if nonnull, is the SLP tree node that will
12061 use the returned vector type. */
12063 tree
12064 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
12066 unsigned int group_size = 0;
12067 if (node)
12068 group_size = SLP_TREE_LANES (node);
12069 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12072 /* Function get_mask_type_for_scalar_type.
12074 Returns the mask type corresponding to a result of comparison
12075 of vectors of specified SCALAR_TYPE as supported by target.
12076 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12077 make sure that the number of elements in the vector is no bigger
12078 than GROUP_SIZE. */
12080 tree
12081 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
12082 unsigned int group_size)
12084 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12086 if (!vectype)
12087 return NULL;
12089 return truth_type_for (vectype);
12092 /* Function get_same_sized_vectype
12094 Returns a vector type corresponding to SCALAR_TYPE of size
12095 VECTOR_TYPE if supported by the target. */
12097 tree
12098 get_same_sized_vectype (tree scalar_type, tree vector_type)
12100 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12101 return truth_type_for (vector_type);
12103 poly_uint64 nunits;
12104 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
12105 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
12106 return NULL_TREE;
12108 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
12109 scalar_type, nunits);
12112 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
12113 would not change the chosen vector modes. */
12115 bool
12116 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
12118 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
12119 i != vinfo->used_vector_modes.end (); ++i)
12120 if (!VECTOR_MODE_P (*i)
12121 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
12122 return false;
12123 return true;
12126 /* Function vect_is_simple_use.
12128 Input:
12129 VINFO - the vect info of the loop or basic block that is being vectorized.
12130 OPERAND - operand in the loop or bb.
12131 Output:
12132 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
12133 case OPERAND is an SSA_NAME that is defined in the vectorizable region
12134 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
12135 the definition could be anywhere in the function
12136 DT - the type of definition
12138 Returns whether a stmt with OPERAND can be vectorized.
12139 For loops, supportable operands are constants, loop invariants, and operands
12140 that are defined by the current iteration of the loop. Unsupportable
12141 operands are those that are defined by a previous iteration of the loop (as
12142 is the case in reduction/induction computations).
12143 For basic blocks, supportable operands are constants and bb invariants.
12144 For now, operands defined outside the basic block are not supported. */
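/* Illustrative example only (names invented): for a statement

     a_1 = b_2 + 3;

   calling vect_is_simple_use on b_2 returns true with *DT set to
   vect_internal_def when b_2 is defined inside the vectorized region,
   while the constant operand 3 yields vect_constant_def.  */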
12146 bool
12147 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12148 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
12150 if (def_stmt_info_out)
12151 *def_stmt_info_out = NULL;
12152 if (def_stmt_out)
12153 *def_stmt_out = NULL;
12154 *dt = vect_unknown_def_type;
12156 if (dump_enabled_p ())
12158 dump_printf_loc (MSG_NOTE, vect_location,
12159 "vect_is_simple_use: operand ");
12160 if (TREE_CODE (operand) == SSA_NAME
12161 && !SSA_NAME_IS_DEFAULT_DEF (operand))
12162 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
12163 else
12164 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
12167 if (CONSTANT_CLASS_P (operand))
12168 *dt = vect_constant_def;
12169 else if (is_gimple_min_invariant (operand))
12170 *dt = vect_external_def;
12171 else if (TREE_CODE (operand) != SSA_NAME)
12172 *dt = vect_unknown_def_type;
12173 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
12174 *dt = vect_external_def;
12175 else
12177 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
12178 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
12179 if (!stmt_vinfo)
12180 *dt = vect_external_def;
12181 else
12183 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
12184 def_stmt = stmt_vinfo->stmt;
12185 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
12186 if (def_stmt_info_out)
12187 *def_stmt_info_out = stmt_vinfo;
12189 if (def_stmt_out)
12190 *def_stmt_out = def_stmt;
12193 if (dump_enabled_p ())
12195 dump_printf (MSG_NOTE, ", type of def: ");
12196 switch (*dt)
12198 case vect_uninitialized_def:
12199 dump_printf (MSG_NOTE, "uninitialized\n");
12200 break;
12201 case vect_constant_def:
12202 dump_printf (MSG_NOTE, "constant\n");
12203 break;
12204 case vect_external_def:
12205 dump_printf (MSG_NOTE, "external\n");
12206 break;
12207 case vect_internal_def:
12208 dump_printf (MSG_NOTE, "internal\n");
12209 break;
12210 case vect_induction_def:
12211 dump_printf (MSG_NOTE, "induction\n");
12212 break;
12213 case vect_reduction_def:
12214 dump_printf (MSG_NOTE, "reduction\n");
12215 break;
12216 case vect_double_reduction_def:
12217 dump_printf (MSG_NOTE, "double reduction\n");
12218 break;
12219 case vect_nested_cycle:
12220 dump_printf (MSG_NOTE, "nested cycle\n");
12221 break;
12222 case vect_first_order_recurrence:
12223 dump_printf (MSG_NOTE, "first order recurrence\n");
12224 break;
12225 case vect_unknown_def_type:
12226 dump_printf (MSG_NOTE, "unknown\n");
12227 break;
12231 if (*dt == vect_unknown_def_type)
12233 if (dump_enabled_p ())
12234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12235 "Unsupported pattern.\n");
12236 return false;
12239 return true;
12242 /* Function vect_is_simple_use.
12244 Same as vect_is_simple_use but also determines the vector operand
12245 type of OPERAND and stores it to *VECTYPE. If the definition of
12246 OPERAND is vect_uninitialized_def, vect_constant_def or
12247 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
12248 is responsible for computing the best suited vector type for the
12249 scalar operand. */
12251 bool
12252 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12253 tree *vectype, stmt_vec_info *def_stmt_info_out,
12254 gimple **def_stmt_out)
12256 stmt_vec_info def_stmt_info;
12257 gimple *def_stmt;
12258 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
12259 return false;
12261 if (def_stmt_out)
12262 *def_stmt_out = def_stmt;
12263 if (def_stmt_info_out)
12264 *def_stmt_info_out = def_stmt_info;
12266 /* Now get a vector type if the def is internal, otherwise supply
12267 NULL_TREE and leave it up to the caller to figure out a proper
12268 type for the use stmt. */
12269 if (*dt == vect_internal_def
12270 || *dt == vect_induction_def
12271 || *dt == vect_reduction_def
12272 || *dt == vect_double_reduction_def
12273 || *dt == vect_nested_cycle
12274 || *dt == vect_first_order_recurrence)
12276 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
12277 gcc_assert (*vectype != NULL_TREE);
12278 if (dump_enabled_p ())
12279 dump_printf_loc (MSG_NOTE, vect_location,
12280 "vect_is_simple_use: vectype %T\n", *vectype);
12282 else if (*dt == vect_uninitialized_def
12283 || *dt == vect_constant_def
12284 || *dt == vect_external_def)
12285 *vectype = NULL_TREE;
12286 else
12287 gcc_unreachable ();
12289 return true;
12292 /* Function vect_is_simple_use.
12294 Same as vect_is_simple_use but determines the operand by operand
12295 position OPERAND from either STMT or SLP_NODE, filling in *OP
12296 and *SLP_DEF (when SLP_NODE is not NULL). */
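/* Illustrative sketch only: vectorizable_* routines usually fetch operands
   through this interface instead of reading gimple operands directly, so
   SLP and non-SLP analysis share one path.  The names below are
   placeholders, assuming a two-operand statement:

     tree op0, op1;
     slp_tree slp_op0, slp_op1;
     enum vect_def_type dt0, dt1;
     tree vectype0, vectype1;
     if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op0, &slp_op0,
                              &dt0, &vectype0)
         || !vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
                                 &dt1, &vectype1))
       return false;  */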
12298 bool
12299 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
12300 unsigned operand, tree *op, slp_tree *slp_def,
12301 enum vect_def_type *dt,
12302 tree *vectype, stmt_vec_info *def_stmt_info_out)
12304 if (slp_node)
12306 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
12307 *slp_def = child;
12308 *vectype = SLP_TREE_VECTYPE (child);
12309 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
12311 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
12312 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
12314 else
12316 if (def_stmt_info_out)
12317 *def_stmt_info_out = NULL;
12318 *op = SLP_TREE_SCALAR_OPS (child)[0];
12319 *dt = SLP_TREE_DEF_TYPE (child);
12320 return true;
12323 else
12325 *slp_def = NULL;
12326 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
12328 if (gimple_assign_rhs_code (ass) == COND_EXPR
12329 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
12331 if (operand < 2)
12332 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
12333 else
12334 *op = gimple_op (ass, operand);
12336 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
12337 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
12338 else
12339 *op = gimple_op (ass, operand + 1);
12341 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
12342 *op = gimple_call_arg (call, operand);
12343 else
12344 gcc_unreachable ();
12345 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
12349 /* If OP is not NULL and is external or constant, update its vector
12350 type with VECTYPE. Returns true if successful or false if not,
12351 for example when conflicting vector types are present. */
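/* Illustrative sketch only: during SLP analysis this is typically paired
   with the SLP-aware vect_is_simple_use above; once a vectorizable_*
   routine has settled on a vector type for an operand it records it on
   the constant/external SLP child, roughly:

     if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
         || !vect_maybe_update_slp_op_vectype (slp_op1, vectype))
       {
         if (dump_enabled_p ())
           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                            "incompatible vector types for invariants\n");
         return false;
       }

   slp_op0/slp_op1 stand for SLP children obtained earlier; the exact
   handling varies between callers.  */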
12353 bool
12354 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
12356 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
12357 return true;
12358 if (SLP_TREE_VECTYPE (op))
12359 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
12360 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
12361 should be handled by patterns. Allow vect_constant_def for now. */
12362 if (VECTOR_BOOLEAN_TYPE_P (vectype)
12363 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
12364 return false;
12365 SLP_TREE_VECTYPE (op) = vectype;
12366 return true;
12369 /* Function supportable_widening_operation
12371 Check whether an operation represented by the code CODE is a
12372 widening operation that is supported by the target platform in
12373 vector form (i.e., when operating on arguments of type VECTYPE_IN
12374 and producing a result of type VECTYPE_OUT).
12376 Widening operations we currently support are NOP (CONVERT), FLOAT,
12377 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
12378 are supported by the target platform either directly (via vector
12379 tree-codes), or via target builtins.
12381 Output:
12382 - CODE1 and CODE2 are codes of vector operations to be used when
12383 vectorizing the operation, if available.
12384 - MULTI_STEP_CVT determines the number of required intermediate steps in
12385 case of multi-step conversion (like char->short->int - in that case
12386 MULTI_STEP_CVT will be 1).
12387 - INTERM_TYPES contains the intermediate type required to perform the
12388 widening operation (short in the above example). */
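/* Worked example (illustrative only, and entirely dependent on the optabs
   the target provides): for a char->int conversion a single unpack step is
   usually not enough, so on a target with lo/hi unpacks at each width the
   expected outcome is
     *CODE1 = VEC_UNPACK_LO_EXPR, *CODE2 = VEC_UNPACK_HI_EXPR
     (possibly swapped on big-endian targets, see below),
     *MULTI_STEP_CVT = 1, INTERM_TYPES = { vector of short },
   i.e. the conversion is performed as char->short->int.  */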
12390 bool
12391 supportable_widening_operation (vec_info *vinfo,
12392 enum tree_code code, stmt_vec_info stmt_info,
12393 tree vectype_out, tree vectype_in,
12394 enum tree_code *code1, enum tree_code *code2,
12395 int *multi_step_cvt,
12396 vec<tree> *interm_types)
12398 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
12399 class loop *vect_loop = NULL;
12400 machine_mode vec_mode;
12401 enum insn_code icode1, icode2;
12402 optab optab1, optab2;
12403 tree vectype = vectype_in;
12404 tree wide_vectype = vectype_out;
12405 enum tree_code c1, c2;
12406 int i;
12407 tree prev_type, intermediate_type;
12408 machine_mode intermediate_mode, prev_mode;
12409 optab optab3, optab4;
12411 *multi_step_cvt = 0;
12412 if (loop_info)
12413 vect_loop = LOOP_VINFO_LOOP (loop_info);
12415 switch (code)
12417 case WIDEN_MULT_EXPR:
12418 /* The result of a vectorized widening operation usually requires
12419 two vectors (because the widened results do not fit into one vector).
12420 The generated vector results would normally be expected to be
12421 generated in the same order as in the original scalar computation,
12422 i.e. if 8 results are generated in each vector iteration, they are
12423 to be organized as follows:
12424 vect1: [res1,res2,res3,res4],
12425 vect2: [res5,res6,res7,res8].
12427 However, in the special case that the result of the widening
12428 operation is used in a reduction computation only, the order doesn't
12429 matter (because when vectorizing a reduction we change the order of
12430 the computation). Some targets can take advantage of this and
12431 generate more efficient code. For example, targets like Altivec,
12432 that support widen_mult using a sequence of {mult_even,mult_odd}
12433 generate the following vectors:
12434 vect1: [res1,res3,res5,res7],
12435 vect2: [res2,res4,res6,res8].
12437 When vectorizing outer-loops, we execute the inner-loop sequentially
12438 (each vectorized inner-loop iteration contributes to VF outer-loop
12439 iterations in parallel). We therefore don't allow changing the
12440 order of the computation in the inner-loop during outer-loop
12441 vectorization. */
12442 /* TODO: Another case in which order doesn't *really* matter is when we
12443 widen and then contract again, e.g. (short)((int)x * y >> 8).
12444 Normally, pack_trunc performs an even/odd permute, whereas the
12445 repack from an even/odd expansion would be an interleave, which
12446 would be significantly simpler for e.g. AVX2. */
12447 /* In any case, in order to avoid duplicating the code below, recurse
12448 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
12449 are properly set up for the caller. If we fail, we'll continue with
12450 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
12451 if (vect_loop
12452 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
12453 && !nested_in_vect_loop_p (vect_loop, stmt_info)
12454 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
12455 stmt_info, vectype_out,
12456 vectype_in, code1, code2,
12457 multi_step_cvt, interm_types))
12459 /* Elements in a vector with the vect_used_by_reduction property cannot
12460 be reordered if the use chain with this property does not have the
12461 same operation. One such example is s += a * b, where elements
12462 in a and b cannot be reordered. Here we check if the vector defined
12463 by STMT is only directly used in the reduction statement. */
12464 tree lhs = gimple_assign_lhs (stmt_info->stmt);
12465 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12466 if (use_stmt_info
12467 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12468 return true;
12470 c1 = VEC_WIDEN_MULT_LO_EXPR;
12471 c2 = VEC_WIDEN_MULT_HI_EXPR;
12472 break;
12474 case DOT_PROD_EXPR:
12475 c1 = DOT_PROD_EXPR;
12476 c2 = DOT_PROD_EXPR;
12477 break;
12479 case SAD_EXPR:
12480 c1 = SAD_EXPR;
12481 c2 = SAD_EXPR;
12482 break;
12484 case VEC_WIDEN_MULT_EVEN_EXPR:
12485 /* Support the recursion induced just above. */
12486 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12487 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12488 break;
12490 case WIDEN_LSHIFT_EXPR:
12491 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12492 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12493 break;
12495 case WIDEN_PLUS_EXPR:
12496 c1 = VEC_WIDEN_PLUS_LO_EXPR;
12497 c2 = VEC_WIDEN_PLUS_HI_EXPR;
12498 break;
12500 case WIDEN_MINUS_EXPR:
12501 c1 = VEC_WIDEN_MINUS_LO_EXPR;
12502 c2 = VEC_WIDEN_MINUS_HI_EXPR;
12503 break;
12505 CASE_CONVERT:
12506 c1 = VEC_UNPACK_LO_EXPR;
12507 c2 = VEC_UNPACK_HI_EXPR;
12508 break;
12510 case FLOAT_EXPR:
12511 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12512 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12513 break;
12515 case FIX_TRUNC_EXPR:
12516 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12517 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12518 break;
12520 default:
12521 gcc_unreachable ();
12524 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12525 std::swap (c1, c2);
12527 if (code == FIX_TRUNC_EXPR)
12529 /* The signedness is determined from the output operand. */
12530 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12531 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12533 else if (CONVERT_EXPR_CODE_P (code)
12534 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12535 && VECTOR_BOOLEAN_TYPE_P (vectype)
12536 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12537 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12539 /* If the input and result modes are the same, a different optab
12540 is needed where we pass in the number of units in vectype. */
12541 optab1 = vec_unpacks_sbool_lo_optab;
12542 optab2 = vec_unpacks_sbool_hi_optab;
12544 else
12546 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12547 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12550 if (!optab1 || !optab2)
12551 return false;
12553 vec_mode = TYPE_MODE (vectype);
12554 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12555 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12556 return false;
12558 *code1 = c1;
12559 *code2 = c2;
12561 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12562 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12564 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12565 return true;
12566 /* For scalar masks we may have different boolean
12567 vector types having the same QImode. Thus we
12568 additionally check the number of elements. */
12569 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12570 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12571 return true;
12574 /* Check if it's a multi-step conversion that can be done using intermediate
12575 types. */
12577 prev_type = vectype;
12578 prev_mode = vec_mode;
12580 if (!CONVERT_EXPR_CODE_P (code))
12581 return false;
12583 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12584 intermediate steps in the promotion sequence. We try
12585 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12586 not. */
12587 interm_types->create (MAX_INTERM_CVT_STEPS);
12588 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12590 intermediate_mode = insn_data[icode1].operand[0].mode;
12591 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12592 intermediate_type
12593 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12594 else if (VECTOR_MODE_P (intermediate_mode))
12596 tree intermediate_element_type
12597 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
12598 TYPE_UNSIGNED (prev_type));
12599 intermediate_type
12600 = build_vector_type_for_mode (intermediate_element_type,
12601 intermediate_mode);
12603 else
12604 intermediate_type
12605 = lang_hooks.types.type_for_mode (intermediate_mode,
12606 TYPE_UNSIGNED (prev_type));
12608 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12609 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12610 && intermediate_mode == prev_mode
12611 && SCALAR_INT_MODE_P (prev_mode))
12613 /* If the input and result modes are the same, a different optab
12614 is needed where we pass in the number of units in vectype. */
12615 optab3 = vec_unpacks_sbool_lo_optab;
12616 optab4 = vec_unpacks_sbool_hi_optab;
12618 else
12620 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12621 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12624 if (!optab3 || !optab4
12625 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12626 || insn_data[icode1].operand[0].mode != intermediate_mode
12627 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12628 || insn_data[icode2].operand[0].mode != intermediate_mode
12629 || ((icode1 = optab_handler (optab3, intermediate_mode))
12630 == CODE_FOR_nothing)
12631 || ((icode2 = optab_handler (optab4, intermediate_mode))
12632 == CODE_FOR_nothing))
12633 break;
12635 interm_types->quick_push (intermediate_type);
12636 (*multi_step_cvt)++;
12638 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12639 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12641 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12642 return true;
12643 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12644 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12645 return true;
12648 prev_type = intermediate_type;
12649 prev_mode = intermediate_mode;
12652 interm_types->release ();
12653 return false;
12657 /* Function supportable_narrowing_operation
12659 Check whether an operation represented by the code CODE is a
12660 narrowing operation that is supported by the target platform in
12661 vector form (i.e., when operating on arguments of type VECTYPE_IN
12662 and producing a result of type VECTYPE_OUT).
12664 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12665 and FLOAT. This function checks if these operations are supported by
12666 the target platform directly via vector tree-codes.
12668 Output:
12669 - CODE1 is the code of a vector operation to be used when
12670 vectorizing the operation, if available.
12671 - MULTI_STEP_CVT determines the number of required intermediate steps in
12672 case of multi-step conversion (like int->short->char - in that case
12673 MULTI_STEP_CVT will be 1).
12674 - INTERM_TYPES contains the intermediate type required to perform the
12675 narrowing operation (short in the above example). */
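/* Worked example (illustrative only, target permitting): for an int->char
   conversion done with pack instructions the usual outcome is
     *CODE1 = VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT = 1,
     INTERM_TYPES = { vector of short },
   matching the int->short->char example above, whereas a target with a
   direct int->char pack would leave *MULTI_STEP_CVT at 0 and INTERM_TYPES
   empty.  */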
12677 bool
12678 supportable_narrowing_operation (enum tree_code code,
12679 tree vectype_out, tree vectype_in,
12680 enum tree_code *code1, int *multi_step_cvt,
12681 vec<tree> *interm_types)
12683 machine_mode vec_mode;
12684 enum insn_code icode1;
12685 optab optab1, interm_optab;
12686 tree vectype = vectype_in;
12687 tree narrow_vectype = vectype_out;
12688 enum tree_code c1;
12689 tree intermediate_type, prev_type;
12690 machine_mode intermediate_mode, prev_mode;
12691 int i;
12692 unsigned HOST_WIDE_INT n_elts;
12693 bool uns;
12695 *multi_step_cvt = 0;
12696 switch (code)
12698 CASE_CONVERT:
12699 c1 = VEC_PACK_TRUNC_EXPR;
12700 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12701 && VECTOR_BOOLEAN_TYPE_P (vectype)
12702 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
12703 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
12704 && n_elts < BITS_PER_UNIT)
12705 optab1 = vec_pack_sbool_trunc_optab;
12706 else
12707 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12708 break;
12710 case FIX_TRUNC_EXPR:
12711 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12712 /* The signedness is determined from the output operand. */
12713 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12714 break;
12716 case FLOAT_EXPR:
12717 c1 = VEC_PACK_FLOAT_EXPR;
12718 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12719 break;
12721 default:
12722 gcc_unreachable ();
12725 if (!optab1)
12726 return false;
12728 vec_mode = TYPE_MODE (vectype);
12729 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12730 return false;
12732 *code1 = c1;
12734 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12736 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12737 return true;
12738 /* For scalar masks we may have different boolean
12739 vector types having the same QImode. Thus we
12740 additionally check the number of elements. */
12741 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12742 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12743 return true;
12746 if (code == FLOAT_EXPR)
12747 return false;
12749 /* Check if it's a multi-step conversion that can be done using intermediate
12750 types. */
12751 prev_mode = vec_mode;
12752 prev_type = vectype;
12753 if (code == FIX_TRUNC_EXPR)
12754 uns = TYPE_UNSIGNED (vectype_out);
12755 else
12756 uns = TYPE_UNSIGNED (vectype);
12758 /* For multi-step FIX_TRUNC_EXPR prefer a signed floating-point to integer
12759 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often more
12760 costly than signed. */
12761 if (code == FIX_TRUNC_EXPR && uns)
12763 enum insn_code icode2;
12765 intermediate_type
12766 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12767 interm_optab
12768 = optab_for_tree_code (c1, intermediate_type, optab_default);
12769 if (interm_optab != unknown_optab
12770 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12771 && insn_data[icode1].operand[0].mode
12772 == insn_data[icode2].operand[0].mode)
12774 uns = false;
12775 optab1 = interm_optab;
12776 icode1 = icode2;
12780 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12781 intermediate steps in the narrowing sequence. We try
12782 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12783 interm_types->create (MAX_INTERM_CVT_STEPS);
12784 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12786 intermediate_mode = insn_data[icode1].operand[0].mode;
12787 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12788 intermediate_type
12789 = vect_double_mask_nunits (prev_type, intermediate_mode);
12790 else
12791 intermediate_type
12792 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12793 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12794 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12795 && SCALAR_INT_MODE_P (prev_mode)
12796 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
12797 && n_elts < BITS_PER_UNIT)
12798 interm_optab = vec_pack_sbool_trunc_optab;
12799 else
12800 interm_optab
12801 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12802 optab_default);
12803 if (!interm_optab
12804 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12805 || insn_data[icode1].operand[0].mode != intermediate_mode
12806 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12807 == CODE_FOR_nothing))
12808 break;
12810 interm_types->quick_push (intermediate_type);
12811 (*multi_step_cvt)++;
12813 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12815 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12816 return true;
12817 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12818 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12819 return true;
12822 prev_mode = intermediate_mode;
12823 prev_type = intermediate_type;
12824 optab1 = interm_optab;
12827 interm_types->release ();
12828 return false;
12831 /* Generate and return a vector mask of MASK_TYPE such that
12832 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12833 Add the statements to SEQ. */
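/* For illustration: with a 4-element MASK_TYPE, START_INDEX 6 and
   END_INDEX 9, the IFN_WHILE_ULT call built below yields the mask
   { true, true, true, false }, since 6, 7 and 8 are below 9 but 9 is not.
   The numbers are just an instance of the definition above.  */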
12835 tree
12836 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12837 tree end_index, const char *name)
12839 tree cmp_type = TREE_TYPE (start_index);
12840 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12841 cmp_type, mask_type,
12842 OPTIMIZE_FOR_SPEED));
12843 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12844 start_index, end_index,
12845 build_zero_cst (mask_type));
12846 tree tmp;
12847 if (name)
12848 tmp = make_temp_ssa_name (mask_type, NULL, name);
12849 else
12850 tmp = make_ssa_name (mask_type);
12851 gimple_call_set_lhs (call, tmp);
12852 gimple_seq_add_stmt (seq, call);
12853 return tmp;
12856 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12857 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12859 tree
12860 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12861 tree end_index)
12863 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12864 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12867 /* Try to compute the vector types required to vectorize STMT_INFO,
12868 returning true on success and false if vectorization isn't possible.
12869 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12870 make sure that the number of elements in the vectors is no bigger
12871 than GROUP_SIZE.
12873 On success:
12875 - Set *STMT_VECTYPE_OUT to:
12876 - NULL_TREE if the statement doesn't need to be vectorized;
12877 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12879 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12880 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12881 statement does not help to determine the overall number of units. */
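/* Worked example (illustrative, assuming 128-bit vectors): for a widening
   statement such as  short_d = (short) char_s;  *STMT_VECTYPE_OUT would be
   a vector of 8 shorts, while the smallest scalar type involved is char,
   so *NUNITS_VECTYPE_OUT becomes a vector of 16 chars; the latter is what
   constrains the vectorization factor.  */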
12883 opt_result
12884 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12885 tree *stmt_vectype_out,
12886 tree *nunits_vectype_out,
12887 unsigned int group_size)
12889 gimple *stmt = stmt_info->stmt;
12891 /* For BB vectorization, we should always have a group size once we've
12892 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12893 are tentative requests during things like early data reference
12894 analysis and pattern recognition. */
12895 if (is_a <bb_vec_info> (vinfo))
12896 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12897 else
12898 group_size = 0;
12900 *stmt_vectype_out = NULL_TREE;
12901 *nunits_vectype_out = NULL_TREE;
12903 if (gimple_get_lhs (stmt) == NULL_TREE
12904 /* MASK_STORE has no lhs, but is ok. */
12905 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12907 if (is_a <gcall *> (stmt))
12909 /* Ignore calls with no lhs. These must be calls to
12910 #pragma omp simd functions, and the vectorization factor
12911 they really need can't be determined until
12912 vectorizable_simd_clone_call. */
12913 if (dump_enabled_p ())
12914 dump_printf_loc (MSG_NOTE, vect_location,
12915 "defer to SIMD clone analysis.\n");
12916 return opt_result::success ();
12919 return opt_result::failure_at (stmt,
12920 "not vectorized: irregular stmt.%G", stmt);
12923 tree vectype;
12924 tree scalar_type = NULL_TREE;
12925 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12927 vectype = STMT_VINFO_VECTYPE (stmt_info);
12928 if (dump_enabled_p ())
12929 dump_printf_loc (MSG_NOTE, vect_location,
12930 "precomputed vectype: %T\n", vectype);
12932 else if (vect_use_mask_type_p (stmt_info))
12934 unsigned int precision = stmt_info->mask_precision;
12935 scalar_type = build_nonstandard_integer_type (precision, 1);
12936 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12937 if (!vectype)
12938 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12939 " data-type %T\n", scalar_type);
12940 if (dump_enabled_p ())
12941 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12943 else
12945 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12946 scalar_type = TREE_TYPE (DR_REF (dr));
12947 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12948 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12949 else
12950 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12952 if (dump_enabled_p ())
12954 if (group_size)
12955 dump_printf_loc (MSG_NOTE, vect_location,
12956 "get vectype for scalar type (group size %d):"
12957 " %T\n", group_size, scalar_type);
12958 else
12959 dump_printf_loc (MSG_NOTE, vect_location,
12960 "get vectype for scalar type: %T\n", scalar_type);
12962 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12963 if (!vectype)
12964 return opt_result::failure_at (stmt,
12965 "not vectorized:"
12966 " unsupported data-type %T\n",
12967 scalar_type);
12969 if (dump_enabled_p ())
12970 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12973 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12974 return opt_result::failure_at (stmt,
12975 "not vectorized: vector stmt in loop:%G",
12976 stmt);
12978 *stmt_vectype_out = vectype;
12980 /* Don't try to compute scalar types if the stmt produces a boolean
12981 vector; use the existing vector type instead. */
12982 tree nunits_vectype = vectype;
12983 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12985 /* The number of units is set according to the smallest scalar
12986 type (or the largest vector size, but we only support one
12987 vector size per vectorization). */
12988 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12989 TREE_TYPE (vectype));
12990 if (scalar_type != TREE_TYPE (vectype))
12992 if (dump_enabled_p ())
12993 dump_printf_loc (MSG_NOTE, vect_location,
12994 "get vectype for smallest scalar type: %T\n",
12995 scalar_type);
12996 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12997 group_size);
12998 if (!nunits_vectype)
12999 return opt_result::failure_at
13000 (stmt, "not vectorized: unsupported data-type %T\n",
13001 scalar_type);
13002 if (dump_enabled_p ())
13003 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
13004 nunits_vectype);
13008 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
13009 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
13010 return opt_result::failure_at (stmt,
13011 "Not vectorized: Incompatible number "
13012 "of vector subparts between %T and %T\n",
13013 nunits_vectype, *stmt_vectype_out);
13015 if (dump_enabled_p ())
13017 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
13018 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
13019 dump_printf (MSG_NOTE, "\n");
13022 *nunits_vectype_out = nunits_vectype;
13023 return opt_result::success ();
13026 /* Generate and return a statement sequence that sets the vector length LEN to:
13028 min_of_start_and_end = min (START_INDEX, END_INDEX);
13029 left_len = END_INDEX - min_of_start_and_end;
13030 rhs = min (left_len, LEN_LIMIT);
13031 LEN = rhs;
13033 Note: the cost of the code generated by this function is modeled
13034 by vect_estimate_min_profitable_iters, so changes here may need
13035 corresponding changes there. */
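/* Numeric illustration of the sequence above: with START_INDEX 0,
   END_INDEX 5 and LEN_LIMIT 4 we get LEN = min (5 - min (0, 5), 4) = 4;
   on the next iteration, with START_INDEX 4, LEN = min (5 - 4, 4) = 1,
   which covers the remaining tail.  The values are only an example.  */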
13037 gimple_seq
13038 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
13040 gimple_seq stmts = NULL;
13041 tree len_type = TREE_TYPE (len);
13042 gcc_assert (TREE_TYPE (start_index) == len_type);
13044 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
13045 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
13046 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
13047 gimple* stmt = gimple_build_assign (len, rhs);
13048 gimple_seq_add_stmt (&stmts, stmt);
13050 return stmts;