official-gcc.git: gcc/tree-vect-stmts.cc (blob 346d8ce280437e00bfeb19a4b4adc59eb96207f9)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2022 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 static unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind,
95 stmt_vec_info stmt_info, slp_tree node,
96 tree vectype, int misalign,
97 enum vect_cost_model_location where)
99 if ((kind == vector_load || kind == unaligned_load)
100 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
101 kind = vector_gather_load;
102 if ((kind == vector_store || kind == unaligned_store)
103 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
104 kind = vector_scatter_store;
106 stmt_info_for_cost si
107 = { count, kind, where, stmt_info, node, vectype, misalign };
108 body_cost_vec->safe_push (si);
110 return (unsigned)
111 (builtin_vectorization_cost (kind, vectype, misalign) * count);
114 unsigned
115 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
116 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
117 tree vectype, int misalign,
118 enum vect_cost_model_location where)
120 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
121 vectype, misalign, where);
124 unsigned
125 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
126 enum vect_cost_for_stmt kind, slp_tree node,
127 tree vectype, int misalign,
128 enum vect_cost_model_location where)
130 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
131 vectype, misalign, where);
134 unsigned
135 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
136 enum vect_cost_for_stmt kind,
137 enum vect_cost_model_location where)
139 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
140 || kind == scalar_stmt);
141 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
142 NULL_TREE, 0, where);
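/* Illustrative sketch, added for exposition and not part of the original
   file: a caller would typically use the record_stmt_cost overloads above
   along these lines (STMT_INFO, VECTYPE and NCOPIES are assumed to come
   from the surrounding analysis code):

     stmt_vector_for_cost cost_vec = vNULL;
     unsigned inside_cost = 0, prologue_cost = 0;
     inside_cost += record_stmt_cost (&cost_vec, ncopies, vector_stmt,
                                      stmt_info, vectype, 0, vect_body);
     prologue_cost += record_stmt_cost (&cost_vec, 1, scalar_to_vec,
                                        stmt_info, vectype, 0, vect_prologue);

   The first call accounts for NCOPIES vector statements in the loop body,
   the second for broadcasting a constant operand in the prologue.  The
   return values are only preliminary estimates; the stmt_info_for_cost
   entries pushed onto COST_VEC are what the target cost model sees.  */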
145 /* Return a variable of type ELEM_TYPE[NELEMS]. */
147 static tree
148 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
150 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
151 "vect_array");
154 /* ARRAY is an array of vectors created by create_vector_array.
155 Return an SSA_NAME for the vector in index N. The reference
156 is part of the vectorization of STMT_INFO and the vector is associated
157 with scalar destination SCALAR_DEST. */
159 static tree
160 read_vector_array (vec_info *vinfo,
161 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
162 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
164 tree vect_type, vect, vect_name, array_ref;
165 gimple *new_stmt;
167 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
168 vect_type = TREE_TYPE (TREE_TYPE (array));
169 vect = vect_create_destination_var (scalar_dest, vect_type);
170 array_ref = build4 (ARRAY_REF, vect_type, array,
171 build_int_cst (size_type_node, n),
172 NULL_TREE, NULL_TREE);
174 new_stmt = gimple_build_assign (vect, array_ref);
175 vect_name = make_ssa_name (vect, new_stmt);
176 gimple_assign_set_lhs (new_stmt, vect_name);
177 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
179 return vect_name;
182 /* ARRAY is an array of vectors created by create_vector_array.
183 Emit code to store SSA_NAME VECT in index N of the array.
184 The store is part of the vectorization of STMT_INFO. */
186 static void
187 write_vector_array (vec_info *vinfo,
188 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
189 tree vect, tree array, unsigned HOST_WIDE_INT n)
191 tree array_ref;
192 gimple *new_stmt;
194 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
195 build_int_cst (size_type_node, n),
196 NULL_TREE, NULL_TREE);
198 new_stmt = gimple_build_assign (array_ref, vect);
199 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
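/* Illustrative sketch, added for exposition and not part of the original
   file: for an array of vectors created by create_vector_array, the two
   helpers above emit GIMPLE of the form

     vect_dest_4 = vect_array[2];        read_vector_array, N == 2
     vect_array[3] = vect_src_7;         write_vector_array, N == 3

   where the SSA names are invented for the example and the constant
   array indices are the N values passed in.  */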
202 /* PTR is a pointer to an array of type TYPE. Return a representation
203 of *PTR. The memory reference replaces those in FIRST_DR
204 (and its group). */
206 static tree
207 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
209 tree mem_ref;
211 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
212 /* Arrays have the same alignment as their type. */
213 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
214 return mem_ref;
217 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
218 Emit the clobber before *GSI. */
220 static void
221 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
222 gimple_stmt_iterator *gsi, tree var)
224 tree clobber = build_clobber (TREE_TYPE (var));
225 gimple *new_stmt = gimple_build_assign (var, clobber);
226 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
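/* Illustrative sketch, added for exposition and not part of the original
   file: the clobber emitted by vect_clobber_variable looks like

     vect_array ={v} {CLOBBER};

   and marks the end of VAR's lifetime so later passes can reuse its
   stack slot; the name vect_array is just an example.  */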
229 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
231 /* Function vect_mark_relevant.
233 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
235 static void
236 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
237 enum vect_relevant relevant, bool live_p)
239 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
240 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
242 if (dump_enabled_p ())
243 dump_printf_loc (MSG_NOTE, vect_location,
244 "mark relevant %d, live %d: %G", relevant, live_p,
245 stmt_info->stmt);
247 /* If this stmt is an original stmt in a pattern, we might need to mark its
248 related pattern stmt instead of the original stmt. However, such stmts
249 may have their own uses that are not in any pattern, in such cases the
250 stmt itself should be marked. */
251 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
253 /* This is the last stmt in a sequence that was detected as a
254 pattern that can potentially be vectorized. Don't mark the stmt
255 as relevant/live because it's not going to be vectorized.
256 Instead mark the pattern-stmt that replaces it. */
258 if (dump_enabled_p ())
259 dump_printf_loc (MSG_NOTE, vect_location,
260 "last stmt in pattern. don't mark"
261 " relevant/live.\n");
262 stmt_vec_info old_stmt_info = stmt_info;
263 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
264 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
265 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
266 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
269 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
270 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
271 STMT_VINFO_RELEVANT (stmt_info) = relevant;
273 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
274 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
276 if (dump_enabled_p ())
277 dump_printf_loc (MSG_NOTE, vect_location,
278 "already marked relevant/live.\n");
279 return;
282 worklist->safe_push (stmt_info);
286 /* Function is_simple_and_all_uses_invariant
288 Return true if STMT_INFO is simple and all uses of it are invariant. */
290 bool
291 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
292 loop_vec_info loop_vinfo)
294 tree op;
295 ssa_op_iter iter;
297 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
298 if (!stmt)
299 return false;
301 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
303 enum vect_def_type dt = vect_uninitialized_def;
305 if (!vect_is_simple_use (op, loop_vinfo, &dt))
307 if (dump_enabled_p ())
308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
309 "use not simple.\n");
310 return false;
313 if (dt != vect_external_def && dt != vect_constant_def)
314 return false;
316 return true;
319 /* Function vect_stmt_relevant_p.
321 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
322 is "relevant for vectorization".
324 A stmt is considered "relevant for vectorization" if:
325 - it has uses outside the loop.
326 - it has vdefs (it alters memory).
327 - control stmts in the loop (except for the exit condition).
329 CHECKME: what other side effects would the vectorizer allow? */
331 static bool
332 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
333 enum vect_relevant *relevant, bool *live_p)
335 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
336 ssa_op_iter op_iter;
337 imm_use_iterator imm_iter;
338 use_operand_p use_p;
339 def_operand_p def_p;
341 *relevant = vect_unused_in_scope;
342 *live_p = false;
344 /* cond stmt other than loop exit cond. */
345 if (is_ctrl_stmt (stmt_info->stmt)
346 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
347 *relevant = vect_used_in_scope;
349 /* changing memory. */
350 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
351 if (gimple_vdef (stmt_info->stmt)
352 && !gimple_clobber_p (stmt_info->stmt))
354 if (dump_enabled_p ())
355 dump_printf_loc (MSG_NOTE, vect_location,
356 "vec_stmt_relevant_p: stmt has vdefs.\n");
357 *relevant = vect_used_in_scope;
360 /* uses outside the loop. */
361 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
363 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
365 basic_block bb = gimple_bb (USE_STMT (use_p));
366 if (!flow_bb_inside_loop_p (loop, bb))
368 if (is_gimple_debug (USE_STMT (use_p)))
369 continue;
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: used out of loop.\n");
375 /* We expect all such uses to be in the loop exit phis
376 (because of loop closed form) */
377 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
378 gcc_assert (bb == single_exit (loop)->dest);
380 *live_p = true;
385 if (*live_p && *relevant == vect_unused_in_scope
386 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: stmt live but not relevant.\n");
391 *relevant = vect_used_only_live;
394 return (*live_p || *relevant);
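/* Illustrative example, added for exposition and not part of the original
   file, of the criteria checked by vect_stmt_relevant_p in a simple loop:

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;      relevant: the store has a vdef
         t = b[i] * 2;         defines t, no vdef
       }
     last = t;                 t is used after the loop: its def is live

   The store is relevant because it alters memory; the definition of t is
   marked live because its value escapes through the loop-closed-SSA exit
   PHI, and since it is not invariant it becomes vect_used_only_live.  */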
398 /* Function exist_non_indexing_operands_for_use_p
400 USE is one of the uses attached to STMT_INFO. Check if USE is
401 used in STMT_INFO for anything other than indexing an array. */
403 static bool
404 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
406 tree operand;
408 /* USE corresponds to some operand in STMT. If there is no data
409 reference in STMT, then any operand that corresponds to USE
410 is not indexing an array. */
411 if (!STMT_VINFO_DATA_REF (stmt_info))
412 return true;
 414 /* STMT has a data_ref. FORNOW this means that it's of one of
415 the following forms:
416 -1- ARRAY_REF = var
417 -2- var = ARRAY_REF
418 (This should have been verified in analyze_data_refs).
420 'var' in the second case corresponds to a def, not a use,
421 so USE cannot correspond to any operands that are not used
422 for array indexing.
424 Therefore, all we need to check is if STMT falls into the
425 first case, and whether var corresponds to USE. */
427 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
428 if (!assign || !gimple_assign_copy_p (assign))
430 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
431 if (call && gimple_call_internal_p (call))
433 internal_fn ifn = gimple_call_internal_fn (call);
434 int mask_index = internal_fn_mask_index (ifn);
435 if (mask_index >= 0
436 && use == gimple_call_arg (call, mask_index))
437 return true;
438 int stored_value_index = internal_fn_stored_value_index (ifn);
439 if (stored_value_index >= 0
440 && use == gimple_call_arg (call, stored_value_index))
441 return true;
442 if (internal_gather_scatter_fn_p (ifn)
443 && use == gimple_call_arg (call, 1))
444 return true;
446 return false;
449 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
450 return false;
451 operand = gimple_assign_rhs1 (assign);
452 if (TREE_CODE (operand) != SSA_NAME)
453 return false;
455 if (operand == use)
456 return true;
458 return false;
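/* Illustrative example, added for exposition and not part of the original
   file:

     x_1 = a[i_2];        i_2 only indexes 'a': the function returns false
     a[i_3] = i_3;        i_3 is also the stored value: it returns true

   In the first statement the use of i_2 only feeds the address
   computation, which the vectorizer generates separately, so the
   definition of i_2 need not be vectorized on its behalf.  */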
463 Function process_use.
465 Inputs:
466 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
467 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
468 that defined USE. This is done by calling mark_relevant and passing it
469 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
470 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
471 be performed.
473 Outputs:
474 Generally, LIVE_P and RELEVANT are used to define the liveness and
475 relevance info of the DEF_STMT of this USE:
476 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
477 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
478 Exceptions:
479 - case 1: If USE is used only for address computations (e.g. array indexing),
480 which does not need to be directly vectorized, then the liveness/relevance
481 of the respective DEF_STMT is left unchanged.
482 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
 483 we skip DEF_STMT because it has already been processed.
484 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
485 "relevant" will be modified accordingly.
487 Return true if everything is as expected. Return false otherwise. */
489 static opt_result
490 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
491 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
492 bool force)
494 stmt_vec_info dstmt_vinfo;
495 enum vect_def_type dt;
497 /* case 1: we are only interested in uses that need to be vectorized. Uses
498 that are used for address computation are not considered relevant. */
499 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
500 return opt_result::success ();
502 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
503 return opt_result::failure_at (stmt_vinfo->stmt,
504 "not vectorized:"
505 " unsupported use in stmt.\n");
507 if (!dstmt_vinfo)
508 return opt_result::success ();
510 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
511 basic_block bb = gimple_bb (stmt_vinfo->stmt);
513 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
514 We have to force the stmt live since the epilogue loop needs it to
515 continue computing the reduction. */
516 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
517 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
518 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
519 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
520 && bb->loop_father == def_bb->loop_father)
522 if (dump_enabled_p ())
523 dump_printf_loc (MSG_NOTE, vect_location,
524 "reduc-stmt defining reduc-phi in the same nest.\n");
525 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
526 return opt_result::success ();
529 /* case 3a: outer-loop stmt defining an inner-loop stmt:
530 outer-loop-header-bb:
531 d = dstmt_vinfo
532 inner-loop:
533 stmt # use (d)
534 outer-loop-tail-bb:
535 ... */
536 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE, vect_location,
540 "outer-loop def-stmt defining inner-loop stmt.\n");
542 switch (relevant)
544 case vect_unused_in_scope:
545 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
546 vect_used_in_scope : vect_unused_in_scope;
547 break;
549 case vect_used_in_outer_by_reduction:
550 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
551 relevant = vect_used_by_reduction;
552 break;
554 case vect_used_in_outer:
555 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
556 relevant = vect_used_in_scope;
557 break;
559 case vect_used_in_scope:
560 break;
562 default:
563 gcc_unreachable ();
567 /* case 3b: inner-loop stmt defining an outer-loop stmt:
568 outer-loop-header-bb:
570 inner-loop:
571 d = dstmt_vinfo
572 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
573 stmt # use (d) */
574 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
576 if (dump_enabled_p ())
577 dump_printf_loc (MSG_NOTE, vect_location,
578 "inner-loop def-stmt defining outer-loop stmt.\n");
580 switch (relevant)
582 case vect_unused_in_scope:
583 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
584 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
585 vect_used_in_outer_by_reduction : vect_unused_in_scope;
586 break;
588 case vect_used_by_reduction:
589 case vect_used_only_live:
590 relevant = vect_used_in_outer_by_reduction;
591 break;
593 case vect_used_in_scope:
594 relevant = vect_used_in_outer;
595 break;
597 default:
598 gcc_unreachable ();
601 /* We are also not interested in uses on loop PHI backedges that are
602 inductions. Otherwise we'll needlessly vectorize the IV increment
603 and cause hybrid SLP for SLP inductions. Unless the PHI is live
604 of course. */
605 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
606 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
607 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
608 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
609 loop_latch_edge (bb->loop_father))
610 == use))
612 if (dump_enabled_p ())
613 dump_printf_loc (MSG_NOTE, vect_location,
614 "induction value on backedge.\n");
615 return opt_result::success ();
619 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
620 return opt_result::success ();
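/* Illustrative example, added for exposition and not part of the original
   file, of cases 3a and 3b above when the outer loop is the one being
   vectorized:

     for (i = 0; i < n; i++)
       {
         d = b[i];                  outer-loop def
         for (j = 0; j < m; j++)
           s = s + d * c[j];        inner-loop use of d          (case 3a)
         a[i] = s;                  outer-loop use of the inner-loop
                                    def of s                     (case 3b)
       }

   In case 3a the relevance recorded for the definition of d depends on
   whether the using statement is a nested-cycle definition; in case 3b
   uses by a reduction become vect_used_in_outer_by_reduction and other
   uses become vect_used_in_outer, as in the switches above.  */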
624 /* Function vect_mark_stmts_to_be_vectorized.
626 Not all stmts in the loop need to be vectorized. For example:
628 for i...
629 for j...
630 1. T0 = i + j
631 2. T1 = a[T0]
633 3. j = j + 1
635 Stmt 1 and 3 do not need to be vectorized, because loop control and
636 addressing of vectorized data-refs are handled differently.
638 This pass detects such stmts. */
640 opt_result
641 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
643 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
644 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
645 unsigned int nbbs = loop->num_nodes;
646 gimple_stmt_iterator si;
647 unsigned int i;
648 basic_block bb;
649 bool live_p;
650 enum vect_relevant relevant;
652 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
654 auto_vec<stmt_vec_info, 64> worklist;
656 /* 1. Init worklist. */
657 for (i = 0; i < nbbs; i++)
659 bb = bbs[i];
660 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
662 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
663 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
665 phi_info->stmt);
667 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
668 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
670 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
672 if (is_gimple_debug (gsi_stmt (si)))
673 continue;
674 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
675 if (dump_enabled_p ())
676 dump_printf_loc (MSG_NOTE, vect_location,
677 "init: stmt relevant? %G", stmt_info->stmt);
679 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
680 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
684 /* 2. Process_worklist */
685 while (worklist.length () > 0)
687 use_operand_p use_p;
688 ssa_op_iter iter;
690 stmt_vec_info stmt_vinfo = worklist.pop ();
691 if (dump_enabled_p ())
692 dump_printf_loc (MSG_NOTE, vect_location,
693 "worklist: examine stmt: %G", stmt_vinfo->stmt);
695 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
696 (DEF_STMT) as relevant/irrelevant according to the relevance property
697 of STMT. */
698 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
700 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
701 propagated as is to the DEF_STMTs of its USEs.
703 One exception is when STMT has been identified as defining a reduction
704 variable; in this case we set the relevance to vect_used_by_reduction.
705 This is because we distinguish between two kinds of relevant stmts -
706 those that are used by a reduction computation, and those that are
707 (also) used by a regular computation. This allows us later on to
708 identify stmts that are used solely by a reduction, and therefore the
709 order of the results that they produce does not have to be kept. */
711 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
713 case vect_reduction_def:
714 gcc_assert (relevant != vect_unused_in_scope);
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_in_scope
717 && relevant != vect_used_by_reduction
718 && relevant != vect_used_only_live)
719 return opt_result::failure_at
720 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
721 break;
723 case vect_nested_cycle:
724 if (relevant != vect_unused_in_scope
725 && relevant != vect_used_in_outer_by_reduction
726 && relevant != vect_used_in_outer)
727 return opt_result::failure_at
728 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
729 break;
731 case vect_double_reduction_def:
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_by_reduction
734 && relevant != vect_used_only_live)
735 return opt_result::failure_at
736 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
737 break;
739 default:
740 break;
743 if (is_pattern_stmt_p (stmt_vinfo))
745 /* Pattern statements are not inserted into the code, so
746 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
747 have to scan the RHS or function arguments instead. */
748 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
750 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
751 tree op = gimple_assign_rhs1 (assign);
753 i = 1;
754 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
756 opt_result res
757 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
758 loop_vinfo, relevant, &worklist, false);
759 if (!res)
760 return res;
761 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
762 loop_vinfo, relevant, &worklist, false);
763 if (!res)
764 return res;
765 i = 2;
767 for (; i < gimple_num_ops (assign); i++)
769 op = gimple_op (assign, i);
770 if (TREE_CODE (op) == SSA_NAME)
772 opt_result res
773 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
774 &worklist, false);
775 if (!res)
776 return res;
780 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
782 for (i = 0; i < gimple_call_num_args (call); i++)
784 tree arg = gimple_call_arg (call, i);
785 opt_result res
786 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
787 &worklist, false);
788 if (!res)
789 return res;
793 else
794 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
796 tree op = USE_FROM_PTR (use_p);
797 opt_result res
798 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
799 &worklist, false);
800 if (!res)
801 return res;
804 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
806 gather_scatter_info gs_info;
807 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
808 gcc_unreachable ();
809 opt_result res
810 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
811 &worklist, true);
812 if (!res)
814 if (fatal)
815 *fatal = false;
816 return res;
819 } /* while worklist */
821 return opt_result::success ();
824 /* Function vect_model_simple_cost.
826 Models cost for simple operations, i.e. those that only emit ncopies of a
827 single op. Right now, this does not account for multiple insns that could
828 be generated for the single vector op. We will handle that shortly. */
830 static void
831 vect_model_simple_cost (vec_info *,
832 stmt_vec_info stmt_info, int ncopies,
833 enum vect_def_type *dt,
834 int ndts,
835 slp_tree node,
836 stmt_vector_for_cost *cost_vec,
837 vect_cost_for_stmt kind = vector_stmt)
839 int inside_cost = 0, prologue_cost = 0;
841 gcc_assert (cost_vec != NULL);
843 /* ??? Somehow we need to fix this at the callers. */
844 if (node)
845 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
847 if (!node)
 848 /* Cost the "broadcast" of a scalar operand into a vector operand.
849 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
850 cost model. */
851 for (int i = 0; i < ndts; i++)
852 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
853 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
854 stmt_info, 0, vect_prologue);
856 /* Pass the inside-of-loop statements to the target-specific cost model. */
857 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
858 stmt_info, 0, vect_body);
860 if (dump_enabled_p ())
861 dump_printf_loc (MSG_NOTE, vect_location,
862 "vect_model_simple_cost: inside_cost = %d, "
863 "prologue_cost = %d .\n", inside_cost, prologue_cost);
867 /* Model cost for type demotion and promotion operations. PWR is
868 normally zero for single-step promotions and demotions. It will be
869 one if two-step promotion/demotion is required, and so on. NCOPIES
870 is the number of vector results (and thus number of instructions)
871 for the narrowest end of the operation chain. Each additional
872 step doubles the number of instructions required. If WIDEN_ARITH
873 is true the stmt is doing widening arithmetic. */
875 static void
876 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
877 enum vect_def_type *dt,
878 unsigned int ncopies, int pwr,
879 stmt_vector_for_cost *cost_vec,
880 bool widen_arith)
882 int i;
883 int inside_cost = 0, prologue_cost = 0;
885 for (i = 0; i < pwr + 1; i++)
887 inside_cost += record_stmt_cost (cost_vec, ncopies,
888 widen_arith
889 ? vector_stmt : vec_promote_demote,
890 stmt_info, 0, vect_body);
891 ncopies *= 2;
 894 /* FORNOW: Assuming maximum 2 args per stmt. */
895 for (i = 0; i < 2; i++)
896 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
897 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
898 stmt_info, 0, vect_prologue);
900 if (dump_enabled_p ())
901 dump_printf_loc (MSG_NOTE, vect_location,
902 "vect_model_promotion_demotion_cost: inside_cost = %d, "
903 "prologue_cost = %d .\n", inside_cost, prologue_cost);
906 /* Returns true if the current function returns DECL. */
908 static bool
909 cfun_returns (tree decl)
911 edge_iterator ei;
912 edge e;
913 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
915 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
916 if (!ret)
917 continue;
918 if (gimple_return_retval (ret) == decl)
919 return true;
920 /* We often end up with an aggregate copy to the result decl,
921 handle that case as well. First skip intermediate clobbers
922 though. */
 923 gimple *def = ret;
 do
 926 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
 928 while (gimple_clobber_p (def));
929 if (is_a <gassign *> (def)
930 && gimple_assign_lhs (def) == gimple_return_retval (ret)
931 && gimple_assign_rhs1 (def) == decl)
932 return true;
934 return false;
937 /* Function vect_model_store_cost
939 Models cost for stores. In the case of grouped accesses, one access
940 has the overhead of the grouped access attributed to it. */
942 static void
943 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
944 vect_memory_access_type memory_access_type,
945 dr_alignment_support alignment_support_scheme,
946 int misalignment,
947 vec_load_store_type vls_type, slp_tree slp_node,
948 stmt_vector_for_cost *cost_vec)
950 unsigned int inside_cost = 0, prologue_cost = 0;
951 stmt_vec_info first_stmt_info = stmt_info;
952 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
954 /* ??? Somehow we need to fix this at the callers. */
955 if (slp_node)
956 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
958 if (vls_type == VLS_STORE_INVARIANT)
960 if (!slp_node)
961 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
962 stmt_info, 0, vect_prologue);
965 /* Grouped stores update all elements in the group at once,
966 so we want the DR for the first statement. */
967 if (!slp_node && grouped_access_p)
968 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
970 /* True if we should include any once-per-group costs as well as
971 the cost of the statement itself. For SLP we only get called
972 once per group anyhow. */
973 bool first_stmt_p = (first_stmt_info == stmt_info);
975 /* We assume that the cost of a single store-lanes instruction is
976 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
977 access is instead being provided by a permute-and-store operation,
978 include the cost of the permutes. */
979 if (first_stmt_p
980 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 982 /* Uses high and low interleave or shuffle operations for each
983 needed permute. */
984 int group_size = DR_GROUP_SIZE (first_stmt_info);
985 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
986 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
987 stmt_info, 0, vect_body);
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_NOTE, vect_location,
991 "vect_model_store_cost: strided group_size = %d .\n",
992 group_size);
995 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
996 /* Costs of the stores. */
997 if (memory_access_type == VMAT_ELEMENTWISE
998 || memory_access_type == VMAT_GATHER_SCATTER)
1000 /* N scalar stores plus extracting the elements. */
1001 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1002 inside_cost += record_stmt_cost (cost_vec,
1003 ncopies * assumed_nunits,
1004 scalar_store, stmt_info, 0, vect_body);
1006 else
1007 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1008 misalignment, &inside_cost, cost_vec);
1010 if (memory_access_type == VMAT_ELEMENTWISE
1011 || memory_access_type == VMAT_STRIDED_SLP)
1013 /* N scalar stores plus extracting the elements. */
1014 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1015 inside_cost += record_stmt_cost (cost_vec,
1016 ncopies * assumed_nunits,
1017 vec_to_scalar, stmt_info, 0, vect_body);
1020 /* When vectorizing a store into the function result assign
1021 a penalty if the function returns in a multi-register location.
1022 In this case we assume we'll end up with having to spill the
1023 vector result and do piecewise loads as a conservative estimate. */
1024 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1025 if (base
1026 && (TREE_CODE (base) == RESULT_DECL
1027 || (DECL_P (base) && cfun_returns (base)))
1028 && !aggregate_value_p (base, cfun->decl))
1030 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1031 /* ??? Handle PARALLEL in some way. */
1032 if (REG_P (reg))
1034 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1035 /* Assume that a single reg-reg move is possible and cheap,
1036 do not account for vector to gp register move cost. */
1037 if (nregs > 1)
1039 /* Spill. */
1040 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1041 vector_store,
1042 stmt_info, 0, vect_epilogue);
1043 /* Loads. */
1044 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1045 scalar_load,
1046 stmt_info, 0, vect_epilogue);
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_NOTE, vect_location,
1053 "vect_model_store_cost: inside_cost = %d, "
1054 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1058 /* Calculate cost of DR's memory access. */
1059 void
1060 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1061 dr_alignment_support alignment_support_scheme,
1062 int misalignment,
1063 unsigned int *inside_cost,
1064 stmt_vector_for_cost *body_cost_vec)
1066 switch (alignment_support_scheme)
1068 case dr_aligned:
1070 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1071 vector_store, stmt_info, 0,
1072 vect_body);
1074 if (dump_enabled_p ())
1075 dump_printf_loc (MSG_NOTE, vect_location,
1076 "vect_model_store_cost: aligned.\n");
1077 break;
1080 case dr_unaligned_supported:
1082 /* Here, we assign an additional cost for the unaligned store. */
1083 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1084 unaligned_store, stmt_info,
1085 misalignment, vect_body);
1086 if (dump_enabled_p ())
1087 dump_printf_loc (MSG_NOTE, vect_location,
1088 "vect_model_store_cost: unaligned supported by "
1089 "hardware.\n");
1090 break;
1093 case dr_unaligned_unsupported:
1095 *inside_cost = VECT_MAX_COST;
1097 if (dump_enabled_p ())
1098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1099 "vect_model_store_cost: unsupported access.\n");
1100 break;
1103 default:
1104 gcc_unreachable ();
1109 /* Function vect_model_load_cost
1111 Models cost for loads. In the case of grouped accesses, one access has
1112 the overhead of the grouped access attributed to it. Since unaligned
1113 accesses are supported for loads, we also account for the costs of the
1114 access scheme chosen. */
1116 static void
1117 vect_model_load_cost (vec_info *vinfo,
1118 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1119 vect_memory_access_type memory_access_type,
1120 dr_alignment_support alignment_support_scheme,
1121 int misalignment,
1122 gather_scatter_info *gs_info,
1123 slp_tree slp_node,
1124 stmt_vector_for_cost *cost_vec)
1126 unsigned int inside_cost = 0, prologue_cost = 0;
1127 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1129 gcc_assert (cost_vec);
1131 /* ??? Somehow we need to fix this at the callers. */
1132 if (slp_node)
1133 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1135 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1137 /* If the load is permuted then the alignment is determined by
1138 the first group element not by the first scalar stmt DR. */
1139 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1140 /* Record the cost for the permutation. */
1141 unsigned n_perms, n_loads;
1142 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1143 vf, true, &n_perms, &n_loads);
1144 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1145 first_stmt_info, 0, vect_body);
1147 /* And adjust the number of loads performed. This handles
1148 redundancies as well as loads that are later dead. */
1149 ncopies = n_loads;
1152 /* Grouped loads read all elements in the group at once,
1153 so we want the DR for the first statement. */
1154 stmt_vec_info first_stmt_info = stmt_info;
1155 if (!slp_node && grouped_access_p)
1156 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1158 /* True if we should include any once-per-group costs as well as
1159 the cost of the statement itself. For SLP we only get called
1160 once per group anyhow. */
1161 bool first_stmt_p = (first_stmt_info == stmt_info);
1163 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1164 ones we actually need. Account for the cost of unused results. */
1165 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1167 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
 1168 stmt_vec_info next_stmt_info = first_stmt_info;
 do
 1171 gaps -= 1;
 1172 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
 1174 while (next_stmt_info);
1175 if (gaps)
1177 if (dump_enabled_p ())
1178 dump_printf_loc (MSG_NOTE, vect_location,
1179 "vect_model_load_cost: %d unused vectors.\n",
1180 gaps);
1181 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1182 alignment_support_scheme, misalignment, false,
1183 &inside_cost, &prologue_cost,
1184 cost_vec, cost_vec, true);
1188 /* We assume that the cost of a single load-lanes instruction is
1189 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1190 access is instead being provided by a load-and-permute operation,
1191 include the cost of the permutes. */
1192 if (first_stmt_p
1193 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
 1195 /* Uses even and odd extract operations or shuffle operations
1196 for each needed permute. */
1197 int group_size = DR_GROUP_SIZE (first_stmt_info);
1198 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1199 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1200 stmt_info, 0, vect_body);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: strided group_size = %d .\n",
1205 group_size);
1208 /* The loads themselves. */
1209 if (memory_access_type == VMAT_ELEMENTWISE
1210 || memory_access_type == VMAT_GATHER_SCATTER)
1212 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1213 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1214 if (memory_access_type == VMAT_GATHER_SCATTER
1215 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1216 /* For emulated gathers N offset vector element extracts
1217 (we assume the scalar scaling and ptr + offset add is consumed by
1218 the load). */
1219 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1220 vec_to_scalar, stmt_info, 0,
1221 vect_body);
1222 /* N scalar loads plus gathering them into a vector. */
1223 inside_cost += record_stmt_cost (cost_vec,
1224 ncopies * assumed_nunits,
1225 scalar_load, stmt_info, 0, vect_body);
1227 else if (memory_access_type == VMAT_INVARIANT)
1229 /* Invariant loads will ideally be hoisted and splat to a vector. */
1230 prologue_cost += record_stmt_cost (cost_vec, 1,
1231 scalar_load, stmt_info, 0,
1232 vect_prologue);
1233 prologue_cost += record_stmt_cost (cost_vec, 1,
1234 scalar_to_vec, stmt_info, 0,
1235 vect_prologue);
1237 else
1238 vect_get_load_cost (vinfo, stmt_info, ncopies,
1239 alignment_support_scheme, misalignment, first_stmt_p,
1240 &inside_cost, &prologue_cost,
1241 cost_vec, cost_vec, true);
1242 if (memory_access_type == VMAT_ELEMENTWISE
1243 || memory_access_type == VMAT_STRIDED_SLP
1244 || (memory_access_type == VMAT_GATHER_SCATTER
1245 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1246 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1247 stmt_info, 0, vect_body);
1249 if (dump_enabled_p ())
1250 dump_printf_loc (MSG_NOTE, vect_location,
1251 "vect_model_load_cost: inside_cost = %d, "
1252 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1256 /* Calculate cost of DR's memory access. */
1257 void
1258 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1259 dr_alignment_support alignment_support_scheme,
1260 int misalignment,
1261 bool add_realign_cost, unsigned int *inside_cost,
1262 unsigned int *prologue_cost,
1263 stmt_vector_for_cost *prologue_cost_vec,
1264 stmt_vector_for_cost *body_cost_vec,
1265 bool record_prologue_costs)
1267 switch (alignment_support_scheme)
1269 case dr_aligned:
1271 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1272 stmt_info, 0, vect_body);
1274 if (dump_enabled_p ())
1275 dump_printf_loc (MSG_NOTE, vect_location,
1276 "vect_model_load_cost: aligned.\n");
1278 break;
1280 case dr_unaligned_supported:
1282 /* Here, we assign an additional cost for the unaligned load. */
1283 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1284 unaligned_load, stmt_info,
1285 misalignment, vect_body);
1287 if (dump_enabled_p ())
1288 dump_printf_loc (MSG_NOTE, vect_location,
1289 "vect_model_load_cost: unaligned supported by "
1290 "hardware.\n");
1292 break;
1294 case dr_explicit_realign:
1296 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1297 vector_load, stmt_info, 0, vect_body);
1298 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1299 vec_perm, stmt_info, 0, vect_body);
1301 /* FIXME: If the misalignment remains fixed across the iterations of
1302 the containing loop, the following cost should be added to the
1303 prologue costs. */
1304 if (targetm.vectorize.builtin_mask_for_load)
1305 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1306 stmt_info, 0, vect_body);
1308 if (dump_enabled_p ())
1309 dump_printf_loc (MSG_NOTE, vect_location,
1310 "vect_model_load_cost: explicit realign\n");
1312 break;
1314 case dr_explicit_realign_optimized:
1316 if (dump_enabled_p ())
1317 dump_printf_loc (MSG_NOTE, vect_location,
1318 "vect_model_load_cost: unaligned software "
1319 "pipelined.\n");
1321 /* Unaligned software pipeline has a load of an address, an initial
1322 load, and possibly a mask operation to "prime" the loop. However,
1323 if this is an access in a group of loads, which provide grouped
1324 access, then the above cost should only be considered for one
1325 access in the group. Inside the loop, there is a load op
1326 and a realignment op. */
1328 if (add_realign_cost && record_prologue_costs)
1330 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1331 vector_stmt, stmt_info,
1332 0, vect_prologue);
1333 if (targetm.vectorize.builtin_mask_for_load)
1334 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1335 vector_stmt, stmt_info,
1336 0, vect_prologue);
1339 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1340 stmt_info, 0, vect_body);
1341 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1342 stmt_info, 0, vect_body);
1344 if (dump_enabled_p ())
1345 dump_printf_loc (MSG_NOTE, vect_location,
1346 "vect_model_load_cost: explicit realign optimized"
1347 "\n");
1349 break;
1352 case dr_unaligned_unsupported:
1354 *inside_cost = VECT_MAX_COST;
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1358 "vect_model_load_cost: unsupported access.\n");
1359 break;
1362 default:
1363 gcc_unreachable ();
1367 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1368 the loop preheader for the vectorized stmt STMT_VINFO. */
1370 static void
1371 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1372 gimple_stmt_iterator *gsi)
1374 if (gsi)
1375 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1376 else
1377 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1379 if (dump_enabled_p ())
1380 dump_printf_loc (MSG_NOTE, vect_location,
1381 "created new init_stmt: %G", new_stmt);
1384 /* Function vect_init_vector.
1386 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1387 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1388 vector type a vector with all elements equal to VAL is created first.
1389 Place the initialization at GSI if it is not NULL. Otherwise, place the
1390 initialization at the loop preheader.
1391 Return the DEF of INIT_STMT.
1392 It will be used in the vectorization of STMT_INFO. */
1394 tree
1395 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1396 gimple_stmt_iterator *gsi)
1398 gimple *init_stmt;
1399 tree new_temp;
 1401 /* We abuse this function to push something to an SSA name with initial 'val'. */
1402 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1404 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1405 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1407 /* Scalar boolean value should be transformed into
1408 all zeros or all ones value before building a vector. */
1409 if (VECTOR_BOOLEAN_TYPE_P (type))
1411 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1412 tree false_val = build_zero_cst (TREE_TYPE (type));
1414 if (CONSTANT_CLASS_P (val))
1415 val = integer_zerop (val) ? false_val : true_val;
1416 else
1418 new_temp = make_ssa_name (TREE_TYPE (type));
1419 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1420 val, true_val, false_val);
1421 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1422 val = new_temp;
1425 else
1427 gimple_seq stmts = NULL;
1428 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1429 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1430 TREE_TYPE (type), val);
1431 else
1432 /* ??? Condition vectorization expects us to do
1433 promotion of invariant/external defs. */
1434 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1435 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1436 !gsi_end_p (gsi2); )
1438 init_stmt = gsi_stmt (gsi2);
1439 gsi_remove (&gsi2, false);
1440 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1444 val = build_vector_from_val (type, val);
1447 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1448 init_stmt = gimple_build_assign (new_temp, val);
1449 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1450 return new_temp;
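/* Illustrative sketch, added for exposition and not part of the original
   file: for a V4SI vector type and an invariant scalar x_1,
   vect_init_vector emits something like

     cst__2 = {x_1, x_1, x_1, x_1};

   in the loop preheader (or before *GSI when GSI is non-null) and
   returns cst__2.  For vector boolean types the scalar value is first
   canonicalized to all-ones/all-zeros with a COND_EXPR, as done above.
   The SSA names are invented for the example.  */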
1454 /* Function vect_get_vec_defs_for_operand.
1456 OP is an operand in STMT_VINFO. This function returns a vector of
1457 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1459 In the case that OP is an SSA_NAME which is defined in the loop, then
1460 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1462 In case OP is an invariant or constant, a new stmt that creates a vector def
1463 needs to be introduced. VECTYPE may be used to specify a required type for
1464 vector invariant. */
1466 void
1467 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1468 unsigned ncopies,
1469 tree op, vec<tree> *vec_oprnds, tree vectype)
1471 gimple *def_stmt;
1472 enum vect_def_type dt;
1473 bool is_simple_use;
1474 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1476 if (dump_enabled_p ())
1477 dump_printf_loc (MSG_NOTE, vect_location,
1478 "vect_get_vec_defs_for_operand: %T\n", op);
1480 stmt_vec_info def_stmt_info;
1481 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1482 &def_stmt_info, &def_stmt);
1483 gcc_assert (is_simple_use);
1484 if (def_stmt && dump_enabled_p ())
1485 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1487 vec_oprnds->create (ncopies);
1488 if (dt == vect_constant_def || dt == vect_external_def)
1490 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1491 tree vector_type;
1493 if (vectype)
1494 vector_type = vectype;
1495 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1496 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1497 vector_type = truth_type_for (stmt_vectype);
1498 else
1499 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1501 gcc_assert (vector_type);
1502 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1503 while (ncopies--)
1504 vec_oprnds->quick_push (vop);
1506 else
1508 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1509 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1510 for (unsigned i = 0; i < ncopies; ++i)
1511 vec_oprnds->quick_push (gimple_get_lhs
1512 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1517 /* Get vectorized definitions for OP0 and OP1. */
1519 void
1520 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1521 unsigned ncopies,
1522 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1523 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1524 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1525 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1527 if (slp_node)
1529 if (op0)
1530 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1531 if (op1)
1532 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1533 if (op2)
1534 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1535 if (op3)
1536 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1538 else
1540 if (op0)
1541 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1542 op0, vec_oprnds0, vectype0);
1543 if (op1)
1544 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1545 op1, vec_oprnds1, vectype1);
1546 if (op2)
1547 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1548 op2, vec_oprnds2, vectype2);
1549 if (op3)
1550 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1551 op3, vec_oprnds3, vectype3);
1555 void
1556 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1557 unsigned ncopies,
1558 tree op0, vec<tree> *vec_oprnds0,
1559 tree op1, vec<tree> *vec_oprnds1,
1560 tree op2, vec<tree> *vec_oprnds2,
1561 tree op3, vec<tree> *vec_oprnds3)
1563 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1564 op0, vec_oprnds0, NULL_TREE,
1565 op1, vec_oprnds1, NULL_TREE,
1566 op2, vec_oprnds2, NULL_TREE,
1567 op3, vec_oprnds3, NULL_TREE);
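/* Illustrative sketch, added for exposition and not part of the original
   file: a typical two-operand vectorizable_* routine would call the
   convenience wrapper above as

     auto_vec<tree> vec_oprnds0, vec_oprnds1;
     vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                        op0, &vec_oprnds0, op1, &vec_oprnds1,
                        NULL_TREE, NULL, NULL_TREE, NULL);

   and then pair vec_oprnds0[i] with vec_oprnds1[i] for each generated
   copy.  The surrounding variables are assumed to exist in the caller.  */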
1570 /* Helper function called by vect_finish_replace_stmt and
1571 vect_finish_stmt_generation. Set the location of the new
1572 statement and create and return a stmt_vec_info for it. */
1574 static void
1575 vect_finish_stmt_generation_1 (vec_info *,
1576 stmt_vec_info stmt_info, gimple *vec_stmt)
1578 if (dump_enabled_p ())
1579 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1581 if (stmt_info)
1583 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1585 /* While EH edges will generally prevent vectorization, stmt might
1586 e.g. be in a must-not-throw region. Ensure newly created stmts
1587 that could throw are part of the same region. */
1588 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1589 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1590 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1592 else
1593 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1596 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1597 which sets the same scalar result as STMT_INFO did. Create and return a
1598 stmt_vec_info for VEC_STMT. */
1600 void
1601 vect_finish_replace_stmt (vec_info *vinfo,
1602 stmt_vec_info stmt_info, gimple *vec_stmt)
1604 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1605 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1607 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1608 gsi_replace (&gsi, vec_stmt, true);
1610 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1613 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1614 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1616 void
1617 vect_finish_stmt_generation (vec_info *vinfo,
1618 stmt_vec_info stmt_info, gimple *vec_stmt,
1619 gimple_stmt_iterator *gsi)
1621 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1623 if (!gsi_end_p (*gsi)
1624 && gimple_has_mem_ops (vec_stmt))
1626 gimple *at_stmt = gsi_stmt (*gsi);
1627 tree vuse = gimple_vuse (at_stmt);
1628 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1630 tree vdef = gimple_vdef (at_stmt);
1631 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1632 gimple_set_modified (vec_stmt, true);
1633 /* If we have an SSA vuse and insert a store, update virtual
1634 SSA form to avoid triggering the renamer. Do so only
1635 if we can easily see all uses - which is what almost always
1636 happens with the way vectorized stmts are inserted. */
1637 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1638 && ((is_gimple_assign (vec_stmt)
1639 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1640 || (is_gimple_call (vec_stmt)
1641 && !(gimple_call_flags (vec_stmt)
1642 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1644 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1645 gimple_set_vdef (vec_stmt, new_vdef);
1646 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1650 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1651 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1654 /* We want to vectorize a call to combined function CFN with function
1655 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1656 as the types of all inputs. Check whether this is possible using
1657 an internal function, returning its code if so or IFN_LAST if not. */
1659 static internal_fn
1660 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1661 tree vectype_out, tree vectype_in)
1663 internal_fn ifn;
1664 if (internal_fn_p (cfn))
1665 ifn = as_internal_fn (cfn);
1666 else
1667 ifn = associated_internal_fn (fndecl);
1668 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1670 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1671 if (info.vectorizable)
1673 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1674 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1675 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1676 OPTIMIZE_FOR_SPEED))
1677 return ifn;
1680 return IFN_LAST;
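/* Illustrative example, added for exposition and not part of the original
   file: for a scalar call to sqrtf, CFN is CFN_BUILT_IN_SQRTF and the
   associated internal function is IFN_SQRT; the function above returns
   IFN_SQRT only if the corresponding direct optab is supported for the
   chosen vector modes, and IFN_LAST otherwise, in which case the caller
   has to fall back to a target-specific builtin or give up.  */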
1684 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1685 gimple_stmt_iterator *);
1687 /* Check whether a load or store statement in the loop described by
1688 LOOP_VINFO is possible in a loop using partial vectors. This is
1689 testing whether the vectorizer pass has the appropriate support,
1690 as well as whether the target does.
1692 VLS_TYPE says whether the statement is a load or store and VECTYPE
1693 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1694 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1695 says how the load or store is going to be implemented and GROUP_SIZE
1696 is the number of load or store statements in the containing group.
1697 If the access is a gather load or scatter store, GS_INFO describes
1698 its arguments. If the load or store is conditional, SCALAR_MASK is the
1699 condition under which it occurs.
1701 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1702 vectors is not supported, otherwise record the required rgroup control
1703 types. */
1705 static void
1706 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1707 slp_tree slp_node,
1708 vec_load_store_type vls_type,
1709 int group_size,
1710 vect_memory_access_type
1711 memory_access_type,
1712 gather_scatter_info *gs_info,
1713 tree scalar_mask)
1715 /* Invariant loads need no special support. */
1716 if (memory_access_type == VMAT_INVARIANT)
1717 return;
1719 unsigned int nvectors;
1720 if (slp_node)
1721 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1722 else
1723 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1725 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1726 machine_mode vecmode = TYPE_MODE (vectype);
1727 bool is_load = (vls_type == VLS_LOAD);
1728 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1730 if (is_load
1731 ? !vect_load_lanes_supported (vectype, group_size, true)
1732 : !vect_store_lanes_supported (vectype, group_size, true))
1734 if (dump_enabled_p ())
1735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1736 "can't operate on partial vectors because"
1737 " the target doesn't have an appropriate"
1738 " load/store-lanes instruction.\n");
1739 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1740 return;
1742 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1743 scalar_mask);
1744 return;
1747 if (memory_access_type == VMAT_GATHER_SCATTER)
1749 internal_fn ifn = (is_load
1750 ? IFN_MASK_GATHER_LOAD
1751 : IFN_MASK_SCATTER_STORE);
1752 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1753 gs_info->memory_type,
1754 gs_info->offset_vectype,
1755 gs_info->scale))
1757 if (dump_enabled_p ())
1758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1759 "can't operate on partial vectors because"
1760 " the target doesn't have an appropriate"
1761 " gather load or scatter store instruction.\n");
1762 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1763 return;
1765 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1766 scalar_mask);
1767 return;
1770 if (memory_access_type != VMAT_CONTIGUOUS
1771 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1773 /* Element X of the data must come from iteration i * VF + X of the
1774 scalar loop. We need more work to support other mappings. */
1775 if (dump_enabled_p ())
1776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1777 "can't operate on partial vectors because an"
1778 " access isn't contiguous.\n");
1779 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1780 return;
1783 if (!VECTOR_MODE_P (vecmode))
1785 if (dump_enabled_p ())
1786 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1787 "can't operate on partial vectors when emulating"
1788 " vector operations.\n");
1789 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1790 return;
1793 /* We might load more scalars than we need for permuting SLP loads.
1794 We checked in get_group_load_store_type that the extra elements
1795 don't leak into a new vector. */
1796 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1798 unsigned int nvectors;
1799 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1800 return nvectors;
1801 gcc_unreachable ();
1804 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1805 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1806 machine_mode mask_mode;
1807 bool using_partial_vectors_p = false;
1808 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1809 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1811 nvectors = group_memory_nvectors (group_size * vf, nunits);
1812 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1813 using_partial_vectors_p = true;
1816 machine_mode vmode;
1817 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1819 nvectors = group_memory_nvectors (group_size * vf, nunits);
1820 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1821 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1822 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1823 using_partial_vectors_p = true;
1826 if (!using_partial_vectors_p)
1828 if (dump_enabled_p ())
1829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1830 "can't operate on partial vectors because the"
1831 " target doesn't have the appropriate partial"
1832 " vectorization load or store.\n");
1833 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
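/* As an illustration (a hypothetical target): for a contiguous V8HI load
   in a loop with VF = 8 and group size 1, nvectors is 1 and the code
   above records one loop mask if the target has a mask mode usable with
   can_vec_mask_load_store_p; if instead only a length-based load exists
   and it operates on the byte-mode view of the vector, FACTOR becomes
   GET_MODE_UNIT_SIZE (V8HImode) = 2, i.e. lengths are counted in bytes
   and a full vector corresponds to length 16.  */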
1837 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1838 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1839 that needs to be applied to all loads and stores in a vectorized loop.
1840 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1841 otherwise return VEC_MASK & LOOP_MASK.
1843 MASK_TYPE is the type of both masks. If new statements are needed,
1844 insert them before GSI. */
1846 static tree
1847 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1848 tree vec_mask, gimple_stmt_iterator *gsi)
1850 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1851 if (!loop_mask)
1852 return vec_mask;
1854 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1856 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1857 return vec_mask;
1859 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1860 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1861 vec_mask, loop_mask);
1863 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1864 return and_res;
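/* For example (illustrative SSA names): given a loop mask loop_mask_8
   from the rgroup controls and a vectorized condition vec_mask_23 of the
   same mask type, the code above emits

     vec_mask_and_27 = vec_mask_23 & loop_mask_8;

   before GSI and returns vec_mask_and_27, unless the pair was already
   recorded in vec_cond_masked_set, in which case vec_mask_23 already
   includes the loop mask and is returned unchanged.  */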
1867 /* Determine whether we can use a gather load or scatter store to vectorize
1868 strided load or store STMT_INFO by truncating the current offset to a
1869 smaller width. We need to be able to construct an offset vector:
1871 { 0, X, X*2, X*3, ... }
1873 without loss of precision, where X is STMT_INFO's DR_STEP.
1875 Return true if this is possible, describing the gather load or scatter
1876 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1878 static bool
1879 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1880 loop_vec_info loop_vinfo, bool masked_p,
1881 gather_scatter_info *gs_info)
1883 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1884 data_reference *dr = dr_info->dr;
1885 tree step = DR_STEP (dr);
1886 if (TREE_CODE (step) != INTEGER_CST)
1888 /* ??? Perhaps we could use range information here? */
1889 if (dump_enabled_p ())
1890 dump_printf_loc (MSG_NOTE, vect_location,
1891 "cannot truncate variable step.\n");
1892 return false;
1895 /* Get the number of bits in an element. */
1896 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1897 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1898 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1900 /* Set COUNT to the upper limit on the number of elements - 1.
1901 Start with the maximum vectorization factor. */
1902 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1904 /* Try lowering COUNT to the number of scalar latch iterations. */
1905 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1906 widest_int max_iters;
1907 if (max_loop_iterations (loop, &max_iters)
1908 && max_iters < count)
1909 count = max_iters.to_shwi ();
1911 /* Try scales of 1 and the element size. */
1912 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1913 wi::overflow_type overflow = wi::OVF_NONE;
1914 for (int i = 0; i < 2; ++i)
1916 int scale = scales[i];
1917 widest_int factor;
1918 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1919 continue;
1921 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1922 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1923 if (overflow)
1924 continue;
1925 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1926 unsigned int min_offset_bits = wi::min_precision (range, sign);
1928 /* Find the narrowest viable offset type. */
1929 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1930 tree offset_type = build_nonstandard_integer_type (offset_bits,
1931 sign == UNSIGNED);
1933 /* See whether the target supports the operation with an offset
1934 no narrower than OFFSET_TYPE. */
1935 tree memory_type = TREE_TYPE (DR_REF (dr));
1936 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1937 vectype, memory_type, offset_type, scale,
1938 &gs_info->ifn, &gs_info->offset_vectype)
1939 || gs_info->ifn == IFN_LAST)
1940 continue;
1942 gs_info->decl = NULL_TREE;
1943 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1944 but we don't need to store that here. */
1945 gs_info->base = NULL_TREE;
1946 gs_info->element_type = TREE_TYPE (vectype);
1947 gs_info->offset = fold_convert (offset_type, step);
1948 gs_info->offset_dt = vect_constant_def;
1949 gs_info->scale = scale;
1950 gs_info->memory_type = memory_type;
1951 return true;
1954 if (overflow && dump_enabled_p ())
1955 dump_printf_loc (MSG_NOTE, vect_location,
1956 "truncating gather/scatter offset to %d bits"
1957 " might change its value.\n", element_bits);
1959 return false;
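/* A worked example with hypothetical numbers: DR_STEP = 4, 4-byte
   elements and at most 256 scalar iterations give COUNT = 255.  The
   first candidate, SCALE = 1, has FACTOR = 4, so RANGE = 255 * 4 = 1020
   needs 10 bits and is rounded up to a 16-bit unsigned offset type;
   SCALE = 4 has FACTOR = 1, so 255 fits an 8-bit unsigned offset.  The
   first candidate that vect_gather_scatter_fn_p accepts is used.  */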
1962 /* Return true if we can use gather/scatter internal functions to
1963 vectorize STMT_INFO, which is a grouped or strided load or store.
1964 MASKED_P is true if the load or store is conditional. When returning
1965 true, fill in GS_INFO with the information required to perform the
1966 operation. */
1968 static bool
1969 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1970 loop_vec_info loop_vinfo, bool masked_p,
1971 gather_scatter_info *gs_info)
1973 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1974 || gs_info->ifn == IFN_LAST)
1975 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1976 masked_p, gs_info);
1978 tree old_offset_type = TREE_TYPE (gs_info->offset);
1979 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1981 gcc_assert (TYPE_PRECISION (new_offset_type)
1982 >= TYPE_PRECISION (old_offset_type));
1983 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1985 if (dump_enabled_p ())
1986 dump_printf_loc (MSG_NOTE, vect_location,
1987 "using gather/scatter for strided/grouped access,"
1988 " scale = %d\n", gs_info->scale);
1990 return true;
1993 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1994 elements with a known constant step. Return -1 if that step
1995 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1997 static int
1998 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
2000 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2001 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
2002 size_zero_node);
2005 /* If the target supports a permute mask that reverses the elements in
2006 a vector of type VECTYPE, return that mask, otherwise return null. */
2008 static tree
2009 perm_mask_for_reverse (tree vectype)
2011 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2013 /* The encoding has a single stepped pattern. */
2014 vec_perm_builder sel (nunits, 1, 3);
2015 for (int i = 0; i < 3; ++i)
2016 sel.quick_push (nunits - 1 - i);
2018 vec_perm_indices indices (sel, 1, nunits);
2019 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
2020 indices))
2021 return NULL_TREE;
2022 return vect_gen_perm_mask_checked (vectype, indices);
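/* For example, for V8HI this asks whether the target can perform the
   constant permutation { 7, 6, 5, 4, 3, 2, 1, 0 }.  Only the leading
   stepped triple { 7, 6, 5 } is encoded explicitly; the remaining
   elements continue the downward series, which also describes the
   reversal of variable-length vectors.  */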
2025 /* A subroutine of get_load_store_type, with a subset of the same
2026 arguments. Handle the case where STMT_INFO is a load or store that
2027 accesses consecutive elements with a negative step. Sets *POFFSET
2028 to the offset to be applied to the DR for the first access. */
2030 static vect_memory_access_type
2031 get_negative_load_store_type (vec_info *vinfo,
2032 stmt_vec_info stmt_info, tree vectype,
2033 vec_load_store_type vls_type,
2034 unsigned int ncopies, poly_int64 *poffset)
2036 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2037 dr_alignment_support alignment_support_scheme;
2039 if (ncopies > 1)
2041 if (dump_enabled_p ())
2042 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2043 "multiple types with negative step.\n");
2044 return VMAT_ELEMENTWISE;
2047 /* For backward-running DRs the first access of a vector of type VECTYPE
2048 is actually N-1 elements before the address of the DR. */
2049 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2050 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2052 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2053 alignment_support_scheme
2054 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2055 if (alignment_support_scheme != dr_aligned
2056 && alignment_support_scheme != dr_unaligned_supported)
2058 if (dump_enabled_p ())
2059 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2060 "negative step but alignment required.\n");
2061 *poffset = 0;
2062 return VMAT_ELEMENTWISE;
2065 if (vls_type == VLS_STORE_INVARIANT)
2067 if (dump_enabled_p ())
2068 dump_printf_loc (MSG_NOTE, vect_location,
2069 "negative step with invariant source;"
2070 " no permute needed.\n");
2071 return VMAT_CONTIGUOUS_DOWN;
2074 if (!perm_mask_for_reverse (vectype))
2076 if (dump_enabled_p ())
2077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2078 "negative step and reversing not supported.\n");
2079 *poffset = 0;
2080 return VMAT_ELEMENTWISE;
2083 return VMAT_CONTIGUOUS_REVERSE;
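/* A concrete illustration (hypothetical access): for a V4SI load with a
   step of -4 bytes, *POFFSET is (-4 + 1) * 4 = -12, so the first vector
   access covers DR_ADDR - 12 .. DR_ADDR + 3 and the loaded vector is
   later reversed with the permutation from perm_mask_for_reverse to
   restore the scalar order (VMAT_CONTIGUOUS_REVERSE).  */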
2086 /* STMT_INFO is either a masked or unconditional store. Return the value
2087 being stored. */
2089 tree
2090 vect_get_store_rhs (stmt_vec_info stmt_info)
2092 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2094 gcc_assert (gimple_assign_single_p (assign));
2095 return gimple_assign_rhs1 (assign);
2097 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2099 internal_fn ifn = gimple_call_internal_fn (call);
2100 int index = internal_fn_stored_value_index (ifn);
2101 gcc_assert (index >= 0);
2102 return gimple_call_arg (call, index);
2104 gcc_unreachable ();
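/* For example (illustrative operands), for a masked store emitted as

     .MASK_STORE (addr_5, align, mask_7, val_9);

   internal_fn_stored_value_index returns 3 and val_9 is the stored
   value; for a plain single assignment the stored value is simply the
   rhs1 of the assignment.  */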
2107 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2109 This function returns a vector type that can be composed from NELTS pieces,
2110 whose type is recorded in PTYPE. VTYPE must be a vector type and have the
2111 same vector size as the returned type. The function first checks whether the
2112 target supports constructing the vector from pieces-sized vector modes; if
2113 not, it checks construction from a pieces-sized scalar mode. It returns
2114 NULL_TREE if no suitable composition can be found.
2116 For example, for (vtype=V16QI, nelts=4), we can probably get:
2117 - V16QI with PTYPE V4QI.
2118 - V4SI with PTYPE SI.
2119 - NULL_TREE. */
2121 static tree
2122 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2124 gcc_assert (VECTOR_TYPE_P (vtype));
2125 gcc_assert (known_gt (nelts, 0U));
2127 machine_mode vmode = TYPE_MODE (vtype);
2128 if (!VECTOR_MODE_P (vmode))
2129 return NULL_TREE;
2131 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2132 unsigned int pbsize;
2133 if (constant_multiple_p (vbsize, nelts, &pbsize))
2135 /* First check whether the vec_init optab supports construction from
2136 vector pieces directly. */
2137 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2138 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2139 machine_mode rmode;
2140 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2141 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2142 != CODE_FOR_nothing))
2144 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2145 return vtype;
2148 /* Otherwise check whether an integer type of the same piece size exists
2149 and whether the vec_init optab supports construction from it directly. */
2150 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2151 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2152 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2153 != CODE_FOR_nothing))
2155 *ptype = build_nonstandard_integer_type (pbsize, 1);
2156 return build_vector_type (*ptype, nelts);
2160 return NULL_TREE;
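/* The gap handling in get_group_load_store_type below relies on this:
   for a group of 8 halfwords whose last 4 elements are a gap,
   vector_vector_composition_type (V8HI, 2, &half_vtype) may return V8HI
   with PTYPE V4HI (or a 2-element vector of a 64-bit integer PTYPE),
   allowing just the live half of the vector to be loaded instead of
   peeling for gaps.  */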
2163 /* A subroutine of get_load_store_type, with a subset of the same
2164 arguments. Handle the case where STMT_INFO is part of a grouped load
2165 or store.
2167 For stores, the statements in the group are all consecutive
2168 and there is no gap at the end. For loads, the statements in the
2169 group might not be consecutive; there can be gaps between statements
2170 as well as at the end. */
2172 static bool
2173 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2174 tree vectype, slp_tree slp_node,
2175 bool masked_p, vec_load_store_type vls_type,
2176 vect_memory_access_type *memory_access_type,
2177 poly_int64 *poffset,
2178 dr_alignment_support *alignment_support_scheme,
2179 int *misalignment,
2180 gather_scatter_info *gs_info)
2182 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2183 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2184 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2185 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2186 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2187 bool single_element_p = (stmt_info == first_stmt_info
2188 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2189 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2190 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2192 /* True if the vectorized statements would access beyond the last
2193 statement in the group. */
2194 bool overrun_p = false;
2196 /* True if we can cope with such overrun by peeling for gaps, so that
2197 there is at least one final scalar iteration after the vector loop. */
2198 bool can_overrun_p = (!masked_p
2199 && vls_type == VLS_LOAD
2200 && loop_vinfo
2201 && !loop->inner);
2203 /* There can only be a gap at the end of the group if the stride is
2204 known at compile time. */
2205 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2207 /* Stores can't yet have gaps. */
2208 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2210 if (slp_node)
2212 /* For SLP vectorization we directly vectorize a subchain
2213 without permutation. */
2214 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2215 first_dr_info
2216 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2217 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2219 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2220 separated by the stride, until we have a complete vector.
2221 Fall back to scalar accesses if that isn't possible. */
2222 if (multiple_p (nunits, group_size))
2223 *memory_access_type = VMAT_STRIDED_SLP;
2224 else
2225 *memory_access_type = VMAT_ELEMENTWISE;
2227 else
2229 overrun_p = loop_vinfo && gap != 0;
2230 if (overrun_p && vls_type != VLS_LOAD)
2232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2233 "Grouped store with gaps requires"
2234 " non-consecutive accesses\n");
2235 return false;
2237 /* An overrun is fine if the trailing elements are smaller
2238 than the alignment boundary B. Every vector access will
2239 be a multiple of B and so we are guaranteed to access a
2240 non-gap element in the same B-sized block. */
2241 if (overrun_p
2242 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2243 vectype)
2244 / vect_get_scalar_dr_size (first_dr_info)))
2245 overrun_p = false;
2247 /* If the gap splits the vector in half and the target
2248 can do half-vector operations avoid the epilogue peeling
2249 by simply loading half of the vector only. Usually
2250 the construction with an upper zero half will be elided. */
2251 dr_alignment_support alss;
2252 int misalign = dr_misalignment (first_dr_info, vectype);
2253 tree half_vtype;
2254 if (overrun_p
2255 && !masked_p
2256 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2257 vectype, misalign)))
2258 == dr_aligned
2259 || alss == dr_unaligned_supported)
2260 && known_eq (nunits, (group_size - gap) * 2)
2261 && known_eq (nunits, group_size)
2262 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2263 != NULL_TREE))
2264 overrun_p = false;
2266 if (overrun_p && !can_overrun_p)
2268 if (dump_enabled_p ())
2269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2270 "Peeling for outer loop is not supported\n");
2271 return false;
2273 int cmp = compare_step_with_zero (vinfo, stmt_info);
2274 if (cmp < 0)
2276 if (single_element_p)
2277 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2278 only correct for single element "interleaving" SLP. */
2279 *memory_access_type = get_negative_load_store_type
2280 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2281 else
2283 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2284 separated by the stride, until we have a complete vector.
2285 Fall back to scalar accesses if that isn't possible. */
2286 if (multiple_p (nunits, group_size))
2287 *memory_access_type = VMAT_STRIDED_SLP;
2288 else
2289 *memory_access_type = VMAT_ELEMENTWISE;
2292 else
2294 gcc_assert (!loop_vinfo || cmp > 0);
2295 *memory_access_type = VMAT_CONTIGUOUS;
2298 /* When we have a contiguous access across loop iterations
2299 but the access in the loop doesn't cover the full vector
2300 we can end up with no gap recorded but still excess
2301 elements accessed, see PR103116. Make sure we peel for
2302 gaps if necessary and sufficient and give up if not. */
2303 if (loop_vinfo
2304 && *memory_access_type == VMAT_CONTIGUOUS
2305 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2306 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2307 nunits))
2309 unsigned HOST_WIDE_INT cnunits, cvf;
2310 if (!can_overrun_p
2311 || !nunits.is_constant (&cnunits)
2312 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2313 /* Peeling for gaps assumes that a single scalar iteration
2314 is enough to make sure the last vector iteration doesn't
2315 access excess elements.
2316 ??? Enhancements include peeling multiple iterations
2317 or using masked loads with a static mask. */
2318 || (group_size * cvf) % cnunits + group_size < cnunits)
2320 if (dump_enabled_p ())
2321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2322 "peeling for gaps insufficient for "
2323 "access\n");
2324 return false;
2326 overrun_p = true;
2330 else
2332 /* We can always handle this case using elementwise accesses,
2333 but see if something more efficient is available. */
2334 *memory_access_type = VMAT_ELEMENTWISE;
2336 /* If there is a gap at the end of the group then these optimizations
2337 would access excess elements in the last iteration. */
2338 bool would_overrun_p = (gap != 0);
2339 /* An overrun is fine if the trailing elements are smaller than the
2340 alignment boundary B. Every vector access will be a multiple of B
2341 and so we are guaranteed to access a non-gap element in the
2342 same B-sized block. */
2343 if (would_overrun_p
2344 && !masked_p
2345 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2346 / vect_get_scalar_dr_size (first_dr_info)))
2347 would_overrun_p = false;
2349 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2350 && (can_overrun_p || !would_overrun_p)
2351 && compare_step_with_zero (vinfo, stmt_info) > 0)
2353 /* First cope with the degenerate case of a single-element
2354 vector. */
2355 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2358 /* Otherwise try using LOAD/STORE_LANES. */
2359 else if (vls_type == VLS_LOAD
2360 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2361 : vect_store_lanes_supported (vectype, group_size,
2362 masked_p))
2364 *memory_access_type = VMAT_LOAD_STORE_LANES;
2365 overrun_p = would_overrun_p;
2368 /* If that fails, try using permuting loads. */
2369 else if (vls_type == VLS_LOAD
2370 ? vect_grouped_load_supported (vectype, single_element_p,
2371 group_size)
2372 : vect_grouped_store_supported (vectype, group_size))
2374 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2375 overrun_p = would_overrun_p;
2379 /* As a last resort, try using a gather load or scatter store.
2381 ??? Although the code can handle all group sizes correctly,
2382 it probably isn't a win to use separate strided accesses based
2383 on nearby locations. Or, even if it's a win over scalar code,
2384 it might not be a win over vectorizing at a lower VF, if that
2385 allows us to use contiguous accesses. */
2386 if (*memory_access_type == VMAT_ELEMENTWISE
2387 && single_element_p
2388 && loop_vinfo
2389 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2390 masked_p, gs_info))
2391 *memory_access_type = VMAT_GATHER_SCATTER;
2394 if (*memory_access_type == VMAT_GATHER_SCATTER
2395 || *memory_access_type == VMAT_ELEMENTWISE)
2397 *alignment_support_scheme = dr_unaligned_supported;
2398 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2400 else
2402 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2403 *alignment_support_scheme
2404 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2405 *misalignment);
2408 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2410 /* STMT is the leader of the group. Check the operands of all the
2411 stmts of the group. */
2412 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2413 while (next_stmt_info)
2415 tree op = vect_get_store_rhs (next_stmt_info);
2416 enum vect_def_type dt;
2417 if (!vect_is_simple_use (op, vinfo, &dt))
2419 if (dump_enabled_p ())
2420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2421 "use not simple.\n");
2422 return false;
2424 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2428 if (overrun_p)
2430 gcc_assert (can_overrun_p);
2431 if (dump_enabled_p ())
2432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2433 "Data access with gaps requires scalar "
2434 "epilogue loop\n");
2435 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2438 return true;
2441 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2442 if there is a memory access type that the vectorized form can use,
2443 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2444 or scatters, fill in GS_INFO accordingly. In addition
2445 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2446 the target does not support the alignment scheme. *MISALIGNMENT
2447 is set according to the alignment of the access (including
2448 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2450 SLP says whether we're performing SLP rather than loop vectorization.
2451 MASKED_P is true if the statement is conditional on a vectorized mask.
2452 VECTYPE is the vector type that the vectorized statements will use.
2453 NCOPIES is the number of vector statements that will be needed. */
2455 static bool
2456 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2457 tree vectype, slp_tree slp_node,
2458 bool masked_p, vec_load_store_type vls_type,
2459 unsigned int ncopies,
2460 vect_memory_access_type *memory_access_type,
2461 poly_int64 *poffset,
2462 dr_alignment_support *alignment_support_scheme,
2463 int *misalignment,
2464 gather_scatter_info *gs_info)
2466 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2467 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2468 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2469 *poffset = 0;
2470 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2472 *memory_access_type = VMAT_GATHER_SCATTER;
2473 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2474 gcc_unreachable ();
2475 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2476 &gs_info->offset_dt,
2477 &gs_info->offset_vectype))
2479 if (dump_enabled_p ())
2480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2481 "%s index use not simple.\n",
2482 vls_type == VLS_LOAD ? "gather" : "scatter");
2483 return false;
2485 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2487 if (vls_type != VLS_LOAD)
2489 if (dump_enabled_p ())
2490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2491 "unsupported emulated scatter.\n");
2492 return false;
2494 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2495 || !TYPE_VECTOR_SUBPARTS
2496 (gs_info->offset_vectype).is_constant ()
2497 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2498 (gs_info->offset_vectype),
2499 TYPE_VECTOR_SUBPARTS (vectype)))
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2503 "unsupported vector types for emulated "
2504 "gather.\n");
2505 return false;
2508 /* Gather-scatter accesses perform only component accesses, alignment
2509 is irrelevant for them. */
2510 *alignment_support_scheme = dr_unaligned_supported;
2512 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2514 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2515 masked_p,
2516 vls_type, memory_access_type, poffset,
2517 alignment_support_scheme,
2518 misalignment, gs_info))
2519 return false;
2521 else if (STMT_VINFO_STRIDED_P (stmt_info))
2523 gcc_assert (!slp_node);
2524 if (loop_vinfo
2525 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2526 masked_p, gs_info))
2527 *memory_access_type = VMAT_GATHER_SCATTER;
2528 else
2529 *memory_access_type = VMAT_ELEMENTWISE;
2530 /* Alignment is irrelevant here. */
2531 *alignment_support_scheme = dr_unaligned_supported;
2533 else
2535 int cmp = compare_step_with_zero (vinfo, stmt_info);
2536 if (cmp == 0)
2538 gcc_assert (vls_type == VLS_LOAD);
2539 *memory_access_type = VMAT_INVARIANT;
2540 /* Invariant accesses perform only component accesses, alignment
2541 is irrelevant for them. */
2542 *alignment_support_scheme = dr_unaligned_supported;
2544 else
2546 if (cmp < 0)
2547 *memory_access_type = get_negative_load_store_type
2548 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2549 else
2550 *memory_access_type = VMAT_CONTIGUOUS;
2551 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2552 vectype, *poffset);
2553 *alignment_support_scheme
2554 = vect_supportable_dr_alignment (vinfo,
2555 STMT_VINFO_DR_INFO (stmt_info),
2556 vectype, *misalignment);
2560 if ((*memory_access_type == VMAT_ELEMENTWISE
2561 || *memory_access_type == VMAT_STRIDED_SLP)
2562 && !nunits.is_constant ())
2564 if (dump_enabled_p ())
2565 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2566 "Not using elementwise accesses due to variable "
2567 "vectorization factor.\n");
2568 return false;
2571 if (*alignment_support_scheme == dr_unaligned_unsupported)
2573 if (dump_enabled_p ())
2574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2575 "unsupported unaligned access\n");
2576 return false;
2579 /* FIXME: At the moment the cost model seems to underestimate the
2580 cost of using elementwise accesses. This check preserves the
2581 traditional behavior until that can be fixed. */
2582 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2583 if (!first_stmt_info)
2584 first_stmt_info = stmt_info;
2585 if (*memory_access_type == VMAT_ELEMENTWISE
2586 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2587 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2588 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2589 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2591 if (dump_enabled_p ())
2592 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2593 "not falling back to elementwise accesses\n");
2594 return false;
2596 return true;
2599 /* Return true if the boolean argument at MASK_INDEX is suitable for vectorizing
2600 conditional operation STMT_INFO. When returning true, store the mask
2601 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2602 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2603 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2605 static bool
2606 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2607 slp_tree slp_node, unsigned mask_index,
2608 tree *mask, slp_tree *mask_node,
2609 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2611 enum vect_def_type mask_dt;
2612 tree mask_vectype;
2613 slp_tree mask_node_1;
2614 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2615 mask, &mask_node_1, &mask_dt, &mask_vectype))
2617 if (dump_enabled_p ())
2618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2619 "mask use not simple.\n");
2620 return false;
2623 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2625 if (dump_enabled_p ())
2626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2627 "mask argument is not a boolean.\n");
2628 return false;
2631 /* If the caller is not prepared to adjust an external/constant
2632 SLP mask vector type, fail. */
2633 if (slp_node
2634 && !mask_node
2635 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2637 if (dump_enabled_p ())
2638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2639 "SLP mask argument is not vectorized.\n");
2640 return false;
2643 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2644 if (!mask_vectype)
2645 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2647 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2649 if (dump_enabled_p ())
2650 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2651 "could not find an appropriate vector mask type.\n");
2652 return false;
2655 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2656 TYPE_VECTOR_SUBPARTS (vectype)))
2658 if (dump_enabled_p ())
2659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2660 "vector mask type %T"
2661 " does not match vector data type %T.\n",
2662 mask_vectype, vectype);
2664 return false;
2667 *mask_dt_out = mask_dt;
2668 *mask_vectype_out = mask_vectype;
2669 if (mask_node)
2670 *mask_node = mask_node_1;
2671 return true;
2674 /* Return true if stored value RHS is suitable for vectorizing store
2675 statement STMT_INFO. When returning true, store the type of the
2676 definition in *RHS_DT_OUT, the type of the vectorized store value in
2677 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2679 static bool
2680 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2681 slp_tree slp_node, tree rhs,
2682 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2683 vec_load_store_type *vls_type_out)
2685 /* In case this is a store from a constant, make sure
2686 native_encode_expr can handle it. */
2687 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2689 if (dump_enabled_p ())
2690 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2691 "cannot encode constant as a byte sequence.\n");
2692 return false;
2695 unsigned op_no = 0;
2696 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2698 if (gimple_call_internal_p (call)
2699 && internal_store_fn_p (gimple_call_internal_fn (call)))
2700 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2703 enum vect_def_type rhs_dt;
2704 tree rhs_vectype;
2705 slp_tree slp_op;
2706 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2707 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2709 if (dump_enabled_p ())
2710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2711 "use not simple.\n");
2712 return false;
2715 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2716 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2718 if (dump_enabled_p ())
2719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2720 "incompatible vector types.\n");
2721 return false;
2724 *rhs_dt_out = rhs_dt;
2725 *rhs_vectype_out = rhs_vectype;
2726 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2727 *vls_type_out = VLS_STORE_INVARIANT;
2728 else
2729 *vls_type_out = VLS_STORE;
2730 return true;
2733 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2734 Note that we support masks with floating-point type, in which case the
2735 floats are interpreted as a bitmask. */
2737 static tree
2738 vect_build_all_ones_mask (vec_info *vinfo,
2739 stmt_vec_info stmt_info, tree masktype)
2741 if (TREE_CODE (masktype) == INTEGER_TYPE)
2742 return build_int_cst (masktype, -1);
2743 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2745 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2746 mask = build_vector_from_val (masktype, mask);
2747 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2749 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2751 REAL_VALUE_TYPE r;
2752 long tmp[6];
2753 for (int j = 0; j < 6; ++j)
2754 tmp[j] = -1;
2755 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2756 tree mask = build_real (TREE_TYPE (masktype), r);
2757 mask = build_vector_from_val (masktype, mask);
2758 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2760 gcc_unreachable ();
2763 /* Build an all-zero merge value of type VECTYPE while vectorizing
2764 STMT_INFO as a gather load. */
2766 static tree
2767 vect_build_zero_merge_argument (vec_info *vinfo,
2768 stmt_vec_info stmt_info, tree vectype)
2770 tree merge;
2771 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2772 merge = build_int_cst (TREE_TYPE (vectype), 0);
2773 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2775 REAL_VALUE_TYPE r;
2776 long tmp[6];
2777 for (int j = 0; j < 6; ++j)
2778 tmp[j] = 0;
2779 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2780 merge = build_real (TREE_TYPE (vectype), r);
2782 else
2783 gcc_unreachable ();
2784 merge = build_vector_from_val (vectype, merge);
2785 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2788 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2789 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2790 the gather load operation. If the load is conditional, MASK is the
2791 unvectorized condition and MASK_DT is its definition type, otherwise
2792 MASK is null. */
2794 static void
2795 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2796 gimple_stmt_iterator *gsi,
2797 gimple **vec_stmt,
2798 gather_scatter_info *gs_info,
2799 tree mask)
2801 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2802 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2803 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2804 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2805 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2806 edge pe = loop_preheader_edge (loop);
2807 enum { NARROW, NONE, WIDEN } modifier;
2808 poly_uint64 gather_off_nunits
2809 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2811 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2812 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2813 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2814 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2815 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2816 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2817 tree scaletype = TREE_VALUE (arglist);
2818 tree real_masktype = masktype;
2819 gcc_checking_assert (types_compatible_p (srctype, rettype)
2820 && (!mask
2821 || TREE_CODE (masktype) == INTEGER_TYPE
2822 || types_compatible_p (srctype, masktype)));
2823 if (mask)
2824 masktype = truth_type_for (srctype);
2826 tree mask_halftype = masktype;
2827 tree perm_mask = NULL_TREE;
2828 tree mask_perm_mask = NULL_TREE;
2829 if (known_eq (nunits, gather_off_nunits))
2830 modifier = NONE;
2831 else if (known_eq (nunits * 2, gather_off_nunits))
2833 modifier = WIDEN;
2835 /* Currently widening gathers and scatters are only supported for
2836 fixed-length vectors. */
2837 int count = gather_off_nunits.to_constant ();
2838 vec_perm_builder sel (count, count, 1);
2839 for (int i = 0; i < count; ++i)
2840 sel.quick_push (i | (count / 2));
2842 vec_perm_indices indices (sel, 1, count);
2843 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2844 indices);
2846 else if (known_eq (nunits, gather_off_nunits * 2))
2848 modifier = NARROW;
2850 /* Currently narrowing gathers and scatters are only supported for
2851 fixed-length vectors. */
2852 int count = nunits.to_constant ();
2853 vec_perm_builder sel (count, count, 1);
2854 sel.quick_grow (count);
2855 for (int i = 0; i < count; ++i)
2856 sel[i] = i < count / 2 ? i : i + count / 2;
2857 vec_perm_indices indices (sel, 2, count);
2858 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2860 ncopies *= 2;
2862 if (mask && VECTOR_TYPE_P (real_masktype))
2864 for (int i = 0; i < count; ++i)
2865 sel[i] = i | (count / 2);
2866 indices.new_vector (sel, 2, count);
2867 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2869 else if (mask)
2870 mask_halftype = truth_type_for (gs_info->offset_vectype);
2872 else
2873 gcc_unreachable ();
2875 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2876 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2878 tree ptr = fold_convert (ptrtype, gs_info->base);
2879 if (!is_gimple_min_invariant (ptr))
2881 gimple_seq seq;
2882 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2883 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2884 gcc_assert (!new_bb);
2887 tree scale = build_int_cst (scaletype, gs_info->scale);
2889 tree vec_oprnd0 = NULL_TREE;
2890 tree vec_mask = NULL_TREE;
2891 tree src_op = NULL_TREE;
2892 tree mask_op = NULL_TREE;
2893 tree prev_res = NULL_TREE;
2895 if (!mask)
2897 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2898 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2901 auto_vec<tree> vec_oprnds0;
2902 auto_vec<tree> vec_masks;
2903 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2904 modifier == WIDEN ? ncopies / 2 : ncopies,
2905 gs_info->offset, &vec_oprnds0);
2906 if (mask)
2907 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2908 modifier == NARROW ? ncopies / 2 : ncopies,
2909 mask, &vec_masks, masktype);
2910 for (int j = 0; j < ncopies; ++j)
2912 tree op, var;
2913 if (modifier == WIDEN && (j & 1))
2914 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2915 perm_mask, stmt_info, gsi);
2916 else
2917 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2919 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2921 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2922 TYPE_VECTOR_SUBPARTS (idxtype)));
2923 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2924 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2925 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2926 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2927 op = var;
2930 if (mask)
2932 if (mask_perm_mask && (j & 1))
2933 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2934 mask_perm_mask, stmt_info, gsi);
2935 else
2937 if (modifier == NARROW)
2939 if ((j & 1) == 0)
2940 vec_mask = vec_masks[j / 2];
2942 else
2943 vec_mask = vec_masks[j];
2945 mask_op = vec_mask;
2946 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2948 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2949 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2950 gcc_assert (known_eq (sub1, sub2));
2951 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2952 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2953 gassign *new_stmt
2954 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2955 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2956 mask_op = var;
2959 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2961 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2962 gassign *new_stmt
2963 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2964 : VEC_UNPACK_LO_EXPR,
2965 mask_op);
2966 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2967 mask_op = var;
2969 src_op = mask_op;
2972 tree mask_arg = mask_op;
2973 if (masktype != real_masktype)
2975 tree utype, optype = TREE_TYPE (mask_op);
2976 if (VECTOR_TYPE_P (real_masktype)
2977 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2978 utype = real_masktype;
2979 else
2980 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2981 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2982 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2983 gassign *new_stmt
2984 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2985 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2986 mask_arg = var;
2987 if (!useless_type_conversion_p (real_masktype, utype))
2989 gcc_assert (TYPE_PRECISION (utype)
2990 <= TYPE_PRECISION (real_masktype));
2991 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2992 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2993 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2994 mask_arg = var;
2996 src_op = build_zero_cst (srctype);
2998 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2999 mask_arg, scale);
3001 if (!useless_type_conversion_p (vectype, rettype))
3003 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
3004 TYPE_VECTOR_SUBPARTS (rettype)));
3005 op = vect_get_new_ssa_name (rettype, vect_simple_var);
3006 gimple_call_set_lhs (new_stmt, op);
3007 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3008 var = make_ssa_name (vec_dest);
3009 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
3010 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3011 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3013 else
3015 var = make_ssa_name (vec_dest, new_stmt);
3016 gimple_call_set_lhs (new_stmt, var);
3017 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3020 if (modifier == NARROW)
3022 if ((j & 1) == 0)
3024 prev_res = var;
3025 continue;
3027 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
3028 stmt_info, gsi);
3029 new_stmt = SSA_NAME_DEF_STMT (var);
3032 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3034 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
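/* For example (an illustrative builtin-based target): a gather loading
   V4DF data through a V8SI offset vector has known_eq (nunits * 2,
   gather_off_nunits), so MODIFIER is WIDEN and PERM_MASK selects the
   upper half of the offsets ({ 4, 5, 6, 7, 4, 5, 6, 7 }) for every odd
   copy J; the NARROW case works the other way round, merging two data
   halves with a final permute.  */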
3037 /* Prepare the base and offset in GS_INFO for vectorization.
3038 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3039 to the vectorized offset argument for the first copy of STMT_INFO.
3040 STMT_INFO is the statement described by GS_INFO and LOOP is the
3041 containing loop. */
3043 static void
3044 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3045 class loop *loop, stmt_vec_info stmt_info,
3046 slp_tree slp_node, gather_scatter_info *gs_info,
3047 tree *dataref_ptr, vec<tree> *vec_offset)
3049 gimple_seq stmts = NULL;
3050 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3051 if (stmts != NULL)
3053 basic_block new_bb;
3054 edge pe = loop_preheader_edge (loop);
3055 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3056 gcc_assert (!new_bb);
3058 if (slp_node)
3059 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3060 else
3062 unsigned ncopies
3063 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3064 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3065 gs_info->offset, vec_offset,
3066 gs_info->offset_vectype);
3070 /* Prepare to implement a grouped or strided load or store using
3071 the gather load or scatter store operation described by GS_INFO.
3072 STMT_INFO is the load or store statement.
3074 Set *DATAREF_BUMP to the amount that should be added to the base
3075 address after each copy of the vectorized statement. Set *VEC_OFFSET
3076 to an invariant offset vector in which element I has the value
3077 I * DR_STEP / SCALE. */
3079 static void
3080 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3081 loop_vec_info loop_vinfo,
3082 gather_scatter_info *gs_info,
3083 tree *dataref_bump, tree *vec_offset)
3085 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3086 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3088 tree bump = size_binop (MULT_EXPR,
3089 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3090 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3091 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3093 /* The offset given in GS_INFO can have pointer type, so use the element
3094 type of the vector instead. */
3095 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3097 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3098 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3099 ssize_int (gs_info->scale));
3100 step = fold_convert (offset_type, step);
3102 /* Create {0, X, X*2, X*3, ...}. */
3103 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3104 build_zero_cst (offset_type), step);
3105 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
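/* A worked example with hypothetical numbers: DR_STEP = 32, SCALE = 8
   and a 4-element offset vector give X = 32 / 8 = 4, so *VEC_OFFSET is
   { 0, 4, 8, 12 } and *DATAREF_BUMP is 32 * 4 = 128 bytes per copy;
   both are CSEd into the loop preheader.  */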
3108 /* Return the amount that should be added to a vector pointer to move
3109 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3110 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3111 vectorization. */
3113 static tree
3114 vect_get_data_ptr_increment (vec_info *vinfo,
3115 dr_vec_info *dr_info, tree aggr_type,
3116 vect_memory_access_type memory_access_type)
3118 if (memory_access_type == VMAT_INVARIANT)
3119 return size_zero_node;
3121 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3122 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3123 if (tree_int_cst_sgn (step) == -1)
3124 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3125 return iv_step;
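/* For instance, with AGGR_TYPE = V4SI the increment is 16 bytes, negated
   to -16 when the data reference steps backwards; VMAT_INVARIANT uses a
   zero increment because the same address is used for every copy.  */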
3128 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3130 static bool
3131 vectorizable_bswap (vec_info *vinfo,
3132 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3133 gimple **vec_stmt, slp_tree slp_node,
3134 slp_tree *slp_op,
3135 tree vectype_in, stmt_vector_for_cost *cost_vec)
3137 tree op, vectype;
3138 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3139 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3140 unsigned ncopies;
3142 op = gimple_call_arg (stmt, 0);
3143 vectype = STMT_VINFO_VECTYPE (stmt_info);
3144 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3146 /* Multiple types in SLP are handled by creating the appropriate number of
3147 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3148 case of SLP. */
3149 if (slp_node)
3150 ncopies = 1;
3151 else
3152 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3154 gcc_assert (ncopies >= 1);
3156 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3157 if (! char_vectype)
3158 return false;
3160 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3161 unsigned word_bytes;
3162 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3163 return false;
3165 /* The encoding uses one stepped pattern for each byte in the word. */
3166 vec_perm_builder elts (num_bytes, word_bytes, 3);
3167 for (unsigned i = 0; i < 3; ++i)
3168 for (unsigned j = 0; j < word_bytes; ++j)
3169 elts.quick_push ((i + 1) * word_bytes - j - 1);
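/* For example (illustrative): bswap16 on V8HI has num_bytes = 16 and
   word_bytes = 2, so the pushes above encode the byte permutation
   { 1, 0, 3, 2, 5, 4, ... }, swapping the two bytes of each halfword;
   bswap32 on V4SI uses word_bytes = 4 and { 3, 2, 1, 0, 7, 6, 5, 4, ... }.  */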
3171 vec_perm_indices indices (elts, 1, num_bytes);
3172 machine_mode vmode = TYPE_MODE (char_vectype);
3173 if (!can_vec_perm_const_p (vmode, vmode, indices))
3174 return false;
3176 if (! vec_stmt)
3178 if (slp_node
3179 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3181 if (dump_enabled_p ())
3182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3183 "incompatible vector types for invariants\n");
3184 return false;
3187 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3188 DUMP_VECT_SCOPE ("vectorizable_bswap");
3189 record_stmt_cost (cost_vec,
3190 1, vector_stmt, stmt_info, 0, vect_prologue);
3191 record_stmt_cost (cost_vec,
3192 slp_node
3193 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3194 vec_perm, stmt_info, 0, vect_body);
3195 return true;
3198 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3200 /* Transform. */
3201 vec<tree> vec_oprnds = vNULL;
3202 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3203 op, &vec_oprnds);
3204 /* Arguments are ready. Create the new vector stmt. */
3205 unsigned i;
3206 tree vop;
3207 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3209 gimple *new_stmt;
3210 tree tem = make_ssa_name (char_vectype);
3211 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3212 char_vectype, vop));
3213 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3214 tree tem2 = make_ssa_name (char_vectype);
3215 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3216 tem, tem, bswap_vconst);
3217 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3218 tem = make_ssa_name (vectype);
3219 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3220 vectype, tem2));
3221 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3222 if (slp_node)
3223 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3224 else
3225 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3228 if (!slp_node)
3229 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3231 vec_oprnds.release ();
3232 return true;
3235 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3236 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3237 in a single step. On success, store the binary pack code in
3238 *CONVERT_CODE. */
3240 static bool
3241 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3242 tree_code *convert_code)
3244 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3245 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3246 return false;
3248 tree_code code;
3249 int multi_step_cvt = 0;
3250 auto_vec <tree, 8> interm_types;
3251 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3252 &code, &multi_step_cvt, &interm_types)
3253 || multi_step_cvt)
3254 return false;
3256 *convert_code = code;
3257 return true;
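/* For example, with VECTYPE_IN = V2DI and VECTYPE_OUT = V4SI the
   narrowing is a single VEC_PACK_TRUNC_EXPR step, so that code is
   stored in *CONVERT_CODE; anything needing an intermediate type
   (MULTI_STEP_CVT != 0) is rejected.  */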
3260 /* Function vectorizable_call.
3262 Check if STMT_INFO performs a function call that can be vectorized.
3263 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3264 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3265 Return true if STMT_INFO is vectorizable in this way. */
3267 static bool
3268 vectorizable_call (vec_info *vinfo,
3269 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3270 gimple **vec_stmt, slp_tree slp_node,
3271 stmt_vector_for_cost *cost_vec)
3273 gcall *stmt;
3274 tree vec_dest;
3275 tree scalar_dest;
3276 tree op;
3277 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3278 tree vectype_out, vectype_in;
3279 poly_uint64 nunits_in;
3280 poly_uint64 nunits_out;
3281 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3282 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3283 tree fndecl, new_temp, rhs_type;
3284 enum vect_def_type dt[4]
3285 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3286 vect_unknown_def_type };
3287 tree vectypes[ARRAY_SIZE (dt)] = {};
3288 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3289 int ndts = ARRAY_SIZE (dt);
3290 int ncopies, j;
3291 auto_vec<tree, 8> vargs;
3292 enum { NARROW, NONE, WIDEN } modifier;
3293 size_t i, nargs;
3294 tree lhs;
3296 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3297 return false;
3299 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3300 && ! vec_stmt)
3301 return false;
3303 /* Is STMT_INFO a vectorizable call? */
3304 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3305 if (!stmt)
3306 return false;
3308 if (gimple_call_internal_p (stmt)
3309 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3310 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3311 /* Handled by vectorizable_load and vectorizable_store. */
3312 return false;
3314 if (gimple_call_lhs (stmt) == NULL_TREE
3315 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3316 return false;
3318 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3320 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3322 /* Process function arguments. */
3323 rhs_type = NULL_TREE;
3324 vectype_in = NULL_TREE;
3325 nargs = gimple_call_num_args (stmt);
3327 /* Bail out if the function has more than four arguments; we do not have
3328 interesting builtin functions to vectorize with more than two arguments
3329 except for fma. Having no arguments is not supported either. */
3330 if (nargs == 0 || nargs > 4)
3331 return false;
3333 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3334 combined_fn cfn = gimple_call_combined_fn (stmt);
3335 if (cfn == CFN_GOMP_SIMD_LANE)
3337 nargs = 0;
3338 rhs_type = unsigned_type_node;
3341 int mask_opno = -1;
3342 if (internal_fn_p (cfn))
3343 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3345 for (i = 0; i < nargs; i++)
3347 if ((int) i == mask_opno)
3349 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3350 &op, &slp_op[i], &dt[i], &vectypes[i]))
3351 return false;
3352 continue;
3355 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3356 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3358 if (dump_enabled_p ())
3359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3360 "use not simple.\n");
3361 return false;
3364 /* We can only handle calls with arguments of the same type. */
3365 if (rhs_type
3366 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3368 if (dump_enabled_p ())
3369 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3370 "argument types differ.\n");
3371 return false;
3373 if (!rhs_type)
3374 rhs_type = TREE_TYPE (op);
3376 if (!vectype_in)
3377 vectype_in = vectypes[i];
3378 else if (vectypes[i]
3379 && !types_compatible_p (vectypes[i], vectype_in))
3381 if (dump_enabled_p ())
3382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3383 "argument vector types differ.\n");
3384 return false;
3387 /* If all arguments are external or constant defs, infer the vector type
3388 from the scalar type. */
3389 if (!vectype_in)
3390 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3391 if (vec_stmt)
3392 gcc_assert (vectype_in);
3393 if (!vectype_in)
3395 if (dump_enabled_p ())
3396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3397 "no vectype for scalar type %T\n", rhs_type);
3399 return false;
3401 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3402 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3403 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3404 by a pack of the two vectors into an SI vector. We would need
3405 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3406 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3408 if (dump_enabled_p ())
3409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3410 "mismatched vector sizes %T and %T\n",
3411 vectype_in, vectype_out);
3412 return false;
3415 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3416 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3418 if (dump_enabled_p ())
3419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3420 "mixed mask and nonmask vector types\n");
3421 return false;
3424 /* FORNOW */
3425 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3426 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
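/* Classify the call by element counts: twice as many output elements as
   input elements means the result is narrowed (NARROW), half as many
   means it is widened (WIDEN), and equal counts mean no width change
   (NONE).  */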
3427 if (known_eq (nunits_in * 2, nunits_out))
3428 modifier = NARROW;
3429 else if (known_eq (nunits_out, nunits_in))
3430 modifier = NONE;
3431 else if (known_eq (nunits_out * 2, nunits_in))
3432 modifier = WIDEN;
3433 else
3434 return false;
3436 /* We only handle functions that do not read or clobber memory. */
3437 if (gimple_vuse (stmt))
3439 if (dump_enabled_p ())
3440 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3441 "function reads from or writes to memory.\n");
3442 return false;
3445 /* For now, we only vectorize functions if a matching internal function
3446 or target-specific builtin is available. TODO -- in some cases, it
3447 might be profitable to insert the calls for pieces of the vector, in
3448 order to be able to vectorize other operations in the loop. */
3449 fndecl = NULL_TREE;
3450 internal_fn ifn = IFN_LAST;
3451 tree callee = gimple_call_fndecl (stmt);
3453 /* First try using an internal function. */
3454 tree_code convert_code = ERROR_MARK;
3455 if (cfn != CFN_LAST
3456 && (modifier == NONE
3457 || (modifier == NARROW
3458 && simple_integer_narrowing (vectype_out, vectype_in,
3459 &convert_code))))
3460 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3461 vectype_in);
3463 /* If that fails, try asking for a target-specific built-in function. */
3464 if (ifn == IFN_LAST)
3466 if (cfn != CFN_LAST)
3467 fndecl = targetm.vectorize.builtin_vectorized_function
3468 (cfn, vectype_out, vectype_in);
3469 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3470 fndecl = targetm.vectorize.builtin_md_vectorized_function
3471 (callee, vectype_out, vectype_in);
3474 if (ifn == IFN_LAST && !fndecl)
3476 if (cfn == CFN_GOMP_SIMD_LANE
3477 && !slp_node
3478 && loop_vinfo
3479 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3480 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3481 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3482 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3484 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3485 { 0, 1, 2, ... vf - 1 } vector. */
3486 gcc_assert (nargs == 0);
3488 else if (modifier == NONE
3489 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3490 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3491 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3492 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3493 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3494 slp_op, vectype_in, cost_vec);
3495 else
3497 if (dump_enabled_p ())
3498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3499 "function is not vectorizable.\n");
3500 return false;
3504 if (slp_node)
3505 ncopies = 1;
3506 else if (modifier == NARROW && ifn == IFN_LAST)
3507 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3508 else
3509 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3511 /* Sanity check: make sure that at least one copy of the vectorized stmt
3512 needs to be generated. */
3513 gcc_assert (ncopies >= 1);
3515 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3516 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3517 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3518 if (!vec_stmt) /* transformation not required. */
3520 if (slp_node)
3521 for (i = 0; i < nargs; ++i)
3522 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3523 vectypes[i]
3524 ? vectypes[i] : vectype_in))
3526 if (dump_enabled_p ())
3527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3528 "incompatible vector types for invariants\n");
3529 return false;
3531 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3532 DUMP_VECT_SCOPE ("vectorizable_call");
3533 vect_model_simple_cost (vinfo, stmt_info,
3534 ncopies, dt, ndts, slp_node, cost_vec);
3535 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3536 record_stmt_cost (cost_vec, ncopies / 2,
3537 vec_promote_demote, stmt_info, 0, vect_body);
3539 if (loop_vinfo
3540 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3541 && (reduc_idx >= 0 || mask_opno >= 0))
3543 if (reduc_idx >= 0
3544 && (cond_fn == IFN_LAST
3545 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3546 OPTIMIZE_FOR_SPEED)))
3548 if (dump_enabled_p ())
3549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3550 "can't use a fully-masked loop because no"
3551 " conditional operation is available.\n");
3552 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3554 else
3556 unsigned int nvectors
3557 = (slp_node
3558 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3559 : ncopies);
3560 tree scalar_mask = NULL_TREE;
3561 if (mask_opno >= 0)
3562 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3563 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3564 vectype_out, scalar_mask);
3567 return true;
3570 /* Transform. */
3572 if (dump_enabled_p ())
3573 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3575 /* Handle def. */
3576 scalar_dest = gimple_call_lhs (stmt);
3577 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3579 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3580 unsigned int vect_nargs = nargs;
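/* When a masked reduction is vectorized with a conditional internal
   function, the call gains two extra operands: the loop mask up front
   and a fallback ("else") value at the end.  */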
3581 if (masked_loop_p && reduc_idx >= 0)
3583 ifn = cond_fn;
3584 vect_nargs += 2;
3587 if (modifier == NONE || ifn != IFN_LAST)
3589 tree prev_res = NULL_TREE;
3590 vargs.safe_grow (vect_nargs, true);
3591 auto_vec<vec<tree> > vec_defs (nargs);
3592 for (j = 0; j < ncopies; ++j)
3594 /* Build argument list for the vectorized call. */
3595 if (slp_node)
3597 vec<tree> vec_oprnds0;
3599 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3600 vec_oprnds0 = vec_defs[0];
3602 /* Arguments are ready. Create the new vector stmt. */
3603 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3605 int varg = 0;
3606 if (masked_loop_p && reduc_idx >= 0)
3608 unsigned int vec_num = vec_oprnds0.length ();
3609 /* Always true for SLP. */
3610 gcc_assert (ncopies == 1);
3611 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3612 vectype_out, i);
3614 size_t k;
3615 for (k = 0; k < nargs; k++)
3617 vec<tree> vec_oprndsk = vec_defs[k];
3618 vargs[varg++] = vec_oprndsk[i];
3620 if (masked_loop_p && reduc_idx >= 0)
3621 vargs[varg++] = vargs[reduc_idx + 1];
3622 gimple *new_stmt;
3623 if (modifier == NARROW)
3625 /* We don't define any narrowing conditional functions
3626 at present. */
3627 gcc_assert (mask_opno < 0);
3628 tree half_res = make_ssa_name (vectype_in);
3629 gcall *call
3630 = gimple_build_call_internal_vec (ifn, vargs);
3631 gimple_call_set_lhs (call, half_res);
3632 gimple_call_set_nothrow (call, true);
3633 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
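/* Each call yields a half-width vector; keep the even-numbered result
   and combine it with the following odd-numbered one via CONVERT_CODE
   into a single full-width output vector.  */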
3634 if ((i & 1) == 0)
3636 prev_res = half_res;
3637 continue;
3639 new_temp = make_ssa_name (vec_dest);
3640 new_stmt = gimple_build_assign (new_temp, convert_code,
3641 prev_res, half_res);
3642 vect_finish_stmt_generation (vinfo, stmt_info,
3643 new_stmt, gsi);
3645 else
3647 if (mask_opno >= 0 && masked_loop_p)
3649 unsigned int vec_num = vec_oprnds0.length ();
3650 /* Always true for SLP. */
3651 gcc_assert (ncopies == 1);
3652 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3653 vectype_out, i);
3654 vargs[mask_opno] = prepare_vec_mask
3655 (loop_vinfo, TREE_TYPE (mask), mask,
3656 vargs[mask_opno], gsi);
3659 gcall *call;
3660 if (ifn != IFN_LAST)
3661 call = gimple_build_call_internal_vec (ifn, vargs);
3662 else
3663 call = gimple_build_call_vec (fndecl, vargs);
3664 new_temp = make_ssa_name (vec_dest, call);
3665 gimple_call_set_lhs (call, new_temp);
3666 gimple_call_set_nothrow (call, true);
3667 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3668 new_stmt = call;
3670 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3672 continue;
3675 int varg = 0;
3676 if (masked_loop_p && reduc_idx >= 0)
3677 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3678 vectype_out, j);
3679 for (i = 0; i < nargs; i++)
3681 op = gimple_call_arg (stmt, i);
3682 if (j == 0)
3684 vec_defs.quick_push (vNULL);
3685 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3686 op, &vec_defs[i],
3687 vectypes[i]);
3689 vargs[varg++] = vec_defs[i][j];
3691 if (masked_loop_p && reduc_idx >= 0)
3692 vargs[varg++] = vargs[reduc_idx + 1];
3694 if (mask_opno >= 0 && masked_loop_p)
3696 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3697 vectype_out, j);
3698 vargs[mask_opno]
3699 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3700 vargs[mask_opno], gsi);
3703 gimple *new_stmt;
3704 if (cfn == CFN_GOMP_SIMD_LANE)
3706 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3707 tree new_var
3708 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3709 gimple *init_stmt = gimple_build_assign (new_var, cst);
3710 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3711 new_temp = make_ssa_name (vec_dest);
3712 new_stmt = gimple_build_assign (new_temp, new_var);
3713 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3715 else if (modifier == NARROW)
3717 /* We don't define any narrowing conditional functions at
3718 present. */
3719 gcc_assert (mask_opno < 0);
3720 tree half_res = make_ssa_name (vectype_in);
3721 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3722 gimple_call_set_lhs (call, half_res);
3723 gimple_call_set_nothrow (call, true);
3724 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3725 if ((j & 1) == 0)
3727 prev_res = half_res;
3728 continue;
3730 new_temp = make_ssa_name (vec_dest);
3731 new_stmt = gimple_build_assign (new_temp, convert_code,
3732 prev_res, half_res);
3733 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3735 else
3737 gcall *call;
3738 if (ifn != IFN_LAST)
3739 call = gimple_build_call_internal_vec (ifn, vargs);
3740 else
3741 call = gimple_build_call_vec (fndecl, vargs);
3742 new_temp = make_ssa_name (vec_dest, call);
3743 gimple_call_set_lhs (call, new_temp);
3744 gimple_call_set_nothrow (call, true);
3745 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3746 new_stmt = call;
3749 if (j == (modifier == NARROW ? 1 : 0))
3750 *vec_stmt = new_stmt;
3751 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3753 for (i = 0; i < nargs; i++)
3755 vec<tree> vec_oprndsi = vec_defs[i];
3756 vec_oprndsi.release ();
3759 else if (modifier == NARROW)
3761 auto_vec<vec<tree> > vec_defs (nargs);
3762 /* We don't define any narrowing conditional functions at present. */
3763 gcc_assert (mask_opno < 0);
3764 for (j = 0; j < ncopies; ++j)
3766 /* Build argument list for the vectorized call. */
3767 if (j == 0)
3768 vargs.create (nargs * 2);
3769 else
3770 vargs.truncate (0);
3772 if (slp_node)
3774 vec<tree> vec_oprnds0;
3776 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3777 vec_oprnds0 = vec_defs[0];
3779 /* Arguments are ready. Create the new vector stmt. */
3780 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3782 size_t k;
3783 vargs.truncate (0);
3784 for (k = 0; k < nargs; k++)
3786 vec<tree> vec_oprndsk = vec_defs[k];
3787 vargs.quick_push (vec_oprndsk[i]);
3788 vargs.quick_push (vec_oprndsk[i + 1]);
3790 gcall *call;
3791 if (ifn != IFN_LAST)
3792 call = gimple_build_call_internal_vec (ifn, vargs);
3793 else
3794 call = gimple_build_call_vec (fndecl, vargs);
3795 new_temp = make_ssa_name (vec_dest, call);
3796 gimple_call_set_lhs (call, new_temp);
3797 gimple_call_set_nothrow (call, true);
3798 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3799 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3801 continue;
3804 for (i = 0; i < nargs; i++)
3806 op = gimple_call_arg (stmt, i);
3807 if (j == 0)
3809 vec_defs.quick_push (vNULL);
3810 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3811 op, &vec_defs[i], vectypes[i]);
3813 vec_oprnd0 = vec_defs[i][2*j];
3814 vec_oprnd1 = vec_defs[i][2*j+1];
3816 vargs.quick_push (vec_oprnd0);
3817 vargs.quick_push (vec_oprnd1);
3820 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3821 new_temp = make_ssa_name (vec_dest, new_stmt);
3822 gimple_call_set_lhs (new_stmt, new_temp);
3823 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3825 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3828 if (!slp_node)
3829 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3831 for (i = 0; i < nargs; i++)
3833 vec<tree> vec_oprndsi = vec_defs[i];
3834 vec_oprndsi.release ();
3837 else
3838 /* No current target implements this case. */
3839 return false;
3841 vargs.release ();
3843 /* The call in STMT might prevent it from being removed in DCE.
3844 However, we cannot remove it here, due to the way the SSA name
3845 it defines is mapped to the new definition. So just replace the
3846 rhs of the statement with something harmless. */
3848 if (slp_node)
3849 return true;
3851 stmt_info = vect_orig_stmt (stmt_info);
3852 lhs = gimple_get_lhs (stmt_info->stmt);
3854 gassign *new_stmt
3855 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3856 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3858 return true;
3862 struct simd_call_arg_info
3864 tree vectype;
3865 tree op;
3866 HOST_WIDE_INT linear_step;
3867 enum vect_def_type dt;
3868 unsigned int align;
3869 bool simd_lane_linear;
3872 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3873 is linear within a simd lane (but not within the whole loop), note it
3874 in *ARGINFO. */
3876 static void
3877 vect_simd_lane_linear (tree op, class loop *loop,
3878 struct simd_call_arg_info *arginfo)
3880 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3882 if (!is_gimple_assign (def_stmt)
3883 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3884 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3885 return;
3887 tree base = gimple_assign_rhs1 (def_stmt);
3888 HOST_WIDE_INT linear_step = 0;
3889 tree v = gimple_assign_rhs2 (def_stmt);
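/* Walk the definition chain of V: fold constant offsets into BASE,
   record a single constant multiplier as LINEAR_STEP, and look through
   conversions, until the IFN_GOMP_SIMD_LANE call for this loop's
   simduid is reached.  */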
3890 while (TREE_CODE (v) == SSA_NAME)
3892 tree t;
3893 def_stmt = SSA_NAME_DEF_STMT (v);
3894 if (is_gimple_assign (def_stmt))
3895 switch (gimple_assign_rhs_code (def_stmt))
3897 case PLUS_EXPR:
3898 t = gimple_assign_rhs2 (def_stmt);
3899 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3900 return;
3901 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3902 v = gimple_assign_rhs1 (def_stmt);
3903 continue;
3904 case MULT_EXPR:
3905 t = gimple_assign_rhs2 (def_stmt);
3906 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3907 return;
3908 linear_step = tree_to_shwi (t);
3909 v = gimple_assign_rhs1 (def_stmt);
3910 continue;
3911 CASE_CONVERT:
3912 t = gimple_assign_rhs1 (def_stmt);
3913 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3914 || (TYPE_PRECISION (TREE_TYPE (v))
3915 < TYPE_PRECISION (TREE_TYPE (t))))
3916 return;
3917 if (!linear_step)
3918 linear_step = 1;
3919 v = t;
3920 continue;
3921 default:
3922 return;
3924 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3925 && loop->simduid
3926 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3927 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3928 == loop->simduid))
3930 if (!linear_step)
3931 linear_step = 1;
3932 arginfo->linear_step = linear_step;
3933 arginfo->op = base;
3934 arginfo->simd_lane_linear = true;
3935 return;
3940 /* Return the number of elements in vector type VECTYPE, which is associated
3941 with a SIMD clone. At present these vectors always have a constant
3942 length. */
3944 static unsigned HOST_WIDE_INT
3945 simd_clone_subparts (tree vectype)
3947 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3950 /* Function vectorizable_simd_clone_call.
3952 Check if STMT_INFO performs a function call that can be vectorized
3953 by calling a simd clone of the function.
3954 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3955 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3956 Return true if STMT_INFO is vectorizable in this way. */
3958 static bool
3959 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3960 gimple_stmt_iterator *gsi,
3961 gimple **vec_stmt, slp_tree slp_node,
3962 stmt_vector_for_cost *)
3964 tree vec_dest;
3965 tree scalar_dest;
3966 tree op, type;
3967 tree vec_oprnd0 = NULL_TREE;
3968 tree vectype;
3969 poly_uint64 nunits;
3970 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3971 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3972 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3973 tree fndecl, new_temp;
3974 int ncopies, j;
3975 auto_vec<simd_call_arg_info> arginfo;
3976 vec<tree> vargs = vNULL;
3977 size_t i, nargs;
3978 tree lhs, rtype, ratype;
3979 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3981 /* Is STMT a vectorizable call? */
3982 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3983 if (!stmt)
3984 return false;
3986 fndecl = gimple_call_fndecl (stmt);
3987 if (fndecl == NULL_TREE)
3988 return false;
3990 struct cgraph_node *node = cgraph_node::get (fndecl);
3991 if (node == NULL || node->simd_clones == NULL)
3992 return false;
3994 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3995 return false;
3997 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3998 && ! vec_stmt)
3999 return false;
4001 if (gimple_call_lhs (stmt)
4002 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4003 return false;
4005 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4007 vectype = STMT_VINFO_VECTYPE (stmt_info);
4009 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4010 return false;
4012 /* FORNOW */
4013 if (slp_node)
4014 return false;
4016 /* Process function arguments. */
4017 nargs = gimple_call_num_args (stmt);
4019 /* Bail out if the function has zero arguments. */
4020 if (nargs == 0)
4021 return false;
4023 arginfo.reserve (nargs, true);
4025 for (i = 0; i < nargs; i++)
4027 simd_call_arg_info thisarginfo;
4028 affine_iv iv;
4030 thisarginfo.linear_step = 0;
4031 thisarginfo.align = 0;
4032 thisarginfo.op = NULL_TREE;
4033 thisarginfo.simd_lane_linear = false;
4035 op = gimple_call_arg (stmt, i);
4036 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4037 &thisarginfo.vectype)
4038 || thisarginfo.dt == vect_uninitialized_def)
4040 if (dump_enabled_p ())
4041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4042 "use not simple.\n");
4043 return false;
4046 if (thisarginfo.dt == vect_constant_def
4047 || thisarginfo.dt == vect_external_def)
4048 gcc_assert (thisarginfo.vectype == NULL_TREE);
4049 else
4051 gcc_assert (thisarginfo.vectype != NULL_TREE);
4052 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
4054 if (dump_enabled_p ())
4055 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4056 "vector mask arguments are not supported\n");
4057 return false;
4061 /* For linear arguments, the analysis phase should have saved
4062 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4063 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4064 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4066 gcc_assert (vec_stmt);
4067 thisarginfo.linear_step
4068 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4069 thisarginfo.op
4070 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4071 thisarginfo.simd_lane_linear
4072 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4073 == boolean_true_node);
4074 /* If the loop has been peeled for alignment, adjust the recorded base accordingly. */
4075 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4076 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4077 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4079 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4080 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4081 tree opt = TREE_TYPE (thisarginfo.op);
4082 bias = fold_convert (TREE_TYPE (step), bias);
4083 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4084 thisarginfo.op
4085 = fold_build2 (POINTER_TYPE_P (opt)
4086 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4087 thisarginfo.op, bias);
4090 else if (!vec_stmt
4091 && thisarginfo.dt != vect_constant_def
4092 && thisarginfo.dt != vect_external_def
4093 && loop_vinfo
4094 && TREE_CODE (op) == SSA_NAME
4095 && simple_iv (loop, loop_containing_stmt (stmt), op,
4096 &iv, false)
4097 && tree_fits_shwi_p (iv.step))
4099 thisarginfo.linear_step = tree_to_shwi (iv.step);
4100 thisarginfo.op = iv.base;
4102 else if ((thisarginfo.dt == vect_constant_def
4103 || thisarginfo.dt == vect_external_def)
4104 && POINTER_TYPE_P (TREE_TYPE (op)))
4105 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4106 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4107 linear too. */
4108 if (POINTER_TYPE_P (TREE_TYPE (op))
4109 && !thisarginfo.linear_step
4110 && !vec_stmt
4111 && thisarginfo.dt != vect_constant_def
4112 && thisarginfo.dt != vect_external_def
4113 && loop_vinfo
4114 && !slp_node
4115 && TREE_CODE (op) == SSA_NAME)
4116 vect_simd_lane_linear (op, loop, &thisarginfo);
4118 arginfo.quick_push (thisarginfo);
4121 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4122 if (!vf.is_constant ())
4124 if (dump_enabled_p ())
4125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4126 "not considering SIMD clones; not yet supported"
4127 " for variable-width vectors.\n");
4128 return false;
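/* Select which SIMD clone to call: reuse the clone recorded during
   analysis if there is one; otherwise score each clone ("badness"),
   penalizing clones that need several calls per vector iteration, have
   target-reported overhead, or mismatch the argument kinds, and keep
   the lowest-scoring usable clone.  Inbranch clones are not yet
   handled.  */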
4131 unsigned int badness = 0;
4132 struct cgraph_node *bestn = NULL;
4133 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4134 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4135 else
4136 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4137 n = n->simdclone->next_clone)
4139 unsigned int this_badness = 0;
4140 unsigned int num_calls;
4141 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4142 || n->simdclone->nargs != nargs)
4143 continue;
4144 if (num_calls != 1)
4145 this_badness += exact_log2 (num_calls) * 4096;
4146 if (n->simdclone->inbranch)
4147 this_badness += 8192;
4148 int target_badness = targetm.simd_clone.usable (n);
4149 if (target_badness < 0)
4150 continue;
4151 this_badness += target_badness * 512;
4152 /* FORNOW: Have to add code to add the mask argument. */
4153 if (n->simdclone->inbranch)
4154 continue;
4155 for (i = 0; i < nargs; i++)
4157 switch (n->simdclone->args[i].arg_type)
4159 case SIMD_CLONE_ARG_TYPE_VECTOR:
4160 if (!useless_type_conversion_p
4161 (n->simdclone->args[i].orig_type,
4162 TREE_TYPE (gimple_call_arg (stmt, i))))
4163 i = -1;
4164 else if (arginfo[i].dt == vect_constant_def
4165 || arginfo[i].dt == vect_external_def
4166 || arginfo[i].linear_step)
4167 this_badness += 64;
4168 break;
4169 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4170 if (arginfo[i].dt != vect_constant_def
4171 && arginfo[i].dt != vect_external_def)
4172 i = -1;
4173 break;
4174 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4175 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4176 if (arginfo[i].dt == vect_constant_def
4177 || arginfo[i].dt == vect_external_def
4178 || (arginfo[i].linear_step
4179 != n->simdclone->args[i].linear_step))
4180 i = -1;
4181 break;
4182 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4183 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4185 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4186 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4187 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4188 /* FORNOW */
4189 i = -1;
4190 break;
4191 case SIMD_CLONE_ARG_TYPE_MASK:
4192 gcc_unreachable ();
4194 if (i == (size_t) -1)
4195 break;
4196 if (n->simdclone->args[i].alignment > arginfo[i].align)
4198 i = -1;
4199 break;
4201 if (arginfo[i].align)
4202 this_badness += (exact_log2 (arginfo[i].align)
4203 - exact_log2 (n->simdclone->args[i].alignment));
4205 if (i == (size_t) -1)
4206 continue;
4207 if (bestn == NULL || this_badness < badness)
4209 bestn = n;
4210 badness = this_badness;
4214 if (bestn == NULL)
4215 return false;
4217 for (i = 0; i < nargs; i++)
4218 if ((arginfo[i].dt == vect_constant_def
4219 || arginfo[i].dt == vect_external_def)
4220 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4222 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4223 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4224 slp_node);
4225 if (arginfo[i].vectype == NULL
4226 || !constant_multiple_p (bestn->simdclone->simdlen,
4227 simd_clone_subparts (arginfo[i].vectype)))
4228 return false;
4231 fndecl = bestn->decl;
4232 nunits = bestn->simdclone->simdlen;
4233 ncopies = vector_unroll_factor (vf, nunits);
4235 /* If the function isn't const, only allow it in simd loops where the
4236 user has asserted that at least nunits consecutive iterations can be
4237 performed using SIMD instructions. */
4238 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4239 && gimple_vuse (stmt))
4240 return false;
4242 /* Sanity check: make sure that at least one copy of the vectorized stmt
4243 needs to be generated. */
4244 gcc_assert (ncopies >= 1);
4246 if (!vec_stmt) /* transformation not required. */
4248 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4249 for (i = 0; i < nargs; i++)
4250 if ((bestn->simdclone->args[i].arg_type
4251 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4252 || (bestn->simdclone->args[i].arg_type
4253 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4255 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4256 + 1,
4257 true);
4258 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4259 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4260 ? size_type_node : TREE_TYPE (arginfo[i].op);
4261 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4262 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4263 tree sll = arginfo[i].simd_lane_linear
4264 ? boolean_true_node : boolean_false_node;
4265 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4267 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4268 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4269 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4270 dt, slp_node, cost_vec); */
4271 return true;
4274 /* Transform. */
4276 if (dump_enabled_p ())
4277 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4279 /* Handle def. */
4280 scalar_dest = gimple_call_lhs (stmt);
4281 vec_dest = NULL_TREE;
4282 rtype = NULL_TREE;
4283 ratype = NULL_TREE;
4284 if (scalar_dest)
4286 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4287 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4288 if (TREE_CODE (rtype) == ARRAY_TYPE)
4290 ratype = rtype;
4291 rtype = TREE_TYPE (ratype);
4295 auto_vec<vec<tree> > vec_oprnds;
4296 auto_vec<unsigned> vec_oprnds_i;
4297 vec_oprnds.safe_grow_cleared (nargs, true);
4298 vec_oprnds_i.safe_grow_cleared (nargs, true);
4299 for (j = 0; j < ncopies; ++j)
4301 /* Build argument list for the vectorized call. */
4302 if (j == 0)
4303 vargs.create (nargs);
4304 else
4305 vargs.truncate (0);
4307 for (i = 0; i < nargs; i++)
4309 unsigned int k, l, m, o;
4310 tree atype;
4311 op = gimple_call_arg (stmt, i);
4312 switch (bestn->simdclone->args[i].arg_type)
4314 case SIMD_CLONE_ARG_TYPE_VECTOR:
4315 atype = bestn->simdclone->args[i].vector_type;
4316 o = vector_unroll_factor (nunits,
4317 simd_clone_subparts (atype));
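/* The clone takes this argument as O vectors of type ATYPE per call.
   Each ATYPE piece is either extracted from a wider loop vector with a
   BIT_FIELD_REF or assembled from several narrower loop vectors with a
   CONSTRUCTOR (just a VIEW_CONVERT when a single def suffices).  */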
4318 for (m = j * o; m < (j + 1) * o; m++)
4320 if (simd_clone_subparts (atype)
4321 < simd_clone_subparts (arginfo[i].vectype))
4323 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4324 k = (simd_clone_subparts (arginfo[i].vectype)
4325 / simd_clone_subparts (atype));
4326 gcc_assert ((k & (k - 1)) == 0);
4327 if (m == 0)
4329 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4330 ncopies * o / k, op,
4331 &vec_oprnds[i]);
4332 vec_oprnds_i[i] = 0;
4333 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4335 else
4337 vec_oprnd0 = arginfo[i].op;
4338 if ((m & (k - 1)) == 0)
4339 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4341 arginfo[i].op = vec_oprnd0;
4342 vec_oprnd0
4343 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4344 bitsize_int (prec),
4345 bitsize_int ((m & (k - 1)) * prec));
4346 gassign *new_stmt
4347 = gimple_build_assign (make_ssa_name (atype),
4348 vec_oprnd0);
4349 vect_finish_stmt_generation (vinfo, stmt_info,
4350 new_stmt, gsi);
4351 vargs.safe_push (gimple_assign_lhs (new_stmt));
4353 else
4355 k = (simd_clone_subparts (atype)
4356 / simd_clone_subparts (arginfo[i].vectype));
4357 gcc_assert ((k & (k - 1)) == 0);
4358 vec<constructor_elt, va_gc> *ctor_elts;
4359 if (k != 1)
4360 vec_alloc (ctor_elts, k);
4361 else
4362 ctor_elts = NULL;
4363 for (l = 0; l < k; l++)
4365 if (m == 0 && l == 0)
4367 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4368 k * o * ncopies,
4370 &vec_oprnds[i]);
4371 vec_oprnds_i[i] = 0;
4372 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4374 else
4375 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4376 arginfo[i].op = vec_oprnd0;
4377 if (k == 1)
4378 break;
4379 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4380 vec_oprnd0);
4382 if (k == 1)
4383 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4384 atype))
4386 vec_oprnd0
4387 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4388 gassign *new_stmt
4389 = gimple_build_assign (make_ssa_name (atype),
4390 vec_oprnd0);
4391 vect_finish_stmt_generation (vinfo, stmt_info,
4392 new_stmt, gsi);
4393 vargs.safe_push (gimple_assign_lhs (new_stmt));
4395 else
4396 vargs.safe_push (vec_oprnd0);
4397 else
4399 vec_oprnd0 = build_constructor (atype, ctor_elts);
4400 gassign *new_stmt
4401 = gimple_build_assign (make_ssa_name (atype),
4402 vec_oprnd0);
4403 vect_finish_stmt_generation (vinfo, stmt_info,
4404 new_stmt, gsi);
4405 vargs.safe_push (gimple_assign_lhs (new_stmt));
4409 break;
4410 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4411 vargs.safe_push (op);
4412 break;
4413 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4414 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
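/* A linear argument is passed as the scalar value for the first lane.
   Emit the start value in the loop preheader and, unless the argument
   is linear within the simd lane, create a header PHI that advances it
   by LINEAR_STEP * VF each loop iteration; later copies within an
   iteration add their own LINEAR_STEP * J * NUNITS offset.  */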
4415 if (j == 0)
4417 gimple_seq stmts;
4418 arginfo[i].op
4419 = force_gimple_operand (unshare_expr (arginfo[i].op),
4420 &stmts, true, NULL_TREE);
4421 if (stmts != NULL)
4423 basic_block new_bb;
4424 edge pe = loop_preheader_edge (loop);
4425 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4426 gcc_assert (!new_bb);
4428 if (arginfo[i].simd_lane_linear)
4430 vargs.safe_push (arginfo[i].op);
4431 break;
4433 tree phi_res = copy_ssa_name (op);
4434 gphi *new_phi = create_phi_node (phi_res, loop->header);
4435 add_phi_arg (new_phi, arginfo[i].op,
4436 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4437 enum tree_code code
4438 = POINTER_TYPE_P (TREE_TYPE (op))
4439 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4440 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4441 ? sizetype : TREE_TYPE (op);
4442 poly_widest_int cst
4443 = wi::mul (bestn->simdclone->args[i].linear_step,
4444 ncopies * nunits);
4445 tree tcst = wide_int_to_tree (type, cst);
4446 tree phi_arg = copy_ssa_name (op);
4447 gassign *new_stmt
4448 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4449 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4450 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4451 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4452 UNKNOWN_LOCATION);
4453 arginfo[i].op = phi_res;
4454 vargs.safe_push (phi_res);
4456 else
4458 enum tree_code code
4459 = POINTER_TYPE_P (TREE_TYPE (op))
4460 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4461 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4462 ? sizetype : TREE_TYPE (op);
4463 poly_widest_int cst
4464 = wi::mul (bestn->simdclone->args[i].linear_step,
4465 j * nunits);
4466 tree tcst = wide_int_to_tree (type, cst);
4467 new_temp = make_ssa_name (TREE_TYPE (op));
4468 gassign *new_stmt
4469 = gimple_build_assign (new_temp, code,
4470 arginfo[i].op, tcst);
4471 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4472 vargs.safe_push (new_temp);
4474 break;
4475 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4476 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4477 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4478 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4479 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4480 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4481 default:
4482 gcc_unreachable ();
4486 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4487 if (vec_dest)
4489 gcc_assert (ratype
4490 || known_eq (simd_clone_subparts (rtype), nunits));
4491 if (ratype)
4492 new_temp = create_tmp_var (ratype);
4493 else if (useless_type_conversion_p (vectype, rtype))
4494 new_temp = make_ssa_name (vec_dest, new_call);
4495 else
4496 new_temp = make_ssa_name (rtype, new_call);
4497 gimple_call_set_lhs (new_call, new_temp);
4499 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4500 gimple *new_stmt = new_call;
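/* Bring the clone's return value back to the loop's vector type: a
   return vector wider than VECTYPE is split with BIT_FIELD_REFs (or
   read piecewise from an array return), a narrower one is accumulated
   over consecutive calls into a CONSTRUCTOR, and otherwise at most a
   VIEW_CONVERT or a load from the array temporary is needed.  */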
4502 if (vec_dest)
4504 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4506 unsigned int k, l;
4507 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4508 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4509 k = vector_unroll_factor (nunits,
4510 simd_clone_subparts (vectype));
4511 gcc_assert ((k & (k - 1)) == 0);
4512 for (l = 0; l < k; l++)
4514 tree t;
4515 if (ratype)
4517 t = build_fold_addr_expr (new_temp);
4518 t = build2 (MEM_REF, vectype, t,
4519 build_int_cst (TREE_TYPE (t), l * bytes));
4521 else
4522 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4523 bitsize_int (prec), bitsize_int (l * prec));
4524 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4525 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4527 if (j == 0 && l == 0)
4528 *vec_stmt = new_stmt;
4529 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4532 if (ratype)
4533 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4534 continue;
4536 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4538 unsigned int k = (simd_clone_subparts (vectype)
4539 / simd_clone_subparts (rtype));
4540 gcc_assert ((k & (k - 1)) == 0);
4541 if ((j & (k - 1)) == 0)
4542 vec_alloc (ret_ctor_elts, k);
4543 if (ratype)
4545 unsigned int m, o;
4546 o = vector_unroll_factor (nunits,
4547 simd_clone_subparts (rtype));
4548 for (m = 0; m < o; m++)
4550 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4551 size_int (m), NULL_TREE, NULL_TREE);
4552 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4553 tem);
4554 vect_finish_stmt_generation (vinfo, stmt_info,
4555 new_stmt, gsi);
4556 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4557 gimple_assign_lhs (new_stmt));
4559 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4561 else
4562 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4563 if ((j & (k - 1)) != k - 1)
4564 continue;
4565 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4566 new_stmt
4567 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4568 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4570 if ((unsigned) j == k - 1)
4571 *vec_stmt = new_stmt;
4572 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4573 continue;
4575 else if (ratype)
4577 tree t = build_fold_addr_expr (new_temp);
4578 t = build2 (MEM_REF, vectype, t,
4579 build_int_cst (TREE_TYPE (t), 0));
4580 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4581 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4582 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4584 else if (!useless_type_conversion_p (vectype, rtype))
4586 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4587 new_stmt
4588 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4589 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4593 if (j == 0)
4594 *vec_stmt = new_stmt;
4595 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4598 for (i = 0; i < nargs; ++i)
4600 vec<tree> oprndsi = vec_oprnds[i];
4601 oprndsi.release ();
4603 vargs.release ();
4605 /* The call in STMT might prevent it from being removed in DCE.
4606 However, we cannot remove it here, due to the way the SSA name
4607 it defines is mapped to the new definition. So just replace the
4608 rhs of the statement with something harmless. */
4610 if (slp_node)
4611 return true;
4613 gimple *new_stmt;
4614 if (scalar_dest)
4616 type = TREE_TYPE (scalar_dest);
4617 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4618 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4620 else
4621 new_stmt = gimple_build_nop ();
4622 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4623 unlink_stmt_vdef (stmt);
4625 return true;
4629 /* Function vect_gen_widened_results_half
4631 Create a vector stmt whose code, operand count, and result variable
4632 are CODE, OP_TYPE, and VEC_DEST, and whose arguments are VEC_OPRND0
4633 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4636 STMT_INFO is the original scalar stmt that we are vectorizing. */
4638 static gimple *
4639 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4640 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4641 tree vec_dest, gimple_stmt_iterator *gsi,
4642 stmt_vec_info stmt_info)
4644 gimple *new_stmt;
4645 tree new_temp;
4647 /* Generate half of the widened result: */
4648 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4649 if (op_type != binary_op)
4650 vec_oprnd1 = NULL;
4651 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4652 new_temp = make_ssa_name (vec_dest, new_stmt);
4653 gimple_assign_set_lhs (new_stmt, new_temp);
4654 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4656 return new_stmt;
4660 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4661 For multi-step conversions store the resulting vectors and call the function
4662 recursively. */
4664 static void
4665 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4666 int multi_step_cvt,
4667 stmt_vec_info stmt_info,
4668 vec<tree> &vec_dsts,
4669 gimple_stmt_iterator *gsi,
4670 slp_tree slp_node, enum tree_code code)
4672 unsigned int i;
4673 tree vop0, vop1, new_tmp, vec_dest;
4675 vec_dest = vec_dsts.pop ();
4677 for (i = 0; i < vec_oprnds->length (); i += 2)
4679 /* Create demotion operation. */
4680 vop0 = (*vec_oprnds)[i];
4681 vop1 = (*vec_oprnds)[i + 1];
4682 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4683 new_tmp = make_ssa_name (vec_dest, new_stmt);
4684 gimple_assign_set_lhs (new_stmt, new_tmp);
4685 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4687 if (multi_step_cvt)
4688 /* Store the resulting vector for next recursive call. */
4689 (*vec_oprnds)[i/2] = new_tmp;
4690 else
4692 /* This is the last step of the conversion sequence. Store the
4693 vectors in SLP_NODE or in the vector info of the scalar statement
4694 (or in the STMT_VINFO_RELATED_STMT chain). */
4695 if (slp_node)
4696 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4697 else
4698 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4702 /* For multi-step demotion operations we first generate demotion operations
4703 from the source type to the intermediate types, and then combine the
4704 results (stored in VEC_OPRNDS) with a demotion operation to the
4705 destination type. */
4706 if (multi_step_cvt)
4708 /* At each level of recursion we have half of the operands we had at the
4709 previous level. */
4710 vec_oprnds->truncate ((i+1)/2);
4711 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4712 multi_step_cvt - 1,
4713 stmt_info, vec_dsts, gsi,
4714 slp_node, VEC_PACK_TRUNC_EXPR);
4717 vec_dsts.quick_push (vec_dest);
4721 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4722 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4723 STMT_INFO. For multi-step conversions store the resulting vectors and
4724 call the function recursively. */
4726 static void
4727 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4728 vec<tree> *vec_oprnds0,
4729 vec<tree> *vec_oprnds1,
4730 stmt_vec_info stmt_info, tree vec_dest,
4731 gimple_stmt_iterator *gsi,
4732 enum tree_code code1,
4733 enum tree_code code2, int op_type)
4735 int i;
4736 tree vop0, vop1, new_tmp1, new_tmp2;
4737 gimple *new_stmt1, *new_stmt2;
4738 vec<tree> vec_tmp = vNULL;
4740 vec_tmp.create (vec_oprnds0->length () * 2);
4741 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4743 if (op_type == binary_op)
4744 vop1 = (*vec_oprnds1)[i];
4745 else
4746 vop1 = NULL_TREE;
4748 /* Generate the two halves of the promotion operation. */
4749 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4750 op_type, vec_dest, gsi,
4751 stmt_info);
4752 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4753 op_type, vec_dest, gsi,
4754 stmt_info);
4755 if (is_gimple_call (new_stmt1))
4757 new_tmp1 = gimple_call_lhs (new_stmt1);
4758 new_tmp2 = gimple_call_lhs (new_stmt2);
4760 else
4762 new_tmp1 = gimple_assign_lhs (new_stmt1);
4763 new_tmp2 = gimple_assign_lhs (new_stmt2);
4766 /* Store the results for the next step. */
4767 vec_tmp.quick_push (new_tmp1);
4768 vec_tmp.quick_push (new_tmp2);
4771 vec_oprnds0->release ();
4772 *vec_oprnds0 = vec_tmp;
4775 /* Create vectorized promotion stmts for widening stmts using only half the
4776 potential vector size for input. */
4777 static void
4778 vect_create_half_widening_stmts (vec_info *vinfo,
4779 vec<tree> *vec_oprnds0,
4780 vec<tree> *vec_oprnds1,
4781 stmt_vec_info stmt_info, tree vec_dest,
4782 gimple_stmt_iterator *gsi,
4783 enum tree_code code1,
4784 int op_type)
4786 int i;
4787 tree vop0, vop1;
4788 gimple *new_stmt1;
4789 gimple *new_stmt2;
4790 gimple *new_stmt3;
4791 vec<tree> vec_tmp = vNULL;
4793 vec_tmp.create (vec_oprnds0->length ());
4794 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4796 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4798 gcc_assert (op_type == binary_op);
4799 vop1 = (*vec_oprnds1)[i];
4801 /* Widen the first vector input. */
4802 out_type = TREE_TYPE (vec_dest);
4803 new_tmp1 = make_ssa_name (out_type);
4804 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4805 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4806 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4808 /* Widen the second vector input. */
4809 new_tmp2 = make_ssa_name (out_type);
4810 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4811 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4812 /* Perform the operation with both vector inputs widened. */
4813 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4815 else
4817 /* Perform the operation with the single vector input widened. */
4818 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4821 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4822 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4823 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4825 /* Store the results for the next step. */
4826 vec_tmp.quick_push (new_tmp3);
4829 vec_oprnds0->release ();
4830 *vec_oprnds0 = vec_tmp;
4834 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4835 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4836 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4837 Return true if STMT_INFO is vectorizable in this way. */
4839 static bool
4840 vectorizable_conversion (vec_info *vinfo,
4841 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4842 gimple **vec_stmt, slp_tree slp_node,
4843 stmt_vector_for_cost *cost_vec)
4845 tree vec_dest;
4846 tree scalar_dest;
4847 tree op0, op1 = NULL_TREE;
4848 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4849 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4850 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4851 tree new_temp;
4852 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4853 int ndts = 2;
4854 poly_uint64 nunits_in;
4855 poly_uint64 nunits_out;
4856 tree vectype_out, vectype_in;
4857 int ncopies, i;
4858 tree lhs_type, rhs_type;
4859 enum { NARROW, NONE, WIDEN } modifier;
4860 vec<tree> vec_oprnds0 = vNULL;
4861 vec<tree> vec_oprnds1 = vNULL;
4862 tree vop0;
4863 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4864 int multi_step_cvt = 0;
4865 vec<tree> interm_types = vNULL;
4866 tree intermediate_type, cvt_type = NULL_TREE;
4867 int op_type;
4868 unsigned short fltsz;
4870 /* Is STMT a vectorizable conversion? */
4872 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4873 return false;
4875 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4876 && ! vec_stmt)
4877 return false;
4879 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4880 if (!stmt)
4881 return false;
4883 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4884 return false;
4886 code = gimple_assign_rhs_code (stmt);
4887 if (!CONVERT_EXPR_CODE_P (code)
4888 && code != FIX_TRUNC_EXPR
4889 && code != FLOAT_EXPR
4890 && code != WIDEN_PLUS_EXPR
4891 && code != WIDEN_MINUS_EXPR
4892 && code != WIDEN_MULT_EXPR
4893 && code != WIDEN_LSHIFT_EXPR)
4894 return false;
4896 bool widen_arith = (code == WIDEN_PLUS_EXPR
4897 || code == WIDEN_MINUS_EXPR
4898 || code == WIDEN_MULT_EXPR
4899 || code == WIDEN_LSHIFT_EXPR);
4900 op_type = TREE_CODE_LENGTH (code);
4902 /* Check types of lhs and rhs. */
4903 scalar_dest = gimple_assign_lhs (stmt);
4904 lhs_type = TREE_TYPE (scalar_dest);
4905 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4907 /* Check the operands of the operation. */
4908 slp_tree slp_op0, slp_op1 = NULL;
4909 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4910 0, &op0, &slp_op0, &dt[0], &vectype_in))
4912 if (dump_enabled_p ())
4913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4914 "use not simple.\n");
4915 return false;
4918 rhs_type = TREE_TYPE (op0);
4919 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4920 && !((INTEGRAL_TYPE_P (lhs_type)
4921 && INTEGRAL_TYPE_P (rhs_type))
4922 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4923 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4924 return false;
4926 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4927 && ((INTEGRAL_TYPE_P (lhs_type)
4928 && !type_has_mode_precision_p (lhs_type))
4929 || (INTEGRAL_TYPE_P (rhs_type)
4930 && !type_has_mode_precision_p (rhs_type))))
4932 if (dump_enabled_p ())
4933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4934 "type conversion to/from bit-precision unsupported."
4935 "\n");
4936 return false;
4939 if (op_type == binary_op)
4941 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4942 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4944 op1 = gimple_assign_rhs2 (stmt);
4945 tree vectype1_in;
4946 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4947 &op1, &slp_op1, &dt[1], &vectype1_in))
4949 if (dump_enabled_p ())
4950 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4951 "use not simple.\n");
4952 return false;
4954 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4955 OP1. */
4956 if (!vectype_in)
4957 vectype_in = vectype1_in;
4960 /* If op0 is an external or constant def, infer the vector type
4961 from the scalar type. */
4962 if (!vectype_in)
4963 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4964 if (vec_stmt)
4965 gcc_assert (vectype_in);
4966 if (!vectype_in)
4968 if (dump_enabled_p ())
4969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4970 "no vectype for scalar type %T\n", rhs_type);
4972 return false;
4975 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4976 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4978 if (dump_enabled_p ())
4979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4980 "can't convert between boolean and non "
4981 "boolean vectors %T\n", rhs_type);
4983 return false;
4986 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4987 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
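/* Classify the conversion by element counts: equal counts are either a
   half-widening arithmetic operation or a same-width conversion, more
   output elements than input elements mean narrowing, and fewer mean
   widening.  */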
4988 if (known_eq (nunits_out, nunits_in))
4989 if (widen_arith)
4990 modifier = WIDEN;
4991 else
4992 modifier = NONE;
4993 else if (multiple_p (nunits_out, nunits_in))
4994 modifier = NARROW;
4995 else
4997 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4998 modifier = WIDEN;
5001 /* Multiple types in SLP are handled by creating the appropriate number of
5002 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5003 case of SLP. */
5004 if (slp_node)
5005 ncopies = 1;
5006 else if (modifier == NARROW)
5007 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5008 else
5009 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5011 /* Sanity check: make sure that at least one copy of the vectorized stmt
5012 needs to be generated. */
5013 gcc_assert (ncopies >= 1);
5015 bool found_mode = false;
5016 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5017 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5018 opt_scalar_mode rhs_mode_iter;
5020 /* Supportable by target? */
5021 switch (modifier)
5023 case NONE:
5024 if (code != FIX_TRUNC_EXPR
5025 && code != FLOAT_EXPR
5026 && !CONVERT_EXPR_CODE_P (code))
5027 return false;
5028 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
5029 break;
5030 /* FALLTHRU */
5031 unsupported:
5032 if (dump_enabled_p ())
5033 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5034 "conversion not supported by target.\n");
5035 return false;
5037 case WIDEN:
5038 if (known_eq (nunits_in, nunits_out))
5040 if (!supportable_half_widening_operation (code, vectype_out,
5041 vectype_in, &code1))
5042 goto unsupported;
5043 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5044 break;
5046 if (supportable_widening_operation (vinfo, code, stmt_info,
5047 vectype_out, vectype_in, &code1,
5048 &code2, &multi_step_cvt,
5049 &interm_types))
5051 /* A binary widening operation can only be supported directly by the
5052 architecture. */
5053 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5054 break;
5057 if (code != FLOAT_EXPR
5058 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5059 goto unsupported;
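/* An integer-to-wider-float conversion without direct support can still
   be vectorized by first widening the integer input to a wider integer
   mode and then converting that to the float type; search successively
   wider integer modes for a combination the target supports.  */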
5061 fltsz = GET_MODE_SIZE (lhs_mode);
5062 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5064 rhs_mode = rhs_mode_iter.require ();
5065 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5066 break;
5068 cvt_type
5069 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5070 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5071 if (cvt_type == NULL_TREE)
5072 goto unsupported;
5074 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5076 if (!supportable_convert_operation (code, vectype_out,
5077 cvt_type, &codecvt1))
5078 goto unsupported;
5080 else if (!supportable_widening_operation (vinfo, code, stmt_info,
5081 vectype_out, cvt_type,
5082 &codecvt1, &codecvt2,
5083 &multi_step_cvt,
5084 &interm_types))
5085 continue;
5086 else
5087 gcc_assert (multi_step_cvt == 0);
5089 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5090 cvt_type,
5091 vectype_in, &code1, &code2,
5092 &multi_step_cvt, &interm_types))
5094 found_mode = true;
5095 break;
5099 if (!found_mode)
5100 goto unsupported;
5102 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5103 codecvt2 = ERROR_MARK;
5104 else
5106 multi_step_cvt++;
5107 interm_types.safe_push (cvt_type);
5108 cvt_type = NULL_TREE;
5110 break;
5112 case NARROW:
5113 gcc_assert (op_type == unary_op);
5114 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5115 &code1, &multi_step_cvt,
5116 &interm_types))
5117 break;
5119 if (code != FIX_TRUNC_EXPR
5120 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5121 goto unsupported;
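/* A float-to-narrower-integer conversion can be split into a same-width
   FIX_TRUNC to an intermediate integer type followed by an integer
   narrowing step, provided the target supports both.  */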
5123 cvt_type
5124 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5125 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5126 if (cvt_type == NULL_TREE)
5127 goto unsupported;
5128 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5129 &codecvt1))
5130 goto unsupported;
5131 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5132 &code1, &multi_step_cvt,
5133 &interm_types))
5134 break;
5135 goto unsupported;
5137 default:
5138 gcc_unreachable ();
5141 if (!vec_stmt) /* transformation not required. */
5143 if (slp_node
5144 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5145 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5147 if (dump_enabled_p ())
5148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5149 "incompatible vector types for invariants\n");
5150 return false;
5152 DUMP_VECT_SCOPE ("vectorizable_conversion");
5153 if (modifier == NONE)
5155 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5156 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5157 cost_vec);
5159 else if (modifier == NARROW)
5161 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5162 /* The final packing step produces one vector result per copy. */
5163 unsigned int nvectors
5164 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5165 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5166 multi_step_cvt, cost_vec,
5167 widen_arith);
5169 else
5171 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5172 /* The initial unpacking step produces two vector results
5173 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5174 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5175 unsigned int nvectors
5176 = (slp_node
5177 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5178 : ncopies * 2);
5179 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5180 multi_step_cvt, cost_vec,
5181 widen_arith);
5183 interm_types.release ();
5184 return true;
5187 /* Transform. */
5188 if (dump_enabled_p ())
5189 dump_printf_loc (MSG_NOTE, vect_location,
5190 "transform conversion. ncopies = %d.\n", ncopies);
5192 if (op_type == binary_op)
5194 if (CONSTANT_CLASS_P (op0))
5195 op0 = fold_convert (TREE_TYPE (op1), op0);
5196 else if (CONSTANT_CLASS_P (op1))
5197 op1 = fold_convert (TREE_TYPE (op0), op1);
5200 /* In case of multi-step conversion, we first generate conversion operations
5201 to the intermediate types, and then from those types to the final one.
5202 We create vector destinations for the intermediate types (TYPES) received
5203 from supportable_*_operation, and store them in the correct order
5204 for future use in vect_create_vectorized_*_stmts (). */
5205 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5206 vec_dest = vect_create_destination_var (scalar_dest,
5207 (cvt_type && modifier == WIDEN)
5208 ? cvt_type : vectype_out);
5209 vec_dsts.quick_push (vec_dest);
5211 if (multi_step_cvt)
5213 for (i = interm_types.length () - 1;
5214 interm_types.iterate (i, &intermediate_type); i--)
5216 vec_dest = vect_create_destination_var (scalar_dest,
5217 intermediate_type);
5218 vec_dsts.quick_push (vec_dest);
5222 if (cvt_type)
5223 vec_dest = vect_create_destination_var (scalar_dest,
5224 modifier == WIDEN
5225 ? vectype_out : cvt_type);
5227 int ninputs = 1;
5228 if (!slp_node)
5230 if (modifier == WIDEN)
5232 else if (modifier == NARROW)
5234 if (multi_step_cvt)
5235 ninputs = vect_pow2 (multi_step_cvt);
5236 ninputs *= 2;
5240 switch (modifier)
5242 case NONE:
5243 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5244 op0, &vec_oprnds0);
5245 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5247 /* Arguments are ready. Create the new vector stmt. */
5248 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5249 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5250 new_temp = make_ssa_name (vec_dest, new_stmt);
5251 gimple_assign_set_lhs (new_stmt, new_temp);
5252 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5254 if (slp_node)
5255 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5256 else
5257 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5259 break;
5261 case WIDEN:
5262 /* In case the vectorization factor (VF) is bigger than the number
5263 of elements that we can fit in a vectype (nunits), we have to
5264 generate more than one vector stmt, i.e., we need to "unroll"
5265 the vector stmt by a factor of VF/nunits. */
5266 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5267 op0, &vec_oprnds0,
5268 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5269 &vec_oprnds1);
5270 if (code == WIDEN_LSHIFT_EXPR)
5272 int oprnds_size = vec_oprnds0.length ();
5273 vec_oprnds1.create (oprnds_size);
5274 for (i = 0; i < oprnds_size; ++i)
5275 vec_oprnds1.quick_push (op1);
5277 /* Arguments are ready. Create the new vector stmts. */
5278 for (i = multi_step_cvt; i >= 0; i--)
5280 tree this_dest = vec_dsts[i];
5281 enum tree_code c1 = code1, c2 = code2;
5282 if (i == 0 && codecvt2 != ERROR_MARK)
5284 c1 = codecvt1;
5285 c2 = codecvt2;
5287 if (known_eq (nunits_out, nunits_in))
5288 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5289 &vec_oprnds1, stmt_info,
5290 this_dest, gsi,
5291 c1, op_type);
5292 else
5293 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5294 &vec_oprnds1, stmt_info,
5295 this_dest, gsi,
5296 c1, c2, op_type);
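/* If the analysis phase chose an intermediate CVT_TYPE, the widening
above produced CVT_TYPE vectors; apply the final conversion CODECVT1
to each of them to obtain VECTYPE_OUT.  Otherwise the widened defs
themselves are already the results.  */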
5299 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5301 gimple *new_stmt;
5302 if (cvt_type)
5304 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5305 new_temp = make_ssa_name (vec_dest);
5306 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5307 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5309 else
5310 new_stmt = SSA_NAME_DEF_STMT (vop0);
5312 if (slp_node)
5313 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5314 else
5315 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5317 break;
5319 case NARROW:
5320 /* In case the vectorization factor (VF) is bigger than the number
5321 of elements that we can fit in a vectype (nunits), we have to
5322 generate more than one vector stmt - i.e - we need to "unroll"
5323 the vector stmt by a factor VF/nunits. */
5324 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5325 op0, &vec_oprnds0);
5326 /* Arguments are ready. Create the new vector stmts. */
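/* If an intermediate CVT_TYPE is needed, first convert each input
vector with CODECVT1 into CVT_TYPE, then perform the demotion
steps below.  */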
5327 if (cvt_type)
5328 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5330 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5331 new_temp = make_ssa_name (vec_dest);
5332 gassign *new_stmt
5333 = gimple_build_assign (new_temp, codecvt1, vop0);
5334 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5335 vec_oprnds0[i] = new_temp;
5338 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5339 multi_step_cvt,
5340 stmt_info, vec_dsts, gsi,
5341 slp_node, code1);
5342 break;
5344 if (!slp_node)
5345 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5347 vec_oprnds0.release ();
5348 vec_oprnds1.release ();
5349 interm_types.release ();
5351 return true;
5354 /* Return true if we can assume from the scalar form of STMT_INFO that
5355 neither the scalar nor the vector forms will generate code. STMT_INFO
5356 is known not to involve a data reference. */
5358 bool
5359 vect_nop_conversion_p (stmt_vec_info stmt_info)
5361 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5362 if (!stmt)
5363 return false;
5365 tree lhs = gimple_assign_lhs (stmt);
5366 tree_code code = gimple_assign_rhs_code (stmt);
5367 tree rhs = gimple_assign_rhs1 (stmt);
5369 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5370 return true;
5372 if (CONVERT_EXPR_CODE_P (code))
5373 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5375 return false;
5378 /* Function vectorizable_assignment.
5380 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5381 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5382 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5383 Return true if STMT_INFO is vectorizable in this way. */
5385 static bool
5386 vectorizable_assignment (vec_info *vinfo,
5387 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5388 gimple **vec_stmt, slp_tree slp_node,
5389 stmt_vector_for_cost *cost_vec)
5391 tree vec_dest;
5392 tree scalar_dest;
5393 tree op;
5394 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5395 tree new_temp;
5396 enum vect_def_type dt[1] = {vect_unknown_def_type};
5397 int ndts = 1;
5398 int ncopies;
5399 int i;
5400 vec<tree> vec_oprnds = vNULL;
5401 tree vop;
5402 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5403 enum tree_code code;
5404 tree vectype_in;
5406 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5407 return false;
5409 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5410 && ! vec_stmt)
5411 return false;
5413 /* Is vectorizable assignment? */
5414 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5415 if (!stmt)
5416 return false;
5418 scalar_dest = gimple_assign_lhs (stmt);
5419 if (TREE_CODE (scalar_dest) != SSA_NAME)
5420 return false;
5422 if (STMT_VINFO_DATA_REF (stmt_info))
5423 return false;
5425 code = gimple_assign_rhs_code (stmt);
5426 if (!(gimple_assign_single_p (stmt)
5427 || code == PAREN_EXPR
5428 || CONVERT_EXPR_CODE_P (code)))
5429 return false;
5431 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5432 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5434 /* Multiple types in SLP are handled by creating the appropriate number of
5435 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5436 case of SLP. */
5437 if (slp_node)
5438 ncopies = 1;
5439 else
5440 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5442 gcc_assert (ncopies >= 1);
5444 slp_tree slp_op;
5445 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5446 &dt[0], &vectype_in))
5448 if (dump_enabled_p ())
5449 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5450 "use not simple.\n");
5451 return false;
5453 if (!vectype_in)
5454 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5456 /* We can handle NOP_EXPR and VIEW_CONVERT_EXPR conversions that do not
5457 change the number of elements or the vector size. */
5458 if ((CONVERT_EXPR_CODE_P (code)
5459 || code == VIEW_CONVERT_EXPR)
5460 && (!vectype_in
5461 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5462 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5463 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5464 return false;
5466 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5467 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5469 if (dump_enabled_p ())
5470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5471 "can't convert between boolean and non "
5472 "boolean vectors %T\n", TREE_TYPE (op));
5474 return false;
5477 /* We do not handle bit-precision changes. */
5478 if ((CONVERT_EXPR_CODE_P (code)
5479 || code == VIEW_CONVERT_EXPR)
5480 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5481 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5482 || !type_has_mode_precision_p (TREE_TYPE (op)))
5483 /* But a conversion that does not change the bit-pattern, such as a zero-extension of an unsigned source to a wider precision, is ok. */
5484 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5485 > TYPE_PRECISION (TREE_TYPE (op)))
5486 && TYPE_UNSIGNED (TREE_TYPE (op))))
5488 if (dump_enabled_p ())
5489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5490 "type conversion to/from bit-precision "
5491 "unsupported.\n");
5492 return false;
5495 if (!vec_stmt) /* transformation not required. */
5497 if (slp_node
5498 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5500 if (dump_enabled_p ())
5501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5502 "incompatible vector types for invariants\n");
5503 return false;
5505 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5506 DUMP_VECT_SCOPE ("vectorizable_assignment");
5507 if (!vect_nop_conversion_p (stmt_info))
5508 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5509 cost_vec);
5510 return true;
5513 /* Transform. */
5514 if (dump_enabled_p ())
5515 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5517 /* Handle def. */
5518 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5520 /* Handle use. */
5521 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5523 /* Arguments are ready. Create the new vector stmt. */
5524 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
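/* The checks above ensured that any conversion here changes neither
the number of elements nor the vector size, so it can be emitted as
a plain VIEW_CONVERT_EXPR of the source vector.  */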
5526 if (CONVERT_EXPR_CODE_P (code)
5527 || code == VIEW_CONVERT_EXPR)
5528 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5529 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5530 new_temp = make_ssa_name (vec_dest, new_stmt);
5531 gimple_assign_set_lhs (new_stmt, new_temp);
5532 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5533 if (slp_node)
5534 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5535 else
5536 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5538 if (!slp_node)
5539 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5541 vec_oprnds.release ();
5542 return true;
5546 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5547 either as shift by a scalar or by a vector. */
5549 bool
5550 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5553 machine_mode vec_mode;
5554 optab optab;
5555 int icode;
5556 tree vectype;
5558 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5559 if (!vectype)
5560 return false;
5562 optab = optab_for_tree_code (code, vectype, optab_scalar);
5563 if (!optab
5564 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5566 optab = optab_for_tree_code (code, vectype, optab_vector);
5567 if (!optab
5568 || (optab_handler (optab, TYPE_MODE (vectype))
5569 == CODE_FOR_nothing))
5570 return false;
5573 vec_mode = TYPE_MODE (vectype);
5574 icode = (int) optab_handler (optab, vec_mode);
5575 if (icode == CODE_FOR_nothing)
5576 return false;
5578 return true;
5582 /* Function vectorizable_shift.
5584 Check if STMT_INFO performs a shift operation that can be vectorized.
5585 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5586 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5587 Return true if STMT_INFO is vectorizable in this way. */
5589 static bool
5590 vectorizable_shift (vec_info *vinfo,
5591 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5592 gimple **vec_stmt, slp_tree slp_node,
5593 stmt_vector_for_cost *cost_vec)
5595 tree vec_dest;
5596 tree scalar_dest;
5597 tree op0, op1 = NULL;
5598 tree vec_oprnd1 = NULL_TREE;
5599 tree vectype;
5600 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5601 enum tree_code code;
5602 machine_mode vec_mode;
5603 tree new_temp;
5604 optab optab;
5605 int icode;
5606 machine_mode optab_op2_mode;
5607 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5608 int ndts = 2;
5609 poly_uint64 nunits_in;
5610 poly_uint64 nunits_out;
5611 tree vectype_out;
5612 tree op1_vectype;
5613 int ncopies;
5614 int i;
5615 vec<tree> vec_oprnds0 = vNULL;
5616 vec<tree> vec_oprnds1 = vNULL;
5617 tree vop0, vop1;
5618 unsigned int k;
5619 bool scalar_shift_arg = true;
5620 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5621 bool incompatible_op1_vectype_p = false;
5623 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5624 return false;
5626 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5627 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5628 && ! vec_stmt)
5629 return false;
5631 /* Is STMT a vectorizable binary/unary operation? */
5632 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5633 if (!stmt)
5634 return false;
5636 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5637 return false;
5639 code = gimple_assign_rhs_code (stmt);
5641 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5642 || code == RROTATE_EXPR))
5643 return false;
5645 scalar_dest = gimple_assign_lhs (stmt);
5646 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5647 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5649 if (dump_enabled_p ())
5650 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5651 "bit-precision shifts not supported.\n");
5652 return false;
5655 slp_tree slp_op0;
5656 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5657 0, &op0, &slp_op0, &dt[0], &vectype))
5659 if (dump_enabled_p ())
5660 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5661 "use not simple.\n");
5662 return false;
5664 /* If op0 is an external or constant def, infer the vector type
5665 from the scalar type. */
5666 if (!vectype)
5667 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5668 if (vec_stmt)
5669 gcc_assert (vectype);
5670 if (!vectype)
5672 if (dump_enabled_p ())
5673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5674 "no vectype for scalar type\n");
5675 return false;
5678 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5679 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5680 if (maybe_ne (nunits_out, nunits_in))
5681 return false;
5683 stmt_vec_info op1_def_stmt_info;
5684 slp_tree slp_op1;
5685 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5686 &dt[1], &op1_vectype, &op1_def_stmt_info))
5688 if (dump_enabled_p ())
5689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5690 "use not simple.\n");
5691 return false;
5694 /* Multiple types in SLP are handled by creating the appropriate number of
5695 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5696 case of SLP. */
5697 if (slp_node)
5698 ncopies = 1;
5699 else
5700 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5702 gcc_assert (ncopies >= 1);
5704 /* Determine whether the shift amount is a vector or a scalar. If the
5705 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5707 if ((dt[1] == vect_internal_def
5708 || dt[1] == vect_induction_def
5709 || dt[1] == vect_nested_cycle)
5710 && !slp_node)
5711 scalar_shift_arg = false;
5712 else if (dt[1] == vect_constant_def
5713 || dt[1] == vect_external_def
5714 || dt[1] == vect_internal_def)
5716 /* In SLP we need to check whether the shift count is the same in all
5717 the scalar stmts; in loops, if it is a constant or invariant, it is
5718 always a scalar shift. */
5719 if (slp_node)
5721 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5722 stmt_vec_info slpstmt_info;
5724 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5726 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5727 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5728 scalar_shift_arg = false;
5731 /* For internal SLP defs we have to make sure we see scalar stmts
5732 for all vector elements.
5733 ??? For different vectors we could resort to a different
5734 scalar shift operand but code-generation below simply always
5735 takes the first. */
5736 if (dt[1] == vect_internal_def
5737 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5738 stmts.length ()))
5739 scalar_shift_arg = false;
5742 /* If the shift amount is computed by a pattern stmt we cannot
5743 use the scalar amount directly thus give up and use a vector
5744 shift. */
5745 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5746 scalar_shift_arg = false;
5748 else
5750 if (dump_enabled_p ())
5751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5752 "operand mode requires invariant argument.\n");
5753 return false;
5756 /* Vector shifted by vector. */
5757 bool was_scalar_shift_arg = scalar_shift_arg;
5758 if (!scalar_shift_arg)
5760 optab = optab_for_tree_code (code, vectype, optab_vector);
5761 if (dump_enabled_p ())
5762 dump_printf_loc (MSG_NOTE, vect_location,
5763 "vector/vector shift/rotate found.\n");
5765 if (!op1_vectype)
5766 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5767 slp_op1);
5768 incompatible_op1_vectype_p
5769 = (op1_vectype == NULL_TREE
5770 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5771 TYPE_VECTOR_SUBPARTS (vectype))
5772 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5773 if (incompatible_op1_vectype_p
5774 && (!slp_node
5775 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5776 || slp_op1->refcnt != 1))
5778 if (dump_enabled_p ())
5779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5780 "unusable type for last operand in"
5781 " vector/vector shift/rotate.\n");
5782 return false;
5785 /* See if the machine has a vector shifted by scalar insn and if not
5786 then see if it has a vector shifted by vector insn. */
5787 else
5789 optab = optab_for_tree_code (code, vectype, optab_scalar);
5790 if (optab
5791 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5793 if (dump_enabled_p ())
5794 dump_printf_loc (MSG_NOTE, vect_location,
5795 "vector/scalar shift/rotate found.\n");
5797 else
5799 optab = optab_for_tree_code (code, vectype, optab_vector);
5800 if (optab
5801 && (optab_handler (optab, TYPE_MODE (vectype))
5802 != CODE_FOR_nothing))
5804 scalar_shift_arg = false;
5806 if (dump_enabled_p ())
5807 dump_printf_loc (MSG_NOTE, vect_location,
5808 "vector/vector shift/rotate found.\n");
5810 if (!op1_vectype)
5811 op1_vectype = get_vectype_for_scalar_type (vinfo,
5812 TREE_TYPE (op1),
5813 slp_op1);
5815 /* Unlike the other binary operators, shifts/rotates have
5816 the rhs being int, instead of the same type as the lhs,
5817 so make sure the scalar is the right type if we are
5818 dealing with vectors of long long/long/short/char. */
5819 incompatible_op1_vectype_p
5820 = (!op1_vectype
5821 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5822 TREE_TYPE (op1)));
5823 if (incompatible_op1_vectype_p
5824 && dt[1] == vect_internal_def)
5826 if (dump_enabled_p ())
5827 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5828 "unusable type for last operand in"
5829 " vector/vector shift/rotate.\n");
5830 return false;
5836 /* Supportable by target? */
5837 if (!optab)
5839 if (dump_enabled_p ())
5840 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5841 "no optab.\n");
5842 return false;
5844 vec_mode = TYPE_MODE (vectype);
5845 icode = (int) optab_handler (optab, vec_mode);
5846 if (icode == CODE_FOR_nothing)
5848 if (dump_enabled_p ())
5849 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5850 "op not supported by target.\n");
5851 return false;
5853 /* vector lowering cannot optimize vector shifts using word arithmetic. */
5854 if (vect_emulated_vector_p (vectype))
5855 return false;
5857 if (!vec_stmt) /* transformation not required. */
5859 if (slp_node
5860 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5861 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5862 && (!incompatible_op1_vectype_p
5863 || dt[1] == vect_constant_def)
5864 && !vect_maybe_update_slp_op_vectype
5865 (slp_op1,
5866 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5868 if (dump_enabled_p ())
5869 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5870 "incompatible vector types for invariants\n");
5871 return false;
5873 /* Now adjust the constant shift amount in place. */
5874 if (slp_node
5875 && incompatible_op1_vectype_p
5876 && dt[1] == vect_constant_def)
5878 for (unsigned i = 0;
5879 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5881 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5882 = fold_convert (TREE_TYPE (vectype),
5883 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5884 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5885 == INTEGER_CST));
5888 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5889 DUMP_VECT_SCOPE ("vectorizable_shift");
5890 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5891 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5892 return true;
5895 /* Transform. */
5897 if (dump_enabled_p ())
5898 dump_printf_loc (MSG_NOTE, vect_location,
5899 "transform binary/unary operation.\n");
5901 if (incompatible_op1_vectype_p && !slp_node)
5903 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5904 op1 = fold_convert (TREE_TYPE (vectype), op1);
5905 if (dt[1] != vect_constant_def)
5906 op1 = vect_init_vector (vinfo, stmt_info, op1,
5907 TREE_TYPE (vectype), NULL);
5910 /* Handle def. */
5911 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5913 if (scalar_shift_arg && dt[1] != vect_internal_def)
5915 /* Vector shl and shr insn patterns can be defined with scalar
5916 operand 2 (shift operand). In this case, use constant or loop
5917 invariant op1 directly, without extending it to vector mode
5918 first. */
5919 optab_op2_mode = insn_data[icode].operand[2].mode;
5920 if (!VECTOR_MODE_P (optab_op2_mode))
5922 if (dump_enabled_p ())
5923 dump_printf_loc (MSG_NOTE, vect_location,
5924 "operand 1 using scalar mode.\n");
5925 vec_oprnd1 = op1;
5926 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5927 vec_oprnds1.quick_push (vec_oprnd1);
5928 /* Store vec_oprnd1 for every vector stmt to be created.
5929 We check during the analysis that all the shift arguments
5930 are the same.
5931 TODO: Allow different constants for different vector
5932 stmts generated for an SLP instance. */
5933 for (k = 0;
5934 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5935 vec_oprnds1.quick_push (vec_oprnd1);
5938 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5940 if (was_scalar_shift_arg)
5942 /* If the argument was the same in all lanes create
5943 the correctly typed vector shift amount directly. */
5944 op1 = fold_convert (TREE_TYPE (vectype), op1);
5945 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5946 !loop_vinfo ? gsi : NULL);
5947 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5948 !loop_vinfo ? gsi : NULL);
5949 vec_oprnds1.create (slp_node->vec_stmts_size);
5950 for (k = 0; k < slp_node->vec_stmts_size; k++)
5951 vec_oprnds1.quick_push (vec_oprnd1);
5953 else if (dt[1] == vect_constant_def)
5954 /* The constant shift amount has been adjusted in place. */
5956 else
5957 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5960 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5961 (a special case for certain kind of vector shifts); otherwise,
5962 operand 1 should be of a vector type (the usual case). */
5963 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5964 op0, &vec_oprnds0,
5965 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5967 /* Arguments are ready. Create the new vector stmt. */
5968 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5970 /* For internal defs where we need to use a scalar shift arg
5971 extract the first lane. */
5972 if (scalar_shift_arg && dt[1] == vect_internal_def)
5974 vop1 = vec_oprnds1[0];
5975 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5976 gassign *new_stmt
5977 = gimple_build_assign (new_temp,
5978 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5979 vop1,
5980 TYPE_SIZE (TREE_TYPE (new_temp)),
5981 bitsize_zero_node));
5982 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5983 vop1 = new_temp;
5985 else
5986 vop1 = vec_oprnds1[i];
5987 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5988 new_temp = make_ssa_name (vec_dest, new_stmt);
5989 gimple_assign_set_lhs (new_stmt, new_temp);
5990 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5991 if (slp_node)
5992 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5993 else
5994 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5997 if (!slp_node)
5998 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6000 vec_oprnds0.release ();
6001 vec_oprnds1.release ();
6003 return true;
6007 /* Function vectorizable_operation.
6009 Check if STMT_INFO performs a binary, unary or ternary operation that can
6010 be vectorized.
6011 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6012 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6013 Return true if STMT_INFO is vectorizable in this way. */
6015 static bool
6016 vectorizable_operation (vec_info *vinfo,
6017 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6018 gimple **vec_stmt, slp_tree slp_node,
6019 stmt_vector_for_cost *cost_vec)
6021 tree vec_dest;
6022 tree scalar_dest;
6023 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6024 tree vectype;
6025 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6026 enum tree_code code, orig_code;
6027 machine_mode vec_mode;
6028 tree new_temp;
6029 int op_type;
6030 optab optab;
6031 bool target_support_p;
6032 enum vect_def_type dt[3]
6033 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6034 int ndts = 3;
6035 poly_uint64 nunits_in;
6036 poly_uint64 nunits_out;
6037 tree vectype_out;
6038 int ncopies, vec_num;
6039 int i;
6040 vec<tree> vec_oprnds0 = vNULL;
6041 vec<tree> vec_oprnds1 = vNULL;
6042 vec<tree> vec_oprnds2 = vNULL;
6043 tree vop0, vop1, vop2;
6044 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6046 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6047 return false;
6049 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6050 && ! vec_stmt)
6051 return false;
6053 /* Is STMT a vectorizable binary/unary operation? */
6054 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6055 if (!stmt)
6056 return false;
6058 /* Loads and stores are handled in vectorizable_{load,store}. */
6059 if (STMT_VINFO_DATA_REF (stmt_info))
6060 return false;
6062 orig_code = code = gimple_assign_rhs_code (stmt);
6064 /* Shifts are handled in vectorizable_shift. */
6065 if (code == LSHIFT_EXPR
6066 || code == RSHIFT_EXPR
6067 || code == LROTATE_EXPR
6068 || code == RROTATE_EXPR)
6069 return false;
6071 /* Comparisons are handled in vectorizable_comparison. */
6072 if (TREE_CODE_CLASS (code) == tcc_comparison)
6073 return false;
6075 /* Conditions are handled in vectorizable_condition. */
6076 if (code == COND_EXPR)
6077 return false;
6079 /* For pointer addition and subtraction, we should use the normal
6080 plus and minus for the vector operation. */
6081 if (code == POINTER_PLUS_EXPR)
6082 code = PLUS_EXPR;
6083 if (code == POINTER_DIFF_EXPR)
6084 code = MINUS_EXPR;
6086 /* Support only unary or binary operations. */
6087 op_type = TREE_CODE_LENGTH (code);
6088 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6090 if (dump_enabled_p ())
6091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6092 "num. args = %d (not unary/binary/ternary op).\n",
6093 op_type);
6094 return false;
6097 scalar_dest = gimple_assign_lhs (stmt);
6098 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6100 /* Most operations cannot handle bit-precision types without extra
6101 truncations. */
6102 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6103 if (!mask_op_p
6104 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6105 /* Exception are bitwise binary operations. */
6106 && code != BIT_IOR_EXPR
6107 && code != BIT_XOR_EXPR
6108 && code != BIT_AND_EXPR)
6110 if (dump_enabled_p ())
6111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6112 "bit-precision arithmetic not supported.\n");
6113 return false;
6116 slp_tree slp_op0;
6117 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6118 0, &op0, &slp_op0, &dt[0], &vectype))
6120 if (dump_enabled_p ())
6121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6122 "use not simple.\n");
6123 return false;
6125 /* If op0 is an external or constant def, infer the vector type
6126 from the scalar type. */
6127 if (!vectype)
6129 /* For a boolean operand we cannot determine the vector type
6130 from an invariant value (we don't know whether it should be
6131 a vector of booleans or a vector of integers). Use the
6132 output vectype instead, because operations on booleans do
6133 not change the type. */
6134 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6136 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6138 if (dump_enabled_p ())
6139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6140 "not supported operation on bool value.\n");
6141 return false;
6143 vectype = vectype_out;
6145 else
6146 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6147 slp_node);
6149 if (vec_stmt)
6150 gcc_assert (vectype);
6151 if (!vectype)
6153 if (dump_enabled_p ())
6154 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6155 "no vectype for scalar type %T\n",
6156 TREE_TYPE (op0));
6158 return false;
6161 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6162 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6163 if (maybe_ne (nunits_out, nunits_in))
6164 return false;
6166 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6167 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6168 if (op_type == binary_op || op_type == ternary_op)
6170 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6171 1, &op1, &slp_op1, &dt[1], &vectype2))
6173 if (dump_enabled_p ())
6174 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6175 "use not simple.\n");
6176 return false;
6178 if (vectype2
6179 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6180 return false;
6182 if (op_type == ternary_op)
6184 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6185 2, &op2, &slp_op2, &dt[2], &vectype3))
6187 if (dump_enabled_p ())
6188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6189 "use not simple.\n");
6190 return false;
6192 if (vectype3
6193 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6194 return false;
6197 /* Multiple types in SLP are handled by creating the appropriate number of
6198 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6199 case of SLP. */
6200 if (slp_node)
6202 ncopies = 1;
6203 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6205 else
6207 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6208 vec_num = 1;
6211 gcc_assert (ncopies >= 1);
6213 /* Reject attempts to combine mask types with nonmask types, e.g. if
6214 we have an AND between a (nonmask) boolean loaded from memory and
6215 a (mask) boolean result of a comparison.
6217 TODO: We could easily fix these cases up using pattern statements. */
6218 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6219 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6220 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6222 if (dump_enabled_p ())
6223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6224 "mixed mask and nonmask vector types\n");
6225 return false;
6228 /* Supportable by target? */
6230 vec_mode = TYPE_MODE (vectype);
6231 if (code == MULT_HIGHPART_EXPR)
6232 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6233 else
6235 optab = optab_for_tree_code (code, vectype, optab_default);
6236 if (!optab)
6238 if (dump_enabled_p ())
6239 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6240 "no optab.\n");
6241 return false;
6243 target_support_p = (optab_handler (optab, vec_mode)
6244 != CODE_FOR_nothing);
6247 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6248 if (!target_support_p)
6250 if (dump_enabled_p ())
6251 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6252 "op not supported by target.\n");
6253 /* Check only during analysis. */
6254 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6255 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6256 return false;
6257 if (dump_enabled_p ())
6258 dump_printf_loc (MSG_NOTE, vect_location,
6259 "proceeding using word mode.\n");
6260 using_emulated_vectors_p = true;
6263 if (using_emulated_vectors_p
6264 && !vect_can_vectorize_without_simd_p (code))
6266 if (dump_enabled_p ())
6267 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6268 return false;
6271 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6272 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6273 internal_fn cond_fn = get_conditional_internal_fn (code);
6275 if (!vec_stmt) /* transformation not required. */
6277 /* If this operation is part of a reduction, a fully-masked loop
6278 should only change the active lanes of the reduction chain,
6279 keeping the inactive lanes as-is. */
6280 if (loop_vinfo
6281 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6282 && reduc_idx >= 0)
6284 if (cond_fn == IFN_LAST
6285 || !direct_internal_fn_supported_p (cond_fn, vectype,
6286 OPTIMIZE_FOR_SPEED))
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6290 "can't use a fully-masked loop because no"
6291 " conditional operation is available.\n");
6292 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6294 else
6295 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6296 vectype, NULL);
6299 /* Put types on constant and invariant SLP children. */
6300 if (slp_node
6301 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6302 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6303 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6305 if (dump_enabled_p ())
6306 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6307 "incompatible vector types for invariants\n");
6308 return false;
6311 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6312 DUMP_VECT_SCOPE ("vectorizable_operation");
6313 vect_model_simple_cost (vinfo, stmt_info,
6314 ncopies, dt, ndts, slp_node, cost_vec);
6315 if (using_emulated_vectors_p)
6317 /* The above vect_model_simple_cost call handles constants
6318 in the prologue and (mis-)costs one of the stmts as
6319 vector stmt. See tree-vect-generic.cc:do_plus_minus/do_negate
6320 for the actual lowering that will be applied. */
6321 unsigned n
6322 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6323 switch (code)
6325 case PLUS_EXPR:
6326 n *= 5;
6327 break;
6328 case MINUS_EXPR:
6329 n *= 6;
6330 break;
6331 case NEGATE_EXPR:
6332 n *= 4;
6333 break;
6334 default:;
6336 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6338 return true;
6341 /* Transform. */
6343 if (dump_enabled_p ())
6344 dump_printf_loc (MSG_NOTE, vect_location,
6345 "transform binary/unary operation.\n");
6347 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6349 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6350 vectors with unsigned elements, but the result is signed. So, we
6351 need to compute the MINUS_EXPR into a vectype temporary and
6352 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6353 tree vec_cvt_dest = NULL_TREE;
6354 if (orig_code == POINTER_DIFF_EXPR)
6356 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6357 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6359 /* Handle def. */
6360 else
6361 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6363 /* In case the vectorization factor (VF) is bigger than the number
6364 of elements that we can fit in a vectype (nunits), we have to generate
6365 more than one vector stmt, i.e. we need to "unroll" the
6366 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6367 from one copy of the vector stmt to the next, in the field
6368 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6369 stages to find the correct vector defs to be used when vectorizing
6370 stmts that use the defs of the current stmt. The example below
6371 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6372 we need to create 4 vectorized stmts):
6374 before vectorization:
6375 RELATED_STMT VEC_STMT
6376 S1: x = memref - -
6377 S2: z = x + 1 - -
6379 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6380 there):
6381 RELATED_STMT VEC_STMT
6382 VS1_0: vx0 = memref0 VS1_1 -
6383 VS1_1: vx1 = memref1 VS1_2 -
6384 VS1_2: vx2 = memref2 VS1_3 -
6385 VS1_3: vx3 = memref3 - -
6386 S1: x = load - VS1_0
6387 S2: z = x + 1 - -
6389 step2: vectorize stmt S2 (done here):
6390 To vectorize stmt S2 we first need to find the relevant vector
6391 def for the first operand 'x'. This is, as usual, obtained from
6392 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6393 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6394 relevant vector def 'vx0'. Having found 'vx0' we can generate
6395 the vector stmt VS2_0, and as usual, record it in the
6396 STMT_VINFO_VEC_STMT of stmt S2.
6397 When creating the second copy (VS2_1), we obtain the relevant vector
6398 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6399 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6400 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6401 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6402 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6403 chain of stmts and pointers:
6404 RELATED_STMT VEC_STMT
6405 VS1_0: vx0 = memref0 VS1_1 -
6406 VS1_1: vx1 = memref1 VS1_2 -
6407 VS1_2: vx2 = memref2 VS1_3 -
6408 VS1_3: vx3 = memref3 - -
6409 S1: x = load - VS1_0
6410 VS2_0: vz0 = vx0 + v1 VS2_1 -
6411 VS2_1: vz1 = vx1 + v1 VS2_2 -
6412 VS2_2: vz2 = vx2 + v1 VS2_3 -
6413 VS2_3: vz3 = vx3 + v1 - -
6414 S2: z = x + 1 - VS2_0 */
6416 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6417 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6418 /* Arguments are ready. Create the new vector stmt. */
6419 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6421 gimple *new_stmt = NULL;
6422 vop1 = ((op_type == binary_op || op_type == ternary_op)
6423 ? vec_oprnds1[i] : NULL_TREE);
6424 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6425 if (masked_loop_p && reduc_idx >= 0)
6427 /* Perform the operation on active elements only and take
6428 inactive elements from the reduction chain input. */
6429 gcc_assert (!vop2);
6430 vop2 = reduc_idx == 1 ? vop1 : vop0;
6431 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6432 vectype, i);
6433 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6434 vop0, vop1, vop2);
6435 new_temp = make_ssa_name (vec_dest, call);
6436 gimple_call_set_lhs (call, new_temp);
6437 gimple_call_set_nothrow (call, true);
6438 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6439 new_stmt = call;
6441 else
6443 tree mask = NULL_TREE;
6444 /* When combining two masks, check whether either of them has elsewhere
6445 been combined with a loop mask; if so, we can mark the new combined
6446 mask as not needing to be combined with a loop mask again. */
6447 if (masked_loop_p
6448 && code == BIT_AND_EXPR
6449 && VECTOR_BOOLEAN_TYPE_P (vectype))
6451 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6452 ncopies}))
6454 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6455 vectype, i);
6457 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6458 vop0, gsi);
6461 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6462 ncopies }))
6464 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6465 vectype, i);
6467 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6468 vop1, gsi);
6472 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6473 new_temp = make_ssa_name (vec_dest, new_stmt);
6474 gimple_assign_set_lhs (new_stmt, new_temp);
6475 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6476 if (using_emulated_vectors_p)
6477 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
6479 /* Enter the combined value into the vector cond hash so we don't
6480 AND it with a loop mask again. */
6481 if (mask)
6482 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6484 if (vec_cvt_dest)
6486 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6487 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6488 new_temp);
6489 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6490 gimple_assign_set_lhs (new_stmt, new_temp);
6491 vect_finish_stmt_generation (vinfo, stmt_info,
6492 new_stmt, gsi);
6495 if (slp_node)
6496 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6497 else
6498 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6501 if (!slp_node)
6502 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6504 vec_oprnds0.release ();
6505 vec_oprnds1.release ();
6506 vec_oprnds2.release ();
6508 return true;
6511 /* A helper function to ensure data reference DR_INFO's base alignment. */
6513 static void
6514 ensure_base_align (dr_vec_info *dr_info)
6516 /* Alignment is only analyzed for the first element of a DR group,
6517 use that to look at base alignment we need to enforce. */
6518 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6519 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6521 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6523 if (dr_info->base_misaligned)
6525 tree base_decl = dr_info->base_decl;
6527 // We should only be able to increase the alignment of a base object if
6528 // we know what its new alignment should be at compile time.
6529 unsigned HOST_WIDE_INT align_base_to =
6530 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
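// Declarations that have a symbol table node (globals and statics) must
// be re-aligned through that node; local decls can have DECL_ALIGN
// adjusted directly.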
6532 if (decl_in_symtab_p (base_decl))
6533 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6534 else if (DECL_ALIGN (base_decl) < align_base_to)
6536 SET_DECL_ALIGN (base_decl, align_base_to);
6537 DECL_USER_ALIGN (base_decl) = 1;
6539 dr_info->base_misaligned = false;
6544 /* Function get_group_alias_ptr_type.
6546 Return the alias type for the group starting at FIRST_STMT_INFO. */
6548 static tree
6549 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6551 struct data_reference *first_dr, *next_dr;
6553 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6554 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6555 while (next_stmt_info)
6557 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6558 if (get_alias_set (DR_REF (first_dr))
6559 != get_alias_set (DR_REF (next_dr)))
6561 if (dump_enabled_p ())
6562 dump_printf_loc (MSG_NOTE, vect_location,
6563 "conflicting alias set types.\n");
6564 return ptr_type_node;
6566 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6568 return reference_alias_ptr_type (DR_REF (first_dr));
6572 /* Function scan_operand_equal_p.
6574 Helper function for check_scan_store. Compare two references
6575 with .GOMP_SIMD_LANE bases. */
6577 static bool
6578 scan_operand_equal_p (tree ref1, tree ref2)
6580 tree ref[2] = { ref1, ref2 };
6581 poly_int64 bitsize[2], bitpos[2];
6582 tree offset[2], base[2];
6583 for (int i = 0; i < 2; ++i)
6585 machine_mode mode;
6586 int unsignedp, reversep, volatilep = 0;
6587 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6588 &offset[i], &mode, &unsignedp,
6589 &reversep, &volatilep);
6590 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6591 return false;
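/* Look through a MEM_REF whose address is an SSA name defined as
&BASE p+ OFFSET and compare that BASE and OFFSET instead.  */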
6592 if (TREE_CODE (base[i]) == MEM_REF
6593 && offset[i] == NULL_TREE
6594 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6596 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6597 if (is_gimple_assign (def_stmt)
6598 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6599 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6600 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6602 if (maybe_ne (mem_ref_offset (base[i]), 0))
6603 return false;
6604 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6605 offset[i] = gimple_assign_rhs2 (def_stmt);
6610 if (!operand_equal_p (base[0], base[1], 0))
6611 return false;
6612 if (maybe_ne (bitsize[0], bitsize[1]))
6613 return false;
6614 if (offset[0] != offset[1])
6616 if (!offset[0] || !offset[1])
6617 return false;
6618 if (!operand_equal_p (offset[0], offset[1], 0))
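/* The offsets may differ only syntactically: peel off a constant
multiplication (the step) and widening conversions from each offset,
then compare the remaining offsets and the steps separately.  */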
6620 tree step[2];
6621 for (int i = 0; i < 2; ++i)
6623 step[i] = integer_one_node;
6624 if (TREE_CODE (offset[i]) == SSA_NAME)
6626 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6627 if (is_gimple_assign (def_stmt)
6628 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6629 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6630 == INTEGER_CST))
6632 step[i] = gimple_assign_rhs2 (def_stmt);
6633 offset[i] = gimple_assign_rhs1 (def_stmt);
6636 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6638 step[i] = TREE_OPERAND (offset[i], 1);
6639 offset[i] = TREE_OPERAND (offset[i], 0);
6641 tree rhs1 = NULL_TREE;
6642 if (TREE_CODE (offset[i]) == SSA_NAME)
6644 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6645 if (gimple_assign_cast_p (def_stmt))
6646 rhs1 = gimple_assign_rhs1 (def_stmt);
6648 else if (CONVERT_EXPR_P (offset[i]))
6649 rhs1 = TREE_OPERAND (offset[i], 0);
6650 if (rhs1
6651 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6652 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6653 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6654 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6655 offset[i] = rhs1;
6657 if (!operand_equal_p (offset[0], offset[1], 0)
6658 || !operand_equal_p (step[0], step[1], 0))
6659 return false;
6662 return true;
6666 enum scan_store_kind {
6667 /* Normal permutation. */
6668 scan_store_kind_perm,
6670 /* Whole vector left shift permutation with zero init. */
6671 scan_store_kind_lshift_zero,
6673 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6674 scan_store_kind_lshift_cond
6677 /* Function scan_store_can_perm_p.
6679 Verify if we can perform the needed permutations or whole vector shifts.
6680 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6681 USE_WHOLE_VECTOR is a vector of enum scan_store_kind recording which
6682 operation to do at each step. */
6684 static int
6685 scan_store_can_perm_p (tree vectype, tree init,
6686 vec<enum scan_store_kind> *use_whole_vector = NULL)
6688 enum machine_mode vec_mode = TYPE_MODE (vectype);
6689 unsigned HOST_WIDE_INT nunits;
6690 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6691 return -1;
6692 int units_log2 = exact_log2 (nunits);
6693 if (units_log2 <= 0)
6694 return -1;
6696 int i;
6697 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
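/* Step I < UNITS_LOG2 builds the selector { 0, ..., 2**I - 1, NUNITS,
NUNITS + 1, ... }, i.e. it keeps the first 2**I lanes from the first
input (the initializer) and shifts the second input up by 2**I lanes;
the final step broadcasts the last lane.  For 8 lanes and I == 1 this
is { 0, 1, 8, 9, 10, 11, 12, 13 }.  */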
6698 for (i = 0; i <= units_log2; ++i)
6700 unsigned HOST_WIDE_INT j, k;
6701 enum scan_store_kind kind = scan_store_kind_perm;
6702 vec_perm_builder sel (nunits, nunits, 1);
6703 sel.quick_grow (nunits);
6704 if (i == units_log2)
6706 for (j = 0; j < nunits; ++j)
6707 sel[j] = nunits - 1;
6709 else
6711 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6712 sel[j] = j;
6713 for (k = 0; j < nunits; ++j, ++k)
6714 sel[j] = nunits + k;
6716 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6717 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
6719 if (i == units_log2)
6720 return -1;
6722 if (whole_vector_shift_kind == scan_store_kind_perm)
6724 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6725 return -1;
6726 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6727 /* Whole vector shifts shift in zeros, so if init is all zero
6728 constant, there is no need to do anything further. */
6729 if ((TREE_CODE (init) != INTEGER_CST
6730 && TREE_CODE (init) != REAL_CST)
6731 || !initializer_zerop (init))
6733 tree masktype = truth_type_for (vectype);
6734 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6735 return -1;
6736 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6739 kind = whole_vector_shift_kind;
6741 if (use_whole_vector)
6743 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6744 use_whole_vector->safe_grow_cleared (i, true);
6745 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6746 use_whole_vector->safe_push (kind);
6750 return units_log2;
6754 /* Function check_scan_store.
6756 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6758 static bool
6759 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6760 enum vect_def_type rhs_dt, bool slp, tree mask,
6761 vect_memory_access_type memory_access_type)
6763 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6764 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6765 tree ref_type;
6767 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6768 if (slp
6769 || mask
6770 || memory_access_type != VMAT_CONTIGUOUS
6771 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6772 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6773 || loop_vinfo == NULL
6774 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6775 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6776 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6777 || !integer_zerop (DR_INIT (dr_info->dr))
6778 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6779 || !alias_sets_conflict_p (get_alias_set (vectype),
6780 get_alias_set (TREE_TYPE (ref_type))))
6782 if (dump_enabled_p ())
6783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6784 "unsupported OpenMP scan store.\n");
6785 return false;
6788 /* We need to pattern match code built by OpenMP lowering and simplified
6789 by subsequent optimizations into something we can handle.
6790 #pragma omp simd reduction(inscan,+:r)
6791 for (...)
6793 r += something ();
6794 #pragma omp scan inclusive (r)
6795 use (r);
6797 shall have body with:
6798 // Initialization for input phase, store the reduction initializer:
6799 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6800 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6801 D.2042[_21] = 0;
6802 // Actual input phase:
6804 r.0_5 = D.2042[_20];
6805 _6 = _4 + r.0_5;
6806 D.2042[_20] = _6;
6807 // Initialization for scan phase:
6808 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6809 _26 = D.2043[_25];
6810 _27 = D.2042[_25];
6811 _28 = _26 + _27;
6812 D.2043[_25] = _28;
6813 D.2042[_25] = _28;
6814 // Actual scan phase:
6816 r.1_8 = D.2042[_20];
6818 The "omp simd array" variable D.2042 holds the privatized copy used
6819 inside of the loop and D.2043 is another one that holds copies of
6820 the current original list item. The separate GOMP_SIMD_LANE ifn
6821 kinds are there in order to allow optimizing the initializer store
6822 and combiner sequence, e.g. if it is originally some C++ish user
6823 defined reduction, but allow the vectorizer to pattern recognize it
6824 and turn it into the appropriate vectorized scan.
6826 For exclusive scan, this is slightly different:
6827 #pragma omp simd reduction(inscan,+:r)
6828 for (...)
6830 use (r);
6831 #pragma omp scan exclusive (r)
6832 r += something ();
6834 shall have body with:
6835 // Initialization for input phase, store the reduction initializer:
6836 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6837 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6838 D.2042[_21] = 0;
6839 // Actual input phase:
6841 r.0_5 = D.2042[_20];
6842 _6 = _4 + r.0_5;
6843 D.2042[_20] = _6;
6844 // Initialization for scan phase:
6845 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6846 _26 = D.2043[_25];
6847 D.2044[_25] = _26;
6848 _27 = D.2042[_25];
6849 _28 = _26 + _27;
6850 D.2043[_25] = _28;
6851 // Actual scan phase:
6853 r.1_8 = D.2044[_20];
6854 ... */
6856 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6858 /* Match the D.2042[_21] = 0; store above. Just require that
6859 it is a constant or external definition store. */
6860 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6862 fail_init:
6863 if (dump_enabled_p ())
6864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6865 "unsupported OpenMP scan initializer store.\n");
6866 return false;
6869 if (! loop_vinfo->scan_map)
6870 loop_vinfo->scan_map = new hash_map<tree, tree>;
6871 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6872 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6873 if (cached)
6874 goto fail_init;
6875 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6877 /* These stores can be vectorized normally. */
6878 return true;
6881 if (rhs_dt != vect_internal_def)
6883 fail:
6884 if (dump_enabled_p ())
6885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6886 "unsupported OpenMP scan combiner pattern.\n");
6887 return false;
6890 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6891 tree rhs = gimple_assign_rhs1 (stmt);
6892 if (TREE_CODE (rhs) != SSA_NAME)
6893 goto fail;
6895 gimple *other_store_stmt = NULL;
6896 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6897 bool inscan_var_store
6898 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6900 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6902 if (!inscan_var_store)
6904 use_operand_p use_p;
6905 imm_use_iterator iter;
6906 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6908 gimple *use_stmt = USE_STMT (use_p);
6909 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6910 continue;
6911 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6912 || !is_gimple_assign (use_stmt)
6913 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6914 || other_store_stmt
6915 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6916 goto fail;
6917 other_store_stmt = use_stmt;
6919 if (other_store_stmt == NULL)
6920 goto fail;
6921 rhs = gimple_assign_lhs (other_store_stmt);
6922 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6923 goto fail;
6926 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6928 use_operand_p use_p;
6929 imm_use_iterator iter;
6930 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6932 gimple *use_stmt = USE_STMT (use_p);
6933 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6934 continue;
6935 if (other_store_stmt)
6936 goto fail;
6937 other_store_stmt = use_stmt;
6940 else
6941 goto fail;
6943 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6944 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6945 || !is_gimple_assign (def_stmt)
6946 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6947 goto fail;
6949 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6950 /* For pointer addition, we should use the normal plus for the vector
6951 operation. */
6952 switch (code)
6954 case POINTER_PLUS_EXPR:
6955 code = PLUS_EXPR;
6956 break;
6957 case MULT_HIGHPART_EXPR:
6958 goto fail;
6959 default:
6960 break;
6962 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6963 goto fail;
6965 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6966 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6967 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6968 goto fail;
6970 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6971 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6972 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6973 || !gimple_assign_load_p (load1_stmt)
6974 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6975 || !gimple_assign_load_p (load2_stmt))
6976 goto fail;
6978 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6979 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6980 if (load1_stmt_info == NULL
6981 || load2_stmt_info == NULL
6982 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6983 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6984 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6985 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6986 goto fail;
6988 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6990 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6991 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6992 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6993 goto fail;
6994 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6995 tree lrhs;
6996 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6997 lrhs = rhs1;
6998 else
6999 lrhs = rhs2;
7000 use_operand_p use_p;
7001 imm_use_iterator iter;
7002 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7004 gimple *use_stmt = USE_STMT (use_p);
7005 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7006 continue;
7007 if (other_store_stmt)
7008 goto fail;
7009 other_store_stmt = use_stmt;
7013 if (other_store_stmt == NULL)
7014 goto fail;
7015 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7016 || !gimple_store_p (other_store_stmt))
7017 goto fail;
7019 stmt_vec_info other_store_stmt_info
7020 = loop_vinfo->lookup_stmt (other_store_stmt);
7021 if (other_store_stmt_info == NULL
7022 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7023 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7024 goto fail;
7026 gimple *stmt1 = stmt;
7027 gimple *stmt2 = other_store_stmt;
7028 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7029 std::swap (stmt1, stmt2);
7030 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7031 gimple_assign_rhs1 (load2_stmt)))
7033 std::swap (rhs1, rhs2);
7034 std::swap (load1_stmt, load2_stmt);
7035 std::swap (load1_stmt_info, load2_stmt_info);
7037 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7038 gimple_assign_rhs1 (load1_stmt)))
7039 goto fail;
7041 tree var3 = NULL_TREE;
7042 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7043 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7044 gimple_assign_rhs1 (load2_stmt)))
7045 goto fail;
7046 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7048 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7049 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7050 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7051 goto fail;
7052 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7053 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7054 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7055 || lookup_attribute ("omp simd inscan exclusive",
7056 DECL_ATTRIBUTES (var3)))
7057 goto fail;
7060 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7061 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7062 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7063 goto fail;
7065 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7066 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7067 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7068 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7069 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7070 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7071 goto fail;
7073 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7074 std::swap (var1, var2);
7076 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7078 if (!lookup_attribute ("omp simd inscan exclusive",
7079 DECL_ATTRIBUTES (var1)))
7080 goto fail;
7081 var1 = var3;
7084 if (loop_vinfo->scan_map == NULL)
7085 goto fail;
7086 tree *init = loop_vinfo->scan_map->get (var1);
7087 if (init == NULL)
7088 goto fail;
7090 /* The IL is as expected, now check if we can actually vectorize it.
7091 Inclusive scan:
7092 _26 = D.2043[_25];
7093 _27 = D.2042[_25];
7094 _28 = _26 + _27;
7095 D.2043[_25] = _28;
7096 D.2042[_25] = _28;
7097 should be vectorized as (where _40 is the vectorized rhs
7098 from the D.2042[_21] = 0; store):
7099 _30 = MEM <vector(8) int> [(int *)&D.2043];
7100 _31 = MEM <vector(8) int> [(int *)&D.2042];
7101 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7102 _33 = _31 + _32;
7103 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7104 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7105 _35 = _33 + _34;
7106 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7107 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7108 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7109 _37 = _35 + _36;
7110 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7111 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7112 _38 = _30 + _37;
7113 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7114 MEM <vector(8) int> [(int *)&D.2043] = _39;
7115 MEM <vector(8) int> [(int *)&D.2042] = _38;
7116 Exclusive scan:
7117 _26 = D.2043[_25];
7118 D.2044[_25] = _26;
7119 _27 = D.2042[_25];
7120 _28 = _26 + _27;
7121 D.2043[_25] = _28;
7122 should be vectorized as (where _40 is the vectorized rhs
7123 from the D.2042[_21] = 0; store):
7124 _30 = MEM <vector(8) int> [(int *)&D.2043];
7125 _31 = MEM <vector(8) int> [(int *)&D.2042];
7126 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7127 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7128 _34 = _32 + _33;
7129 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7130 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7131 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7132 _36 = _34 + _35;
7133 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7134 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7135 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7136 _38 = _36 + _37;
7137 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7138 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7139 _39 = _30 + _38;
7140 _50 = _31 + _39;
7141 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7142 MEM <vector(8) int> [(int *)&D.2044] = _39;
7143 MEM <vector(8) int> [(int *)&D.2042] = _51; */
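   /* Illustrative sketch (not part of the vectorizer): the log2 (nunits)
      permute-and-add steps above implement a Hillis-Steele style inclusive
      prefix sum.  Assuming an 8-lane vector held in a plain int array, a
      scalar C model of the same recurrence would be

	static void
	scalar_inclusive_scan (int v[8])
	{
	  for (int step = 1; step < 8; step *= 2)
	    for (int i = 7; i >= step; --i)
	      v[i] += v[i - step];
	}

      where lanes below STEP are left untouched in each pass (the vector
      code instead shifts in lanes of _40, the vectorized init, and adds
      them).  Afterwards v[i] == v[0] + ... + v[i], which is what _37 holds
      in the inclusive scan example above.  */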
7144 enum machine_mode vec_mode = TYPE_MODE (vectype);
7145 optab optab = optab_for_tree_code (code, vectype, optab_default);
7146 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7147 goto fail;
7149 int units_log2 = scan_store_can_perm_p (vectype, *init);
7150 if (units_log2 == -1)
7151 goto fail;
7153 return true;
7157 /* Function vectorizable_scan_store.
7159 Helper of vectorizable_store, arguments like on vectorizable_store.
7160 Handle only the transformation, checking is done in check_scan_store. */
7162 static bool
7163 vectorizable_scan_store (vec_info *vinfo,
7164 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7165 gimple **vec_stmt, int ncopies)
7167 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7168 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7169 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7170 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7172 if (dump_enabled_p ())
7173 dump_printf_loc (MSG_NOTE, vect_location,
7174 "transform scan store. ncopies = %d\n", ncopies);
7176 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7177 tree rhs = gimple_assign_rhs1 (stmt);
7178 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7180 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7181 bool inscan_var_store
7182 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7184 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7186 use_operand_p use_p;
7187 imm_use_iterator iter;
7188 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7190 gimple *use_stmt = USE_STMT (use_p);
7191 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7192 continue;
7193 rhs = gimple_assign_lhs (use_stmt);
7194 break;
7198 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7199 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7200 if (code == POINTER_PLUS_EXPR)
7201 code = PLUS_EXPR;
7202 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7203 && commutative_tree_code (code));
7204 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7205 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7206 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7207 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7208 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7209 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7210 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7211 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7212 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7213 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7214 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7216 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7218 std::swap (rhs1, rhs2);
7219 std::swap (var1, var2);
7220 std::swap (load1_dr_info, load2_dr_info);
7223 tree *init = loop_vinfo->scan_map->get (var1);
7224 gcc_assert (init);
7226 unsigned HOST_WIDE_INT nunits;
7227 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7228 gcc_unreachable ();
7229 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7230 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7231 gcc_assert (units_log2 > 0);
7232 auto_vec<tree, 16> perms;
7233 perms.quick_grow (units_log2 + 1);
7234 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7235 for (int i = 0; i <= units_log2; ++i)
7237 unsigned HOST_WIDE_INT j, k;
7238 vec_perm_builder sel (nunits, nunits, 1);
7239 sel.quick_grow (nunits);
7240 if (i == units_log2)
7241 for (j = 0; j < nunits; ++j)
7242 sel[j] = nunits - 1;
7243 else
7245 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7246 sel[j] = j;
7247 for (k = 0; j < nunits; ++j, ++k)
7248 sel[j] = nunits + k;
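	    /* For example, with nunits == 8 and i == 1 this builds the
	       selector { 0, 1, 8, 9, 10, 11, 12, 13 }: keep the low (1 << i)
	       lanes from the first VEC_PERM_EXPR operand and fill the
	       remaining positions with the second operand shifted up by
	       (1 << i) lanes, as in the IL examples above.  */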
7250 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7251 if (!use_whole_vector.is_empty ()
7252 && use_whole_vector[i] != scan_store_kind_perm)
7254 if (zero_vec == NULL_TREE)
7255 zero_vec = build_zero_cst (vectype);
7256 if (masktype == NULL_TREE
7257 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7258 masktype = truth_type_for (vectype);
7259 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7261 else
7262 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7265 tree vec_oprnd1 = NULL_TREE;
7266 tree vec_oprnd2 = NULL_TREE;
7267 tree vec_oprnd3 = NULL_TREE;
7268 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7269 tree dataref_offset = build_int_cst (ref_type, 0);
7270 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7271 vectype, VMAT_CONTIGUOUS);
7272 tree ldataref_ptr = NULL_TREE;
7273 tree orig = NULL_TREE;
7274 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7275 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7276 auto_vec<tree> vec_oprnds1;
7277 auto_vec<tree> vec_oprnds2;
7278 auto_vec<tree> vec_oprnds3;
7279 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7280 *init, &vec_oprnds1,
7281 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7282 rhs2, &vec_oprnds3);
7283 for (int j = 0; j < ncopies; j++)
7285 vec_oprnd1 = vec_oprnds1[j];
7286 if (ldataref_ptr == NULL)
7287 vec_oprnd2 = vec_oprnds2[j];
7288 vec_oprnd3 = vec_oprnds3[j];
7289 if (j == 0)
7290 orig = vec_oprnd3;
7291 else if (!inscan_var_store)
7292 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7294 if (ldataref_ptr)
7296 vec_oprnd2 = make_ssa_name (vectype);
7297 tree data_ref = fold_build2 (MEM_REF, vectype,
7298 unshare_expr (ldataref_ptr),
7299 dataref_offset);
7300 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7301 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7302 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7303 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7304 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7307 tree v = vec_oprnd2;
7308 for (int i = 0; i < units_log2; ++i)
7310 tree new_temp = make_ssa_name (vectype);
7311 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7312 (zero_vec
7313 && (use_whole_vector[i]
7314 != scan_store_kind_perm))
7315 ? zero_vec : vec_oprnd1, v,
7316 perms[i]);
7317 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7318 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7319 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7321 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7323 /* The whole-vector shift shifted in zero bits, but if *init
7324 is not initializer_zerop, those elements need to be replaced
7325 with the corresponding elements from vec_oprnd1. */
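	      /* For instance, at step i == 1 the mask built below is
		 { false, false, true, true, ... }: lanes 0 and 1 are taken
		 from vec_oprnd1 (the vectorized *init) and the remaining
		 lanes from the shifted vector NEW_TEMP.  */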
7326 tree_vector_builder vb (masktype, nunits, 1);
7327 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7328 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7329 ? boolean_false_node : boolean_true_node);
7331 tree new_temp2 = make_ssa_name (vectype);
7332 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7333 new_temp, vec_oprnd1);
7334 vect_finish_stmt_generation (vinfo, stmt_info,
7335 g, gsi);
7336 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7337 new_temp = new_temp2;
7340 /* For exclusive scan, perform the perms[i] permutation once
7341 more. */
7342 if (i == 0
7343 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7344 && v == vec_oprnd2)
7346 v = new_temp;
7347 --i;
7348 continue;
7351 tree new_temp2 = make_ssa_name (vectype);
7352 g = gimple_build_assign (new_temp2, code, v, new_temp);
7353 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7354 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7356 v = new_temp2;
7359 tree new_temp = make_ssa_name (vectype);
7360 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7361 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7362 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7364 tree last_perm_arg = new_temp;
7365 /* For exclusive scan, new_temp computed above is the exclusive scan
7366 prefix sum. Turn it into inclusive prefix sum for the broadcast
7367 of the last element into orig. */
7368 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7370 last_perm_arg = make_ssa_name (vectype);
7371 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7372 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7373 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7376 orig = make_ssa_name (vectype);
7377 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7378 last_perm_arg, perms[units_log2]);
7379 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7380 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7382 if (!inscan_var_store)
7384 tree data_ref = fold_build2 (MEM_REF, vectype,
7385 unshare_expr (dataref_ptr),
7386 dataref_offset);
7387 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7388 g = gimple_build_assign (data_ref, new_temp);
7389 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7390 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7394 if (inscan_var_store)
7395 for (int j = 0; j < ncopies; j++)
7397 if (j != 0)
7398 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7400 tree data_ref = fold_build2 (MEM_REF, vectype,
7401 unshare_expr (dataref_ptr),
7402 dataref_offset);
7403 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7404 gimple *g = gimple_build_assign (data_ref, orig);
7405 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7406 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7408 return true;
7412 /* Function vectorizable_store.
7414 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
7415 that can be vectorized.
7416 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7417 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7418 Return true if STMT_INFO is vectorizable in this way. */
7420 static bool
7421 vectorizable_store (vec_info *vinfo,
7422 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7423 gimple **vec_stmt, slp_tree slp_node,
7424 stmt_vector_for_cost *cost_vec)
7426 tree data_ref;
7427 tree op;
7428 tree vec_oprnd = NULL_TREE;
7429 tree elem_type;
7430 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7431 class loop *loop = NULL;
7432 machine_mode vec_mode;
7433 tree dummy;
7434 enum vect_def_type rhs_dt = vect_unknown_def_type;
7435 enum vect_def_type mask_dt = vect_unknown_def_type;
7436 tree dataref_ptr = NULL_TREE;
7437 tree dataref_offset = NULL_TREE;
7438 gimple *ptr_incr = NULL;
7439 int ncopies;
7440 int j;
7441 stmt_vec_info first_stmt_info;
7442 bool grouped_store;
7443 unsigned int group_size, i;
7444 vec<tree> oprnds = vNULL;
7445 vec<tree> result_chain = vNULL;
7446 vec<tree> vec_oprnds = vNULL;
7447 bool slp = (slp_node != NULL);
7448 unsigned int vec_num;
7449 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7450 tree aggr_type;
7451 gather_scatter_info gs_info;
7452 poly_uint64 vf;
7453 vec_load_store_type vls_type;
7454 tree ref_type;
7456 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7457 return false;
7459 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7460 && ! vec_stmt)
7461 return false;
7463 /* Is vectorizable store? */
7465 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7466 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7468 tree scalar_dest = gimple_assign_lhs (assign);
7469 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7470 && is_pattern_stmt_p (stmt_info))
7471 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7472 if (TREE_CODE (scalar_dest) != ARRAY_REF
7473 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7474 && TREE_CODE (scalar_dest) != INDIRECT_REF
7475 && TREE_CODE (scalar_dest) != COMPONENT_REF
7476 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7477 && TREE_CODE (scalar_dest) != REALPART_EXPR
7478 && TREE_CODE (scalar_dest) != MEM_REF)
7479 return false;
7481 else
7483 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7484 if (!call || !gimple_call_internal_p (call))
7485 return false;
7487 internal_fn ifn = gimple_call_internal_fn (call);
7488 if (!internal_store_fn_p (ifn))
7489 return false;
7491 if (slp_node != NULL)
7493 if (dump_enabled_p ())
7494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7495 "SLP of masked stores not supported.\n");
7496 return false;
7499 int mask_index = internal_fn_mask_index (ifn);
7500 if (mask_index >= 0
7501 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7502 &mask, NULL, &mask_dt, &mask_vectype))
7503 return false;
7506 op = vect_get_store_rhs (stmt_info);
7508 /* Cannot have hybrid store SLP -- that would mean storing to the
7509 same location twice. */
7510 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7512 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7513 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7515 if (loop_vinfo)
7517 loop = LOOP_VINFO_LOOP (loop_vinfo);
7518 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7520 else
7521 vf = 1;
7523 /* Multiple types in SLP are handled by creating the appropriate number of
7524 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7525 case of SLP. */
7526 if (slp)
7527 ncopies = 1;
7528 else
7529 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7531 gcc_assert (ncopies >= 1);
7533 /* FORNOW. This restriction should be relaxed. */
7534 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7536 if (dump_enabled_p ())
7537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7538 "multiple types in nested loop.\n");
7539 return false;
7542 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7543 op, &rhs_dt, &rhs_vectype, &vls_type))
7544 return false;
7546 elem_type = TREE_TYPE (vectype);
7547 vec_mode = TYPE_MODE (vectype);
7549 if (!STMT_VINFO_DATA_REF (stmt_info))
7550 return false;
7552 vect_memory_access_type memory_access_type;
7553 enum dr_alignment_support alignment_support_scheme;
7554 int misalignment;
7555 poly_int64 poffset;
7556 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7557 ncopies, &memory_access_type, &poffset,
7558 &alignment_support_scheme, &misalignment, &gs_info))
7559 return false;
7561 if (mask)
7563 if (memory_access_type == VMAT_CONTIGUOUS)
7565 if (!VECTOR_MODE_P (vec_mode)
7566 || !can_vec_mask_load_store_p (vec_mode,
7567 TYPE_MODE (mask_vectype), false))
7568 return false;
7570 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7571 && (memory_access_type != VMAT_GATHER_SCATTER
7572 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7574 if (dump_enabled_p ())
7575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7576 "unsupported access type for masked store.\n");
7577 return false;
7580 else
7582 /* FORNOW. In some cases can vectorize even if data-type not supported
7583 (e.g. - array initialization with 0). */
7584 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7585 return false;
7588 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7589 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7590 && memory_access_type != VMAT_GATHER_SCATTER
7591 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7592 if (grouped_store)
7594 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7595 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7596 group_size = DR_GROUP_SIZE (first_stmt_info);
7598 else
7600 first_stmt_info = stmt_info;
7601 first_dr_info = dr_info;
7602 group_size = vec_num = 1;
7605 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7607 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7608 memory_access_type))
7609 return false;
7612 if (!vec_stmt) /* transformation not required. */
7614 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7616 if (loop_vinfo
7617 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7618 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
7619 vls_type, group_size,
7620 memory_access_type, &gs_info,
7621 mask);
7623 if (slp_node
7624 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7625 vectype))
7627 if (dump_enabled_p ())
7628 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7629 "incompatible vector types for invariants\n");
7630 return false;
7633 if (dump_enabled_p ()
7634 && memory_access_type != VMAT_ELEMENTWISE
7635 && memory_access_type != VMAT_GATHER_SCATTER
7636 && alignment_support_scheme != dr_aligned)
7637 dump_printf_loc (MSG_NOTE, vect_location,
7638 "Vectorizing an unaligned access.\n");
7640 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7641 vect_model_store_cost (vinfo, stmt_info, ncopies,
7642 memory_access_type, alignment_support_scheme,
7643 misalignment, vls_type, slp_node, cost_vec);
7644 return true;
7646 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7648 /* Transform. */
7650 ensure_base_align (dr_info);
7652 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7654 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7655 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7656 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7657 tree ptr, var, scale, vec_mask;
7658 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7659 tree mask_halfvectype = mask_vectype;
7660 edge pe = loop_preheader_edge (loop);
7661 gimple_seq seq;
7662 basic_block new_bb;
7663 enum { NARROW, NONE, WIDEN } modifier;
7664 poly_uint64 scatter_off_nunits
7665 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7667 if (known_eq (nunits, scatter_off_nunits))
7668 modifier = NONE;
7669 else if (known_eq (nunits * 2, scatter_off_nunits))
7671 modifier = WIDEN;
7673 /* Currently gathers and scatters are only supported for
7674 fixed-length vectors. */
7675 unsigned int count = scatter_off_nunits.to_constant ();
7676 vec_perm_builder sel (count, count, 1);
7677 for (i = 0; i < (unsigned int) count; ++i)
7678 sel.quick_push (i | (count / 2));
7680 vec_perm_indices indices (sel, 1, count);
7681 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7682 indices);
7683 gcc_assert (perm_mask != NULL_TREE);
7685 else if (known_eq (nunits, scatter_off_nunits * 2))
7687 modifier = NARROW;
7689 /* Currently gathers and scatters are only supported for
7690 fixed-length vectors. */
7691 unsigned int count = nunits.to_constant ();
7692 vec_perm_builder sel (count, count, 1);
7693 for (i = 0; i < (unsigned int) count; ++i)
7694 sel.quick_push (i | (count / 2));
7696 vec_perm_indices indices (sel, 2, count);
7697 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7698 gcc_assert (perm_mask != NULL_TREE);
7699 ncopies *= 2;
7701 if (mask)
7702 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7704 else
7705 gcc_unreachable ();
7707 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7708 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7709 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7710 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7711 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7712 scaletype = TREE_VALUE (arglist);
7714 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7715 && TREE_CODE (rettype) == VOID_TYPE);
7717 ptr = fold_convert (ptrtype, gs_info.base);
7718 if (!is_gimple_min_invariant (ptr))
7720 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7721 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7722 gcc_assert (!new_bb);
7725 if (mask == NULL_TREE)
7727 mask_arg = build_int_cst (masktype, -1);
7728 mask_arg = vect_init_vector (vinfo, stmt_info,
7729 mask_arg, masktype, NULL);
7732 scale = build_int_cst (scaletype, gs_info.scale);
7734 auto_vec<tree> vec_oprnds0;
7735 auto_vec<tree> vec_oprnds1;
7736 auto_vec<tree> vec_masks;
7737 if (mask)
7739 tree mask_vectype = truth_type_for (vectype);
7740 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7741 modifier == NARROW
7742 ? ncopies / 2 : ncopies,
7743 mask, &vec_masks, mask_vectype);
7745 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7746 modifier == WIDEN
7747 ? ncopies / 2 : ncopies,
7748 gs_info.offset, &vec_oprnds0);
7749 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7750 modifier == NARROW
7751 ? ncopies / 2 : ncopies,
7752 op, &vec_oprnds1);
7753 for (j = 0; j < ncopies; ++j)
7755 if (modifier == WIDEN)
7757 if (j & 1)
7758 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7759 perm_mask, stmt_info, gsi);
7760 else
7761 op = vec_oprnd0 = vec_oprnds0[j / 2];
7762 src = vec_oprnd1 = vec_oprnds1[j];
7763 if (mask)
7764 mask_op = vec_mask = vec_masks[j];
7766 else if (modifier == NARROW)
7768 if (j & 1)
7769 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7770 perm_mask, stmt_info, gsi);
7771 else
7772 src = vec_oprnd1 = vec_oprnds1[j / 2];
7773 op = vec_oprnd0 = vec_oprnds0[j];
7774 if (mask)
7775 mask_op = vec_mask = vec_masks[j / 2];
7777 else
7779 op = vec_oprnd0 = vec_oprnds0[j];
7780 src = vec_oprnd1 = vec_oprnds1[j];
7781 if (mask)
7782 mask_op = vec_mask = vec_masks[j];
7785 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7787 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7788 TYPE_VECTOR_SUBPARTS (srctype)));
7789 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7790 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7791 gassign *new_stmt
7792 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7793 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7794 src = var;
7797 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7799 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7800 TYPE_VECTOR_SUBPARTS (idxtype)));
7801 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7802 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7803 gassign *new_stmt
7804 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7805 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7806 op = var;
7809 if (mask)
7811 tree utype;
7812 mask_arg = mask_op;
7813 if (modifier == NARROW)
7815 var = vect_get_new_ssa_name (mask_halfvectype,
7816 vect_simple_var);
7817 gassign *new_stmt
7818 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7819 : VEC_UNPACK_LO_EXPR,
7820 mask_op);
7821 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7822 mask_arg = var;
7824 tree optype = TREE_TYPE (mask_arg);
7825 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7826 utype = masktype;
7827 else
7828 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7829 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7830 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7831 gassign *new_stmt
7832 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7833 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7834 mask_arg = var;
7835 if (!useless_type_conversion_p (masktype, utype))
7837 gcc_assert (TYPE_PRECISION (utype)
7838 <= TYPE_PRECISION (masktype));
7839 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7840 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7841 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7842 mask_arg = var;
7846 gcall *new_stmt
7847 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7848 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7850 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7852 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7853 return true;
7855 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7856 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7858 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7859 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7861 if (grouped_store)
7863 /* FORNOW */
7864 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7866 /* We vectorize all the stmts of the interleaving group when we
7867 reach the last stmt in the group. */
7868 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7869 < DR_GROUP_SIZE (first_stmt_info)
7870 && !slp)
7872 *vec_stmt = NULL;
7873 return true;
7876 if (slp)
7878 grouped_store = false;
7879 /* VEC_NUM is the number of vect stmts to be created for this
7880 group. */
7881 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7882 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7883 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7884 == first_stmt_info);
7885 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7886 op = vect_get_store_rhs (first_stmt_info);
7888 else
7889 /* VEC_NUM is the number of vect stmts to be created for this
7890 group. */
7891 vec_num = group_size;
7893 ref_type = get_group_alias_ptr_type (first_stmt_info);
7895 else
7896 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7898 if (dump_enabled_p ())
7899 dump_printf_loc (MSG_NOTE, vect_location,
7900 "transform store. ncopies = %d\n", ncopies);
7902 if (memory_access_type == VMAT_ELEMENTWISE
7903 || memory_access_type == VMAT_STRIDED_SLP)
7905 gimple_stmt_iterator incr_gsi;
7906 bool insert_after;
7907 gimple *incr;
7908 tree offvar;
7909 tree ivstep;
7910 tree running_off;
7911 tree stride_base, stride_step, alias_off;
7912 tree vec_oprnd;
7913 tree dr_offset;
7914 unsigned int g;
7915 /* Checked by get_load_store_type. */
7916 unsigned int const_nunits = nunits.to_constant ();
7918 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7919 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7921 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7922 stride_base
7923 = fold_build_pointer_plus
7924 (DR_BASE_ADDRESS (first_dr_info->dr),
7925 size_binop (PLUS_EXPR,
7926 convert_to_ptrofftype (dr_offset),
7927 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7928 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7930 /* For a store with loop-invariant (but other than power-of-2)
7931 stride (i.e. not a grouped access) like so:
7933 for (i = 0; i < n; i += stride)
7934 array[i] = ...;
7936 we generate a new induction variable and new stores from
7937 the components of the (vectorized) rhs:
7939 for (j = 0; ; j += VF*stride)
7940 vectemp = ...;
7941 tmp1 = vectemp[0];
7942 array[j] = tmp1;
7943 tmp2 = vectemp[1];
7944 array[j + stride] = tmp2;
7948 unsigned nstores = const_nunits;
7949 unsigned lnel = 1;
7950 tree ltype = elem_type;
7951 tree lvectype = vectype;
7952 if (slp)
7954 if (group_size < const_nunits
7955 && const_nunits % group_size == 0)
7957 nstores = const_nunits / group_size;
7958 lnel = group_size;
7959 ltype = build_vector_type (elem_type, group_size);
7960 lvectype = vectype;
7962 /* First check if vec_extract optab doesn't support extraction
7963 of vector elts directly. */
7964 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7965 machine_mode vmode;
7966 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7967 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7968 group_size).exists (&vmode)
7969 || (convert_optab_handler (vec_extract_optab,
7970 TYPE_MODE (vectype), vmode)
7971 == CODE_FOR_nothing))
7973 /* Try to avoid emitting an extract of vector elements
7974 by performing the extracts using an integer type of the
7975 same size, extracting from a vector of those and then
7976 re-interpreting it as the original vector type if
7977 supported. */
7978 unsigned lsize
7979 = group_size * GET_MODE_BITSIZE (elmode);
7980 unsigned int lnunits = const_nunits / group_size;
7981 /* If we can't construct such a vector, fall back to
7982 element extracts from the original vector type and
7983 element size stores. */
7984 if (int_mode_for_size (lsize, 0).exists (&elmode)
7985 && VECTOR_MODE_P (TYPE_MODE (vectype))
7986 && related_vector_mode (TYPE_MODE (vectype), elmode,
7987 lnunits).exists (&vmode)
7988 && (convert_optab_handler (vec_extract_optab,
7989 vmode, elmode)
7990 != CODE_FOR_nothing))
7992 nstores = lnunits;
7993 lnel = group_size;
7994 ltype = build_nonstandard_integer_type (lsize, 1);
7995 lvectype = build_vector_type (ltype, nstores);
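		  /* As a concrete example (not tied to a particular target):
		     for a V8SI group with group_size == 2, lsize is 64, so
		     ltype becomes a 64-bit integer and lvectype a 4-element
		     vector of those; each pair of ints is then extracted and
		     stored as a single 8-byte integer.  */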
7997 /* Else fall back to vector extraction anyway.
7998 Fewer stores are more important than avoiding spilling
7999 of the vector we extract from. Compared to the
8000 construction case in vectorizable_load no store-forwarding
8001 issue exists here for reasonable archs. */
8004 else if (group_size >= const_nunits
8005 && group_size % const_nunits == 0)
8007 nstores = 1;
8008 lnel = const_nunits;
8009 ltype = vectype;
8010 lvectype = vectype;
8012 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8013 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8016 ivstep = stride_step;
8017 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8018 build_int_cst (TREE_TYPE (ivstep), vf));
8020 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8022 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8023 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8024 create_iv (stride_base, ivstep, NULL,
8025 loop, &incr_gsi, insert_after,
8026 &offvar, NULL);
8027 incr = gsi_stmt (incr_gsi);
8029 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8031 alias_off = build_int_cst (ref_type, 0);
8032 stmt_vec_info next_stmt_info = first_stmt_info;
8033 for (g = 0; g < group_size; g++)
8035 running_off = offvar;
8036 if (g)
8038 tree size = TYPE_SIZE_UNIT (ltype);
8039 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8040 size);
8041 tree newoff = copy_ssa_name (running_off, NULL);
8042 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8043 running_off, pos);
8044 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8045 running_off = newoff;
8047 if (!slp)
8048 op = vect_get_store_rhs (next_stmt_info);
8049 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8050 op, &vec_oprnds);
8051 unsigned int group_el = 0;
8052 unsigned HOST_WIDE_INT
8053 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8054 for (j = 0; j < ncopies; j++)
8056 vec_oprnd = vec_oprnds[j];
8057 /* Pun the vector to extract from if necessary. */
8058 if (lvectype != vectype)
8060 tree tem = make_ssa_name (lvectype);
8061 gimple *pun
8062 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8063 lvectype, vec_oprnd));
8064 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8065 vec_oprnd = tem;
8067 for (i = 0; i < nstores; i++)
8069 tree newref, newoff;
8070 gimple *incr, *assign;
8071 tree size = TYPE_SIZE (ltype);
8072 /* Extract the i'th component. */
8073 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8074 bitsize_int (i), size);
8075 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8076 size, pos);
8078 elem = force_gimple_operand_gsi (gsi, elem, true,
8079 NULL_TREE, true,
8080 GSI_SAME_STMT);
8082 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8083 group_el * elsz);
8084 newref = build2 (MEM_REF, ltype,
8085 running_off, this_off);
8086 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8088 /* And store it to *running_off. */
8089 assign = gimple_build_assign (newref, elem);
8090 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8092 group_el += lnel;
8093 if (! slp
8094 || group_el == group_size)
8096 newoff = copy_ssa_name (running_off, NULL);
8097 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8098 running_off, stride_step);
8099 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8101 running_off = newoff;
8102 group_el = 0;
8104 if (g == group_size - 1
8105 && !slp)
8107 if (j == 0 && i == 0)
8108 *vec_stmt = assign;
8109 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8113 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8114 vec_oprnds.release ();
8115 if (slp)
8116 break;
8119 return true;
8122 auto_vec<tree> dr_chain (group_size);
8123 oprnds.create (group_size);
8125 gcc_assert (alignment_support_scheme);
8126 vec_loop_masks *loop_masks
8127 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8128 ? &LOOP_VINFO_MASKS (loop_vinfo)
8129 : NULL);
8130 vec_loop_lens *loop_lens
8131 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8132 ? &LOOP_VINFO_LENS (loop_vinfo)
8133 : NULL);
8135 /* Shouldn't go with length-based approach if fully masked. */
8136 gcc_assert (!loop_lens || !loop_masks);
8138 /* Targets with store-lane instructions must not require explicit
8139 realignment. vect_supportable_dr_alignment always returns either
8140 dr_aligned or dr_unaligned_supported for masked operations. */
8141 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8142 && !mask
8143 && !loop_masks)
8144 || alignment_support_scheme == dr_aligned
8145 || alignment_support_scheme == dr_unaligned_supported);
8147 tree offset = NULL_TREE;
8148 if (!known_eq (poffset, 0))
8149 offset = size_int (poffset);
8151 tree bump;
8152 tree vec_offset = NULL_TREE;
8153 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8155 aggr_type = NULL_TREE;
8156 bump = NULL_TREE;
8158 else if (memory_access_type == VMAT_GATHER_SCATTER)
8160 aggr_type = elem_type;
8161 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8162 &bump, &vec_offset);
8164 else
8166 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8167 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8168 else
8169 aggr_type = vectype;
8170 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8171 memory_access_type);
8174 if (mask)
8175 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8177 /* In case the vectorization factor (VF) is bigger than the number
8178 of elements that we can fit in a vectype (nunits), we have to generate
8179 more than one vector stmt, i.e. we need to "unroll" the
8180 vector stmt by a factor VF/nunits. */
8182 /* In case of interleaving (non-unit grouped access):
8184 S1: &base + 2 = x2
8185 S2: &base = x0
8186 S3: &base + 1 = x1
8187 S4: &base + 3 = x3
8189 We create vectorized stores starting from the base address (the access
8190 of the first stmt in the chain, S2 in the above example) when the last
8191 store stmt of the chain (S4) is reached:
8193 VS1: &base = vx2
8194 VS2: &base + vec_size*1 = vx0
8195 VS3: &base + vec_size*2 = vx1
8196 VS4: &base + vec_size*3 = vx3
8198 Then permutation statements are generated:
8200 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8201 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8204 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8205 (the order of the data-refs in the output of vect_permute_store_chain
8206 corresponds to the order of scalar stmts in the interleaving chain - see
8207 the documentation of vect_permute_store_chain()).
8209 In case of both multiple types and interleaving, above vector stores and
8210 permutation stmts are created for every copy. The result vector stmts are
8211 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8212 STMT_VINFO_RELATED_STMT for the next copies.
8215 auto_vec<tree> vec_masks;
8216 tree vec_mask = NULL;
8217 auto_vec<tree> vec_offsets;
8218 auto_vec<vec<tree> > gvec_oprnds;
8219 gvec_oprnds.safe_grow_cleared (group_size, true);
8220 for (j = 0; j < ncopies; j++)
8222 gimple *new_stmt;
8223 if (j == 0)
8225 if (slp)
8227 /* Get vectorized arguments for SLP_NODE. */
8228 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8229 op, &vec_oprnds);
8230 vec_oprnd = vec_oprnds[0];
8232 else
8234 /* For interleaved stores we collect vectorized defs for all the
8235 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8236 used as an input to vect_permute_store_chain().
8238 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8239 and OPRNDS are of size 1. */
8240 stmt_vec_info next_stmt_info = first_stmt_info;
8241 for (i = 0; i < group_size; i++)
8243 /* Since gaps are not supported for interleaved stores,
8244 DR_GROUP_SIZE is the exact number of stmts in the chain.
8245 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8246 there is no interleaving, DR_GROUP_SIZE is 1,
8247 and only one iteration of the loop will be executed. */
8248 op = vect_get_store_rhs (next_stmt_info);
8249 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8250 ncopies, op, &gvec_oprnds[i]);
8251 vec_oprnd = gvec_oprnds[i][0];
8252 dr_chain.quick_push (gvec_oprnds[i][0]);
8253 oprnds.quick_push (gvec_oprnds[i][0]);
8254 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8256 if (mask)
8258 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8259 mask, &vec_masks, mask_vectype);
8260 vec_mask = vec_masks[0];
8264 /* We should have caught mismatched types earlier. */
8265 gcc_assert (useless_type_conversion_p (vectype,
8266 TREE_TYPE (vec_oprnd)));
8267 bool simd_lane_access_p
8268 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8269 if (simd_lane_access_p
8270 && !loop_masks
8271 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8272 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8273 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8274 && integer_zerop (DR_INIT (first_dr_info->dr))
8275 && alias_sets_conflict_p (get_alias_set (aggr_type),
8276 get_alias_set (TREE_TYPE (ref_type))))
8278 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8279 dataref_offset = build_int_cst (ref_type, 0);
8281 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8283 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8284 slp_node, &gs_info, &dataref_ptr,
8285 &vec_offsets);
8286 vec_offset = vec_offsets[0];
8288 else
8289 dataref_ptr
8290 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8291 simd_lane_access_p ? loop : NULL,
8292 offset, &dummy, gsi, &ptr_incr,
8293 simd_lane_access_p, bump);
8295 else
8297 /* For interleaved stores we created vectorized defs for all the
8298 defs stored in OPRNDS in the previous iteration (previous copy).
8299 DR_CHAIN is then used as an input to vect_permute_store_chain().
8300 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8301 OPRNDS are of size 1. */
8302 for (i = 0; i < group_size; i++)
8304 vec_oprnd = gvec_oprnds[i][j];
8305 dr_chain[i] = gvec_oprnds[i][j];
8306 oprnds[i] = gvec_oprnds[i][j];
8308 if (mask)
8309 vec_mask = vec_masks[j];
8310 if (dataref_offset)
8311 dataref_offset
8312 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8313 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8314 vec_offset = vec_offsets[j];
8315 else
8316 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8317 stmt_info, bump);
8320 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8322 tree vec_array;
8324 /* Get an array into which we can store the individual vectors. */
8325 vec_array = create_vector_array (vectype, vec_num);
8327 /* Invalidate the current contents of VEC_ARRAY. This should
8328 become an RTL clobber too, which prevents the vector registers
8329 from being upward-exposed. */
8330 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8332 /* Store the individual vectors into the array. */
8333 for (i = 0; i < vec_num; i++)
8335 vec_oprnd = dr_chain[i];
8336 write_vector_array (vinfo, stmt_info,
8337 gsi, vec_oprnd, vec_array, i);
8340 tree final_mask = NULL;
8341 if (loop_masks)
8342 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8343 vectype, j);
8344 if (vec_mask)
8345 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8346 final_mask, vec_mask, gsi);
8348 gcall *call;
8349 if (final_mask)
8351 /* Emit:
8352 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8353 VEC_ARRAY). */
8354 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8355 tree alias_ptr = build_int_cst (ref_type, align);
8356 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8357 dataref_ptr, alias_ptr,
8358 final_mask, vec_array);
8360 else
8362 /* Emit:
8363 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8364 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8365 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8366 vec_array);
8367 gimple_call_set_lhs (call, data_ref);
8369 gimple_call_set_nothrow (call, true);
8370 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8371 new_stmt = call;
8373 /* Record that VEC_ARRAY is now dead. */
8374 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8376 else
8378 new_stmt = NULL;
8379 if (grouped_store)
8381 if (j == 0)
8382 result_chain.create (group_size);
8383 /* Permute. */
8384 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8385 gsi, &result_chain);
8388 stmt_vec_info next_stmt_info = first_stmt_info;
8389 for (i = 0; i < vec_num; i++)
8391 unsigned misalign;
8392 unsigned HOST_WIDE_INT align;
8394 tree final_mask = NULL_TREE;
8395 if (loop_masks)
8396 final_mask = vect_get_loop_mask (gsi, loop_masks,
8397 vec_num * ncopies,
8398 vectype, vec_num * j + i);
8399 if (vec_mask)
8400 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8401 final_mask, vec_mask, gsi);
8403 if (memory_access_type == VMAT_GATHER_SCATTER)
8405 tree scale = size_int (gs_info.scale);
8406 gcall *call;
8407 if (final_mask)
8408 call = gimple_build_call_internal
8409 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8410 scale, vec_oprnd, final_mask);
8411 else
8412 call = gimple_build_call_internal
8413 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8414 scale, vec_oprnd);
8415 gimple_call_set_nothrow (call, true);
8416 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8417 new_stmt = call;
8418 break;
8421 if (i > 0)
8422 /* Bump the vector pointer. */
8423 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8424 gsi, stmt_info, bump);
8426 if (slp)
8427 vec_oprnd = vec_oprnds[i];
8428 else if (grouped_store)
8429 /* For grouped stores vectorized defs are interleaved in
8430 vect_permute_store_chain(). */
8431 vec_oprnd = result_chain[i];
8433 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8434 if (alignment_support_scheme == dr_aligned)
8435 misalign = 0;
8436 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8438 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8439 misalign = 0;
8441 else
8442 misalign = misalignment;
8443 if (dataref_offset == NULL_TREE
8444 && TREE_CODE (dataref_ptr) == SSA_NAME)
8445 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8446 misalign);
8447 align = least_bit_hwi (misalign | align);
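	      /* For instance, a 16-byte target alignment with a known
		 misalignment of 4 gives least_bit_hwi (4 | 16) == 4,
		 i.e. the access below is emitted as 4-byte aligned.  */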
8449 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8451 tree perm_mask = perm_mask_for_reverse (vectype);
8452 tree perm_dest = vect_create_destination_var
8453 (vect_get_store_rhs (stmt_info), vectype);
8454 tree new_temp = make_ssa_name (perm_dest);
8456 /* Generate the permute statement. */
8457 gimple *perm_stmt
8458 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8459 vec_oprnd, perm_mask);
8460 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8462 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8463 vec_oprnd = new_temp;
8466 /* Arguments are ready. Create the new vector stmt. */
8467 if (final_mask)
8469 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8470 gcall *call
8471 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8472 dataref_ptr, ptr,
8473 final_mask, vec_oprnd);
8474 gimple_call_set_nothrow (call, true);
8475 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8476 new_stmt = call;
8478 else if (loop_lens)
8480 tree final_len
8481 = vect_get_loop_len (loop_vinfo, loop_lens,
8482 vec_num * ncopies, vec_num * j + i);
8483 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8484 machine_mode vmode = TYPE_MODE (vectype);
8485 opt_machine_mode new_ovmode
8486 = get_len_load_store_mode (vmode, false);
8487 machine_mode new_vmode = new_ovmode.require ();
8488 /* Need conversion if it's wrapped with VnQI. */
8489 if (vmode != new_vmode)
8491 tree new_vtype
8492 = build_vector_type_for_mode (unsigned_intQI_type_node,
8493 new_vmode);
8494 tree var
8495 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8496 vec_oprnd
8497 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8498 gassign *new_stmt
8499 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8500 vec_oprnd);
8501 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8502 gsi);
8503 vec_oprnd = var;
8506 signed char biasval =
8507 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8509 tree bias = build_int_cst (intQI_type_node, biasval);
8510 gcall *call
8511 = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
8512 ptr, final_len, vec_oprnd,
8513 bias);
8514 gimple_call_set_nothrow (call, true);
8515 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8516 new_stmt = call;
8518 else
8520 data_ref = fold_build2 (MEM_REF, vectype,
8521 dataref_ptr,
8522 dataref_offset
8523 ? dataref_offset
8524 : build_int_cst (ref_type, 0));
8525 if (alignment_support_scheme == dr_aligned)
8527 else
8528 TREE_TYPE (data_ref)
8529 = build_aligned_type (TREE_TYPE (data_ref),
8530 align * BITS_PER_UNIT);
8531 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8532 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8533 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8536 if (slp)
8537 continue;
8539 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8540 if (!next_stmt_info)
8541 break;
8544 if (!slp)
8546 if (j == 0)
8547 *vec_stmt = new_stmt;
8548 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8552 for (i = 0; i < group_size; ++i)
8554 vec<tree> oprndsi = gvec_oprnds[i];
8555 oprndsi.release ();
8557 oprnds.release ();
8558 result_chain.release ();
8559 vec_oprnds.release ();
8561 return true;
8564 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8565 VECTOR_CST mask. No checks are made that the target platform supports the
8566 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8567 vect_gen_perm_mask_checked. */
8569 tree
8570 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8572 tree mask_type;
8574 poly_uint64 nunits = sel.length ();
8575 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8577 mask_type = build_vector_type (ssizetype, nunits);
8578 return vec_perm_indices_to_tree (mask_type, sel);
8581 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8582 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8584 tree
8585 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8587 machine_mode vmode = TYPE_MODE (vectype);
8588 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
8589 return vect_gen_perm_mask_any (vectype, sel);
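/* A minimal usage sketch (hypothetical; VECTYPE is assumed to be a 4-lane
   vector type whose lane reversal the target supports), modelled on the
   callers in this file: to build a lane-reversal mask one would write

     vec_perm_builder sel (4, 4, 1);
     for (unsigned int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   and pass MASK as the third operand of a VEC_PERM_EXPR.  */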
8592 /* Given vector variables X and Y that were generated for the scalar
8593 STMT_INFO, generate instructions to permute the vector elements of X and Y
8594 using permutation mask MASK_VEC, insert them at *GSI and return the
8595 permuted vector variable. */
8597 static tree
8598 permute_vec_elements (vec_info *vinfo,
8599 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8600 gimple_stmt_iterator *gsi)
8602 tree vectype = TREE_TYPE (x);
8603 tree perm_dest, data_ref;
8604 gimple *perm_stmt;
8606 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8607 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8608 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8609 else
8610 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8611 data_ref = make_ssa_name (perm_dest);
8613 /* Generate the permute statement. */
8614 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8615 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8617 return data_ref;
8620 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8621 inserting them on the loop's preheader edge. Returns true if we
8622 were successful in doing so (and thus STMT_INFO can be moved then),
8623 otherwise returns false. */
8625 static bool
8626 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8628 ssa_op_iter i;
8629 tree op;
8630 bool any = false;
8632 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8634 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8635 if (!gimple_nop_p (def_stmt)
8636 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8638 /* Make sure we don't need to recurse. While we could do
8639 so in simple cases when there are more complex use webs
8640 we don't have an easy way to preserve stmt order to fulfil
8641 dependencies within them. */
8642 tree op2;
8643 ssa_op_iter i2;
8644 if (gimple_code (def_stmt) == GIMPLE_PHI)
8645 return false;
8646 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8648 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8649 if (!gimple_nop_p (def_stmt2)
8650 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8651 return false;
8653 any = true;
8657 if (!any)
8658 return true;
8660 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8662 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8663 if (!gimple_nop_p (def_stmt)
8664 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8666 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8667 gsi_remove (&gsi, false);
8668 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8672 return true;
8675 /* vectorizable_load.
8677 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8678 that can be vectorized.
8679 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8680 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8681 Return true if STMT_INFO is vectorizable in this way. */
8683 static bool
8684 vectorizable_load (vec_info *vinfo,
8685 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8686 gimple **vec_stmt, slp_tree slp_node,
8687 stmt_vector_for_cost *cost_vec)
8689 tree scalar_dest;
8690 tree vec_dest = NULL;
8691 tree data_ref = NULL;
8692 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8693 class loop *loop = NULL;
8694 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8695 bool nested_in_vect_loop = false;
8696 tree elem_type;
8697 tree new_temp;
8698 machine_mode mode;
8699 tree dummy;
8700 tree dataref_ptr = NULL_TREE;
8701 tree dataref_offset = NULL_TREE;
8702 gimple *ptr_incr = NULL;
8703 int ncopies;
8704 int i, j;
8705 unsigned int group_size;
8706 poly_uint64 group_gap_adj;
8707 tree msq = NULL_TREE, lsq;
8708 tree realignment_token = NULL_TREE;
8709 gphi *phi = NULL;
8710 vec<tree> dr_chain = vNULL;
8711 bool grouped_load = false;
8712 stmt_vec_info first_stmt_info;
8713 stmt_vec_info first_stmt_info_for_drptr = NULL;
8714 bool compute_in_loop = false;
8715 class loop *at_loop;
8716 int vec_num;
8717 bool slp = (slp_node != NULL);
8718 bool slp_perm = false;
8719 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8720 poly_uint64 vf;
8721 tree aggr_type;
8722 gather_scatter_info gs_info;
8723 tree ref_type;
8724 enum vect_def_type mask_dt = vect_unknown_def_type;
8726 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8727 return false;
8729 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8730 && ! vec_stmt)
8731 return false;
8733 if (!STMT_VINFO_DATA_REF (stmt_info))
8734 return false;
8736 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8737 int mask_index = -1;
8738 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8740 scalar_dest = gimple_assign_lhs (assign);
8741 if (TREE_CODE (scalar_dest) != SSA_NAME)
8742 return false;
8744 tree_code code = gimple_assign_rhs_code (assign);
8745 if (code != ARRAY_REF
8746 && code != BIT_FIELD_REF
8747 && code != INDIRECT_REF
8748 && code != COMPONENT_REF
8749 && code != IMAGPART_EXPR
8750 && code != REALPART_EXPR
8751 && code != MEM_REF
8752 && TREE_CODE_CLASS (code) != tcc_declaration)
8753 return false;
8755 else
8757 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8758 if (!call || !gimple_call_internal_p (call))
8759 return false;
8761 internal_fn ifn = gimple_call_internal_fn (call);
8762 if (!internal_load_fn_p (ifn))
8763 return false;
8765 scalar_dest = gimple_call_lhs (call);
8766 if (!scalar_dest)
8767 return false;
8769 mask_index = internal_fn_mask_index (ifn);
8770 /* ??? For SLP the mask operand is always last. */
8771 if (mask_index >= 0 && slp_node)
8772 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8773 if (mask_index >= 0
8774 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8775 &mask, NULL, &mask_dt, &mask_vectype))
8776 return false;
8779 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8780 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8782 if (loop_vinfo)
8784 loop = LOOP_VINFO_LOOP (loop_vinfo);
8785 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8786 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8788 else
8789 vf = 1;
8791 /* Multiple types in SLP are handled by creating the appropriate number of
8792 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8793 case of SLP. */
8794 if (slp)
8795 ncopies = 1;
8796 else
8797 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8799 gcc_assert (ncopies >= 1);
8801 /* FORNOW. This restriction should be relaxed. */
8802 if (nested_in_vect_loop && ncopies > 1)
8804 if (dump_enabled_p ())
8805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8806 "multiple types in nested loop.\n");
8807 return false;
8810 /* Invalidate assumptions made by dependence analysis when vectorization
8811 on the unrolled body effectively re-orders stmts. */
8812 if (ncopies > 1
8813 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8814 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8815 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8817 if (dump_enabled_p ())
8818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8819 "cannot perform implicit CSE when unrolling "
8820 "with negative dependence distance\n");
8821 return false;
8824 elem_type = TREE_TYPE (vectype);
8825 mode = TYPE_MODE (vectype);
8827 /* FORNOW. In some cases can vectorize even if data-type not supported
8828 (e.g. - data copies). */
8829 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8831 if (dump_enabled_p ())
8832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8833 "Aligned load, but unsupported type.\n");
8834 return false;
8837 /* Check if the load is a part of an interleaving chain. */
8838 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8840 grouped_load = true;
8841 /* FORNOW */
8842 gcc_assert (!nested_in_vect_loop);
8843 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8845 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8846 group_size = DR_GROUP_SIZE (first_stmt_info);
8848 /* Refuse non-SLP vectorization of SLP-only groups. */
8849 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8851 if (dump_enabled_p ())
8852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8853 "cannot vectorize load in non-SLP mode.\n");
8854 return false;
8857 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8859 slp_perm = true;
8861 if (!loop_vinfo)
8863 /* In BB vectorization we may not actually use a loaded vector
8864 accessing elements in excess of DR_GROUP_SIZE. */
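	  /* Worked example (illustrative): with DR_GROUP_SIZE == 6 and
	     V4SI vectors (nunits == 4) only lanes 0..3 are covered by a
	     full vector load (6 & ~3 == 4); a load permutation that
	     references lane 4 or 5 would need a vector read past the
	     group, which is the "gap at the end" rejected below.  */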
8865 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8866 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8867 unsigned HOST_WIDE_INT nunits;
8868 unsigned j, k, maxk = 0;
8869 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8870 if (k > maxk)
8871 maxk = k;
8872 tree vectype = SLP_TREE_VECTYPE (slp_node);
8873 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8874 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8876 if (dump_enabled_p ())
8877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8878 "BB vectorization with gaps at the end of "
8879 "a load is not supported\n");
8880 return false;
8884 auto_vec<tree> tem;
8885 unsigned n_perms;
8886 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8887 true, &n_perms))
8889 if (dump_enabled_p ())
8890 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8891 vect_location,
8892 "unsupported load permutation\n");
8893 return false;
8897 /* Invalidate assumptions made by dependence analysis when vectorization
8898 on the unrolled body effectively re-orders stmts. */
8899 if (!PURE_SLP_STMT (stmt_info)
8900 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8901 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8902 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8904 if (dump_enabled_p ())
8905 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8906 "cannot perform implicit CSE when performing "
8907 "group loads with negative dependence distance\n");
8908 return false;
8911 else
8912 group_size = 1;
8914 vect_memory_access_type memory_access_type;
8915 enum dr_alignment_support alignment_support_scheme;
8916 int misalignment;
8917 poly_int64 poffset;
8918 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8919 ncopies, &memory_access_type, &poffset,
8920 &alignment_support_scheme, &misalignment, &gs_info))
8921 return false;
8923 if (mask)
8925 if (memory_access_type == VMAT_CONTIGUOUS)
8927 machine_mode vec_mode = TYPE_MODE (vectype);
8928 if (!VECTOR_MODE_P (vec_mode)
8929 || !can_vec_mask_load_store_p (vec_mode,
8930 TYPE_MODE (mask_vectype), true))
8931 return false;
8933 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8934 && memory_access_type != VMAT_GATHER_SCATTER)
8936 if (dump_enabled_p ())
8937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8938 "unsupported access type for masked load.\n");
8939 return false;
8941 else if (memory_access_type == VMAT_GATHER_SCATTER
8942 && gs_info.ifn == IFN_LAST
8943 && !gs_info.decl)
8945 if (dump_enabled_p ())
8946 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8947 "unsupported masked emulated gather.\n");
8948 return false;
8952 if (!vec_stmt) /* transformation not required. */
8954 if (slp_node
8955 && mask
8956 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8957 mask_vectype))
8959 if (dump_enabled_p ())
8960 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8961 "incompatible vector types for invariants\n");
8962 return false;
8965 if (!slp)
8966 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8968 if (loop_vinfo
8969 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8970 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8971 VLS_LOAD, group_size,
8972 memory_access_type, &gs_info,
8973 mask);
8975 if (dump_enabled_p ()
8976 && memory_access_type != VMAT_ELEMENTWISE
8977 && memory_access_type != VMAT_GATHER_SCATTER
8978 && alignment_support_scheme != dr_aligned)
8979 dump_printf_loc (MSG_NOTE, vect_location,
8980 "Vectorizing an unaligned access.\n");
8982 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8983 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8984 alignment_support_scheme, misalignment,
8985 &gs_info, slp_node, cost_vec);
8986 return true;
8989 if (!slp)
8990 gcc_assert (memory_access_type
8991 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8993 if (dump_enabled_p ())
8994 dump_printf_loc (MSG_NOTE, vect_location,
8995 "transform load. ncopies = %d\n", ncopies);
8997 /* Transform. */
8999 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9000 ensure_base_align (dr_info);
9002 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9004 vect_build_gather_load_calls (vinfo,
9005 stmt_info, gsi, vec_stmt, &gs_info, mask);
9006 return true;
9009 if (memory_access_type == VMAT_INVARIANT)
9011 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9012 /* If we have versioned for aliasing or the loop doesn't
9013 have any data dependencies that would preclude this,
9014 then we are sure this is a loop invariant load and
9015 thus we can insert it on the preheader edge. */
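      /* Illustrative example: for a load such as x = *p where p is not
	 modified inside the loop and no data dependence precludes it,
	 the scalar load is emitted once on the loop preheader edge and
	 the vector value is then built from that scalar by
	 vect_init_vector below.  */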
9016 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9017 && !nested_in_vect_loop
9018 && hoist_defs_of_uses (stmt_info, loop));
9019 if (hoist_p)
9021 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9022 if (dump_enabled_p ())
9023 dump_printf_loc (MSG_NOTE, vect_location,
9024 "hoisting out of the vectorized loop: %G", stmt);
9025 scalar_dest = copy_ssa_name (scalar_dest);
9026 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9027 gsi_insert_on_edge_immediate
9028 (loop_preheader_edge (loop),
9029 gimple_build_assign (scalar_dest, rhs));
9031 /* These copies are all equivalent, but currently the representation
9032 requires a separate STMT_VINFO_VEC_STMT for each one. */
9033 gimple_stmt_iterator gsi2 = *gsi;
9034 gsi_next (&gsi2);
9035 for (j = 0; j < ncopies; j++)
9037 if (hoist_p)
9038 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9039 vectype, NULL);
9040 else
9041 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9042 vectype, &gsi2);
9043 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9044 if (slp)
9045 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9046 else
9048 if (j == 0)
9049 *vec_stmt = new_stmt;
9050 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9053 return true;
9056 if (memory_access_type == VMAT_ELEMENTWISE
9057 || memory_access_type == VMAT_STRIDED_SLP)
9059 gimple_stmt_iterator incr_gsi;
9060 bool insert_after;
9061 tree offvar;
9062 tree ivstep;
9063 tree running_off;
9064 vec<constructor_elt, va_gc> *v = NULL;
9065 tree stride_base, stride_step, alias_off;
9066 /* Checked by get_load_store_type. */
9067 unsigned int const_nunits = nunits.to_constant ();
9068 unsigned HOST_WIDE_INT cst_offset = 0;
9069 tree dr_offset;
9071 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9072 gcc_assert (!nested_in_vect_loop);
9074 if (grouped_load)
9076 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9077 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9079 else
9081 first_stmt_info = stmt_info;
9082 first_dr_info = dr_info;
9084 if (slp && grouped_load)
9086 group_size = DR_GROUP_SIZE (first_stmt_info);
9087 ref_type = get_group_alias_ptr_type (first_stmt_info);
9089 else
9091 if (grouped_load)
9092 cst_offset
9093 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9094 * vect_get_place_in_interleaving_chain (stmt_info,
9095 first_stmt_info));
9096 group_size = 1;
9097 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9100 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9101 stride_base
9102 = fold_build_pointer_plus
9103 (DR_BASE_ADDRESS (first_dr_info->dr),
9104 size_binop (PLUS_EXPR,
9105 convert_to_ptrofftype (dr_offset),
9106 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9107 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9109 /* For a load with loop-invariant (but other than power-of-2)
9110 stride (i.e. not a grouped access) like so:
9112 for (i = 0; i < n; i += stride)
9113 ... = array[i];
9115 we generate a new induction variable and new accesses to
9116 form a new vector (or vectors, depending on ncopies):
9118 for (j = 0; ; j += VF*stride)
9119 tmp1 = array[j];
9120 tmp2 = array[j + stride];
9122 vectemp = {tmp1, tmp2, ...}
9125 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9126 build_int_cst (TREE_TYPE (stride_step), vf));
9128 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9130 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9131 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9132 create_iv (stride_base, ivstep, NULL,
9133 loop, &incr_gsi, insert_after,
9134 &offvar, NULL);
9136 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9138 running_off = offvar;
9139 alias_off = build_int_cst (ref_type, 0);
9140 int nloads = const_nunits;
9141 int lnel = 1;
9142 tree ltype = TREE_TYPE (vectype);
9143 tree lvectype = vectype;
9144 auto_vec<tree> dr_chain;
9145 if (memory_access_type == VMAT_STRIDED_SLP)
9147 if (group_size < const_nunits)
9149 /* First check if vec_init optab supports construction from vector
9150 elts directly. Otherwise avoid emitting a constructor of
9151 vector elements by performing the loads using an integer type
9152 of the same size, constructing a vector of those and then
9153 re-interpreting it as the original vector type. This avoids a
9154 huge runtime penalty due to the general inability to perform
9155 store forwarding from smaller stores to a larger load. */
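	      /* Illustrative example: for a V8HI vectype and
		 group_size == 2 each strided access covers two halfwords
		 (32 bits); if the vector cannot be built from 2-element
		 sub-vectors directly, the loads below may be done as
		 four 32-bit integer loads assembled into a V4SI
		 constructor and VIEW_CONVERTed back to V8HI
		 (nloads == 4, lnel == 2 in that case).  */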
9156 tree ptype;
9157 tree vtype
9158 = vector_vector_composition_type (vectype,
9159 const_nunits / group_size,
9160 &ptype);
9161 if (vtype != NULL_TREE)
9163 nloads = const_nunits / group_size;
9164 lnel = group_size;
9165 lvectype = vtype;
9166 ltype = ptype;
9169 else
9171 nloads = 1;
9172 lnel = const_nunits;
9173 ltype = vectype;
9175 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9177 /* Load the vector(1) scalar_type directly if the vectype has just one element. */
9178 else if (nloads == 1)
9179 ltype = vectype;
9181 if (slp)
9183 /* For SLP permutation support we need to load the whole group,
9184 not only the number of vector stmts the permutation result
9185 fits in. */
9186 if (slp_perm)
9188 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9189 variable VF. */
9190 unsigned int const_vf = vf.to_constant ();
9191 ncopies = CEIL (group_size * const_vf, const_nunits);
9192 dr_chain.create (ncopies);
9194 else
9195 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9197 unsigned int group_el = 0;
9198 unsigned HOST_WIDE_INT
9199 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9200 for (j = 0; j < ncopies; j++)
9202 if (nloads > 1)
9203 vec_alloc (v, nloads);
9204 gimple *new_stmt = NULL;
9205 for (i = 0; i < nloads; i++)
9207 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9208 group_el * elsz + cst_offset);
9209 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9210 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9211 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9212 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9213 if (nloads > 1)
9214 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9215 gimple_assign_lhs (new_stmt));
9217 group_el += lnel;
9218 if (! slp
9219 || group_el == group_size)
9221 tree newoff = copy_ssa_name (running_off);
9222 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9223 running_off, stride_step);
9224 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9226 running_off = newoff;
9227 group_el = 0;
9230 if (nloads > 1)
9232 tree vec_inv = build_constructor (lvectype, v);
9233 new_temp = vect_init_vector (vinfo, stmt_info,
9234 vec_inv, lvectype, gsi);
9235 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9236 if (lvectype != vectype)
9238 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9239 VIEW_CONVERT_EXPR,
9240 build1 (VIEW_CONVERT_EXPR,
9241 vectype, new_temp));
9242 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9246 if (slp)
9248 if (slp_perm)
9249 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9250 else
9251 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9253 else
9255 if (j == 0)
9256 *vec_stmt = new_stmt;
9257 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9260 if (slp_perm)
9262 unsigned n_perms;
9263 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9264 false, &n_perms);
9266 return true;
9269 if (memory_access_type == VMAT_GATHER_SCATTER
9270 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9271 grouped_load = false;
9273 if (grouped_load)
9275 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9276 group_size = DR_GROUP_SIZE (first_stmt_info);
9277 /* For SLP vectorization we directly vectorize a subchain
9278 without permutation. */
9279 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9280 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9281 /* For BB vectorization always use the first stmt to base
9282 the data ref pointer on. */
9283 if (bb_vinfo)
9284 first_stmt_info_for_drptr
9285 = vect_find_first_scalar_stmt_in_slp (slp_node);
9287 /* Check if the chain of loads is already vectorized. */
9288 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9289 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9290 ??? But we can only do so if there is exactly one
9291 as we have no way to get at the rest. Leave the CSE
9292 opportunity alone.
9293 ??? With the group load eventually participating
9294 in multiple different permutations (having multiple
9295 slp nodes which refer to the same group) the CSE
9296 is even wrong code. See PR56270. */
9297 && !slp)
9299 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9300 return true;
9302 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9303 group_gap_adj = 0;
9305 /* VEC_NUM is the number of vect stmts to be created for this group. */
9306 if (slp)
9308 grouped_load = false;
9309 /* If an SLP permutation is from N elements to N elements,
9310 and if one vector holds a whole number of N, we can load
9311 the inputs to the permutation in the same way as an
9312 unpermuted sequence. In other cases we need to load the
9313 whole group, not only the number of vector stmts the
9314 permutation result fits in. */
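	  /* Worked example (illustrative): a permutation over a group of
	     2 lanes with group_size == scalar_lanes == 2 and V4SI
	     vectors satisfies multiple_p (4, 2), so the inputs are
	     loaded exactly like an unpermuted 2-lane sequence; a
	     3-element group with V4SI vectors does not, and then the
	     whole group is loaded with
	     vec_num = CEIL (group_size * vf, nunits).  */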
9315 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9316 if (slp_perm
9317 && (group_size != scalar_lanes
9318 || !multiple_p (nunits, group_size)))
9320 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9321 variable VF; see vect_transform_slp_perm_load. */
9322 unsigned int const_vf = vf.to_constant ();
9323 unsigned int const_nunits = nunits.to_constant ();
9324 vec_num = CEIL (group_size * const_vf, const_nunits);
9325 group_gap_adj = vf * group_size - nunits * vec_num;
9327 else
9329 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9330 group_gap_adj
9331 = group_size - scalar_lanes;
9334 else
9335 vec_num = group_size;
9337 ref_type = get_group_alias_ptr_type (first_stmt_info);
9339 else
9341 first_stmt_info = stmt_info;
9342 first_dr_info = dr_info;
9343 group_size = vec_num = 1;
9344 group_gap_adj = 0;
9345 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9346 if (slp)
9347 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9350 gcc_assert (alignment_support_scheme);
9351 vec_loop_masks *loop_masks
9352 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9353 ? &LOOP_VINFO_MASKS (loop_vinfo)
9354 : NULL);
9355 vec_loop_lens *loop_lens
9356 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9357 ? &LOOP_VINFO_LENS (loop_vinfo)
9358 : NULL);
9360 /* Shouldn't go with length-based approach if fully masked. */
9361 gcc_assert (!loop_lens || !loop_masks);
9363 /* Targets with store-lane instructions must not require explicit
9364 realignment. vect_supportable_dr_alignment always returns either
9365 dr_aligned or dr_unaligned_supported for masked operations. */
9366 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9367 && !mask
9368 && !loop_masks)
9369 || alignment_support_scheme == dr_aligned
9370 || alignment_support_scheme == dr_unaligned_supported);
9372 /* In case the vectorization factor (VF) is bigger than the number
9373 of elements that we can fit in a vectype (nunits), we have to generate
9374 more than one vector stmt - i.e - we need to "unroll" the
9375 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9376 from one copy of the vector stmt to the next, in the field
9377 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9378 stages to find the correct vector defs to be used when vectorizing
9379 stmts that use the defs of the current stmt. The example below
9380 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9381 need to create 4 vectorized stmts):
9383 before vectorization:
9384 RELATED_STMT VEC_STMT
9385 S1: x = memref - -
9386 S2: z = x + 1 - -
9388 step 1: vectorize stmt S1:
9389 We first create the vector stmt VS1_0, and, as usual, record a
9390 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9391 Next, we create the vector stmt VS1_1, and record a pointer to
9392 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9393 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9394 stmts and pointers:
9395 RELATED_STMT VEC_STMT
9396 VS1_0: vx0 = memref0 VS1_1 -
9397 VS1_1: vx1 = memref1 VS1_2 -
9398 VS1_2: vx2 = memref2 VS1_3 -
9399 VS1_3: vx3 = memref3 - -
9400 S1: x = load - VS1_0
9401 S2: z = x + 1 - -
9404 /* In case of interleaving (non-unit grouped access):
9406 S1: x2 = &base + 2
9407 S2: x0 = &base
9408 S3: x1 = &base + 1
9409 S4: x3 = &base + 3
9411 Vectorized loads are created in the order of memory accesses
9412 starting from the access of the first stmt of the chain:
9414 VS1: vx0 = &base
9415 VS2: vx1 = &base + vec_size*1
9416 VS3: vx3 = &base + vec_size*2
9417 VS4: vx4 = &base + vec_size*3
9419 Then permutation statements are generated:
9421 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9422 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9425 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9426 (the order of the data-refs in the output of vect_permute_load_chain
9427 corresponds to the order of scalar stmts in the interleaving chain - see
9428 the documentation of vect_permute_load_chain()).
9429 The generation of permutation stmts and recording them in
9430 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9432 In case of both multiple types and interleaving, the vector loads and
9433 permutation stmts above are created for every copy. The result vector
9434 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9435 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9437 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9438 on a target that supports unaligned accesses (dr_unaligned_supported)
9439 we generate the following code:
9440 p = initial_addr;
9441 indx = 0;
9442 loop {
9443 p = p + indx * vectype_size;
9444 vec_dest = *(p);
9445 indx = indx + 1;
9448 Otherwise, the data reference is potentially unaligned on a target that
9449 does not support unaligned accesses (dr_explicit_realign_optimized) -
9450 then generate the following code, in which the data in each iteration is
9451 obtained by two vector loads, one from the previous iteration, and one
9452 from the current iteration:
9453 p1 = initial_addr;
9454 msq_init = *(floor(p1))
9455 p2 = initial_addr + VS - 1;
9456 realignment_token = call target_builtin;
9457 indx = 0;
9458 loop {
9459 p2 = p2 + indx * vectype_size
9460 lsq = *(floor(p2))
9461 vec_dest = realign_load (msq, lsq, realignment_token)
9462 indx = indx + 1;
9463 msq = lsq;
9464 } */
9466 /* If the misalignment remains the same throughout the execution of the
9467 loop, we can create the init_addr and permutation mask at the loop
9468 preheader. Otherwise, it needs to be created inside the loop.
9469 This can only occur when vectorizing memory accesses in the inner-loop
9470 nested within an outer-loop that is being vectorized. */
9472 if (nested_in_vect_loop
9473 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9474 GET_MODE_SIZE (TYPE_MODE (vectype))))
9476 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9477 compute_in_loop = true;
9480 bool diff_first_stmt_info
9481 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9483 tree offset = NULL_TREE;
9484 if ((alignment_support_scheme == dr_explicit_realign_optimized
9485 || alignment_support_scheme == dr_explicit_realign)
9486 && !compute_in_loop)
9488 /* If we have a different first_stmt_info, we can't set up realignment
9489 here, since we can't guarantee that first_stmt_info's DR has been
9490 initialized yet; use first_stmt_info_for_drptr's DR instead, bumping it
9491 by the distance from first_stmt_info's DR as below. */
9492 if (!diff_first_stmt_info)
9493 msq = vect_setup_realignment (vinfo,
9494 first_stmt_info, gsi, &realignment_token,
9495 alignment_support_scheme, NULL_TREE,
9496 &at_loop);
9497 if (alignment_support_scheme == dr_explicit_realign_optimized)
9499 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9500 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9501 size_one_node);
9502 gcc_assert (!first_stmt_info_for_drptr);
9505 else
9506 at_loop = loop;
9508 if (!known_eq (poffset, 0))
9509 offset = (offset
9510 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9511 : size_int (poffset));
9513 tree bump;
9514 tree vec_offset = NULL_TREE;
9515 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9517 aggr_type = NULL_TREE;
9518 bump = NULL_TREE;
9520 else if (memory_access_type == VMAT_GATHER_SCATTER)
9522 aggr_type = elem_type;
9523 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9524 &bump, &vec_offset);
9526 else
9528 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9529 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9530 else
9531 aggr_type = vectype;
9532 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9533 memory_access_type);
9536 auto_vec<tree> vec_offsets;
9537 auto_vec<tree> vec_masks;
9538 if (mask)
9540 if (slp_node)
9541 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9542 &vec_masks);
9543 else
9544 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9545 &vec_masks, mask_vectype);
9547 tree vec_mask = NULL_TREE;
9548 poly_uint64 group_elt = 0;
9549 for (j = 0; j < ncopies; j++)
9551 /* 1. Create the vector or array pointer update chain. */
9552 if (j == 0)
9554 bool simd_lane_access_p
9555 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9556 if (simd_lane_access_p
9557 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9558 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9559 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9560 && integer_zerop (DR_INIT (first_dr_info->dr))
9561 && alias_sets_conflict_p (get_alias_set (aggr_type),
9562 get_alias_set (TREE_TYPE (ref_type)))
9563 && (alignment_support_scheme == dr_aligned
9564 || alignment_support_scheme == dr_unaligned_supported))
9566 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9567 dataref_offset = build_int_cst (ref_type, 0);
9569 else if (diff_first_stmt_info)
9571 dataref_ptr
9572 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9573 aggr_type, at_loop, offset, &dummy,
9574 gsi, &ptr_incr, simd_lane_access_p,
9575 bump);
9576 /* Adjust the pointer by the difference to first_stmt. */
9577 data_reference_p ptrdr
9578 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9579 tree diff
9580 = fold_convert (sizetype,
9581 size_binop (MINUS_EXPR,
9582 DR_INIT (first_dr_info->dr),
9583 DR_INIT (ptrdr)));
9584 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9585 stmt_info, diff);
9586 if (alignment_support_scheme == dr_explicit_realign)
9588 msq = vect_setup_realignment (vinfo,
9589 first_stmt_info_for_drptr, gsi,
9590 &realignment_token,
9591 alignment_support_scheme,
9592 dataref_ptr, &at_loop);
9593 gcc_assert (!compute_in_loop);
9596 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9598 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9599 slp_node, &gs_info, &dataref_ptr,
9600 &vec_offsets);
9602 else
9603 dataref_ptr
9604 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9605 at_loop,
9606 offset, &dummy, gsi, &ptr_incr,
9607 simd_lane_access_p, bump);
9608 if (mask)
9609 vec_mask = vec_masks[0];
9611 else
9613 if (dataref_offset)
9614 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9615 bump);
9616 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9617 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9618 stmt_info, bump);
9619 if (mask)
9620 vec_mask = vec_masks[j];
9623 if (grouped_load || slp_perm)
9624 dr_chain.create (vec_num);
9626 gimple *new_stmt = NULL;
9627 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9629 tree vec_array;
9631 vec_array = create_vector_array (vectype, vec_num);
9633 tree final_mask = NULL_TREE;
9634 if (loop_masks)
9635 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9636 vectype, j);
9637 if (vec_mask)
9638 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9639 final_mask, vec_mask, gsi);
9641 gcall *call;
9642 if (final_mask)
9644 /* Emit:
9645 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9646 VEC_MASK). */
9647 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9648 tree alias_ptr = build_int_cst (ref_type, align);
9649 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9650 dataref_ptr, alias_ptr,
9651 final_mask);
9653 else
9655 /* Emit:
9656 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9657 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9658 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9660 gimple_call_set_lhs (call, vec_array);
9661 gimple_call_set_nothrow (call, true);
9662 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9663 new_stmt = call;
9665 /* Extract each vector into an SSA_NAME. */
9666 for (i = 0; i < vec_num; i++)
9668 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9669 vec_array, i);
9670 dr_chain.quick_push (new_temp);
9673 /* Record the mapping between SSA_NAMEs and statements. */
9674 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9676 /* Record that VEC_ARRAY is now dead. */
9677 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9679 else
9681 for (i = 0; i < vec_num; i++)
9683 tree final_mask = NULL_TREE;
9684 if (loop_masks
9685 && memory_access_type != VMAT_INVARIANT)
9686 final_mask = vect_get_loop_mask (gsi, loop_masks,
9687 vec_num * ncopies,
9688 vectype, vec_num * j + i);
9689 if (vec_mask)
9690 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9691 final_mask, vec_mask, gsi);
9693 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9694 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9695 gsi, stmt_info, bump);
9697 /* 2. Create the vector-load in the loop. */
9698 switch (alignment_support_scheme)
9700 case dr_aligned:
9701 case dr_unaligned_supported:
9703 unsigned int misalign;
9704 unsigned HOST_WIDE_INT align;
9706 if (memory_access_type == VMAT_GATHER_SCATTER
9707 && gs_info.ifn != IFN_LAST)
9709 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9710 vec_offset = vec_offsets[vec_num * j + i];
9711 tree zero = build_zero_cst (vectype);
9712 tree scale = size_int (gs_info.scale);
9713 gcall *call;
9714 if (final_mask)
9715 call = gimple_build_call_internal
9716 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9717 vec_offset, scale, zero, final_mask);
9718 else
9719 call = gimple_build_call_internal
9720 (IFN_GATHER_LOAD, 4, dataref_ptr,
9721 vec_offset, scale, zero);
9722 gimple_call_set_nothrow (call, true);
9723 new_stmt = call;
9724 data_ref = NULL_TREE;
9725 break;
9727 else if (memory_access_type == VMAT_GATHER_SCATTER)
9729 /* Emulated gather-scatter. */
9730 gcc_assert (!final_mask);
9731 unsigned HOST_WIDE_INT const_nunits
9732 = nunits.to_constant ();
9733 unsigned HOST_WIDE_INT const_offset_nunits
9734 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9735 .to_constant ();
9736 vec<constructor_elt, va_gc> *ctor_elts;
9737 vec_alloc (ctor_elts, const_nunits);
9738 gimple_seq stmts = NULL;
9739 /* We support offset vectors with more elements
9740 than the data vector for now. */
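		      /* Worked example (illustrative): for a V2DF data
			 vectype (const_nunits == 2) with a V4SI offset
			 vector (const_offset_nunits == 4), factor == 2;
			 copy j then reads offset lanes (j % 2) * 2 and
			 (j % 2) * 2 + 1 of vec_offsets[j / 2].  */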
9741 unsigned HOST_WIDE_INT factor
9742 = const_offset_nunits / const_nunits;
9743 vec_offset = vec_offsets[j / factor];
9744 unsigned elt_offset = (j % factor) * const_nunits;
9745 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9746 tree scale = size_int (gs_info.scale);
9747 align
9748 = get_object_alignment (DR_REF (first_dr_info->dr));
9749 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9750 align);
9751 for (unsigned k = 0; k < const_nunits; ++k)
9753 tree boff = size_binop (MULT_EXPR,
9754 TYPE_SIZE (idx_type),
9755 bitsize_int
9756 (k + elt_offset));
9757 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9758 idx_type, vec_offset,
9759 TYPE_SIZE (idx_type),
9760 boff);
9761 idx = gimple_convert (&stmts, sizetype, idx);
9762 idx = gimple_build (&stmts, MULT_EXPR,
9763 sizetype, idx, scale);
9764 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9765 TREE_TYPE (dataref_ptr),
9766 dataref_ptr, idx);
9767 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9768 tree elt = make_ssa_name (TREE_TYPE (vectype));
9769 tree ref = build2 (MEM_REF, ltype, ptr,
9770 build_int_cst (ref_type, 0));
9771 new_stmt = gimple_build_assign (elt, ref);
9772 gimple_seq_add_stmt (&stmts, new_stmt);
9773 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9775 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9776 new_stmt = gimple_build_assign (NULL_TREE,
9777 build_constructor
9778 (vectype, ctor_elts));
9779 data_ref = NULL_TREE;
9780 break;
9783 align =
9784 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9785 if (alignment_support_scheme == dr_aligned)
9786 misalign = 0;
9787 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9789 align = dr_alignment
9790 (vect_dr_behavior (vinfo, first_dr_info));
9791 misalign = 0;
9793 else
9794 misalign = misalignment;
9795 if (dataref_offset == NULL_TREE
9796 && TREE_CODE (dataref_ptr) == SSA_NAME)
9797 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9798 align, misalign);
9799 align = least_bit_hwi (misalign | align);
9801 if (final_mask)
9803 tree ptr = build_int_cst (ref_type,
9804 align * BITS_PER_UNIT);
9805 gcall *call
9806 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9807 dataref_ptr, ptr,
9808 final_mask);
9809 gimple_call_set_nothrow (call, true);
9810 new_stmt = call;
9811 data_ref = NULL_TREE;
9813 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9815 tree final_len
9816 = vect_get_loop_len (loop_vinfo, loop_lens,
9817 vec_num * ncopies,
9818 vec_num * j + i);
9819 tree ptr = build_int_cst (ref_type,
9820 align * BITS_PER_UNIT);
9822 machine_mode vmode = TYPE_MODE (vectype);
9823 opt_machine_mode new_ovmode
9824 = get_len_load_store_mode (vmode, true);
9825 machine_mode new_vmode = new_ovmode.require ();
9826 tree qi_type = unsigned_intQI_type_node;
9828 signed char biasval =
9829 LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9831 tree bias = build_int_cst (intQI_type_node, biasval);
9833 gcall *call
9834 = gimple_build_call_internal (IFN_LEN_LOAD, 4,
9835 dataref_ptr, ptr,
9836 final_len, bias);
9837 gimple_call_set_nothrow (call, true);
9838 new_stmt = call;
9839 data_ref = NULL_TREE;
9841 /* Need conversion if it's wrapped with VnQI. */
9842 if (vmode != new_vmode)
9844 tree new_vtype
9845 = build_vector_type_for_mode (qi_type, new_vmode);
9846 tree var = vect_get_new_ssa_name (new_vtype,
9847 vect_simple_var);
9848 gimple_set_lhs (call, var);
9849 vect_finish_stmt_generation (vinfo, stmt_info, call,
9850 gsi);
9851 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9852 new_stmt
9853 = gimple_build_assign (vec_dest,
9854 VIEW_CONVERT_EXPR, op);
9857 else
9859 tree ltype = vectype;
9860 tree new_vtype = NULL_TREE;
9861 unsigned HOST_WIDE_INT gap
9862 = DR_GROUP_GAP (first_stmt_info);
9863 unsigned int vect_align
9864 = vect_known_alignment_in_bytes (first_dr_info,
9865 vectype);
9866 unsigned int scalar_dr_size
9867 = vect_get_scalar_dr_size (first_dr_info);
9868 /* If there's no peeling for gaps but we have a gap
9869 with slp loads then load the lower half of the
9870 vector only. See get_group_load_store_type for
9871 when we apply this optimization. */
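		    /* Worked example (illustrative): a group of four ints
		       whose last two elements are unused (gap == 2) with
		       a V4SI vectype satisfies
		       nunits == (group_size - gap) * 2, so only the
		       lower V2SI half is loaded and the upper half is
		       padded with zeros by the constructor built
		       further below.  */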
9872 if (slp
9873 && loop_vinfo
9874 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9875 && gap != 0
9876 && known_eq (nunits, (group_size - gap) * 2)
9877 && known_eq (nunits, group_size)
9878 && gap >= (vect_align / scalar_dr_size))
9880 tree half_vtype;
9881 new_vtype
9882 = vector_vector_composition_type (vectype, 2,
9883 &half_vtype);
9884 if (new_vtype != NULL_TREE)
9885 ltype = half_vtype;
9887 tree offset
9888 = (dataref_offset ? dataref_offset
9889 : build_int_cst (ref_type, 0));
9890 if (ltype != vectype
9891 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9893 unsigned HOST_WIDE_INT gap_offset
9894 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9895 tree gapcst = build_int_cst (ref_type, gap_offset);
9896 offset = size_binop (PLUS_EXPR, offset, gapcst);
9898 data_ref
9899 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9900 if (alignment_support_scheme == dr_aligned)
9902 else
9903 TREE_TYPE (data_ref)
9904 = build_aligned_type (TREE_TYPE (data_ref),
9905 align * BITS_PER_UNIT);
9906 if (ltype != vectype)
9908 vect_copy_ref_info (data_ref,
9909 DR_REF (first_dr_info->dr));
9910 tree tem = make_ssa_name (ltype);
9911 new_stmt = gimple_build_assign (tem, data_ref);
9912 vect_finish_stmt_generation (vinfo, stmt_info,
9913 new_stmt, gsi);
9914 data_ref = NULL;
9915 vec<constructor_elt, va_gc> *v;
9916 vec_alloc (v, 2);
9917 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9919 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9920 build_zero_cst (ltype));
9921 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9923 else
9925 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9926 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9927 build_zero_cst (ltype));
9929 gcc_assert (new_vtype != NULL_TREE);
9930 if (new_vtype == vectype)
9931 new_stmt = gimple_build_assign (
9932 vec_dest, build_constructor (vectype, v));
9933 else
9935 tree new_vname = make_ssa_name (new_vtype);
9936 new_stmt = gimple_build_assign (
9937 new_vname, build_constructor (new_vtype, v));
9938 vect_finish_stmt_generation (vinfo, stmt_info,
9939 new_stmt, gsi);
9940 new_stmt = gimple_build_assign (
9941 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9942 new_vname));
9946 break;
9948 case dr_explicit_realign:
9950 tree ptr, bump;
9952 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9954 if (compute_in_loop)
9955 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9956 &realignment_token,
9957 dr_explicit_realign,
9958 dataref_ptr, NULL);
9960 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9961 ptr = copy_ssa_name (dataref_ptr);
9962 else
9963 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9964 // For explicit realign the target alignment should be
9965 // known at compile time.
9966 unsigned HOST_WIDE_INT align =
9967 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9968 new_stmt = gimple_build_assign
9969 (ptr, BIT_AND_EXPR, dataref_ptr,
9970 build_int_cst
9971 (TREE_TYPE (dataref_ptr),
9972 -(HOST_WIDE_INT) align));
9973 vect_finish_stmt_generation (vinfo, stmt_info,
9974 new_stmt, gsi);
9975 data_ref
9976 = build2 (MEM_REF, vectype, ptr,
9977 build_int_cst (ref_type, 0));
9978 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9979 vec_dest = vect_create_destination_var (scalar_dest,
9980 vectype);
9981 new_stmt = gimple_build_assign (vec_dest, data_ref);
9982 new_temp = make_ssa_name (vec_dest, new_stmt);
9983 gimple_assign_set_lhs (new_stmt, new_temp);
9984 gimple_move_vops (new_stmt, stmt_info->stmt);
9985 vect_finish_stmt_generation (vinfo, stmt_info,
9986 new_stmt, gsi);
9987 msq = new_temp;
9989 bump = size_binop (MULT_EXPR, vs,
9990 TYPE_SIZE_UNIT (elem_type));
9991 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9992 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9993 stmt_info, bump);
9994 new_stmt = gimple_build_assign
9995 (NULL_TREE, BIT_AND_EXPR, ptr,
9996 build_int_cst
9997 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9998 ptr = copy_ssa_name (ptr, new_stmt);
9999 gimple_assign_set_lhs (new_stmt, ptr);
10000 vect_finish_stmt_generation (vinfo, stmt_info,
10001 new_stmt, gsi);
10002 data_ref
10003 = build2 (MEM_REF, vectype, ptr,
10004 build_int_cst (ref_type, 0));
10005 break;
10007 case dr_explicit_realign_optimized:
10009 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10010 new_temp = copy_ssa_name (dataref_ptr);
10011 else
10012 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
10013 // We should only be doing this if we know the target
10014 // alignment at compile time.
10015 unsigned HOST_WIDE_INT align =
10016 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10017 new_stmt = gimple_build_assign
10018 (new_temp, BIT_AND_EXPR, dataref_ptr,
10019 build_int_cst (TREE_TYPE (dataref_ptr),
10020 -(HOST_WIDE_INT) align));
10021 vect_finish_stmt_generation (vinfo, stmt_info,
10022 new_stmt, gsi);
10023 data_ref
10024 = build2 (MEM_REF, vectype, new_temp,
10025 build_int_cst (ref_type, 0));
10026 break;
10028 default:
10029 gcc_unreachable ();
10031 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10032 /* DATA_REF is null if we've already built the statement. */
10033 if (data_ref)
10035 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10036 new_stmt = gimple_build_assign (vec_dest, data_ref);
10038 new_temp = make_ssa_name (vec_dest, new_stmt);
10039 gimple_set_lhs (new_stmt, new_temp);
10040 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10042 /* 3. Handle explicit realignment if necessary/supported.
10043 Create in loop:
10044 vec_dest = realign_load (msq, lsq, realignment_token) */
10045 if (alignment_support_scheme == dr_explicit_realign_optimized
10046 || alignment_support_scheme == dr_explicit_realign)
10048 lsq = gimple_assign_lhs (new_stmt);
10049 if (!realignment_token)
10050 realignment_token = dataref_ptr;
10051 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10052 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
10053 msq, lsq, realignment_token);
10054 new_temp = make_ssa_name (vec_dest, new_stmt);
10055 gimple_assign_set_lhs (new_stmt, new_temp);
10056 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10058 if (alignment_support_scheme == dr_explicit_realign_optimized)
10060 gcc_assert (phi);
10061 if (i == vec_num - 1 && j == ncopies - 1)
10062 add_phi_arg (phi, lsq,
10063 loop_latch_edge (containing_loop),
10064 UNKNOWN_LOCATION);
10065 msq = lsq;
10069 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10071 tree perm_mask = perm_mask_for_reverse (vectype);
10072 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
10073 perm_mask, stmt_info, gsi);
10074 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10077 /* Collect vector loads and later create their permutation in
10078 vect_transform_grouped_load (). */
10079 if (grouped_load || slp_perm)
10080 dr_chain.quick_push (new_temp);
10082 /* Store vector loads in the corresponding SLP_NODE. */
10083 if (slp && !slp_perm)
10084 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10086 /* With SLP permutation we load the gaps as well; without it
10087 we need to skip the gaps after we have managed to fully load
10088 all elements. group_gap_adj is DR_GROUP_SIZE here. */
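	  /* Worked example (illustrative): for an SLP node using 2 of
	     the 4 lanes of a group (group_gap_adj == 2) with V2SI
	     vectors, group_elt reaches group_size - group_gap_adj == 2
	     after one vector load, and the pointer is then bumped past
	     the two unused elements.  */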
10089 group_elt += nunits;
10090 if (maybe_ne (group_gap_adj, 0U)
10091 && !slp_perm
10092 && known_eq (group_elt, group_size - group_gap_adj))
10094 poly_wide_int bump_val
10095 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10096 * group_gap_adj);
10097 if (tree_int_cst_sgn
10098 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10099 bump_val = -bump_val;
10100 tree bump = wide_int_to_tree (sizetype, bump_val);
10101 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10102 gsi, stmt_info, bump);
10103 group_elt = 0;
10106 /* Bump the vector pointer to account for a gap or for excess
10107 elements loaded for a permuted SLP load. */
10108 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10110 poly_wide_int bump_val
10111 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10112 * group_gap_adj);
10113 if (tree_int_cst_sgn
10114 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10115 bump_val = -bump_val;
10116 tree bump = wide_int_to_tree (sizetype, bump_val);
10117 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10118 stmt_info, bump);
10122 if (slp && !slp_perm)
10123 continue;
10125 if (slp_perm)
10127 unsigned n_perms;
10128 /* For SLP we know we've seen all possible uses of dr_chain so
10129 direct vect_transform_slp_perm_load to DCE the unused parts.
10130 ??? This is a hack to prevent compile-time issues as seen
10131 in PR101120 and friends. */
10132 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10133 gsi, vf, false, &n_perms,
10134 nullptr, true);
10135 gcc_assert (ok);
10137 else
10139 if (grouped_load)
10141 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10142 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10143 group_size, gsi);
10144 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10146 else
10148 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10151 dr_chain.release ();
10153 if (!slp)
10154 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10156 return true;
10159 /* Function vect_is_simple_cond.
10161 Input:
10162 LOOP - the loop that is being vectorized.
10163 COND - Condition that is checked for simple use.
10165 Output:
10166 *COMP_VECTYPE - the vector type for the comparison.
10167 *DTS - The def types for the arguments of the comparison
10169 Returns whether COND can be vectorized. Checks whether the
10170 condition operands are supportable using vect_is_simple_use. */
10172 static bool
10173 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10174 slp_tree slp_node, tree *comp_vectype,
10175 enum vect_def_type *dts, tree vectype)
10177 tree lhs, rhs;
10178 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10179 slp_tree slp_op;
10181 /* Mask case. */
10182 if (TREE_CODE (cond) == SSA_NAME
10183 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10185 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10186 &slp_op, &dts[0], comp_vectype)
10187 || !*comp_vectype
10188 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10189 return false;
10190 return true;
10193 if (!COMPARISON_CLASS_P (cond))
10194 return false;
10196 lhs = TREE_OPERAND (cond, 0);
10197 rhs = TREE_OPERAND (cond, 1);
10199 if (TREE_CODE (lhs) == SSA_NAME)
10201 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10202 &lhs, &slp_op, &dts[0], &vectype1))
10203 return false;
10205 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10206 || TREE_CODE (lhs) == FIXED_CST)
10207 dts[0] = vect_constant_def;
10208 else
10209 return false;
10211 if (TREE_CODE (rhs) == SSA_NAME)
10213 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10214 &rhs, &slp_op, &dts[1], &vectype2))
10215 return false;
10217 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10218 || TREE_CODE (rhs) == FIXED_CST)
10219 dts[1] = vect_constant_def;
10220 else
10221 return false;
10223 if (vectype1 && vectype2
10224 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10225 TYPE_VECTOR_SUBPARTS (vectype2)))
10226 return false;
10228 *comp_vectype = vectype1 ? vectype1 : vectype2;
10229 /* Invariant comparison. */
10230 if (! *comp_vectype)
10232 tree scalar_type = TREE_TYPE (lhs);
10233 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10234 *comp_vectype = truth_type_for (vectype);
10235 else
10237 /* If we can widen the comparison to match vectype do so. */
10238 if (INTEGRAL_TYPE_P (scalar_type)
10239 && !slp_node
10240 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10241 TYPE_SIZE (TREE_TYPE (vectype))))
10242 scalar_type = build_nonstandard_integer_type
10243 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10244 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10245 slp_node);
10249 return true;
10252 /* vectorizable_condition.
10254 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10255 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10256 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10257 at GSI.
10259 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10261 Return true if STMT_INFO is vectorizable in this way. */
10263 static bool
10264 vectorizable_condition (vec_info *vinfo,
10265 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10266 gimple **vec_stmt,
10267 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10269 tree scalar_dest = NULL_TREE;
10270 tree vec_dest = NULL_TREE;
10271 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10272 tree then_clause, else_clause;
10273 tree comp_vectype = NULL_TREE;
10274 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10275 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10276 tree vec_compare;
10277 tree new_temp;
10278 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10279 enum vect_def_type dts[4]
10280 = {vect_unknown_def_type, vect_unknown_def_type,
10281 vect_unknown_def_type, vect_unknown_def_type};
10282 int ndts = 4;
10283 int ncopies;
10284 int vec_num;
10285 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10286 int i;
10287 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10288 vec<tree> vec_oprnds0 = vNULL;
10289 vec<tree> vec_oprnds1 = vNULL;
10290 vec<tree> vec_oprnds2 = vNULL;
10291 vec<tree> vec_oprnds3 = vNULL;
10292 tree vec_cmp_type;
10293 bool masked = false;
10295 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10296 return false;
10298 /* Is this a vectorizable conditional operation? */
10299 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10300 if (!stmt)
10301 return false;
10303 code = gimple_assign_rhs_code (stmt);
10304 if (code != COND_EXPR)
10305 return false;
10307 stmt_vec_info reduc_info = NULL;
10308 int reduc_index = -1;
10309 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10310 bool for_reduction
10311 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10312 if (for_reduction)
10314 if (STMT_SLP_TYPE (stmt_info))
10315 return false;
10316 reduc_info = info_for_reduction (vinfo, stmt_info);
10317 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10318 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10319 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10320 || reduc_index != -1);
10322 else
10324 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10325 return false;
10328 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10329 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10331 if (slp_node)
10333 ncopies = 1;
10334 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10336 else
10338 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10339 vec_num = 1;
10342 gcc_assert (ncopies >= 1);
10343 if (for_reduction && ncopies > 1)
10344 return false; /* FORNOW */
10346 cond_expr = gimple_assign_rhs1 (stmt);
10348 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10349 &comp_vectype, &dts[0], vectype)
10350 || !comp_vectype)
10351 return false;
10353 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10354 slp_tree then_slp_node, else_slp_node;
10355 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10356 &then_clause, &then_slp_node, &dts[2], &vectype1))
10357 return false;
10358 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10359 &else_clause, &else_slp_node, &dts[3], &vectype2))
10360 return false;
10362 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10363 return false;
10365 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10366 return false;
10368 masked = !COMPARISON_CLASS_P (cond_expr);
10369 vec_cmp_type = truth_type_for (comp_vectype);
10371 if (vec_cmp_type == NULL_TREE)
10372 return false;
10374 cond_code = TREE_CODE (cond_expr);
10375 if (!masked)
10377 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10378 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10381 /* For conditional reductions, the "then" value needs to be the candidate
10382 value calculated by this iteration while the "else" value needs to be
10383 the result carried over from previous iterations. If the COND_EXPR
10384 is the other way around, we need to swap it. */
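   /* Illustrative example: an EXTRACT_LAST reduction written as
      r_1 = a_2 < b_3 ? r_0 : c_4 carries the previous result in the
      "then" slot (reduc_index == 1); when the comparison can be
      inverted (e.g. integer operands) it is rewritten as
      r_1 = a_2 >= b_3 ? c_4 : r_0, otherwise the comparison result is
      negated later via must_invert_cmp_result.  */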
10385 bool must_invert_cmp_result = false;
10386 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10388 if (masked)
10389 must_invert_cmp_result = true;
10390 else
10392 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10393 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10394 if (new_code == ERROR_MARK)
10395 must_invert_cmp_result = true;
10396 else
10398 cond_code = new_code;
10399 /* Make sure we don't accidentally use the old condition. */
10400 cond_expr = NULL_TREE;
10403 std::swap (then_clause, else_clause);
10406 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10408 /* Boolean values may have another representation in vectors
10409 and therefore we prefer bit operations over comparison for
10410 them (which also works for scalar masks). We store opcodes
10411 to use in bitop1 and bitop2. Statement is vectorized as
10412 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10413 depending on bitop1 and bitop2 arity. */
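      /* Worked example (illustrative): for boolean operands a > b is
	 computed below as a & ~b (bitop1 == BIT_NOT_EXPR applied to the
	 rhs, bitop2 == BIT_AND_EXPR), and a == b becomes a ^ b with the
	 then/else clauses swapped instead of materializing the final
	 BIT_NOT_EXPR.  */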
10414 switch (cond_code)
10416 case GT_EXPR:
10417 bitop1 = BIT_NOT_EXPR;
10418 bitop2 = BIT_AND_EXPR;
10419 break;
10420 case GE_EXPR:
10421 bitop1 = BIT_NOT_EXPR;
10422 bitop2 = BIT_IOR_EXPR;
10423 break;
10424 case LT_EXPR:
10425 bitop1 = BIT_NOT_EXPR;
10426 bitop2 = BIT_AND_EXPR;
10427 std::swap (cond_expr0, cond_expr1);
10428 break;
10429 case LE_EXPR:
10430 bitop1 = BIT_NOT_EXPR;
10431 bitop2 = BIT_IOR_EXPR;
10432 std::swap (cond_expr0, cond_expr1);
10433 break;
10434 case NE_EXPR:
10435 bitop1 = BIT_XOR_EXPR;
10436 break;
10437 case EQ_EXPR:
10438 bitop1 = BIT_XOR_EXPR;
10439 bitop2 = BIT_NOT_EXPR;
10440 break;
10441 default:
10442 return false;
10444 cond_code = SSA_NAME;
10447 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10448 && reduction_type == EXTRACT_LAST_REDUCTION
10449 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10451 if (dump_enabled_p ())
10452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10453 "reduction comparison operation not supported.\n");
10454 return false;
10457 if (!vec_stmt)
10459 if (bitop1 != NOP_EXPR)
10461 machine_mode mode = TYPE_MODE (comp_vectype);
10462 optab optab;
10464 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10465 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10466 return false;
10468 if (bitop2 != NOP_EXPR)
10470 optab = optab_for_tree_code (bitop2, comp_vectype,
10471 optab_default);
10472 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10473 return false;
10477 vect_cost_for_stmt kind = vector_stmt;
10478 if (reduction_type == EXTRACT_LAST_REDUCTION)
10479 /* Count one reduction-like operation per vector. */
10480 kind = vec_to_scalar;
10481 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10482 return false;
10484 if (slp_node
10485 && (!vect_maybe_update_slp_op_vectype
10486 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10487 || (op_adjust == 1
10488 && !vect_maybe_update_slp_op_vectype
10489 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10490 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10491 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10493 if (dump_enabled_p ())
10494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10495 "incompatible vector types for invariants\n");
10496 return false;
10499 if (loop_vinfo && for_reduction
10500 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10502 if (reduction_type == EXTRACT_LAST_REDUCTION)
10503 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10504 ncopies * vec_num, vectype, NULL);
10505 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10506 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10508 if (dump_enabled_p ())
10509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10510 "conditional reduction prevents the use"
10511 " of partial vectors.\n");
10512 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10516 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10517 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10518 cost_vec, kind);
10519 return true;
10522 /* Transform. */
10524 /* Handle def. */
10525 scalar_dest = gimple_assign_lhs (stmt);
10526 if (reduction_type != EXTRACT_LAST_REDUCTION)
10527 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10529 bool swap_cond_operands = false;
10531 /* See whether another part of the vectorized code applies a loop
10532 mask to the condition, or to its inverse. */
10534 vec_loop_masks *masks = NULL;
10535 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10537 if (reduction_type == EXTRACT_LAST_REDUCTION)
10538 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10539 else
10541 scalar_cond_masked_key cond (cond_expr, ncopies);
10542 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10543 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10544 else
10546 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10547 tree_code orig_code = cond.code;
10548 cond.code = invert_tree_comparison (cond.code, honor_nans);
10549 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
10551 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10552 cond_code = cond.code;
10553 swap_cond_operands = true;
10555 else
10557 /* Try the inverse of the current mask. We check if the
10558 inverse mask is live and if so we generate a negate of
10559 the current mask such that we still honor NaNs. */
10560 cond.inverted_p = true;
10561 cond.code = orig_code;
10562 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10564 bitop1 = orig_code;
10565 bitop2 = BIT_NOT_EXPR;
10566 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10567 cond_code = cond.code;
10568 swap_cond_operands = true;
10575 /* Handle cond expr. */
10576 if (masked)
10577 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10578 cond_expr, &vec_oprnds0, comp_vectype,
10579 then_clause, &vec_oprnds2, vectype,
10580 reduction_type != EXTRACT_LAST_REDUCTION
10581 ? else_clause : NULL, &vec_oprnds3, vectype);
10582 else
10583 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10584 cond_expr0, &vec_oprnds0, comp_vectype,
10585 cond_expr1, &vec_oprnds1, comp_vectype,
10586 then_clause, &vec_oprnds2, vectype,
10587 reduction_type != EXTRACT_LAST_REDUCTION
10588 ? else_clause : NULL, &vec_oprnds3, vectype);
10590 /* Arguments are ready. Create the new vector stmt. */
10591 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10593 vec_then_clause = vec_oprnds2[i];
10594 if (reduction_type != EXTRACT_LAST_REDUCTION)
10595 vec_else_clause = vec_oprnds3[i];
10597 if (swap_cond_operands)
10598 std::swap (vec_then_clause, vec_else_clause);
10600 if (masked)
10601 vec_compare = vec_cond_lhs;
10602 else
10604 vec_cond_rhs = vec_oprnds1[i];
10605 if (bitop1 == NOP_EXPR)
10607 gimple_seq stmts = NULL;
10608 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10609 vec_cond_lhs, vec_cond_rhs);
10610 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10612 else
10614 new_temp = make_ssa_name (vec_cmp_type);
10615 gassign *new_stmt;
10616 if (bitop1 == BIT_NOT_EXPR)
10617 new_stmt = gimple_build_assign (new_temp, bitop1,
10618 vec_cond_rhs);
10619 else
10620 new_stmt
10621 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10622 vec_cond_rhs);
10623 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10624 if (bitop2 == NOP_EXPR)
10625 vec_compare = new_temp;
10626 else if (bitop2 == BIT_NOT_EXPR)
10628 /* Instead of doing ~x ? y : z do x ? z : y. */
10629 vec_compare = new_temp;
10630 std::swap (vec_then_clause, vec_else_clause);
10632 else
10634 vec_compare = make_ssa_name (vec_cmp_type);
10635 new_stmt
10636 = gimple_build_assign (vec_compare, bitop2,
10637 vec_cond_lhs, new_temp);
10638 vect_finish_stmt_generation (vinfo, stmt_info,
10639 new_stmt, gsi);
10644 /* If we decided to apply a loop mask to the result of the vector
10645 comparison, AND the comparison with the mask now. Later passes
10646 should then be able to reuse the AND results between multiple
10647 vector statements.
10649 For example:
10650 for (int i = 0; i < 100; ++i)
10651 x[i] = y[i] ? z[i] : 10;
10653 results in the following optimized GIMPLE:
10655 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10656 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10657 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10658 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10659 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10660 vect_iftmp.11_47, { 10, ... }>;
10662 instead of using masked and unmasked forms of
10663 vec != { 0, ... } (masked in the MASK_LOAD,
10664 unmasked in the VEC_COND_EXPR). */
10666 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10667 in cases where that's necessary. */
10669 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10671 if (!is_gimple_val (vec_compare))
10673 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10674 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10675 vec_compare);
10676 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10677 vec_compare = vec_compare_name;
10680 if (must_invert_cmp_result)
10682 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10683 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10684 BIT_NOT_EXPR,
10685 vec_compare);
10686 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10687 vec_compare = vec_compare_name;
10690 if (masks)
10692 tree loop_mask
10693 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10694 vectype, i);
10695 tree tmp2 = make_ssa_name (vec_cmp_type);
10696 gassign *g
10697 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10698 loop_mask);
10699 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10700 vec_compare = tmp2;
10704 gimple *new_stmt;
10705 if (reduction_type == EXTRACT_LAST_REDUCTION)
10707 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10708 tree lhs = gimple_get_lhs (old_stmt);
10709 new_stmt = gimple_build_call_internal
10710 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10711 vec_then_clause);
10712 gimple_call_set_lhs (new_stmt, lhs);
10713 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10714 if (old_stmt == gsi_stmt (*gsi))
10715 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10716 else
10718 /* In this case we're moving the definition to later in the
10719 block. That doesn't matter because the only uses of the
10720 lhs are in phi statements. */
10721 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10722 gsi_remove (&old_gsi, true);
10723 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10726 else
10728 new_temp = make_ssa_name (vec_dest);
10729 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10730 vec_then_clause, vec_else_clause);
10731 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10733 if (slp_node)
10734 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10735 else
10736 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10739 if (!slp_node)
10740 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10742 vec_oprnds0.release ();
10743 vec_oprnds1.release ();
10744 vec_oprnds2.release ();
10745 vec_oprnds3.release ();
10747 return true;
10750 /* vectorizable_comparison.
10752 Check if STMT_INFO is a comparison expression that can be vectorized.
10753 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10754 comparison, put it in VEC_STMT, and insert it at GSI.
10756 Return true if STMT_INFO is vectorizable in this way. */
10758 static bool
10759 vectorizable_comparison (vec_info *vinfo,
10760 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10761 gimple **vec_stmt,
10762 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10764 tree lhs, rhs1, rhs2;
10765 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10766 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10767 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10768 tree new_temp;
10769 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10770 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10771 int ndts = 2;
10772 poly_uint64 nunits;
10773 int ncopies;
10774 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10775 int i;
10776 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10777 vec<tree> vec_oprnds0 = vNULL;
10778 vec<tree> vec_oprnds1 = vNULL;
10779 tree mask_type;
10780 tree mask;
10782 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10783 return false;
10785 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10786 return false;
10788 mask_type = vectype;
10789 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10791 if (slp_node)
10792 ncopies = 1;
10793 else
10794 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10796 gcc_assert (ncopies >= 1);
10797 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10798 return false;
10800 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10801 if (!stmt)
10802 return false;
10804 code = gimple_assign_rhs_code (stmt);
10806 if (TREE_CODE_CLASS (code) != tcc_comparison)
10807 return false;
10809 slp_tree slp_rhs1, slp_rhs2;
10810 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10811 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10812 return false;
10814 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10815 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10816 return false;
10818 if (vectype1 && vectype2
10819 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10820 TYPE_VECTOR_SUBPARTS (vectype2)))
10821 return false;
10823 vectype = vectype1 ? vectype1 : vectype2;
10825 /* Invariant comparison. */
10826 if (!vectype)
10828 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10829 vectype = mask_type;
10830 else
10831 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10832 slp_node);
10833 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10834 return false;
10836 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10837 return false;
10839 /* Can't compare mask and non-mask types. */
10840 if (vectype1 && vectype2
10841 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10842 return false;
10844 /* Boolean values may have another representation in vectors
10845 and therefore we prefer bit operations over comparison for
10846 them (which also works for scalar masks). We store opcodes
10847 to use in bitop1 and bitop2. Statement is vectorized as
10848 BITOP2 (rhs1 BITOP1 rhs2) or
10849 rhs1 BITOP2 (BITOP1 rhs2)
10850 depending on bitop1 and bitop2 arity. */
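  /* Illustrative examples (these follow from the assignments below):
	a >  b  ->  a & ~b      (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_AND_EXPR)
	a >= b  ->  a | ~b      (bitop1 = BIT_NOT_EXPR, bitop2 = BIT_IOR_EXPR)
	a == b  ->  ~(a ^ b)    (bitop1 = BIT_XOR_EXPR, bitop2 = BIT_NOT_EXPR)
	a != b  ->  a ^ b       (bitop1 = BIT_XOR_EXPR, bitop2 = NOP_EXPR)
     LT_EXPR and LE_EXPR reuse the GT/GE mappings with the operands
     swapped (swap_p).  */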
10851 bool swap_p = false;
10852 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10854 if (code == GT_EXPR)
10856 bitop1 = BIT_NOT_EXPR;
10857 bitop2 = BIT_AND_EXPR;
10859 else if (code == GE_EXPR)
10861 bitop1 = BIT_NOT_EXPR;
10862 bitop2 = BIT_IOR_EXPR;
10864 else if (code == LT_EXPR)
10866 bitop1 = BIT_NOT_EXPR;
10867 bitop2 = BIT_AND_EXPR;
10868 swap_p = true;
10870 else if (code == LE_EXPR)
10872 bitop1 = BIT_NOT_EXPR;
10873 bitop2 = BIT_IOR_EXPR;
10874 swap_p = true;
10876 else
10878 bitop1 = BIT_XOR_EXPR;
10879 if (code == EQ_EXPR)
10880 bitop2 = BIT_NOT_EXPR;
10884 if (!vec_stmt)
10886 if (bitop1 == NOP_EXPR)
10888 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10889 return false;
10891 else
10893 machine_mode mode = TYPE_MODE (vectype);
10894 optab optab;
10896 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10897 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10898 return false;
10900 if (bitop2 != NOP_EXPR)
10902 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10903 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10904 return false;
10908 /* Put types on constant and invariant SLP children. */
10909 if (slp_node
10910 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10911 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10913 if (dump_enabled_p ())
10914 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10915 "incompatible vector types for invariants\n");
10916 return false;
10919 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10920 vect_model_simple_cost (vinfo, stmt_info,
10921 ncopies * (1 + (bitop2 != NOP_EXPR)),
10922 dts, ndts, slp_node, cost_vec);
10923 return true;
10926 /* Transform. */
10928 /* Handle def. */
10929 lhs = gimple_assign_lhs (stmt);
10930 mask = vect_create_destination_var (lhs, mask_type);
10932 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10933 rhs1, &vec_oprnds0, vectype,
10934 rhs2, &vec_oprnds1, vectype);
10935 if (swap_p)
10936 std::swap (vec_oprnds0, vec_oprnds1);
10938 /* Arguments are ready. Create the new vector stmt. */
10939 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10941 gimple *new_stmt;
10942 vec_rhs2 = vec_oprnds1[i];
10944 new_temp = make_ssa_name (mask);
10945 if (bitop1 == NOP_EXPR)
10947 new_stmt = gimple_build_assign (new_temp, code,
10948 vec_rhs1, vec_rhs2);
10949 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10951 else
10953 if (bitop1 == BIT_NOT_EXPR)
10954 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10955 else
10956 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10957 vec_rhs2);
10958 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10959 if (bitop2 != NOP_EXPR)
10961 tree res = make_ssa_name (mask);
10962 if (bitop2 == BIT_NOT_EXPR)
10963 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10964 else
10965 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10966 new_temp);
10967 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10970 if (slp_node)
10971 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10972 else
10973 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10976 if (!slp_node)
10977 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10979 vec_oprnds0.release ();
10980 vec_oprnds1.release ();
10982 return true;
10985 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10986 can handle all live statements in the node. Otherwise return true
10987 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10988 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10990 static bool
10991 can_vectorize_live_stmts (vec_info *vinfo,
10992 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10993 slp_tree slp_node, slp_instance slp_node_instance,
10994 bool vec_stmt_p,
10995 stmt_vector_for_cost *cost_vec)
10997 if (slp_node)
10999 stmt_vec_info slp_stmt_info;
11000 unsigned int i;
11001 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
11003 if (STMT_VINFO_LIVE_P (slp_stmt_info)
11004 && !vectorizable_live_operation (vinfo,
11005 slp_stmt_info, gsi, slp_node,
11006 slp_node_instance, i,
11007 vec_stmt_p, cost_vec))
11008 return false;
11011 else if (STMT_VINFO_LIVE_P (stmt_info)
11012 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
11013 slp_node, slp_node_instance, -1,
11014 vec_stmt_p, cost_vec))
11015 return false;
11017 return true;
11020 /* Make sure the statement is vectorizable. */
11022 opt_result
11023 vect_analyze_stmt (vec_info *vinfo,
11024 stmt_vec_info stmt_info, bool *need_to_vectorize,
11025 slp_tree node, slp_instance node_instance,
11026 stmt_vector_for_cost *cost_vec)
11028 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11029 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
11030 bool ok;
11031 gimple_seq pattern_def_seq;
11033 if (dump_enabled_p ())
11034 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
11035 stmt_info->stmt);
11037 if (gimple_has_volatile_ops (stmt_info->stmt))
11038 return opt_result::failure_at (stmt_info->stmt,
11039 "not vectorized:"
11040 " stmt has volatile operands: %G\n",
11041 stmt_info->stmt);
11043 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11044 && node == NULL
11045 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
11047 gimple_stmt_iterator si;
11049 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
11051 stmt_vec_info pattern_def_stmt_info
11052 = vinfo->lookup_stmt (gsi_stmt (si));
11053 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
11054 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
11056 /* Analyze def stmt of STMT if it's a pattern stmt. */
11057 if (dump_enabled_p ())
11058 dump_printf_loc (MSG_NOTE, vect_location,
11059 "==> examining pattern def statement: %G",
11060 pattern_def_stmt_info->stmt);
11062 opt_result res
11063 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
11064 need_to_vectorize, node, node_instance,
11065 cost_vec);
11066 if (!res)
11067 return res;
11072 /* Skip stmts that do not need to be vectorized. In loops this is expected
11073 to include:
11074 - the COND_EXPR which is the loop exit condition
11075 - any LABEL_EXPRs in the loop
11076 - computations that are used only for array indexing or loop control.
11077 In basic blocks we only analyze statements that are a part of some SLP
11078 instance, therefore, all the statements are relevant.
11080 The pattern statement needs to be analyzed instead of the original statement
11081 if the original statement is not relevant. Otherwise, we analyze both
11082 statements. In basic blocks we are called from some SLP instance
11083 traversal, so we don't analyze pattern stmts instead; the pattern stmts
11084 will already be part of an SLP instance. */
11086 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
11087 if (!STMT_VINFO_RELEVANT_P (stmt_info)
11088 && !STMT_VINFO_LIVE_P (stmt_info))
11090 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11091 && pattern_stmt_info
11092 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11093 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11095 /* Analyze PATTERN_STMT instead of the original stmt. */
11096 stmt_info = pattern_stmt_info;
11097 if (dump_enabled_p ())
11098 dump_printf_loc (MSG_NOTE, vect_location,
11099 "==> examining pattern statement: %G",
11100 stmt_info->stmt);
11102 else
11104 if (dump_enabled_p ())
11105 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11107 return opt_result::success ();
11110 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11111 && node == NULL
11112 && pattern_stmt_info
11113 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11114 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11116 /* Analyze PATTERN_STMT too. */
11117 if (dump_enabled_p ())
11118 dump_printf_loc (MSG_NOTE, vect_location,
11119 "==> examining pattern statement: %G",
11120 pattern_stmt_info->stmt);
11122 opt_result res
11123 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11124 node_instance, cost_vec);
11125 if (!res)
11126 return res;
11129 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11131 case vect_internal_def:
11132 break;
11134 case vect_reduction_def:
11135 case vect_nested_cycle:
11136 gcc_assert (!bb_vinfo
11137 && (relevance == vect_used_in_outer
11138 || relevance == vect_used_in_outer_by_reduction
11139 || relevance == vect_used_by_reduction
11140 || relevance == vect_unused_in_scope
11141 || relevance == vect_used_only_live));
11142 break;
11144 case vect_induction_def:
11145 gcc_assert (!bb_vinfo);
11146 break;
11148 case vect_constant_def:
11149 case vect_external_def:
11150 case vect_unknown_def_type:
11151 default:
11152 gcc_unreachable ();
11155 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11156 if (node)
11157 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11159 if (STMT_VINFO_RELEVANT_P (stmt_info))
11161 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11162 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11163 || (call && gimple_call_lhs (call) == NULL_TREE));
11164 *need_to_vectorize = true;
11167 if (PURE_SLP_STMT (stmt_info) && !node)
11169 if (dump_enabled_p ())
11170 dump_printf_loc (MSG_NOTE, vect_location,
11171 "handled only by SLP analysis\n");
11172 return opt_result::success ();
11175 ok = true;
11176 if (!bb_vinfo
11177 && (STMT_VINFO_RELEVANT_P (stmt_info)
11178 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11179 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11180 -mveclibabi= takes preference over library functions with
11181 the simd attribute. */
11182 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11183 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11184 cost_vec)
11185 || vectorizable_conversion (vinfo, stmt_info,
11186 NULL, NULL, node, cost_vec)
11187 || vectorizable_operation (vinfo, stmt_info,
11188 NULL, NULL, node, cost_vec)
11189 || vectorizable_assignment (vinfo, stmt_info,
11190 NULL, NULL, node, cost_vec)
11191 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11192 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11193 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11194 node, node_instance, cost_vec)
11195 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11196 NULL, node, cost_vec)
11197 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11198 || vectorizable_condition (vinfo, stmt_info,
11199 NULL, NULL, node, cost_vec)
11200 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11201 cost_vec)
11202 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11203 stmt_info, NULL, node));
11204 else
11206 if (bb_vinfo)
11207 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11208 || vectorizable_simd_clone_call (vinfo, stmt_info,
11209 NULL, NULL, node, cost_vec)
11210 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11211 cost_vec)
11212 || vectorizable_shift (vinfo, stmt_info,
11213 NULL, NULL, node, cost_vec)
11214 || vectorizable_operation (vinfo, stmt_info,
11215 NULL, NULL, node, cost_vec)
11216 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11217 cost_vec)
11218 || vectorizable_load (vinfo, stmt_info,
11219 NULL, NULL, node, cost_vec)
11220 || vectorizable_store (vinfo, stmt_info,
11221 NULL, NULL, node, cost_vec)
11222 || vectorizable_condition (vinfo, stmt_info,
11223 NULL, NULL, node, cost_vec)
11224 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11225 cost_vec)
11226 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11229 if (node)
11230 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11232 if (!ok)
11233 return opt_result::failure_at (stmt_info->stmt,
11234 "not vectorized:"
11235 " relevant stmt not supported: %G",
11236 stmt_info->stmt);
11238 /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
11239 need extra handling, except for vectorizable reductions. */
11240 if (!bb_vinfo
11241 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11242 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11243 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11244 stmt_info, NULL, node, node_instance,
11245 false, cost_vec))
11246 return opt_result::failure_at (stmt_info->stmt,
11247 "not vectorized:"
11248 " live stmt not supported: %G",
11249 stmt_info->stmt);
11251 return opt_result::success ();
11255 /* Function vect_transform_stmt.
11257 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11259 bool
11260 vect_transform_stmt (vec_info *vinfo,
11261 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11262 slp_tree slp_node, slp_instance slp_node_instance)
11264 bool is_store = false;
11265 gimple *vec_stmt = NULL;
11266 bool done;
11268 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11270 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11271 if (slp_node)
11272 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11274 switch (STMT_VINFO_TYPE (stmt_info))
11276 case type_demotion_vec_info_type:
11277 case type_promotion_vec_info_type:
11278 case type_conversion_vec_info_type:
11279 done = vectorizable_conversion (vinfo, stmt_info,
11280 gsi, &vec_stmt, slp_node, NULL);
11281 gcc_assert (done);
11282 break;
11284 case induc_vec_info_type:
11285 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11286 stmt_info, &vec_stmt, slp_node,
11287 NULL);
11288 gcc_assert (done);
11289 break;
11291 case shift_vec_info_type:
11292 done = vectorizable_shift (vinfo, stmt_info,
11293 gsi, &vec_stmt, slp_node, NULL);
11294 gcc_assert (done);
11295 break;
11297 case op_vec_info_type:
11298 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11299 NULL);
11300 gcc_assert (done);
11301 break;
11303 case assignment_vec_info_type:
11304 done = vectorizable_assignment (vinfo, stmt_info,
11305 gsi, &vec_stmt, slp_node, NULL);
11306 gcc_assert (done);
11307 break;
11309 case load_vec_info_type:
11310 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11311 NULL);
11312 gcc_assert (done);
11313 break;
11315 case store_vec_info_type:
11316 done = vectorizable_store (vinfo, stmt_info,
11317 gsi, &vec_stmt, slp_node, NULL);
11318 gcc_assert (done);
11319 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11321 /* In case of interleaving, the whole chain is vectorized when the
11322 last store in the chain is reached. Store stmts before the last
11323 one are skipped, and their vec_stmt_info shouldn't be freed
11324 meanwhile. */
11325 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11326 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11327 is_store = true;
11329 else
11330 is_store = true;
11331 break;
11333 case condition_vec_info_type:
11334 done = vectorizable_condition (vinfo, stmt_info,
11335 gsi, &vec_stmt, slp_node, NULL);
11336 gcc_assert (done);
11337 break;
11339 case comparison_vec_info_type:
11340 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11341 slp_node, NULL);
11342 gcc_assert (done);
11343 break;
11345 case call_vec_info_type:
11346 done = vectorizable_call (vinfo, stmt_info,
11347 gsi, &vec_stmt, slp_node, NULL);
11348 break;
11350 case call_simd_clone_vec_info_type:
11351 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11352 slp_node, NULL);
11353 break;
11355 case reduc_vec_info_type:
11356 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11357 gsi, &vec_stmt, slp_node);
11358 gcc_assert (done);
11359 break;
11361 case cycle_phi_info_type:
11362 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11363 &vec_stmt, slp_node, slp_node_instance);
11364 gcc_assert (done);
11365 break;
11367 case lc_phi_info_type:
11368 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11369 stmt_info, &vec_stmt, slp_node);
11370 gcc_assert (done);
11371 break;
11373 case phi_info_type:
11374 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11375 gcc_assert (done);
11376 break;
11378 default:
11379 if (!STMT_VINFO_LIVE_P (stmt_info))
11381 if (dump_enabled_p ())
11382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11383 "stmt not supported.\n");
11384 gcc_unreachable ();
11386 done = true;
11389 if (!slp_node && vec_stmt)
11390 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11392 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11394 /* Handle stmts whose DEF is used outside the loop-nest that is
11395 being vectorized. */
11396 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11397 slp_node_instance, true, NULL);
11398 gcc_assert (done);
11401 if (slp_node)
11402 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11404 return is_store;
11408 /* Remove a group of stores (for SLP or interleaving), free their
11409 stmt_vec_info. */
11411 void
11412 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11414 stmt_vec_info next_stmt_info = first_stmt_info;
11416 while (next_stmt_info)
11418 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11419 next_stmt_info = vect_orig_stmt (next_stmt_info);
11420 /* Free the attached stmt_vec_info and remove the stmt. */
11421 vinfo->remove_stmt (next_stmt_info);
11422 next_stmt_info = tmp;
11426 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11427 elements of type SCALAR_TYPE, or null if the target doesn't support
11428 such a type.
11430 If NUNITS is zero, return a vector type that contains elements of
11431 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11433 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11434 for this vectorization region and want to "autodetect" the best choice.
11435 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11436 and we want the new type to be interoperable with it. PREVAILING_MODE
11437 in this case can be a scalar integer mode or a vector mode; when it
11438 is a vector mode, the function acts like a tree-level version of
11439 related_vector_mode. */
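/* For illustration (the exact result is target-dependent): with
   SCALAR_TYPE "int" and NUNITS 4, a target with 128-bit vectors would
   typically return the type "vector(4) int" (V4SImode), whereas with
   NUNITS 0 the target's preferred vector size for "int" is used.  */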
11441 tree
11442 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11443 tree scalar_type, poly_uint64 nunits)
11445 tree orig_scalar_type = scalar_type;
11446 scalar_mode inner_mode;
11447 machine_mode simd_mode;
11448 tree vectype;
11450 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11451 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11452 return NULL_TREE;
11454 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11456 /* For vector types of elements whose mode precision doesn't
11457 match their type's precision we use an element type of mode
11458 precision. The vectorization routines will have to make sure
11459 they support the proper result truncation/extension.
11460 We also make sure to build vector types with INTEGER_TYPE
11461 component type only. */
11462 if (INTEGRAL_TYPE_P (scalar_type)
11463 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11464 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11465 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11466 TYPE_UNSIGNED (scalar_type));
11468 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11469 When the component mode passes the above test simply use a type
11470 corresponding to that mode. The theory is that any use that
11471 would cause problems with this will disable vectorization anyway. */
11472 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11473 && !INTEGRAL_TYPE_P (scalar_type))
11474 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11476 /* We can't build a vector type of elements with alignment bigger than
11477 their size. */
11478 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11479 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11480 TYPE_UNSIGNED (scalar_type));
11482 /* If we fell back to using the mode, fail if there was
11483 no scalar type for it. */
11484 if (scalar_type == NULL_TREE)
11485 return NULL_TREE;
11487 /* If no prevailing mode was supplied, use the mode the target prefers.
11488 Otherwise look up a vector mode based on the prevailing mode. */
11489 if (prevailing_mode == VOIDmode)
11491 gcc_assert (known_eq (nunits, 0U));
11492 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11493 if (SCALAR_INT_MODE_P (simd_mode))
11495 /* Traditional behavior is not to take the integer mode
11496 literally, but simply to use it as a way of determining
11497 the vector size. It is up to mode_for_vector to decide
11498 what the TYPE_MODE should be.
11500 Note that nunits == 1 is allowed in order to support single
11501 element vector types. */
11502 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11503 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11504 return NULL_TREE;
11507 else if (SCALAR_INT_MODE_P (prevailing_mode)
11508 || !related_vector_mode (prevailing_mode,
11509 inner_mode, nunits).exists (&simd_mode))
11511 /* Fall back to using mode_for_vector, mostly in the hope of being
11512 able to use an integer mode. */
11513 if (known_eq (nunits, 0U)
11514 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11515 return NULL_TREE;
11517 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11518 return NULL_TREE;
11521 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11523 /* In cases where the mode was chosen by mode_for_vector, check that
11524 the target actually supports the chosen mode, or that it at least
11525 allows the vector mode to be replaced by a like-sized integer. */
11526 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11527 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11528 return NULL_TREE;
11530 /* Re-attach the address-space qualifier if we canonicalized the scalar
11531 type. */
11532 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11533 return build_qualified_type
11534 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11536 return vectype;
11539 /* Function get_vectype_for_scalar_type.
11541 Returns the vector type corresponding to SCALAR_TYPE as supported
11542 by the target. If GROUP_SIZE is nonzero and we're performing BB
11543 vectorization, make sure that the number of elements in the vector
11544 is no bigger than GROUP_SIZE. */
11546 tree
11547 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11548 unsigned int group_size)
11550 /* For BB vectorization, we should always have a group size once we've
11551 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11552 are tentative requests during things like early data reference
11553 analysis and pattern recognition. */
11554 if (is_a <bb_vec_info> (vinfo))
11555 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11556 else
11557 group_size = 0;
11559 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11560 scalar_type);
11561 if (vectype && vinfo->vector_mode == VOIDmode)
11562 vinfo->vector_mode = TYPE_MODE (vectype);
11564 /* Register the natural choice of vector type, before the group size
11565 has been applied. */
11566 if (vectype)
11567 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11569 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11570 try again with an explicit number of elements. */
11571 if (vectype
11572 && group_size
11573 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11575 /* Start with the biggest number of units that fits within
11576 GROUP_SIZE and halve it until we find a valid vector type.
11577 Usually either the first attempt will succeed or all will
11578 fail (in the latter case because GROUP_SIZE is too small
11579 for the target), but it's possible that a target could have
11580 a hole between supported vector types.
11582 If GROUP_SIZE is not a power of 2, this has the effect of
11583 trying the largest power of 2 that fits within the group,
11584 even though the group is not a multiple of that vector size.
11585 The BB vectorizer will then try to carve up the group into
11586 smaller pieces. */
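      /* For example, a GROUP_SIZE of 6 starts with 4-element vectors and
	 then tries 2-element vectors, keeping the first type that the
	 target supports.  */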
11587 unsigned int nunits = 1 << floor_log2 (group_size);
11590 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11591 scalar_type, nunits);
11592 nunits /= 2;
11594 while (nunits > 1 && !vectype);
11597 return vectype;
11600 /* Return the vector type corresponding to SCALAR_TYPE as supported
11601 by the target. NODE, if nonnull, is the SLP tree node that will
11602 use the returned vector type. */
11604 tree
11605 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11607 unsigned int group_size = 0;
11608 if (node)
11609 group_size = SLP_TREE_LANES (node);
11610 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11613 /* Function get_mask_type_for_scalar_type.
11615 Returns the mask type corresponding to a result of comparison
11616 of vectors of specified SCALAR_TYPE as supported by target.
11617 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11618 make sure that the number of elements in the vector is no bigger
11619 than GROUP_SIZE. */
11621 tree
11622 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11623 unsigned int group_size)
11625 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11627 if (!vectype)
11628 return NULL;
11630 return truth_type_for (vectype);
11633 /* Function get_same_sized_vectype
11635 Returns a vector type corresponding to SCALAR_TYPE of size
11636 VECTOR_TYPE if supported by the target. */
11638 tree
11639 get_same_sized_vectype (tree scalar_type, tree vector_type)
11641 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11642 return truth_type_for (vector_type);
11644 poly_uint64 nunits;
11645 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11646 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11647 return NULL_TREE;
11649 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11650 scalar_type, nunits);
11653 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11654 would not change the chosen vector modes. */
11656 bool
11657 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11659 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11660 i != vinfo->used_vector_modes.end (); ++i)
11661 if (!VECTOR_MODE_P (*i)
11662 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11663 return false;
11664 return true;
11667 /* Function vect_is_simple_use.
11669 Input:
11670 VINFO - the vect info of the loop or basic block that is being vectorized.
11671 OPERAND - operand in the loop or bb.
11672 Output:
11673 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11674 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11675 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11676 the definition could be anywhere in the function
11677 DT - the type of definition
11679 Returns whether a stmt with OPERAND can be vectorized.
11680 For loops, supportable operands are constants, loop invariants, and operands
11681 that are defined by the current iteration of the loop. Unsupportable
11682 operands are those that are defined by a previous iteration of the loop (as
11683 is the case in reduction/induction computations).
11684 For basic blocks, supportable operands are constants and bb invariants.
11685 For now, operands defined outside the basic block are not supported. */
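/* Illustrative classification (not exhaustive): a literal such as 5 gives
   vect_constant_def, an SSA name defined before the vectorized region
   (or a default definition) gives vect_external_def, and an SSA name
   defined by a statement inside the region gives that statement's
   recorded def type, e.g. vect_internal_def or vect_reduction_def.  */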
11687 bool
11688 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11689 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11691 if (def_stmt_info_out)
11692 *def_stmt_info_out = NULL;
11693 if (def_stmt_out)
11694 *def_stmt_out = NULL;
11695 *dt = vect_unknown_def_type;
11697 if (dump_enabled_p ())
11699 dump_printf_loc (MSG_NOTE, vect_location,
11700 "vect_is_simple_use: operand ");
11701 if (TREE_CODE (operand) == SSA_NAME
11702 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11703 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11704 else
11705 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11708 if (CONSTANT_CLASS_P (operand))
11709 *dt = vect_constant_def;
11710 else if (is_gimple_min_invariant (operand))
11711 *dt = vect_external_def;
11712 else if (TREE_CODE (operand) != SSA_NAME)
11713 *dt = vect_unknown_def_type;
11714 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11715 *dt = vect_external_def;
11716 else
11718 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11719 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11720 if (!stmt_vinfo)
11721 *dt = vect_external_def;
11722 else
11724 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11725 def_stmt = stmt_vinfo->stmt;
11726 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11727 if (def_stmt_info_out)
11728 *def_stmt_info_out = stmt_vinfo;
11730 if (def_stmt_out)
11731 *def_stmt_out = def_stmt;
11734 if (dump_enabled_p ())
11736 dump_printf (MSG_NOTE, ", type of def: ");
11737 switch (*dt)
11739 case vect_uninitialized_def:
11740 dump_printf (MSG_NOTE, "uninitialized\n");
11741 break;
11742 case vect_constant_def:
11743 dump_printf (MSG_NOTE, "constant\n");
11744 break;
11745 case vect_external_def:
11746 dump_printf (MSG_NOTE, "external\n");
11747 break;
11748 case vect_internal_def:
11749 dump_printf (MSG_NOTE, "internal\n");
11750 break;
11751 case vect_induction_def:
11752 dump_printf (MSG_NOTE, "induction\n");
11753 break;
11754 case vect_reduction_def:
11755 dump_printf (MSG_NOTE, "reduction\n");
11756 break;
11757 case vect_double_reduction_def:
11758 dump_printf (MSG_NOTE, "double reduction\n");
11759 break;
11760 case vect_nested_cycle:
11761 dump_printf (MSG_NOTE, "nested cycle\n");
11762 break;
11763 case vect_unknown_def_type:
11764 dump_printf (MSG_NOTE, "unknown\n");
11765 break;
11769 if (*dt == vect_unknown_def_type)
11771 if (dump_enabled_p ())
11772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11773 "Unsupported pattern.\n");
11774 return false;
11777 return true;
11780 /* Function vect_is_simple_use.
11782 Same as vect_is_simple_use but also determines the vector operand
11783 type of OPERAND and stores it to *VECTYPE. If the definition of
11784 OPERAND is vect_uninitialized_def, vect_constant_def or
11785 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11786 is responsible for computing the best suited vector type for the
11787 scalar operand. */
11789 bool
11790 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11791 tree *vectype, stmt_vec_info *def_stmt_info_out,
11792 gimple **def_stmt_out)
11794 stmt_vec_info def_stmt_info;
11795 gimple *def_stmt;
11796 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11797 return false;
11799 if (def_stmt_out)
11800 *def_stmt_out = def_stmt;
11801 if (def_stmt_info_out)
11802 *def_stmt_info_out = def_stmt_info;
11804 /* Now get a vector type if the def is internal, otherwise supply
11805 NULL_TREE and leave it up to the caller to figure out a proper
11806 type for the use stmt. */
11807 if (*dt == vect_internal_def
11808 || *dt == vect_induction_def
11809 || *dt == vect_reduction_def
11810 || *dt == vect_double_reduction_def
11811 || *dt == vect_nested_cycle)
11813 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11814 gcc_assert (*vectype != NULL_TREE);
11815 if (dump_enabled_p ())
11816 dump_printf_loc (MSG_NOTE, vect_location,
11817 "vect_is_simple_use: vectype %T\n", *vectype);
11819 else if (*dt == vect_uninitialized_def
11820 || *dt == vect_constant_def
11821 || *dt == vect_external_def)
11822 *vectype = NULL_TREE;
11823 else
11824 gcc_unreachable ();
11826 return true;
11829 /* Function vect_is_simple_use.
11831 Same as vect_is_simple_use but determines the operand by operand
11832 position OPERAND from either STMT or SLP_NODE, filling in *OP
11833 and *SLP_DEF (when SLP_NODE is not NULL). */
11835 bool
11836 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11837 unsigned operand, tree *op, slp_tree *slp_def,
11838 enum vect_def_type *dt,
11839 tree *vectype, stmt_vec_info *def_stmt_info_out)
11841 if (slp_node)
11843 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11844 *slp_def = child;
11845 *vectype = SLP_TREE_VECTYPE (child);
11846 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11848 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11849 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11851 else
11853 if (def_stmt_info_out)
11854 *def_stmt_info_out = NULL;
11855 *op = SLP_TREE_SCALAR_OPS (child)[0];
11856 *dt = SLP_TREE_DEF_TYPE (child);
11857 return true;
11860 else
11862 *slp_def = NULL;
11863 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11865 if (gimple_assign_rhs_code (ass) == COND_EXPR
11866 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11868 if (operand < 2)
11869 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11870 else
11871 *op = gimple_op (ass, operand);
11873 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11874 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11875 else
11876 *op = gimple_op (ass, operand + 1);
11878 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11879 *op = gimple_call_arg (call, operand);
11880 else
11881 gcc_unreachable ();
11882 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11886 /* If OP is not NULL and is external or constant update its vector
11887 type with VECTYPE. Returns true if successful or false if not,
11888 for example when conflicting vector types are present. */
11890 bool
11891 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11893 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11894 return true;
11895 if (SLP_TREE_VECTYPE (op))
11896 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11897 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
11898 should be handled by patterns. Allow vect_constant_def for now. */
11899 if (VECTOR_BOOLEAN_TYPE_P (vectype)
11900 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
11901 return false;
11902 SLP_TREE_VECTYPE (op) = vectype;
11903 return true;
11906 /* Function supportable_widening_operation
11908 Check whether an operation represented by the code CODE is a
11909 widening operation that is supported by the target platform in
11910 vector form (i.e., when operating on arguments of type VECTYPE_IN
11911 producing a result of type VECTYPE_OUT).
11913 Widening operations we currently support are NOP (CONVERT), FLOAT,
11914 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11915 are supported by the target platform either directly (via vector
11916 tree-codes), or via target builtins.
11918 Output:
11919 - CODE1 and CODE2 are codes of vector operations to be used when
11920 vectorizing the operation, if available.
11921 - MULTI_STEP_CVT determines the number of required intermediate steps in
11922 case of multi-step conversion (like char->short->int - in that case
11923 MULTI_STEP_CVT will be 1).
11924 - INTERM_TYPES contains the intermediate type required to perform the
11925 widening operation (short in the above example). */
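/* Illustration (modulo the endianness swap applied below): for a plain
   conversion CODE1/CODE2 become VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR,
   so each input vector of N narrow elements yields two result vectors of
   N/2 wide elements; for WIDEN_MULT_EXPR they become
   VEC_WIDEN_MULT_LO_EXPR/VEC_WIDEN_MULT_HI_EXPR (or the EVEN/ODD pair in
   the reduction special case handled below).  */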
11927 bool
11928 supportable_widening_operation (vec_info *vinfo,
11929 enum tree_code code, stmt_vec_info stmt_info,
11930 tree vectype_out, tree vectype_in,
11931 enum tree_code *code1, enum tree_code *code2,
11932 int *multi_step_cvt,
11933 vec<tree> *interm_types)
11935 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11936 class loop *vect_loop = NULL;
11937 machine_mode vec_mode;
11938 enum insn_code icode1, icode2;
11939 optab optab1, optab2;
11940 tree vectype = vectype_in;
11941 tree wide_vectype = vectype_out;
11942 enum tree_code c1, c2;
11943 int i;
11944 tree prev_type, intermediate_type;
11945 machine_mode intermediate_mode, prev_mode;
11946 optab optab3, optab4;
11948 *multi_step_cvt = 0;
11949 if (loop_info)
11950 vect_loop = LOOP_VINFO_LOOP (loop_info);
11952 switch (code)
11954 case WIDEN_MULT_EXPR:
11955 /* The result of a vectorized widening operation usually requires
11956 two vectors (because the widened results do not fit into one vector).
11957 The generated vector results would normally be expected to be
11958 generated in the same order as in the original scalar computation,
11959 i.e. if 8 results are generated in each vector iteration, they are
11960 to be organized as follows:
11961 vect1: [res1,res2,res3,res4],
11962 vect2: [res5,res6,res7,res8].
11964 However, in the special case that the result of the widening
11965 operation is used in a reduction computation only, the order doesn't
11966 matter (because when vectorizing a reduction we change the order of
11967 the computation). Some targets can take advantage of this and
11968 generate more efficient code. For example, targets like Altivec,
11969 that support widen_mult using a sequence of {mult_even,mult_odd}
11970 generate the following vectors:
11971 vect1: [res1,res3,res5,res7],
11972 vect2: [res2,res4,res6,res8].
11974 When vectorizing outer-loops, we execute the inner-loop sequentially
11975 (each vectorized inner-loop iteration contributes to VF outer-loop
11976 iterations in parallel). We therefore don't allow changing the
11977 order of the computation in the inner-loop during outer-loop
11978 vectorization. */
11979 /* TODO: Another case in which order doesn't *really* matter is when we
11980 widen and then contract again, e.g. (short)((int)x * y >> 8).
11981 Normally, pack_trunc performs an even/odd permute, whereas the
11982 repack from an even/odd expansion would be an interleave, which
11983 would be significantly simpler for e.g. AVX2. */
11984 /* In any case, in order to avoid duplicating the code below, recurse
11985 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11986 are properly set up for the caller. If we fail, we'll continue with
11987 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11988 if (vect_loop
11989 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11990 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11991 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11992 stmt_info, vectype_out,
11993 vectype_in, code1, code2,
11994 multi_step_cvt, interm_types))
11996 /* Elements in a vector with the vect_used_by_reduction property cannot
11997 be reordered if the use chain with this property does not have the
11998 same operation. One such example is s += a * b, where elements
11999 in a and b cannot be reordered. Here we check if the vector defined
12000 by STMT is only directly used in the reduction statement. */
12001 tree lhs = gimple_assign_lhs (stmt_info->stmt);
12002 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
12003 if (use_stmt_info
12004 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
12005 return true;
12007 c1 = VEC_WIDEN_MULT_LO_EXPR;
12008 c2 = VEC_WIDEN_MULT_HI_EXPR;
12009 break;
12011 case DOT_PROD_EXPR:
12012 c1 = DOT_PROD_EXPR;
12013 c2 = DOT_PROD_EXPR;
12014 break;
12016 case SAD_EXPR:
12017 c1 = SAD_EXPR;
12018 c2 = SAD_EXPR;
12019 break;
12021 case VEC_WIDEN_MULT_EVEN_EXPR:
12022 /* Support the recursion induced just above. */
12023 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
12024 c2 = VEC_WIDEN_MULT_ODD_EXPR;
12025 break;
12027 case WIDEN_LSHIFT_EXPR:
12028 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
12029 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
12030 break;
12032 case WIDEN_PLUS_EXPR:
12033 c1 = VEC_WIDEN_PLUS_LO_EXPR;
12034 c2 = VEC_WIDEN_PLUS_HI_EXPR;
12035 break;
12037 case WIDEN_MINUS_EXPR:
12038 c1 = VEC_WIDEN_MINUS_LO_EXPR;
12039 c2 = VEC_WIDEN_MINUS_HI_EXPR;
12040 break;
12042 CASE_CONVERT:
12043 c1 = VEC_UNPACK_LO_EXPR;
12044 c2 = VEC_UNPACK_HI_EXPR;
12045 break;
12047 case FLOAT_EXPR:
12048 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
12049 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
12050 break;
12052 case FIX_TRUNC_EXPR:
12053 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
12054 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
12055 break;
12057 default:
12058 gcc_unreachable ();
12061 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
12062 std::swap (c1, c2);
12064 if (code == FIX_TRUNC_EXPR)
12066 /* The signedness is determined from the output operand. */
12067 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12068 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
12070 else if (CONVERT_EXPR_CODE_P (code)
12071 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
12072 && VECTOR_BOOLEAN_TYPE_P (vectype)
12073 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
12074 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12076 /* If the input and result modes are the same, a different optab
12077 is needed where we pass in the number of units in vectype. */
12078 optab1 = vec_unpacks_sbool_lo_optab;
12079 optab2 = vec_unpacks_sbool_hi_optab;
12081 else
12083 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12084 optab2 = optab_for_tree_code (c2, vectype, optab_default);
12087 if (!optab1 || !optab2)
12088 return false;
12090 vec_mode = TYPE_MODE (vectype);
12091 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
12092 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
12093 return false;
12095 *code1 = c1;
12096 *code2 = c2;
12098 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12099 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12101 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12102 return true;
12103 /* For scalar masks we may have different boolean
12104 vector types having the same QImode. Thus we
12105 add an additional check for the number of elements. */
12106 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12107 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12108 return true;
12111 /* Check if it's a multi-step conversion that can be done using intermediate
12112 types. */
12114 prev_type = vectype;
12115 prev_mode = vec_mode;
12117 if (!CONVERT_EXPR_CODE_P (code))
12118 return false;
12120 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12121 intermediate steps in the promotion sequence. We try up to
12122 MAX_INTERM_CVT_STEPS steps to get to WIDE_VECTYPE, and fail if we do
12123 not. */
12124 interm_types->create (MAX_INTERM_CVT_STEPS);
12125 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12127 intermediate_mode = insn_data[icode1].operand[0].mode;
12128 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12129 intermediate_type
12130 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12131 else
12132 intermediate_type
12133 = lang_hooks.types.type_for_mode (intermediate_mode,
12134 TYPE_UNSIGNED (prev_type));
12136 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12137 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12138 && intermediate_mode == prev_mode
12139 && SCALAR_INT_MODE_P (prev_mode))
12141 /* If the input and result modes are the same, a different optab
12142 is needed where we pass in the number of units in vectype. */
12143 optab3 = vec_unpacks_sbool_lo_optab;
12144 optab4 = vec_unpacks_sbool_hi_optab;
12146 else
12148 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12149 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12152 if (!optab3 || !optab4
12153 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12154 || insn_data[icode1].operand[0].mode != intermediate_mode
12155 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12156 || insn_data[icode2].operand[0].mode != intermediate_mode
12157 || ((icode1 = optab_handler (optab3, intermediate_mode))
12158 == CODE_FOR_nothing)
12159 || ((icode2 = optab_handler (optab4, intermediate_mode))
12160 == CODE_FOR_nothing))
12161 break;
12163 interm_types->quick_push (intermediate_type);
12164 (*multi_step_cvt)++;
12166 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12167 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12169 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12170 return true;
12171 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12172 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12173 return true;
12176 prev_type = intermediate_type;
12177 prev_mode = intermediate_mode;
12180 interm_types->release ();
12181 return false;
12185 /* Function supportable_narrowing_operation
12187 Check whether an operation represented by the code CODE is a
12188 narrowing operation that is supported by the target platform in
12189 vector form (i.e., when operating on arguments of type VECTYPE_IN
12190 and producing a result of type VECTYPE_OUT).
12192 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12193 and FLOAT. This function checks if these operations are supported by
12194 the target platform directly via vector tree-codes.
12196 Output:
12197 - CODE1 is the code of a vector operation to be used when
12198 vectorizing the operation, if available.
12199 - MULTI_STEP_CVT determines the number of required intermediate steps in
12200 case of multi-step conversion (like int->short->char - in that case
12201 MULTI_STEP_CVT will be 1).
12202 - INTERM_TYPES contains the intermediate type required to perform the
12203 narrowing operation (short in the above example). */
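/* Illustration: for a plain conversion CODE1 becomes VEC_PACK_TRUNC_EXPR,
   which packs two input vectors of N wide elements into one result vector
   of 2N narrower elements; multi-step cases (e.g. int->short->char) chain
   such packs through the types recorded in INTERM_TYPES.  */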
12205 bool
12206 supportable_narrowing_operation (enum tree_code code,
12207 tree vectype_out, tree vectype_in,
12208 enum tree_code *code1, int *multi_step_cvt,
12209 vec<tree> *interm_types)
12211 machine_mode vec_mode;
12212 enum insn_code icode1;
12213 optab optab1, interm_optab;
12214 tree vectype = vectype_in;
12215 tree narrow_vectype = vectype_out;
12216 enum tree_code c1;
12217 tree intermediate_type, prev_type;
12218 machine_mode intermediate_mode, prev_mode;
12219 int i;
12220 unsigned HOST_WIDE_INT n_elts;
12221 bool uns;
12223 *multi_step_cvt = 0;
12224 switch (code)
12226 CASE_CONVERT:
12227 c1 = VEC_PACK_TRUNC_EXPR;
12228 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12229 && VECTOR_BOOLEAN_TYPE_P (vectype)
12230 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
12231 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
12232 && n_elts < BITS_PER_UNIT)
12233 optab1 = vec_pack_sbool_trunc_optab;
12234 else
12235 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12236 break;
12238 case FIX_TRUNC_EXPR:
12239 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12240 /* The signedness is determined from the output operand. */
12241 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12242 break;
12244 case FLOAT_EXPR:
12245 c1 = VEC_PACK_FLOAT_EXPR;
12246 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12247 break;
12249 default:
12250 gcc_unreachable ();
12253 if (!optab1)
12254 return false;
12256 vec_mode = TYPE_MODE (vectype);
12257 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12258 return false;
12260 *code1 = c1;
12262 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12264 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12265 return true;
12266 /* For scalar masks we may have different boolean
12267 vector types having the same QImode. Thus we
12268 add an additional check for the number of elements. */
12269 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12270 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12271 return true;
12274 if (code == FLOAT_EXPR)
12275 return false;
12277 /* Check if it's a multi-step conversion that can be done using intermediate
12278 types. */
12279 prev_mode = vec_mode;
12280 prev_type = vectype;
12281 if (code == FIX_TRUNC_EXPR)
12282 uns = TYPE_UNSIGNED (vectype_out);
12283 else
12284 uns = TYPE_UNSIGNED (vectype);
12286 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12287 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12288 costly than signed. */
12289 if (code == FIX_TRUNC_EXPR && uns)
12291 enum insn_code icode2;
12293 intermediate_type
12294 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12295 interm_optab
12296 = optab_for_tree_code (c1, intermediate_type, optab_default);
12297 if (interm_optab != unknown_optab
12298 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12299 && insn_data[icode1].operand[0].mode
12300 == insn_data[icode2].operand[0].mode)
12302 uns = false;
12303 optab1 = interm_optab;
12304 icode1 = icode2;
12308 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12309 intermediate steps in the narrowing sequence. We try up to
12310 MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do not. */
12311 interm_types->create (MAX_INTERM_CVT_STEPS);
12312 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12314 intermediate_mode = insn_data[icode1].operand[0].mode;
12315 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12316 intermediate_type
12317 = vect_double_mask_nunits (prev_type, intermediate_mode);
12318 else
12319 intermediate_type
12320 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12321 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12322 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12323 && SCALAR_INT_MODE_P (prev_mode)
12324 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
12325 && n_elts < BITS_PER_UNIT)
12326 interm_optab = vec_pack_sbool_trunc_optab;
12327 else
12328 interm_optab
12329 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12330 optab_default);
12331 if (!interm_optab
12332 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12333 || insn_data[icode1].operand[0].mode != intermediate_mode
12334 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12335 == CODE_FOR_nothing))
12336 break;
12338 interm_types->quick_push (intermediate_type);
12339 (*multi_step_cvt)++;
12341 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12343 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12344 return true;
12345 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12346 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12347 return true;
12350 prev_mode = intermediate_mode;
12351 prev_type = intermediate_type;
12352 optab1 = interm_optab;
12355 interm_types->release ();
12356 return false;
12359 /* Generate and return a vector mask of MASK_TYPE such that
12360 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12361 Add the statements to SEQ. */
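/* A minimal scalar model of that definition, assuming an N-lane mask
   (the names here are illustrative only):

     for (unsigned int i = 0; i < n; ++i)
       mask[i] = (start_index + i < end_index);

   i.e. lane I is active exactly when START_INDEX + I < END_INDEX, which
   is the IFN_WHILE_ULT semantics the implementation below relies on. */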
12363 tree
12364 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12365 tree end_index, const char *name)
12367 tree cmp_type = TREE_TYPE (start_index);
12368 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12369 cmp_type, mask_type,
12370 OPTIMIZE_FOR_SPEED));
12371 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12372 start_index, end_index,
12373 build_zero_cst (mask_type));
12374 tree tmp;
12375 if (name)
12376 tmp = make_temp_ssa_name (mask_type, NULL, name);
12377 else
12378 tmp = make_ssa_name (mask_type);
12379 gimple_call_set_lhs (call, tmp);
12380 gimple_seq_add_stmt (seq, call);
12381 return tmp;
12384 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12385 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
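/* In the scalar model sketched above this is simply the complement,
   not_mask[i] = (start_index + i >= end_index), obtained below by
   applying BIT_NOT_EXPR to the result of vect_gen_while. */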
12387 tree
12388 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12389 tree end_index)
12391 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12392 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12395 /* Try to compute the vector types required to vectorize STMT_INFO,
12396 returning true on success and false if vectorization isn't possible.
12397 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12398 make sure that the number of elements in the vectors is no bigger
12399 than GROUP_SIZE.
12401 On success:
12403 - Set *STMT_VECTYPE_OUT to:
12404 - NULL_TREE if the statement doesn't need to be vectorized;
12405 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12407 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12408 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12409 statement does not help to determine the overall number of units. */
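/* Illustrative example (assuming 128-bit vectors, not taken from the
   sources): for a widening conversion such as int_x = (int) short_y,
   *STMT_VECTYPE_OUT would be the 4 x int vector derived from the lhs,
   while *NUNITS_VECTYPE_OUT would be the 8 x short vector derived from
   the smallest scalar type, since that is what determines the maximum
   number of units. */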
12411 opt_result
12412 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12413 tree *stmt_vectype_out,
12414 tree *nunits_vectype_out,
12415 unsigned int group_size)
12417 gimple *stmt = stmt_info->stmt;
12419 /* For BB vectorization, we should always have a group size once we've
12420 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12421 are tentative requests during things like early data reference
12422 analysis and pattern recognition. */
12423 if (is_a <bb_vec_info> (vinfo))
12424 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12425 else
12426 group_size = 0;
12428 *stmt_vectype_out = NULL_TREE;
12429 *nunits_vectype_out = NULL_TREE;
12431 if (gimple_get_lhs (stmt) == NULL_TREE
12432 /* MASK_STORE has no lhs, but is ok. */
12433 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12435 if (is_a <gcall *> (stmt))
12437 /* Ignore calls with no lhs.  These must be calls to
12438 #pragma omp simd functions, and the vectorization factor
12439 they really need can't be determined until
12440 vectorizable_simd_clone_call. */
12441 if (dump_enabled_p ())
12442 dump_printf_loc (MSG_NOTE, vect_location,
12443 "defer to SIMD clone analysis.\n");
12444 return opt_result::success ();
12447 return opt_result::failure_at (stmt,
12448 "not vectorized: irregular stmt.%G", stmt);
12451 tree vectype;
12452 tree scalar_type = NULL_TREE;
12453 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12455 vectype = STMT_VINFO_VECTYPE (stmt_info);
12456 if (dump_enabled_p ())
12457 dump_printf_loc (MSG_NOTE, vect_location,
12458 "precomputed vectype: %T\n", vectype);
12460 else if (vect_use_mask_type_p (stmt_info))
12462 unsigned int precision = stmt_info->mask_precision;
12463 scalar_type = build_nonstandard_integer_type (precision, 1);
12464 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12465 if (!vectype)
12466 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12467 " data-type %T\n", scalar_type);
12468 if (dump_enabled_p ())
12469 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12471 else
12473 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12474 scalar_type = TREE_TYPE (DR_REF (dr));
12475 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12476 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12477 else
12478 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12480 if (dump_enabled_p ())
12482 if (group_size)
12483 dump_printf_loc (MSG_NOTE, vect_location,
12484 "get vectype for scalar type (group size %d):"
12485 " %T\n", group_size, scalar_type);
12486 else
12487 dump_printf_loc (MSG_NOTE, vect_location,
12488 "get vectype for scalar type: %T\n", scalar_type);
12490 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12491 if (!vectype)
12492 return opt_result::failure_at (stmt,
12493 "not vectorized:"
12494 " unsupported data-type %T\n",
12495 scalar_type);
12497 if (dump_enabled_p ())
12498 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12501 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12502 return opt_result::failure_at (stmt,
12503 "not vectorized: vector stmt in loop:%G",
12504 stmt);
12506 *stmt_vectype_out = vectype;
12508 /* Don't try to compute scalar types if the stmt produces a boolean
12509 vector; use the existing vector type instead. */
12510 tree nunits_vectype = vectype;
12511 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12513 /* The number of units is set according to the smallest scalar
12514 type (or the largest vector size, but we only support one
12515 vector size per vectorization). */
12516 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12517 TREE_TYPE (vectype));
12518 if (scalar_type != TREE_TYPE (vectype))
12520 if (dump_enabled_p ())
12521 dump_printf_loc (MSG_NOTE, vect_location,
12522 "get vectype for smallest scalar type: %T\n",
12523 scalar_type);
12524 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12525 group_size);
12526 if (!nunits_vectype)
12527 return opt_result::failure_at
12528 (stmt, "not vectorized: unsupported data-type %T\n",
12529 scalar_type);
12530 if (dump_enabled_p ())
12531 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12532 nunits_vectype);
12536 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12537 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12538 return opt_result::failure_at (stmt,
12539 "Not vectorized: Incompatible number "
12540 "of vector subparts between %T and %T\n",
12541 nunits_vectype, *stmt_vectype_out);
12543 if (dump_enabled_p ())
12545 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12546 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12547 dump_printf (MSG_NOTE, "\n");
12550 *nunits_vectype_out = nunits_vectype;
12551 return opt_result::success ();
12554 /* Generate and return a statement sequence that sets the vector length LEN to:
12556 min_of_start_and_end = min (START_INDEX, END_INDEX);
12557 left_len = END_INDEX - min_of_start_and_end;
12558 rhs = min (left_len, LEN_LIMIT);
12559 LEN = rhs;
12561 Note: the cost of the code generated by this function is modeled
12562 by vect_estimate_min_profitable_iters, so changes here may need
12563 corresponding changes there. */
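/* Worked example (numbers chosen only for illustration): with
   START_INDEX = 13, END_INDEX = 17 and LEN_LIMIT = 8 the sequence
   computes min = 13, left_len = 17 - 13 = 4 and LEN = min (4, 8) = 4,
   so only four lanes stay active in the final iteration; in earlier
   iterations left_len exceeds LEN_LIMIT and LEN saturates at 8. */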
12565 gimple_seq
12566 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12568 gimple_seq stmts = NULL;
12569 tree len_type = TREE_TYPE (len);
12570 gcc_assert (TREE_TYPE (start_index) == len_type);
12572 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12573 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12574 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12575 gimple* stmt = gimple_build_assign (len, rhs);
12576 gimple_seq_add_stmt (&stmts, stmt);
12578 return stmts;