gcc/tree-vect-stmts.c

/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"  /* FIXME: for insn_data */
#include "cgraph.h"
#include "dumpfile.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "cfgloop.h"
#include "explow.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "regs.h"
#include "attribs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"

/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}

/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */
bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop* loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}

/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

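/* For instance (illustrative only), the cost-model helpers below invoke it
   roughly as

     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);

   queueing COUNT copies of KIND on BODY_COST_VEC for the given location,
   while the return value is only the rough builtin estimate.  */
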
unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
                  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
                  tree vectype, int misalign,
                  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
      (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
                         "vect_array");
}

/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
                   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}

/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
                    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
                      build_int_cst (size_type_node, n),
                      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}

/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
                       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}

/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
                    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "mark relevant %d, live %d: %G", relevant, live_p,
                     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "last stmt in pattern. don't mark"
                         " relevant/live.\n");
      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}

/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
                                  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "use not simple.\n");
          return false;
        }

      if (dt != vect_external_def && dt != vect_constant_def)
        return false;
    }
  return true;
}

/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */

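/* Illustrative example (assumed scalar loop):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;   <-- relevant: it alters memory (has a vdef)
         s += b[i];         <-- live/relevant if s is used after the loop
       }

   whereas the induction increment i = i + 1 is typically neither live
   nor relevant by the criteria above.  */
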
static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt_info->stmt)
      && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
        && !gimple_clobber_p (stmt_info->stmt))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vec_stmt_relevant_p: stmt has vdefs.\n");
        *relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (is_gimple_debug (USE_STMT (use_p)))
                continue;

              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "vec_stmt_relevant_p: used out of loop.\n");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop closed form)  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
        {
          internal_fn ifn = gimple_call_internal_fn (call);
          int mask_index = internal_fn_mask_index (ifn);
          if (mask_index >= 0
              && use == gimple_call_arg (call, mask_index))
            return true;
          int stored_value_index = internal_fn_stored_value_index (ifn);
          if (stored_value_index >= 0
              && use == gimple_call_arg (call, stored_value_index))
            return true;
          if (internal_gather_scatter_fn_p (ifn)
              && use == gimple_call_arg (call, 1))
            return true;
        }
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/*
   Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
             enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
             bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
                                   "not vectorized:"
                                   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = dstmt_vinfo
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...              */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
                      vect_used_in_scope : vect_unused_in_scope;
          break;

        case vect_used_in_outer_by_reduction:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_by_reduction;
          break;

        case vect_used_in_outer:
          gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
          relevant = vect_used_in_scope;
          break;

        case vect_used_in_scope:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = dstmt_vinfo
        outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
                stmt # use (d)          */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
        {
        case vect_unused_in_scope:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
            || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
                      vect_used_in_outer_by_reduction : vect_unused_in_scope;
          break;

        case vect_used_by_reduction:
        case vect_used_only_live:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_scope:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
           && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
           && ! STMT_VINFO_LIVE_P (stmt_vinfo)
           && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
                                      loop_latch_edge (bb->loop_father))
               == use))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}

/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
                             phi_info->stmt);

          if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi_info, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          if (is_gimple_debug (gsi_stmt (si)))
            continue;
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "init: stmt relevant? %G", stmt_info->stmt);

          if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
        }
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
         (DEF_STMT) as relevant/irrelevant according to the relevance property
         of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
         propagated as is to the DEF_STMTs of its USEs.

         One exception is when STMT has been identified as defining a reduction
         variable; in this case we set the relevance to vect_used_by_reduction.
         This is because we distinguish between two kinds of relevant stmts -
         those that are used by a reduction computation, and those that are
         (also) used by a regular computation.  This allows us later on to
         identify stmts that are used solely by a reduction, and therefore the
         order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
        {
        case vect_reduction_def:
          gcc_assert (relevant != vect_unused_in_scope);
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of reduction.\n");
          break;

        case vect_nested_cycle:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_in_outer_by_reduction
              && relevant != vect_used_in_outer)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
          break;

        case vect_double_reduction_def:
          if (relevant != vect_unused_in_scope
              && relevant != vect_used_by_reduction
              && relevant != vect_used_only_live)
            return opt_result::failure_at
              (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
          break;

        default:
          break;
        }

      if (is_pattern_stmt_p (stmt_vinfo))
        {
          /* Pattern statements are not inserted into the code, so
             FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
             have to scan the RHS or function arguments instead.  */
          if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
            {
              enum tree_code rhs_code = gimple_assign_rhs_code (assign);
              tree op = gimple_assign_rhs1 (assign);

              i = 1;
              if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
                {
                  opt_result res
                    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
                                   loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
                                     loop_vinfo, relevant, &worklist, false);
                  if (!res)
                    return res;
                  i = 2;
                }
              for (; i < gimple_num_ops (assign); i++)
                {
                  op = gimple_op (assign, i);
                  if (TREE_CODE (op) == SSA_NAME)
                    {
                      opt_result res
                        = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                                       &worklist, false);
                      if (!res)
                        return res;
                    }
                }
            }
          else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
            {
              for (i = 0; i < gimple_call_num_args (call); i++)
                {
                  tree arg = gimple_call_arg (call, i);
                  opt_result res
                    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
                                   &worklist, false);
                  if (!res)
                    return res;
                }
            }
        }
      else
        FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
          {
            tree op = USE_FROM_PTR (use_p);
            opt_result res
              = process_use (stmt_vinfo, op, loop_vinfo, relevant,
                             &worklist, false);
            if (!res)
              return res;
          }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
        {
          gather_scatter_info gs_info;
          if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
            gcc_unreachable ();
          opt_result res
            = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
                           &worklist, true);
          if (!res)
            {
              if (fatal)
                *fatal = false;
              return res;
            }
        }
    } /* while worklist */

  return opt_result::success ();
}

/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
                        stmt_vec_info stmt_info, int ncopies,
                        enum vect_def_type *dt,
                        int ndts,
                        slp_tree node,
                        stmt_vector_for_cost *cost_vec,
                        vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
                                   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_simple_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

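/* Worked example (illustrative): a two-step promotion (PWR == 1) with
   NCOPIES == 2 costs 2 vector stmts for the narrowest step and 4 for the
   next one, i.e. 6 in total, plus up to two prologue stmts for
   constant/external operands.  */
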
static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
                                    enum vect_def_type *dt,
                                    unsigned int ncopies, int pwr,
                                    stmt_vector_for_cost *cost_vec,
                                    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
                                       widen_arith
                                       ? vector_stmt : vec_promote_demote,
                                       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
                                         stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_promotion_demotion_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
      if (!ret)
        continue;
      if (gimple_return_retval (ret) == decl)
        return true;
      /* We often end up with an aggregate copy to the result decl,
         handle that case as well.  First skip intermediate clobbers
         though.  */
      gimple *def = ret;
      do
        {
          def = SSA_NAME_DEF_STMT (gimple_vuse (def));
        }
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
          && gimple_assign_lhs (def) == gimple_return_retval (ret)
          && gimple_assign_rhs1 (def) == decl)
        return true;
    }
  return false;
}

/* Function vect_model_store_cost

   Models cost for stores.  In the case of grouped accesses, one access
   has the overhead of the grouped access attributed to it.  */

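/* For instance (illustrative): an interleaved group of 4 stores implemented
   via permute-and-store (VMAT_CONTIGUOUS_PERMUTE) with NCOPIES == 1 gets
   ceil_log2 (4) * 4 == 8 vec_perm operations costed, all attributed to the
   first statement of the group.  */
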
static void
vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
                       vect_memory_access_type memory_access_type,
                       dr_alignment_support alignment_support_scheme,
                       int misalignment,
                       vec_load_store_type vls_type, slp_tree slp_node,
                       stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  stmt_vec_info first_stmt_info = stmt_info;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (vls_type == VLS_STORE_INVARIANT)
    {
      if (!slp_node)
        prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                           stmt_info, 0, vect_prologue);
    }

  /* Grouped stores update all elements in the group at once,
     so we want the DR for the first statement.  */
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* We assume that the cost of a single store-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate stores.  If a grouped
     access is instead being provided by a permute-and-store operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses high and low interleave or shuffle operations for each
         needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
                                      stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_store_cost: strided group_size = %d .\n",
                         group_size);
    }

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Costs of the stores.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_store, stmt_info, 0, vect_body);
    }
  else
    vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
                         misalignment, &inside_cost, cost_vec);

  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP)
    {
      /* N scalar stores plus extracting the elements.  */
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       vec_to_scalar, stmt_info, 0, vect_body);
    }

  /* When vectorizing a store into the function result assign
     a penalty if the function returns in a multi-register location.
     In this case we assume we'll end up with having to spill the
     vector result and do piecewise loads as a conservative estimate.  */
  tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
  if (base
      && (TREE_CODE (base) == RESULT_DECL
          || (DECL_P (base) && cfun_returns (base)))
      && !aggregate_value_p (base, cfun->decl))
    {
      rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
      /* ??? Handle PARALLEL in some way.  */
      if (REG_P (reg))
        {
          int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
          /* Assume that a single reg-reg move is possible and cheap,
             do not account for vector to gp register move cost.  */
          if (nregs > 1)
            {
              /* Spill.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies,
                                                 vector_store,
                                                 stmt_info, 0, vect_epilogue);
              /* Loads.  */
              prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
                                                 scalar_load,
                                                 stmt_info, 0, vect_epilogue);
            }
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_store_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
                     dr_alignment_support alignment_support_scheme,
                     int misalignment,
                     unsigned int *inside_cost,
                     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vector_store, stmt_info, 0,
                                          vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: aligned.\n");
        break;
      }

    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned store.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_store, stmt_info,
                                          misalignment, vect_body);
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_store_cost: unaligned supported by "
                           "hardware.\n");
        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_store_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Function vect_model_load_cost

   Models cost for loads.  In the case of grouped accesses, one access has
   the overhead of the grouped access attributed to it.  Since unaligned
   accesses are supported for loads, we also account for the costs of the
   access scheme chosen.  */

static void
vect_model_load_cost (vec_info *vinfo,
                      stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
                      vect_memory_access_type memory_access_type,
                      dr_alignment_support alignment_support_scheme,
                      int misalignment,
                      gather_scatter_info *gs_info,
                      slp_tree slp_node,
                      stmt_vector_for_cost *cost_vec)
{
  unsigned int inside_cost = 0, prologue_cost = 0;
  bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);

  gcc_assert (cost_vec);

  /* ??? Somehow we need to fix this at the callers.  */
  if (slp_node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
    {
      /* If the load is permuted then the alignment is determined by
         the first group element not by the first scalar stmt DR.  */
      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      /* Record the cost for the permutation.  */
      unsigned n_perms, n_loads;
      vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
                                    vf, true, &n_perms, &n_loads);
      inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
                                       first_stmt_info, 0, vect_body);

      /* And adjust the number of loads performed.  This handles
         redundancies as well as loads that are later dead.  */
      ncopies = n_loads;
    }

  /* Grouped loads read all elements in the group at once,
     so we want the DR for the first statement.  */
  stmt_vec_info first_stmt_info = stmt_info;
  if (!slp_node && grouped_access_p)
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  /* True if we should include any once-per-group costs as well as
     the cost of the statement itself.  For SLP we only get called
     once per group anyhow.  */
  bool first_stmt_p = (first_stmt_info == stmt_info);

  /* An IFN_LOAD_LANES will load all its vector results, regardless of which
     ones we actually need.  Account for the cost of unused results.  */
  if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
      stmt_vec_info next_stmt_info = first_stmt_info;
      do
        {
          gaps -= 1;
          next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
        }
      while (next_stmt_info);
      if (gaps)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "vect_model_load_cost: %d unused vectors.\n",
                             gaps);
          vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
                              alignment_support_scheme, misalignment, false,
                              &inside_cost, &prologue_cost,
                              cost_vec, cost_vec, true);
        }
    }

  /* We assume that the cost of a single load-lanes instruction is
     equivalent to the cost of DR_GROUP_SIZE separate loads.  If a grouped
     access is instead being provided by a load-and-permute operation,
     include the cost of the permutes.  */
  if (first_stmt_p
      && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Uses even and odd extract operations or shuffle operations
         for each needed permute.  */
      int group_size = DR_GROUP_SIZE (first_stmt_info);
      int nstmts = ncopies * ceil_log2 (group_size) * group_size;
      inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
                                       stmt_info, 0, vect_body);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vect_model_load_cost: strided group_size = %d .\n",
                         group_size);
    }

  /* The loads themselves.  */
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_GATHER_SCATTER)
    {
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
      if (memory_access_type == VMAT_GATHER_SCATTER
          && gs_info->ifn == IFN_LAST && !gs_info->decl)
        /* For emulated gathers N offset vector element extracts
           (we assume the scalar scaling and ptr + offset add is consumed by
            the load).  */
        inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
                                         vec_to_scalar, stmt_info, 0,
                                         vect_body);
      /* N scalar loads plus gathering them into a vector.  */
      inside_cost += record_stmt_cost (cost_vec,
                                       ncopies * assumed_nunits,
                                       scalar_load, stmt_info, 0, vect_body);
    }
  else if (memory_access_type == VMAT_INVARIANT)
    {
      /* Invariant loads will ideally be hoisted and splat to a vector.  */
      prologue_cost += record_stmt_cost (cost_vec, 1,
                                         scalar_load, stmt_info, 0,
                                         vect_prologue);
      prologue_cost += record_stmt_cost (cost_vec, 1,
                                         scalar_to_vec, stmt_info, 0,
                                         vect_prologue);
    }
  else
    vect_get_load_cost (vinfo, stmt_info, ncopies,
                        alignment_support_scheme, misalignment, first_stmt_p,
                        &inside_cost, &prologue_cost,
                        cost_vec, cost_vec, true);
  if (memory_access_type == VMAT_ELEMENTWISE
      || memory_access_type == VMAT_STRIDED_SLP
      || (memory_access_type == VMAT_GATHER_SCATTER
          && gs_info->ifn == IFN_LAST && !gs_info->decl))
    inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
                                     stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_load_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}

/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
                    dr_alignment_support alignment_support_scheme,
                    int misalignment,
                    bool add_realign_cost, unsigned int *inside_cost,
                    unsigned int *prologue_cost,
                    stmt_vector_for_cost *prologue_cost_vec,
                    stmt_vector_for_cost *body_cost_vec,
                    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: aligned.\n");

        break;
      }
    case dr_unaligned_supported:
      {
        /* Here, we assign an additional cost for the unaligned load.  */
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          unaligned_load, stmt_info,
                                          misalignment, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned supported by "
                           "hardware.\n");

        break;
      }
    case dr_explicit_realign:
      {
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
                                          vector_load, stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
                                          vec_perm, stmt_info, 0, vect_body);

        /* FIXME: If the misalignment remains fixed across the iterations of
           the containing loop, the following cost should be added to the
           prologue costs.  */
        if (targetm.vectorize.builtin_mask_for_load)
          *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
                                            stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign\n");

        break;
      }
    case dr_explicit_realign_optimized:
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: unaligned software "
                           "pipelined.\n");

        /* Unaligned software pipeline has a load of an address, an initial
           load, and possibly a mask operation to "prime" the loop.  However,
           if this is an access in a group of loads, which provide grouped
           access, then the above cost should only be considered for one
           access in the group.  Inside the loop, there is a load op
           and a realignment op.  */

        if (add_realign_cost && record_prologue_costs)
          {
            *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
                                                vector_stmt, stmt_info,
                                                0, vect_prologue);
            if (targetm.vectorize.builtin_mask_for_load)
              *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
                                                  vector_stmt, stmt_info,
                                                  0, vect_prologue);
          }

        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                          stmt_info, 0, vect_body);
        *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
                                          stmt_info, 0, vect_body);

        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_model_load_cost: explicit realign optimized"
                           "\n");

        break;
      }

    case dr_unaligned_unsupported:
      {
        *inside_cost = VECT_MAX_COST;

        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "vect_model_load_cost: unsupported access.\n");
        break;
      }

    default:
      gcc_unreachable ();
    }
}

/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
                    gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "created new init_stmt: %G", new_stmt);
}

/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

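/* For example (illustrative, SSA names made up): with VAL == _5 of type int
   and TYPE == vector(4) int, this emits something like

     cst_1 = {_5, _5, _5, _5};

   at GSI (or in the loop preheader when GSI is NULL) and returns cst_1.  */
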
tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
                  gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
        {
          /* Scalar boolean value should be transformed into
             all zeros or all ones value before building a vector.  */
          if (VECTOR_BOOLEAN_TYPE_P (type))
            {
              tree true_val = build_all_ones_cst (TREE_TYPE (type));
              tree false_val = build_zero_cst (TREE_TYPE (type));

              if (CONSTANT_CLASS_P (val))
                val = integer_zerop (val) ? false_val : true_val;
              else
                {
                  new_temp = make_ssa_name (TREE_TYPE (type));
                  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                   val, true_val, false_val);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                  val = new_temp;
                }
            }
          else
            {
              gimple_seq stmts = NULL;
              if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
                val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
                                    TREE_TYPE (type), val);
              else
                /* ??? Condition vectorization expects us to do
                   promotion of invariant/external defs.  */
                val = gimple_convert (&stmts, TREE_TYPE (type), val);
              for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
                   !gsi_end_p (gsi2); )
                {
                  init_stmt = gsi_stmt (gsi2);
                  gsi_remove (&gsi2, false);
                  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
                }
            }
        }
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
  return new_temp;
}

/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

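/* For instance (illustrative): for a constant operand 3 and a V4SI vector
   type, a single { 3, 3, 3, 3 } def is built via vect_init_vector and
   pushed NCOPIES times; for an SSA_NAME defined by a vectorized stmt in the
   loop, the NCOPIES lhs values recorded in STMT_VINFO_VEC_STMTS of the
   definition are pushed instead.  */
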
void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
                               unsigned ncopies,
                               tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
                                      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
        vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
               && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
        vector_type = truth_type_for (stmt_vectype);
      else
        vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
        vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
        vec_oprnds->quick_push (gimple_get_lhs
                                  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}

/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
                   unsigned ncopies,
                   tree op0, vec<tree> *vec_oprnds0, tree vectype0,
                   tree op1, vec<tree> *vec_oprnds1, tree vectype1,
                   tree op2, vec<tree> *vec_oprnds2, tree vectype2,
                   tree op3, vec<tree> *vec_oprnds3, tree vectype3)
{
  if (slp_node)
    {
      if (op0)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
        vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op0, vec_oprnds0, vectype0);
      if (op1)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op1, vec_oprnds1, vectype1);
      if (op2)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op2, vec_oprnds2, vectype2);
      if (op3)
        vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
                                       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
                   unsigned ncopies,
                   tree op0, vec<tree> *vec_oprnds0,
                   tree op1, vec<tree> *vec_oprnds1,
                   tree op2, vec<tree> *vec_oprnds2,
                   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
                     op0, vec_oprnds0, NULL_TREE,
                     op1, vec_oprnds1, NULL_TREE,
                     op2, vec_oprnds2, NULL_TREE,
                     op3, vec_oprnds3, NULL_TREE);
}

/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
                               stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
         e.g. be in a must-not-throw region.  Ensure newly created stmts
         that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
        add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}

/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
                          stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}

/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
                             stmt_vec_info stmt_info, gimple *vec_stmt,
                             gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
        {
          tree vdef = gimple_vdef (at_stmt);
          gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
          gimple_set_modified (vec_stmt, true);
          /* If we have an SSA vuse and insert a store, update virtual
             SSA form to avoid triggering the renamer.  Do so only
             if we can easily see all uses - which is what almost always
             happens with the way vectorized stmts are inserted.  */
          if ((vdef && TREE_CODE (vdef) == SSA_NAME)
              && ((is_gimple_assign (vec_stmt)
                   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
                  || (is_gimple_call (vec_stmt)
                      && !(gimple_call_flags (vec_stmt)
                           & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
            {
              tree new_vdef = copy_ssa_name (vuse, vec_stmt);
              gimple_set_vdef (vec_stmt, new_vdef);
              SET_USE (gimple_vuse_op (at_stmt), new_vdef);
            }
        }
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}

/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

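/* E.g. (illustrative): a call to sqrtf with VECTYPE_OUT == VECTYPE_IN
   == V4SF can map to IFN_SQRT, provided direct_internal_fn_supported_p
   reports target support for that mode; otherwise IFN_LAST is returned.  */
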
static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
                                tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
        {
          tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
          tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
          if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
                                              OPTIMIZE_FOR_SPEED))
            return ifn;
        }
    }
  return IFN_LAST;
}


static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
                                  gimple_stmt_iterator *);

/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
                                      vec_load_store_type vls_type,
                                      int group_size,
                                      vect_memory_access_type
                                      memory_access_type,
                                      unsigned int ncopies,
                                      gather_scatter_info *gs_info,
                                      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      if (is_load
          ? !vect_load_lanes_supported (vectype, group_size, true)
          : !vect_store_lanes_supported (vectype, group_size, true))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't operate on partial vectors because"
                             " the target doesn't have an appropriate"
                             " load/store-lanes instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
        }
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
                         ? IFN_MASK_GATHER_LOAD
                         : IFN_MASK_SCATTER_STORE);
      if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
                                                   gs_info->memory_type,
                                                   gs_info->offset_vectype,
                                                   gs_info->scale))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "can't operate on partial vectors because"
                             " the target doesn't have an appropriate"
                             " gather load or scatter store instruction.\n");
          LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
          return;
        }
      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
         scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors because an"
                         " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors when emulating"
                         " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  bool using_partial_vectors_p = false;
  if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
      && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
      using_partial_vectors_p = true;
    }

  machine_mode vmode;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
      vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "can't operate on partial vectors because the"
                         " target doesn't have the appropriate partial"
                         " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}

1795 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1796 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1797 that needs to be applied to all loads and stores in a vectorized loop.
1798 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1799 otherwise return VEC_MASK & LOOP_MASK.
1801 MASK_TYPE is the type of both masks. If new statements are needed,
1802 insert them before GSI. */
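/* For instance (SSA names made up for illustration), with a nonnull loop
   mask the function emits

     vec_mask_and_1 = vec_mask_2 & loop_mask_3;

   before GSI and returns vec_mask_and_1, unless the same { VEC_MASK,
   LOOP_MASK } pair has already been recorded as applied, in which case
   VEC_MASK is returned unchanged. */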
1804 static tree
1805 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1806 tree vec_mask, gimple_stmt_iterator *gsi)
1808 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1809 if (!loop_mask)
1810 return vec_mask;
1812 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1814 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1815 return vec_mask;
1817 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1818 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1819 vec_mask, loop_mask);
1821 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1822 return and_res;
1825 /* Determine whether we can use a gather load or scatter store to vectorize
1826 strided load or store STMT_INFO by truncating the current offset to a
1827 smaller width. We need to be able to construct an offset vector:
1829 { 0, X, X*2, X*3, ... }
1831 without loss of precision, where X is STMT_INFO's DR_STEP.
1833 Return true if this is possible, describing the gather load or scatter
1834 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
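/* A worked example with made-up numbers: if DR_STEP is 4 bytes and the
   loop runs for at most 256 scalar iterations, COUNT below is 255.
   With SCALE = 1 the largest offset is 255 * 4 = 1020, which needs a
   16-bit offset type; with SCALE = 4 the offsets are { 0, 1, ..., 255 },
   which fit in 8 bits.  Each scale is tried in turn until the target
   supports the resulting gather or scatter operation. */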
1836 static bool
1837 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1838 loop_vec_info loop_vinfo, bool masked_p,
1839 gather_scatter_info *gs_info)
1841 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1842 data_reference *dr = dr_info->dr;
1843 tree step = DR_STEP (dr);
1844 if (TREE_CODE (step) != INTEGER_CST)
1846 /* ??? Perhaps we could use range information here? */
1847 if (dump_enabled_p ())
1848 dump_printf_loc (MSG_NOTE, vect_location,
1849 "cannot truncate variable step.\n");
1850 return false;
1853 /* Get the number of bits in an element. */
1854 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1855 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1856 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1858 /* Set COUNT to the upper limit on the number of elements - 1.
1859 Start with the maximum vectorization factor. */
1860 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1862 /* Try lowering COUNT to the number of scalar latch iterations. */
1863 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1864 widest_int max_iters;
1865 if (max_loop_iterations (loop, &max_iters)
1866 && max_iters < count)
1867 count = max_iters.to_shwi ();
1869 /* Try scales of 1 and the element size. */
1870 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1871 wi::overflow_type overflow = wi::OVF_NONE;
1872 for (int i = 0; i < 2; ++i)
1874 int scale = scales[i];
1875 widest_int factor;
1876 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1877 continue;
1879 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1880 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1881 if (overflow)
1882 continue;
1883 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1884 unsigned int min_offset_bits = wi::min_precision (range, sign);
1886 /* Find the narrowest viable offset type. */
1887 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1888 tree offset_type = build_nonstandard_integer_type (offset_bits,
1889 sign == UNSIGNED);
1891 /* See whether the target supports the operation with an offset
1892 no narrower than OFFSET_TYPE. */
1893 tree memory_type = TREE_TYPE (DR_REF (dr));
1894 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1895 vectype, memory_type, offset_type, scale,
1896 &gs_info->ifn, &gs_info->offset_vectype)
1897 || gs_info->ifn == IFN_LAST)
1898 continue;
1900 gs_info->decl = NULL_TREE;
1901 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1902 but we don't need to store that here. */
1903 gs_info->base = NULL_TREE;
1904 gs_info->element_type = TREE_TYPE (vectype);
1905 gs_info->offset = fold_convert (offset_type, step);
1906 gs_info->offset_dt = vect_constant_def;
1907 gs_info->scale = scale;
1908 gs_info->memory_type = memory_type;
1909 return true;
1912 if (overflow && dump_enabled_p ())
1913 dump_printf_loc (MSG_NOTE, vect_location,
1914 "truncating gather/scatter offset to %d bits"
1915 " might change its value.\n", element_bits);
1917 return false;
1920 /* Return true if we can use gather/scatter internal functions to
1921 vectorize STMT_INFO, which is a grouped or strided load or store.
1922 MASKED_P is true if the load or store is conditional. When returning
1923 true, fill in GS_INFO with the information required to perform the
1924 operation. */
1926 static bool
1927 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1928 loop_vec_info loop_vinfo, bool masked_p,
1929 gather_scatter_info *gs_info)
1931 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1932 || gs_info->ifn == IFN_LAST)
1933 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1934 masked_p, gs_info);
1936 tree old_offset_type = TREE_TYPE (gs_info->offset);
1937 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1939 gcc_assert (TYPE_PRECISION (new_offset_type)
1940 >= TYPE_PRECISION (old_offset_type));
1941 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1943 if (dump_enabled_p ())
1944 dump_printf_loc (MSG_NOTE, vect_location,
1945 "using gather/scatter for strided/grouped access,"
1946 " scale = %d\n", gs_info->scale);
1948 return true;
1951 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1952 elements with a known constant step. Return -1 if that step
1953 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1955 static int
1956 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1958 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1959 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1960 size_zero_node);
1963 /* If the target supports a permute mask that reverses the elements in
1964 a vector of type VECTYPE, return that mask, otherwise return null. */
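/* For example, for V4SI the required permutation is { 3, 2, 1, 0 };
   the builder below encodes it as the single stepped pattern
   { nunits - 1, nunits - 2, nunits - 3, ... } so that it also works
   for variable-length vectors. */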
1966 static tree
1967 perm_mask_for_reverse (tree vectype)
1969 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1971 /* The encoding has a single stepped pattern. */
1972 vec_perm_builder sel (nunits, 1, 3);
1973 for (int i = 0; i < 3; ++i)
1974 sel.quick_push (nunits - 1 - i);
1976 vec_perm_indices indices (sel, 1, nunits);
1977 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1978 return NULL_TREE;
1979 return vect_gen_perm_mask_checked (vectype, indices);
1982 /* A subroutine of get_load_store_type, with a subset of the same
1983 arguments. Handle the case where STMT_INFO is a load or store that
1984 accesses consecutive elements with a negative step. Sets *POFFSET
1985 to the offset to be applied to the DR for the first access. */
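/* For example, for V4SI (four 4-byte elements) the first vector access
   starts three elements before the DR address, so *POFFSET is set to
   -12 bytes below. */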
1987 static vect_memory_access_type
1988 get_negative_load_store_type (vec_info *vinfo,
1989 stmt_vec_info stmt_info, tree vectype,
1990 vec_load_store_type vls_type,
1991 unsigned int ncopies, poly_int64 *poffset)
1993 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1994 dr_alignment_support alignment_support_scheme;
1996 if (ncopies > 1)
1998 if (dump_enabled_p ())
1999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2000 "multiple types with negative step.\n");
2001 return VMAT_ELEMENTWISE;
2004 /* For backward running DRs the first access in vectype actually is
2005 N-1 elements before the address of the DR. */
2006 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2007 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2009 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2010 alignment_support_scheme
2011 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2012 if (alignment_support_scheme != dr_aligned
2013 && alignment_support_scheme != dr_unaligned_supported)
2015 if (dump_enabled_p ())
2016 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2017 "negative step but alignment required.\n");
2018 *poffset = 0;
2019 return VMAT_ELEMENTWISE;
2022 if (vls_type == VLS_STORE_INVARIANT)
2024 if (dump_enabled_p ())
2025 dump_printf_loc (MSG_NOTE, vect_location,
2026 "negative step with invariant source;"
2027 " no permute needed.\n");
2028 return VMAT_CONTIGUOUS_DOWN;
2031 if (!perm_mask_for_reverse (vectype))
2033 if (dump_enabled_p ())
2034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2035 "negative step and reversing not supported.\n");
2036 *poffset = 0;
2037 return VMAT_ELEMENTWISE;
2040 return VMAT_CONTIGUOUS_REVERSE;
2043 /* STMT_INFO is either a masked or unconditional store. Return the value
2044 being stored. */
2046 tree
2047 vect_get_store_rhs (stmt_vec_info stmt_info)
2049 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2051 gcc_assert (gimple_assign_single_p (assign));
2052 return gimple_assign_rhs1 (assign);
2054 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2056 internal_fn ifn = gimple_call_internal_fn (call);
2057 int index = internal_fn_stored_value_index (ifn);
2058 gcc_assert (index >= 0);
2059 return gimple_call_arg (call, index);
2061 gcc_unreachable ();
2064 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2066 This function returns a vector type which can be composed from NELTS pieces,
2067 whose type is recorded in PTYPE. VTYPE should be a vector type with the
2068 same vector size as the returned vector. The function first checks whether
2069 the target supports a pieces-sized vector mode for the construction; if it
2070 does not, it then checks a pieces-sized scalar mode. It returns NULL_TREE
2071 if no suitable composition can be found.
2073 For example, for (vtype=V16QI, nelts=4), we can probably get:
2074 - V16QI with PTYPE V4QI.
2075 - V4SI with PTYPE SI.
2076 - NULL_TREE. */
2078 static tree
2079 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2081 gcc_assert (VECTOR_TYPE_P (vtype));
2082 gcc_assert (known_gt (nelts, 0U));
2084 machine_mode vmode = TYPE_MODE (vtype);
2085 if (!VECTOR_MODE_P (vmode))
2086 return NULL_TREE;
2088 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2089 unsigned int pbsize;
2090 if (constant_multiple_p (vbsize, nelts, &pbsize))
2092 /* First check if vec_init optab supports construction from
2093 vector pieces directly. */
2094 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2095 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2096 machine_mode rmode;
2097 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2098 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2099 != CODE_FOR_nothing))
2101 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2102 return vtype;
2105 /* Otherwise check whether an integer type of the same piece size exists
2106 and whether the vec_init optab supports construction from it directly. */
2107 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2108 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2109 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2110 != CODE_FOR_nothing))
2112 *ptype = build_nonstandard_integer_type (pbsize, 1);
2113 return build_vector_type (*ptype, nelts);
2117 return NULL_TREE;
2120 /* A subroutine of get_load_store_type, with a subset of the same
2121 arguments. Handle the case where STMT_INFO is part of a grouped load
2122 or store.
2124 For stores, the statements in the group are all consecutive
2125 and there is no gap at the end. For loads, the statements in the
2126 group might not be consecutive; there can be gaps between statements
2127 as well as at the end. */
2129 static bool
2130 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2131 tree vectype, slp_tree slp_node,
2132 bool masked_p, vec_load_store_type vls_type,
2133 vect_memory_access_type *memory_access_type,
2134 poly_int64 *poffset,
2135 dr_alignment_support *alignment_support_scheme,
2136 int *misalignment,
2137 gather_scatter_info *gs_info)
2139 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2140 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2141 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2142 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2143 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2144 bool single_element_p = (stmt_info == first_stmt_info
2145 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2146 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2147 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2149 /* True if the vectorized statements would access beyond the last
2150 statement in the group. */
2151 bool overrun_p = false;
2153 /* True if we can cope with such overrun by peeling for gaps, so that
2154 there is at least one final scalar iteration after the vector loop. */
2155 bool can_overrun_p = (!masked_p
2156 && vls_type == VLS_LOAD
2157 && loop_vinfo
2158 && !loop->inner);
2160 /* There can only be a gap at the end of the group if the stride is
2161 known at compile time. */
2162 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2164 /* Stores can't yet have gaps. */
2165 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2167 if (slp_node)
2169 /* For SLP vectorization we directly vectorize a subchain
2170 without permutation. */
2171 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2172 first_dr_info
2173 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2174 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2176 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2177 separated by the stride, until we have a complete vector.
2178 Fall back to scalar accesses if that isn't possible. */
2179 if (multiple_p (nunits, group_size))
2180 *memory_access_type = VMAT_STRIDED_SLP;
2181 else
2182 *memory_access_type = VMAT_ELEMENTWISE;
2184 else
2186 overrun_p = loop_vinfo && gap != 0;
2187 if (overrun_p && vls_type != VLS_LOAD)
2189 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2190 "Grouped store with gaps requires"
2191 " non-consecutive accesses\n");
2192 return false;
2194 /* An overrun is fine if the trailing elements are smaller
2195 than the alignment boundary B. Every vector access will
2196 be a multiple of B and so we are guaranteed to access a
2197 non-gap element in the same B-sized block. */
2198 if (overrun_p
2199 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2200 vectype)
2201 / vect_get_scalar_dr_size (first_dr_info)))
2202 overrun_p = false;
2204 /* If the gap splits the vector in half and the target
2205 can do half-vector operations, avoid the epilogue peeling
2206 by simply loading only half of the vector. Usually
2207 the construction with an upper zero half will be elided. */
2208 dr_alignment_support alss;
2209 int misalign = dr_misalignment (first_dr_info, vectype);
2210 tree half_vtype;
2211 if (overrun_p
2212 && !masked_p
2213 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2214 vectype, misalign)))
2215 == dr_aligned
2216 || alss == dr_unaligned_supported)
2217 && known_eq (nunits, (group_size - gap) * 2)
2218 && known_eq (nunits, group_size)
2219 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2220 != NULL_TREE))
2221 overrun_p = false;
2223 if (overrun_p && !can_overrun_p)
2225 if (dump_enabled_p ())
2226 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2227 "Peeling for outer loop is not supported\n");
2228 return false;
2230 int cmp = compare_step_with_zero (vinfo, stmt_info);
2231 if (cmp < 0)
2233 if (single_element_p)
2234 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2235 only correct for single element "interleaving" SLP. */
2236 *memory_access_type = get_negative_load_store_type
2237 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2238 else
2240 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2241 separated by the stride, until we have a complete vector.
2242 Fall back to scalar accesses if that isn't possible. */
2243 if (multiple_p (nunits, group_size))
2244 *memory_access_type = VMAT_STRIDED_SLP;
2245 else
2246 *memory_access_type = VMAT_ELEMENTWISE;
2249 else
2251 gcc_assert (!loop_vinfo || cmp > 0);
2252 *memory_access_type = VMAT_CONTIGUOUS;
2256 else
2258 /* We can always handle this case using elementwise accesses,
2259 but see if something more efficient is available. */
2260 *memory_access_type = VMAT_ELEMENTWISE;
2262 /* If there is a gap at the end of the group then these optimizations
2263 would access excess elements in the last iteration. */
2264 bool would_overrun_p = (gap != 0);
2265 /* An overrun is fine if the trailing elements are smaller than the
2266 alignment boundary B. Every vector access will be a multiple of B
2267 and so we are guaranteed to access a non-gap element in the
2268 same B-sized block. */
2269 if (would_overrun_p
2270 && !masked_p
2271 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2272 / vect_get_scalar_dr_size (first_dr_info)))
2273 would_overrun_p = false;
2275 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2276 && (can_overrun_p || !would_overrun_p)
2277 && compare_step_with_zero (vinfo, stmt_info) > 0)
2279 /* First cope with the degenerate case of a single-element
2280 vector. */
2281 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2284 /* Otherwise try using LOAD/STORE_LANES. */
2285 else if (vls_type == VLS_LOAD
2286 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2287 : vect_store_lanes_supported (vectype, group_size,
2288 masked_p))
2290 *memory_access_type = VMAT_LOAD_STORE_LANES;
2291 overrun_p = would_overrun_p;
2294 /* If that fails, try using permuting loads. */
2295 else if (vls_type == VLS_LOAD
2296 ? vect_grouped_load_supported (vectype, single_element_p,
2297 group_size)
2298 : vect_grouped_store_supported (vectype, group_size))
2300 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2301 overrun_p = would_overrun_p;
2305 /* As a last resort, try using a gather load or scatter store.
2307 ??? Although the code can handle all group sizes correctly,
2308 it probably isn't a win to use separate strided accesses based
2309 on nearby locations. Or, even if it's a win over scalar code,
2310 it might not be a win over vectorizing at a lower VF, if that
2311 allows us to use contiguous accesses. */
2312 if (*memory_access_type == VMAT_ELEMENTWISE
2313 && single_element_p
2314 && loop_vinfo
2315 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2316 masked_p, gs_info))
2317 *memory_access_type = VMAT_GATHER_SCATTER;
2320 if (*memory_access_type == VMAT_GATHER_SCATTER
2321 || *memory_access_type == VMAT_ELEMENTWISE)
2323 *alignment_support_scheme = dr_unaligned_supported;
2324 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2326 else
2328 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2329 *alignment_support_scheme
2330 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2331 *misalignment);
2334 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2336 /* STMT is the leader of the group. Check the operands of all the
2337 stmts of the group. */
2338 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2339 while (next_stmt_info)
2341 tree op = vect_get_store_rhs (next_stmt_info);
2342 enum vect_def_type dt;
2343 if (!vect_is_simple_use (op, vinfo, &dt))
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2347 "use not simple.\n");
2348 return false;
2350 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2354 if (overrun_p)
2356 gcc_assert (can_overrun_p);
2357 if (dump_enabled_p ())
2358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2359 "Data access with gaps requires scalar "
2360 "epilogue loop\n");
2361 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2364 return true;
2367 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2368 if there is a memory access type that the vectorized form can use,
2369 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2370 or scatters, fill in GS_INFO accordingly. In addition
2371 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2372 the target does not support the alignment scheme. *MISALIGNMENT
2373 is set according to the alignment of the access (including
2374 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2376 SLP says whether we're performing SLP rather than loop vectorization.
2377 MASKED_P is true if the statement is conditional on a vectorized mask.
2378 VECTYPE is the vector type that the vectorized statements will use.
2379 NCOPIES is the number of vector statements that will be needed. */
2381 static bool
2382 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2383 tree vectype, slp_tree slp_node,
2384 bool masked_p, vec_load_store_type vls_type,
2385 unsigned int ncopies,
2386 vect_memory_access_type *memory_access_type,
2387 poly_int64 *poffset,
2388 dr_alignment_support *alignment_support_scheme,
2389 int *misalignment,
2390 gather_scatter_info *gs_info)
2392 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2393 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2394 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2395 *poffset = 0;
2396 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2398 *memory_access_type = VMAT_GATHER_SCATTER;
2399 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2400 gcc_unreachable ();
2401 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2402 &gs_info->offset_dt,
2403 &gs_info->offset_vectype))
2405 if (dump_enabled_p ())
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "%s index use not simple.\n",
2408 vls_type == VLS_LOAD ? "gather" : "scatter");
2409 return false;
2411 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2413 if (vls_type != VLS_LOAD)
2415 if (dump_enabled_p ())
2416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2417 "unsupported emulated scatter.\n");
2418 return false;
2420 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2421 || !TYPE_VECTOR_SUBPARTS
2422 (gs_info->offset_vectype).is_constant ()
2423 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2424 (gs_info->offset_vectype),
2425 TYPE_VECTOR_SUBPARTS (vectype)))
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "unsupported vector types for emulated "
2430 "gather.\n");
2431 return false;
2434 /* Gather-scatter accesses perform only component accesses, alignment
2435 is irrelevant for them. */
2436 *alignment_support_scheme = dr_unaligned_supported;
2438 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2440 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2441 masked_p,
2442 vls_type, memory_access_type, poffset,
2443 alignment_support_scheme,
2444 misalignment, gs_info))
2445 return false;
2447 else if (STMT_VINFO_STRIDED_P (stmt_info))
2449 gcc_assert (!slp_node);
2450 if (loop_vinfo
2451 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2452 masked_p, gs_info))
2453 *memory_access_type = VMAT_GATHER_SCATTER;
2454 else
2455 *memory_access_type = VMAT_ELEMENTWISE;
2456 /* Alignment is irrelevant here. */
2457 *alignment_support_scheme = dr_unaligned_supported;
2459 else
2461 int cmp = compare_step_with_zero (vinfo, stmt_info);
2462 if (cmp == 0)
2464 gcc_assert (vls_type == VLS_LOAD);
2465 *memory_access_type = VMAT_INVARIANT;
2466 /* Invariant accesses perform only component accesses, alignment
2467 is irrelevant for them. */
2468 *alignment_support_scheme = dr_unaligned_supported;
2470 else
2472 if (cmp < 0)
2473 *memory_access_type = get_negative_load_store_type
2474 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2475 else
2476 *memory_access_type = VMAT_CONTIGUOUS;
2477 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2478 vectype, *poffset);
2479 *alignment_support_scheme
2480 = vect_supportable_dr_alignment (vinfo,
2481 STMT_VINFO_DR_INFO (stmt_info),
2482 vectype, *misalignment);
2486 if ((*memory_access_type == VMAT_ELEMENTWISE
2487 || *memory_access_type == VMAT_STRIDED_SLP)
2488 && !nunits.is_constant ())
2490 if (dump_enabled_p ())
2491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2492 "Not using elementwise accesses due to variable "
2493 "vectorization factor.\n");
2494 return false;
2497 if (*alignment_support_scheme == dr_unaligned_unsupported)
2499 if (dump_enabled_p ())
2500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2501 "unsupported unaligned access\n");
2502 return false;
2505 /* FIXME: At the moment the cost model seems to underestimate the
2506 cost of using elementwise accesses. This check preserves the
2507 traditional behavior until that can be fixed. */
2508 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2509 if (!first_stmt_info)
2510 first_stmt_info = stmt_info;
2511 if (*memory_access_type == VMAT_ELEMENTWISE
2512 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2513 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2514 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2515 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2517 if (dump_enabled_p ())
2518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2519 "not falling back to elementwise accesses\n");
2520 return false;
2522 return true;
2525 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2526 conditional operation STMT_INFO. When returning true, store the mask
2527 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2528 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2529 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2531 static bool
2532 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2533 slp_tree slp_node, unsigned mask_index,
2534 tree *mask, slp_tree *mask_node,
2535 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2537 enum vect_def_type mask_dt;
2538 tree mask_vectype;
2539 slp_tree mask_node_1;
2540 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2541 mask, &mask_node_1, &mask_dt, &mask_vectype))
2543 if (dump_enabled_p ())
2544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2545 "mask use not simple.\n");
2546 return false;
2549 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2551 if (dump_enabled_p ())
2552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2553 "mask argument is not a boolean.\n");
2554 return false;
2557 /* If the caller is not prepared for adjusting an external/constant
2558 SLP mask vector type fail. */
2559 if (slp_node
2560 && !mask_node
2561 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2563 if (dump_enabled_p ())
2564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2565 "SLP mask argument is not vectorized.\n");
2566 return false;
2569 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2570 if (!mask_vectype)
2571 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2573 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2575 if (dump_enabled_p ())
2576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2577 "could not find an appropriate vector mask type.\n");
2578 return false;
2581 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2582 TYPE_VECTOR_SUBPARTS (vectype)))
2584 if (dump_enabled_p ())
2585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2586 "vector mask type %T"
2587 " does not match vector data type %T.\n",
2588 mask_vectype, vectype);
2590 return false;
2593 *mask_dt_out = mask_dt;
2594 *mask_vectype_out = mask_vectype;
2595 if (mask_node)
2596 *mask_node = mask_node_1;
2597 return true;
2600 /* Return true if stored value RHS is suitable for vectorizing store
2601 statement STMT_INFO. When returning true, store the type of the
2602 definition in *RHS_DT_OUT, the type of the vectorized store value in
2603 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2605 static bool
2606 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2607 slp_tree slp_node, tree rhs,
2608 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2609 vec_load_store_type *vls_type_out)
2611 /* If this is a store from a constant, make sure
2612 native_encode_expr can handle it. */
2613 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2615 if (dump_enabled_p ())
2616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2617 "cannot encode constant as a byte sequence.\n");
2618 return false;
2621 unsigned op_no = 0;
2622 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2624 if (gimple_call_internal_p (call)
2625 && internal_store_fn_p (gimple_call_internal_fn (call)))
2626 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2629 enum vect_def_type rhs_dt;
2630 tree rhs_vectype;
2631 slp_tree slp_op;
2632 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2633 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2635 if (dump_enabled_p ())
2636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2637 "use not simple.\n");
2638 return false;
2641 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2642 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2644 if (dump_enabled_p ())
2645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2646 "incompatible vector types.\n");
2647 return false;
2650 *rhs_dt_out = rhs_dt;
2651 *rhs_vectype_out = rhs_vectype;
2652 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2653 *vls_type_out = VLS_STORE_INVARIANT;
2654 else
2655 *vls_type_out = VLS_STORE;
2656 return true;
2659 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2660 Note that we support masks with floating-point type, in which case the
2661 floats are interpreted as a bitmask. */
2663 static tree
2664 vect_build_all_ones_mask (vec_info *vinfo,
2665 stmt_vec_info stmt_info, tree masktype)
2667 if (TREE_CODE (masktype) == INTEGER_TYPE)
2668 return build_int_cst (masktype, -1);
2669 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2671 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2672 mask = build_vector_from_val (masktype, mask);
2673 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2675 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2677 REAL_VALUE_TYPE r;
2678 long tmp[6];
2679 for (int j = 0; j < 6; ++j)
2680 tmp[j] = -1;
2681 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2682 tree mask = build_real (TREE_TYPE (masktype), r);
2683 mask = build_vector_from_val (masktype, mask);
2684 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2686 gcc_unreachable ();
2689 /* Build an all-zero merge value of type VECTYPE while vectorizing
2690 STMT_INFO as a gather load. */
2692 static tree
2693 vect_build_zero_merge_argument (vec_info *vinfo,
2694 stmt_vec_info stmt_info, tree vectype)
2696 tree merge;
2697 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2698 merge = build_int_cst (TREE_TYPE (vectype), 0);
2699 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2701 REAL_VALUE_TYPE r;
2702 long tmp[6];
2703 for (int j = 0; j < 6; ++j)
2704 tmp[j] = 0;
2705 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2706 merge = build_real (TREE_TYPE (vectype), r);
2708 else
2709 gcc_unreachable ();
2710 merge = build_vector_from_val (vectype, merge);
2711 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2714 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2715 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2716 the gather load operation. If the load is conditional, MASK is the
2717 unvectorized condition under which it occurs; otherwise MASK is
2718 null. */
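/* As an illustration: if the data vector has 4 elements and the builtin's
   offset vector has 8, the gather is widening (modifier WIDEN) and the
   upper half of the offsets is extracted by a permute for the odd copies;
   if the data vector has 8 elements and the offset vector has 4, the
   gather is narrowing (modifier NARROW), twice as many calls are emitted
   and each pair of results is permuted back into one full vector. */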
2720 static void
2721 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2722 gimple_stmt_iterator *gsi,
2723 gimple **vec_stmt,
2724 gather_scatter_info *gs_info,
2725 tree mask)
2727 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2728 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2729 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2730 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2731 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2732 edge pe = loop_preheader_edge (loop);
2733 enum { NARROW, NONE, WIDEN } modifier;
2734 poly_uint64 gather_off_nunits
2735 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2737 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2738 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2739 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2740 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2741 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2742 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2743 tree scaletype = TREE_VALUE (arglist);
2744 tree real_masktype = masktype;
2745 gcc_checking_assert (types_compatible_p (srctype, rettype)
2746 && (!mask
2747 || TREE_CODE (masktype) == INTEGER_TYPE
2748 || types_compatible_p (srctype, masktype)));
2749 if (mask)
2750 masktype = truth_type_for (srctype);
2752 tree mask_halftype = masktype;
2753 tree perm_mask = NULL_TREE;
2754 tree mask_perm_mask = NULL_TREE;
2755 if (known_eq (nunits, gather_off_nunits))
2756 modifier = NONE;
2757 else if (known_eq (nunits * 2, gather_off_nunits))
2759 modifier = WIDEN;
2761 /* Currently widening gathers and scatters are only supported for
2762 fixed-length vectors. */
2763 int count = gather_off_nunits.to_constant ();
2764 vec_perm_builder sel (count, count, 1);
2765 for (int i = 0; i < count; ++i)
2766 sel.quick_push (i | (count / 2));
2768 vec_perm_indices indices (sel, 1, count);
2769 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2770 indices);
2772 else if (known_eq (nunits, gather_off_nunits * 2))
2774 modifier = NARROW;
2776 /* Currently narrowing gathers and scatters are only supported for
2777 fixed-length vectors. */
2778 int count = nunits.to_constant ();
2779 vec_perm_builder sel (count, count, 1);
2780 sel.quick_grow (count);
2781 for (int i = 0; i < count; ++i)
2782 sel[i] = i < count / 2 ? i : i + count / 2;
2783 vec_perm_indices indices (sel, 2, count);
2784 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2786 ncopies *= 2;
2788 if (mask && VECTOR_TYPE_P (real_masktype))
2790 for (int i = 0; i < count; ++i)
2791 sel[i] = i | (count / 2);
2792 indices.new_vector (sel, 2, count);
2793 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2795 else if (mask)
2796 mask_halftype = truth_type_for (gs_info->offset_vectype);
2798 else
2799 gcc_unreachable ();
2801 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2802 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2804 tree ptr = fold_convert (ptrtype, gs_info->base);
2805 if (!is_gimple_min_invariant (ptr))
2807 gimple_seq seq;
2808 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2809 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2810 gcc_assert (!new_bb);
2813 tree scale = build_int_cst (scaletype, gs_info->scale);
2815 tree vec_oprnd0 = NULL_TREE;
2816 tree vec_mask = NULL_TREE;
2817 tree src_op = NULL_TREE;
2818 tree mask_op = NULL_TREE;
2819 tree prev_res = NULL_TREE;
2821 if (!mask)
2823 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2824 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2827 auto_vec<tree> vec_oprnds0;
2828 auto_vec<tree> vec_masks;
2829 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2830 modifier == WIDEN ? ncopies / 2 : ncopies,
2831 gs_info->offset, &vec_oprnds0);
2832 if (mask)
2833 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2834 modifier == NARROW ? ncopies / 2 : ncopies,
2835 mask, &vec_masks, masktype);
2836 for (int j = 0; j < ncopies; ++j)
2838 tree op, var;
2839 if (modifier == WIDEN && (j & 1))
2840 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2841 perm_mask, stmt_info, gsi);
2842 else
2843 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2845 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2847 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2848 TYPE_VECTOR_SUBPARTS (idxtype)));
2849 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2850 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2851 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2852 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2853 op = var;
2856 if (mask)
2858 if (mask_perm_mask && (j & 1))
2859 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2860 mask_perm_mask, stmt_info, gsi);
2861 else
2863 if (modifier == NARROW)
2865 if ((j & 1) == 0)
2866 vec_mask = vec_masks[j / 2];
2868 else
2869 vec_mask = vec_masks[j];
2871 mask_op = vec_mask;
2872 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2874 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2875 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2876 gcc_assert (known_eq (sub1, sub2));
2877 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2878 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2879 gassign *new_stmt
2880 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2881 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2882 mask_op = var;
2885 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2887 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2888 gassign *new_stmt
2889 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2890 : VEC_UNPACK_LO_EXPR,
2891 mask_op);
2892 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2893 mask_op = var;
2895 src_op = mask_op;
2898 tree mask_arg = mask_op;
2899 if (masktype != real_masktype)
2901 tree utype, optype = TREE_TYPE (mask_op);
2902 if (VECTOR_TYPE_P (real_masktype)
2903 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2904 utype = real_masktype;
2905 else
2906 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2907 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2908 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2909 gassign *new_stmt
2910 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2911 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2912 mask_arg = var;
2913 if (!useless_type_conversion_p (real_masktype, utype))
2915 gcc_assert (TYPE_PRECISION (utype)
2916 <= TYPE_PRECISION (real_masktype));
2917 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2918 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2919 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2920 mask_arg = var;
2922 src_op = build_zero_cst (srctype);
2924 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2925 mask_arg, scale);
2927 if (!useless_type_conversion_p (vectype, rettype))
2929 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2930 TYPE_VECTOR_SUBPARTS (rettype)));
2931 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2932 gimple_call_set_lhs (new_stmt, op);
2933 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2934 var = make_ssa_name (vec_dest);
2935 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2936 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2937 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2939 else
2941 var = make_ssa_name (vec_dest, new_stmt);
2942 gimple_call_set_lhs (new_stmt, var);
2943 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2946 if (modifier == NARROW)
2948 if ((j & 1) == 0)
2950 prev_res = var;
2951 continue;
2953 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2954 stmt_info, gsi);
2955 new_stmt = SSA_NAME_DEF_STMT (var);
2958 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2960 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2963 /* Prepare the base and offset in GS_INFO for vectorization.
2964 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2965 to the vectorized offset argument for the first copy of STMT_INFO.
2966 STMT_INFO is the statement described by GS_INFO and LOOP is the
2967 containing loop. */
2969 static void
2970 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2971 class loop *loop, stmt_vec_info stmt_info,
2972 slp_tree slp_node, gather_scatter_info *gs_info,
2973 tree *dataref_ptr, vec<tree> *vec_offset)
2975 gimple_seq stmts = NULL;
2976 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2977 if (stmts != NULL)
2979 basic_block new_bb;
2980 edge pe = loop_preheader_edge (loop);
2981 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2982 gcc_assert (!new_bb);
2984 if (slp_node)
2985 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2986 else
2988 unsigned ncopies
2989 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2990 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2991 gs_info->offset, vec_offset,
2992 gs_info->offset_vectype);
2996 /* Prepare to implement a grouped or strided load or store using
2997 the gather load or scatter store operation described by GS_INFO.
2998 STMT_INFO is the load or store statement.
3000 Set *DATAREF_BUMP to the amount that should be added to the base
3001 address after each copy of the vectorized statement. Set *VEC_OFFSET
3002 to an invariant offset vector in which element I has the value
3003 I * DR_STEP / SCALE. */
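/* For example (values chosen for illustration), with DR_STEP = 8,
   SCALE = 4 and a 4-element offset vector this sets *VEC_OFFSET to
   { 0, 2, 4, 6 } and *DATAREF_BUMP to 8 * 4 = 32 bytes per copy. */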
3005 static void
3006 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3007 loop_vec_info loop_vinfo,
3008 gather_scatter_info *gs_info,
3009 tree *dataref_bump, tree *vec_offset)
3011 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3012 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3014 tree bump = size_binop (MULT_EXPR,
3015 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3016 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3017 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3019 /* The offset given in GS_INFO can have pointer type, so use the element
3020 type of the vector instead. */
3021 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3023 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3024 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3025 ssize_int (gs_info->scale));
3026 step = fold_convert (offset_type, step);
3028 /* Create {0, X, X*2, X*3, ...}. */
3029 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3030 build_zero_cst (offset_type), step);
3031 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3034 /* Return the amount that should be added to a vector pointer to move
3035 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3036 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3037 vectorization. */
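/* For example, for a contiguous access with AGGR_TYPE V4SI the increment
   is 16 bytes, negated when the DR's step is negative; invariant accesses
   get a zero increment. */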
3039 static tree
3040 vect_get_data_ptr_increment (vec_info *vinfo,
3041 dr_vec_info *dr_info, tree aggr_type,
3042 vect_memory_access_type memory_access_type)
3044 if (memory_access_type == VMAT_INVARIANT)
3045 return size_zero_node;
3047 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3048 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3049 if (tree_int_cst_sgn (step) == -1)
3050 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3051 return iv_step;
3054 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3056 static bool
3057 vectorizable_bswap (vec_info *vinfo,
3058 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3059 gimple **vec_stmt, slp_tree slp_node,
3060 slp_tree *slp_op,
3061 tree vectype_in, stmt_vector_for_cost *cost_vec)
3063 tree op, vectype;
3064 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3065 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3066 unsigned ncopies;
3068 op = gimple_call_arg (stmt, 0);
3069 vectype = STMT_VINFO_VECTYPE (stmt_info);
3070 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3072 /* Multiple types in SLP are handled by creating the appropriate number of
3073 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3074 case of SLP. */
3075 if (slp_node)
3076 ncopies = 1;
3077 else
3078 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3080 gcc_assert (ncopies >= 1);
3082 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3083 if (! char_vectype)
3084 return false;
3086 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3087 unsigned word_bytes;
3088 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3089 return false;
3091 /* The encoding uses one stepped pattern for each byte in the word. */
3092 vec_perm_builder elts (num_bytes, word_bytes, 3);
3093 for (unsigned i = 0; i < 3; ++i)
3094 for (unsigned j = 0; j < word_bytes; ++j)
3095 elts.quick_push ((i + 1) * word_bytes - j - 1);
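/* For example, for a bswap32 on a 16-byte vector (word_bytes == 4) the
   full permutation is { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... },
   i.e. the bytes of each 4-byte word are reversed in place. */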
3097 vec_perm_indices indices (elts, 1, num_bytes);
3098 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3099 return false;
3101 if (! vec_stmt)
3103 if (slp_node
3104 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3106 if (dump_enabled_p ())
3107 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3108 "incompatible vector types for invariants\n");
3109 return false;
3112 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3113 DUMP_VECT_SCOPE ("vectorizable_bswap");
3114 record_stmt_cost (cost_vec,
3115 1, vector_stmt, stmt_info, 0, vect_prologue);
3116 record_stmt_cost (cost_vec,
3117 slp_node
3118 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3119 vec_perm, stmt_info, 0, vect_body);
3120 return true;
3123 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3125 /* Transform. */
3126 vec<tree> vec_oprnds = vNULL;
3127 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3128 op, &vec_oprnds);
3129 /* Arguments are ready. Create the new vector stmt. */
3130 unsigned i;
3131 tree vop;
3132 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3134 gimple *new_stmt;
3135 tree tem = make_ssa_name (char_vectype);
3136 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3137 char_vectype, vop));
3138 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3139 tree tem2 = make_ssa_name (char_vectype);
3140 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3141 tem, tem, bswap_vconst);
3142 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3143 tem = make_ssa_name (vectype);
3144 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3145 vectype, tem2));
3146 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3147 if (slp_node)
3148 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3149 else
3150 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3153 if (!slp_node)
3154 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3156 vec_oprnds.release ();
3157 return true;
3160 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3161 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3162 in a single step. On success, store the binary pack code in
3163 *CONVERT_CODE. */
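/* For example, if a call produces V4DI results but the statement's vector
   output type is V8SI, two V4DI result vectors can be packed into one
   V8SI vector in a single step; the binary pack code used for that is
   what gets stored in *CONVERT_CODE. */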
3165 static bool
3166 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3167 tree_code *convert_code)
3169 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3170 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3171 return false;
3173 tree_code code;
3174 int multi_step_cvt = 0;
3175 auto_vec <tree, 8> interm_types;
3176 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3177 &code, &multi_step_cvt, &interm_types)
3178 || multi_step_cvt)
3179 return false;
3181 *convert_code = code;
3182 return true;
3185 /* Function vectorizable_call.
3187 Check if STMT_INFO performs a function call that can be vectorized.
3188 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3189 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3190 Return true if STMT_INFO is vectorizable in this way. */
3192 static bool
3193 vectorizable_call (vec_info *vinfo,
3194 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3195 gimple **vec_stmt, slp_tree slp_node,
3196 stmt_vector_for_cost *cost_vec)
3198 gcall *stmt;
3199 tree vec_dest;
3200 tree scalar_dest;
3201 tree op;
3202 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3203 tree vectype_out, vectype_in;
3204 poly_uint64 nunits_in;
3205 poly_uint64 nunits_out;
3206 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3207 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3208 tree fndecl, new_temp, rhs_type;
3209 enum vect_def_type dt[4]
3210 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3211 vect_unknown_def_type };
3212 tree vectypes[ARRAY_SIZE (dt)] = {};
3213 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3214 int ndts = ARRAY_SIZE (dt);
3215 int ncopies, j;
3216 auto_vec<tree, 8> vargs;
3217 enum { NARROW, NONE, WIDEN } modifier;
3218 size_t i, nargs;
3219 tree lhs;
3221 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3222 return false;
3224 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3225 && ! vec_stmt)
3226 return false;
3228 /* Is STMT_INFO a vectorizable call? */
3229 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3230 if (!stmt)
3231 return false;
3233 if (gimple_call_internal_p (stmt)
3234 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3235 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3236 /* Handled by vectorizable_load and vectorizable_store. */
3237 return false;
3239 if (gimple_call_lhs (stmt) == NULL_TREE
3240 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3241 return false;
3243 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3245 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3247 /* Process function arguments. */
3248 rhs_type = NULL_TREE;
3249 vectype_in = NULL_TREE;
3250 nargs = gimple_call_num_args (stmt);
3252 /* Bail out if the function has more than four arguments; we do not have
3253 interesting builtin functions to vectorize with more than two arguments,
3254 except for fma. Zero arguments is not supported either. */
3255 if (nargs == 0 || nargs > 4)
3256 return false;
3258 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3259 combined_fn cfn = gimple_call_combined_fn (stmt);
3260 if (cfn == CFN_GOMP_SIMD_LANE)
3262 nargs = 0;
3263 rhs_type = unsigned_type_node;
3266 int mask_opno = -1;
3267 if (internal_fn_p (cfn))
3268 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3270 for (i = 0; i < nargs; i++)
3272 if ((int) i == mask_opno)
3274 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3275 &op, &slp_op[i], &dt[i], &vectypes[i]))
3276 return false;
3277 continue;
3280 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3281 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3283 if (dump_enabled_p ())
3284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3285 "use not simple.\n");
3286 return false;
3289 /* We can only handle calls with arguments of the same type. */
3290 if (rhs_type
3291 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3293 if (dump_enabled_p ())
3294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3295 "argument types differ.\n");
3296 return false;
3298 if (!rhs_type)
3299 rhs_type = TREE_TYPE (op);
3301 if (!vectype_in)
3302 vectype_in = vectypes[i];
3303 else if (vectypes[i]
3304 && !types_compatible_p (vectypes[i], vectype_in))
3306 if (dump_enabled_p ())
3307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3308 "argument vector types differ.\n");
3309 return false;
3312 /* If all arguments are external or constant defs, infer the vector type
3313 from the scalar type. */
3314 if (!vectype_in)
3315 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3316 if (vec_stmt)
3317 gcc_assert (vectype_in);
3318 if (!vectype_in)
3320 if (dump_enabled_p ())
3321 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3322 "no vectype for scalar type %T\n", rhs_type);
3324 return false;
3326 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3327 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3328 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3329 by a pack of the two vectors into an SI vector. We would need
3330 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3331 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3333 if (dump_enabled_p ())
3334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3335 "mismatched vector sizes %T and %T\n",
3336 vectype_in, vectype_out);
3337 return false;
3340 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3341 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3343 if (dump_enabled_p ())
3344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3345 "mixed mask and nonmask vector types\n");
3346 return false;
3349 /* FORNOW */
3350 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3351 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3352 if (known_eq (nunits_in * 2, nunits_out))
3353 modifier = NARROW;
3354 else if (known_eq (nunits_out, nunits_in))
3355 modifier = NONE;
3356 else if (known_eq (nunits_out * 2, nunits_in))
3357 modifier = WIDEN;
3358 else
3359 return false;
3361 /* We only handle functions that do not read or clobber memory. */
3362 if (gimple_vuse (stmt))
3364 if (dump_enabled_p ())
3365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3366 "function reads from or writes to memory.\n");
3367 return false;
3370 /* For now, we only vectorize functions if a target-specific builtin
3371 is available. TODO -- in some cases, it might be profitable to
3372 insert the calls for pieces of the vector, in order to be able
3373 to vectorize other operations in the loop. */
3374 fndecl = NULL_TREE;
3375 internal_fn ifn = IFN_LAST;
3376 tree callee = gimple_call_fndecl (stmt);
3378 /* First try using an internal function. */
3379 tree_code convert_code = ERROR_MARK;
3380 if (cfn != CFN_LAST
3381 && (modifier == NONE
3382 || (modifier == NARROW
3383 && simple_integer_narrowing (vectype_out, vectype_in,
3384 &convert_code))))
3385 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3386 vectype_in);
3388 /* If that fails, try asking for a target-specific built-in function. */
3389 if (ifn == IFN_LAST)
3391 if (cfn != CFN_LAST)
3392 fndecl = targetm.vectorize.builtin_vectorized_function
3393 (cfn, vectype_out, vectype_in);
3394 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3395 fndecl = targetm.vectorize.builtin_md_vectorized_function
3396 (callee, vectype_out, vectype_in);
3399 if (ifn == IFN_LAST && !fndecl)
3401 if (cfn == CFN_GOMP_SIMD_LANE
3402 && !slp_node
3403 && loop_vinfo
3404 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3405 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3406 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3407 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3409 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3410 { 0, 1, 2, ... vf - 1 } vector. */
3411 gcc_assert (nargs == 0);
3413 else if (modifier == NONE
3414 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3415 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3416 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3417 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3418 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3419 slp_op, vectype_in, cost_vec);
3420 else
3422 if (dump_enabled_p ())
3423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3424 "function is not vectorizable.\n");
3425 return false;
3429 if (slp_node)
3430 ncopies = 1;
3431 else if (modifier == NARROW && ifn == IFN_LAST)
3432 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3433 else
3434 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3436 /* Sanity check: make sure that at least one copy of the vectorized stmt
3437 needs to be generated. */
3438 gcc_assert (ncopies >= 1);
3440 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3441 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3442 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3443 if (!vec_stmt) /* transformation not required. */
3445 if (slp_node)
3446 for (i = 0; i < nargs; ++i)
3447 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3449 if (dump_enabled_p ())
3450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3451 "incompatible vector types for invariants\n");
3452 return false;
3454 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3455 DUMP_VECT_SCOPE ("vectorizable_call");
3456 vect_model_simple_cost (vinfo, stmt_info,
3457 ncopies, dt, ndts, slp_node, cost_vec);
3458 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3459 record_stmt_cost (cost_vec, ncopies / 2,
3460 vec_promote_demote, stmt_info, 0, vect_body);
3462 if (loop_vinfo
3463 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3464 && (reduc_idx >= 0 || mask_opno >= 0))
3466 if (reduc_idx >= 0
3467 && (cond_fn == IFN_LAST
3468 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3469 OPTIMIZE_FOR_SPEED)))
3471 if (dump_enabled_p ())
3472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3473 "can't use a fully-masked loop because no"
3474 " conditional operation is available.\n");
3475 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3477 else
3479 unsigned int nvectors
3480 = (slp_node
3481 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3482 : ncopies);
3483 tree scalar_mask = NULL_TREE;
3484 if (mask_opno >= 0)
3485 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3486 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3487 vectype_out, scalar_mask);
3490 return true;
3493 /* Transform. */
3495 if (dump_enabled_p ())
3496 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3498 /* Handle def. */
3499 scalar_dest = gimple_call_lhs (stmt);
3500 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3502 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3503 unsigned int vect_nargs = nargs;
3504 if (masked_loop_p && reduc_idx >= 0)
3506 ifn = cond_fn;
3507 vect_nargs += 2;
3510 if (modifier == NONE || ifn != IFN_LAST)
3512 tree prev_res = NULL_TREE;
3513 vargs.safe_grow (vect_nargs, true);
3514 auto_vec<vec<tree> > vec_defs (nargs);
3515 for (j = 0; j < ncopies; ++j)
3517 /* Build argument list for the vectorized call. */
3518 if (slp_node)
3520 vec<tree> vec_oprnds0;
3522 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3523 vec_oprnds0 = vec_defs[0];
3525 /* Arguments are ready. Create the new vector stmt. */
3526 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3528 int varg = 0;
3529 if (masked_loop_p && reduc_idx >= 0)
3531 unsigned int vec_num = vec_oprnds0.length ();
3532 /* Always true for SLP. */
3533 gcc_assert (ncopies == 1);
3534 vargs[varg++] = vect_get_loop_mask (gsi, masks, vec_num,
3535 vectype_out, i);
3537 size_t k;
3538 for (k = 0; k < nargs; k++)
3540 vec<tree> vec_oprndsk = vec_defs[k];
3541 vargs[varg++] = vec_oprndsk[i];
3543 if (masked_loop_p && reduc_idx >= 0)
3544 vargs[varg++] = vargs[reduc_idx + 1];
3545 gimple *new_stmt;
3546 if (modifier == NARROW)
3548 /* We don't define any narrowing conditional functions
3549 at present. */
3550 gcc_assert (mask_opno < 0);
3551 tree half_res = make_ssa_name (vectype_in);
3552 gcall *call
3553 = gimple_build_call_internal_vec (ifn, vargs);
3554 gimple_call_set_lhs (call, half_res);
3555 gimple_call_set_nothrow (call, true);
3556 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3557 if ((i & 1) == 0)
3559 prev_res = half_res;
3560 continue;
3562 new_temp = make_ssa_name (vec_dest);
3563 new_stmt = gimple_build_assign (new_temp, convert_code,
3564 prev_res, half_res);
3565 vect_finish_stmt_generation (vinfo, stmt_info,
3566 new_stmt, gsi);
3568 else
3570 if (mask_opno >= 0 && masked_loop_p)
3572 unsigned int vec_num = vec_oprnds0.length ();
3573 /* Always true for SLP. */
3574 gcc_assert (ncopies == 1);
3575 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3576 vectype_out, i);
3577 vargs[mask_opno] = prepare_vec_mask
3578 (loop_vinfo, TREE_TYPE (mask), mask,
3579 vargs[mask_opno], gsi);
3582 gcall *call;
3583 if (ifn != IFN_LAST)
3584 call = gimple_build_call_internal_vec (ifn, vargs);
3585 else
3586 call = gimple_build_call_vec (fndecl, vargs);
3587 new_temp = make_ssa_name (vec_dest, call);
3588 gimple_call_set_lhs (call, new_temp);
3589 gimple_call_set_nothrow (call, true);
3590 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3591 new_stmt = call;
3593 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3595 continue;
3598 int varg = 0;
3599 if (masked_loop_p && reduc_idx >= 0)
3600 vargs[varg++] = vect_get_loop_mask (gsi, masks, ncopies,
3601 vectype_out, j);
3602 for (i = 0; i < nargs; i++)
3604 op = gimple_call_arg (stmt, i);
3605 if (j == 0)
3607 vec_defs.quick_push (vNULL);
3608 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3609 op, &vec_defs[i],
3610 vectypes[i]);
3612 vargs[varg++] = vec_defs[i][j];
3614 if (masked_loop_p && reduc_idx >= 0)
3615 vargs[varg++] = vargs[reduc_idx + 1];
3617 if (mask_opno >= 0 && masked_loop_p)
3619 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3620 vectype_out, j);
3621 vargs[mask_opno]
3622 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3623 vargs[mask_opno], gsi);
3626 gimple *new_stmt;
3627 if (cfn == CFN_GOMP_SIMD_LANE)
3629 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
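/* E.g. (illustrative) with a V4SI vectype and ncopies == 2, copy 0 gets the
   constant { 0, 1, 2, 3 } and copy 1 gets { 4, 5, 6, 7 }, i.e. the simd
   lane numbers.  */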
3630 tree new_var
3631 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3632 gimple *init_stmt = gimple_build_assign (new_var, cst);
3633 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3634 new_temp = make_ssa_name (vec_dest);
3635 new_stmt = gimple_build_assign (new_temp, new_var);
3636 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3638 else if (modifier == NARROW)
3640 /* We don't define any narrowing conditional functions at
3641 present. */
3642 gcc_assert (mask_opno < 0);
3643 tree half_res = make_ssa_name (vectype_in);
3644 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3645 gimple_call_set_lhs (call, half_res);
3646 gimple_call_set_nothrow (call, true);
3647 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3648 if ((j & 1) == 0)
3650 prev_res = half_res;
3651 continue;
3653 new_temp = make_ssa_name (vec_dest);
3654 new_stmt = gimple_build_assign (new_temp, convert_code,
3655 prev_res, half_res);
3656 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3658 else
3660 gcall *call;
3661 if (ifn != IFN_LAST)
3662 call = gimple_build_call_internal_vec (ifn, vargs);
3663 else
3664 call = gimple_build_call_vec (fndecl, vargs);
3665 new_temp = make_ssa_name (vec_dest, call);
3666 gimple_call_set_lhs (call, new_temp);
3667 gimple_call_set_nothrow (call, true);
3668 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3669 new_stmt = call;
3672 if (j == (modifier == NARROW ? 1 : 0))
3673 *vec_stmt = new_stmt;
3674 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3676 for (i = 0; i < nargs; i++)
3678 vec<tree> vec_oprndsi = vec_defs[i];
3679 vec_oprndsi.release ();
3682 else if (modifier == NARROW)
3684 auto_vec<vec<tree> > vec_defs (nargs);
3685 /* We don't define any narrowing conditional functions at present. */
3686 gcc_assert (mask_opno < 0);
3687 for (j = 0; j < ncopies; ++j)
3689 /* Build argument list for the vectorized call. */
3690 if (j == 0)
3691 vargs.create (nargs * 2);
3692 else
3693 vargs.truncate (0);
3695 if (slp_node)
3697 vec<tree> vec_oprnds0;
3699 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3700 vec_oprnds0 = vec_defs[0];
3702 /* Arguments are ready. Create the new vector stmt. */
3703 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3705 size_t k;
3706 vargs.truncate (0);
3707 for (k = 0; k < nargs; k++)
3709 vec<tree> vec_oprndsk = vec_defs[k];
3710 vargs.quick_push (vec_oprndsk[i]);
3711 vargs.quick_push (vec_oprndsk[i + 1]);
3713 gcall *call;
3714 if (ifn != IFN_LAST)
3715 call = gimple_build_call_internal_vec (ifn, vargs);
3716 else
3717 call = gimple_build_call_vec (fndecl, vargs);
3718 new_temp = make_ssa_name (vec_dest, call);
3719 gimple_call_set_lhs (call, new_temp);
3720 gimple_call_set_nothrow (call, true);
3721 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3722 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3724 continue;
3727 for (i = 0; i < nargs; i++)
3729 op = gimple_call_arg (stmt, i);
3730 if (j == 0)
3732 vec_defs.quick_push (vNULL);
3733 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3734 op, &vec_defs[i], vectypes[i]);
3736 vec_oprnd0 = vec_defs[i][2*j];
3737 vec_oprnd1 = vec_defs[i][2*j+1];
3739 vargs.quick_push (vec_oprnd0);
3740 vargs.quick_push (vec_oprnd1);
3743 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3744 new_temp = make_ssa_name (vec_dest, new_stmt);
3745 gimple_call_set_lhs (new_stmt, new_temp);
3746 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3748 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3751 if (!slp_node)
3752 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3754 for (i = 0; i < nargs; i++)
3756 vec<tree> vec_oprndsi = vec_defs[i];
3757 vec_oprndsi.release ();
3760 else
3761 /* No current target implements this case. */
3762 return false;
3764 vargs.release ();
3766 /* The call in STMT might prevent it from being removed in DCE.
3767 We cannot, however, remove it here, due to the way the SSA name
3768 it defines is mapped to the new definition. So just replace the
3769 rhs of the statement with something harmless. */
3771 if (slp_node)
3772 return true;
3774 stmt_info = vect_orig_stmt (stmt_info);
3775 lhs = gimple_get_lhs (stmt_info->stmt);
3777 gassign *new_stmt
3778 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3779 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3781 return true;
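/* Illustrative sketch, not part of GCC: a plain-C model of the NARROW
   scheme handled above, in which a DI->SI operation such as
   __builtin_ctzll is evaluated at full width and the wide results are
   then packed into the narrow result vector, mirroring the pair of
   internal-function calls followed by the CONVERT_CODE statement.
   The fixed lane counts are illustrative assumptions only.  */

static void ATTRIBUTE_UNUSED
narrow_call_model_sketch (const unsigned long long in[4], unsigned int out[4])
{
  unsigned long long half_res[4];

  /* Two "half" calls, each covering one two-lane chunk of the input.  */
  for (int i = 0; i < 4; i++)
    half_res[i] = in[i] ? (unsigned long long) __builtin_ctzll (in[i]) : 64;

  /* The pack/convert step that combines the two wide half results.  */
  for (int i = 0; i < 4; i++)
    out[i] = (unsigned int) half_res[i];
}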
3785 struct simd_call_arg_info
3787 tree vectype;
3788 tree op;
3789 HOST_WIDE_INT linear_step;
3790 enum vect_def_type dt;
3791 unsigned int align;
3792 bool simd_lane_linear;
3795 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3796 is linear within a simd lane (but not within the whole loop), note it in
3797 *ARGINFO. */
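/* For example (illustrative): for OP defined, after folding the chain of
   defining statements, as
   p_7 = &array + (sizetype) (.GOMP_SIMD_LANE (simduid.0_1) * 8)
   the recorded base is &array and the recorded linear step is 8.  */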
3799 static void
3800 vect_simd_lane_linear (tree op, class loop *loop,
3801 struct simd_call_arg_info *arginfo)
3803 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3805 if (!is_gimple_assign (def_stmt)
3806 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3807 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3808 return;
3810 tree base = gimple_assign_rhs1 (def_stmt);
3811 HOST_WIDE_INT linear_step = 0;
3812 tree v = gimple_assign_rhs2 (def_stmt);
3813 while (TREE_CODE (v) == SSA_NAME)
3815 tree t;
3816 def_stmt = SSA_NAME_DEF_STMT (v);
3817 if (is_gimple_assign (def_stmt))
3818 switch (gimple_assign_rhs_code (def_stmt))
3820 case PLUS_EXPR:
3821 t = gimple_assign_rhs2 (def_stmt);
3822 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3823 return;
3824 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3825 v = gimple_assign_rhs1 (def_stmt);
3826 continue;
3827 case MULT_EXPR:
3828 t = gimple_assign_rhs2 (def_stmt);
3829 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3830 return;
3831 linear_step = tree_to_shwi (t);
3832 v = gimple_assign_rhs1 (def_stmt);
3833 continue;
3834 CASE_CONVERT:
3835 t = gimple_assign_rhs1 (def_stmt);
3836 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3837 || (TYPE_PRECISION (TREE_TYPE (v))
3838 < TYPE_PRECISION (TREE_TYPE (t))))
3839 return;
3840 if (!linear_step)
3841 linear_step = 1;
3842 v = t;
3843 continue;
3844 default:
3845 return;
3847 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3848 && loop->simduid
3849 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3850 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3851 == loop->simduid))
3853 if (!linear_step)
3854 linear_step = 1;
3855 arginfo->linear_step = linear_step;
3856 arginfo->op = base;
3857 arginfo->simd_lane_linear = true;
3858 return;
3863 /* Return the number of elements in vector type VECTYPE, which is associated
3864 with a SIMD clone. At present these vectors always have a constant
3865 length. */
3867 static unsigned HOST_WIDE_INT
3868 simd_clone_subparts (tree vectype)
3870 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3873 /* Function vectorizable_simd_clone_call.
3875 Check if STMT_INFO performs a function call that can be vectorized
3876 by calling a simd clone of the function.
3877 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3878 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3879 Return true if STMT_INFO is vectorizable in this way. */
3881 static bool
3882 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3883 gimple_stmt_iterator *gsi,
3884 gimple **vec_stmt, slp_tree slp_node,
3885 stmt_vector_for_cost *)
3887 tree vec_dest;
3888 tree scalar_dest;
3889 tree op, type;
3890 tree vec_oprnd0 = NULL_TREE;
3891 tree vectype;
3892 poly_uint64 nunits;
3893 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3894 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3895 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3896 tree fndecl, new_temp;
3897 int ncopies, j;
3898 auto_vec<simd_call_arg_info> arginfo;
3899 vec<tree> vargs = vNULL;
3900 size_t i, nargs;
3901 tree lhs, rtype, ratype;
3902 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3904 /* Is STMT a vectorizable call? */
3905 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3906 if (!stmt)
3907 return false;
3909 fndecl = gimple_call_fndecl (stmt);
3910 if (fndecl == NULL_TREE)
3911 return false;
3913 struct cgraph_node *node = cgraph_node::get (fndecl);
3914 if (node == NULL || node->simd_clones == NULL)
3915 return false;
3917 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3918 return false;
3920 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3921 && ! vec_stmt)
3922 return false;
3924 if (gimple_call_lhs (stmt)
3925 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3926 return false;
3928 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3930 vectype = STMT_VINFO_VECTYPE (stmt_info);
3932 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3933 return false;
3935 /* FORNOW */
3936 if (slp_node)
3937 return false;
3939 /* Process function arguments. */
3940 nargs = gimple_call_num_args (stmt);
3942 /* Bail out if the function has zero arguments. */
3943 if (nargs == 0)
3944 return false;
3946 arginfo.reserve (nargs, true);
3948 for (i = 0; i < nargs; i++)
3950 simd_call_arg_info thisarginfo;
3951 affine_iv iv;
3953 thisarginfo.linear_step = 0;
3954 thisarginfo.align = 0;
3955 thisarginfo.op = NULL_TREE;
3956 thisarginfo.simd_lane_linear = false;
3958 op = gimple_call_arg (stmt, i);
3959 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3960 &thisarginfo.vectype)
3961 || thisarginfo.dt == vect_uninitialized_def)
3963 if (dump_enabled_p ())
3964 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3965 "use not simple.\n");
3966 return false;
3969 if (thisarginfo.dt == vect_constant_def
3970 || thisarginfo.dt == vect_external_def)
3971 gcc_assert (thisarginfo.vectype == NULL_TREE);
3972 else
3974 gcc_assert (thisarginfo.vectype != NULL_TREE);
3975 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3977 if (dump_enabled_p ())
3978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3979 "vector mask arguments are not supported\n");
3980 return false;
3984 /* For linear arguments, the analysis phase should have saved
3985 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
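/* The recorded layout (see the !vec_stmt branch below): entry 0 is the
   selected clone's fndecl, and for argument I entries 3*I+1, 3*I+2 and
   3*I+3 hold the linear base, the linear step and the simd-lane-linear
   flag respectively.  */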
3986 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3987 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3989 gcc_assert (vec_stmt);
3990 thisarginfo.linear_step
3991 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3992 thisarginfo.op
3993 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3994 thisarginfo.simd_lane_linear
3995 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3996 == boolean_true_node);
3997 /* If the loop has been peeled for alignment, we need to adjust the recorded base accordingly. */
3998 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3999 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4000 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4002 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4003 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4004 tree opt = TREE_TYPE (thisarginfo.op);
4005 bias = fold_convert (TREE_TYPE (step), bias);
4006 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4007 thisarginfo.op
4008 = fold_build2 (POINTER_TYPE_P (opt)
4009 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4010 thisarginfo.op, bias);
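/* For example (illustrative): if peeling for alignment removed three
   iterations (n1 - n2 == 3) and the recorded step is 4, the linear base
   is advanced by 12 so that the first vector iteration sees the right
   starting value.  */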
4013 else if (!vec_stmt
4014 && thisarginfo.dt != vect_constant_def
4015 && thisarginfo.dt != vect_external_def
4016 && loop_vinfo
4017 && TREE_CODE (op) == SSA_NAME
4018 && simple_iv (loop, loop_containing_stmt (stmt), op,
4019 &iv, false)
4020 && tree_fits_shwi_p (iv.step))
4022 thisarginfo.linear_step = tree_to_shwi (iv.step);
4023 thisarginfo.op = iv.base;
4025 else if ((thisarginfo.dt == vect_constant_def
4026 || thisarginfo.dt == vect_external_def)
4027 && POINTER_TYPE_P (TREE_TYPE (op)))
4028 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4029 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4030 linear too. */
4031 if (POINTER_TYPE_P (TREE_TYPE (op))
4032 && !thisarginfo.linear_step
4033 && !vec_stmt
4034 && thisarginfo.dt != vect_constant_def
4035 && thisarginfo.dt != vect_external_def
4036 && loop_vinfo
4037 && !slp_node
4038 && TREE_CODE (op) == SSA_NAME)
4039 vect_simd_lane_linear (op, loop, &thisarginfo);
4041 arginfo.quick_push (thisarginfo);
4044 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4045 if (!vf.is_constant ())
4047 if (dump_enabled_p ())
4048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4049 "not considering SIMD clones; not yet supported"
4050 " for variable-width vectors.\n");
4051 return false;
4054 unsigned int badness = 0;
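/* Illustrative example of the scoring below: with vf == 8, a clone of
   simdlen 8 needs a single call (no penalty) while a clone of simdlen 4
   needs two calls and gets exact_log2 (2) * 4096 == 4096 added to its
   badness, so the wider clone wins when both are otherwise usable.  */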
4055 struct cgraph_node *bestn = NULL;
4056 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4057 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4058 else
4059 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4060 n = n->simdclone->next_clone)
4062 unsigned int this_badness = 0;
4063 unsigned int num_calls;
4064 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4065 || n->simdclone->nargs != nargs)
4066 continue;
4067 if (num_calls != 1)
4068 this_badness += exact_log2 (num_calls) * 4096;
4069 if (n->simdclone->inbranch)
4070 this_badness += 8192;
4071 int target_badness = targetm.simd_clone.usable (n);
4072 if (target_badness < 0)
4073 continue;
4074 this_badness += target_badness * 512;
4075 /* FORNOW: Have to add code to add the mask argument. */
4076 if (n->simdclone->inbranch)
4077 continue;
4078 for (i = 0; i < nargs; i++)
4080 switch (n->simdclone->args[i].arg_type)
4082 case SIMD_CLONE_ARG_TYPE_VECTOR:
4083 if (!useless_type_conversion_p
4084 (n->simdclone->args[i].orig_type,
4085 TREE_TYPE (gimple_call_arg (stmt, i))))
4086 i = -1;
4087 else if (arginfo[i].dt == vect_constant_def
4088 || arginfo[i].dt == vect_external_def
4089 || arginfo[i].linear_step)
4090 this_badness += 64;
4091 break;
4092 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4093 if (arginfo[i].dt != vect_constant_def
4094 && arginfo[i].dt != vect_external_def)
4095 i = -1;
4096 break;
4097 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4098 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4099 if (arginfo[i].dt == vect_constant_def
4100 || arginfo[i].dt == vect_external_def
4101 || (arginfo[i].linear_step
4102 != n->simdclone->args[i].linear_step))
4103 i = -1;
4104 break;
4105 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4106 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4107 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4108 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4109 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4110 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4111 /* FORNOW */
4112 i = -1;
4113 break;
4114 case SIMD_CLONE_ARG_TYPE_MASK:
4115 gcc_unreachable ();
4117 if (i == (size_t) -1)
4118 break;
4119 if (n->simdclone->args[i].alignment > arginfo[i].align)
4121 i = -1;
4122 break;
4124 if (arginfo[i].align)
4125 this_badness += (exact_log2 (arginfo[i].align)
4126 - exact_log2 (n->simdclone->args[i].alignment));
4128 if (i == (size_t) -1)
4129 continue;
4130 if (bestn == NULL || this_badness < badness)
4132 bestn = n;
4133 badness = this_badness;
4137 if (bestn == NULL)
4138 return false;
4140 for (i = 0; i < nargs; i++)
4141 if ((arginfo[i].dt == vect_constant_def
4142 || arginfo[i].dt == vect_external_def)
4143 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4145 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4146 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4147 slp_node);
4148 if (arginfo[i].vectype == NULL
4149 || !constant_multiple_p (bestn->simdclone->simdlen,
4150 simd_clone_subparts (arginfo[i].vectype)))
4151 return false;
4154 fndecl = bestn->decl;
4155 nunits = bestn->simdclone->simdlen;
4156 ncopies = vector_unroll_factor (vf, nunits);
4158 /* If the function isn't const, only allow it in simd loops where the user
4159 has asserted that at least nunits consecutive iterations can be
4160 performed using SIMD instructions. */
4161 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4162 && gimple_vuse (stmt))
4163 return false;
4165 /* Sanity check: make sure that at least one copy of the vectorized stmt
4166 needs to be generated. */
4167 gcc_assert (ncopies >= 1);
4169 if (!vec_stmt) /* transformation not required. */
4171 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4172 for (i = 0; i < nargs; i++)
4173 if ((bestn->simdclone->args[i].arg_type
4174 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4175 || (bestn->simdclone->args[i].arg_type
4176 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4178 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4179 + 1,
4180 true);
4181 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4182 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4183 ? size_type_node : TREE_TYPE (arginfo[i].op);
4184 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4185 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4186 tree sll = arginfo[i].simd_lane_linear
4187 ? boolean_true_node : boolean_false_node;
4188 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4190 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4191 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4192 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4193 dt, slp_node, cost_vec); */
4194 return true;
4197 /* Transform. */
4199 if (dump_enabled_p ())
4200 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4202 /* Handle def. */
4203 scalar_dest = gimple_call_lhs (stmt);
4204 vec_dest = NULL_TREE;
4205 rtype = NULL_TREE;
4206 ratype = NULL_TREE;
4207 if (scalar_dest)
4209 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4210 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4211 if (TREE_CODE (rtype) == ARRAY_TYPE)
4213 ratype = rtype;
4214 rtype = TREE_TYPE (ratype);
4218 auto_vec<vec<tree> > vec_oprnds;
4219 auto_vec<unsigned> vec_oprnds_i;
4220 vec_oprnds.safe_grow_cleared (nargs, true);
4221 vec_oprnds_i.safe_grow_cleared (nargs, true);
4222 for (j = 0; j < ncopies; ++j)
4224 /* Build argument list for the vectorized call. */
4225 if (j == 0)
4226 vargs.create (nargs);
4227 else
4228 vargs.truncate (0);
4230 for (i = 0; i < nargs; i++)
4232 unsigned int k, l, m, o;
4233 tree atype;
4234 op = gimple_call_arg (stmt, i);
4235 switch (bestn->simdclone->args[i].arg_type)
4237 case SIMD_CLONE_ARG_TYPE_VECTOR:
4238 atype = bestn->simdclone->args[i].vector_type;
4239 o = vector_unroll_factor (nunits,
4240 simd_clone_subparts (atype));
4241 for (m = j * o; m < (j + 1) * o; m++)
4243 if (simd_clone_subparts (atype)
4244 < simd_clone_subparts (arginfo[i].vectype))
4246 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4247 k = (simd_clone_subparts (arginfo[i].vectype)
4248 / simd_clone_subparts (atype));
4249 gcc_assert ((k & (k - 1)) == 0);
4250 if (m == 0)
4252 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4253 ncopies * o / k, op,
4254 &vec_oprnds[i]);
4255 vec_oprnds_i[i] = 0;
4256 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4258 else
4260 vec_oprnd0 = arginfo[i].op;
4261 if ((m & (k - 1)) == 0)
4262 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4264 arginfo[i].op = vec_oprnd0;
4265 vec_oprnd0
4266 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4267 bitsize_int (prec),
4268 bitsize_int ((m & (k - 1)) * prec));
4269 gassign *new_stmt
4270 = gimple_build_assign (make_ssa_name (atype),
4271 vec_oprnd0);
4272 vect_finish_stmt_generation (vinfo, stmt_info,
4273 new_stmt, gsi);
4274 vargs.safe_push (gimple_assign_lhs (new_stmt));
4276 else
4278 k = (simd_clone_subparts (atype)
4279 / simd_clone_subparts (arginfo[i].vectype));
4280 gcc_assert ((k & (k - 1)) == 0);
4281 vec<constructor_elt, va_gc> *ctor_elts;
4282 if (k != 1)
4283 vec_alloc (ctor_elts, k);
4284 else
4285 ctor_elts = NULL;
4286 for (l = 0; l < k; l++)
4288 if (m == 0 && l == 0)
4290 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4291 k * o * ncopies,
4292 op,
4293 &vec_oprnds[i]);
4294 vec_oprnds_i[i] = 0;
4295 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4297 else
4298 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4299 arginfo[i].op = vec_oprnd0;
4300 if (k == 1)
4301 break;
4302 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4303 vec_oprnd0);
4305 if (k == 1)
4306 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4307 atype))
4309 vec_oprnd0
4310 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4311 gassign *new_stmt
4312 = gimple_build_assign (make_ssa_name (atype),
4313 vec_oprnd0);
4314 vect_finish_stmt_generation (vinfo, stmt_info,
4315 new_stmt, gsi);
4316 vargs.safe_push (gimple_assign_lhs (new_stmt));
4318 else
4319 vargs.safe_push (vec_oprnd0);
4320 else
4322 vec_oprnd0 = build_constructor (atype, ctor_elts);
4323 gassign *new_stmt
4324 = gimple_build_assign (make_ssa_name (atype),
4325 vec_oprnd0);
4326 vect_finish_stmt_generation (vinfo, stmt_info,
4327 new_stmt, gsi);
4328 vargs.safe_push (gimple_assign_lhs (new_stmt));
4332 break;
4333 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4334 vargs.safe_push (op);
4335 break;
4336 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4337 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4338 if (j == 0)
4340 gimple_seq stmts;
4341 arginfo[i].op
4342 = force_gimple_operand (unshare_expr (arginfo[i].op),
4343 &stmts, true, NULL_TREE);
4344 if (stmts != NULL)
4346 basic_block new_bb;
4347 edge pe = loop_preheader_edge (loop);
4348 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4349 gcc_assert (!new_bb);
4351 if (arginfo[i].simd_lane_linear)
4353 vargs.safe_push (arginfo[i].op);
4354 break;
4356 tree phi_res = copy_ssa_name (op);
4357 gphi *new_phi = create_phi_node (phi_res, loop->header);
4358 add_phi_arg (new_phi, arginfo[i].op,
4359 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4360 enum tree_code code
4361 = POINTER_TYPE_P (TREE_TYPE (op))
4362 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4363 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4364 ? sizetype : TREE_TYPE (op);
4365 poly_widest_int cst
4366 = wi::mul (bestn->simdclone->args[i].linear_step,
4367 ncopies * nunits);
4368 tree tcst = wide_int_to_tree (type, cst);
4369 tree phi_arg = copy_ssa_name (op);
4370 gassign *new_stmt
4371 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4372 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4373 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4374 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4375 UNKNOWN_LOCATION);
4376 arginfo[i].op = phi_res;
4377 vargs.safe_push (phi_res);
4379 else
4381 enum tree_code code
4382 = POINTER_TYPE_P (TREE_TYPE (op))
4383 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4384 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4385 ? sizetype : TREE_TYPE (op);
4386 poly_widest_int cst
4387 = wi::mul (bestn->simdclone->args[i].linear_step,
4388 j * nunits);
4389 tree tcst = wide_int_to_tree (type, cst);
4390 new_temp = make_ssa_name (TREE_TYPE (op));
4391 gassign *new_stmt
4392 = gimple_build_assign (new_temp, code,
4393 arginfo[i].op, tcst);
4394 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4395 vargs.safe_push (new_temp);
4397 break;
4398 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4399 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4400 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4401 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4402 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4403 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4404 default:
4405 gcc_unreachable ();
4409 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4410 if (vec_dest)
4412 gcc_assert (ratype
4413 || known_eq (simd_clone_subparts (rtype), nunits));
4414 if (ratype)
4415 new_temp = create_tmp_var (ratype);
4416 else if (useless_type_conversion_p (vectype, rtype))
4417 new_temp = make_ssa_name (vec_dest, new_call);
4418 else
4419 new_temp = make_ssa_name (rtype, new_call);
4420 gimple_call_set_lhs (new_call, new_temp);
4422 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4423 gimple *new_stmt = new_call;
4425 if (vec_dest)
4427 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4429 unsigned int k, l;
4430 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4431 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4432 k = vector_unroll_factor (nunits,
4433 simd_clone_subparts (vectype));
4434 gcc_assert ((k & (k - 1)) == 0);
4435 for (l = 0; l < k; l++)
4437 tree t;
4438 if (ratype)
4440 t = build_fold_addr_expr (new_temp);
4441 t = build2 (MEM_REF, vectype, t,
4442 build_int_cst (TREE_TYPE (t), l * bytes));
4444 else
4445 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4446 bitsize_int (prec), bitsize_int (l * prec));
4447 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4448 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4450 if (j == 0 && l == 0)
4451 *vec_stmt = new_stmt;
4452 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4455 if (ratype)
4456 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4457 continue;
4459 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4461 unsigned int k = (simd_clone_subparts (vectype)
4462 / simd_clone_subparts (rtype));
4463 gcc_assert ((k & (k - 1)) == 0);
4464 if ((j & (k - 1)) == 0)
4465 vec_alloc (ret_ctor_elts, k);
4466 if (ratype)
4468 unsigned int m, o;
4469 o = vector_unroll_factor (nunits,
4470 simd_clone_subparts (rtype));
4471 for (m = 0; m < o; m++)
4473 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4474 size_int (m), NULL_TREE, NULL_TREE);
4475 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4476 tem);
4477 vect_finish_stmt_generation (vinfo, stmt_info,
4478 new_stmt, gsi);
4479 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4480 gimple_assign_lhs (new_stmt));
4482 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4484 else
4485 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4486 if ((j & (k - 1)) != k - 1)
4487 continue;
4488 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4489 new_stmt
4490 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4491 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4493 if ((unsigned) j == k - 1)
4494 *vec_stmt = new_stmt;
4495 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4496 continue;
4498 else if (ratype)
4500 tree t = build_fold_addr_expr (new_temp);
4501 t = build2 (MEM_REF, vectype, t,
4502 build_int_cst (TREE_TYPE (t), 0));
4503 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4504 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4505 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4507 else if (!useless_type_conversion_p (vectype, rtype))
4509 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4510 new_stmt
4511 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4512 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4516 if (j == 0)
4517 *vec_stmt = new_stmt;
4518 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4521 for (i = 0; i < nargs; ++i)
4523 vec<tree> oprndsi = vec_oprnds[i];
4524 oprndsi.release ();
4526 vargs.release ();
4528 /* The call in STMT might prevent it from being removed in DCE.
4529 We cannot, however, remove it here, due to the way the SSA name
4530 it defines is mapped to the new definition. So just replace the
4531 rhs of the statement with something harmless. */
4533 if (slp_node)
4534 return true;
4536 gimple *new_stmt;
4537 if (scalar_dest)
4539 type = TREE_TYPE (scalar_dest);
4540 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4541 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4543 else
4544 new_stmt = gimple_build_nop ();
4545 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4546 unlink_stmt_vdef (stmt);
4548 return true;
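/* Illustrative sketch, not part of GCC: a plain-C model of how the code
   above invokes a simd clone.  With a vectorization factor of 8 and a
   chosen clone of simdlen 4, NCOPIES == 2 and the clone is called once
   per four-lane chunk of the vector iteration space.  simd_clone4_sketch
   is a hypothetical stand-in for BESTN->DECL.  */

static void
simd_clone4_sketch (const int a[4], int r[4])
{
  for (int lane = 0; lane < 4; lane++)
    r[lane] = a[lane] * 2;	/* Stand-in for the scalar call's body.  */
}

static void ATTRIBUTE_UNUSED
simd_clone_call_model_sketch (const int a[8], int r[8])
{
  const int vf = 8, simdlen = 4, ncopies = vf / simdlen;
  for (int j = 0; j < ncopies; j++)
    simd_clone4_sketch (a + j * simdlen, r + j * simdlen);
}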
4552 /* Function vect_gen_widened_results_half
4554 Create a vector stmt whose code, number of operands, and result
4555 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
4556 VEC_OPRND0 and VEC_OPRND1 (VEC_OPRND1 is ignored when OP_TYPE is not
4557 binary_op). The new vector stmt is always a gassign and is inserted
4558 at GSI.
4559 STMT_INFO is the original scalar stmt that we are vectorizing. */
4561 static gimple *
4562 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4563 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4564 tree vec_dest, gimple_stmt_iterator *gsi,
4565 stmt_vec_info stmt_info)
4567 gimple *new_stmt;
4568 tree new_temp;
4570 /* Generate half of the widened result: */
4571 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4572 if (op_type != binary_op)
4573 vec_oprnd1 = NULL;
4574 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4575 new_temp = make_ssa_name (vec_dest, new_stmt);
4576 gimple_assign_set_lhs (new_stmt, new_temp);
4577 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4579 return new_stmt;
4583 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4584 For multi-step conversions store the resulting vectors and call the function
4585 recursively. */
4587 static void
4588 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4589 int multi_step_cvt,
4590 stmt_vec_info stmt_info,
4591 vec<tree> &vec_dsts,
4592 gimple_stmt_iterator *gsi,
4593 slp_tree slp_node, enum tree_code code)
4595 unsigned int i;
4596 tree vop0, vop1, new_tmp, vec_dest;
4598 vec_dest = vec_dsts.pop ();
4600 for (i = 0; i < vec_oprnds->length (); i += 2)
4602 /* Create demotion operation. */
4603 vop0 = (*vec_oprnds)[i];
4604 vop1 = (*vec_oprnds)[i + 1];
4605 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4606 new_tmp = make_ssa_name (vec_dest, new_stmt);
4607 gimple_assign_set_lhs (new_stmt, new_tmp);
4608 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4610 if (multi_step_cvt)
4611 /* Store the resulting vector for next recursive call. */
4612 (*vec_oprnds)[i/2] = new_tmp;
4613 else
4615 /* This is the last step of the conversion sequence. Store the
4616 vectors in SLP_NODE or in the vector info of the scalar statement
4617 (or in the STMT_VINFO_RELATED_STMT chain). */
4618 if (slp_node)
4619 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4620 else
4621 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4625 /* For multi-step demotion operations we first generate demotion operations
4626 from the source type to the intermediate types, and then combine the
4627 results (stored in VEC_OPRNDS) in a demotion operation to the destination
4628 type. */
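/* For example (illustrative): demoting int to char with four V4SI operands
   and MULTI_STEP_CVT == 1, the first pass packs them into two V8HI vectors
   and the recursive call packs those into the final V16QI result.  */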
4629 if (multi_step_cvt)
4631 /* At each level of recursion we have half of the operands we had at the
4632 previous level. */
4633 vec_oprnds->truncate ((i+1)/2);
4634 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4635 multi_step_cvt - 1,
4636 stmt_info, vec_dsts, gsi,
4637 slp_node, VEC_PACK_TRUNC_EXPR);
4640 vec_dsts.quick_push (vec_dest);
4644 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4645 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4646 STMT_INFO. For multi-step conversions store the resulting vectors and
4647 call the function recursively. */
4649 static void
4650 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4651 vec<tree> *vec_oprnds0,
4652 vec<tree> *vec_oprnds1,
4653 stmt_vec_info stmt_info, tree vec_dest,
4654 gimple_stmt_iterator *gsi,
4655 enum tree_code code1,
4656 enum tree_code code2, int op_type)
4658 int i;
4659 tree vop0, vop1, new_tmp1, new_tmp2;
4660 gimple *new_stmt1, *new_stmt2;
4661 vec<tree> vec_tmp = vNULL;
4663 vec_tmp.create (vec_oprnds0->length () * 2);
4664 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4666 if (op_type == binary_op)
4667 vop1 = (*vec_oprnds1)[i];
4668 else
4669 vop1 = NULL_TREE;
4671 /* Generate the two halves of promotion operation. */
4672 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4673 op_type, vec_dest, gsi,
4674 stmt_info);
4675 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4676 op_type, vec_dest, gsi,
4677 stmt_info);
4678 if (is_gimple_call (new_stmt1))
4680 new_tmp1 = gimple_call_lhs (new_stmt1);
4681 new_tmp2 = gimple_call_lhs (new_stmt2);
4683 else
4685 new_tmp1 = gimple_assign_lhs (new_stmt1);
4686 new_tmp2 = gimple_assign_lhs (new_stmt2);
4689 /* Store the results for the next step. */
4690 vec_tmp.quick_push (new_tmp1);
4691 vec_tmp.quick_push (new_tmp2);
4694 vec_oprnds0->release ();
4695 *vec_oprnds0 = vec_tmp;
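/* Illustrative sketch, not part of GCC: a plain-C model of the promotion
   scheme above.  One eight-element short input yields two four-element int
   outputs, the two halves produced by the pair of
   vect_gen_widened_results_half calls (which half is "lo" and which is
   "hi" is target dependent).  The fixed lane counts are illustrative
   assumptions only.  */

static void ATTRIBUTE_UNUSED
promotion_model_sketch (const short in[8], int lo[4], int hi[4])
{
  for (int i = 0; i < 4; i++)
    {
      lo[i] = (int) in[i];	/* VEC_UNPACK_LO_EXPR-style half.  */
      hi[i] = (int) in[i + 4];	/* VEC_UNPACK_HI_EXPR-style half.  */
    }
}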
4698 /* Create vectorized promotion stmts for widening stmts using only half the
4699 potential vector size for input. */
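/* For example (illustrative): a WIDEN_PLUS_EXPR from two V4HI inputs to a
   V4SI result first extends each input to V4SI with a NOP_EXPR and then
   emits an ordinary V4SI addition.  */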
4700 static void
4701 vect_create_half_widening_stmts (vec_info *vinfo,
4702 vec<tree> *vec_oprnds0,
4703 vec<tree> *vec_oprnds1,
4704 stmt_vec_info stmt_info, tree vec_dest,
4705 gimple_stmt_iterator *gsi,
4706 enum tree_code code1,
4707 int op_type)
4709 int i;
4710 tree vop0, vop1;
4711 gimple *new_stmt1;
4712 gimple *new_stmt2;
4713 gimple *new_stmt3;
4714 vec<tree> vec_tmp = vNULL;
4716 vec_tmp.create (vec_oprnds0->length ());
4717 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4719 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4721 gcc_assert (op_type == binary_op);
4722 vop1 = (*vec_oprnds1)[i];
4724 /* Widen the first vector input. */
4725 out_type = TREE_TYPE (vec_dest);
4726 new_tmp1 = make_ssa_name (out_type);
4727 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4728 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4729 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4731 /* Widen the second vector input. */
4732 new_tmp2 = make_ssa_name (out_type);
4733 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4734 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4735 /* Perform the operation, with both vector inputs widened. */
4736 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4738 else
4740 /* Perform the operation, with the single vector input widened. */
4741 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4744 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4745 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4746 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4748 /* Store the results for the next step. */
4749 vec_tmp.quick_push (new_tmp3);
4752 vec_oprnds0->release ();
4753 *vec_oprnds0 = vec_tmp;
4757 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4758 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4759 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4760 Return true if STMT_INFO is vectorizable in this way. */
4762 static bool
4763 vectorizable_conversion (vec_info *vinfo,
4764 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4765 gimple **vec_stmt, slp_tree slp_node,
4766 stmt_vector_for_cost *cost_vec)
4768 tree vec_dest;
4769 tree scalar_dest;
4770 tree op0, op1 = NULL_TREE;
4771 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4772 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4773 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4774 tree new_temp;
4775 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4776 int ndts = 2;
4777 poly_uint64 nunits_in;
4778 poly_uint64 nunits_out;
4779 tree vectype_out, vectype_in;
4780 int ncopies, i;
4781 tree lhs_type, rhs_type;
4782 enum { NARROW, NONE, WIDEN } modifier;
4783 vec<tree> vec_oprnds0 = vNULL;
4784 vec<tree> vec_oprnds1 = vNULL;
4785 tree vop0;
4786 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4787 int multi_step_cvt = 0;
4788 vec<tree> interm_types = vNULL;
4789 tree intermediate_type, cvt_type = NULL_TREE;
4790 int op_type;
4791 unsigned short fltsz;
4793 /* Is STMT a vectorizable conversion? */
4795 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4796 return false;
4798 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4799 && ! vec_stmt)
4800 return false;
4802 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4803 if (!stmt)
4804 return false;
4806 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4807 return false;
4809 code = gimple_assign_rhs_code (stmt);
4810 if (!CONVERT_EXPR_CODE_P (code)
4811 && code != FIX_TRUNC_EXPR
4812 && code != FLOAT_EXPR
4813 && code != WIDEN_PLUS_EXPR
4814 && code != WIDEN_MINUS_EXPR
4815 && code != WIDEN_MULT_EXPR
4816 && code != WIDEN_LSHIFT_EXPR)
4817 return false;
4819 bool widen_arith = (code == WIDEN_PLUS_EXPR
4820 || code == WIDEN_MINUS_EXPR
4821 || code == WIDEN_MULT_EXPR
4822 || code == WIDEN_LSHIFT_EXPR);
4823 op_type = TREE_CODE_LENGTH (code);
4825 /* Check types of lhs and rhs. */
4826 scalar_dest = gimple_assign_lhs (stmt);
4827 lhs_type = TREE_TYPE (scalar_dest);
4828 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4830 /* Check the operands of the operation. */
4831 slp_tree slp_op0, slp_op1 = NULL;
4832 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4833 0, &op0, &slp_op0, &dt[0], &vectype_in))
4835 if (dump_enabled_p ())
4836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4837 "use not simple.\n");
4838 return false;
4841 rhs_type = TREE_TYPE (op0);
4842 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4843 && !((INTEGRAL_TYPE_P (lhs_type)
4844 && INTEGRAL_TYPE_P (rhs_type))
4845 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4846 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4847 return false;
4849 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4850 && ((INTEGRAL_TYPE_P (lhs_type)
4851 && !type_has_mode_precision_p (lhs_type))
4852 || (INTEGRAL_TYPE_P (rhs_type)
4853 && !type_has_mode_precision_p (rhs_type))))
4855 if (dump_enabled_p ())
4856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4857 "type conversion to/from bit-precision unsupported."
4858 "\n");
4859 return false;
4862 if (op_type == binary_op)
4864 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4865 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4867 op1 = gimple_assign_rhs2 (stmt);
4868 tree vectype1_in;
4869 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4870 &op1, &slp_op1, &dt[1], &vectype1_in))
4872 if (dump_enabled_p ())
4873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4874 "use not simple.\n");
4875 return false;
4877 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4878 OP1. */
4879 if (!vectype_in)
4880 vectype_in = vectype1_in;
4883 /* If op0 is an external or constant def, infer the vector type
4884 from the scalar type. */
4885 if (!vectype_in)
4886 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4887 if (vec_stmt)
4888 gcc_assert (vectype_in);
4889 if (!vectype_in)
4891 if (dump_enabled_p ())
4892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4893 "no vectype for scalar type %T\n", rhs_type);
4895 return false;
4898 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4899 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4901 if (dump_enabled_p ())
4902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4903 "can't convert between boolean and non "
4904 "boolean vectors %T\n", rhs_type);
4906 return false;
4909 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4910 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4911 if (known_eq (nunits_out, nunits_in))
4912 if (widen_arith)
4913 modifier = WIDEN;
4914 else
4915 modifier = NONE;
4916 else if (multiple_p (nunits_out, nunits_in))
4917 modifier = NARROW;
4918 else
4920 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4921 modifier = WIDEN;
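/* For example (illustrative): an int->short conversion with V4SI in and
   V8HI out has nunits_out a multiple of nunits_in and is NARROW; int->long
   with V4SI in and V2DI out is WIDEN; int->float with V4SI in and V4SF out
   is NONE.  */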
4924 /* Multiple types in SLP are handled by creating the appropriate number of
4925 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4926 case of SLP. */
4927 if (slp_node)
4928 ncopies = 1;
4929 else if (modifier == NARROW)
4930 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4931 else
4932 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4934 /* Sanity check: make sure that at least one copy of the vectorized stmt
4935 needs to be generated. */
4936 gcc_assert (ncopies >= 1);
4938 bool found_mode = false;
4939 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4940 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4941 opt_scalar_mode rhs_mode_iter;
4943 /* Supportable by target? */
4944 switch (modifier)
4946 case NONE:
4947 if (code != FIX_TRUNC_EXPR
4948 && code != FLOAT_EXPR
4949 && !CONVERT_EXPR_CODE_P (code))
4950 return false;
4951 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4952 break;
4953 /* FALLTHRU */
4954 unsupported:
4955 if (dump_enabled_p ())
4956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4957 "conversion not supported by target.\n");
4958 return false;
4960 case WIDEN:
4961 if (known_eq (nunits_in, nunits_out))
4963 if (!supportable_half_widening_operation (code, vectype_out,
4964 vectype_in, &code1))
4965 goto unsupported;
4966 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4967 break;
4969 if (supportable_widening_operation (vinfo, code, stmt_info,
4970 vectype_out, vectype_in, &code1,
4971 &code2, &multi_step_cvt,
4972 &interm_types))
4974 /* Binary widening operation can only be supported directly by the
4975 architecture. */
4976 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4977 break;
4980 if (code != FLOAT_EXPR
4981 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4982 goto unsupported;
4984 fltsz = GET_MODE_SIZE (lhs_mode);
4985 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4987 rhs_mode = rhs_mode_iter.require ();
4988 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4989 break;
4991 cvt_type
4992 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4993 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4994 if (cvt_type == NULL_TREE)
4995 goto unsupported;
4997 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4999 if (!supportable_convert_operation (code, vectype_out,
5000 cvt_type, &codecvt1))
5001 goto unsupported;
5003 else if (!supportable_widening_operation (vinfo, code, stmt_info,
5004 vectype_out, cvt_type,
5005 &codecvt1, &codecvt2,
5006 &multi_step_cvt,
5007 &interm_types))
5008 continue;
5009 else
5010 gcc_assert (multi_step_cvt == 0);
5012 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5013 cvt_type,
5014 vectype_in, &code1, &code2,
5015 &multi_step_cvt, &interm_types))
5017 found_mode = true;
5018 break;
5022 if (!found_mode)
5023 goto unsupported;
5025 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5026 codecvt2 = ERROR_MARK;
5027 else
5029 multi_step_cvt++;
5030 interm_types.safe_push (cvt_type);
5031 cvt_type = NULL_TREE;
5033 break;
5035 case NARROW:
5036 gcc_assert (op_type == unary_op);
5037 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5038 &code1, &multi_step_cvt,
5039 &interm_types))
5040 break;
5042 if (code != FIX_TRUNC_EXPR
5043 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5044 goto unsupported;
5046 cvt_type
5047 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5048 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5049 if (cvt_type == NULL_TREE)
5050 goto unsupported;
5051 if (!supportable_convert_operation (code, cvt_type, vectype_in,
5052 &codecvt1))
5053 goto unsupported;
5054 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5055 &code1, &multi_step_cvt,
5056 &interm_types))
5057 break;
5058 goto unsupported;
5060 default:
5061 gcc_unreachable ();
5064 if (!vec_stmt) /* transformation not required. */
5066 if (slp_node
5067 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5068 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5070 if (dump_enabled_p ())
5071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5072 "incompatible vector types for invariants\n");
5073 return false;
5075 DUMP_VECT_SCOPE ("vectorizable_conversion");
5076 if (modifier == NONE)
5078 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5079 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5080 cost_vec);
5082 else if (modifier == NARROW)
5084 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5085 /* The final packing step produces one vector result per copy. */
5086 unsigned int nvectors
5087 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5088 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5089 multi_step_cvt, cost_vec,
5090 widen_arith);
5092 else
5094 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5095 /* The initial unpacking step produces two vector results
5096 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5097 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5098 unsigned int nvectors
5099 = (slp_node
5100 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5101 : ncopies * 2);
5102 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5103 multi_step_cvt, cost_vec,
5104 widen_arith);
5106 interm_types.release ();
5107 return true;
5110 /* Transform. */
5111 if (dump_enabled_p ())
5112 dump_printf_loc (MSG_NOTE, vect_location,
5113 "transform conversion. ncopies = %d.\n", ncopies);
5115 if (op_type == binary_op)
5117 if (CONSTANT_CLASS_P (op0))
5118 op0 = fold_convert (TREE_TYPE (op1), op0);
5119 else if (CONSTANT_CLASS_P (op1))
5120 op1 = fold_convert (TREE_TYPE (op0), op1);
5123 /* In case of multi-step conversion, we first generate conversion operations
5124 to the intermediate types, and then from those types to the final one.
5125 We create vector destinations for the intermediate type (TYPES) received
5126 from supportable_*_operation, and store them in the correct order
5127 for future use in vect_create_vectorized_*_stmts (). */
5128 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5129 vec_dest = vect_create_destination_var (scalar_dest,
5130 (cvt_type && modifier == WIDEN)
5131 ? cvt_type : vectype_out);
5132 vec_dsts.quick_push (vec_dest);
5134 if (multi_step_cvt)
5136 for (i = interm_types.length () - 1;
5137 interm_types.iterate (i, &intermediate_type); i--)
5139 vec_dest = vect_create_destination_var (scalar_dest,
5140 intermediate_type);
5141 vec_dsts.quick_push (vec_dest);
5145 if (cvt_type)
5146 vec_dest = vect_create_destination_var (scalar_dest,
5147 modifier == WIDEN
5148 ? vectype_out : cvt_type);
5150 int ninputs = 1;
5151 if (!slp_node)
5153 if (modifier == WIDEN)
5154 ;
5155 else if (modifier == NARROW)
5157 if (multi_step_cvt)
5158 ninputs = vect_pow2 (multi_step_cvt);
5159 ninputs *= 2;
5163 switch (modifier)
5165 case NONE:
5166 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5167 op0, &vec_oprnds0);
5168 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5170 /* Arguments are ready, create the new vector stmt. */
5171 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5172 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5173 new_temp = make_ssa_name (vec_dest, new_stmt);
5174 gimple_assign_set_lhs (new_stmt, new_temp);
5175 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5177 if (slp_node)
5178 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5179 else
5180 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5182 break;
5184 case WIDEN:
5185 /* In case the vectorization factor (VF) is bigger than the number
5186 of elements that we can fit in a vectype (nunits), we have to
5187 generate more than one vector stmt, i.e. we need to "unroll"
5188 the vector stmt by a factor VF/nunits. */
5189 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5190 op0, &vec_oprnds0,
5191 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5192 &vec_oprnds1);
5193 if (code == WIDEN_LSHIFT_EXPR)
5195 int oprnds_size = vec_oprnds0.length ();
5196 vec_oprnds1.create (oprnds_size);
5197 for (i = 0; i < oprnds_size; ++i)
5198 vec_oprnds1.quick_push (op1);
5200 /* Arguments are ready. Create the new vector stmts. */
5201 for (i = multi_step_cvt; i >= 0; i--)
5203 tree this_dest = vec_dsts[i];
5204 enum tree_code c1 = code1, c2 = code2;
5205 if (i == 0 && codecvt2 != ERROR_MARK)
5207 c1 = codecvt1;
5208 c2 = codecvt2;
5210 if (known_eq (nunits_out, nunits_in))
5211 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5212 &vec_oprnds1, stmt_info,
5213 this_dest, gsi,
5214 c1, op_type);
5215 else
5216 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5217 &vec_oprnds1, stmt_info,
5218 this_dest, gsi,
5219 c1, c2, op_type);
5222 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5224 gimple *new_stmt;
5225 if (cvt_type)
5227 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5228 new_temp = make_ssa_name (vec_dest);
5229 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5230 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5232 else
5233 new_stmt = SSA_NAME_DEF_STMT (vop0);
5235 if (slp_node)
5236 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5237 else
5238 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5240 break;
5242 case NARROW:
5243 /* In case the vectorization factor (VF) is bigger than the number
5244 of elements that we can fit in a vectype (nunits), we have to
5245 generate more than one vector stmt, i.e. we need to "unroll"
5246 the vector stmt by a factor VF/nunits. */
5247 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5248 op0, &vec_oprnds0);
5249 /* Arguments are ready. Create the new vector stmts. */
5250 if (cvt_type)
5251 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5253 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5254 new_temp = make_ssa_name (vec_dest);
5255 gassign *new_stmt
5256 = gimple_build_assign (new_temp, codecvt1, vop0);
5257 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5258 vec_oprnds0[i] = new_temp;
5261 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5262 multi_step_cvt,
5263 stmt_info, vec_dsts, gsi,
5264 slp_node, code1);
5265 break;
5267 if (!slp_node)
5268 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5270 vec_oprnds0.release ();
5271 vec_oprnds1.release ();
5272 interm_types.release ();
5274 return true;
5277 /* Return true if we can assume from the scalar form of STMT_INFO that
5278 neither the scalar nor the vector forms will generate code. STMT_INFO
5279 is known not to involve a data reference. */
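/* Examples: a plain SSA copy, a VIEW_CONVERT_EXPR, or a conversion between
   integer types of the same precision such as int -> unsigned int.  */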
5281 bool
5282 vect_nop_conversion_p (stmt_vec_info stmt_info)
5284 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5285 if (!stmt)
5286 return false;
5288 tree lhs = gimple_assign_lhs (stmt);
5289 tree_code code = gimple_assign_rhs_code (stmt);
5290 tree rhs = gimple_assign_rhs1 (stmt);
5292 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5293 return true;
5295 if (CONVERT_EXPR_CODE_P (code))
5296 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5298 return false;
5301 /* Function vectorizable_assignment.
5303 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5304 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5305 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5306 Return true if STMT_INFO is vectorizable in this way. */
5308 static bool
5309 vectorizable_assignment (vec_info *vinfo,
5310 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5311 gimple **vec_stmt, slp_tree slp_node,
5312 stmt_vector_for_cost *cost_vec)
5314 tree vec_dest;
5315 tree scalar_dest;
5316 tree op;
5317 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5318 tree new_temp;
5319 enum vect_def_type dt[1] = {vect_unknown_def_type};
5320 int ndts = 1;
5321 int ncopies;
5322 int i;
5323 vec<tree> vec_oprnds = vNULL;
5324 tree vop;
5325 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5326 enum tree_code code;
5327 tree vectype_in;
5329 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5330 return false;
5332 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5333 && ! vec_stmt)
5334 return false;
5336 /* Is STMT a vectorizable assignment? */
5337 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5338 if (!stmt)
5339 return false;
5341 scalar_dest = gimple_assign_lhs (stmt);
5342 if (TREE_CODE (scalar_dest) != SSA_NAME)
5343 return false;
5345 if (STMT_VINFO_DATA_REF (stmt_info))
5346 return false;
5348 code = gimple_assign_rhs_code (stmt);
5349 if (!(gimple_assign_single_p (stmt)
5350 || code == PAREN_EXPR
5351 || CONVERT_EXPR_CODE_P (code)))
5352 return false;
5354 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5355 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5357 /* Multiple types in SLP are handled by creating the appropriate number of
5358 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5359 case of SLP. */
5360 if (slp_node)
5361 ncopies = 1;
5362 else
5363 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5365 gcc_assert (ncopies >= 1);
5367 slp_tree slp_op;
5368 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5369 &dt[0], &vectype_in))
5371 if (dump_enabled_p ())
5372 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5373 "use not simple.\n");
5374 return false;
5376 if (!vectype_in)
5377 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5379 /* We can handle NOP_EXPR and VIEW_CONVERT_EXPR conversions that do not
5380 change the number of elements or the vector size. */
5381 if ((CONVERT_EXPR_CODE_P (code)
5382 || code == VIEW_CONVERT_EXPR)
5383 && (!vectype_in
5384 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5385 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5386 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5387 return false;
5389 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5390 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5392 if (dump_enabled_p ())
5393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5394 "can't convert between boolean and non "
5395 "boolean vectors %T\n", TREE_TYPE (op));
5397 return false;
5400 /* We do not handle bit-precision changes. */
5401 if ((CONVERT_EXPR_CODE_P (code)
5402 || code == VIEW_CONVERT_EXPR)
5403 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5404 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5405 || !type_has_mode_precision_p (TREE_TYPE (op)))
5406 /* But a conversion that does not change the bit-pattern is ok. */
5407 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5408 > TYPE_PRECISION (TREE_TYPE (op)))
5409 && TYPE_UNSIGNED (TREE_TYPE (op))))
5411 if (dump_enabled_p ())
5412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5413 "type conversion to/from bit-precision "
5414 "unsupported.\n");
5415 return false;
5418 if (!vec_stmt) /* transformation not required. */
5420 if (slp_node
5421 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5425 "incompatible vector types for invariants\n");
5426 return false;
5428 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5429 DUMP_VECT_SCOPE ("vectorizable_assignment");
5430 if (!vect_nop_conversion_p (stmt_info))
5431 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5432 cost_vec);
5433 return true;
5436 /* Transform. */
5437 if (dump_enabled_p ())
5438 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5440 /* Handle def. */
5441 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5443 /* Handle use. */
5444 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5446 /* Arguments are ready. Create the new vector stmt. */
5447 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5449 if (CONVERT_EXPR_CODE_P (code)
5450 || code == VIEW_CONVERT_EXPR)
5451 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5452 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5453 new_temp = make_ssa_name (vec_dest, new_stmt);
5454 gimple_assign_set_lhs (new_stmt, new_temp);
5455 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5456 if (slp_node)
5457 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5458 else
5459 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5461 if (!slp_node)
5462 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5464 vec_oprnds.release ();
5465 return true;
5469 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5470 either as shift by a scalar or by a vector. */
5472 bool
5473 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5476 machine_mode vec_mode;
5477 optab optab;
5478 int icode;
5479 tree vectype;
5481 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5482 if (!vectype)
5483 return false;
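/* Prefer a shift by a scalar amount and fall back to a shift by a vector
   amount, matching the order in which vectorizable_shift below looks for
   target support.  */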
5485 optab = optab_for_tree_code (code, vectype, optab_scalar);
5486 if (!optab
5487 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5489 optab = optab_for_tree_code (code, vectype, optab_vector);
5490 if (!optab
5491 || (optab_handler (optab, TYPE_MODE (vectype))
5492 == CODE_FOR_nothing))
5493 return false;
5496 vec_mode = TYPE_MODE (vectype);
5497 icode = (int) optab_handler (optab, vec_mode);
5498 if (icode == CODE_FOR_nothing)
5499 return false;
5501 return true;
5505 /* Function vectorizable_shift.
5507 Check if STMT_INFO performs a shift operation that can be vectorized.
5508 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5509 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5510 Return true if STMT_INFO is vectorizable in this way. */
5512 static bool
5513 vectorizable_shift (vec_info *vinfo,
5514 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5515 gimple **vec_stmt, slp_tree slp_node,
5516 stmt_vector_for_cost *cost_vec)
5518 tree vec_dest;
5519 tree scalar_dest;
5520 tree op0, op1 = NULL;
5521 tree vec_oprnd1 = NULL_TREE;
5522 tree vectype;
5523 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5524 enum tree_code code;
5525 machine_mode vec_mode;
5526 tree new_temp;
5527 optab optab;
5528 int icode;
5529 machine_mode optab_op2_mode;
5530 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5531 int ndts = 2;
5532 poly_uint64 nunits_in;
5533 poly_uint64 nunits_out;
5534 tree vectype_out;
5535 tree op1_vectype;
5536 int ncopies;
5537 int i;
5538 vec<tree> vec_oprnds0 = vNULL;
5539 vec<tree> vec_oprnds1 = vNULL;
5540 tree vop0, vop1;
5541 unsigned int k;
5542 bool scalar_shift_arg = true;
5543 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5544 bool incompatible_op1_vectype_p = false;
5546 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5547 return false;
5549 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5550 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5551 && ! vec_stmt)
5552 return false;
5554 /* Is STMT a vectorizable shift/rotate operation? */
5555 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5556 if (!stmt)
5557 return false;
5559 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5560 return false;
5562 code = gimple_assign_rhs_code (stmt);
5564 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5565 || code == RROTATE_EXPR))
5566 return false;
5568 scalar_dest = gimple_assign_lhs (stmt);
5569 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5570 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5574 "bit-precision shifts not supported.\n");
5575 return false;
5578 slp_tree slp_op0;
5579 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5580 0, &op0, &slp_op0, &dt[0], &vectype))
5582 if (dump_enabled_p ())
5583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5584 "use not simple.\n");
5585 return false;
5587 /* If op0 is an external or constant def, infer the vector type
5588 from the scalar type. */
5589 if (!vectype)
5590 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5591 if (vec_stmt)
5592 gcc_assert (vectype);
5593 if (!vectype)
5595 if (dump_enabled_p ())
5596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5597 "no vectype for scalar type\n");
5598 return false;
5601 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5602 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5603 if (maybe_ne (nunits_out, nunits_in))
5604 return false;
5606 stmt_vec_info op1_def_stmt_info;
5607 slp_tree slp_op1;
5608 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5609 &dt[1], &op1_vectype, &op1_def_stmt_info))
5611 if (dump_enabled_p ())
5612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5613 "use not simple.\n");
5614 return false;
5617 /* Multiple types in SLP are handled by creating the appropriate number of
5618 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5619 case of SLP. */
5620 if (slp_node)
5621 ncopies = 1;
5622 else
5623 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5625 gcc_assert (ncopies >= 1);
5627 /* Determine whether the shift amount is a vector or a scalar. If the
5628 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5630 if ((dt[1] == vect_internal_def
5631 || dt[1] == vect_induction_def
5632 || dt[1] == vect_nested_cycle)
5633 && !slp_node)
5634 scalar_shift_arg = false;
5635 else if (dt[1] == vect_constant_def
5636 || dt[1] == vect_external_def
5637 || dt[1] == vect_internal_def)
5639 /* In SLP we need to check whether the shift count is the same in
5640 all the scalar stmts; in loops, a constant or invariant shift
5641 count is always a scalar shift. */
5642 if (slp_node)
5644 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5645 stmt_vec_info slpstmt_info;
5647 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5649 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5650 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5651 scalar_shift_arg = false;
5654 /* For internal SLP defs we have to make sure we see scalar stmts
5655 for all vector elements.
5656 ??? For different vectors we could resort to a different
5657 scalar shift operand but code-generation below simply always
5658 takes the first. */
5659 if (dt[1] == vect_internal_def
5660 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5661 stmts.length ()))
5662 scalar_shift_arg = false;
5665 /* If the shift amount is computed by a pattern stmt we cannot
5666 use the scalar amount directly, thus give up and use a vector
5667 shift. */
5668 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5669 scalar_shift_arg = false;
5671 else
5673 if (dump_enabled_p ())
5674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5675 "operand mode requires invariant argument.\n");
5676 return false;
5679 /* Vector shifted by vector. */
5680 bool was_scalar_shift_arg = scalar_shift_arg;
5681 if (!scalar_shift_arg)
5683 optab = optab_for_tree_code (code, vectype, optab_vector);
5684 if (dump_enabled_p ())
5685 dump_printf_loc (MSG_NOTE, vect_location,
5686 "vector/vector shift/rotate found.\n");
5688 if (!op1_vectype)
5689 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5690 slp_op1);
5691 incompatible_op1_vectype_p
5692 = (op1_vectype == NULL_TREE
5693 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5694 TYPE_VECTOR_SUBPARTS (vectype))
5695 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5696 if (incompatible_op1_vectype_p
5697 && (!slp_node
5698 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5699 || slp_op1->refcnt != 1))
5701 if (dump_enabled_p ())
5702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5703 "unusable type for last operand in"
5704 " vector/vector shift/rotate.\n");
5705 return false;
5708 /* See if the machine has a vector shifted by scalar insn and if not
5709 then see if it has a vector shifted by vector insn. */
5710 else
5712 optab = optab_for_tree_code (code, vectype, optab_scalar);
5713 if (optab
5714 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_NOTE, vect_location,
5718 "vector/scalar shift/rotate found.\n");
5720 else
5722 optab = optab_for_tree_code (code, vectype, optab_vector);
5723 if (optab
5724 && (optab_handler (optab, TYPE_MODE (vectype))
5725 != CODE_FOR_nothing))
5727 scalar_shift_arg = false;
5729 if (dump_enabled_p ())
5730 dump_printf_loc (MSG_NOTE, vect_location,
5731 "vector/vector shift/rotate found.\n");
5733 if (!op1_vectype)
5734 op1_vectype = get_vectype_for_scalar_type (vinfo,
5735 TREE_TYPE (op1),
5736 slp_op1);
5738 /* Unlike the other binary operators, shifts/rotates have an
5739 int rhs rather than one of the same type as the lhs,
5740 so make sure the scalar is the right type if we are
5741 dealing with vectors of long long/long/short/char. */
5742 incompatible_op1_vectype_p
5743 = (!op1_vectype
5744 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5745 TREE_TYPE (op1)));
5746 if (incompatible_op1_vectype_p
5747 && dt[1] == vect_internal_def)
5749 if (dump_enabled_p ())
5750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5751 "unusable type for last operand in"
5752 " vector/vector shift/rotate.\n");
5753 return false;
5759 /* Supportable by target? */
5760 if (!optab)
5762 if (dump_enabled_p ())
5763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5764 "no optab.\n");
5765 return false;
5767 vec_mode = TYPE_MODE (vectype);
5768 icode = (int) optab_handler (optab, vec_mode);
5769 if (icode == CODE_FOR_nothing)
5771 if (dump_enabled_p ())
5772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5773 "op not supported by target.\n");
5774 return false;
5776 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
5777 if (vect_emulated_vector_p (vectype))
5778 return false;
5780 if (!vec_stmt) /* transformation not required. */
5782 if (slp_node
5783 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5784 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5785 && (!incompatible_op1_vectype_p
5786 || dt[1] == vect_constant_def)
5787 && !vect_maybe_update_slp_op_vectype
5788 (slp_op1,
5789 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5791 if (dump_enabled_p ())
5792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5793 "incompatible vector types for invariants\n");
5794 return false;
5796 /* Now adjust the constant shift amount in place. */
5797 if (slp_node
5798 && incompatible_op1_vectype_p
5799 && dt[1] == vect_constant_def)
5801 for (unsigned i = 0;
5802 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5804 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5805 = fold_convert (TREE_TYPE (vectype),
5806 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5807 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5808 == INTEGER_CST));
5811 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5812 DUMP_VECT_SCOPE ("vectorizable_shift");
5813 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5814 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5815 return true;
5818 /* Transform. */
5820 if (dump_enabled_p ())
5821 dump_printf_loc (MSG_NOTE, vect_location,
5822 "transform binary/unary operation.\n");
5824 if (incompatible_op1_vectype_p && !slp_node)
5826 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5827 op1 = fold_convert (TREE_TYPE (vectype), op1);
5828 if (dt[1] != vect_constant_def)
5829 op1 = vect_init_vector (vinfo, stmt_info, op1,
5830 TREE_TYPE (vectype), NULL);
5833 /* Handle def. */
5834 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5836 if (scalar_shift_arg && dt[1] != vect_internal_def)
5838 /* Vector shl and shr insn patterns can be defined with scalar
5839 operand 2 (shift operand). In this case, use constant or loop
5840 invariant op1 directly, without extending it to vector mode
5841 first. */
5842 optab_op2_mode = insn_data[icode].operand[2].mode;
5843 if (!VECTOR_MODE_P (optab_op2_mode))
5845 if (dump_enabled_p ())
5846 dump_printf_loc (MSG_NOTE, vect_location,
5847 "operand 1 using scalar mode.\n");
5848 vec_oprnd1 = op1;
5849 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5850 vec_oprnds1.quick_push (vec_oprnd1);
5851 /* Store vec_oprnd1 for every vector stmt to be created.
5852 We check during the analysis that all the shift arguments
5853 are the same.
5854 TODO: Allow different constants for different vector
5855 stmts generated for an SLP instance. */
5856 for (k = 0;
5857 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5858 vec_oprnds1.quick_push (vec_oprnd1);
5861 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5863 if (was_scalar_shift_arg)
5865 /* If the argument was the same in all lanes create
5866 the correctly typed vector shift amount directly. */
5867 op1 = fold_convert (TREE_TYPE (vectype), op1);
5868 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5869 !loop_vinfo ? gsi : NULL);
5870 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5871 !loop_vinfo ? gsi : NULL);
5872 vec_oprnds1.create (slp_node->vec_stmts_size);
5873 for (k = 0; k < slp_node->vec_stmts_size; k++)
5874 vec_oprnds1.quick_push (vec_oprnd1);
5876 else if (dt[1] == vect_constant_def)
5877 /* The constant shift amount has been adjusted in place. */
5879 else
5880 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5883 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5884 (a special case for certain kinds of vector shifts); otherwise,
5885 operand 1 should be of a vector type (the usual case). */
5886 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5887 op0, &vec_oprnds0,
5888 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5890 /* Arguments are ready. Create the new vector stmt. */
5891 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5893 /* For internal defs where we need to use a scalar shift arg
5894 extract the first lane. */
5895 if (scalar_shift_arg && dt[1] == vect_internal_def)
5897 vop1 = vec_oprnds1[0];
5898 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5899 gassign *new_stmt
5900 = gimple_build_assign (new_temp,
5901 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5902 vop1,
5903 TYPE_SIZE (TREE_TYPE (new_temp)),
5904 bitsize_zero_node));
5905 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5906 vop1 = new_temp;
5908 else
5909 vop1 = vec_oprnds1[i];
5910 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5911 new_temp = make_ssa_name (vec_dest, new_stmt);
5912 gimple_assign_set_lhs (new_stmt, new_temp);
5913 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5914 if (slp_node)
5915 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5916 else
5917 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5920 if (!slp_node)
5921 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5923 vec_oprnds0.release ();
5924 vec_oprnds1.release ();
5926 return true;
5930 /* Function vectorizable_operation.
5932 Check if STMT_INFO performs a binary, unary or ternary operation that can
5933 be vectorized.
5934 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5935 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5936 Return true if STMT_INFO is vectorizable in this way. */
5938 static bool
5939 vectorizable_operation (vec_info *vinfo,
5940 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5941 gimple **vec_stmt, slp_tree slp_node,
5942 stmt_vector_for_cost *cost_vec)
5944 tree vec_dest;
5945 tree scalar_dest;
5946 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5947 tree vectype;
5948 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5949 enum tree_code code, orig_code;
5950 machine_mode vec_mode;
5951 tree new_temp;
5952 int op_type;
5953 optab optab;
5954 bool target_support_p;
5955 enum vect_def_type dt[3]
5956 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5957 int ndts = 3;
5958 poly_uint64 nunits_in;
5959 poly_uint64 nunits_out;
5960 tree vectype_out;
5961 int ncopies, vec_num;
5962 int i;
5963 vec<tree> vec_oprnds0 = vNULL;
5964 vec<tree> vec_oprnds1 = vNULL;
5965 vec<tree> vec_oprnds2 = vNULL;
5966 tree vop0, vop1, vop2;
5967 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5969 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5970 return false;
5972 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5973 && ! vec_stmt)
5974 return false;
5976 /* Is STMT a vectorizable binary/unary/ternary operation? */
5977 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5978 if (!stmt)
5979 return false;
5981 /* Loads and stores are handled in vectorizable_{load,store}. */
5982 if (STMT_VINFO_DATA_REF (stmt_info))
5983 return false;
5985 orig_code = code = gimple_assign_rhs_code (stmt);
5987 /* Shifts are handled in vectorizable_shift. */
5988 if (code == LSHIFT_EXPR
5989 || code == RSHIFT_EXPR
5990 || code == LROTATE_EXPR
5991 || code == RROTATE_EXPR)
5992 return false;
5994 /* Comparisons are handled in vectorizable_comparison. */
5995 if (TREE_CODE_CLASS (code) == tcc_comparison)
5996 return false;
5998 /* Conditions are handled in vectorizable_condition. */
5999 if (code == COND_EXPR)
6000 return false;
6002 /* For pointer addition and subtraction, we should use the normal
6003 plus and minus for the vector operation. */
6004 if (code == POINTER_PLUS_EXPR)
6005 code = PLUS_EXPR;
6006 if (code == POINTER_DIFF_EXPR)
6007 code = MINUS_EXPR;
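/* Note that POINTER_DIFF_EXPR operands are vectorized as unsigned vectors
   while the result is signed; the transform code below compensates by
   computing the MINUS_EXPR in VECTYPE and applying a VIEW_CONVERT_EXPR
   to VECTYPE_OUT.  */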
6009 /* Support only unary, binary or ternary operations. */
6010 op_type = TREE_CODE_LENGTH (code);
6011 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6013 if (dump_enabled_p ())
6014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6015 "num. args = %d (not unary/binary/ternary op).\n",
6016 op_type);
6017 return false;
6020 scalar_dest = gimple_assign_lhs (stmt);
6021 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6023 /* Most operations cannot handle bit-precision types without extra
6024 truncations. */
6025 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6026 if (!mask_op_p
6027 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6028 /* Exceptions are the bitwise binary operations. */
6029 && code != BIT_IOR_EXPR
6030 && code != BIT_XOR_EXPR
6031 && code != BIT_AND_EXPR)
6033 if (dump_enabled_p ())
6034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6035 "bit-precision arithmetic not supported.\n");
6036 return false;
6039 slp_tree slp_op0;
6040 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6041 0, &op0, &slp_op0, &dt[0], &vectype))
6043 if (dump_enabled_p ())
6044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6045 "use not simple.\n");
6046 return false;
6048 /* If op0 is an external or constant def, infer the vector type
6049 from the scalar type. */
6050 if (!vectype)
6052 /* For a boolean type we cannot determine the vectype from an
6053 invariant value (we don't know whether it is a vector
6054 of booleans or a vector of integers). Use the output
6055 vectype because operations on booleans don't change
6056 the type. */
6057 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6059 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6061 if (dump_enabled_p ())
6062 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6063 "not supported operation on bool value.\n");
6064 return false;
6066 vectype = vectype_out;
6068 else
6069 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6070 slp_node);
6072 if (vec_stmt)
6073 gcc_assert (vectype);
6074 if (!vectype)
6076 if (dump_enabled_p ())
6077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6078 "no vectype for scalar type %T\n",
6079 TREE_TYPE (op0));
6081 return false;
6084 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6085 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6086 if (maybe_ne (nunits_out, nunits_in))
6087 return false;
6089 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6090 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6091 if (op_type == binary_op || op_type == ternary_op)
6093 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6094 1, &op1, &slp_op1, &dt[1], &vectype2))
6096 if (dump_enabled_p ())
6097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6098 "use not simple.\n");
6099 return false;
6102 if (op_type == ternary_op)
6104 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6105 2, &op2, &slp_op2, &dt[2], &vectype3))
6107 if (dump_enabled_p ())
6108 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6109 "use not simple.\n");
6110 return false;
6114 /* Multiple types in SLP are handled by creating the appropriate number of
6115 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6116 case of SLP. */
6117 if (slp_node)
6119 ncopies = 1;
6120 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6122 else
6124 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6125 vec_num = 1;
6128 gcc_assert (ncopies >= 1);
6130 /* Reject attempts to combine mask types with nonmask types, e.g. if
6131 we have an AND between a (nonmask) boolean loaded from memory and
6132 a (mask) boolean result of a comparison.
6134 TODO: We could easily fix these cases up using pattern statements. */
6135 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6136 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6137 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6139 if (dump_enabled_p ())
6140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6141 "mixed mask and nonmask vector types\n");
6142 return false;
6145 /* Supportable by target? */
6147 vec_mode = TYPE_MODE (vectype);
6148 if (code == MULT_HIGHPART_EXPR)
6149 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6150 else
6152 optab = optab_for_tree_code (code, vectype, optab_default);
6153 if (!optab)
6155 if (dump_enabled_p ())
6156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6157 "no optab.\n");
6158 return false;
6160 target_support_p = (optab_handler (optab, vec_mode)
6161 != CODE_FOR_nothing);
6164 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6165 if (!target_support_p)
6167 if (dump_enabled_p ())
6168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6169 "op not supported by target.\n");
6170 /* Check only during analysis. */
6171 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6172 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6173 return false;
6174 if (dump_enabled_p ())
6175 dump_printf_loc (MSG_NOTE, vect_location,
6176 "proceeding using word mode.\n");
6177 using_emulated_vectors_p = true;
6180 if (using_emulated_vectors_p
6181 && !vect_can_vectorize_without_simd_p (code))
6183 if (dump_enabled_p ())
6184 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6185 return false;
6188 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6189 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6190 internal_fn cond_fn = get_conditional_internal_fn (code);
6192 if (!vec_stmt) /* transformation not required. */
6194 /* If this operation is part of a reduction, a fully-masked loop
6195 should only change the active lanes of the reduction chain,
6196 keeping the inactive lanes as-is. */
6197 if (loop_vinfo
6198 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6199 && reduc_idx >= 0)
6201 if (cond_fn == IFN_LAST
6202 || !direct_internal_fn_supported_p (cond_fn, vectype,
6203 OPTIMIZE_FOR_SPEED))
6205 if (dump_enabled_p ())
6206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6207 "can't use a fully-masked loop because no"
6208 " conditional operation is available.\n");
6209 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6211 else
6212 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6213 vectype, NULL);
6216 /* Put types on constant and invariant SLP children. */
6217 if (slp_node
6218 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6219 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6220 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6222 if (dump_enabled_p ())
6223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6224 "incompatible vector types for invariants\n");
6225 return false;
6228 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6229 DUMP_VECT_SCOPE ("vectorizable_operation");
6230 vect_model_simple_cost (vinfo, stmt_info,
6231 ncopies, dt, ndts, slp_node, cost_vec);
6232 if (using_emulated_vectors_p)
6234 /* The above vect_model_simple_cost call handles constants
6235 in the prologue and (mis-)costs one of the stmts as
6236 vector stmt. See tree-vect-generic.c:do_plus_minus/do_negate
6237 for the actual lowering that will be applied. */
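/* The multipliers below are meant to approximate the number of scalar
   word-mode stmts that lowering emits for one emulated vector
   plus/minus/negate; they are rough heuristics rather than exact costs.  */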
6238 unsigned n
6239 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6240 switch (code)
6242 case PLUS_EXPR:
6243 n *= 5;
6244 break;
6245 case MINUS_EXPR:
6246 n *= 6;
6247 break;
6248 case NEGATE_EXPR:
6249 n *= 4;
6250 break;
6251 default:;
6253 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6255 return true;
6258 /* Transform. */
6260 if (dump_enabled_p ())
6261 dump_printf_loc (MSG_NOTE, vect_location,
6262 "transform binary/unary operation.\n");
6264 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6266 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6267 vectors with unsigned elements, but the result is signed. So, we
6268 need to compute the MINUS_EXPR into vectype temporary and
6269 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6270 tree vec_cvt_dest = NULL_TREE;
6271 if (orig_code == POINTER_DIFF_EXPR)
6273 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6274 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6276 /* Handle def. */
6277 else
6278 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6280 /* In case the vectorization factor (VF) is bigger than the number
6281 of elements that we can fit in a vectype (nunits), we have to generate
6282 more than one vector stmt - i.e., we need to "unroll" the
6283 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6284 from one copy of the vector stmt to the next, in the field
6285 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6286 stages to find the correct vector defs to be used when vectorizing
6287 stmts that use the defs of the current stmt. The example below
6288 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6289 we need to create 4 vectorized stmts):
6291 before vectorization:
6292 RELATED_STMT VEC_STMT
6293 S1: x = memref - -
6294 S2: z = x + 1 - -
6296 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6297 there):
6298 RELATED_STMT VEC_STMT
6299 VS1_0: vx0 = memref0 VS1_1 -
6300 VS1_1: vx1 = memref1 VS1_2 -
6301 VS1_2: vx2 = memref2 VS1_3 -
6302 VS1_3: vx3 = memref3 - -
6303 S1: x = load - VS1_0
6304 S2: z = x + 1 - -
6306 step2: vectorize stmt S2 (done here):
6307 To vectorize stmt S2 we first need to find the relevant vector
6308 def for the first operand 'x'. This is, as usual, obtained from
6309 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6310 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6311 relevant vector def 'vx0'. Having found 'vx0' we can generate
6312 the vector stmt VS2_0, and as usual, record it in the
6313 STMT_VINFO_VEC_STMT of stmt S2.
6314 When creating the second copy (VS2_1), we obtain the relevant vector
6315 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6316 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6317 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6318 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6319 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6320 chain of stmts and pointers:
6321 RELATED_STMT VEC_STMT
6322 VS1_0: vx0 = memref0 VS1_1 -
6323 VS1_1: vx1 = memref1 VS1_2 -
6324 VS1_2: vx2 = memref2 VS1_3 -
6325 VS1_3: vx3 = memref3 - -
6326 S1: x = load - VS1_0
6327 VS2_0: vz0 = vx0 + v1 VS2_1 -
6328 VS2_1: vz1 = vx1 + v1 VS2_2 -
6329 VS2_2: vz2 = vx2 + v1 VS2_3 -
6330 VS2_3: vz3 = vx3 + v1 - -
6331 S2: z = x + 1 - VS2_0 */
6333 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6334 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6335 /* Arguments are ready. Create the new vector stmt. */
6336 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6338 gimple *new_stmt = NULL;
6339 vop1 = ((op_type == binary_op || op_type == ternary_op)
6340 ? vec_oprnds1[i] : NULL_TREE);
6341 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6342 if (masked_loop_p && reduc_idx >= 0)
6344 /* Perform the operation on active elements only and take
6345 inactive elements from the reduction chain input. */
6346 gcc_assert (!vop2);
6347 vop2 = reduc_idx == 1 ? vop1 : vop0;
6348 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6349 vectype, i);
6350 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6351 vop0, vop1, vop2);
6352 new_temp = make_ssa_name (vec_dest, call);
6353 gimple_call_set_lhs (call, new_temp);
6354 gimple_call_set_nothrow (call, true);
6355 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6356 new_stmt = call;
6358 else
6360 tree mask = NULL_TREE;
6361 /* When combining two masks, check if either of them has elsewhere been
6362 combined with a loop mask; if so, we can mark the new combined mask
6363 as not needing to be combined with a loop mask again. */
6364 if (masked_loop_p
6365 && code == BIT_AND_EXPR
6366 && VECTOR_BOOLEAN_TYPE_P (vectype))
6368 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
6369 ncopies}))
6371 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6372 vectype, i);
6374 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6375 vop0, gsi);
6378 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
6379 ncopies }))
6381 mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6382 vectype, i);
6384 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
6385 vop1, gsi);
6389 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6390 new_temp = make_ssa_name (vec_dest, new_stmt);
6391 gimple_assign_set_lhs (new_stmt, new_temp);
6392 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6394 /* Enter the combined value into the vector cond hash so we don't
6395 AND it with a loop mask again. */
6396 if (mask)
6397 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
6399 if (vec_cvt_dest)
6401 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6402 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6403 new_temp);
6404 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6405 gimple_assign_set_lhs (new_stmt, new_temp);
6406 vect_finish_stmt_generation (vinfo, stmt_info,
6407 new_stmt, gsi);
6410 if (slp_node)
6411 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6412 else
6413 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6416 if (!slp_node)
6417 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6419 vec_oprnds0.release ();
6420 vec_oprnds1.release ();
6421 vec_oprnds2.release ();
6423 return true;
6426 /* A helper function to ensure data reference DR_INFO's base alignment. */
6428 static void
6429 ensure_base_align (dr_vec_info *dr_info)
6431 /* Alignment is only analyzed for the first element of a DR group;
6432 use that element to determine the base alignment we need to enforce. */
6433 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6434 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6436 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6438 if (dr_info->base_misaligned)
6440 tree base_decl = dr_info->base_decl;
6442 // We should only be able to increase the alignment of a base object if
6443 // we know what its new alignment should be at compile time.
6444 unsigned HOST_WIDE_INT align_base_to =
6445 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6447 if (decl_in_symtab_p (base_decl))
6448 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6449 else if (DECL_ALIGN (base_decl) < align_base_to)
6451 SET_DECL_ALIGN (base_decl, align_base_to);
6452 DECL_USER_ALIGN (base_decl) = 1;
6454 dr_info->base_misaligned = false;
6459 /* Function get_group_alias_ptr_type.
6461 Return the alias type for the group starting at FIRST_STMT_INFO. */
6463 static tree
6464 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6466 struct data_reference *first_dr, *next_dr;
6468 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6469 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6470 while (next_stmt_info)
6472 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6473 if (get_alias_set (DR_REF (first_dr))
6474 != get_alias_set (DR_REF (next_dr)))
6476 if (dump_enabled_p ())
6477 dump_printf_loc (MSG_NOTE, vect_location,
6478 "conflicting alias set types.\n");
6479 return ptr_type_node;
6481 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6483 return reference_alias_ptr_type (DR_REF (first_dr));
6487 /* Function scan_operand_equal_p.
6489 Helper function for check_scan_store. Compare two references
6490 with .GOMP_SIMD_LANE bases. */
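/* The references compared here are the D.2042[_20]-style accesses shown in
   the comment in check_scan_store below; roughly, two references are
   considered equal when they access the same "omp simd array" at the same
   lane, looking through an equal constant step multiplication and widening
   conversions applied to the lane index.  */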
6492 static bool
6493 scan_operand_equal_p (tree ref1, tree ref2)
6495 tree ref[2] = { ref1, ref2 };
6496 poly_int64 bitsize[2], bitpos[2];
6497 tree offset[2], base[2];
6498 for (int i = 0; i < 2; ++i)
6500 machine_mode mode;
6501 int unsignedp, reversep, volatilep = 0;
6502 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6503 &offset[i], &mode, &unsignedp,
6504 &reversep, &volatilep);
6505 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6506 return false;
6507 if (TREE_CODE (base[i]) == MEM_REF
6508 && offset[i] == NULL_TREE
6509 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6511 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6512 if (is_gimple_assign (def_stmt)
6513 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6514 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6515 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6517 if (maybe_ne (mem_ref_offset (base[i]), 0))
6518 return false;
6519 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6520 offset[i] = gimple_assign_rhs2 (def_stmt);
6525 if (!operand_equal_p (base[0], base[1], 0))
6526 return false;
6527 if (maybe_ne (bitsize[0], bitsize[1]))
6528 return false;
6529 if (offset[0] != offset[1])
6531 if (!offset[0] || !offset[1])
6532 return false;
6533 if (!operand_equal_p (offset[0], offset[1], 0))
6535 tree step[2];
6536 for (int i = 0; i < 2; ++i)
6538 step[i] = integer_one_node;
6539 if (TREE_CODE (offset[i]) == SSA_NAME)
6541 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6542 if (is_gimple_assign (def_stmt)
6543 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6544 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6545 == INTEGER_CST))
6547 step[i] = gimple_assign_rhs2 (def_stmt);
6548 offset[i] = gimple_assign_rhs1 (def_stmt);
6551 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6553 step[i] = TREE_OPERAND (offset[i], 1);
6554 offset[i] = TREE_OPERAND (offset[i], 0);
6556 tree rhs1 = NULL_TREE;
6557 if (TREE_CODE (offset[i]) == SSA_NAME)
6559 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6560 if (gimple_assign_cast_p (def_stmt))
6561 rhs1 = gimple_assign_rhs1 (def_stmt);
6563 else if (CONVERT_EXPR_P (offset[i]))
6564 rhs1 = TREE_OPERAND (offset[i], 0);
6565 if (rhs1
6566 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6567 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6568 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6569 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6570 offset[i] = rhs1;
6572 if (!operand_equal_p (offset[0], offset[1], 0)
6573 || !operand_equal_p (step[0], step[1], 0))
6574 return false;
6577 return true;
6581 enum scan_store_kind {
6582 /* Normal permutation. */
6583 scan_store_kind_perm,
6585 /* Whole vector left shift permutation with zero init. */
6586 scan_store_kind_lshift_zero,
6588 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6589 scan_store_kind_lshift_cond
6592 /* Function scan_store_can_perm_p.
6594 Verify whether we can perform the needed permutations or whole vector shifts.
6595 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6596 USE_WHOLE_VECTOR, if nonnull, records which kind of operation to
6597 use at each step. */
6599 static int
6600 scan_store_can_perm_p (tree vectype, tree init,
6601 vec<enum scan_store_kind> *use_whole_vector = NULL)
6603 enum machine_mode vec_mode = TYPE_MODE (vectype);
6604 unsigned HOST_WIDE_INT nunits;
6605 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6606 return -1;
6607 int units_log2 = exact_log2 (nunits);
6608 if (units_log2 <= 0)
6609 return -1;
6611 int i;
6612 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6613 for (i = 0; i <= units_log2; ++i)
6615 unsigned HOST_WIDE_INT j, k;
6616 enum scan_store_kind kind = scan_store_kind_perm;
6617 vec_perm_builder sel (nunits, nunits, 1);
6618 sel.quick_grow (nunits);
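/* On the final iteration the selector broadcasts the last lane; otherwise
   it keeps the low 2^i lanes of the first input and fills the remaining
   lanes from the start of the second input.  E.g. for eight lanes the
   selectors are { 0, 8, 9, ..., 14 }, { 0, 1, 8, ..., 13 },
   { 0, 1, 2, 3, 8, ..., 11 } and finally { 7, 7, ..., 7 }, matching the
   VEC_PERM_EXPRs in the inclusive-scan example in check_scan_store below.  */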
6619 if (i == units_log2)
6621 for (j = 0; j < nunits; ++j)
6622 sel[j] = nunits - 1;
6624 else
6626 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6627 sel[j] = j;
6628 for (k = 0; j < nunits; ++j, ++k)
6629 sel[j] = nunits + k;
6631 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6632 if (!can_vec_perm_const_p (vec_mode, indices))
6634 if (i == units_log2)
6635 return -1;
6637 if (whole_vector_shift_kind == scan_store_kind_perm)
6639 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6640 return -1;
6641 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6642 /* Whole vector shifts shift in zeros, so if init is an all-zeros
6643 constant, there is no need to do anything further. */
6644 if ((TREE_CODE (init) != INTEGER_CST
6645 && TREE_CODE (init) != REAL_CST)
6646 || !initializer_zerop (init))
6648 tree masktype = truth_type_for (vectype);
6649 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6650 return -1;
6651 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6654 kind = whole_vector_shift_kind;
6656 if (use_whole_vector)
6658 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6659 use_whole_vector->safe_grow_cleared (i, true);
6660 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6661 use_whole_vector->safe_push (kind);
6665 return units_log2;
6669 /* Function check_scan_store.
6671 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6673 static bool
6674 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6675 enum vect_def_type rhs_dt, bool slp, tree mask,
6676 vect_memory_access_type memory_access_type)
6678 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6679 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6680 tree ref_type;
6682 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6683 if (slp
6684 || mask
6685 || memory_access_type != VMAT_CONTIGUOUS
6686 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6687 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6688 || loop_vinfo == NULL
6689 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6690 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6691 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6692 || !integer_zerop (DR_INIT (dr_info->dr))
6693 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6694 || !alias_sets_conflict_p (get_alias_set (vectype),
6695 get_alias_set (TREE_TYPE (ref_type))))
6697 if (dump_enabled_p ())
6698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6699 "unsupported OpenMP scan store.\n");
6700 return false;
6703 /* We need to pattern match code built by OpenMP lowering and simplified
6704 by subsequent optimizations into something we can handle.
6705 #pragma omp simd reduction(inscan,+:r)
6706 for (...)
6708 r += something ();
6709 #pragma omp scan inclusive (r)
6710 use (r);
6712 shall have body with:
6713 // Initialization for input phase, store the reduction initializer:
6714 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6715 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6716 D.2042[_21] = 0;
6717 // Actual input phase:
6719 r.0_5 = D.2042[_20];
6720 _6 = _4 + r.0_5;
6721 D.2042[_20] = _6;
6722 // Initialization for scan phase:
6723 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6724 _26 = D.2043[_25];
6725 _27 = D.2042[_25];
6726 _28 = _26 + _27;
6727 D.2043[_25] = _28;
6728 D.2042[_25] = _28;
6729 // Actual scan phase:
6731 r.1_8 = D.2042[_20];
6733 The "omp simd array" variable D.2042 holds the privatized copy used
6734 inside of the loop and D.2043 is another one that holds copies of
6735 the current original list item. The separate GOMP_SIMD_LANE ifn
6736 kinds are there in order to allow optimizing the initializer store
6737 and combiner sequence, e.g. if it is originally some C++-ish
6738 user-defined reduction, while still allowing the vectorizer to
6739 pattern recognize it and turn it into the appropriate vectorized scan.
6741 For exclusive scan, this is slightly different:
6742 #pragma omp simd reduction(inscan,+:r)
6743 for (...)
6745 use (r);
6746 #pragma omp scan exclusive (r)
6747 r += something ();
6749 shall have body with:
6750 // Initialization for input phase, store the reduction initializer:
6751 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6752 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6753 D.2042[_21] = 0;
6754 // Actual input phase:
6756 r.0_5 = D.2042[_20];
6757 _6 = _4 + r.0_5;
6758 D.2042[_20] = _6;
6759 // Initialization for scan phase:
6760 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6761 _26 = D.2043[_25];
6762 D.2044[_25] = _26;
6763 _27 = D.2042[_25];
6764 _28 = _26 + _27;
6765 D.2043[_25] = _28;
6766 // Actual scan phase:
6768 r.1_8 = D.2044[_20];
6769 ... */
6771 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6773 /* Match the D.2042[_21] = 0; store above. Just require that
6774 it is a constant or external definition store. */
6775 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6777 fail_init:
6778 if (dump_enabled_p ())
6779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6780 "unsupported OpenMP scan initializer store.\n");
6781 return false;
6784 if (! loop_vinfo->scan_map)
6785 loop_vinfo->scan_map = new hash_map<tree, tree>;
6786 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6787 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6788 if (cached)
6789 goto fail_init;
6790 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6792 /* These stores can be vectorized normally. */
6793 return true;
6796 if (rhs_dt != vect_internal_def)
6798 fail:
6799 if (dump_enabled_p ())
6800 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6801 "unsupported OpenMP scan combiner pattern.\n");
6802 return false;
6805 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6806 tree rhs = gimple_assign_rhs1 (stmt);
6807 if (TREE_CODE (rhs) != SSA_NAME)
6808 goto fail;
6810 gimple *other_store_stmt = NULL;
6811 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6812 bool inscan_var_store
6813 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6815 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6817 if (!inscan_var_store)
6819 use_operand_p use_p;
6820 imm_use_iterator iter;
6821 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6823 gimple *use_stmt = USE_STMT (use_p);
6824 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6825 continue;
6826 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6827 || !is_gimple_assign (use_stmt)
6828 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6829 || other_store_stmt
6830 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6831 goto fail;
6832 other_store_stmt = use_stmt;
6834 if (other_store_stmt == NULL)
6835 goto fail;
6836 rhs = gimple_assign_lhs (other_store_stmt);
6837 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6838 goto fail;
6841 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6843 use_operand_p use_p;
6844 imm_use_iterator iter;
6845 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6847 gimple *use_stmt = USE_STMT (use_p);
6848 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6849 continue;
6850 if (other_store_stmt)
6851 goto fail;
6852 other_store_stmt = use_stmt;
6855 else
6856 goto fail;
6858 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6859 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6860 || !is_gimple_assign (def_stmt)
6861 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6862 goto fail;
6864 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6865 /* For pointer addition, we should use the normal plus for the vector
6866 operation. */
6867 switch (code)
6869 case POINTER_PLUS_EXPR:
6870 code = PLUS_EXPR;
6871 break;
6872 case MULT_HIGHPART_EXPR:
6873 goto fail;
6874 default:
6875 break;
6877 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6878 goto fail;
6880 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6881 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6882 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6883 goto fail;
6885 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6886 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6887 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6888 || !gimple_assign_load_p (load1_stmt)
6889 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6890 || !gimple_assign_load_p (load2_stmt))
6891 goto fail;
6893 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6894 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6895 if (load1_stmt_info == NULL
6896 || load2_stmt_info == NULL
6897 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6898 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6899 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6900 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6901 goto fail;
6903 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6905 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6906 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6907 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6908 goto fail;
6909 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6910 tree lrhs;
6911 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6912 lrhs = rhs1;
6913 else
6914 lrhs = rhs2;
6915 use_operand_p use_p;
6916 imm_use_iterator iter;
6917 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6919 gimple *use_stmt = USE_STMT (use_p);
6920 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6921 continue;
6922 if (other_store_stmt)
6923 goto fail;
6924 other_store_stmt = use_stmt;
6928 if (other_store_stmt == NULL)
6929 goto fail;
6930 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6931 || !gimple_store_p (other_store_stmt))
6932 goto fail;
6934 stmt_vec_info other_store_stmt_info
6935 = loop_vinfo->lookup_stmt (other_store_stmt);
6936 if (other_store_stmt_info == NULL
6937 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6938 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6939 goto fail;
6941 gimple *stmt1 = stmt;
6942 gimple *stmt2 = other_store_stmt;
6943 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6944 std::swap (stmt1, stmt2);
6945 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6946 gimple_assign_rhs1 (load2_stmt)))
6948 std::swap (rhs1, rhs2);
6949 std::swap (load1_stmt, load2_stmt);
6950 std::swap (load1_stmt_info, load2_stmt_info);
6952 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6953 gimple_assign_rhs1 (load1_stmt)))
6954 goto fail;
6956 tree var3 = NULL_TREE;
6957 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6958 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6959 gimple_assign_rhs1 (load2_stmt)))
6960 goto fail;
6961 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6963 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6964 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6965 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6966 goto fail;
6967 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6968 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6969 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6970 || lookup_attribute ("omp simd inscan exclusive",
6971 DECL_ATTRIBUTES (var3)))
6972 goto fail;
6975 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6976 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6977 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6978 goto fail;
6980 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6981 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6982 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6983 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6984 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6985 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6986 goto fail;
6988 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6989 std::swap (var1, var2);
6991 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6993 if (!lookup_attribute ("omp simd inscan exclusive",
6994 DECL_ATTRIBUTES (var1)))
6995 goto fail;
6996 var1 = var3;
6999 if (loop_vinfo->scan_map == NULL)
7000 goto fail;
7001 tree *init = loop_vinfo->scan_map->get (var1);
7002 if (init == NULL)
7003 goto fail;
7005 /* The IL is as expected; now check if we can actually vectorize it.
7006 Inclusive scan:
7007 _26 = D.2043[_25];
7008 _27 = D.2042[_25];
7009 _28 = _26 + _27;
7010 D.2043[_25] = _28;
7011 D.2042[_25] = _28;
7012 should be vectorized as (where _40 is the vectorized rhs
7013 from the D.2042[_21] = 0; store):
7014 _30 = MEM <vector(8) int> [(int *)&D.2043];
7015 _31 = MEM <vector(8) int> [(int *)&D.2042];
7016 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7017 _33 = _31 + _32;
7018 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7019 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7020 _35 = _33 + _34;
7021 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7022 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7023 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7024 _37 = _35 + _36;
7025 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7026 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7027 _38 = _30 + _37;
7028 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7029 MEM <vector(8) int> [(int *)&D.2043] = _39;
7030 MEM <vector(8) int> [(int *)&D.2042] = _38;
7031 Exclusive scan:
7032 _26 = D.2043[_25];
7033 D.2044[_25] = _26;
7034 _27 = D.2042[_25];
7035 _28 = _26 + _27;
7036 D.2043[_25] = _28;
7037 should be vectorized as (where _40 is the vectorized rhs
7038 from the D.2042[_21] = 0; store):
7039 _30 = MEM <vector(8) int> [(int *)&D.2043];
7040 _31 = MEM <vector(8) int> [(int *)&D.2042];
7041 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7042 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7043 _34 = _32 + _33;
7044 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7045 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7046 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7047 _36 = _34 + _35;
7048 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7049 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7050 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7051 _38 = _36 + _37;
7052 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7053 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7054 _39 = _30 + _38;
7055 _50 = _31 + _39;
7056 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7057 MEM <vector(8) int> [(int *)&D.2044] = _39;
7058 MEM <vector(8) int> [(int *)&D.2042] = _51; */
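/* Here we only verify that the target supports the combining operation on
   VECTYPE and the permutations computed by scan_store_can_perm_p; the
   actual code generation is done in vectorizable_scan_store.  */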
7059 enum machine_mode vec_mode = TYPE_MODE (vectype);
7060 optab optab = optab_for_tree_code (code, vectype, optab_default);
7061 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7062 goto fail;
7064 int units_log2 = scan_store_can_perm_p (vectype, *init);
7065 if (units_log2 == -1)
7066 goto fail;
7068 return true;
7072 /* Function vectorizable_scan_store.
7074 Helper of vectorizable_store; arguments are as for vectorizable_store.
7075 Handle only the transformation, checking is done in check_scan_store. */
7077 static bool
7078 vectorizable_scan_store (vec_info *vinfo,
7079 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7080 gimple **vec_stmt, int ncopies)
7082 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7083 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7084 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7085 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7087 if (dump_enabled_p ())
7088 dump_printf_loc (MSG_NOTE, vect_location,
7089 "transform scan store. ncopies = %d\n", ncopies);
7091 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7092 tree rhs = gimple_assign_rhs1 (stmt);
7093 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7095 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7096 bool inscan_var_store
7097 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7099 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7101 use_operand_p use_p;
7102 imm_use_iterator iter;
7103 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7105 gimple *use_stmt = USE_STMT (use_p);
7106 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7107 continue;
7108 rhs = gimple_assign_lhs (use_stmt);
7109 break;
7113 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7114 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7115 if (code == POINTER_PLUS_EXPR)
7116 code = PLUS_EXPR;
7117 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7118 && commutative_tree_code (code));
7119 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7120 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7121 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7122 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7123 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7124 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7125 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7126 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7127 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7128 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7129 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7131 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7133 std::swap (rhs1, rhs2);
7134 std::swap (var1, var2);
7135 std::swap (load1_dr_info, load2_dr_info);
7138 tree *init = loop_vinfo->scan_map->get (var1);
7139 gcc_assert (init);
7141 unsigned HOST_WIDE_INT nunits;
7142 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7143 gcc_unreachable ();
7144 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7145 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7146 gcc_assert (units_log2 > 0);
7147 auto_vec<tree, 16> perms;
7148 perms.quick_grow (units_log2 + 1);
7149 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7150 for (int i = 0; i <= units_log2; ++i)
7152 unsigned HOST_WIDE_INT j, k;
7153 vec_perm_builder sel (nunits, nunits, 1);
7154 sel.quick_grow (nunits);
7155 if (i == units_log2)
7156 for (j = 0; j < nunits; ++j)
7157 sel[j] = nunits - 1;
7158 else
7160 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7161 sel[j] = j;
7162 for (k = 0; j < nunits; ++j, ++k)
7163 sel[j] = nunits + k;
7165 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7166 if (!use_whole_vector.is_empty ()
7167 && use_whole_vector[i] != scan_store_kind_perm)
7169 if (zero_vec == NULL_TREE)
7170 zero_vec = build_zero_cst (vectype);
7171 if (masktype == NULL_TREE
7172 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7173 masktype = truth_type_for (vectype);
7174 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7176 else
7177 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7180 tree vec_oprnd1 = NULL_TREE;
7181 tree vec_oprnd2 = NULL_TREE;
7182 tree vec_oprnd3 = NULL_TREE;
7183 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7184 tree dataref_offset = build_int_cst (ref_type, 0);
7185 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7186 vectype, VMAT_CONTIGUOUS);
7187 tree ldataref_ptr = NULL_TREE;
7188 tree orig = NULL_TREE;
7189 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7190 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7191 auto_vec<tree> vec_oprnds1;
7192 auto_vec<tree> vec_oprnds2;
7193 auto_vec<tree> vec_oprnds3;
7194 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7195 *init, &vec_oprnds1,
7196 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7197 rhs2, &vec_oprnds3);
7198 for (int j = 0; j < ncopies; j++)
7200 vec_oprnd1 = vec_oprnds1[j];
7201 if (ldataref_ptr == NULL)
7202 vec_oprnd2 = vec_oprnds2[j];
7203 vec_oprnd3 = vec_oprnds3[j];
7204 if (j == 0)
7205 orig = vec_oprnd3;
7206 else if (!inscan_var_store)
7207 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7209 if (ldataref_ptr)
7211 vec_oprnd2 = make_ssa_name (vectype);
7212 tree data_ref = fold_build2 (MEM_REF, vectype,
7213 unshare_expr (ldataref_ptr),
7214 dataref_offset);
7215 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7216 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7217 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7218 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7219 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7222 tree v = vec_oprnd2;
7223 for (int i = 0; i < units_log2; ++i)
7225 tree new_temp = make_ssa_name (vectype);
7226 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7227 (zero_vec
7228 && (use_whole_vector[i]
7229 != scan_store_kind_perm))
7230 ? zero_vec : vec_oprnd1, v,
7231 perms[i]);
7232 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7233 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7234 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7236 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7238 /* Whole vector shift shifted in zero bits, but if *init
7239 is not initializer_zerop, we need to replace those elements
7240 with elements from vec_oprnd1. */
7241 tree_vector_builder vb (masktype, nunits, 1);
7242 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7243 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7244 ? boolean_false_node : boolean_true_node);
7246 tree new_temp2 = make_ssa_name (vectype);
7247 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7248 new_temp, vec_oprnd1);
7249 vect_finish_stmt_generation (vinfo, stmt_info,
7250 g, gsi);
7251 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7252 new_temp = new_temp2;
7255 /* For exclusive scan, perform the perms[i] permutation once
7256 more. */
7257 if (i == 0
7258 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7259 && v == vec_oprnd2)
7261 v = new_temp;
7262 --i;
7263 continue;
7266 tree new_temp2 = make_ssa_name (vectype);
7267 g = gimple_build_assign (new_temp2, code, v, new_temp);
7268 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7269 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7271 v = new_temp2;
7274 tree new_temp = make_ssa_name (vectype);
7275 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7276 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7277 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7279 tree last_perm_arg = new_temp;
7280 /* For exclusive scan, new_temp computed above is the exclusive scan
7281 prefix sum. Turn it into inclusive prefix sum for the broadcast
7282 of the last element into orig. */
7283 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7285 last_perm_arg = make_ssa_name (vectype);
7286 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7287 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7288 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7291 orig = make_ssa_name (vectype);
7292 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7293 last_perm_arg, perms[units_log2]);
7294 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7295 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7297 if (!inscan_var_store)
7299 tree data_ref = fold_build2 (MEM_REF, vectype,
7300 unshare_expr (dataref_ptr),
7301 dataref_offset);
7302 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7303 g = gimple_build_assign (data_ref, new_temp);
7304 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7305 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7309 if (inscan_var_store)
7310 for (int j = 0; j < ncopies; j++)
7312 if (j != 0)
7313 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7315 tree data_ref = fold_build2 (MEM_REF, vectype,
7316 unshare_expr (dataref_ptr),
7317 dataref_offset);
7318 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7319 gimple *g = gimple_build_assign (data_ref, orig);
7320 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7321 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7323 return true;
7327 /* Function vectorizable_store.
7329 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7330 that can be vectorized.
7331 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7332 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7333 Return true if STMT_INFO is vectorizable in this way. */
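/* As a rough illustration (not code from this file), a loop such as

     void f (int *restrict a, int *restrict b, int n)
     {
       for (int i = 0; i < n; i++)
         if (b[i] > 0)
           a[i] = b[i];
     }

   reaches this function either as a plain GIMPLE assignment into an
   ARRAY_REF/MEM_REF or, once the conditional store has been if-converted,
   as an internal .MASK_STORE call; those are the two shapes the checks
   below accept.  */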
7335 static bool
7336 vectorizable_store (vec_info *vinfo,
7337 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7338 gimple **vec_stmt, slp_tree slp_node,
7339 stmt_vector_for_cost *cost_vec)
7341 tree data_ref;
7342 tree op;
7343 tree vec_oprnd = NULL_TREE;
7344 tree elem_type;
7345 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7346 class loop *loop = NULL;
7347 machine_mode vec_mode;
7348 tree dummy;
7349 enum vect_def_type rhs_dt = vect_unknown_def_type;
7350 enum vect_def_type mask_dt = vect_unknown_def_type;
7351 tree dataref_ptr = NULL_TREE;
7352 tree dataref_offset = NULL_TREE;
7353 gimple *ptr_incr = NULL;
7354 int ncopies;
7355 int j;
7356 stmt_vec_info first_stmt_info;
7357 bool grouped_store;
7358 unsigned int group_size, i;
7359 vec<tree> oprnds = vNULL;
7360 vec<tree> result_chain = vNULL;
7361 vec<tree> vec_oprnds = vNULL;
7362 bool slp = (slp_node != NULL);
7363 unsigned int vec_num;
7364 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7365 tree aggr_type;
7366 gather_scatter_info gs_info;
7367 poly_uint64 vf;
7368 vec_load_store_type vls_type;
7369 tree ref_type;
7371 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7372 return false;
7374 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7375 && ! vec_stmt)
7376 return false;
7378 /* Is vectorizable store? */
7380 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7381 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7383 tree scalar_dest = gimple_assign_lhs (assign);
7384 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7385 && is_pattern_stmt_p (stmt_info))
7386 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7387 if (TREE_CODE (scalar_dest) != ARRAY_REF
7388 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7389 && TREE_CODE (scalar_dest) != INDIRECT_REF
7390 && TREE_CODE (scalar_dest) != COMPONENT_REF
7391 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7392 && TREE_CODE (scalar_dest) != REALPART_EXPR
7393 && TREE_CODE (scalar_dest) != MEM_REF)
7394 return false;
7396 else
7398 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7399 if (!call || !gimple_call_internal_p (call))
7400 return false;
7402 internal_fn ifn = gimple_call_internal_fn (call);
7403 if (!internal_store_fn_p (ifn))
7404 return false;
7406 if (slp_node != NULL)
7408 if (dump_enabled_p ())
7409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7410 "SLP of masked stores not supported.\n");
7411 return false;
7414 int mask_index = internal_fn_mask_index (ifn);
7415 if (mask_index >= 0
7416 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7417 &mask, NULL, &mask_dt, &mask_vectype))
7418 return false;
7421 op = vect_get_store_rhs (stmt_info);
7423 /* Cannot have hybrid store SLP -- that would mean storing to the
7424 same location twice. */
7425 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7427 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7428 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7430 if (loop_vinfo)
7432 loop = LOOP_VINFO_LOOP (loop_vinfo);
7433 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7435 else
7436 vf = 1;
7438 /* Multiple types in SLP are handled by creating the appropriate number of
7439 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7440 case of SLP. */
7441 if (slp)
7442 ncopies = 1;
7443 else
7444 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7446 gcc_assert (ncopies >= 1);
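/* A worked example with assumed numbers: for a vectorization factor of 8
   and a 4-element vectype, the non-SLP path needs ncopies = 8 / 4 = 2
   vector stores per scalar store, which is the division
   vect_get_num_copies performs.  */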
7448 /* FORNOW. This restriction should be relaxed. */
7449 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7451 if (dump_enabled_p ())
7452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7453 "multiple types in nested loop.\n");
7454 return false;
7457 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7458 op, &rhs_dt, &rhs_vectype, &vls_type))
7459 return false;
7461 elem_type = TREE_TYPE (vectype);
7462 vec_mode = TYPE_MODE (vectype);
7464 if (!STMT_VINFO_DATA_REF (stmt_info))
7465 return false;
7467 vect_memory_access_type memory_access_type;
7468 enum dr_alignment_support alignment_support_scheme;
7469 int misalignment;
7470 poly_int64 poffset;
7471 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7472 ncopies, &memory_access_type, &poffset,
7473 &alignment_support_scheme, &misalignment, &gs_info))
7474 return false;
7476 if (mask)
7478 if (memory_access_type == VMAT_CONTIGUOUS)
7480 if (!VECTOR_MODE_P (vec_mode)
7481 || !can_vec_mask_load_store_p (vec_mode,
7482 TYPE_MODE (mask_vectype), false))
7483 return false;
7485 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7486 && (memory_access_type != VMAT_GATHER_SCATTER
7487 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7489 if (dump_enabled_p ())
7490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7491 "unsupported access type for masked store.\n");
7492 return false;
7495 else
7497 /* FORNOW. In some cases can vectorize even if data-type not supported
7498 (e.g. - array initialization with 0). */
7499 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7500 return false;
7503 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7504 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7505 && memory_access_type != VMAT_GATHER_SCATTER
7506 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7507 if (grouped_store)
7509 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7510 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7511 group_size = DR_GROUP_SIZE (first_stmt_info);
7513 else
7515 first_stmt_info = stmt_info;
7516 first_dr_info = dr_info;
7517 group_size = vec_num = 1;
7520 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7522 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7523 memory_access_type))
7524 return false;
7527 if (!vec_stmt) /* transformation not required. */
7529 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7531 if (loop_vinfo
7532 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7533 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7534 group_size, memory_access_type,
7535 ncopies, &gs_info, mask);
7537 if (slp_node
7538 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7539 vectype))
7541 if (dump_enabled_p ())
7542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7543 "incompatible vector types for invariants\n");
7544 return false;
7547 if (dump_enabled_p ()
7548 && memory_access_type != VMAT_ELEMENTWISE
7549 && memory_access_type != VMAT_GATHER_SCATTER
7550 && alignment_support_scheme != dr_aligned)
7551 dump_printf_loc (MSG_NOTE, vect_location,
7552 "Vectorizing an unaligned access.\n");
7554 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7555 vect_model_store_cost (vinfo, stmt_info, ncopies,
7556 memory_access_type, alignment_support_scheme,
7557 misalignment, vls_type, slp_node, cost_vec);
7558 return true;
7560 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7562 /* Transform. */
7564 ensure_base_align (dr_info);
7566 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7568 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7569 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7570 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7571 tree ptr, var, scale, vec_mask;
7572 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7573 tree mask_halfvectype = mask_vectype;
7574 edge pe = loop_preheader_edge (loop);
7575 gimple_seq seq;
7576 basic_block new_bb;
7577 enum { NARROW, NONE, WIDEN } modifier;
7578 poly_uint64 scatter_off_nunits
7579 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7581 if (known_eq (nunits, scatter_off_nunits))
7582 modifier = NONE;
7583 else if (known_eq (nunits * 2, scatter_off_nunits))
7585 modifier = WIDEN;
7587 /* Currently gathers and scatters are only supported for
7588 fixed-length vectors. */
7589 unsigned int count = scatter_off_nunits.to_constant ();
7590 vec_perm_builder sel (count, count, 1);
7591 for (i = 0; i < (unsigned int) count; ++i)
7592 sel.quick_push (i | (count / 2));
7594 vec_perm_indices indices (sel, 1, count);
7595 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7596 indices);
7597 gcc_assert (perm_mask != NULL_TREE);
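/* A worked instance of the index formula above: with scatter_off_nunits
   == 8, i | (count / 2) produces sel = { 4, 5, 6, 7, 4, 5, 6, 7 }, so the
   odd copies reuse the high half of the offset vector through this
   permutation.  */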
7599 else if (known_eq (nunits, scatter_off_nunits * 2))
7601 modifier = NARROW;
7603 /* Currently gathers and scatters are only supported for
7604 fixed-length vectors. */
7605 unsigned int count = nunits.to_constant ();
7606 vec_perm_builder sel (count, count, 1);
7607 for (i = 0; i < (unsigned int) count; ++i)
7608 sel.quick_push (i | (count / 2));
7610 vec_perm_indices indices (sel, 2, count);
7611 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7612 gcc_assert (perm_mask != NULL_TREE);
7613 ncopies *= 2;
7615 if (mask)
7616 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7618 else
7619 gcc_unreachable ();
7621 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7622 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7623 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7624 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7625 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7626 scaletype = TREE_VALUE (arglist);
7628 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7629 && TREE_CODE (rettype) == VOID_TYPE);
7631 ptr = fold_convert (ptrtype, gs_info.base);
7632 if (!is_gimple_min_invariant (ptr))
7634 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7635 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7636 gcc_assert (!new_bb);
7639 if (mask == NULL_TREE)
7641 mask_arg = build_int_cst (masktype, -1);
7642 mask_arg = vect_init_vector (vinfo, stmt_info,
7643 mask_arg, masktype, NULL);
7646 scale = build_int_cst (scaletype, gs_info.scale);
7648 auto_vec<tree> vec_oprnds0;
7649 auto_vec<tree> vec_oprnds1;
7650 auto_vec<tree> vec_masks;
7651 if (mask)
7653 tree mask_vectype = truth_type_for (vectype);
7654 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7655 modifier == NARROW
7656 ? ncopies / 2 : ncopies,
7657 mask, &vec_masks, mask_vectype);
7659 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7660 modifier == WIDEN
7661 ? ncopies / 2 : ncopies,
7662 gs_info.offset, &vec_oprnds0);
7663 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7664 modifier == NARROW
7665 ? ncopies / 2 : ncopies,
7666 op, &vec_oprnds1);
7667 for (j = 0; j < ncopies; ++j)
7669 if (modifier == WIDEN)
7671 if (j & 1)
7672 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7673 perm_mask, stmt_info, gsi);
7674 else
7675 op = vec_oprnd0 = vec_oprnds0[j / 2];
7676 src = vec_oprnd1 = vec_oprnds1[j];
7677 if (mask)
7678 mask_op = vec_mask = vec_masks[j];
7680 else if (modifier == NARROW)
7682 if (j & 1)
7683 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7684 perm_mask, stmt_info, gsi);
7685 else
7686 src = vec_oprnd1 = vec_oprnds1[j / 2];
7687 op = vec_oprnd0 = vec_oprnds0[j];
7688 if (mask)
7689 mask_op = vec_mask = vec_masks[j / 2];
7691 else
7693 op = vec_oprnd0 = vec_oprnds0[j];
7694 src = vec_oprnd1 = vec_oprnds1[j];
7695 if (mask)
7696 mask_op = vec_mask = vec_masks[j];
7699 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7701 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7702 TYPE_VECTOR_SUBPARTS (srctype)));
7703 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7704 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7705 gassign *new_stmt
7706 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7707 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7708 src = var;
7711 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7713 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7714 TYPE_VECTOR_SUBPARTS (idxtype)));
7715 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7716 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7717 gassign *new_stmt
7718 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7719 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7720 op = var;
7723 if (mask)
7725 tree utype;
7726 mask_arg = mask_op;
7727 if (modifier == NARROW)
7729 var = vect_get_new_ssa_name (mask_halfvectype,
7730 vect_simple_var);
7731 gassign *new_stmt
7732 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7733 : VEC_UNPACK_LO_EXPR,
7734 mask_op);
7735 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7736 mask_arg = var;
7738 tree optype = TREE_TYPE (mask_arg);
7739 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7740 utype = masktype;
7741 else
7742 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7743 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7744 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7745 gassign *new_stmt
7746 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7747 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7748 mask_arg = var;
7749 if (!useless_type_conversion_p (masktype, utype))
7751 gcc_assert (TYPE_PRECISION (utype)
7752 <= TYPE_PRECISION (masktype));
7753 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7754 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7755 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7756 mask_arg = var;
7760 gcall *new_stmt
7761 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7762 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7764 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7766 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7767 return true;
7769 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7770 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7772 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7773 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7775 if (grouped_store)
7777 /* FORNOW */
7778 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7780 /* We vectorize all the stmts of the interleaving group when we
7781 reach the last stmt in the group. */
7782 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7783 < DR_GROUP_SIZE (first_stmt_info)
7784 && !slp)
7786 *vec_stmt = NULL;
7787 return true;
7790 if (slp)
7792 grouped_store = false;
7793 /* VEC_NUM is the number of vect stmts to be created for this
7794 group. */
7795 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7796 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7797 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7798 == first_stmt_info);
7799 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7800 op = vect_get_store_rhs (first_stmt_info);
7802 else
7803 /* VEC_NUM is the number of vect stmts to be created for this
7804 group. */
7805 vec_num = group_size;
7807 ref_type = get_group_alias_ptr_type (first_stmt_info);
7809 else
7810 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7812 if (dump_enabled_p ())
7813 dump_printf_loc (MSG_NOTE, vect_location,
7814 "transform store. ncopies = %d\n", ncopies);
7816 if (memory_access_type == VMAT_ELEMENTWISE
7817 || memory_access_type == VMAT_STRIDED_SLP)
7819 gimple_stmt_iterator incr_gsi;
7820 bool insert_after;
7821 gimple *incr;
7822 tree offvar;
7823 tree ivstep;
7824 tree running_off;
7825 tree stride_base, stride_step, alias_off;
7826 tree vec_oprnd;
7827 tree dr_offset;
7828 unsigned int g;
7829 /* Checked by get_load_store_type. */
7830 unsigned int const_nunits = nunits.to_constant ();
7832 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7833 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7835 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7836 stride_base
7837 = fold_build_pointer_plus
7838 (DR_BASE_ADDRESS (first_dr_info->dr),
7839 size_binop (PLUS_EXPR,
7840 convert_to_ptrofftype (dr_offset),
7841 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7842 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7844 /* For a store with loop-invariant (but other than power-of-2)
7845 stride (i.e. not a grouped access) like so:
7847 for (i = 0; i < n; i += stride)
7848 array[i] = ...;
7850 we generate a new induction variable and new stores from
7851 the components of the (vectorized) rhs:
7853 for (j = 0; ; j += VF*stride)
7854 vectemp = ...;
7855 tmp1 = vectemp[0];
7856 array[j] = tmp1;
7857 tmp2 = vectemp[1];
7858 array[j + stride] = tmp2;
7859 ...
7860 */
7862 unsigned nstores = const_nunits;
7863 unsigned lnel = 1;
7864 tree ltype = elem_type;
7865 tree lvectype = vectype;
7866 if (slp)
7868 if (group_size < const_nunits
7869 && const_nunits % group_size == 0)
7871 nstores = const_nunits / group_size;
7872 lnel = group_size;
7873 ltype = build_vector_type (elem_type, group_size);
7874 lvectype = vectype;
7876 /* First check if vec_extract optab doesn't support extraction
7877 of vector elts directly. */
7878 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7879 machine_mode vmode;
7880 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7881 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7882 group_size).exists (&vmode)
7883 || (convert_optab_handler (vec_extract_optab,
7884 TYPE_MODE (vectype), vmode)
7885 == CODE_FOR_nothing))
7887 /* Try to avoid emitting an extract of vector elements
7888 by performing the extracts using an integer type of the
7889 same size, extracting from a vector of those and then
7890 re-interpreting it as the original vector type if
7891 supported. */
7892 unsigned lsize
7893 = group_size * GET_MODE_BITSIZE (elmode);
7894 unsigned int lnunits = const_nunits / group_size;
7895 /* If we can't construct such a vector fall back to
7896 element extracts from the original vector type and
7897 element size stores. */
7898 if (int_mode_for_size (lsize, 0).exists (&elmode)
7899 && VECTOR_MODE_P (TYPE_MODE (vectype))
7900 && related_vector_mode (TYPE_MODE (vectype), elmode,
7901 lnunits).exists (&vmode)
7902 && (convert_optab_handler (vec_extract_optab,
7903 vmode, elmode)
7904 != CODE_FOR_nothing))
7906 nstores = lnunits;
7907 lnel = group_size;
7908 ltype = build_nonstandard_integer_type (lsize, 1);
7909 lvectype = build_vector_type (ltype, nstores);
7911 /* Else fall back to vector extraction anyway.
7912 Fewer stores are more important than avoiding spilling
7913 of the vector we extract from. Compared to the
7914 construction case in vectorizable_load no store-forwarding
7915 issue exists here for reasonable archs. */
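/* A worked example of the integer-punning path above (numbers assumed):
   storing groups of 2 ints out of an 8-element int vector gives

     lsize    = 2 * 32 = 64 bits
     lnunits  = 8 / 2  = 4
     ltype    = a 64-bit unsigned integer type
     lvectype = vector(4) of that integer type

   so four integer-sized extracts and stores replace eight element-sized
   ones.  */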
7918 else if (group_size >= const_nunits
7919 && group_size % const_nunits == 0)
7921 nstores = 1;
7922 lnel = const_nunits;
7923 ltype = vectype;
7924 lvectype = vectype;
7926 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7927 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7930 ivstep = stride_step;
7931 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7932 build_int_cst (TREE_TYPE (ivstep), vf));
7934 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7936 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7937 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7938 create_iv (stride_base, ivstep, NULL,
7939 loop, &incr_gsi, insert_after,
7940 &offvar, NULL);
7941 incr = gsi_stmt (incr_gsi);
7943 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7945 alias_off = build_int_cst (ref_type, 0);
7946 stmt_vec_info next_stmt_info = first_stmt_info;
7947 for (g = 0; g < group_size; g++)
7949 running_off = offvar;
7950 if (g)
7952 tree size = TYPE_SIZE_UNIT (ltype);
7953 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7954 size);
7955 tree newoff = copy_ssa_name (running_off, NULL);
7956 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7957 running_off, pos);
7958 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7959 running_off = newoff;
7961 if (!slp)
7962 op = vect_get_store_rhs (next_stmt_info);
7963 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7964 op, &vec_oprnds);
7965 unsigned int group_el = 0;
7966 unsigned HOST_WIDE_INT
7967 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7968 for (j = 0; j < ncopies; j++)
7970 vec_oprnd = vec_oprnds[j];
7971 /* Pun the vector to extract from if necessary. */
7972 if (lvectype != vectype)
7974 tree tem = make_ssa_name (lvectype);
7975 gimple *pun
7976 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7977 lvectype, vec_oprnd));
7978 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7979 vec_oprnd = tem;
7981 for (i = 0; i < nstores; i++)
7983 tree newref, newoff;
7984 gimple *incr, *assign;
7985 tree size = TYPE_SIZE (ltype);
7986 /* Extract the i'th component. */
7987 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7988 bitsize_int (i), size);
7989 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7990 size, pos);
7992 elem = force_gimple_operand_gsi (gsi, elem, true,
7993 NULL_TREE, true,
7994 GSI_SAME_STMT);
7996 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7997 group_el * elsz);
7998 newref = build2 (MEM_REF, ltype,
7999 running_off, this_off);
8000 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8002 /* And store it to *running_off. */
8003 assign = gimple_build_assign (newref, elem);
8004 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8006 group_el += lnel;
8007 if (! slp
8008 || group_el == group_size)
8010 newoff = copy_ssa_name (running_off, NULL);
8011 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8012 running_off, stride_step);
8013 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8015 running_off = newoff;
8016 group_el = 0;
8018 if (g == group_size - 1
8019 && !slp)
8021 if (j == 0 && i == 0)
8022 *vec_stmt = assign;
8023 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8027 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8028 vec_oprnds.release ();
8029 if (slp)
8030 break;
8033 return true;
8036 auto_vec<tree> dr_chain (group_size);
8037 oprnds.create (group_size);
8039 gcc_assert (alignment_support_scheme);
8040 vec_loop_masks *loop_masks
8041 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8042 ? &LOOP_VINFO_MASKS (loop_vinfo)
8043 : NULL);
8044 vec_loop_lens *loop_lens
8045 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8046 ? &LOOP_VINFO_LENS (loop_vinfo)
8047 : NULL);
8049 /* Shouldn't go with length-based approach if fully masked. */
8050 gcc_assert (!loop_lens || !loop_masks);
8052 /* Targets with store-lane instructions must not require explicit
8053 realignment. vect_supportable_dr_alignment always returns either
8054 dr_aligned or dr_unaligned_supported for masked operations. */
8055 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8056 && !mask
8057 && !loop_masks)
8058 || alignment_support_scheme == dr_aligned
8059 || alignment_support_scheme == dr_unaligned_supported);
8061 tree offset = NULL_TREE;
8062 if (!known_eq (poffset, 0))
8063 offset = size_int (poffset);
8065 tree bump;
8066 tree vec_offset = NULL_TREE;
8067 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8069 aggr_type = NULL_TREE;
8070 bump = NULL_TREE;
8072 else if (memory_access_type == VMAT_GATHER_SCATTER)
8074 aggr_type = elem_type;
8075 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8076 &bump, &vec_offset);
8078 else
8080 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8081 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8082 else
8083 aggr_type = vectype;
8084 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
8085 memory_access_type);
8088 if (mask)
8089 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8091 /* In case the vectorization factor (VF) is bigger than the number
8092 of elements that we can fit in a vectype (nunits), we have to generate
8093 more than one vector stmt, i.e. we need to "unroll" the
8094 vector stmt by a factor VF/nunits. */
8096 /* In case of interleaving (non-unit grouped access):
8098 S1: &base + 2 = x2
8099 S2: &base = x0
8100 S3: &base + 1 = x1
8101 S4: &base + 3 = x3
8103 We create vectorized stores starting from base address (the access of the
8104 first stmt in the chain (S2 in the above example), when the last store stmt
8105 of the chain (S4) is reached:
8107 VS1: &base = vx2
8108 VS2: &base + vec_size*1 = vx0
8109 VS3: &base + vec_size*2 = vx1
8110 VS4: &base + vec_size*3 = vx3
8112 Then permutation statements are generated:
8114 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8115 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8118 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8119 (the order of the data-refs in the output of vect_permute_store_chain
8120 corresponds to the order of scalar stmts in the interleaving chain - see
8121 the documentation of vect_permute_store_chain()).
8123 In case of both multiple types and interleaving, above vector stores and
8124 permutation stmts are created for every copy. The result vector stmts are
8125 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8126 STMT_VINFO_RELATED_STMT for the next copies.
8127 */
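/* A minimal scalar shape of such an interleaving group (illustrative
   only):

     struct P { int x, y; } *p;
     for (int i = 0; i < n; i++)
       {
         p[i].x = a[i];   // like S2: store at &base
         p[i].y = b[i];   // like S3: store at &base + 1
       }

   Both stores form one DR group of size 2; vector code is emitted only
   when the last member of the chain is reached, and
   vect_permute_store_chain interleaves the two vectorized right-hand
   sides.  */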
8129 auto_vec<tree> vec_masks;
8130 tree vec_mask = NULL;
8131 auto_vec<tree> vec_offsets;
8132 auto_vec<vec<tree> > gvec_oprnds;
8133 gvec_oprnds.safe_grow_cleared (group_size, true);
8134 for (j = 0; j < ncopies; j++)
8136 gimple *new_stmt;
8137 if (j == 0)
8139 if (slp)
8141 /* Get vectorized arguments for SLP_NODE. */
8142 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8143 op, &vec_oprnds);
8144 vec_oprnd = vec_oprnds[0];
8146 else
8148 /* For interleaved stores we collect vectorized defs for all the
8149 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8150 used as an input to vect_permute_store_chain().
8152 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8153 and OPRNDS are of size 1. */
8154 stmt_vec_info next_stmt_info = first_stmt_info;
8155 for (i = 0; i < group_size; i++)
8157 /* Since gaps are not supported for interleaved stores,
8158 DR_GROUP_SIZE is the exact number of stmts in the chain.
8159 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8160 that there is no interleaving, DR_GROUP_SIZE is 1,
8161 and only one iteration of the loop will be executed. */
8162 op = vect_get_store_rhs (next_stmt_info);
8163 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8164 ncopies, op, &gvec_oprnds[i]);
8165 vec_oprnd = gvec_oprnds[i][0];
8166 dr_chain.quick_push (gvec_oprnds[i][0]);
8167 oprnds.quick_push (gvec_oprnds[i][0]);
8168 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8170 if (mask)
8172 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8173 mask, &vec_masks, mask_vectype);
8174 vec_mask = vec_masks[0];
8178 /* We should have caught mismatched types earlier. */
8179 gcc_assert (useless_type_conversion_p (vectype,
8180 TREE_TYPE (vec_oprnd)));
8181 bool simd_lane_access_p
8182 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8183 if (simd_lane_access_p
8184 && !loop_masks
8185 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8186 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8187 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8188 && integer_zerop (DR_INIT (first_dr_info->dr))
8189 && alias_sets_conflict_p (get_alias_set (aggr_type),
8190 get_alias_set (TREE_TYPE (ref_type))))
8192 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8193 dataref_offset = build_int_cst (ref_type, 0);
8195 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8197 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8198 slp_node, &gs_info, &dataref_ptr,
8199 &vec_offsets);
8200 vec_offset = vec_offsets[0];
8202 else
8203 dataref_ptr
8204 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8205 simd_lane_access_p ? loop : NULL,
8206 offset, &dummy, gsi, &ptr_incr,
8207 simd_lane_access_p, bump);
8209 else
8211 /* For interleaved stores we created vectorized defs for all the
8212 defs stored in OPRNDS in the previous iteration (previous copy).
8213 DR_CHAIN is then used as an input to vect_permute_store_chain().
8214 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8215 OPRNDS are of size 1. */
8216 for (i = 0; i < group_size; i++)
8218 vec_oprnd = gvec_oprnds[i][j];
8219 dr_chain[i] = gvec_oprnds[i][j];
8220 oprnds[i] = gvec_oprnds[i][j];
8222 if (mask)
8223 vec_mask = vec_masks[j];
8224 if (dataref_offset)
8225 dataref_offset
8226 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8227 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8228 vec_offset = vec_offsets[j];
8229 else
8230 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8231 stmt_info, bump);
8234 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8236 tree vec_array;
8238 /* Get an array into which we can store the individual vectors. */
8239 vec_array = create_vector_array (vectype, vec_num);
8241 /* Invalidate the current contents of VEC_ARRAY. This should
8242 become an RTL clobber too, which prevents the vector registers
8243 from being upward-exposed. */
8244 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8246 /* Store the individual vectors into the array. */
8247 for (i = 0; i < vec_num; i++)
8249 vec_oprnd = dr_chain[i];
8250 write_vector_array (vinfo, stmt_info,
8251 gsi, vec_oprnd, vec_array, i);
8254 tree final_mask = NULL;
8255 if (loop_masks)
8256 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8257 vectype, j);
8258 if (vec_mask)
8259 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8260 final_mask, vec_mask, gsi);
8262 gcall *call;
8263 if (final_mask)
8265 /* Emit:
8266 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8267 VEC_ARRAY). */
8268 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8269 tree alias_ptr = build_int_cst (ref_type, align);
8270 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8271 dataref_ptr, alias_ptr,
8272 final_mask, vec_array);
8274 else
8276 /* Emit:
8277 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8278 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8279 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8280 vec_array);
8281 gimple_call_set_lhs (call, data_ref);
8283 gimple_call_set_nothrow (call, true);
8284 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8285 new_stmt = call;
8287 /* Record that VEC_ARRAY is now dead. */
8288 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8290 else
8292 new_stmt = NULL;
8293 if (grouped_store)
8295 if (j == 0)
8296 result_chain.create (group_size);
8297 /* Permute. */
8298 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8299 gsi, &result_chain);
8302 stmt_vec_info next_stmt_info = first_stmt_info;
8303 for (i = 0; i < vec_num; i++)
8305 unsigned misalign;
8306 unsigned HOST_WIDE_INT align;
8308 tree final_mask = NULL_TREE;
8309 if (loop_masks)
8310 final_mask = vect_get_loop_mask (gsi, loop_masks,
8311 vec_num * ncopies,
8312 vectype, vec_num * j + i);
8313 if (vec_mask)
8314 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8315 final_mask, vec_mask, gsi);
8317 if (memory_access_type == VMAT_GATHER_SCATTER)
8319 tree scale = size_int (gs_info.scale);
8320 gcall *call;
8321 if (final_mask)
8322 call = gimple_build_call_internal
8323 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8324 scale, vec_oprnd, final_mask);
8325 else
8326 call = gimple_build_call_internal
8327 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8328 scale, vec_oprnd);
8329 gimple_call_set_nothrow (call, true);
8330 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8331 new_stmt = call;
8332 break;
8335 if (i > 0)
8336 /* Bump the vector pointer. */
8337 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8338 gsi, stmt_info, bump);
8340 if (slp)
8341 vec_oprnd = vec_oprnds[i];
8342 else if (grouped_store)
8343 /* For grouped stores vectorized defs are interleaved in
8344 vect_permute_store_chain(). */
8345 vec_oprnd = result_chain[i];
8347 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8348 if (alignment_support_scheme == dr_aligned)
8349 misalign = 0;
8350 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8352 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8353 misalign = 0;
8355 else
8356 misalign = misalignment;
8357 if (dataref_offset == NULL_TREE
8358 && TREE_CODE (dataref_ptr) == SSA_NAME)
8359 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8360 misalign);
8361 align = least_bit_hwi (misalign | align);
8363 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8365 tree perm_mask = perm_mask_for_reverse (vectype);
8366 tree perm_dest = vect_create_destination_var
8367 (vect_get_store_rhs (stmt_info), vectype);
8368 tree new_temp = make_ssa_name (perm_dest);
8370 /* Generate the permute statement. */
8371 gimple *perm_stmt
8372 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8373 vec_oprnd, perm_mask);
8374 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8376 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8377 vec_oprnd = new_temp;
8380 /* Arguments are ready. Create the new vector stmt. */
8381 if (final_mask)
8383 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8384 gcall *call
8385 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8386 dataref_ptr, ptr,
8387 final_mask, vec_oprnd);
8388 gimple_call_set_nothrow (call, true);
8389 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8390 new_stmt = call;
8392 else if (loop_lens)
8394 tree final_len
8395 = vect_get_loop_len (loop_vinfo, loop_lens,
8396 vec_num * ncopies, vec_num * j + i);
8397 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8398 machine_mode vmode = TYPE_MODE (vectype);
8399 opt_machine_mode new_ovmode
8400 = get_len_load_store_mode (vmode, false);
8401 machine_mode new_vmode = new_ovmode.require ();
8402 /* Need conversion if it's wrapped with VnQI. */
8403 if (vmode != new_vmode)
8405 tree new_vtype
8406 = build_vector_type_for_mode (unsigned_intQI_type_node,
8407 new_vmode);
8408 tree var
8409 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8410 vec_oprnd
8411 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8412 gassign *new_stmt
8413 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8414 vec_oprnd);
8415 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8416 gsi);
8417 vec_oprnd = var;
8419 gcall *call
8420 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8421 ptr, final_len, vec_oprnd);
8422 gimple_call_set_nothrow (call, true);
8423 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8424 new_stmt = call;
8426 else
8428 data_ref = fold_build2 (MEM_REF, vectype,
8429 dataref_ptr,
8430 dataref_offset
8431 ? dataref_offset
8432 : build_int_cst (ref_type, 0));
8433 if (alignment_support_scheme == dr_aligned)
8435 else
8436 TREE_TYPE (data_ref)
8437 = build_aligned_type (TREE_TYPE (data_ref),
8438 align * BITS_PER_UNIT);
8439 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8440 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8441 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8444 if (slp)
8445 continue;
8447 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8448 if (!next_stmt_info)
8449 break;
8452 if (!slp)
8454 if (j == 0)
8455 *vec_stmt = new_stmt;
8456 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8460 for (i = 0; i < group_size; ++i)
8462 vec<tree> oprndsi = gvec_oprnds[i];
8463 oprndsi.release ();
8465 oprnds.release ();
8466 result_chain.release ();
8467 vec_oprnds.release ();
8469 return true;
8472 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8473 VECTOR_CST mask. No checks are made that the target platform supports the
8474 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8475 vect_gen_perm_mask_checked. */
8477 tree
8478 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8480 tree mask_type;
8482 poly_uint64 nunits = sel.length ();
8483 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8485 mask_type = build_vector_type (ssizetype, nunits);
8486 return vec_perm_indices_to_tree (mask_type, sel);
8489 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8490 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8492 tree
8493 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8495 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8496 return vect_gen_perm_mask_any (vectype, sel);
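/* A typical use, sketched after the way perm_mask_for_reverse (used for
   VMAT_CONTIGUOUS_REVERSE above) builds a lane-reversal mask:

     poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   The checked variant asserts can_vec_perm_const_p instead of leaving that
   check to the caller.  */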
8499 /* Given vector variables X and Y that were generated for the scalar
8500 STMT_INFO, generate instructions to permute the vector elements of X and Y
8501 using permutation mask MASK_VEC, insert them at *GSI and return the
8502 permuted vector variable. */
8504 static tree
8505 permute_vec_elements (vec_info *vinfo,
8506 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8507 gimple_stmt_iterator *gsi)
8509 tree vectype = TREE_TYPE (x);
8510 tree perm_dest, data_ref;
8511 gimple *perm_stmt;
8513 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8514 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8515 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8516 else
8517 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8518 data_ref = make_ssa_name (perm_dest);
8520 /* Generate the permute statement. */
8521 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8522 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8524 return data_ref;
8527 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8528 inserting them on the loop's preheader edge. Returns true if we
8529 were successful in doing so (and thus STMT_INFO can then be moved),
8530 otherwise returns false. */
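/* Illustrative situation (SSA names assumed): for an invariant load

     _1 = p_10 + 16;          <-- defined inside the loop, but loop-invariant
     x_2 = MEM[(int *)_1];    <-- STMT_INFO, about to be hoisted

   the definition of _1 has to be moved to the preheader first.  If _1's
   own operands were in turn defined by statements inside the loop we give
   up rather than recurse.  */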
8532 static bool
8533 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8535 ssa_op_iter i;
8536 tree op;
8537 bool any = false;
8539 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8541 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8542 if (!gimple_nop_p (def_stmt)
8543 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8545 /* Make sure we don't need to recurse. While we could do
8546 so in simple cases, when there are more complex use webs
8547 we don't have an easy way to preserve stmt order to fulfil
8548 dependencies within them. */
8549 tree op2;
8550 ssa_op_iter i2;
8551 if (gimple_code (def_stmt) == GIMPLE_PHI)
8552 return false;
8553 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8555 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8556 if (!gimple_nop_p (def_stmt2)
8557 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8558 return false;
8560 any = true;
8564 if (!any)
8565 return true;
8567 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8569 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8570 if (!gimple_nop_p (def_stmt)
8571 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8573 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8574 gsi_remove (&gsi, false);
8575 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8579 return true;
8582 /* vectorizable_load.
8584 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8585 that can be vectorized.
8586 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8587 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8588 Return true if STMT_INFO is vectorizable in this way. */
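/* As a rough illustration (not code from this file): an unconditional read

     for (int i = 0; i < n; i++)
       sum += a[i];

   arrives here as a plain assignment from an ARRAY_REF/MEM_REF, while a
   conditional read such as "if (c[i]) x = a[i];" is if-converted into an
   internal .MASK_LOAD call; both forms are matched by the checks below.  */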
8590 static bool
8591 vectorizable_load (vec_info *vinfo,
8592 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8593 gimple **vec_stmt, slp_tree slp_node,
8594 stmt_vector_for_cost *cost_vec)
8596 tree scalar_dest;
8597 tree vec_dest = NULL;
8598 tree data_ref = NULL;
8599 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8600 class loop *loop = NULL;
8601 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8602 bool nested_in_vect_loop = false;
8603 tree elem_type;
8604 tree new_temp;
8605 machine_mode mode;
8606 tree dummy;
8607 tree dataref_ptr = NULL_TREE;
8608 tree dataref_offset = NULL_TREE;
8609 gimple *ptr_incr = NULL;
8610 int ncopies;
8611 int i, j;
8612 unsigned int group_size;
8613 poly_uint64 group_gap_adj;
8614 tree msq = NULL_TREE, lsq;
8615 tree realignment_token = NULL_TREE;
8616 gphi *phi = NULL;
8617 vec<tree> dr_chain = vNULL;
8618 bool grouped_load = false;
8619 stmt_vec_info first_stmt_info;
8620 stmt_vec_info first_stmt_info_for_drptr = NULL;
8621 bool compute_in_loop = false;
8622 class loop *at_loop;
8623 int vec_num;
8624 bool slp = (slp_node != NULL);
8625 bool slp_perm = false;
8626 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8627 poly_uint64 vf;
8628 tree aggr_type;
8629 gather_scatter_info gs_info;
8630 tree ref_type;
8631 enum vect_def_type mask_dt = vect_unknown_def_type;
8633 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8634 return false;
8636 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8637 && ! vec_stmt)
8638 return false;
8640 if (!STMT_VINFO_DATA_REF (stmt_info))
8641 return false;
8643 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8644 int mask_index = -1;
8645 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8647 scalar_dest = gimple_assign_lhs (assign);
8648 if (TREE_CODE (scalar_dest) != SSA_NAME)
8649 return false;
8651 tree_code code = gimple_assign_rhs_code (assign);
8652 if (code != ARRAY_REF
8653 && code != BIT_FIELD_REF
8654 && code != INDIRECT_REF
8655 && code != COMPONENT_REF
8656 && code != IMAGPART_EXPR
8657 && code != REALPART_EXPR
8658 && code != MEM_REF
8659 && TREE_CODE_CLASS (code) != tcc_declaration)
8660 return false;
8662 else
8664 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8665 if (!call || !gimple_call_internal_p (call))
8666 return false;
8668 internal_fn ifn = gimple_call_internal_fn (call);
8669 if (!internal_load_fn_p (ifn))
8670 return false;
8672 scalar_dest = gimple_call_lhs (call);
8673 if (!scalar_dest)
8674 return false;
8676 mask_index = internal_fn_mask_index (ifn);
8677 /* ??? For SLP the mask operand is always last. */
8678 if (mask_index >= 0 && slp_node)
8679 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
8680 if (mask_index >= 0
8681 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8682 &mask, NULL, &mask_dt, &mask_vectype))
8683 return false;
8686 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8687 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8689 if (loop_vinfo)
8691 loop = LOOP_VINFO_LOOP (loop_vinfo);
8692 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8693 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8695 else
8696 vf = 1;
8698 /* Multiple types in SLP are handled by creating the appropriate number of
8699 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8700 case of SLP. */
8701 if (slp)
8702 ncopies = 1;
8703 else
8704 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8706 gcc_assert (ncopies >= 1);
8708 /* FORNOW. This restriction should be relaxed. */
8709 if (nested_in_vect_loop && ncopies > 1)
8711 if (dump_enabled_p ())
8712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8713 "multiple types in nested loop.\n");
8714 return false;
8717 /* Invalidate assumptions made by dependence analysis when vectorization
8718 on the unrolled body effectively re-orders stmts. */
8719 if (ncopies > 1
8720 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8721 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8722 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8724 if (dump_enabled_p ())
8725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8726 "cannot perform implicit CSE when unrolling "
8727 "with negative dependence distance\n");
8728 return false;
8731 elem_type = TREE_TYPE (vectype);
8732 mode = TYPE_MODE (vectype);
8734 /* FORNOW. In some cases can vectorize even if data-type not supported
8735 (e.g. - data copies). */
8736 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8738 if (dump_enabled_p ())
8739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8740 "Aligned load, but unsupported type.\n");
8741 return false;
8744 /* Check if the load is a part of an interleaving chain. */
8745 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8747 grouped_load = true;
8748 /* FORNOW */
8749 gcc_assert (!nested_in_vect_loop);
8750 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8752 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8753 group_size = DR_GROUP_SIZE (first_stmt_info);
8755 /* Refuse non-SLP vectorization of SLP-only groups. */
8756 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8758 if (dump_enabled_p ())
8759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8760 "cannot vectorize load in non-SLP mode.\n");
8761 return false;
8764 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8766 slp_perm = true;
8768 if (!loop_vinfo)
8770 /* In BB vectorization we may not actually use a loaded vector
8771 accessing elements in excess of DR_GROUP_SIZE. */
8772 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8773 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8774 unsigned HOST_WIDE_INT nunits;
8775 unsigned j, k, maxk = 0;
8776 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8777 if (k > maxk)
8778 maxk = k;
8779 tree vectype = SLP_TREE_VECTYPE (slp_node);
8780 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8781 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8783 if (dump_enabled_p ())
8784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8785 "BB vectorization with gaps at the end of "
8786 "a load is not supported\n");
8787 return false;
8791 auto_vec<tree> tem;
8792 unsigned n_perms;
8793 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8794 true, &n_perms))
8796 if (dump_enabled_p ())
8797 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8798 vect_location,
8799 "unsupported load permutation\n");
8800 return false;
8804 /* Invalidate assumptions made by dependence analysis when vectorization
8805 on the unrolled body effectively re-orders stmts. */
8806 if (!PURE_SLP_STMT (stmt_info)
8807 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8808 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8809 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8811 if (dump_enabled_p ())
8812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8813 "cannot perform implicit CSE when performing "
8814 "group loads with negative dependence distance\n");
8815 return false;
8818 else
8819 group_size = 1;
8821 vect_memory_access_type memory_access_type;
8822 enum dr_alignment_support alignment_support_scheme;
8823 int misalignment;
8824 poly_int64 poffset;
8825 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8826 ncopies, &memory_access_type, &poffset,
8827 &alignment_support_scheme, &misalignment, &gs_info))
8828 return false;
8830 if (mask)
8832 if (memory_access_type == VMAT_CONTIGUOUS)
8834 machine_mode vec_mode = TYPE_MODE (vectype);
8835 if (!VECTOR_MODE_P (vec_mode)
8836 || !can_vec_mask_load_store_p (vec_mode,
8837 TYPE_MODE (mask_vectype), true))
8838 return false;
8840 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8841 && memory_access_type != VMAT_GATHER_SCATTER)
8843 if (dump_enabled_p ())
8844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8845 "unsupported access type for masked load.\n");
8846 return false;
8848 else if (memory_access_type == VMAT_GATHER_SCATTER
8849 && gs_info.ifn == IFN_LAST
8850 && !gs_info.decl)
8852 if (dump_enabled_p ())
8853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8854 "unsupported masked emulated gather.\n");
8855 return false;
8859 if (!vec_stmt) /* transformation not required. */
8861 if (slp_node
8862 && mask
8863 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8864 mask_vectype))
8866 if (dump_enabled_p ())
8867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8868 "incompatible vector types for invariants\n");
8869 return false;
8872 if (!slp)
8873 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8875 if (loop_vinfo
8876 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8877 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8878 group_size, memory_access_type,
8879 ncopies, &gs_info, mask);
8881 if (dump_enabled_p ()
8882 && memory_access_type != VMAT_ELEMENTWISE
8883 && memory_access_type != VMAT_GATHER_SCATTER
8884 && alignment_support_scheme != dr_aligned)
8885 dump_printf_loc (MSG_NOTE, vect_location,
8886 "Vectorizing an unaligned access.\n");
8888 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8889 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8890 alignment_support_scheme, misalignment,
8891 &gs_info, slp_node, cost_vec);
8892 return true;
8895 if (!slp)
8896 gcc_assert (memory_access_type
8897 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8899 if (dump_enabled_p ())
8900 dump_printf_loc (MSG_NOTE, vect_location,
8901 "transform load. ncopies = %d\n", ncopies);
8903 /* Transform. */
8905 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8906 ensure_base_align (dr_info);
8908 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8910 vect_build_gather_load_calls (vinfo,
8911 stmt_info, gsi, vec_stmt, &gs_info, mask);
8912 return true;
8915 if (memory_access_type == VMAT_INVARIANT)
8917 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8918 /* If we have versioned for aliasing or the loop doesn't
8919 have any data dependencies that would preclude this,
8920 then we are sure this is a loop invariant load and
8921 thus we can insert it on the preheader edge. */
8922 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8923 && !nested_in_vect_loop
8924 && hoist_defs_of_uses (stmt_info, loop));
8925 if (hoist_p)
8927 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8928 if (dump_enabled_p ())
8929 dump_printf_loc (MSG_NOTE, vect_location,
8930 "hoisting out of the vectorized loop: %G", stmt);
8931 scalar_dest = copy_ssa_name (scalar_dest);
8932 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8933 gsi_insert_on_edge_immediate
8934 (loop_preheader_edge (loop),
8935 gimple_build_assign (scalar_dest, rhs));
8937 /* These copies are all equivalent, but currently the representation
8938 requires a separate STMT_VINFO_VEC_STMT for each one. */
8939 gimple_stmt_iterator gsi2 = *gsi;
8940 gsi_next (&gsi2);
8941 for (j = 0; j < ncopies; j++)
8943 if (hoist_p)
8944 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8945 vectype, NULL);
8946 else
8947 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8948 vectype, &gsi2);
8949 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8950 if (slp)
8951 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8952 else
8954 if (j == 0)
8955 *vec_stmt = new_stmt;
8956 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8959 return true;
8962 if (memory_access_type == VMAT_ELEMENTWISE
8963 || memory_access_type == VMAT_STRIDED_SLP)
8965 gimple_stmt_iterator incr_gsi;
8966 bool insert_after;
8967 tree offvar;
8968 tree ivstep;
8969 tree running_off;
8970 vec<constructor_elt, va_gc> *v = NULL;
8971 tree stride_base, stride_step, alias_off;
8972 /* Checked by get_load_store_type. */
8973 unsigned int const_nunits = nunits.to_constant ();
8974 unsigned HOST_WIDE_INT cst_offset = 0;
8975 tree dr_offset;
8977 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8978 gcc_assert (!nested_in_vect_loop);
8980 if (grouped_load)
8982 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8983 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8985 else
8987 first_stmt_info = stmt_info;
8988 first_dr_info = dr_info;
8990 if (slp && grouped_load)
8992 group_size = DR_GROUP_SIZE (first_stmt_info);
8993 ref_type = get_group_alias_ptr_type (first_stmt_info);
8995 else
8997 if (grouped_load)
8998 cst_offset
8999 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9000 * vect_get_place_in_interleaving_chain (stmt_info,
9001 first_stmt_info));
9002 group_size = 1;
9003 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9006 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9007 stride_base
9008 = fold_build_pointer_plus
9009 (DR_BASE_ADDRESS (first_dr_info->dr),
9010 size_binop (PLUS_EXPR,
9011 convert_to_ptrofftype (dr_offset),
9012 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9013 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9015 /* For a load with loop-invariant (but other than power-of-2)
9016 stride (i.e. not a grouped access) like so:
9018 for (i = 0; i < n; i += stride)
9019 ... = array[i];
9021 we generate a new induction variable and new accesses to
9022 form a new vector (or vectors, depending on ncopies):
9024 for (j = 0; ; j += VF*stride)
9025 tmp1 = array[j];
9026 tmp2 = array[j + stride];
9028 vectemp = {tmp1, tmp2, ...}
9031 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9032 build_int_cst (TREE_TYPE (stride_step), vf));
9034 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9036 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9037 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9038 create_iv (stride_base, ivstep, NULL,
9039 loop, &incr_gsi, insert_after,
9040 &offvar, NULL);
9042 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9044 running_off = offvar;
9045 alias_off = build_int_cst (ref_type, 0);
9046 int nloads = const_nunits;
9047 int lnel = 1;
9048 tree ltype = TREE_TYPE (vectype);
9049 tree lvectype = vectype;
9050 auto_vec<tree> dr_chain;
9051 if (memory_access_type == VMAT_STRIDED_SLP)
9053 if (group_size < const_nunits)
9055 /* First check if vec_init optab supports construction from vector
9056 elts directly. Otherwise avoid emitting a constructor of
9057 vector elements by performing the loads using an integer type
9058 of the same size, constructing a vector of those and then
9059 re-interpreting it as the original vector type. This avoids a
9060 huge runtime penalty due to the general inability to perform
9061 store forwarding from smaller stores to a larger load. */
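              /* As an illustration (the exact modes are target-dependent and
                 this example is not from the original sources): with a V4SI
                 vectype and group_size == 2, each two-element subgroup can be
                 loaded as a single DI (or V2SI) chunk, the two chunks
                 combined into a two-element vector, and the result
                 VIEW_CONVERT'ed back to V4SI, instead of building a V4SI
                 constructor from four separate SI loads.  */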
9062 tree ptype;
9063 tree vtype
9064 = vector_vector_composition_type (vectype,
9065 const_nunits / group_size,
9066 &ptype);
9067 if (vtype != NULL_TREE)
9069 nloads = const_nunits / group_size;
9070 lnel = group_size;
9071 lvectype = vtype;
9072 ltype = ptype;
9075 else
9077 nloads = 1;
9078 lnel = const_nunits;
9079 ltype = vectype;
9081 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9083 /* Load vector(1) scalar_type directly if the vectype has just one element. */
9084 else if (nloads == 1)
9085 ltype = vectype;
9087 if (slp)
9089 /* For SLP permutation support we need to load the whole group,
9090 not only the number of vector stmts the permutation result
9091 fits in. */
9092 if (slp_perm)
9094 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9095 variable VF. */
9096 unsigned int const_vf = vf.to_constant ();
9097 ncopies = CEIL (group_size * const_vf, const_nunits);
9098 dr_chain.create (ncopies);
9100 else
9101 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9103 unsigned int group_el = 0;
9104 unsigned HOST_WIDE_INT
9105 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9106 for (j = 0; j < ncopies; j++)
9108 if (nloads > 1)
9109 vec_alloc (v, nloads);
9110 gimple *new_stmt = NULL;
9111 for (i = 0; i < nloads; i++)
9113 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9114 group_el * elsz + cst_offset);
9115 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9116 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9117 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9118 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9119 if (nloads > 1)
9120 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9121 gimple_assign_lhs (new_stmt));
9123 group_el += lnel;
9124 if (! slp
9125 || group_el == group_size)
9127 tree newoff = copy_ssa_name (running_off);
9128 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9129 running_off, stride_step);
9130 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9132 running_off = newoff;
9133 group_el = 0;
9136 if (nloads > 1)
9138 tree vec_inv = build_constructor (lvectype, v);
9139 new_temp = vect_init_vector (vinfo, stmt_info,
9140 vec_inv, lvectype, gsi);
9141 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9142 if (lvectype != vectype)
9144 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9145 VIEW_CONVERT_EXPR,
9146 build1 (VIEW_CONVERT_EXPR,
9147 vectype, new_temp));
9148 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9152 if (slp)
9154 if (slp_perm)
9155 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9156 else
9157 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9159 else
9161 if (j == 0)
9162 *vec_stmt = new_stmt;
9163 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9166 if (slp_perm)
9168 unsigned n_perms;
9169 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9170 false, &n_perms);
9172 return true;
9175 if (memory_access_type == VMAT_GATHER_SCATTER
9176 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9177 grouped_load = false;
9179 if (grouped_load)
9181 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9182 group_size = DR_GROUP_SIZE (first_stmt_info);
9183 /* For SLP vectorization we directly vectorize a subchain
9184 without permutation. */
9185 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9186 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9187 /* For BB vectorization always use the first stmt to base
9188 the data ref pointer on. */
9189 if (bb_vinfo)
9190 first_stmt_info_for_drptr
9191 = vect_find_first_scalar_stmt_in_slp (slp_node);
9193 /* Check if the chain of loads is already vectorized. */
9194 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9195 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9196 ??? But we can only do so if there is exactly one
9197 as we have no way to get at the rest. Leave the CSE
9198 opportunity alone.
9199 ??? With the group load eventually participating
9200 in multiple different permutations (having multiple
9201 slp nodes which refer to the same group) the CSE
9202 is even wrong code. See PR56270. */
9203 && !slp)
9205 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9206 return true;
9208 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9209 group_gap_adj = 0;
9211 /* VEC_NUM is the number of vect stmts to be created for this group. */
9212 if (slp)
9214 grouped_load = false;
9215 /* If an SLP permutation is from N elements to N elements,
9216 and if one vector holds a whole number of N, we can load
9217 the inputs to the permutation in the same way as an
9218 unpermuted sequence. In other cases we need to load the
9219 whole group, not only the number of vector stmts the
9220 permutation result fits in. */
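          /* For example (an illustration only): a permutation of a 4-element
             group into 4 lanes with V8SI vectors (8 is a multiple of 4) can
             reuse the unpermuted loads, whereas a permutation that uses only
             3 lanes of a 4-element group must load the whole group, i.e.
             CEIL (4 * VF / nunits) vectors.  */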
9221 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9222 if (slp_perm
9223 && (group_size != scalar_lanes
9224 || !multiple_p (nunits, group_size)))
9226 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9227 variable VF; see vect_transform_slp_perm_load. */
9228 unsigned int const_vf = vf.to_constant ();
9229 unsigned int const_nunits = nunits.to_constant ();
9230 vec_num = CEIL (group_size * const_vf, const_nunits);
9231 group_gap_adj = vf * group_size - nunits * vec_num;
9233 else
9235 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9236 group_gap_adj
9237 = group_size - scalar_lanes;
9240 else
9241 vec_num = group_size;
9243 ref_type = get_group_alias_ptr_type (first_stmt_info);
9245 else
9247 first_stmt_info = stmt_info;
9248 first_dr_info = dr_info;
9249 group_size = vec_num = 1;
9250 group_gap_adj = 0;
9251 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9252 if (slp)
9253 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9256 gcc_assert (alignment_support_scheme);
9257 vec_loop_masks *loop_masks
9258 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9259 ? &LOOP_VINFO_MASKS (loop_vinfo)
9260 : NULL);
9261 vec_loop_lens *loop_lens
9262 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9263 ? &LOOP_VINFO_LENS (loop_vinfo)
9264 : NULL);
9266 /* We should not use the length-based approach if the loop is fully masked. */
9267 gcc_assert (!loop_lens || !loop_masks);
9269 /* Targets with store-lane instructions must not require explicit
9270 realignment. vect_supportable_dr_alignment always returns either
9271 dr_aligned or dr_unaligned_supported for masked operations. */
9272 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9273 && !mask
9274 && !loop_masks)
9275 || alignment_support_scheme == dr_aligned
9276 || alignment_support_scheme == dr_unaligned_supported);
9278 /* In case the vectorization factor (VF) is bigger than the number
9279 of elements that we can fit in a vectype (nunits), we have to generate
9280 more than one vector stmt - i.e., we need to "unroll" the
9281 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9282 from one copy of the vector stmt to the next, in the field
9283 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9284 stages to find the correct vector defs to be used when vectorizing
9285 stmts that use the defs of the current stmt. The example below
9286 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9287 need to create 4 vectorized stmts):
9289 before vectorization:
9290 RELATED_STMT VEC_STMT
9291 S1: x = memref - -
9292 S2: z = x + 1 - -
9294 step 1: vectorize stmt S1:
9295 We first create the vector stmt VS1_0, and, as usual, record a
9296 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9297 Next, we create the vector stmt VS1_1, and record a pointer to
9298 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9299 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9300 stmts and pointers:
9301 RELATED_STMT VEC_STMT
9302 VS1_0: vx0 = memref0 VS1_1 -
9303 VS1_1: vx1 = memref1 VS1_2 -
9304 VS1_2: vx2 = memref2 VS1_3 -
9305 VS1_3: vx3 = memref3 - -
9306 S1: x = load - VS1_0
9307 S2: z = x + 1 - -
9310 /* In case of interleaving (non-unit grouped access):
9312 S1: x2 = &base + 2
9313 S2: x0 = &base
9314 S3: x1 = &base + 1
9315 S4: x3 = &base + 3
9317 Vectorized loads are created in the order of memory accesses
9318 starting from the access of the first stmt of the chain:
9320 VS1: vx0 = &base
9321 VS2: vx1 = &base + vec_size*1
9322 VS3: vx3 = &base + vec_size*2
9323 VS4: vx4 = &base + vec_size*3
9325 Then permutation statements are generated:
9327 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9328 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9331 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9332 (the order of the data-refs in the output of vect_permute_load_chain
9333 corresponds to the order of scalar stmts in the interleaving chain - see
9334 the documentation of vect_permute_load_chain()).
9335 The generation of permutation stmts and recording them in
9336 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9338 In case of both multiple types and interleaving, the vector loads and
9339 permutation stmts above are created for every copy. The result vector
9340 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9341 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9343 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9344 on a target that supports unaligned accesses (dr_unaligned_supported)
9345 we generate the following code:
9346 p = initial_addr;
9347 indx = 0;
9348 loop {
9349 p = p + indx * vectype_size;
9350 vec_dest = *(p);
9351 indx = indx + 1;
9354 Otherwise, the data reference is potentially unaligned on a target that
9355 does not support unaligned accesses (dr_explicit_realign_optimized) -
9356 then generate the following code, in which the data in each iteration is
9357 obtained by two vector loads, one from the previous iteration, and one
9358 from the current iteration:
9359 p1 = initial_addr;
9360 msq_init = *(floor(p1))
9361 p2 = initial_addr + VS - 1;
9362 realignment_token = call target_builtin;
9363 indx = 0;
9364 loop {
9365 p2 = p2 + indx * vectype_size
9366 lsq = *(floor(p2))
9367 vec_dest = realign_load (msq, lsq, realignment_token)
9368 indx = indx + 1;
9369 msq = lsq;
9370 } */
9372 /* If the misalignment remains the same throughout the execution of the
9373 loop, we can create the init_addr and permutation mask at the loop
9374 preheader.  Otherwise, they need to be created inside the loop.
9375 This can only occur when vectorizing memory accesses in the inner-loop
9376 nested within an outer-loop that is being vectorized. */
9378 if (nested_in_vect_loop
9379 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9380 GET_MODE_SIZE (TYPE_MODE (vectype))))
9382 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9383 compute_in_loop = true;
9386 bool diff_first_stmt_info
9387 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9389 tree offset = NULL_TREE;
9390 if ((alignment_support_scheme == dr_explicit_realign_optimized
9391 || alignment_support_scheme == dr_explicit_realign)
9392 && !compute_in_loop)
9394 /* If we have a different first_stmt_info, we can't set up the realignment
9395 here, since we can't guarantee that the first_stmt_info DR has been
9396 initialized yet.  Instead use the first_stmt_info_for_drptr DR, bumping
9397 it by its distance from the first_stmt_info DR, as below. */
9398 if (!diff_first_stmt_info)
9399 msq = vect_setup_realignment (vinfo,
9400 first_stmt_info, gsi, &realignment_token,
9401 alignment_support_scheme, NULL_TREE,
9402 &at_loop);
9403 if (alignment_support_scheme == dr_explicit_realign_optimized)
9405 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9406 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9407 size_one_node);
9408 gcc_assert (!first_stmt_info_for_drptr);
9411 else
9412 at_loop = loop;
9414 if (!known_eq (poffset, 0))
9415 offset = (offset
9416 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9417 : size_int (poffset));
9419 tree bump;
9420 tree vec_offset = NULL_TREE;
9421 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9423 aggr_type = NULL_TREE;
9424 bump = NULL_TREE;
9426 else if (memory_access_type == VMAT_GATHER_SCATTER)
9428 aggr_type = elem_type;
9429 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9430 &bump, &vec_offset);
9432 else
9434 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9435 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9436 else
9437 aggr_type = vectype;
9438 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9439 memory_access_type);
9442 vec<tree> vec_offsets = vNULL;
9443 auto_vec<tree> vec_masks;
9444 if (mask)
9446 if (slp_node)
9447 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
9448 &vec_masks);
9449 else
9450 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
9451 &vec_masks, mask_vectype);
9453 tree vec_mask = NULL_TREE;
9454 poly_uint64 group_elt = 0;
9455 for (j = 0; j < ncopies; j++)
9457 /* 1. Create the vector or array pointer update chain. */
9458 if (j == 0)
9460 bool simd_lane_access_p
9461 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9462 if (simd_lane_access_p
9463 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9464 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9465 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9466 && integer_zerop (DR_INIT (first_dr_info->dr))
9467 && alias_sets_conflict_p (get_alias_set (aggr_type),
9468 get_alias_set (TREE_TYPE (ref_type)))
9469 && (alignment_support_scheme == dr_aligned
9470 || alignment_support_scheme == dr_unaligned_supported))
9472 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9473 dataref_offset = build_int_cst (ref_type, 0);
9475 else if (diff_first_stmt_info)
9477 dataref_ptr
9478 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9479 aggr_type, at_loop, offset, &dummy,
9480 gsi, &ptr_incr, simd_lane_access_p,
9481 bump);
9482 /* Adjust the pointer by the difference to first_stmt. */
9483 data_reference_p ptrdr
9484 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9485 tree diff
9486 = fold_convert (sizetype,
9487 size_binop (MINUS_EXPR,
9488 DR_INIT (first_dr_info->dr),
9489 DR_INIT (ptrdr)));
9490 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9491 stmt_info, diff);
9492 if (alignment_support_scheme == dr_explicit_realign)
9494 msq = vect_setup_realignment (vinfo,
9495 first_stmt_info_for_drptr, gsi,
9496 &realignment_token,
9497 alignment_support_scheme,
9498 dataref_ptr, &at_loop);
9499 gcc_assert (!compute_in_loop);
9502 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9504 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9505 slp_node, &gs_info, &dataref_ptr,
9506 &vec_offsets);
9508 else
9509 dataref_ptr
9510 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9511 at_loop,
9512 offset, &dummy, gsi, &ptr_incr,
9513 simd_lane_access_p, bump);
9514 if (mask)
9515 vec_mask = vec_masks[0];
9517 else
9519 if (dataref_offset)
9520 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9521 bump);
9522 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9523 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9524 stmt_info, bump);
9525 if (mask)
9526 vec_mask = vec_masks[j];
9529 if (grouped_load || slp_perm)
9530 dr_chain.create (vec_num);
9532 gimple *new_stmt = NULL;
9533 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9535 tree vec_array;
9537 vec_array = create_vector_array (vectype, vec_num);
9539 tree final_mask = NULL_TREE;
9540 if (loop_masks)
9541 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9542 vectype, j);
9543 if (vec_mask)
9544 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9545 final_mask, vec_mask, gsi);
9547 gcall *call;
9548 if (final_mask)
9550 /* Emit:
9551 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9552 VEC_MASK). */
9553 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9554 tree alias_ptr = build_int_cst (ref_type, align);
9555 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9556 dataref_ptr, alias_ptr,
9557 final_mask);
9559 else
9561 /* Emit:
9562 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9563 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9564 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9566 gimple_call_set_lhs (call, vec_array);
9567 gimple_call_set_nothrow (call, true);
9568 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9569 new_stmt = call;
9571 /* Extract each vector into an SSA_NAME. */
9572 for (i = 0; i < vec_num; i++)
9574 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9575 vec_array, i);
9576 dr_chain.quick_push (new_temp);
9579 /* Record the mapping between SSA_NAMEs and statements. */
9580 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9582 /* Record that VEC_ARRAY is now dead. */
9583 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9585 else
9587 for (i = 0; i < vec_num; i++)
9589 tree final_mask = NULL_TREE;
9590 if (loop_masks
9591 && memory_access_type != VMAT_INVARIANT)
9592 final_mask = vect_get_loop_mask (gsi, loop_masks,
9593 vec_num * ncopies,
9594 vectype, vec_num * j + i);
9595 if (vec_mask)
9596 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9597 final_mask, vec_mask, gsi);
9599 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9600 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9601 gsi, stmt_info, bump);
9603 /* 2. Create the vector-load in the loop. */
9604 switch (alignment_support_scheme)
9606 case dr_aligned:
9607 case dr_unaligned_supported:
9609 unsigned int misalign;
9610 unsigned HOST_WIDE_INT align;
9612 if (memory_access_type == VMAT_GATHER_SCATTER
9613 && gs_info.ifn != IFN_LAST)
9615 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9616 vec_offset = vec_offsets[vec_num * j + i];
9617 tree zero = build_zero_cst (vectype);
9618 tree scale = size_int (gs_info.scale);
9619 gcall *call;
9620 if (final_mask)
9621 call = gimple_build_call_internal
9622 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9623 vec_offset, scale, zero, final_mask);
9624 else
9625 call = gimple_build_call_internal
9626 (IFN_GATHER_LOAD, 4, dataref_ptr,
9627 vec_offset, scale, zero);
9628 gimple_call_set_nothrow (call, true);
9629 new_stmt = call;
9630 data_ref = NULL_TREE;
9631 break;
9633 else if (memory_access_type == VMAT_GATHER_SCATTER)
9635 /* Emulated gather-scatter. */
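                    /* That is, without a gather ifn or builtin the load is
                       open-coded lane by lane, roughly (illustration only):
                         vectemp = { *(base + off[0] * scale),
                                     *(base + off[1] * scale), ... };
                       each element is loaded through a scalar MEM_REF and the
                       results are assembled with a CONSTRUCTOR.  */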
9636 gcc_assert (!final_mask);
9637 unsigned HOST_WIDE_INT const_nunits
9638 = nunits.to_constant ();
9639 unsigned HOST_WIDE_INT const_offset_nunits
9640 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9641 .to_constant ();
9642 vec<constructor_elt, va_gc> *ctor_elts;
9643 vec_alloc (ctor_elts, const_nunits);
9644 gimple_seq stmts = NULL;
9645 /* We support offset vectors with more elements
9646 than the data vector for now. */
9647 unsigned HOST_WIDE_INT factor
9648 = const_offset_nunits / const_nunits;
9649 vec_offset = vec_offsets[j / factor];
9650 unsigned elt_offset = (j % factor) * const_nunits;
9651 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9652 tree scale = size_int (gs_info.scale);
9653 align
9654 = get_object_alignment (DR_REF (first_dr_info->dr));
9655 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9656 align);
9657 for (unsigned k = 0; k < const_nunits; ++k)
9659 tree boff = size_binop (MULT_EXPR,
9660 TYPE_SIZE (idx_type),
9661 bitsize_int
9662 (k + elt_offset));
9663 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9664 idx_type, vec_offset,
9665 TYPE_SIZE (idx_type),
9666 boff);
9667 idx = gimple_convert (&stmts, sizetype, idx);
9668 idx = gimple_build (&stmts, MULT_EXPR,
9669 sizetype, idx, scale);
9670 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9671 TREE_TYPE (dataref_ptr),
9672 dataref_ptr, idx);
9673 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9674 tree elt = make_ssa_name (TREE_TYPE (vectype));
9675 tree ref = build2 (MEM_REF, ltype, ptr,
9676 build_int_cst (ref_type, 0));
9677 new_stmt = gimple_build_assign (elt, ref);
9678 gimple_seq_add_stmt (&stmts, new_stmt);
9679 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9681 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9682 new_stmt = gimple_build_assign (NULL_TREE,
9683 build_constructor
9684 (vectype, ctor_elts));
9685 data_ref = NULL_TREE;
9686 break;
9689 align =
9690 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9691 if (alignment_support_scheme == dr_aligned)
9692 misalign = 0;
9693 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9695 align = dr_alignment
9696 (vect_dr_behavior (vinfo, first_dr_info));
9697 misalign = 0;
9699 else
9700 misalign = misalignment;
9701 if (dataref_offset == NULL_TREE
9702 && TREE_CODE (dataref_ptr) == SSA_NAME)
9703 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9704 align, misalign);
9705 align = least_bit_hwi (misalign | align);
9707 if (final_mask)
9709 tree ptr = build_int_cst (ref_type,
9710 align * BITS_PER_UNIT);
9711 gcall *call
9712 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9713 dataref_ptr, ptr,
9714 final_mask);
9715 gimple_call_set_nothrow (call, true);
9716 new_stmt = call;
9717 data_ref = NULL_TREE;
9719 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9721 tree final_len
9722 = vect_get_loop_len (loop_vinfo, loop_lens,
9723 vec_num * ncopies,
9724 vec_num * j + i);
9725 tree ptr = build_int_cst (ref_type,
9726 align * BITS_PER_UNIT);
9727 gcall *call
9728 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9729 dataref_ptr, ptr,
9730 final_len);
9731 gimple_call_set_nothrow (call, true);
9732 new_stmt = call;
9733 data_ref = NULL_TREE;
9735 /* Need conversion if it's wrapped with VnQI. */
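             /* For example (an illustration; the supported modes are
                target-dependent): on a target whose len_load patterns only
                handle byte vectors, a V4SF length-controlled load is emitted
                as a V16QI .LEN_LOAD whose result is VIEW_CONVERT'ed back to
                V4SF below.  */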
9736 machine_mode vmode = TYPE_MODE (vectype);
9737 opt_machine_mode new_ovmode
9738 = get_len_load_store_mode (vmode, true);
9739 machine_mode new_vmode = new_ovmode.require ();
9740 if (vmode != new_vmode)
9742 tree qi_type = unsigned_intQI_type_node;
9743 tree new_vtype
9744 = build_vector_type_for_mode (qi_type, new_vmode);
9745 tree var = vect_get_new_ssa_name (new_vtype,
9746 vect_simple_var);
9747 gimple_set_lhs (call, var);
9748 vect_finish_stmt_generation (vinfo, stmt_info, call,
9749 gsi);
9750 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9751 new_stmt
9752 = gimple_build_assign (vec_dest,
9753 VIEW_CONVERT_EXPR, op);
9756 else
9758 tree ltype = vectype;
9759 tree new_vtype = NULL_TREE;
9760 unsigned HOST_WIDE_INT gap
9761 = DR_GROUP_GAP (first_stmt_info);
9762 unsigned int vect_align
9763 = vect_known_alignment_in_bytes (first_dr_info,
9764 vectype);
9765 unsigned int scalar_dr_size
9766 = vect_get_scalar_dr_size (first_dr_info);
9767 /* If there's no peeling for gaps but we have a gap
9768 with slp loads then load the lower half of the
9769 vector only. See get_group_load_store_type for
9770 when we apply this optimization. */
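                     /* E.g. (illustration only): with V4SI vectors, a group
                        of size 4 whose last two elements are a gap is loaded
                        as a single half-size chunk; for a non-reversed access
                        it fills the lower half of the vector and the
                        remaining lanes are zeroed by the CONSTRUCTOR built
                        further down.  */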
9771 if (slp
9772 && loop_vinfo
9773 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9774 && gap != 0
9775 && known_eq (nunits, (group_size - gap) * 2)
9776 && known_eq (nunits, group_size)
9777 && gap >= (vect_align / scalar_dr_size))
9779 tree half_vtype;
9780 new_vtype
9781 = vector_vector_composition_type (vectype, 2,
9782 &half_vtype);
9783 if (new_vtype != NULL_TREE)
9784 ltype = half_vtype;
9786 tree offset
9787 = (dataref_offset ? dataref_offset
9788 : build_int_cst (ref_type, 0));
9789 if (ltype != vectype
9790 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9792 unsigned HOST_WIDE_INT gap_offset
9793 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9794 tree gapcst = build_int_cst (ref_type, gap_offset);
9795 offset = size_binop (PLUS_EXPR, offset, gapcst);
9797 data_ref
9798 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9799 if (alignment_support_scheme == dr_aligned)
9801 else
9802 TREE_TYPE (data_ref)
9803 = build_aligned_type (TREE_TYPE (data_ref),
9804 align * BITS_PER_UNIT);
9805 if (ltype != vectype)
9807 vect_copy_ref_info (data_ref,
9808 DR_REF (first_dr_info->dr));
9809 tree tem = make_ssa_name (ltype);
9810 new_stmt = gimple_build_assign (tem, data_ref);
9811 vect_finish_stmt_generation (vinfo, stmt_info,
9812 new_stmt, gsi);
9813 data_ref = NULL;
9814 vec<constructor_elt, va_gc> *v;
9815 vec_alloc (v, 2);
9816 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9818 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9819 build_zero_cst (ltype));
9820 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9822 else
9824 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9825 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9826 build_zero_cst (ltype));
9828 gcc_assert (new_vtype != NULL_TREE);
9829 if (new_vtype == vectype)
9830 new_stmt = gimple_build_assign (
9831 vec_dest, build_constructor (vectype, v));
9832 else
9834 tree new_vname = make_ssa_name (new_vtype);
9835 new_stmt = gimple_build_assign (
9836 new_vname, build_constructor (new_vtype, v));
9837 vect_finish_stmt_generation (vinfo, stmt_info,
9838 new_stmt, gsi);
9839 new_stmt = gimple_build_assign (
9840 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9841 new_vname));
9845 break;
9847 case dr_explicit_realign:
9849 tree ptr, bump;
9851 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9853 if (compute_in_loop)
9854 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9855 &realignment_token,
9856 dr_explicit_realign,
9857 dataref_ptr, NULL);
9859 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9860 ptr = copy_ssa_name (dataref_ptr);
9861 else
9862 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9863 /* For explicit realign the target alignment should be
9864 known at compile time. */
9865 unsigned HOST_WIDE_INT align =
9866 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9867 new_stmt = gimple_build_assign
9868 (ptr, BIT_AND_EXPR, dataref_ptr,
9869 build_int_cst
9870 (TREE_TYPE (dataref_ptr),
9871 -(HOST_WIDE_INT) align));
9872 vect_finish_stmt_generation (vinfo, stmt_info,
9873 new_stmt, gsi);
9874 data_ref
9875 = build2 (MEM_REF, vectype, ptr,
9876 build_int_cst (ref_type, 0));
9877 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9878 vec_dest = vect_create_destination_var (scalar_dest,
9879 vectype);
9880 new_stmt = gimple_build_assign (vec_dest, data_ref);
9881 new_temp = make_ssa_name (vec_dest, new_stmt);
9882 gimple_assign_set_lhs (new_stmt, new_temp);
9883 gimple_move_vops (new_stmt, stmt_info->stmt);
9884 vect_finish_stmt_generation (vinfo, stmt_info,
9885 new_stmt, gsi);
9886 msq = new_temp;
9888 bump = size_binop (MULT_EXPR, vs,
9889 TYPE_SIZE_UNIT (elem_type));
9890 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9891 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9892 stmt_info, bump);
9893 new_stmt = gimple_build_assign
9894 (NULL_TREE, BIT_AND_EXPR, ptr,
9895 build_int_cst
9896 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9897 ptr = copy_ssa_name (ptr, new_stmt);
9898 gimple_assign_set_lhs (new_stmt, ptr);
9899 vect_finish_stmt_generation (vinfo, stmt_info,
9900 new_stmt, gsi);
9901 data_ref
9902 = build2 (MEM_REF, vectype, ptr,
9903 build_int_cst (ref_type, 0));
9904 break;
9906 case dr_explicit_realign_optimized:
9908 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9909 new_temp = copy_ssa_name (dataref_ptr);
9910 else
9911 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9912 /* We should only be doing this if we know the target
9913 alignment at compile time. */
9914 unsigned HOST_WIDE_INT align =
9915 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9916 new_stmt = gimple_build_assign
9917 (new_temp, BIT_AND_EXPR, dataref_ptr,
9918 build_int_cst (TREE_TYPE (dataref_ptr),
9919 -(HOST_WIDE_INT) align));
9920 vect_finish_stmt_generation (vinfo, stmt_info,
9921 new_stmt, gsi);
9922 data_ref
9923 = build2 (MEM_REF, vectype, new_temp,
9924 build_int_cst (ref_type, 0));
9925 break;
9927 default:
9928 gcc_unreachable ();
9930 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9931 /* DATA_REF is null if we've already built the statement. */
9932 if (data_ref)
9934 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9935 new_stmt = gimple_build_assign (vec_dest, data_ref);
9937 new_temp = make_ssa_name (vec_dest, new_stmt);
9938 gimple_set_lhs (new_stmt, new_temp);
9939 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9941 /* 3. Handle explicit realignment if necessary/supported.
9942 Create in loop:
9943 vec_dest = realign_load (msq, lsq, realignment_token) */
9944 if (alignment_support_scheme == dr_explicit_realign_optimized
9945 || alignment_support_scheme == dr_explicit_realign)
9947 lsq = gimple_assign_lhs (new_stmt);
9948 if (!realignment_token)
9949 realignment_token = dataref_ptr;
9950 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9951 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9952 msq, lsq, realignment_token);
9953 new_temp = make_ssa_name (vec_dest, new_stmt);
9954 gimple_assign_set_lhs (new_stmt, new_temp);
9955 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9957 if (alignment_support_scheme == dr_explicit_realign_optimized)
9959 gcc_assert (phi);
9960 if (i == vec_num - 1 && j == ncopies - 1)
9961 add_phi_arg (phi, lsq,
9962 loop_latch_edge (containing_loop),
9963 UNKNOWN_LOCATION);
9964 msq = lsq;
9968 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9970 tree perm_mask = perm_mask_for_reverse (vectype);
9971 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9972 perm_mask, stmt_info, gsi);
9973 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9976 /* Collect vector loads and later create their permutation in
9977 vect_transform_grouped_load (). */
9978 if (grouped_load || slp_perm)
9979 dr_chain.quick_push (new_temp);
9981 /* Store vector loads in the corresponding SLP_NODE. */
9982 if (slp && !slp_perm)
9983 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9985 /* With an SLP permutation we load the gaps as well; without
9986 one we need to skip the gaps after we manage to fully load
9987 all elements.  group_gap_adj is DR_GROUP_SIZE here. */
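          /* For instance (an illustration only): an SLP node using two lanes
             of a four-element group with V2SI vectors loads two elements per
             copy and must then bump the pointer past the group_gap_adj == 2
             unused trailing elements before the next group starts.  */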
9988 group_elt += nunits;
9989 if (maybe_ne (group_gap_adj, 0U)
9990 && !slp_perm
9991 && known_eq (group_elt, group_size - group_gap_adj))
9993 poly_wide_int bump_val
9994 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9995 * group_gap_adj);
9996 if (tree_int_cst_sgn
9997 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9998 bump_val = -bump_val;
9999 tree bump = wide_int_to_tree (sizetype, bump_val);
10000 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10001 gsi, stmt_info, bump);
10002 group_elt = 0;
10005 /* Bump the vector pointer to account for a gap or for excess
10006 elements loaded for a permuted SLP load. */
10007 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
10009 poly_wide_int bump_val
10010 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
10011 * group_gap_adj);
10012 if (tree_int_cst_sgn
10013 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
10014 bump_val = -bump_val;
10015 tree bump = wide_int_to_tree (sizetype, bump_val);
10016 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10017 stmt_info, bump);
10021 if (slp && !slp_perm)
10022 continue;
10024 if (slp_perm)
10026 unsigned n_perms;
10027 /* For SLP we know we've seen all possible uses of dr_chain so
10028 direct vect_transform_slp_perm_load to DCE the unused parts.
10029 ??? This is a hack to prevent compile-time issues as seen
10030 in PR101120 and friends. */
10031 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
10032 gsi, vf, false, &n_perms,
10033 nullptr, true);
10034 gcc_assert (ok);
10036 else
10038 if (grouped_load)
10040 if (memory_access_type != VMAT_LOAD_STORE_LANES)
10041 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
10042 group_size, gsi);
10043 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10045 else
10047 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10050 dr_chain.release ();
10052 if (!slp)
10053 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10055 return true;
10058 /* Function vect_is_simple_cond.
10060 Input:
10061 LOOP - the loop that is being vectorized.
10062 COND - Condition that is checked for simple use.
10064 Output:
10065 *COMP_VECTYPE - the vector type for the comparison.
10066 *DTS - The def types for the arguments of the comparison
10068 Returns whether a COND can be vectorized. Checks whether
10069 condition operands are supportable using vect_is_simple_use. */
10071 static bool
10072 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
10073 slp_tree slp_node, tree *comp_vectype,
10074 enum vect_def_type *dts, tree vectype)
10076 tree lhs, rhs;
10077 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10078 slp_tree slp_op;
10080 /* Mask case. */
10081 if (TREE_CODE (cond) == SSA_NAME
10082 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
10084 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
10085 &slp_op, &dts[0], comp_vectype)
10086 || !*comp_vectype
10087 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
10088 return false;
10089 return true;
10092 if (!COMPARISON_CLASS_P (cond))
10093 return false;
10095 lhs = TREE_OPERAND (cond, 0);
10096 rhs = TREE_OPERAND (cond, 1);
10098 if (TREE_CODE (lhs) == SSA_NAME)
10100 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10101 &lhs, &slp_op, &dts[0], &vectype1))
10102 return false;
10104 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10105 || TREE_CODE (lhs) == FIXED_CST)
10106 dts[0] = vect_constant_def;
10107 else
10108 return false;
10110 if (TREE_CODE (rhs) == SSA_NAME)
10112 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10113 &rhs, &slp_op, &dts[1], &vectype2))
10114 return false;
10116 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10117 || TREE_CODE (rhs) == FIXED_CST)
10118 dts[1] = vect_constant_def;
10119 else
10120 return false;
10122 if (vectype1 && vectype2
10123 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10124 TYPE_VECTOR_SUBPARTS (vectype2)))
10125 return false;
10127 *comp_vectype = vectype1 ? vectype1 : vectype2;
10128 /* Invariant comparison. */
10129 if (! *comp_vectype)
10131 tree scalar_type = TREE_TYPE (lhs);
10132 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10133 *comp_vectype = truth_type_for (vectype);
10134 else
10136 /* If we can widen the comparison to match vectype, do so. */
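          /* For instance (an illustration only): if an invariant char
             comparison feeds a COND_EXPR whose vectype is V4SI, the scalar
             comparison is widened to a 32-bit integer type so that the
             comparison vector type has the same number of lanes as the
             result vector type.  */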
10137 if (INTEGRAL_TYPE_P (scalar_type)
10138 && !slp_node
10139 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10140 TYPE_SIZE (TREE_TYPE (vectype))))
10141 scalar_type = build_nonstandard_integer_type
10142 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10143 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10144 slp_node);
10148 return true;
10151 /* vectorizable_condition.
10153 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10154 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10155 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10156 at GSI.
10158 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10160 Return true if STMT_INFO is vectorizable in this way. */
10162 static bool
10163 vectorizable_condition (vec_info *vinfo,
10164 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10165 gimple **vec_stmt,
10166 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10168 tree scalar_dest = NULL_TREE;
10169 tree vec_dest = NULL_TREE;
10170 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10171 tree then_clause, else_clause;
10172 tree comp_vectype = NULL_TREE;
10173 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10174 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10175 tree vec_compare;
10176 tree new_temp;
10177 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10178 enum vect_def_type dts[4]
10179 = {vect_unknown_def_type, vect_unknown_def_type,
10180 vect_unknown_def_type, vect_unknown_def_type};
10181 int ndts = 4;
10182 int ncopies;
10183 int vec_num;
10184 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10185 int i;
10186 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10187 vec<tree> vec_oprnds0 = vNULL;
10188 vec<tree> vec_oprnds1 = vNULL;
10189 vec<tree> vec_oprnds2 = vNULL;
10190 vec<tree> vec_oprnds3 = vNULL;
10191 tree vec_cmp_type;
10192 bool masked = false;
10194 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10195 return false;
10197 /* Is vectorizable conditional operation? */
10198 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10199 if (!stmt)
10200 return false;
10202 code = gimple_assign_rhs_code (stmt);
10203 if (code != COND_EXPR)
10204 return false;
10206 stmt_vec_info reduc_info = NULL;
10207 int reduc_index = -1;
10208 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10209 bool for_reduction
10210 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10211 if (for_reduction)
10213 if (STMT_SLP_TYPE (stmt_info))
10214 return false;
10215 reduc_info = info_for_reduction (vinfo, stmt_info);
10216 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10217 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10218 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10219 || reduc_index != -1);
10221 else
10223 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10224 return false;
10227 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10228 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10230 if (slp_node)
10232 ncopies = 1;
10233 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10235 else
10237 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10238 vec_num = 1;
10241 gcc_assert (ncopies >= 1);
10242 if (for_reduction && ncopies > 1)
10243 return false; /* FORNOW */
10245 cond_expr = gimple_assign_rhs1 (stmt);
10247 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10248 &comp_vectype, &dts[0], vectype)
10249 || !comp_vectype)
10250 return false;
10252 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10253 slp_tree then_slp_node, else_slp_node;
10254 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10255 &then_clause, &then_slp_node, &dts[2], &vectype1))
10256 return false;
10257 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10258 &else_clause, &else_slp_node, &dts[3], &vectype2))
10259 return false;
10261 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10262 return false;
10264 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10265 return false;
10267 masked = !COMPARISON_CLASS_P (cond_expr);
10268 vec_cmp_type = truth_type_for (comp_vectype);
10270 if (vec_cmp_type == NULL_TREE)
10271 return false;
10273 cond_code = TREE_CODE (cond_expr);
10274 if (!masked)
10276 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10277 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10280 /* For conditional reductions, the "then" value needs to be the candidate
10281 value calculated by this iteration while the "else" value needs to be
10282 the result carried over from previous iterations. If the COND_EXPR
10283 is the other way around, we need to swap it. */
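   /* For example (an illustration only): a conditional reduction written as
        res = cond ? res : a[i];
      carries the previous result in the "then" position (reduc_index == 1),
      so the condition is inverted (or its result negated) and the clauses
      are swapped, giving
        res = !cond ? a[i] : res;  */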
10284 bool must_invert_cmp_result = false;
10285 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10287 if (masked)
10288 must_invert_cmp_result = true;
10289 else
10291 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10292 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10293 if (new_code == ERROR_MARK)
10294 must_invert_cmp_result = true;
10295 else
10297 cond_code = new_code;
10298 /* Make sure we don't accidentally use the old condition. */
10299 cond_expr = NULL_TREE;
10302 std::swap (then_clause, else_clause);
10305 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10307 /* Boolean values may have another representation in vectors
10308 and therefore we prefer bit operations over comparison for
10309 them (which also works for scalar masks). We store opcodes
10310 to use in bitop1 and bitop2. Statement is vectorized as
10311 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10312 depending on bitop1 and bitop2 arity. */
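     /* Concretely (an illustrative summary of the mapping below): for
        boolean operands a and b,
          a >  b  becomes  a & ~b
          a >= b  becomes  a | ~b
          a <  b  becomes  b & ~a
          a <= b  becomes  b | ~a
          a != b  becomes  a ^ b
          a == b  becomes  ~(a ^ b), realized by computing a ^ b and
          swapping the then/else clauses.  */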
10313 switch (cond_code)
10315 case GT_EXPR:
10316 bitop1 = BIT_NOT_EXPR;
10317 bitop2 = BIT_AND_EXPR;
10318 break;
10319 case GE_EXPR:
10320 bitop1 = BIT_NOT_EXPR;
10321 bitop2 = BIT_IOR_EXPR;
10322 break;
10323 case LT_EXPR:
10324 bitop1 = BIT_NOT_EXPR;
10325 bitop2 = BIT_AND_EXPR;
10326 std::swap (cond_expr0, cond_expr1);
10327 break;
10328 case LE_EXPR:
10329 bitop1 = BIT_NOT_EXPR;
10330 bitop2 = BIT_IOR_EXPR;
10331 std::swap (cond_expr0, cond_expr1);
10332 break;
10333 case NE_EXPR:
10334 bitop1 = BIT_XOR_EXPR;
10335 break;
10336 case EQ_EXPR:
10337 bitop1 = BIT_XOR_EXPR;
10338 bitop2 = BIT_NOT_EXPR;
10339 break;
10340 default:
10341 return false;
10343 cond_code = SSA_NAME;
10346 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10347 && reduction_type == EXTRACT_LAST_REDUCTION
10348 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10350 if (dump_enabled_p ())
10351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10352 "reduction comparison operation not supported.\n");
10353 return false;
10356 if (!vec_stmt)
10358 if (bitop1 != NOP_EXPR)
10360 machine_mode mode = TYPE_MODE (comp_vectype);
10361 optab optab;
10363 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10364 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10365 return false;
10367 if (bitop2 != NOP_EXPR)
10369 optab = optab_for_tree_code (bitop2, comp_vectype,
10370 optab_default);
10371 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10372 return false;
10376 vect_cost_for_stmt kind = vector_stmt;
10377 if (reduction_type == EXTRACT_LAST_REDUCTION)
10378 /* Count one reduction-like operation per vector. */
10379 kind = vec_to_scalar;
10380 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10381 return false;
10383 if (slp_node
10384 && (!vect_maybe_update_slp_op_vectype
10385 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10386 || (op_adjust == 1
10387 && !vect_maybe_update_slp_op_vectype
10388 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10389 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10390 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10392 if (dump_enabled_p ())
10393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10394 "incompatible vector types for invariants\n");
10395 return false;
10398 if (loop_vinfo && for_reduction
10399 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10401 if (reduction_type == EXTRACT_LAST_REDUCTION)
10402 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10403 ncopies * vec_num, vectype, NULL);
10404 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10405 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10407 if (dump_enabled_p ())
10408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10409 "conditional reduction prevents the use"
10410 " of partial vectors.\n");
10411 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10415 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10416 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10417 cost_vec, kind);
10418 return true;
10421 /* Transform. */
10423 /* Handle def. */
10424 scalar_dest = gimple_assign_lhs (stmt);
10425 if (reduction_type != EXTRACT_LAST_REDUCTION)
10426 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10428 bool swap_cond_operands = false;
10430 /* See whether another part of the vectorized code applies a loop
10431 mask to the condition, or to its inverse. */
10433 vec_loop_masks *masks = NULL;
10434 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10436 if (reduction_type == EXTRACT_LAST_REDUCTION)
10437 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10438 else
10440 scalar_cond_masked_key cond (cond_expr, ncopies);
10441 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10442 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10443 else
10445 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10446 tree_code orig_code = cond.code;
10447 cond.code = invert_tree_comparison (cond.code, honor_nans);
10448 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10450 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10451 cond_code = cond.code;
10452 swap_cond_operands = true;
10454 else
10456 /* Try the inverse of the current mask. We check if the
10457 inverse mask is live and if so we generate a negate of
10458 the current mask such that we still honor NaNs. */
10459 cond.inverted_p = true;
10460 cond.code = orig_code;
10461 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10463 bitop1 = orig_code;
10464 bitop2 = BIT_NOT_EXPR;
10465 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10466 cond_code = cond.code;
10467 swap_cond_operands = true;
10474 /* Handle cond expr. */
10475 if (masked)
10476 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10477 cond_expr, &vec_oprnds0, comp_vectype,
10478 then_clause, &vec_oprnds2, vectype,
10479 reduction_type != EXTRACT_LAST_REDUCTION
10480 ? else_clause : NULL, &vec_oprnds3, vectype);
10481 else
10482 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10483 cond_expr0, &vec_oprnds0, comp_vectype,
10484 cond_expr1, &vec_oprnds1, comp_vectype,
10485 then_clause, &vec_oprnds2, vectype,
10486 reduction_type != EXTRACT_LAST_REDUCTION
10487 ? else_clause : NULL, &vec_oprnds3, vectype);
10489 /* Arguments are ready. Create the new vector stmt. */
10490 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10492 vec_then_clause = vec_oprnds2[i];
10493 if (reduction_type != EXTRACT_LAST_REDUCTION)
10494 vec_else_clause = vec_oprnds3[i];
10496 if (swap_cond_operands)
10497 std::swap (vec_then_clause, vec_else_clause);
10499 if (masked)
10500 vec_compare = vec_cond_lhs;
10501 else
10503 vec_cond_rhs = vec_oprnds1[i];
10504 if (bitop1 == NOP_EXPR)
10506 gimple_seq stmts = NULL;
10507 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10508 vec_cond_lhs, vec_cond_rhs);
10509 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10511 else
10513 new_temp = make_ssa_name (vec_cmp_type);
10514 gassign *new_stmt;
10515 if (bitop1 == BIT_NOT_EXPR)
10516 new_stmt = gimple_build_assign (new_temp, bitop1,
10517 vec_cond_rhs);
10518 else
10519 new_stmt
10520 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10521 vec_cond_rhs);
10522 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10523 if (bitop2 == NOP_EXPR)
10524 vec_compare = new_temp;
10525 else if (bitop2 == BIT_NOT_EXPR)
10527 /* Instead of doing ~x ? y : z do x ? z : y. */
10528 vec_compare = new_temp;
10529 std::swap (vec_then_clause, vec_else_clause);
10531 else
10533 vec_compare = make_ssa_name (vec_cmp_type);
10534 new_stmt
10535 = gimple_build_assign (vec_compare, bitop2,
10536 vec_cond_lhs, new_temp);
10537 vect_finish_stmt_generation (vinfo, stmt_info,
10538 new_stmt, gsi);
10543 /* If we decided to apply a loop mask to the result of the vector
10544 comparison, AND the comparison with the mask now. Later passes
10545 should then be able to reuse the AND results between multiple
10546 vector statements.
10548 For example:
10549 for (int i = 0; i < 100; ++i)
10550 x[i] = y[i] ? z[i] : 10;
10552 results in following optimized GIMPLE:
10554 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10555 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10556 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10557 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10558 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10559 vect_iftmp.11_47, { 10, ... }>;
10561 instead of using masked and unmasked forms of
10562 vec != { 0, ... } (masked in the MASK_LOAD,
10563 unmasked in the VEC_COND_EXPR). */
10565 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10566 in cases where that's necessary. */
10568 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10570 if (!is_gimple_val (vec_compare))
10572 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10573 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10574 vec_compare);
10575 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10576 vec_compare = vec_compare_name;
10579 if (must_invert_cmp_result)
10581 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10582 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10583 BIT_NOT_EXPR,
10584 vec_compare);
10585 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10586 vec_compare = vec_compare_name;
10589 if (masks)
10591 tree loop_mask
10592 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10593 vectype, i);
10594 tree tmp2 = make_ssa_name (vec_cmp_type);
10595 gassign *g
10596 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10597 loop_mask);
10598 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10599 vec_compare = tmp2;
10603 gimple *new_stmt;
10604 if (reduction_type == EXTRACT_LAST_REDUCTION)
10606 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10607 tree lhs = gimple_get_lhs (old_stmt);
10608 new_stmt = gimple_build_call_internal
10609 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10610 vec_then_clause);
10611 gimple_call_set_lhs (new_stmt, lhs);
10612 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10613 if (old_stmt == gsi_stmt (*gsi))
10614 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10615 else
10617 /* In this case we're moving the definition to later in the
10618 block. That doesn't matter because the only uses of the
10619 lhs are in phi statements. */
10620 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10621 gsi_remove (&old_gsi, true);
10622 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10625 else
10627 new_temp = make_ssa_name (vec_dest);
10628 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10629 vec_then_clause, vec_else_clause);
10630 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10632 if (slp_node)
10633 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10634 else
10635 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10638 if (!slp_node)
10639 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10641 vec_oprnds0.release ();
10642 vec_oprnds1.release ();
10643 vec_oprnds2.release ();
10644 vec_oprnds3.release ();
10646 return true;
10649 /* vectorizable_comparison.
10651 Check if STMT_INFO is a comparison expression that can be vectorized.
10652 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10653 comparison, put it in VEC_STMT, and insert it at GSI.
10655 Return true if STMT_INFO is vectorizable in this way. */
10657 static bool
10658 vectorizable_comparison (vec_info *vinfo,
10659 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10660 gimple **vec_stmt,
10661 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10663 tree lhs, rhs1, rhs2;
10664 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10665 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10666 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10667 tree new_temp;
10668 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10669 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10670 int ndts = 2;
10671 poly_uint64 nunits;
10672 int ncopies;
10673 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10674 int i;
10675 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10676 vec<tree> vec_oprnds0 = vNULL;
10677 vec<tree> vec_oprnds1 = vNULL;
10678 tree mask_type;
10679 tree mask;
10681 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10682 return false;
10684 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10685 return false;
10687 mask_type = vectype;
10688 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10690 if (slp_node)
10691 ncopies = 1;
10692 else
10693 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10695 gcc_assert (ncopies >= 1);
10696 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10697 return false;
10699 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10700 if (!stmt)
10701 return false;
10703 code = gimple_assign_rhs_code (stmt);
10705 if (TREE_CODE_CLASS (code) != tcc_comparison)
10706 return false;
10708 slp_tree slp_rhs1, slp_rhs2;
10709 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10710 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10711 return false;
10713 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10714 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10715 return false;
10717 if (vectype1 && vectype2
10718 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10719 TYPE_VECTOR_SUBPARTS (vectype2)))
10720 return false;
10722 vectype = vectype1 ? vectype1 : vectype2;
10724 /* Invariant comparison. */
10725 if (!vectype)
10727 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10728 vectype = mask_type;
10729 else
10730 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10731 slp_node);
10732 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10733 return false;
10735 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10736 return false;
10738 /* Can't compare mask and non-mask types. */
10739 if (vectype1 && vectype2
10740 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10741 return false;
10743 /* Boolean values may have another representation in vectors
10744 and therefore we prefer bit operations over comparison for
10745 them (which also works for scalar masks). We store opcodes
10746 to use in bitop1 and bitop2. The statement is vectorized as
10747 BITOP2 (rhs1 BITOP1 rhs2) or
10748 rhs1 BITOP2 (BITOP1 rhs2)
10749 depending on bitop1 and bitop2 arity. */
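 /* For example (an illustrative sketch of the mapping below): with
 boolean operands, a > b is computed as a & ~b, i.e. BITOP1
 (BIT_NOT_EXPR) is applied to rhs2 and BITOP2 (BIT_AND_EXPR)
 combines rhs1 with that result; a < b uses the same opcodes
 after swapping the operands (SWAP_P). */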
10750 bool swap_p = false;
10751 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10753 if (code == GT_EXPR)
10755 bitop1 = BIT_NOT_EXPR;
10756 bitop2 = BIT_AND_EXPR;
10758 else if (code == GE_EXPR)
10760 bitop1 = BIT_NOT_EXPR;
10761 bitop2 = BIT_IOR_EXPR;
10763 else if (code == LT_EXPR)
10765 bitop1 = BIT_NOT_EXPR;
10766 bitop2 = BIT_AND_EXPR;
10767 swap_p = true;
10769 else if (code == LE_EXPR)
10771 bitop1 = BIT_NOT_EXPR;
10772 bitop2 = BIT_IOR_EXPR;
10773 swap_p = true;
10775 else
10777 bitop1 = BIT_XOR_EXPR;
10778 if (code == EQ_EXPR)
10779 bitop2 = BIT_NOT_EXPR;
10783 if (!vec_stmt)
10785 if (bitop1 == NOP_EXPR)
10787 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10788 return false;
10790 else
10792 machine_mode mode = TYPE_MODE (vectype);
10793 optab optab;
10795 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10796 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10797 return false;
10799 if (bitop2 != NOP_EXPR)
10801 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10802 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10803 return false;
10807 /* Put types on constant and invariant SLP children. */
10808 if (slp_node
10809 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10810 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10812 if (dump_enabled_p ())
10813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10814 "incompatible vector types for invariants\n");
10815 return false;
10818 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10819 vect_model_simple_cost (vinfo, stmt_info,
10820 ncopies * (1 + (bitop2 != NOP_EXPR)),
10821 dts, ndts, slp_node, cost_vec);
10822 return true;
10825 /* Transform. */
10827 /* Handle def. */
10828 lhs = gimple_assign_lhs (stmt);
10829 mask = vect_create_destination_var (lhs, mask_type);
10831 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10832 rhs1, &vec_oprnds0, vectype,
10833 rhs2, &vec_oprnds1, vectype);
10834 if (swap_p)
10835 std::swap (vec_oprnds0, vec_oprnds1);
10837 /* Arguments are ready. Create the new vector stmt. */
10838 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10840 gimple *new_stmt;
10841 vec_rhs2 = vec_oprnds1[i];
10843 new_temp = make_ssa_name (mask);
10844 if (bitop1 == NOP_EXPR)
10846 new_stmt = gimple_build_assign (new_temp, code,
10847 vec_rhs1, vec_rhs2);
10848 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10850 else
10852 if (bitop1 == BIT_NOT_EXPR)
10853 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10854 else
10855 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10856 vec_rhs2);
10857 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10858 if (bitop2 != NOP_EXPR)
10860 tree res = make_ssa_name (mask);
10861 if (bitop2 == BIT_NOT_EXPR)
10862 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10863 else
10864 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10865 new_temp);
10866 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10869 if (slp_node)
10870 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10871 else
10872 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10875 if (!slp_node)
10876 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10878 vec_oprnds0.release ();
10879 vec_oprnds1.release ();
10881 return true;
10884 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10885 can handle all live statements in the node. Otherwise return true
10886 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10887 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10889 static bool
10890 can_vectorize_live_stmts (vec_info *vinfo,
10891 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10892 slp_tree slp_node, slp_instance slp_node_instance,
10893 bool vec_stmt_p,
10894 stmt_vector_for_cost *cost_vec)
10896 if (slp_node)
10898 stmt_vec_info slp_stmt_info;
10899 unsigned int i;
10900 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10902 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10903 && !vectorizable_live_operation (vinfo,
10904 slp_stmt_info, gsi, slp_node,
10905 slp_node_instance, i,
10906 vec_stmt_p, cost_vec))
10907 return false;
10910 else if (STMT_VINFO_LIVE_P (stmt_info)
10911 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10912 slp_node, slp_node_instance, -1,
10913 vec_stmt_p, cost_vec))
10914 return false;
10916 return true;
10919 /* Make sure the statement is vectorizable. */
10921 opt_result
10922 vect_analyze_stmt (vec_info *vinfo,
10923 stmt_vec_info stmt_info, bool *need_to_vectorize,
10924 slp_tree node, slp_instance node_instance,
10925 stmt_vector_for_cost *cost_vec)
10927 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10928 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10929 bool ok;
10930 gimple_seq pattern_def_seq;
10932 if (dump_enabled_p ())
10933 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10934 stmt_info->stmt);
10936 if (gimple_has_volatile_ops (stmt_info->stmt))
10937 return opt_result::failure_at (stmt_info->stmt,
10938 "not vectorized:"
10939 " stmt has volatile operands: %G\n",
10940 stmt_info->stmt);
10942 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10943 && node == NULL
10944 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10946 gimple_stmt_iterator si;
10948 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10950 stmt_vec_info pattern_def_stmt_info
10951 = vinfo->lookup_stmt (gsi_stmt (si));
10952 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10953 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10955 /* Analyze def stmt of STMT if it's a pattern stmt. */
10956 if (dump_enabled_p ())
10957 dump_printf_loc (MSG_NOTE, vect_location,
10958 "==> examining pattern def statement: %G",
10959 pattern_def_stmt_info->stmt);
10961 opt_result res
10962 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10963 need_to_vectorize, node, node_instance,
10964 cost_vec);
10965 if (!res)
10966 return res;
10971 /* Skip stmts that do not need to be vectorized. In loops this is expected
10972 to include:
10973 - the COND_EXPR which is the loop exit condition
10974 - any LABEL_EXPRs in the loop
10975 - computations that are used only for array indexing or loop control.
10976 In basic blocks we only analyze statements that are a part of some SLP
10977 instance, therefore, all the statements are relevant.
10979 A pattern statement needs to be analyzed instead of the original statement
10980 if the original statement is not relevant. Otherwise, we analyze both
10981 statements. In basic blocks we are called from some SLP instance
10982 traversal, so don't analyze pattern stmts instead; the pattern stmts
10983 will already be part of an SLP instance. */
10985 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10986 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10987 && !STMT_VINFO_LIVE_P (stmt_info))
10989 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10990 && pattern_stmt_info
10991 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10992 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10994 /* Analyze PATTERN_STMT instead of the original stmt. */
10995 stmt_info = pattern_stmt_info;
10996 if (dump_enabled_p ())
10997 dump_printf_loc (MSG_NOTE, vect_location,
10998 "==> examining pattern statement: %G",
10999 stmt_info->stmt);
11001 else
11003 if (dump_enabled_p ())
11004 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
11006 return opt_result::success ();
11009 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
11010 && node == NULL
11011 && pattern_stmt_info
11012 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
11013 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
11015 /* Analyze PATTERN_STMT too. */
11016 if (dump_enabled_p ())
11017 dump_printf_loc (MSG_NOTE, vect_location,
11018 "==> examining pattern statement: %G",
11019 pattern_stmt_info->stmt);
11021 opt_result res
11022 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
11023 node_instance, cost_vec);
11024 if (!res)
11025 return res;
11028 switch (STMT_VINFO_DEF_TYPE (stmt_info))
11030 case vect_internal_def:
11031 break;
11033 case vect_reduction_def:
11034 case vect_nested_cycle:
11035 gcc_assert (!bb_vinfo
11036 && (relevance == vect_used_in_outer
11037 || relevance == vect_used_in_outer_by_reduction
11038 || relevance == vect_used_by_reduction
11039 || relevance == vect_unused_in_scope
11040 || relevance == vect_used_only_live));
11041 break;
11043 case vect_induction_def:
11044 gcc_assert (!bb_vinfo);
11045 break;
11047 case vect_constant_def:
11048 case vect_external_def:
11049 case vect_unknown_def_type:
11050 default:
11051 gcc_unreachable ();
11054 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11055 if (node)
11056 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
11058 if (STMT_VINFO_RELEVANT_P (stmt_info))
11060 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
11061 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
11062 || (call && gimple_call_lhs (call) == NULL_TREE));
11063 *need_to_vectorize = true;
11066 if (PURE_SLP_STMT (stmt_info) && !node)
11068 if (dump_enabled_p ())
11069 dump_printf_loc (MSG_NOTE, vect_location,
11070 "handled only by SLP analysis\n");
11071 return opt_result::success ();
11074 ok = true;
11075 if (!bb_vinfo
11076 && (STMT_VINFO_RELEVANT_P (stmt_info)
11077 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
11078 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
11079 -mveclibabi= takes preference over library functions with
11080 the simd attribute. */
11081 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11082 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
11083 cost_vec)
11084 || vectorizable_conversion (vinfo, stmt_info,
11085 NULL, NULL, node, cost_vec)
11086 || vectorizable_operation (vinfo, stmt_info,
11087 NULL, NULL, node, cost_vec)
11088 || vectorizable_assignment (vinfo, stmt_info,
11089 NULL, NULL, node, cost_vec)
11090 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11091 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11092 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11093 node, node_instance, cost_vec)
11094 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
11095 NULL, node, cost_vec)
11096 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11097 || vectorizable_condition (vinfo, stmt_info,
11098 NULL, NULL, node, cost_vec)
11099 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11100 cost_vec)
11101 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11102 stmt_info, NULL, node));
11103 else
11105 if (bb_vinfo)
11106 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
11107 || vectorizable_simd_clone_call (vinfo, stmt_info,
11108 NULL, NULL, node, cost_vec)
11109 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
11110 cost_vec)
11111 || vectorizable_shift (vinfo, stmt_info,
11112 NULL, NULL, node, cost_vec)
11113 || vectorizable_operation (vinfo, stmt_info,
11114 NULL, NULL, node, cost_vec)
11115 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11116 cost_vec)
11117 || vectorizable_load (vinfo, stmt_info,
11118 NULL, NULL, node, cost_vec)
11119 || vectorizable_store (vinfo, stmt_info,
11120 NULL, NULL, node, cost_vec)
11121 || vectorizable_condition (vinfo, stmt_info,
11122 NULL, NULL, node, cost_vec)
11123 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11124 cost_vec)
11125 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11128 if (node)
11129 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11131 if (!ok)
11132 return opt_result::failure_at (stmt_info->stmt,
11133 "not vectorized:"
11134 " relevant stmt not supported: %G",
11135 stmt_info->stmt);
11137 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11138 need extra handling, except for vectorizable reductions. */
11139 if (!bb_vinfo
11140 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11141 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11142 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11143 stmt_info, NULL, node, node_instance,
11144 false, cost_vec))
11145 return opt_result::failure_at (stmt_info->stmt,
11146 "not vectorized:"
11147 " live stmt not supported: %G",
11148 stmt_info->stmt);
11150 return opt_result::success ();
11154 /* Function vect_transform_stmt.
11156 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11158 bool
11159 vect_transform_stmt (vec_info *vinfo,
11160 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11161 slp_tree slp_node, slp_instance slp_node_instance)
11163 bool is_store = false;
11164 gimple *vec_stmt = NULL;
11165 bool done;
11167 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11169 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11170 if (slp_node)
11171 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11173 switch (STMT_VINFO_TYPE (stmt_info))
11175 case type_demotion_vec_info_type:
11176 case type_promotion_vec_info_type:
11177 case type_conversion_vec_info_type:
11178 done = vectorizable_conversion (vinfo, stmt_info,
11179 gsi, &vec_stmt, slp_node, NULL);
11180 gcc_assert (done);
11181 break;
11183 case induc_vec_info_type:
11184 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11185 stmt_info, &vec_stmt, slp_node,
11186 NULL);
11187 gcc_assert (done);
11188 break;
11190 case shift_vec_info_type:
11191 done = vectorizable_shift (vinfo, stmt_info,
11192 gsi, &vec_stmt, slp_node, NULL);
11193 gcc_assert (done);
11194 break;
11196 case op_vec_info_type:
11197 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11198 NULL);
11199 gcc_assert (done);
11200 break;
11202 case assignment_vec_info_type:
11203 done = vectorizable_assignment (vinfo, stmt_info,
11204 gsi, &vec_stmt, slp_node, NULL);
11205 gcc_assert (done);
11206 break;
11208 case load_vec_info_type:
11209 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11210 NULL);
11211 gcc_assert (done);
11212 break;
11214 case store_vec_info_type:
11215 done = vectorizable_store (vinfo, stmt_info,
11216 gsi, &vec_stmt, slp_node, NULL);
11217 gcc_assert (done);
11218 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11220 /* In case of interleaving, the whole chain is vectorized when the
11221 last store in the chain is reached. Store stmts before the last
10622 one are skipped, and their vec_stmt_info shouldn't be freed
11223 meanwhile. */
11224 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11225 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11226 is_store = true;
11228 else
11229 is_store = true;
11230 break;
11232 case condition_vec_info_type:
11233 done = vectorizable_condition (vinfo, stmt_info,
11234 gsi, &vec_stmt, slp_node, NULL);
11235 gcc_assert (done);
11236 break;
11238 case comparison_vec_info_type:
11239 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11240 slp_node, NULL);
11241 gcc_assert (done);
11242 break;
11244 case call_vec_info_type:
11245 done = vectorizable_call (vinfo, stmt_info,
11246 gsi, &vec_stmt, slp_node, NULL);
11247 break;
11249 case call_simd_clone_vec_info_type:
11250 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11251 slp_node, NULL);
11252 break;
11254 case reduc_vec_info_type:
11255 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11256 gsi, &vec_stmt, slp_node);
11257 gcc_assert (done);
11258 break;
11260 case cycle_phi_info_type:
11261 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11262 &vec_stmt, slp_node, slp_node_instance);
11263 gcc_assert (done);
11264 break;
11266 case lc_phi_info_type:
11267 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11268 stmt_info, &vec_stmt, slp_node);
11269 gcc_assert (done);
11270 break;
11272 case phi_info_type:
11273 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11274 gcc_assert (done);
11275 break;
11277 default:
11278 if (!STMT_VINFO_LIVE_P (stmt_info))
11280 if (dump_enabled_p ())
11281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11282 "stmt not supported.\n");
11283 gcc_unreachable ();
11285 done = true;
11288 if (!slp_node && vec_stmt)
11289 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11291 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11293 /* Handle stmts whose DEF is used outside the loop-nest that is
11294 being vectorized. */
11295 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11296 slp_node_instance, true, NULL);
11297 gcc_assert (done);
11300 if (slp_node)
11301 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11303 return is_store;
11307 /* Remove a group of stores (for SLP or interleaving), free their
11308 stmt_vec_info. */
11310 void
11311 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11313 stmt_vec_info next_stmt_info = first_stmt_info;
11315 while (next_stmt_info)
11317 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11318 next_stmt_info = vect_orig_stmt (next_stmt_info);
11319 /* Free the attached stmt_vec_info and remove the stmt. */
11320 vinfo->remove_stmt (next_stmt_info);
11321 next_stmt_info = tmp;
11325 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11326 elements of type SCALAR_TYPE, or null if the target doesn't support
11327 such a type.
11329 If NUNITS is zero, return a vector type that contains elements of
11330 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11332 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11333 for this vectorization region and want to "autodetect" the best choice.
11334 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11335 and we want the new type to be interoperable with it. PREVAILING_MODE
11336 in this case can be a scalar integer mode or a vector mode; when it
11337 is a vector mode, the function acts like a tree-level version of
11338 related_vector_mode. */
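 /* A minimal illustration (assuming a typical target with 128-bit
 vectors): PREVAILING_MODE == VOIDmode, SCALAR_TYPE == int and
 NUNITS == 0 picks the target's preferred SIMD mode and yields a
 4-element int vector type, while a nonzero NUNITS requests exactly
 that many elements and yields NULL_TREE if no suitable mode
 exists. */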
11340 tree
11341 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11342 tree scalar_type, poly_uint64 nunits)
11344 tree orig_scalar_type = scalar_type;
11345 scalar_mode inner_mode;
11346 machine_mode simd_mode;
11347 tree vectype;
11349 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11350 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11351 return NULL_TREE;
11353 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11355 /* For vector types of elements whose mode precision doesn't
11356 match their type's precision we use an element type of mode
11357 precision. The vectorization routines will have to make sure
11358 they support the proper result truncation/extension.
11359 We also make sure to build vector types with INTEGER_TYPE
11360 component type only. */
11361 if (INTEGRAL_TYPE_P (scalar_type)
11362 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11363 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11364 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11365 TYPE_UNSIGNED (scalar_type));
11367 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11368 When the component mode passes the above test simply use a type
11369 corresponding to that mode. The theory is that any use that
11370 would cause problems with this will disable vectorization anyway. */
11371 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11372 && !INTEGRAL_TYPE_P (scalar_type))
11373 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11375 /* We can't build a vector type of elements with alignment bigger than
11376 their size. */
11377 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11378 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11379 TYPE_UNSIGNED (scalar_type));
11381 /* If we fell back to using the mode, fail if there was
11382 no scalar type for it. */
11383 if (scalar_type == NULL_TREE)
11384 return NULL_TREE;
11386 /* If no prevailing mode was supplied, use the mode the target prefers.
11387 Otherwise lookup a vector mode based on the prevailing mode. */
11388 if (prevailing_mode == VOIDmode)
11390 gcc_assert (known_eq (nunits, 0U));
11391 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11392 if (SCALAR_INT_MODE_P (simd_mode))
11394 /* Traditional behavior is not to take the integer mode
11395 literally, but simply to use it as a way of determining
11396 the vector size. It is up to mode_for_vector to decide
11397 what the TYPE_MODE should be.
11399 Note that nunits == 1 is allowed in order to support single
11400 element vector types. */
11401 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11402 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11403 return NULL_TREE;
11406 else if (SCALAR_INT_MODE_P (prevailing_mode)
11407 || !related_vector_mode (prevailing_mode,
11408 inner_mode, nunits).exists (&simd_mode))
11410 /* Fall back to using mode_for_vector, mostly in the hope of being
11411 able to use an integer mode. */
11412 if (known_eq (nunits, 0U)
11413 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11414 return NULL_TREE;
11416 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11417 return NULL_TREE;
11420 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11422 /* In cases where the mode was chosen by mode_for_vector, check that
11423 the target actually supports the chosen mode, or that it at least
11424 allows the vector mode to be replaced by a like-sized integer. */
11425 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11426 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11427 return NULL_TREE;
11429 /* Re-attach the address-space qualifier if we canonicalized the scalar
11430 type. */
11431 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11432 return build_qualified_type
11433 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11435 return vectype;
11438 /* Function get_vectype_for_scalar_type.
11440 Returns the vector type corresponding to SCALAR_TYPE as supported
11441 by the target. If GROUP_SIZE is nonzero and we're performing BB
11442 vectorization, make sure that the number of elements in the vector
11443 is no bigger than GROUP_SIZE. */
11445 tree
11446 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11447 unsigned int group_size)
11449 /* For BB vectorization, we should always have a group size once we've
11450 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11451 are tentative requests during things like early data reference
11452 analysis and pattern recognition. */
11453 if (is_a <bb_vec_info> (vinfo))
11454 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11455 else
11456 group_size = 0;
11458 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11459 scalar_type);
11460 if (vectype && vinfo->vector_mode == VOIDmode)
11461 vinfo->vector_mode = TYPE_MODE (vectype);
11463 /* Register the natural choice of vector type, before the group size
11464 has been applied. */
11465 if (vectype)
11466 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11468 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11469 try again with an explicit number of elements. */
11470 if (vectype
11471 && group_size
11472 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11474 /* Start with the biggest number of units that fits within
11475 GROUP_SIZE and halve it until we find a valid vector type.
11476 Usually either the first attempt will succeed or all will
11477 fail (in the latter case because GROUP_SIZE is too small
11478 for the target), but it's possible that a target could have
11479 a hole between supported vector types.
11481 If GROUP_SIZE is not a power of 2, this has the effect of
11482 trying the largest power of 2 that fits within the group,
11483 even though the group is not a multiple of that vector size.
11484 The BB vectorizer will then try to carve up the group into
11485 smaller pieces. */
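 /* For instance (illustrative): GROUP_SIZE == 12 starts with
 NUNITS == 8; if no 8-element vector type exists we retry with
 4 and then 2 before giving up. */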
11486 unsigned int nunits = 1 << floor_log2 (group_size);
11489 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11490 scalar_type, nunits);
11491 nunits /= 2;
11493 while (nunits > 1 && !vectype);
11496 return vectype;
11499 /* Return the vector type corresponding to SCALAR_TYPE as supported
11500 by the target. NODE, if nonnull, is the SLP tree node that will
11501 use the returned vector type. */
11503 tree
11504 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11506 unsigned int group_size = 0;
11507 if (node)
11508 group_size = SLP_TREE_LANES (node);
11509 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11512 /* Function get_mask_type_for_scalar_type.
11514 Returns the mask type corresponding to a result of comparison
11515 of vectors of specified SCALAR_TYPE as supported by target.
11516 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11517 make sure that the number of elements in the vector is no bigger
11518 than GROUP_SIZE. */
11520 tree
11521 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11522 unsigned int group_size)
11524 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11526 if (!vectype)
11527 return NULL;
11529 return truth_type_for (vectype);
11532 /* Function get_same_sized_vectype
11534 Returns a vector type corresponding to SCALAR_TYPE of size
11535 VECTOR_TYPE if supported by the target. */
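 /* For instance (illustrative): with SCALAR_TYPE == short and a
 16-byte VECTOR_TYPE the requested vector type has 8 elements,
 assuming the target provides one. */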
11537 tree
11538 get_same_sized_vectype (tree scalar_type, tree vector_type)
11540 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11541 return truth_type_for (vector_type);
11543 poly_uint64 nunits;
11544 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11545 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11546 return NULL_TREE;
11548 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11549 scalar_type, nunits);
11552 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11553 would not change the chosen vector modes. */
11555 bool
11556 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11558 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11559 i != vinfo->used_vector_modes.end (); ++i)
11560 if (!VECTOR_MODE_P (*i)
11561 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11562 return false;
11563 return true;
11566 /* Function vect_is_simple_use.
11568 Input:
11569 VINFO - the vect info of the loop or basic block that is being vectorized.
11570 OPERAND - operand in the loop or bb.
11571 Output:
11572 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11573 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11574 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11575 the definition could be anywhere in the function
11576 DT - the type of definition
11578 Returns whether a stmt with OPERAND can be vectorized.
11579 For loops, supportable operands are constants, loop invariants, and operands
11580 that are defined by the current iteration of the loop. Unsupportable
11581 operands are those that are defined by a previous iteration of the loop (as
11582 is the case in reduction/induction computations).
11583 For basic blocks, supportable operands are constants and bb invariants.
11584 For now, operands defined outside the basic block are not supported. */
11586 bool
11587 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11588 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11590 if (def_stmt_info_out)
11591 *def_stmt_info_out = NULL;
11592 if (def_stmt_out)
11593 *def_stmt_out = NULL;
11594 *dt = vect_unknown_def_type;
11596 if (dump_enabled_p ())
11598 dump_printf_loc (MSG_NOTE, vect_location,
11599 "vect_is_simple_use: operand ");
11600 if (TREE_CODE (operand) == SSA_NAME
11601 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11602 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11603 else
11604 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11607 if (CONSTANT_CLASS_P (operand))
11608 *dt = vect_constant_def;
11609 else if (is_gimple_min_invariant (operand))
11610 *dt = vect_external_def;
11611 else if (TREE_CODE (operand) != SSA_NAME)
11612 *dt = vect_unknown_def_type;
11613 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11614 *dt = vect_external_def;
11615 else
11617 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11618 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11619 if (!stmt_vinfo)
11620 *dt = vect_external_def;
11621 else
11623 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11624 def_stmt = stmt_vinfo->stmt;
11625 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11626 if (def_stmt_info_out)
11627 *def_stmt_info_out = stmt_vinfo;
11629 if (def_stmt_out)
11630 *def_stmt_out = def_stmt;
11633 if (dump_enabled_p ())
11635 dump_printf (MSG_NOTE, ", type of def: ");
11636 switch (*dt)
11638 case vect_uninitialized_def:
11639 dump_printf (MSG_NOTE, "uninitialized\n");
11640 break;
11641 case vect_constant_def:
11642 dump_printf (MSG_NOTE, "constant\n");
11643 break;
11644 case vect_external_def:
11645 dump_printf (MSG_NOTE, "external\n");
11646 break;
11647 case vect_internal_def:
11648 dump_printf (MSG_NOTE, "internal\n");
11649 break;
11650 case vect_induction_def:
11651 dump_printf (MSG_NOTE, "induction\n");
11652 break;
11653 case vect_reduction_def:
11654 dump_printf (MSG_NOTE, "reduction\n");
11655 break;
11656 case vect_double_reduction_def:
11657 dump_printf (MSG_NOTE, "double reduction\n");
11658 break;
11659 case vect_nested_cycle:
11660 dump_printf (MSG_NOTE, "nested cycle\n");
11661 break;
11662 case vect_unknown_def_type:
11663 dump_printf (MSG_NOTE, "unknown\n");
11664 break;
11668 if (*dt == vect_unknown_def_type)
11670 if (dump_enabled_p ())
11671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11672 "Unsupported pattern.\n");
11673 return false;
11676 return true;
11679 /* Function vect_is_simple_use.
11681 Same as vect_is_simple_use but also determines the vector operand
11682 type of OPERAND and stores it to *VECTYPE. If the definition of
11683 OPERAND is vect_uninitialized_def, vect_constant_def or
11684 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11685 is responsible for computing the best suited vector type for the
11686 scalar operand. */
11688 bool
11689 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11690 tree *vectype, stmt_vec_info *def_stmt_info_out,
11691 gimple **def_stmt_out)
11693 stmt_vec_info def_stmt_info;
11694 gimple *def_stmt;
11695 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11696 return false;
11698 if (def_stmt_out)
11699 *def_stmt_out = def_stmt;
11700 if (def_stmt_info_out)
11701 *def_stmt_info_out = def_stmt_info;
11703 /* Now get a vector type if the def is internal, otherwise supply
11704 NULL_TREE and leave it up to the caller to figure out a proper
11705 type for the use stmt. */
11706 if (*dt == vect_internal_def
11707 || *dt == vect_induction_def
11708 || *dt == vect_reduction_def
11709 || *dt == vect_double_reduction_def
11710 || *dt == vect_nested_cycle)
11712 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11713 gcc_assert (*vectype != NULL_TREE);
11714 if (dump_enabled_p ())
11715 dump_printf_loc (MSG_NOTE, vect_location,
11716 "vect_is_simple_use: vectype %T\n", *vectype);
11718 else if (*dt == vect_uninitialized_def
11719 || *dt == vect_constant_def
11720 || *dt == vect_external_def)
11721 *vectype = NULL_TREE;
11722 else
11723 gcc_unreachable ();
11725 return true;
11728 /* Function vect_is_simple_use.
11730 Same as vect_is_simple_use but determines the operand by operand
11731 position OPERAND from either STMT or SLP_NODE, filling in *OP
11732 and *SLP_DEF (when SLP_NODE is not NULL). */
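 /* For instance (illustrative): for a gimple assignment whose rhs is
 COND_EXPR <a < b, x, y>, OPERAND 0 and 1 select a and b from the
 embedded comparison, while higher operand numbers select x and y. */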
11734 bool
11735 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11736 unsigned operand, tree *op, slp_tree *slp_def,
11737 enum vect_def_type *dt,
11738 tree *vectype, stmt_vec_info *def_stmt_info_out)
11740 if (slp_node)
11742 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11743 *slp_def = child;
11744 *vectype = SLP_TREE_VECTYPE (child);
11745 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11747 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11748 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11750 else
11752 if (def_stmt_info_out)
11753 *def_stmt_info_out = NULL;
11754 *op = SLP_TREE_SCALAR_OPS (child)[0];
11755 *dt = SLP_TREE_DEF_TYPE (child);
11756 return true;
11759 else
11761 *slp_def = NULL;
11762 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11764 if (gimple_assign_rhs_code (ass) == COND_EXPR
11765 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11767 if (operand < 2)
11768 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11769 else
11770 *op = gimple_op (ass, operand);
11772 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11773 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11774 else
11775 *op = gimple_op (ass, operand + 1);
11777 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11778 *op = gimple_call_arg (call, operand);
11779 else
11780 gcc_unreachable ();
11781 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11785 /* If OP is not NULL and is external or constant update its vector
11786 type with VECTYPE. Returns true if successful or false if not,
11787 for example when conflicting vector types are present. */
11789 bool
11790 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11792 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11793 return true;
11794 if (SLP_TREE_VECTYPE (op))
11795 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11796 SLP_TREE_VECTYPE (op) = vectype;
11797 return true;
11800 /* Function supportable_widening_operation
11802 Check whether an operation represented by the code CODE is a
11803 widening operation that is supported by the target platform in
11804 vector form (i.e., when operating on arguments of type VECTYPE_IN
11805 producing a result of type VECTYPE_OUT).
11807 Widening operations we currently support are NOP (CONVERT), FLOAT,
11808 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11809 are supported by the target platform either directly (via vector
11810 tree-codes), or via target builtins.
11812 Output:
11813 - CODE1 and CODE2 are codes of vector operations to be used when
11814 vectorizing the operation, if available.
11815 - MULTI_STEP_CVT determines the number of required intermediate steps in
11816 case of multi-step conversion (like char->short->int - in that case
11817 MULTI_STEP_CVT will be 1).
11818 - INTERM_TYPES contains the intermediate type required to perform the
11819 widening operation (short in the above example). */
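 /* For instance (an illustrative sketch): widening a char vector to an
 int vector via CONVERT yields CODE1 = VEC_UNPACK_LO_EXPR and
 CODE2 = VEC_UNPACK_HI_EXPR (swapped on big-endian targets),
 MULTI_STEP_CVT == 1 and INTERM_TYPES holding the intermediate short
 vector type, provided the target supports each step. */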
11821 bool
11822 supportable_widening_operation (vec_info *vinfo,
11823 enum tree_code code, stmt_vec_info stmt_info,
11824 tree vectype_out, tree vectype_in,
11825 enum tree_code *code1, enum tree_code *code2,
11826 int *multi_step_cvt,
11827 vec<tree> *interm_types)
11829 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11830 class loop *vect_loop = NULL;
11831 machine_mode vec_mode;
11832 enum insn_code icode1, icode2;
11833 optab optab1, optab2;
11834 tree vectype = vectype_in;
11835 tree wide_vectype = vectype_out;
11836 enum tree_code c1, c2;
11837 int i;
11838 tree prev_type, intermediate_type;
11839 machine_mode intermediate_mode, prev_mode;
11840 optab optab3, optab4;
11842 *multi_step_cvt = 0;
11843 if (loop_info)
11844 vect_loop = LOOP_VINFO_LOOP (loop_info);
11846 switch (code)
11848 case WIDEN_MULT_EXPR:
11849 /* The result of a vectorized widening operation usually requires
11850 two vectors (because the widened results do not fit into one vector).
11851 The generated vector results would normally be expected to be
11852 generated in the same order as in the original scalar computation,
11853 i.e. if 8 results are generated in each vector iteration, they are
11854 to be organized as follows:
11855 vect1: [res1,res2,res3,res4],
11856 vect2: [res5,res6,res7,res8].
11858 However, in the special case that the result of the widening
11859 operation is used in a reduction computation only, the order doesn't
11860 matter (because when vectorizing a reduction we change the order of
11861 the computation). Some targets can take advantage of this and
11862 generate more efficient code. For example, targets like Altivec,
11863 that support widen_mult using a sequence of {mult_even,mult_odd}
11864 generate the following vectors:
11865 vect1: [res1,res3,res5,res7],
11866 vect2: [res2,res4,res6,res8].
11868 When vectorizing outer-loops, we execute the inner-loop sequentially
11869 (each vectorized inner-loop iteration contributes to VF outer-loop
11870 iterations in parallel). We therefore don't allow changing the
11871 order of the computation in the inner-loop during outer-loop
11872 vectorization. */
11873 /* TODO: Another case in which order doesn't *really* matter is when we
11874 widen and then contract again, e.g. (short)((int)x * y >> 8).
11875 Normally, pack_trunc performs an even/odd permute, whereas the
11876 repack from an even/odd expansion would be an interleave, which
11877 would be significantly simpler for e.g. AVX2. */
11878 /* In any case, in order to avoid duplicating the code below, recurse
11879 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11880 are properly set up for the caller. If we fail, we'll continue with
11881 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11882 if (vect_loop
11883 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11884 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11885 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11886 stmt_info, vectype_out,
11887 vectype_in, code1, code2,
11888 multi_step_cvt, interm_types))
11890 /* Elements in a vector with the vect_used_by_reduction property cannot
11891 be reordered if the use chain with this property does not have the
11892 same operation. One such example is s += a * b, where elements
11893 in a and b cannot be reordered. Here we check if the vector defined
11894 by STMT is only directly used in the reduction statement. */
11895 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11896 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11897 if (use_stmt_info
11898 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11899 return true;
11901 c1 = VEC_WIDEN_MULT_LO_EXPR;
11902 c2 = VEC_WIDEN_MULT_HI_EXPR;
11903 break;
11905 case DOT_PROD_EXPR:
11906 c1 = DOT_PROD_EXPR;
11907 c2 = DOT_PROD_EXPR;
11908 break;
11910 case SAD_EXPR:
11911 c1 = SAD_EXPR;
11912 c2 = SAD_EXPR;
11913 break;
11915 case VEC_WIDEN_MULT_EVEN_EXPR:
11916 /* Support the recursion induced just above. */
11917 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11918 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11919 break;
11921 case WIDEN_LSHIFT_EXPR:
11922 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11923 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11924 break;
11926 case WIDEN_PLUS_EXPR:
11927 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11928 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11929 break;
11931 case WIDEN_MINUS_EXPR:
11932 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11933 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11934 break;
11936 CASE_CONVERT:
11937 c1 = VEC_UNPACK_LO_EXPR;
11938 c2 = VEC_UNPACK_HI_EXPR;
11939 break;
11941 case FLOAT_EXPR:
11942 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11943 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11944 break;
11946 case FIX_TRUNC_EXPR:
11947 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11948 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11949 break;
11951 default:
11952 gcc_unreachable ();
11955 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11956 std::swap (c1, c2);
11958 if (code == FIX_TRUNC_EXPR)
11960 /* The signedness is determined from output operand. */
11961 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11962 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11964 else if (CONVERT_EXPR_CODE_P (code)
11965 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11966 && VECTOR_BOOLEAN_TYPE_P (vectype)
11967 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11968 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11970 /* If the input and result modes are the same, a different optab
11971 is needed where we pass in the number of units in vectype. */
11972 optab1 = vec_unpacks_sbool_lo_optab;
11973 optab2 = vec_unpacks_sbool_hi_optab;
11975 else
11977 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11978 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11981 if (!optab1 || !optab2)
11982 return false;
11984 vec_mode = TYPE_MODE (vectype);
11985 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11986 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11987 return false;
11989 *code1 = c1;
11990 *code2 = c2;
11992 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11993 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11995 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11996 return true;
11997 /* For scalar masks we may have different boolean
11998 vector types having the same QImode. Thus we
11999 add an additional check on the number of elements. */
12000 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
12001 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12002 return true;
12005 /* Check if it's a multi-step conversion that can be done using intermediate
12006 types. */
12008 prev_type = vectype;
12009 prev_mode = vec_mode;
12011 if (!CONVERT_EXPR_CODE_P (code))
12012 return false;
12014 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12015 intermediate steps in the promotion sequence. We try
12016 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
12017 not. */
12018 interm_types->create (MAX_INTERM_CVT_STEPS);
12019 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12021 intermediate_mode = insn_data[icode1].operand[0].mode;
12022 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12023 intermediate_type
12024 = vect_halve_mask_nunits (prev_type, intermediate_mode);
12025 else
12026 intermediate_type
12027 = lang_hooks.types.type_for_mode (intermediate_mode,
12028 TYPE_UNSIGNED (prev_type));
12030 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12031 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12032 && intermediate_mode == prev_mode
12033 && SCALAR_INT_MODE_P (prev_mode))
12035 /* If the input and result modes are the same, a different optab
12036 is needed where we pass in the number of units in vectype. */
12037 optab3 = vec_unpacks_sbool_lo_optab;
12038 optab4 = vec_unpacks_sbool_hi_optab;
12040 else
12042 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
12043 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
12046 if (!optab3 || !optab4
12047 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
12048 || insn_data[icode1].operand[0].mode != intermediate_mode
12049 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
12050 || insn_data[icode2].operand[0].mode != intermediate_mode
12051 || ((icode1 = optab_handler (optab3, intermediate_mode))
12052 == CODE_FOR_nothing)
12053 || ((icode2 = optab_handler (optab4, intermediate_mode))
12054 == CODE_FOR_nothing))
12055 break;
12057 interm_types->quick_push (intermediate_type);
12058 (*multi_step_cvt)++;
12060 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
12061 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
12063 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12064 return true;
12065 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
12066 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
12067 return true;
12070 prev_type = intermediate_type;
12071 prev_mode = intermediate_mode;
12074 interm_types->release ();
12075 return false;
12079 /* Function supportable_narrowing_operation
12081 Check whether an operation represented by the code CODE is a
12082 narrowing operation that is supported by the target platform in
12083 vector form (i.e., when operating on arguments of type VECTYPE_IN
12084 and producing a result of type VECTYPE_OUT).
12086 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
12087 and FLOAT. This function checks if these operations are supported by
12088 the target platform directly via vector tree-codes.
12090 Output:
12091 - CODE1 is the code of a vector operation to be used when
12092 vectorizing the operation, if available.
12093 - MULTI_STEP_CVT determines the number of required intermediate steps in
12094 case of multi-step conversion (like int->short->char - in that case
12095 MULTI_STEP_CVT will be 1).
12096 - INTERM_TYPES contains the intermediate type required to perform the
12097 narrowing operation (short in the above example). */
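 /* For instance (an illustrative sketch): narrowing an int vector to a
 char vector via CONVERT yields CODE1 = VEC_PACK_TRUNC_EXPR,
 MULTI_STEP_CVT == 1 and INTERM_TYPES holding the intermediate short
 vector type, provided the target supports both packing steps. */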
12099 bool
12100 supportable_narrowing_operation (enum tree_code code,
12101 tree vectype_out, tree vectype_in,
12102 enum tree_code *code1, int *multi_step_cvt,
12103 vec<tree> *interm_types)
12105 machine_mode vec_mode;
12106 enum insn_code icode1;
12107 optab optab1, interm_optab;
12108 tree vectype = vectype_in;
12109 tree narrow_vectype = vectype_out;
12110 enum tree_code c1;
12111 tree intermediate_type, prev_type;
12112 machine_mode intermediate_mode, prev_mode;
12113 int i;
12114 bool uns;
12116 *multi_step_cvt = 0;
12117 switch (code)
12119 CASE_CONVERT:
12120 c1 = VEC_PACK_TRUNC_EXPR;
12121 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12122 && VECTOR_BOOLEAN_TYPE_P (vectype)
12123 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12124 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12125 optab1 = vec_pack_sbool_trunc_optab;
12126 else
12127 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12128 break;
12130 case FIX_TRUNC_EXPR:
12131 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12132 /* The signedness is determined from output operand. */
12133 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12134 break;
12136 case FLOAT_EXPR:
12137 c1 = VEC_PACK_FLOAT_EXPR;
12138 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12139 break;
12141 default:
12142 gcc_unreachable ();
12145 if (!optab1)
12146 return false;
12148 vec_mode = TYPE_MODE (vectype);
12149 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12150 return false;
12152 *code1 = c1;
12154 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12156 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12157 return true;
12158 /* For scalar masks we may have different boolean
12159 vector types having the same QImode. Thus we
12160 add an additional check on the number of elements. */
12161 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12162 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12163 return true;
12166 if (code == FLOAT_EXPR)
12167 return false;
12169 /* Check if it's a multi-step conversion that can be done using intermediate
12170 types. */
12171 prev_mode = vec_mode;
12172 prev_type = vectype;
12173 if (code == FIX_TRUNC_EXPR)
12174 uns = TYPE_UNSIGNED (vectype_out);
12175 else
12176 uns = TYPE_UNSIGNED (vectype);
12178 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12179 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12180 costly than signed. */
12181 if (code == FIX_TRUNC_EXPR && uns)
12183 enum insn_code icode2;
12185 intermediate_type
12186 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12187 interm_optab
12188 = optab_for_tree_code (c1, intermediate_type, optab_default);
12189 if (interm_optab != unknown_optab
12190 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12191 && insn_data[icode1].operand[0].mode
12192 == insn_data[icode2].operand[0].mode)
12194 uns = false;
12195 optab1 = interm_optab;
12196 icode1 = icode2;
12200 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12201 intermediate steps in the narrowing sequence. We try
12202 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12203 interm_types->create (MAX_INTERM_CVT_STEPS);
12204 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12206 intermediate_mode = insn_data[icode1].operand[0].mode;
12207 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12208 intermediate_type
12209 = vect_double_mask_nunits (prev_type, intermediate_mode);
12210 else
12211 intermediate_type
12212 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12213 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12214 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12215 && intermediate_mode == prev_mode
12216 && SCALAR_INT_MODE_P (prev_mode))
12217 interm_optab = vec_pack_sbool_trunc_optab;
12218 else
12219 interm_optab
12220 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12221 optab_default);
12222 if (!interm_optab
12223 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12224 || insn_data[icode1].operand[0].mode != intermediate_mode
12225 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12226 == CODE_FOR_nothing))
12227 break;
12229 interm_types->quick_push (intermediate_type);
12230 (*multi_step_cvt)++;
12232 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12234 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12235 return true;
12236 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12237 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12238 return true;
12241 prev_mode = intermediate_mode;
12242 prev_type = intermediate_type;
12243 optab1 = interm_optab;
12246 interm_types->release ();
12247 return false;
12250 /* Generate and return a vector mask of MASK_TYPE such that
12251 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12252 Add the statements to SEQ. */
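 /* For instance (illustrative): with START_INDEX == 2, END_INDEX == 5
 and an 8-lane MASK_TYPE, the generated IFN_WHILE_ULT call produces
 the mask { 1, 1, 1, 0, 0, 0, 0, 0 }. */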
12254 tree
12255 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12256 tree end_index, const char *name)
12258 tree cmp_type = TREE_TYPE (start_index);
12259 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12260 cmp_type, mask_type,
12261 OPTIMIZE_FOR_SPEED));
12262 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12263 start_index, end_index,
12264 build_zero_cst (mask_type));
12265 tree tmp;
12266 if (name)
12267 tmp = make_temp_ssa_name (mask_type, NULL, name);
12268 else
12269 tmp = make_ssa_name (mask_type);
12270 gimple_call_set_lhs (call, tmp);
12271 gimple_seq_add_stmt (seq, call);
12272 return tmp;
12275 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12276 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12278 tree
12279 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12280 tree end_index)
12282 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12283 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12286 /* Try to compute the vector types required to vectorize STMT_INFO,
12287 returning true on success and false if vectorization isn't possible.
12288 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12289 make sure that the number of elements in the vectors is no bigger
12290 than GROUP_SIZE.
12292 On success:
12294 - Set *STMT_VECTYPE_OUT to:
12295 - NULL_TREE if the statement doesn't need to be vectorized;
12296 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12298 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12299 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12300 statement does not help to determine the overall number of units. */
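 /* An illustrative case: for a statement that computes an int value
 from char inputs, *STMT_VECTYPE_OUT is the int vector type while
 *NUNITS_VECTYPE_OUT is the char vector type with more lanes, since
 the number of units is taken from the smallest scalar type
 involved. */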
12302 opt_result
12303 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12304 tree *stmt_vectype_out,
12305 tree *nunits_vectype_out,
12306 unsigned int group_size)
12308 gimple *stmt = stmt_info->stmt;
12310 /* For BB vectorization, we should always have a group size once we've
12311 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12312 are tentative requests during things like early data reference
12313 analysis and pattern recognition. */
12314 if (is_a <bb_vec_info> (vinfo))
12315 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12316 else
12317 group_size = 0;
12319 *stmt_vectype_out = NULL_TREE;
12320 *nunits_vectype_out = NULL_TREE;
12322 if (gimple_get_lhs (stmt) == NULL_TREE
12323 /* MASK_STORE has no lhs, but is ok. */
12324 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12326 if (is_a <gcall *> (stmt))
12328 /* Ignore calls with no lhs. These must be calls to
12329 #pragma omp simd functions, and the vectorization factor
12330 they really need can't be determined until
12331 vectorizable_simd_clone_call. */
12332 if (dump_enabled_p ())
12333 dump_printf_loc (MSG_NOTE, vect_location,
12334 "defer to SIMD clone analysis.\n");
12335 return opt_result::success ();
12338 return opt_result::failure_at (stmt,
12339 "not vectorized: irregular stmt.%G", stmt);
12342 tree vectype;
12343 tree scalar_type = NULL_TREE;
12344 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12346 vectype = STMT_VINFO_VECTYPE (stmt_info);
12347 if (dump_enabled_p ())
12348 dump_printf_loc (MSG_NOTE, vect_location,
12349 "precomputed vectype: %T\n", vectype);
12351 else if (vect_use_mask_type_p (stmt_info))
12353 unsigned int precision = stmt_info->mask_precision;
12354 scalar_type = build_nonstandard_integer_type (precision, 1);
12355 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12356 if (!vectype)
12357 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12358 " data-type %T\n", scalar_type);
12359 if (dump_enabled_p ())
12360 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12362 else
12364 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12365 scalar_type = TREE_TYPE (DR_REF (dr));
12366 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12367 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12368 else
12369 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12371 if (dump_enabled_p ())
12373 if (group_size)
12374 dump_printf_loc (MSG_NOTE, vect_location,
12375 "get vectype for scalar type (group size %d):"
12376 " %T\n", group_size, scalar_type);
12377 else
12378 dump_printf_loc (MSG_NOTE, vect_location,
12379 "get vectype for scalar type: %T\n", scalar_type);
12381 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12382 if (!vectype)
12383 return opt_result::failure_at (stmt,
12384 "not vectorized:"
12385 " unsupported data-type %T\n",
12386 scalar_type);
12388 if (dump_enabled_p ())
12389 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12392 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12393 return opt_result::failure_at (stmt,
12394 "not vectorized: vector stmt in loop:%G",
12395 stmt);
12397 *stmt_vectype_out = vectype;
12399 /* Don't try to compute scalar types if the stmt produces a boolean
12400 vector; use the existing vector type instead. */
12401 tree nunits_vectype = vectype;
12402 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12404 /* The number of units is set according to the smallest scalar
12405 type (or the largest vector size, but we only support one
12406 vector size per vectorization). */
12407 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12408 TREE_TYPE (vectype));
12409 if (scalar_type != TREE_TYPE (vectype))
12411 if (dump_enabled_p ())
12412 dump_printf_loc (MSG_NOTE, vect_location,
12413 "get vectype for smallest scalar type: %T\n",
12414 scalar_type);
12415 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12416 group_size);
12417 if (!nunits_vectype)
12418 return opt_result::failure_at
12419 (stmt, "not vectorized: unsupported data-type %T\n",
12420 scalar_type);
12421 if (dump_enabled_p ())
12422 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12423 nunits_vectype);
12427 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12428 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12429 return opt_result::failure_at (stmt,
12430 "Not vectorized: Incompatible number "
12431 "of vector subparts between %T and %T\n",
12432 nunits_vectype, *stmt_vectype_out);
12434 if (dump_enabled_p ())
12436 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12437 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12438 dump_printf (MSG_NOTE, "\n");
12441 *nunits_vectype_out = nunits_vectype;
12442 return opt_result::success ();
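
/* A worked example for the function above, assuming a target with 128-bit
   vectors: for a widening conversion such as

     int_1 = (int) short_2;

   *STMT_VECTYPE_OUT is based on the int result, e.g. V4SI, whereas the
   smallest scalar type involved is the 16-bit source, so *NUNITS_VECTYPE_OUT
   becomes V8HI.  Its 8 subparts are a multiple of V4SI's 4, so the
   multiple_p check accepts the pair, and it is the 8-unit type that feeds
   the overall number-of-units (vectorization factor) computation.  */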

/* Generate and return statement sequence that sets vector length LEN that is:

   min_of_start_and_end = min (START_INDEX, END_INDEX);
   left_len = END_INDEX - min_of_start_and_end;
   rhs = min (left_len, LEN_LIMIT);
   LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */

gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);

  return stmts;
}
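
/* A worked example for the function above, assuming LEN_LIMIT = 16 and
   END_INDEX = 100: a full iteration with START_INDEX = 0 gives
   min_of_start_and_end = 0, left_len = 100 and LEN = min (100, 16) = 16,
   while the final iteration with START_INDEX = 96 gives left_len = 4 and
   LEN = 4, so only the four remaining elements are processed.  If
   START_INDEX ever reaches END_INDEX, left_len and hence LEN are 0.  */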