Support TI mode and soft float on PA64
[official-gcc.git] / gcc / tree-vect-stmts.c
blob: 03cc7267cf80d4ce73c0d89ab86b07e84752456a
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2021 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 tree vectype, int misalign,
96 enum vect_cost_model_location where)
98 if ((kind == vector_load || kind == unaligned_load)
99 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
100 kind = vector_gather_load;
101 if ((kind == vector_store || kind == unaligned_store)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_scatter_store;
105 stmt_info_for_cost si = { count, kind, where, stmt_info, vectype, misalign };
106 body_cost_vec->safe_push (si);
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
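/* Usage sketch (mirrors the calls made by the cost routines below):
     inside_cost += record_stmt_cost (cost_vec, ncopies, vector_stmt,
                                      stmt_info, 0, vect_body);
   the shorter overloads, presumably declared in tree-vectorizer.h, fill in
   the vectype from STMT_INFO.  */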
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
126 static tree
127 read_vector_array (vec_info *vinfo,
128 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
129 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
146 return vect_name;
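/* As a sketch, for N == 2 this emits something like
     vect_x_3 = vect_array[2];
   before *GSI and returns the new SSA name (names here are illustrative).  */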
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
153 static void
154 write_vector_array (vec_info *vinfo,
155 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
156 tree vect, tree array, unsigned HOST_WIDE_INT n)
158 tree array_ref;
159 gimple *new_stmt;
161 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
162 build_int_cst (size_type_node, n),
163 NULL_TREE, NULL_TREE);
165 new_stmt = gimple_build_assign (array_ref, vect);
166 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
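/* Sketch of the generated code: for N == 2 this emits
     vect_array[2] = vect;
   before *GSI.  */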
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
171 (and its group). */
173 static tree
174 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
176 tree mem_ref;
178 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
181 return mem_ref;
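/* The result is conceptually *(TYPE *) PTR: a MEM_REF with offset zero whose
   aliasing behaviour is taken from ALIAS_PTR_TYPE.  */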
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
187 static void
188 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
189 gimple_stmt_iterator *gsi, tree var)
191 tree clobber = build_clobber (TREE_TYPE (var));
192 gimple *new_stmt = gimple_build_assign (var, clobber);
193 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
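/* The emitted statement has the form
     var = {CLOBBER};
   marking the end of VAR's lifetime for later passes.  */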
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
202 static void
203 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
204 enum vect_relevant relevant, bool live_p)
206 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
207 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE, vect_location,
211 "mark relevant %d, live %d: %G", relevant, live_p,
212 stmt_info->stmt);
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE, vect_location,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info = stmt_info;
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
232 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
233 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
236 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
237 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
238 STMT_VINFO_RELEVANT (stmt_info) = relevant;
240 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE, vect_location,
245 "already marked relevant/live.\n");
246 return;
249 worklist->safe_push (stmt_info);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
257 bool
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
259 loop_vec_info loop_vinfo)
261 tree op;
262 ssa_op_iter iter;
264 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
265 if (!stmt)
266 return false;
268 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
270 enum vect_def_type dt = vect_uninitialized_def;
272 if (!vect_is_simple_use (op, loop_vinfo, &dt))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
276 "use not simple.\n");
277 return false;
280 if (dt != vect_external_def && dt != vect_constant_def)
281 return false;
283 return true;
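/* Example: for t_1 = n_5(D) * 4, where n_5(D) is a function parameter
   (vect_external_def) and 4 is a constant, this returns true; if an operand
   were defined by a statement inside the loop it would return false.  */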
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
298 static bool
299 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
300 enum vect_relevant *relevant, bool *live_p)
302 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
303 ssa_op_iter op_iter;
304 imm_use_iterator imm_iter;
305 use_operand_p use_p;
306 def_operand_p def_p;
308 *relevant = vect_unused_in_scope;
309 *live_p = false;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info->stmt)
313 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
314 *relevant = vect_used_in_scope;
316 /* changing memory. */
317 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
318 if (gimple_vdef (stmt_info->stmt)
319 && !gimple_clobber_p (stmt_info->stmt))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE, vect_location,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant = vect_used_in_scope;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
330 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
332 basic_block bb = gimple_bb (USE_STMT (use_p));
333 if (!flow_bb_inside_loop_p (loop, bb))
335 if (is_gimple_debug (USE_STMT (use_p)))
336 continue;
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE, vect_location,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
345 gcc_assert (bb == single_exit (loop)->dest);
347 *live_p = true;
352 if (*live_p && *relevant == vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE, vect_location,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant = vect_used_only_live;
361 return (*live_p || *relevant);
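/* For example, a store a[i_1] = x_2 has a vdef and is therefore marked
   vect_used_in_scope above, while a computation whose only uses are in
   loop-exit PHIs just sets *live_p (and, unless it is simple and all its
   uses are invariant, gets vect_used_only_live).  */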
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
370 static bool
371 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
373 tree operand;
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info))
379 return true;
381 /* STMT has a data_ref. FORNOW this means that it's of one of
382 the following forms:
383 -1- ARRAY_REF = var
384 -2- var = ARRAY_REF
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
389 for array indexing.
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
395 if (!assign || !gimple_assign_copy_p (assign))
397 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
398 if (call && gimple_call_internal_p (call))
400 internal_fn ifn = gimple_call_internal_fn (call);
401 int mask_index = internal_fn_mask_index (ifn);
402 if (mask_index >= 0
403 && use == gimple_call_arg (call, mask_index))
404 return true;
405 int stored_value_index = internal_fn_stored_value_index (ifn);
406 if (stored_value_index >= 0
407 && use == gimple_call_arg (call, stored_value_index))
408 return true;
409 if (internal_gather_scatter_fn_p (ifn)
410 && use == gimple_call_arg (call, 1))
411 return true;
413 return false;
416 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
417 return false;
418 operand = gimple_assign_rhs1 (assign);
419 if (TREE_CODE (operand) != SSA_NAME)
420 return false;
422 if (operand == use)
423 return true;
425 return false;
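/* Example: for a scalar store a[i_1] = x_2, calling this with USE == x_2
   returns true (the stored value is a real operand), whereas USE == i_1
   returns false (it only feeds the array index).  */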
429 /*
430 Function process_use.
432 Inputs:
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
438 be performed.
440 Outputs:
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
445 Exceptions:
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT because it has already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return opt_result::success () if everything is as expected, or a failure result otherwise. */
456 static opt_result
457 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
458 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
459 bool force)
461 stmt_vec_info dstmt_vinfo;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
470 return opt_result::failure_at (stmt_vinfo->stmt,
471 "not vectorized:"
472 " unsupported use in stmt.\n");
474 if (!dstmt_vinfo)
475 return opt_result::success ();
477 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
478 basic_block bb = gimple_bb (stmt_vinfo->stmt);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
485 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
487 && bb->loop_father == def_bb->loop_father)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE, vect_location,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
498 d = dstmt_vinfo
499 inner-loop:
500 stmt # use (d)
501 outer-loop-tail-bb:
502 ... */
503 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
509 switch (relevant)
511 case vect_unused_in_scope:
512 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
513 vect_used_in_scope : vect_unused_in_scope;
514 break;
516 case vect_used_in_outer_by_reduction:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
518 relevant = vect_used_by_reduction;
519 break;
521 case vect_used_in_outer:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
523 relevant = vect_used_in_scope;
524 break;
526 case vect_used_in_scope:
527 break;
529 default:
530 gcc_unreachable ();
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
537 inner-loop:
538 d = dstmt_vinfo
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
540 stmt # use (d) */
541 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE, vect_location,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
547 switch (relevant)
549 case vect_unused_in_scope:
550 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
552 vect_used_in_outer_by_reduction : vect_unused_in_scope;
553 break;
555 case vect_used_by_reduction:
556 case vect_used_only_live:
557 relevant = vect_used_in_outer_by_reduction;
558 break;
560 case vect_used_in_scope:
561 relevant = vect_used_in_outer;
562 break;
564 default:
565 gcc_unreachable ();
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
571 of course. */
572 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
576 loop_latch_edge (bb->loop_father))
577 == use))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE, vect_location,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
595 for i...
596 for j...
597 1. T0 = i + j
598 2. T1 = a[T0]
600 3. j = j + 1
602 Stmts 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
607 opt_result
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
610 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
611 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
612 unsigned int nbbs = loop->num_nodes;
613 gimple_stmt_iterator si;
614 unsigned int i;
615 basic_block bb;
616 bool live_p;
617 enum vect_relevant relevant;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec<stmt_vec_info, 64> worklist;
623 /* 1. Init worklist. */
624 for (i = 0; i < nbbs; i++)
626 bb = bbs[i];
627 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
632 phi_info->stmt);
634 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
635 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
637 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
639 if (is_gimple_debug (gsi_stmt (si)))
640 continue;
641 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
642 if (dump_enabled_p ())
643 dump_printf_loc (MSG_NOTE, vect_location,
644 "init: stmt relevant? %G", stmt_info->stmt);
646 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
647 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
651 /* 2. Process_worklist */
652 while (worklist.length () > 0)
654 use_operand_p use_p;
655 ssa_op_iter iter;
657 stmt_vec_info stmt_vinfo = worklist.pop ();
658 if (dump_enabled_p ())
659 dump_printf_loc (MSG_NOTE, vect_location,
660 "worklist: examine stmt: %G", stmt_vinfo->stmt);
662 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
663 (DEF_STMT) as relevant/irrelevant according to the relevance property
664 of STMT. */
665 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
667 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
668 propagated as is to the DEF_STMTs of its USEs.
670 One exception is when STMT has been identified as defining a reduction
671 variable; in this case we set the relevance to vect_used_by_reduction.
672 This is because we distinguish between two kinds of relevant stmts -
673 those that are used by a reduction computation, and those that are
674 (also) used by a regular computation. This allows us later on to
675 identify stmts that are used solely by a reduction, and therefore the
676 order of the results that they produce does not have to be kept. */
678 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
680 case vect_reduction_def:
681 gcc_assert (relevant != vect_unused_in_scope);
682 if (relevant != vect_unused_in_scope
683 && relevant != vect_used_in_scope
684 && relevant != vect_used_by_reduction
685 && relevant != vect_used_only_live)
686 return opt_result::failure_at
687 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
688 break;
690 case vect_nested_cycle:
691 if (relevant != vect_unused_in_scope
692 && relevant != vect_used_in_outer_by_reduction
693 && relevant != vect_used_in_outer)
694 return opt_result::failure_at
695 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
696 break;
698 case vect_double_reduction_def:
699 if (relevant != vect_unused_in_scope
700 && relevant != vect_used_by_reduction
701 && relevant != vect_used_only_live)
702 return opt_result::failure_at
703 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
704 break;
706 default:
707 break;
710 if (is_pattern_stmt_p (stmt_vinfo))
712 /* Pattern statements are not inserted into the code, so
713 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
714 have to scan the RHS or function arguments instead. */
715 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
717 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
718 tree op = gimple_assign_rhs1 (assign);
720 i = 1;
721 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
723 opt_result res
724 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
729 loop_vinfo, relevant, &worklist, false);
730 if (!res)
731 return res;
732 i = 2;
734 for (; i < gimple_num_ops (assign); i++)
736 op = gimple_op (assign, i);
737 if (TREE_CODE (op) == SSA_NAME)
739 opt_result res
740 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
741 &worklist, false);
742 if (!res)
743 return res;
747 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
749 for (i = 0; i < gimple_call_num_args (call); i++)
751 tree arg = gimple_call_arg (call, i);
752 opt_result res
753 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
754 &worklist, false);
755 if (!res)
756 return res;
760 else
761 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
763 tree op = USE_FROM_PTR (use_p);
764 opt_result res
765 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
766 &worklist, false);
767 if (!res)
768 return res;
771 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
773 gather_scatter_info gs_info;
774 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
775 gcc_unreachable ();
776 opt_result res
777 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
778 &worklist, true);
779 if (!res)
781 if (fatal)
782 *fatal = false;
783 return res;
786 } /* while worklist */
788 return opt_result::success ();
791 /* Function vect_model_simple_cost.
793 Models cost for simple operations, i.e. those that only emit ncopies of a
794 single op. Right now, this does not account for multiple insns that could
795 be generated for the single vector op. We will handle that shortly. */
797 static void
798 vect_model_simple_cost (vec_info *,
799 stmt_vec_info stmt_info, int ncopies,
800 enum vect_def_type *dt,
801 int ndts,
802 slp_tree node,
803 stmt_vector_for_cost *cost_vec,
804 vect_cost_for_stmt kind = vector_stmt)
806 int inside_cost = 0, prologue_cost = 0;
808 gcc_assert (cost_vec != NULL);
810 /* ??? Somehow we need to fix this at the callers. */
811 if (node)
812 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
814 if (!node)
815 /* Cost the "broadcast" of a scalar operand into a vector operand.
816 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
817 cost model. */
818 for (int i = 0; i < ndts; i++)
819 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
820 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
821 stmt_info, 0, vect_prologue);
823 /* Pass the inside-of-loop statements to the target-specific cost model. */
824 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
825 stmt_info, 0, vect_body);
827 if (dump_enabled_p ())
828 dump_printf_loc (MSG_NOTE, vect_location,
829 "vect_model_simple_cost: inside_cost = %d, "
830 "prologue_cost = %d .\n", inside_cost, prologue_cost);
834 /* Model cost for type demotion and promotion operations. PWR is
835 normally zero for single-step promotions and demotions. It will be
836 one if two-step promotion/demotion is required, and so on. NCOPIES
837 is the number of vector results (and thus number of instructions)
838 for the narrowest end of the operation chain. Each additional
839 step doubles the number of instructions required. If WIDEN_ARITH
840 is true the stmt is doing widening arithmetic. */
842 static void
843 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
844 enum vect_def_type *dt,
845 unsigned int ncopies, int pwr,
846 stmt_vector_for_cost *cost_vec,
847 bool widen_arith)
849 int i;
850 int inside_cost = 0, prologue_cost = 0;
852 for (i = 0; i < pwr + 1; i++)
854 inside_cost += record_stmt_cost (cost_vec, ncopies,
855 widen_arith
856 ? vector_stmt : vec_promote_demote,
857 stmt_info, 0, vect_body);
858 ncopies *= 2;
861 /* FORNOW: Assuming maximum 2 args per stmts. */
862 for (i = 0; i < 2; i++)
863 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
864 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
865 stmt_info, 0, vect_prologue);
867 if (dump_enabled_p ())
868 dump_printf_loc (MSG_NOTE, vect_location,
869 "vect_model_promotion_demotion_cost: inside_cost = %d, "
870 "prologue_cost = %d .\n", inside_cost, prologue_cost);
873 /* Returns true if the current function returns DECL. */
875 static bool
876 cfun_returns (tree decl)
878 edge_iterator ei;
879 edge e;
880 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
882 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
883 if (!ret)
884 continue;
885 if (gimple_return_retval (ret) == decl)
886 return true;
887 /* We often end up with an aggregate copy to the result decl,
888 handle that case as well. First skip intermediate clobbers
889 though. */
890 gimple *def = ret;
891 do
892 {
893 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
894 }
895 while (gimple_clobber_p (def));
896 if (is_a <gassign *> (def)
897 && gimple_assign_lhs (def) == gimple_return_retval (ret)
898 && gimple_assign_rhs1 (def) == decl)
899 return true;
901 return false;
904 /* Function vect_model_store_cost
906 Models cost for stores. In the case of grouped accesses, one access
907 has the overhead of the grouped access attributed to it. */
909 static void
910 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
911 vect_memory_access_type memory_access_type,
912 dr_alignment_support alignment_support_scheme,
913 int misalignment,
914 vec_load_store_type vls_type, slp_tree slp_node,
915 stmt_vector_for_cost *cost_vec)
917 unsigned int inside_cost = 0, prologue_cost = 0;
918 stmt_vec_info first_stmt_info = stmt_info;
919 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
921 /* ??? Somehow we need to fix this at the callers. */
922 if (slp_node)
923 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
925 if (vls_type == VLS_STORE_INVARIANT)
927 if (!slp_node)
928 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
929 stmt_info, 0, vect_prologue);
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node && grouped_access_p)
935 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p = (first_stmt_info == stmt_info);
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
946 if (first_stmt_p
947 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
949 /* Uses high and low interleave or shuffle operations for each
950 needed permute. */
951 int group_size = DR_GROUP_SIZE (first_stmt_info);
952 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
953 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
954 stmt_info, 0, vect_body);
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE, vect_location,
958 "vect_model_store_cost: strided group_size = %d .\n",
959 group_size);
962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
963 /* Costs of the stores. */
964 if (memory_access_type == VMAT_ELEMENTWISE
965 || memory_access_type == VMAT_GATHER_SCATTER)
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
969 inside_cost += record_stmt_cost (cost_vec,
970 ncopies * assumed_nunits,
971 scalar_store, stmt_info, 0, vect_body);
973 else
974 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
975 misalignment, &inside_cost, cost_vec);
977 if (memory_access_type == VMAT_ELEMENTWISE
978 || memory_access_type == VMAT_STRIDED_SLP)
980 /* N scalar stores plus extracting the elements. */
981 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
982 inside_cost += record_stmt_cost (cost_vec,
983 ncopies * assumed_nunits,
984 vec_to_scalar, stmt_info, 0, vect_body);
987 /* When vectorizing a store into the function result assign
988 a penalty if the function returns in a multi-register location.
989 In this case we assume we'll end up having to spill the
990 vector result and do piecewise loads as a conservative estimate. */
991 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
992 if (base
993 && (TREE_CODE (base) == RESULT_DECL
994 || (DECL_P (base) && cfun_returns (base)))
995 && !aggregate_value_p (base, cfun->decl))
997 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
998 /* ??? Handle PARALLEL in some way. */
999 if (REG_P (reg))
1001 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1002 /* Assume that a single reg-reg move is possible and cheap,
1003 do not account for vector to gp register move cost. */
1004 if (nregs > 1)
1006 /* Spill. */
1007 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1008 vector_store,
1009 stmt_info, 0, vect_epilogue);
1010 /* Loads. */
1011 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1012 scalar_load,
1013 stmt_info, 0, vect_epilogue);
1018 if (dump_enabled_p ())
1019 dump_printf_loc (MSG_NOTE, vect_location,
1020 "vect_model_store_cost: inside_cost = %d, "
1021 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1025 /* Calculate cost of DR's memory access. */
1026 void
1027 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1028 dr_alignment_support alignment_support_scheme,
1029 int misalignment,
1030 unsigned int *inside_cost,
1031 stmt_vector_for_cost *body_cost_vec)
1033 switch (alignment_support_scheme)
1035 case dr_aligned:
1037 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1038 vector_store, stmt_info, 0,
1039 vect_body);
1041 if (dump_enabled_p ())
1042 dump_printf_loc (MSG_NOTE, vect_location,
1043 "vect_model_store_cost: aligned.\n");
1044 break;
1047 case dr_unaligned_supported:
1049 /* Here, we assign an additional cost for the unaligned store. */
1050 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1051 unaligned_store, stmt_info,
1052 misalignment, vect_body);
1053 if (dump_enabled_p ())
1054 dump_printf_loc (MSG_NOTE, vect_location,
1055 "vect_model_store_cost: unaligned supported by "
1056 "hardware.\n");
1057 break;
1060 case dr_unaligned_unsupported:
1062 *inside_cost = VECT_MAX_COST;
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1066 "vect_model_store_cost: unsupported access.\n");
1067 break;
1070 default:
1071 gcc_unreachable ();
1076 /* Function vect_model_load_cost
1078 Models cost for loads. In the case of grouped accesses, one access has
1079 the overhead of the grouped access attributed to it. Since unaligned
1080 accesses are supported for loads, we also account for the costs of the
1081 access scheme chosen. */
1083 static void
1084 vect_model_load_cost (vec_info *vinfo,
1085 stmt_vec_info stmt_info, unsigned ncopies, poly_uint64 vf,
1086 vect_memory_access_type memory_access_type,
1087 dr_alignment_support alignment_support_scheme,
1088 int misalignment,
1089 gather_scatter_info *gs_info,
1090 slp_tree slp_node,
1091 stmt_vector_for_cost *cost_vec)
1093 unsigned int inside_cost = 0, prologue_cost = 0;
1094 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1096 gcc_assert (cost_vec);
1098 /* ??? Somehow we need to fix this at the callers. */
1099 if (slp_node)
1100 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1102 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1104 /* If the load is permuted then the alignment is determined by
1105 the first group element not by the first scalar stmt DR. */
1106 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1107 /* Record the cost for the permutation. */
1108 unsigned n_perms, n_loads;
1109 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL,
1110 vf, true, &n_perms, &n_loads);
1111 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1112 first_stmt_info, 0, vect_body);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1116 ncopies = n_loads;
1119 /* Grouped loads read all elements in the group at once,
1120 so we want the DR for the first statement. */
1121 stmt_vec_info first_stmt_info = stmt_info;
1122 if (!slp_node && grouped_access_p)
1123 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1125 /* True if we should include any once-per-group costs as well as
1126 the cost of the statement itself. For SLP we only get called
1127 once per group anyhow. */
1128 bool first_stmt_p = (first_stmt_info == stmt_info);
1130 /* An IFN_LOAD_LANES will load all its vector results, regardless of which
1131 ones we actually need. Account for the cost of unused results. */
1132 if (first_stmt_p && !slp_node && memory_access_type == VMAT_LOAD_STORE_LANES)
1134 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
1135 stmt_vec_info next_stmt_info = first_stmt_info;
1136 do
1137 {
1138 gaps -= 1;
1139 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
1140 }
1141 while (next_stmt_info);
1142 if (gaps)
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_NOTE, vect_location,
1146 "vect_model_load_cost: %d unused vectors.\n",
1147 gaps);
1148 vect_get_load_cost (vinfo, stmt_info, ncopies * gaps,
1149 alignment_support_scheme, misalignment, false,
1150 &inside_cost, &prologue_cost,
1151 cost_vec, cost_vec, true);
1155 /* We assume that the cost of a single load-lanes instruction is
1156 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1157 access is instead being provided by a load-and-permute operation,
1158 include the cost of the permutes. */
1159 if (first_stmt_p
1160 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1162 /* Uses even and odd extract operations or shuffle operations
1163 for each needed permute. */
1164 int group_size = DR_GROUP_SIZE (first_stmt_info);
1165 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1166 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1167 stmt_info, 0, vect_body);
1169 if (dump_enabled_p ())
1170 dump_printf_loc (MSG_NOTE, vect_location,
1171 "vect_model_load_cost: strided group_size = %d .\n",
1172 group_size);
1175 /* The loads themselves. */
1176 if (memory_access_type == VMAT_ELEMENTWISE
1177 || memory_access_type == VMAT_GATHER_SCATTER)
1179 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1180 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1181 if (memory_access_type == VMAT_GATHER_SCATTER
1182 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1183 /* For emulated gathers N offset vector element extracts
1184 (we assume the scalar scaling and ptr + offset add is consumed by
1185 the load). */
1186 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1187 vec_to_scalar, stmt_info, 0,
1188 vect_body);
1189 /* N scalar loads plus gathering them into a vector. */
1190 inside_cost += record_stmt_cost (cost_vec,
1191 ncopies * assumed_nunits,
1192 scalar_load, stmt_info, 0, vect_body);
1194 else if (memory_access_type == VMAT_INVARIANT)
1196 /* Invariant loads will ideally be hoisted and splat to a vector. */
1197 prologue_cost += record_stmt_cost (cost_vec, 1,
1198 scalar_load, stmt_info, 0,
1199 vect_prologue);
1200 prologue_cost += record_stmt_cost (cost_vec, 1,
1201 scalar_to_vec, stmt_info, 0,
1202 vect_prologue);
1204 else
1205 vect_get_load_cost (vinfo, stmt_info, ncopies,
1206 alignment_support_scheme, misalignment, first_stmt_p,
1207 &inside_cost, &prologue_cost,
1208 cost_vec, cost_vec, true);
1209 if (memory_access_type == VMAT_ELEMENTWISE
1210 || memory_access_type == VMAT_STRIDED_SLP
1211 || (memory_access_type == VMAT_GATHER_SCATTER
1212 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1213 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1214 stmt_info, 0, vect_body);
1216 if (dump_enabled_p ())
1217 dump_printf_loc (MSG_NOTE, vect_location,
1218 "vect_model_load_cost: inside_cost = %d, "
1219 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1223 /* Calculate cost of DR's memory access. */
1224 void
1225 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1226 dr_alignment_support alignment_support_scheme,
1227 int misalignment,
1228 bool add_realign_cost, unsigned int *inside_cost,
1229 unsigned int *prologue_cost,
1230 stmt_vector_for_cost *prologue_cost_vec,
1231 stmt_vector_for_cost *body_cost_vec,
1232 bool record_prologue_costs)
1234 switch (alignment_support_scheme)
1236 case dr_aligned:
1238 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1239 stmt_info, 0, vect_body);
1241 if (dump_enabled_p ())
1242 dump_printf_loc (MSG_NOTE, vect_location,
1243 "vect_model_load_cost: aligned.\n");
1245 break;
1247 case dr_unaligned_supported:
1249 /* Here, we assign an additional cost for the unaligned load. */
1250 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1251 unaligned_load, stmt_info,
1252 misalignment, vect_body);
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE, vect_location,
1256 "vect_model_load_cost: unaligned supported by "
1257 "hardware.\n");
1259 break;
1261 case dr_explicit_realign:
1263 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1264 vector_load, stmt_info, 0, vect_body);
1265 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1266 vec_perm, stmt_info, 0, vect_body);
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1270 prologue costs. */
1271 if (targetm.vectorize.builtin_mask_for_load)
1272 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1273 stmt_info, 0, vect_body);
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE, vect_location,
1277 "vect_model_load_cost: explicit realign\n");
1279 break;
1281 case dr_explicit_realign_optimized:
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE, vect_location,
1285 "vect_model_load_cost: unaligned software "
1286 "pipelined.\n");
1288 /* Unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1295 if (add_realign_cost && record_prologue_costs)
1297 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1298 vector_stmt, stmt_info,
1299 0, vect_prologue);
1300 if (targetm.vectorize.builtin_mask_for_load)
1301 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1302 vector_stmt, stmt_info,
1303 0, vect_prologue);
1306 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1307 stmt_info, 0, vect_body);
1308 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1309 stmt_info, 0, vect_body);
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE, vect_location,
1313 "vect_model_load_cost: explicit realign optimized"
1314 "\n");
1316 break;
1319 case dr_unaligned_unsupported:
1321 *inside_cost = VECT_MAX_COST;
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1325 "vect_model_load_cost: unsupported access.\n");
1326 break;
1329 default:
1330 gcc_unreachable ();
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
1337 static void
1338 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1339 gimple_stmt_iterator *gsi)
1341 if (gsi)
1342 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1343 else
1344 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1346 if (dump_enabled_p ())
1347 dump_printf_loc (MSG_NOTE, vect_location,
1348 "created new init_stmt: %G", new_stmt);
1351 /* Function vect_init_vector.
1353 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1354 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1355 vector type a vector with all elements equal to VAL is created first.
1356 Place the initialization at GSI if it is not NULL. Otherwise, place the
1357 initialization at the loop preheader.
1358 Return the DEF of INIT_STMT.
1359 It will be used in the vectorization of STMT_INFO. */
1361 tree
1362 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1363 gimple_stmt_iterator *gsi)
1365 gimple *init_stmt;
1366 tree new_temp;
1368 /* We abuse this function to push something to an SSA name with initial value 'val'. */
1369 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1371 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1372 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1374 /* A scalar boolean value should be transformed into
1375 an all-zeros or all-ones value before building a vector. */
1376 if (VECTOR_BOOLEAN_TYPE_P (type))
1378 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1379 tree false_val = build_zero_cst (TREE_TYPE (type));
1381 if (CONSTANT_CLASS_P (val))
1382 val = integer_zerop (val) ? false_val : true_val;
1383 else
1385 new_temp = make_ssa_name (TREE_TYPE (type));
1386 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1387 val, true_val, false_val);
1388 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1389 val = new_temp;
1392 else
1394 gimple_seq stmts = NULL;
1395 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1396 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1397 TREE_TYPE (type), val);
1398 else
1399 /* ??? Condition vectorization expects us to do
1400 promotion of invariant/external defs. */
1401 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1402 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1403 !gsi_end_p (gsi2); )
1405 init_stmt = gsi_stmt (gsi2);
1406 gsi_remove (&gsi2, false);
1407 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1411 val = build_vector_from_val (type, val);
1414 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1415 init_stmt = gimple_build_assign (new_temp, val);
1416 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1417 return new_temp;
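/* Usage sketch: calling vect_init_vector (vinfo, stmt_info, val, v4si_type,
   NULL) with VAL == 5 and an assumed four-element int vector type emits
   something like
     cst_1 = { 5, 5, 5, 5 };
   in the loop preheader and returns cst_1 (names are illustrative).  */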
1421 /* Function vect_get_vec_defs_for_operand.
1423 OP is an operand in STMT_VINFO. This function returns a vector of
1424 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1426 In the case that OP is an SSA_NAME which is defined in the loop, then
1427 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1429 In case OP is an invariant or constant, a new stmt that creates a vector def
1430 needs to be introduced. VECTYPE may be used to specify a required type for
1431 vector invariant. */
1433 void
1434 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1435 unsigned ncopies,
1436 tree op, vec<tree> *vec_oprnds, tree vectype)
1438 gimple *def_stmt;
1439 enum vect_def_type dt;
1440 bool is_simple_use;
1441 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1443 if (dump_enabled_p ())
1444 dump_printf_loc (MSG_NOTE, vect_location,
1445 "vect_get_vec_defs_for_operand: %T\n", op);
1447 stmt_vec_info def_stmt_info;
1448 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1449 &def_stmt_info, &def_stmt);
1450 gcc_assert (is_simple_use);
1451 if (def_stmt && dump_enabled_p ())
1452 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1454 vec_oprnds->create (ncopies);
1455 if (dt == vect_constant_def || dt == vect_external_def)
1457 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1458 tree vector_type;
1460 if (vectype)
1461 vector_type = vectype;
1462 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1463 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1464 vector_type = truth_type_for (stmt_vectype);
1465 else
1466 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1468 gcc_assert (vector_type);
1469 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1470 while (ncopies--)
1471 vec_oprnds->quick_push (vop);
1473 else
1475 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1476 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1477 for (unsigned i = 0; i < ncopies; ++i)
1478 vec_oprnds->quick_push (gimple_get_lhs
1479 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1484 /* Get vectorized definitions for OP0 and OP1. */
1486 void
1487 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1488 unsigned ncopies,
1489 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1490 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1491 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1492 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1494 if (slp_node)
1496 if (op0)
1497 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1498 if (op1)
1499 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1500 if (op2)
1501 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1502 if (op3)
1503 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1505 else
1507 if (op0)
1508 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1509 op0, vec_oprnds0, vectype0);
1510 if (op1)
1511 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1512 op1, vec_oprnds1, vectype1);
1513 if (op2)
1514 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1515 op2, vec_oprnds2, vectype2);
1516 if (op3)
1517 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1518 op3, vec_oprnds3, vectype3);
1522 void
1523 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1524 unsigned ncopies,
1525 tree op0, vec<tree> *vec_oprnds0,
1526 tree op1, vec<tree> *vec_oprnds1,
1527 tree op2, vec<tree> *vec_oprnds2,
1528 tree op3, vec<tree> *vec_oprnds3)
1530 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1531 op0, vec_oprnds0, NULL_TREE,
1532 op1, vec_oprnds1, NULL_TREE,
1533 op2, vec_oprnds2, NULL_TREE,
1534 op3, vec_oprnds3, NULL_TREE);
1537 /* Helper function called by vect_finish_replace_stmt and
1538 vect_finish_stmt_generation. Set the location of the new
1539 statement and, if it can throw, add it to STMT_INFO's EH region. */
1541 static void
1542 vect_finish_stmt_generation_1 (vec_info *,
1543 stmt_vec_info stmt_info, gimple *vec_stmt)
1545 if (dump_enabled_p ())
1546 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1548 if (stmt_info)
1550 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1552 /* While EH edges will generally prevent vectorization, stmt might
1553 e.g. be in a must-not-throw region. Ensure newly created stmts
1554 that could throw are part of the same region. */
1555 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1556 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1557 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1559 else
1560 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1563 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1564 which sets the same scalar result as STMT_INFO did. */
1567 void
1568 vect_finish_replace_stmt (vec_info *vinfo,
1569 stmt_vec_info stmt_info, gimple *vec_stmt)
1571 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1572 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1574 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1575 gsi_replace (&gsi, vec_stmt, true);
1577 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1580 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1581 before *GSI. */
1583 void
1584 vect_finish_stmt_generation (vec_info *vinfo,
1585 stmt_vec_info stmt_info, gimple *vec_stmt,
1586 gimple_stmt_iterator *gsi)
1588 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1590 if (!gsi_end_p (*gsi)
1591 && gimple_has_mem_ops (vec_stmt))
1593 gimple *at_stmt = gsi_stmt (*gsi);
1594 tree vuse = gimple_vuse (at_stmt);
1595 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1597 tree vdef = gimple_vdef (at_stmt);
1598 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1599 gimple_set_modified (vec_stmt, true);
1600 /* If we have an SSA vuse and insert a store, update virtual
1601 SSA form to avoid triggering the renamer. Do so only
1602 if we can easily see all uses - which is what almost always
1603 happens with the way vectorized stmts are inserted. */
1604 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1605 && ((is_gimple_assign (vec_stmt)
1606 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1607 || (is_gimple_call (vec_stmt)
1608 && !(gimple_call_flags (vec_stmt)
1609 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1611 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1612 gimple_set_vdef (vec_stmt, new_vdef);
1613 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1617 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1618 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1621 /* We want to vectorize a call to combined function CFN with function
1622 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1623 as the types of all inputs. Check whether this is possible using
1624 an internal function, returning its code if so or IFN_LAST if not. */
1626 static internal_fn
1627 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1628 tree vectype_out, tree vectype_in)
1630 internal_fn ifn;
1631 if (internal_fn_p (cfn))
1632 ifn = as_internal_fn (cfn);
1633 else
1634 ifn = associated_internal_fn (fndecl);
1635 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1637 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1638 if (info.vectorizable)
1640 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1641 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1642 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1643 OPTIMIZE_FOR_SPEED))
1644 return ifn;
1647 return IFN_LAST;
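/* For instance, a call to sqrtf (CFN_BUILT_IN_SQRTF) is expected to map to
   IFN_SQRT here when direct_internal_fn_supported_p says the target has a
   vector square root for the given types; otherwise IFN_LAST is returned
   and the caller has to look for another way to vectorize the call.  */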
1651 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1652 gimple_stmt_iterator *);
1654 /* Check whether a load or store statement in the loop described by
1655 LOOP_VINFO is possible in a loop using partial vectors. This is
1656 testing whether the vectorizer pass has the appropriate support,
1657 as well as whether the target does.
1659 VLS_TYPE says whether the statement is a load or store and VECTYPE
1660 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1661 says how the load or store is going to be implemented and GROUP_SIZE
1662 is the number of load or store statements in the containing group.
1663 If the access is a gather load or scatter store, GS_INFO describes
1664 its arguments. If the load or store is conditional, SCALAR_MASK is the
1665 condition under which it occurs.
1667 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1668 vectors is not supported, otherwise record the required rgroup control
1669 types. */
1671 static void
1672 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1673 vec_load_store_type vls_type,
1674 int group_size,
1675 vect_memory_access_type
1676 memory_access_type,
1677 gather_scatter_info *gs_info,
1678 tree scalar_mask)
1680 /* Invariant loads need no special support. */
1681 if (memory_access_type == VMAT_INVARIANT)
1682 return;
1684 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1685 machine_mode vecmode = TYPE_MODE (vectype);
1686 bool is_load = (vls_type == VLS_LOAD);
1687 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1689 if (is_load
1690 ? !vect_load_lanes_supported (vectype, group_size, true)
1691 : !vect_store_lanes_supported (vectype, group_size, true))
1693 if (dump_enabled_p ())
1694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1695 "can't operate on partial vectors because"
1696 " the target doesn't have an appropriate"
1697 " load/store-lanes instruction.\n");
1698 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1699 return;
1701 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1702 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1703 return;
1706 if (memory_access_type == VMAT_GATHER_SCATTER)
1708 internal_fn ifn = (is_load
1709 ? IFN_MASK_GATHER_LOAD
1710 : IFN_MASK_SCATTER_STORE);
1711 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1712 gs_info->memory_type,
1713 gs_info->offset_vectype,
1714 gs_info->scale))
1716 if (dump_enabled_p ())
1717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1718 "can't operate on partial vectors because"
1719 " the target doesn't have an appropriate"
1720 " gather load or scatter store instruction.\n");
1721 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1722 return;
1724 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1725 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1726 return;
1729 if (memory_access_type != VMAT_CONTIGUOUS
1730 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1732 /* Element X of the data must come from iteration i * VF + X of the
1733 scalar loop. We need more work to support other mappings. */
1734 if (dump_enabled_p ())
1735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1736 "can't operate on partial vectors because an"
1737 " access isn't contiguous.\n");
1738 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1739 return;
1742 if (!VECTOR_MODE_P (vecmode))
1744 if (dump_enabled_p ())
1745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1746 "can't operate on partial vectors when emulating"
1747 " vector operations.\n");
1748 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1749 return;
1752 /* We might load more scalars than we need for permuting SLP loads.
1753 We checked in get_group_load_store_type that the extra elements
1754 don't leak into a new vector. */
1755 auto get_valid_nvectors = [] (poly_uint64 size, poly_uint64 nunits)
1757 unsigned int nvectors;
1758 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1759 return nvectors;
1760 gcc_unreachable ();
1763 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1764 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1765 machine_mode mask_mode;
1766 bool using_partial_vectors_p = false;
1767 if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1768 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1770 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1771 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1772 using_partial_vectors_p = true;
1775 machine_mode vmode;
1776 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1778 unsigned int nvectors = get_valid_nvectors (group_size * vf, nunits);
1779 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1780 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1781 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1782 using_partial_vectors_p = true;
1785 if (!using_partial_vectors_p)
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1789 "can't operate on partial vectors because the"
1790 " target doesn't have the appropriate partial"
1791 " vectorization load or store.\n");
1792 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
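/* For illustration, a rough sketch of how the recorded rgroup controls are
   used later during code generation: with loop masks, each access in the
   vectorized body is issued under the per-iteration mask, along the lines of

       loop_mask_3 = <per-iteration mask for this rgroup>;
       vect__4 = .MASK_LOAD (ptr_1, align, loop_mask_3);
       .MASK_STORE (ptr_2, align, loop_mask_3, vect__5);

   while the length-based path instead emits .LEN_LOAD/.LEN_STORE calls with
   the recorded factor applied.  The SSA names above are made up.  */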
1796 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1797 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1798 that needs to be applied to all loads and stores in a vectorized loop.
1799 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1801 MASK_TYPE is the type of both masks. If new statements are needed,
1802 insert them before GSI. */
1804 static tree
1805 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1806 gimple_stmt_iterator *gsi)
1808 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1809 if (!loop_mask)
1810 return vec_mask;
1812 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1813 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1814 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1815 vec_mask, loop_mask);
1816 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1817 return and_res;
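/* For example, with a vectorized condition vec_mask_23 and a loop mask
   loop_mask_5 the statement inserted above is simply

       vec_mask_and_24 = vec_mask_23 & loop_mask_5;

   and the returned name is what callers pass to .MASK_LOAD/.MASK_STORE.
   The SSA names are illustrative only.  */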
1820 /* Determine whether we can use a gather load or scatter store to vectorize
1821 strided load or store STMT_INFO by truncating the current offset to a
1822 smaller width. We need to be able to construct an offset vector:
1824 { 0, X, X*2, X*3, ... }
1826 without loss of precision, where X is STMT_INFO's DR_STEP.
1828 Return true if this is possible, describing the gather load or scatter
1829 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1831 static bool
1832 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1833 loop_vec_info loop_vinfo, bool masked_p,
1834 gather_scatter_info *gs_info)
1836 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1837 data_reference *dr = dr_info->dr;
1838 tree step = DR_STEP (dr);
1839 if (TREE_CODE (step) != INTEGER_CST)
1841 /* ??? Perhaps we could use range information here? */
1842 if (dump_enabled_p ())
1843 dump_printf_loc (MSG_NOTE, vect_location,
1844 "cannot truncate variable step.\n");
1845 return false;
1848 /* Get the number of bits in an element. */
1849 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1850 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1851 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1853 /* Set COUNT to the upper limit on the number of elements - 1.
1854 Start with the maximum vectorization factor. */
1855 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1857 /* Try lowering COUNT to the number of scalar latch iterations. */
1858 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1859 widest_int max_iters;
1860 if (max_loop_iterations (loop, &max_iters)
1861 && max_iters < count)
1862 count = max_iters.to_shwi ();
1864 /* Try scales of 1 and the element size. */
1865 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1866 wi::overflow_type overflow = wi::OVF_NONE;
1867 for (int i = 0; i < 2; ++i)
1869 int scale = scales[i];
1870 widest_int factor;
1871 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1872 continue;
1874 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1875 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1876 if (overflow)
1877 continue;
1878 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1879 unsigned int min_offset_bits = wi::min_precision (range, sign);
1881 /* Find the narrowest viable offset type. */
1882 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1883 tree offset_type = build_nonstandard_integer_type (offset_bits,
1884 sign == UNSIGNED);
1886 /* See whether the target supports the operation with an offset
1887 no narrower than OFFSET_TYPE. */
1888 tree memory_type = TREE_TYPE (DR_REF (dr));
1889 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1890 vectype, memory_type, offset_type, scale,
1891 &gs_info->ifn, &gs_info->offset_vectype)
1892 || gs_info->ifn == IFN_LAST)
1893 continue;
1895 gs_info->decl = NULL_TREE;
1896 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1897 but we don't need to store that here. */
1898 gs_info->base = NULL_TREE;
1899 gs_info->element_type = TREE_TYPE (vectype);
1900 gs_info->offset = fold_convert (offset_type, step);
1901 gs_info->offset_dt = vect_constant_def;
1902 gs_info->scale = scale;
1903 gs_info->memory_type = memory_type;
1904 return true;
1907 if (overflow && dump_enabled_p ())
1908 dump_printf_loc (MSG_NOTE, vect_location,
1909 "truncating gather/scatter offset to %d bits"
1910 " might change its value.\n", element_bits);
1912 return false;
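/* A worked example on a hypothetical loop: suppose DR_STEP is 4 bytes, the
   scalar element size is 4 and the loop runs at most 1000 scalar iterations,
   so COUNT is at most 999.  With SCALE == 4 the factor is 1 and the largest
   offset is about 999, needing 10 bits; with SCALE == 1 it is about 3996,
   needing 12 bits.  Either way the narrowest power-of-two width tried is a
   16-bit unsigned offset type, and whichever scale the target supports first
   is recorded in GS_INFO.  */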
1915 /* Return true if we can use gather/scatter internal functions to
1916 vectorize STMT_INFO, which is a grouped or strided load or store.
1917 MASKED_P is true if the load or store is conditional. When returning
1918 true, fill in GS_INFO with the information required to perform the
1919 operation. */
1921 static bool
1922 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1923 loop_vec_info loop_vinfo, bool masked_p,
1924 gather_scatter_info *gs_info)
1926 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1927 || gs_info->ifn == IFN_LAST)
1928 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1929 masked_p, gs_info);
1931 tree old_offset_type = TREE_TYPE (gs_info->offset);
1932 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1934 gcc_assert (TYPE_PRECISION (new_offset_type)
1935 >= TYPE_PRECISION (old_offset_type));
1936 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1938 if (dump_enabled_p ())
1939 dump_printf_loc (MSG_NOTE, vect_location,
1940 "using gather/scatter for strided/grouped access,"
1941 " scale = %d\n", gs_info->scale);
1943 return true;
1946 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1947 elements with a known constant step. Return -1 if that step
1948 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1950 static int
1951 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1953 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1954 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1955 size_zero_node);
1958 /* If the target supports a permute mask that reverses the elements in
1959 a vector of type VECTYPE, return that mask, otherwise return null. */
1961 static tree
1962 perm_mask_for_reverse (tree vectype)
1964 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1966 /* The encoding has a single stepped pattern. */
1967 vec_perm_builder sel (nunits, 1, 3);
1968 for (int i = 0; i < 3; ++i)
1969 sel.quick_push (nunits - 1 - i);
1971 vec_perm_indices indices (sel, 1, nunits);
1972 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1973 return NULL_TREE;
1974 return vect_gen_perm_mask_checked (vectype, indices);
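/* For instance, for V8SI the three elements pushed above are { 7, 6, 5 } and
   the single stepped pattern extends them to the full selector
   { 7, 6, 5, 4, 3, 2, 1, 0 }, i.e. a whole-vector reversal.  */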
1977 /* A subroutine of get_load_store_type, with a subset of the same
1978 arguments. Handle the case where STMT_INFO is a load or store that
1979 accesses consecutive elements with a negative step. Sets *POFFSET
1980 to the offset to be applied to the DR for the first access. */
1982 static vect_memory_access_type
1983 get_negative_load_store_type (vec_info *vinfo,
1984 stmt_vec_info stmt_info, tree vectype,
1985 vec_load_store_type vls_type,
1986 unsigned int ncopies, poly_int64 *poffset)
1988 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1989 dr_alignment_support alignment_support_scheme;
1991 if (ncopies > 1)
1993 if (dump_enabled_p ())
1994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1995 "multiple types with negative step.\n");
1996 return VMAT_ELEMENTWISE;
1999 /* For backward running DRs the first access in vectype actually is
2000 N-1 elements before the address of the DR. */
2001 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
2002 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
2004 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
2005 alignment_support_scheme
2006 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
2007 if (alignment_support_scheme != dr_aligned
2008 && alignment_support_scheme != dr_unaligned_supported)
2010 if (dump_enabled_p ())
2011 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2012 "negative step but alignment required.\n");
2013 *poffset = 0;
2014 return VMAT_ELEMENTWISE;
2017 if (vls_type == VLS_STORE_INVARIANT)
2019 if (dump_enabled_p ())
2020 dump_printf_loc (MSG_NOTE, vect_location,
2021 "negative step with invariant source;"
2022 " no permute needed.\n");
2023 return VMAT_CONTIGUOUS_DOWN;
2026 if (!perm_mask_for_reverse (vectype))
2028 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2030 "negative step and reversing not supported.\n");
2031 *poffset = 0;
2032 return VMAT_ELEMENTWISE;
2035 return VMAT_CONTIGUOUS_REVERSE;
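/* As a concrete example of the offset computed above: for V4SI (four 4-byte
   elements) the first vector access covers the DR address and the three
   elements below it, so *POFFSET is (-4 + 1) * 4 == -12 bytes relative to
   the DR address.  */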
2038 /* STMT_INFO is either a masked or unconditional store. Return the value
2039 being stored. */
2041 tree
2042 vect_get_store_rhs (stmt_vec_info stmt_info)
2044 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2046 gcc_assert (gimple_assign_single_p (assign));
2047 return gimple_assign_rhs1 (assign);
2049 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2051 internal_fn ifn = gimple_call_internal_fn (call);
2052 int index = internal_fn_stored_value_index (ifn);
2053 gcc_assert (index >= 0);
2054 return gimple_call_arg (call, index);
2056 gcc_unreachable ();
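/* For example, for a plain assignment "*p_5 = x_7" this returns x_7, while
   for an internal call such as .MASK_STORE (p_5, align, mask_9, x_7) it
   returns the argument at the stored-value index of the ifn (the last
   argument in this sketch).  The names are illustrative only.  */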
2059 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2061 This function returns a vector type which can be composed from NELTS pieces,
2062 whose type is recorded in PTYPE. VTYPE should be a vector type, and the
2063 result has the same vector size as VTYPE. The function first checks whether
2064 the target supports constructing such a vector from piece-sized vector modes;
2065 if not, it then checks whether construction from a piece-sized scalar mode
2066 is supported. It returns NULL_TREE if no usable composition is found.
2068 For example, for (vtype=V16QI, nelts=4), we can probably get:
2069 - V16QI with PTYPE V4QI.
2070 - V4SI with PTYPE SI.
2071 - NULL_TREE. */
2073 static tree
2074 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2076 gcc_assert (VECTOR_TYPE_P (vtype));
2077 gcc_assert (known_gt (nelts, 0U));
2079 machine_mode vmode = TYPE_MODE (vtype);
2080 if (!VECTOR_MODE_P (vmode))
2081 return NULL_TREE;
2083 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2084 unsigned int pbsize;
2085 if (constant_multiple_p (vbsize, nelts, &pbsize))
2087 /* First check if vec_init optab supports construction from
2088 vector pieces directly. */
2089 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2090 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2091 machine_mode rmode;
2092 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2093 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2094 != CODE_FOR_nothing))
2096 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2097 return vtype;
2100 /* Otherwise check whether an integer mode of the same piece size exists
2101 and whether the vec_init optab supports construction from it directly. */
2102 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2103 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2104 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2105 != CODE_FOR_nothing))
2107 *ptype = build_nonstandard_integer_type (pbsize, 1);
2108 return build_vector_type (*ptype, nelts);
2112 return NULL_TREE;
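/* A minimal usage sketch (illustrative only): callers such as
   get_group_load_store_type ask for a two-piece composition, e.g.

       tree half_vtype;
       if (vector_vector_composition_type (vectype, 2, &half_vtype))
         ... construct the full vector from two HALF_VTYPE pieces ...

   and fall back to other strategies when NULL_TREE is returned.  */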
2115 /* A subroutine of get_load_store_type, with a subset of the same
2116 arguments. Handle the case where STMT_INFO is part of a grouped load
2117 or store.
2119 For stores, the statements in the group are all consecutive
2120 and there is no gap at the end. For loads, the statements in the
2121 group might not be consecutive; there can be gaps between statements
2122 as well as at the end. */
2124 static bool
2125 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2126 tree vectype, slp_tree slp_node,
2127 bool masked_p, vec_load_store_type vls_type,
2128 vect_memory_access_type *memory_access_type,
2129 poly_int64 *poffset,
2130 dr_alignment_support *alignment_support_scheme,
2131 int *misalignment,
2132 gather_scatter_info *gs_info)
2134 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2135 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2136 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2137 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2138 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2139 bool single_element_p = (stmt_info == first_stmt_info
2140 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2141 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2142 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2144 /* True if the vectorized statements would access beyond the last
2145 statement in the group. */
2146 bool overrun_p = false;
2148 /* True if we can cope with such overrun by peeling for gaps, so that
2149 there is at least one final scalar iteration after the vector loop. */
2150 bool can_overrun_p = (!masked_p
2151 && vls_type == VLS_LOAD
2152 && loop_vinfo
2153 && !loop->inner);
2155 /* There can only be a gap at the end of the group if the stride is
2156 known at compile time. */
2157 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2159 /* Stores can't yet have gaps. */
2160 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2162 if (slp_node)
2164 /* For SLP vectorization we directly vectorize a subchain
2165 without permutation. */
2166 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2167 first_dr_info
2168 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2169 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2171 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2172 separated by the stride, until we have a complete vector.
2173 Fall back to scalar accesses if that isn't possible. */
2174 if (multiple_p (nunits, group_size))
2175 *memory_access_type = VMAT_STRIDED_SLP;
2176 else
2177 *memory_access_type = VMAT_ELEMENTWISE;
2179 else
2181 overrun_p = loop_vinfo && gap != 0;
2182 if (overrun_p && vls_type != VLS_LOAD)
2184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2185 "Grouped store with gaps requires"
2186 " non-consecutive accesses\n");
2187 return false;
2189 /* An overrun is fine if the trailing elements are smaller
2190 than the alignment boundary B. Every vector access will
2191 be a multiple of B and so we are guaranteed to access a
2192 non-gap element in the same B-sized block. */
2193 if (overrun_p
2194 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2195 vectype)
2196 / vect_get_scalar_dr_size (first_dr_info)))
2197 overrun_p = false;
2199 /* If the gap splits the vector in half and the target
2200 can do half-vector operations avoid the epilogue peeling
2201 by simply loading half of the vector only. Usually
2202 the construction with an upper zero half will be elided. */
2203 dr_alignment_support alss;
2204 int misalign = dr_misalignment (first_dr_info, vectype);
2205 tree half_vtype;
2206 if (overrun_p
2207 && !masked_p
2208 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2209 vectype, misalign)))
2210 == dr_aligned
2211 || alss == dr_unaligned_supported)
2212 && known_eq (nunits, (group_size - gap) * 2)
2213 && known_eq (nunits, group_size)
2214 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2215 != NULL_TREE))
2216 overrun_p = false;
2218 if (overrun_p && !can_overrun_p)
2220 if (dump_enabled_p ())
2221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2222 "Peeling for outer loop is not supported\n");
2223 return false;
2225 int cmp = compare_step_with_zero (vinfo, stmt_info);
2226 if (cmp < 0)
2228 if (single_element_p)
2229 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2230 only correct for single element "interleaving" SLP. */
2231 *memory_access_type = get_negative_load_store_type
2232 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2233 else
2235 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2236 separated by the stride, until we have a complete vector.
2237 Fall back to scalar accesses if that isn't possible. */
2238 if (multiple_p (nunits, group_size))
2239 *memory_access_type = VMAT_STRIDED_SLP;
2240 else
2241 *memory_access_type = VMAT_ELEMENTWISE;
2244 else
2246 gcc_assert (!loop_vinfo || cmp > 0);
2247 *memory_access_type = VMAT_CONTIGUOUS;
2251 else
2253 /* We can always handle this case using elementwise accesses,
2254 but see if something more efficient is available. */
2255 *memory_access_type = VMAT_ELEMENTWISE;
2257 /* If there is a gap at the end of the group then these optimizations
2258 would access excess elements in the last iteration. */
2259 bool would_overrun_p = (gap != 0);
2260 /* An overrun is fine if the trailing elements are smaller than the
2261 alignment boundary B. Every vector access will be a multiple of B
2262 and so we are guaranteed to access a non-gap element in the
2263 same B-sized block. */
2264 if (would_overrun_p
2265 && !masked_p
2266 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2267 / vect_get_scalar_dr_size (first_dr_info)))
2268 would_overrun_p = false;
2270 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2271 && (can_overrun_p || !would_overrun_p)
2272 && compare_step_with_zero (vinfo, stmt_info) > 0)
2274 /* First cope with the degenerate case of a single-element
2275 vector. */
2276 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2279 /* Otherwise try using LOAD/STORE_LANES. */
2280 else if (vls_type == VLS_LOAD
2281 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2282 : vect_store_lanes_supported (vectype, group_size,
2283 masked_p))
2285 *memory_access_type = VMAT_LOAD_STORE_LANES;
2286 overrun_p = would_overrun_p;
2289 /* If that fails, try using permuting loads. */
2290 else if (vls_type == VLS_LOAD
2291 ? vect_grouped_load_supported (vectype, single_element_p,
2292 group_size)
2293 : vect_grouped_store_supported (vectype, group_size))
2295 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2296 overrun_p = would_overrun_p;
2300 /* As a last resort, try using a gather load or scatter store.
2302 ??? Although the code can handle all group sizes correctly,
2303 it probably isn't a win to use separate strided accesses based
2304 on nearby locations. Or, even if it's a win over scalar code,
2305 it might not be a win over vectorizing at a lower VF, if that
2306 allows us to use contiguous accesses. */
2307 if (*memory_access_type == VMAT_ELEMENTWISE
2308 && single_element_p
2309 && loop_vinfo
2310 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2311 masked_p, gs_info))
2312 *memory_access_type = VMAT_GATHER_SCATTER;
2315 if (*memory_access_type == VMAT_GATHER_SCATTER
2316 || *memory_access_type == VMAT_ELEMENTWISE)
2318 *alignment_support_scheme = dr_unaligned_supported;
2319 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2321 else
2323 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2324 *alignment_support_scheme
2325 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2326 *misalignment);
2329 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2331 /* STMT is the leader of the group. Check the operands of all the
2332 stmts of the group. */
2333 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2334 while (next_stmt_info)
2336 tree op = vect_get_store_rhs (next_stmt_info);
2337 enum vect_def_type dt;
2338 if (!vect_is_simple_use (op, vinfo, &dt))
2340 if (dump_enabled_p ())
2341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2342 "use not simple.\n");
2343 return false;
2345 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2349 if (overrun_p)
2351 gcc_assert (can_overrun_p);
2352 if (dump_enabled_p ())
2353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2354 "Data access with gaps requires scalar "
2355 "epilogue loop\n");
2356 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2359 return true;
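/* To make the alignment-based overrun reasoning above concrete, assume a
   hypothetical group of 4-byte loads with GAP == 1, V4SI vectors and a known
   16-byte alignment of the first access: the gap (1 element) is smaller than
   16 / 4 == 4 elements per alignment boundary, so every vector access stays
   inside an aligned block that still contains a real group element and the
   overrun is treated as harmless.  */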
2362 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2363 if there is a memory access type that the vectorized form can use,
2364 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2365 or scatters, fill in GS_INFO accordingly. In addition
2366 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2367 the target does not support the alignment scheme. *MISALIGNMENT
2368 is set according to the alignment of the access (including
2369 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2371 SLP says whether we're performing SLP rather than loop vectorization.
2372 MASKED_P is true if the statement is conditional on a vectorized mask.
2373 VECTYPE is the vector type that the vectorized statements will use.
2374 NCOPIES is the number of vector statements that will be needed. */
2376 static bool
2377 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2378 tree vectype, slp_tree slp_node,
2379 bool masked_p, vec_load_store_type vls_type,
2380 unsigned int ncopies,
2381 vect_memory_access_type *memory_access_type,
2382 poly_int64 *poffset,
2383 dr_alignment_support *alignment_support_scheme,
2384 int *misalignment,
2385 gather_scatter_info *gs_info)
2387 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2388 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2389 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2390 *poffset = 0;
2391 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2393 *memory_access_type = VMAT_GATHER_SCATTER;
2394 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2395 gcc_unreachable ();
2396 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2397 &gs_info->offset_dt,
2398 &gs_info->offset_vectype))
2400 if (dump_enabled_p ())
2401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2402 "%s index use not simple.\n",
2403 vls_type == VLS_LOAD ? "gather" : "scatter");
2404 return false;
2406 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2408 if (vls_type != VLS_LOAD)
2410 if (dump_enabled_p ())
2411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2412 "unsupported emulated scatter.\n");
2413 return false;
2415 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2416 || !TYPE_VECTOR_SUBPARTS
2417 (gs_info->offset_vectype).is_constant ()
2418 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2419 (gs_info->offset_vectype),
2420 TYPE_VECTOR_SUBPARTS (vectype)))
2422 if (dump_enabled_p ())
2423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2424 "unsupported vector types for emulated "
2425 "gather.\n");
2426 return false;
2429 /* Gather-scatter accesses perform only component accesses, alignment
2430 is irrelevant for them. */
2431 *alignment_support_scheme = dr_unaligned_supported;
2433 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2435 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2436 masked_p,
2437 vls_type, memory_access_type, poffset,
2438 alignment_support_scheme,
2439 misalignment, gs_info))
2440 return false;
2442 else if (STMT_VINFO_STRIDED_P (stmt_info))
2444 gcc_assert (!slp_node);
2445 if (loop_vinfo
2446 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2447 masked_p, gs_info))
2448 *memory_access_type = VMAT_GATHER_SCATTER;
2449 else
2450 *memory_access_type = VMAT_ELEMENTWISE;
2451 /* Alignment is irrelevant here. */
2452 *alignment_support_scheme = dr_unaligned_supported;
2454 else
2456 int cmp = compare_step_with_zero (vinfo, stmt_info);
2457 if (cmp == 0)
2459 gcc_assert (vls_type == VLS_LOAD);
2460 *memory_access_type = VMAT_INVARIANT;
2461 /* Invariant accesses perform only component accesses, alignment
2462 is irrelevant for them. */
2463 *alignment_support_scheme = dr_unaligned_supported;
2465 else
2467 if (cmp < 0)
2468 *memory_access_type = get_negative_load_store_type
2469 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2470 else
2471 *memory_access_type = VMAT_CONTIGUOUS;
2472 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2473 vectype, *poffset);
2474 *alignment_support_scheme
2475 = vect_supportable_dr_alignment (vinfo,
2476 STMT_VINFO_DR_INFO (stmt_info),
2477 vectype, *misalignment);
2481 if ((*memory_access_type == VMAT_ELEMENTWISE
2482 || *memory_access_type == VMAT_STRIDED_SLP)
2483 && !nunits.is_constant ())
2485 if (dump_enabled_p ())
2486 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2487 "Not using elementwise accesses due to variable "
2488 "vectorization factor.\n");
2489 return false;
2492 if (*alignment_support_scheme == dr_unaligned_unsupported)
2494 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2496 "unsupported unaligned access\n");
2497 return false;
2500 /* FIXME: At the moment the cost model seems to underestimate the
2501 cost of using elementwise accesses. This check preserves the
2502 traditional behavior until that can be fixed. */
2503 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2504 if (!first_stmt_info)
2505 first_stmt_info = stmt_info;
2506 if (*memory_access_type == VMAT_ELEMENTWISE
2507 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2508 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2509 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2510 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2512 if (dump_enabled_p ())
2513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2514 "not falling back to elementwise accesses\n");
2515 return false;
2517 return true;
2520 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2521 conditional operation STMT_INFO. When returning true, store the mask
2522 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2523 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2524 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2526 static bool
2527 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2528 slp_tree slp_node, unsigned mask_index,
2529 tree *mask, slp_tree *mask_node,
2530 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2532 enum vect_def_type mask_dt;
2533 tree mask_vectype;
2534 slp_tree mask_node_1;
2535 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2536 mask, &mask_node_1, &mask_dt, &mask_vectype))
2538 if (dump_enabled_p ())
2539 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2540 "mask use not simple.\n");
2541 return false;
2544 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2546 if (dump_enabled_p ())
2547 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2548 "mask argument is not a boolean.\n");
2549 return false;
2552 /* If the caller is not prepared for adjusting an external/constant
2553 SLP mask vector type fail. */
2554 if (slp_node
2555 && !mask_node
2556 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2558 if (dump_enabled_p ())
2559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2560 "SLP mask argument is not vectorized.\n");
2561 return false;
2564 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2565 if (!mask_vectype)
2566 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2568 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2570 if (dump_enabled_p ())
2571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2572 "could not find an appropriate vector mask type.\n");
2573 return false;
2576 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2577 TYPE_VECTOR_SUBPARTS (vectype)))
2579 if (dump_enabled_p ())
2580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2581 "vector mask type %T"
2582 " does not match vector data type %T.\n",
2583 mask_vectype, vectype);
2585 return false;
2588 *mask_dt_out = mask_dt;
2589 *mask_vectype_out = mask_vectype;
2590 if (mask_node)
2591 *mask_node = mask_node_1;
2592 return true;
2595 /* Return true if stored value RHS is suitable for vectorizing store
2596 statement STMT_INFO. When returning true, store the type of the
2597 definition in *RHS_DT_OUT, the type of the vectorized store value in
2598 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2600 static bool
2601 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2602 slp_tree slp_node, tree rhs,
2603 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2604 vec_load_store_type *vls_type_out)
2606 /* In case this is a store from a constant, make sure
2607 native_encode_expr can handle it. */
2608 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2610 if (dump_enabled_p ())
2611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2612 "cannot encode constant as a byte sequence.\n");
2613 return false;
2616 unsigned op_no = 0;
2617 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2619 if (gimple_call_internal_p (call)
2620 && internal_store_fn_p (gimple_call_internal_fn (call)))
2621 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2624 enum vect_def_type rhs_dt;
2625 tree rhs_vectype;
2626 slp_tree slp_op;
2627 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2628 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2630 if (dump_enabled_p ())
2631 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2632 "use not simple.\n");
2633 return false;
2636 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2637 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2639 if (dump_enabled_p ())
2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2641 "incompatible vector types.\n");
2642 return false;
2645 *rhs_dt_out = rhs_dt;
2646 *rhs_vectype_out = rhs_vectype;
2647 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2648 *vls_type_out = VLS_STORE_INVARIANT;
2649 else
2650 *vls_type_out = VLS_STORE;
2651 return true;
2654 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2655 Note that we support masks with floating-point type, in which case the
2656 floats are interpreted as a bitmask. */
2658 static tree
2659 vect_build_all_ones_mask (vec_info *vinfo,
2660 stmt_vec_info stmt_info, tree masktype)
2662 if (TREE_CODE (masktype) == INTEGER_TYPE)
2663 return build_int_cst (masktype, -1);
2664 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2666 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2667 mask = build_vector_from_val (masktype, mask);
2668 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2670 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2672 REAL_VALUE_TYPE r;
2673 long tmp[6];
2674 for (int j = 0; j < 6; ++j)
2675 tmp[j] = -1;
2676 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2677 tree mask = build_real (TREE_TYPE (masktype), r);
2678 mask = build_vector_from_val (masktype, mask);
2679 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2681 gcc_unreachable ();
2684 /* Build an all-zero merge value of type VECTYPE while vectorizing
2685 STMT_INFO as a gather load. */
2687 static tree
2688 vect_build_zero_merge_argument (vec_info *vinfo,
2689 stmt_vec_info stmt_info, tree vectype)
2691 tree merge;
2692 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2693 merge = build_int_cst (TREE_TYPE (vectype), 0);
2694 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2696 REAL_VALUE_TYPE r;
2697 long tmp[6];
2698 for (int j = 0; j < 6; ++j)
2699 tmp[j] = 0;
2700 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2701 merge = build_real (TREE_TYPE (vectype), r);
2703 else
2704 gcc_unreachable ();
2705 merge = build_vector_from_val (vectype, merge);
2706 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2709 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2710 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2711 the gather load operation. If the load is conditional, MASK is the
2712 unvectorized condition and MASK_DT is its definition type, otherwise
2713 MASK is null. */
2715 static void
2716 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2717 gimple_stmt_iterator *gsi,
2718 gimple **vec_stmt,
2719 gather_scatter_info *gs_info,
2720 tree mask)
2722 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2723 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2724 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2725 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2726 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2727 edge pe = loop_preheader_edge (loop);
2728 enum { NARROW, NONE, WIDEN } modifier;
2729 poly_uint64 gather_off_nunits
2730 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2732 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2733 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2734 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2735 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2736 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2737 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2738 tree scaletype = TREE_VALUE (arglist);
2739 tree real_masktype = masktype;
2740 gcc_checking_assert (types_compatible_p (srctype, rettype)
2741 && (!mask
2742 || TREE_CODE (masktype) == INTEGER_TYPE
2743 || types_compatible_p (srctype, masktype)));
2744 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2745 masktype = truth_type_for (srctype);
2747 tree mask_halftype = masktype;
2748 tree perm_mask = NULL_TREE;
2749 tree mask_perm_mask = NULL_TREE;
2750 if (known_eq (nunits, gather_off_nunits))
2751 modifier = NONE;
2752 else if (known_eq (nunits * 2, gather_off_nunits))
2754 modifier = WIDEN;
2756 /* Currently widening gathers and scatters are only supported for
2757 fixed-length vectors. */
2758 int count = gather_off_nunits.to_constant ();
2759 vec_perm_builder sel (count, count, 1);
2760 for (int i = 0; i < count; ++i)
2761 sel.quick_push (i | (count / 2));
2763 vec_perm_indices indices (sel, 1, count);
2764 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2765 indices);
2767 else if (known_eq (nunits, gather_off_nunits * 2))
2769 modifier = NARROW;
2771 /* Currently narrowing gathers and scatters are only supported for
2772 fixed-length vectors. */
2773 int count = nunits.to_constant ();
2774 vec_perm_builder sel (count, count, 1);
2775 sel.quick_grow (count);
2776 for (int i = 0; i < count; ++i)
2777 sel[i] = i < count / 2 ? i : i + count / 2;
2778 vec_perm_indices indices (sel, 2, count);
2779 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2781 ncopies *= 2;
2783 if (mask && masktype == real_masktype)
2785 for (int i = 0; i < count; ++i)
2786 sel[i] = i | (count / 2);
2787 indices.new_vector (sel, 2, count);
2788 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2790 else if (mask)
2791 mask_halftype = truth_type_for (gs_info->offset_vectype);
2793 else
2794 gcc_unreachable ();
2796 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2797 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2799 tree ptr = fold_convert (ptrtype, gs_info->base);
2800 if (!is_gimple_min_invariant (ptr))
2802 gimple_seq seq;
2803 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2804 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2805 gcc_assert (!new_bb);
2808 tree scale = build_int_cst (scaletype, gs_info->scale);
2810 tree vec_oprnd0 = NULL_TREE;
2811 tree vec_mask = NULL_TREE;
2812 tree src_op = NULL_TREE;
2813 tree mask_op = NULL_TREE;
2814 tree prev_res = NULL_TREE;
2816 if (!mask)
2818 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2819 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2822 auto_vec<tree> vec_oprnds0;
2823 auto_vec<tree> vec_masks;
2824 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2825 modifier == WIDEN ? ncopies / 2 : ncopies,
2826 gs_info->offset, &vec_oprnds0);
2827 if (mask)
2828 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2829 modifier == NARROW ? ncopies / 2 : ncopies,
2830 mask, &vec_masks, masktype);
2831 for (int j = 0; j < ncopies; ++j)
2833 tree op, var;
2834 if (modifier == WIDEN && (j & 1))
2835 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2836 perm_mask, stmt_info, gsi);
2837 else
2838 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2840 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2842 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2843 TYPE_VECTOR_SUBPARTS (idxtype)));
2844 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2845 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2846 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2847 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2848 op = var;
2851 if (mask)
2853 if (mask_perm_mask && (j & 1))
2854 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2855 mask_perm_mask, stmt_info, gsi);
2856 else
2858 if (modifier == NARROW)
2860 if ((j & 1) == 0)
2861 vec_mask = vec_masks[j / 2];
2863 else
2864 vec_mask = vec_masks[j];
2866 mask_op = vec_mask;
2867 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2869 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2870 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2871 gcc_assert (known_eq (sub1, sub2));
2872 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2873 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2874 gassign *new_stmt
2875 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2876 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2877 mask_op = var;
2880 if (modifier == NARROW && masktype != real_masktype)
2882 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2883 gassign *new_stmt
2884 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2885 : VEC_UNPACK_LO_EXPR,
2886 mask_op);
2887 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2888 mask_op = var;
2890 src_op = mask_op;
2893 tree mask_arg = mask_op;
2894 if (masktype != real_masktype)
2896 tree utype, optype = TREE_TYPE (mask_op);
2897 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2898 utype = real_masktype;
2899 else
2900 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2901 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2902 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2903 gassign *new_stmt
2904 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2905 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2906 mask_arg = var;
2907 if (!useless_type_conversion_p (real_masktype, utype))
2909 gcc_assert (TYPE_PRECISION (utype)
2910 <= TYPE_PRECISION (real_masktype));
2911 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2912 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2913 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2914 mask_arg = var;
2916 src_op = build_zero_cst (srctype);
2918 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2919 mask_arg, scale);
2921 if (!useless_type_conversion_p (vectype, rettype))
2923 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2924 TYPE_VECTOR_SUBPARTS (rettype)));
2925 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2926 gimple_call_set_lhs (new_stmt, op);
2927 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2928 var = make_ssa_name (vec_dest);
2929 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2930 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2931 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2933 else
2935 var = make_ssa_name (vec_dest, new_stmt);
2936 gimple_call_set_lhs (new_stmt, var);
2937 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2940 if (modifier == NARROW)
2942 if ((j & 1) == 0)
2944 prev_res = var;
2945 continue;
2947 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2948 stmt_info, gsi);
2949 new_stmt = SSA_NAME_DEF_STMT (var);
2952 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2954 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
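/* As an illustration of the WIDEN case above: with four data elements and an
   eight-element offset vector, COUNT is 8 and the selector built from
   i | (count / 2) is { 4, 5, 6, 7, 4, 5, 6, 7 }, which brings the upper half
   of the offsets into position for each odd copy J.  The NARROW case is the
   mirror image, using the VEC_PERM built on VECTYPE to merge two gather
   results into one data vector.  */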
2957 /* Prepare the base and offset in GS_INFO for vectorization.
2958 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2959 to the vectorized offset argument for the first copy of STMT_INFO.
2960 STMT_INFO is the statement described by GS_INFO and LOOP is the
2961 containing loop. */
2963 static void
2964 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2965 class loop *loop, stmt_vec_info stmt_info,
2966 gather_scatter_info *gs_info,
2967 tree *dataref_ptr, vec<tree> *vec_offset)
2969 gimple_seq stmts = NULL;
2970 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2971 if (stmts != NULL)
2973 basic_block new_bb;
2974 edge pe = loop_preheader_edge (loop);
2975 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2976 gcc_assert (!new_bb);
2978 unsigned ncopies = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2979 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2980 gs_info->offset, vec_offset,
2981 gs_info->offset_vectype);
2984 /* Prepare to implement a grouped or strided load or store using
2985 the gather load or scatter store operation described by GS_INFO.
2986 STMT_INFO is the load or store statement.
2988 Set *DATAREF_BUMP to the amount that should be added to the base
2989 address after each copy of the vectorized statement. Set *VEC_OFFSET
2990 to an invariant offset vector in which element I has the value
2991 I * DR_STEP / SCALE. */
2993 static void
2994 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2995 loop_vec_info loop_vinfo,
2996 gather_scatter_info *gs_info,
2997 tree *dataref_bump, tree *vec_offset)
2999 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3000 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3002 tree bump = size_binop (MULT_EXPR,
3003 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3004 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3005 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3007 /* The offset given in GS_INFO can have pointer type, so use the element
3008 type of the vector instead. */
3009 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3011 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3012 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3013 ssize_int (gs_info->scale));
3014 step = fold_convert (offset_type, step);
3016 /* Create {0, X, X*2, X*3, ...}. */
3017 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3018 build_zero_cst (offset_type), step);
3019 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
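/* A small worked example for a hypothetical access: with DR_STEP == 8 bytes,
   SCALE == 4 and a V4SI vector type, X is 8 / 4 == 2, so *VEC_OFFSET becomes
   the invariant series { 0, 2, 4, 6 } and *DATAREF_BUMP is 8 * 4 == 32 bytes
   per copy.  */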
3022 /* Return the amount that should be added to a vector pointer to move
3023 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3024 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3025 vectorization. */
3027 static tree
3028 vect_get_data_ptr_increment (vec_info *vinfo,
3029 dr_vec_info *dr_info, tree aggr_type,
3030 vect_memory_access_type memory_access_type)
3032 if (memory_access_type == VMAT_INVARIANT)
3033 return size_zero_node;
3035 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3036 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3037 if (tree_int_cst_sgn (step) == -1)
3038 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3039 return iv_step;
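/* For example, if AGGR_TYPE is a 16-byte vector the increment is 16 bytes,
   or -16 when the data reference steps backwards; VMAT_INVARIANT accesses do
   not advance the pointer at all.  */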
3042 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3044 static bool
3045 vectorizable_bswap (vec_info *vinfo,
3046 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3047 gimple **vec_stmt, slp_tree slp_node,
3048 slp_tree *slp_op,
3049 tree vectype_in, stmt_vector_for_cost *cost_vec)
3051 tree op, vectype;
3052 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3053 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3054 unsigned ncopies;
3056 op = gimple_call_arg (stmt, 0);
3057 vectype = STMT_VINFO_VECTYPE (stmt_info);
3058 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3060 /* Multiple types in SLP are handled by creating the appropriate number of
3061 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3062 case of SLP. */
3063 if (slp_node)
3064 ncopies = 1;
3065 else
3066 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3068 gcc_assert (ncopies >= 1);
3070 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3071 if (! char_vectype)
3072 return false;
3074 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3075 unsigned word_bytes;
3076 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3077 return false;
3079 /* The encoding uses one stepped pattern for each byte in the word. */
3080 vec_perm_builder elts (num_bytes, word_bytes, 3);
3081 for (unsigned i = 0; i < 3; ++i)
3082 for (unsigned j = 0; j < word_bytes; ++j)
3083 elts.quick_push ((i + 1) * word_bytes - j - 1);
3085 vec_perm_indices indices (elts, 1, num_bytes);
3086 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3087 return false;
3089 if (! vec_stmt)
3091 if (slp_node
3092 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3094 if (dump_enabled_p ())
3095 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3096 "incompatible vector types for invariants\n");
3097 return false;
3100 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3101 DUMP_VECT_SCOPE ("vectorizable_bswap");
3102 record_stmt_cost (cost_vec,
3103 1, vector_stmt, stmt_info, 0, vect_prologue);
3104 record_stmt_cost (cost_vec,
3105 slp_node
3106 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3107 vec_perm, stmt_info, 0, vect_body);
3108 return true;
3111 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3113 /* Transform. */
3114 vec<tree> vec_oprnds = vNULL;
3115 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3116 op, &vec_oprnds);
3117 /* Arguments are ready. Create the new vector stmt. */
3118 unsigned i;
3119 tree vop;
3120 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3122 gimple *new_stmt;
3123 tree tem = make_ssa_name (char_vectype);
3124 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3125 char_vectype, vop));
3126 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3127 tree tem2 = make_ssa_name (char_vectype);
3128 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3129 tem, tem, bswap_vconst);
3130 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3131 tem = make_ssa_name (vectype);
3132 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3133 vectype, tem2));
3134 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3135 if (slp_node)
3136 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3137 else
3138 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3141 if (!slp_node)
3142 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3144 vec_oprnds.release ();
3145 return true;
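/* To illustrate the permutation built above: vectorizing __builtin_bswap32
   over a V4SI vector viewed as V16QI gives WORD_BYTES == 4 and the byte
   selector { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }, i.e.
   each 4-byte word is reversed in place by a single VEC_PERM_EXPR.  */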
3148 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3149 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3150 in a single step. On success, store the binary pack code in
3151 *CONVERT_CODE. */
3153 static bool
3154 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3155 tree_code *convert_code)
3157 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3158 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3159 return false;
3161 tree_code code;
3162 int multi_step_cvt = 0;
3163 auto_vec <tree, 8> interm_types;
3164 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3165 &code, &multi_step_cvt, &interm_types)
3166 || multi_step_cvt)
3167 return false;
3169 *convert_code = code;
3170 return true;
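/* For instance (target permitting), narrowing V4DI to V8SI is a single-step
   conversion and *CONVERT_CODE is the pack code chosen by
   supportable_narrowing_operation, typically VEC_PACK_TRUNC_EXPR; narrowing
   V4DI to V16QI would need multiple steps and is rejected.  */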
3173 /* Function vectorizable_call.
3175 Check if STMT_INFO performs a function call that can be vectorized.
3176 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3177 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3178 Return true if STMT_INFO is vectorizable in this way. */
3180 static bool
3181 vectorizable_call (vec_info *vinfo,
3182 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3183 gimple **vec_stmt, slp_tree slp_node,
3184 stmt_vector_for_cost *cost_vec)
3186 gcall *stmt;
3187 tree vec_dest;
3188 tree scalar_dest;
3189 tree op;
3190 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3191 tree vectype_out, vectype_in;
3192 poly_uint64 nunits_in;
3193 poly_uint64 nunits_out;
3194 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3195 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3196 tree fndecl, new_temp, rhs_type;
3197 enum vect_def_type dt[4]
3198 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3199 vect_unknown_def_type };
3200 tree vectypes[ARRAY_SIZE (dt)] = {};
3201 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3202 int ndts = ARRAY_SIZE (dt);
3203 int ncopies, j;
3204 auto_vec<tree, 8> vargs;
3205 auto_vec<tree, 8> orig_vargs;
3206 enum { NARROW, NONE, WIDEN } modifier;
3207 size_t i, nargs;
3208 tree lhs;
3210 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3211 return false;
3213 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3214 && ! vec_stmt)
3215 return false;
3217 /* Is STMT_INFO a vectorizable call? */
3218 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3219 if (!stmt)
3220 return false;
3222 if (gimple_call_internal_p (stmt)
3223 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3224 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3225 /* Handled by vectorizable_load and vectorizable_store. */
3226 return false;
3228 if (gimple_call_lhs (stmt) == NULL_TREE
3229 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3230 return false;
3232 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3234 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3236 /* Process function arguments. */
3237 rhs_type = NULL_TREE;
3238 vectype_in = NULL_TREE;
3239 nargs = gimple_call_num_args (stmt);
3241 /* Bail out if the function has more than four arguments; we do not have
3242 interesting builtin functions to vectorize with more than two arguments
3243 except for fma. Having no arguments is also not good. */
3244 if (nargs == 0 || nargs > 4)
3245 return false;
3247 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3248 combined_fn cfn = gimple_call_combined_fn (stmt);
3249 if (cfn == CFN_GOMP_SIMD_LANE)
3251 nargs = 0;
3252 rhs_type = unsigned_type_node;
3255 int mask_opno = -1;
3256 if (internal_fn_p (cfn))
3257 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3259 for (i = 0; i < nargs; i++)
3261 if ((int) i == mask_opno)
3263 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3264 &op, &slp_op[i], &dt[i], &vectypes[i]))
3265 return false;
3266 continue;
3269 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3270 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3272 if (dump_enabled_p ())
3273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3274 "use not simple.\n");
3275 return false;
3278 /* We can only handle calls with arguments of the same type. */
3279 if (rhs_type
3280 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3282 if (dump_enabled_p ())
3283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3284 "argument types differ.\n");
3285 return false;
3287 if (!rhs_type)
3288 rhs_type = TREE_TYPE (op);
3290 if (!vectype_in)
3291 vectype_in = vectypes[i];
3292 else if (vectypes[i]
3293 && !types_compatible_p (vectypes[i], vectype_in))
3295 if (dump_enabled_p ())
3296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3297 "argument vector types differ.\n");
3298 return false;
3301 /* If all arguments are external or constant defs, infer the vector type
3302 from the scalar type. */
3303 if (!vectype_in)
3304 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3305 if (vec_stmt)
3306 gcc_assert (vectype_in);
3307 if (!vectype_in)
3309 if (dump_enabled_p ())
3310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3311 "no vectype for scalar type %T\n", rhs_type);
3313 return false;
3315 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3316 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3317 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3318 by a pack of the two vectors into an SI vector. We would need
3319 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3320 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3322 if (dump_enabled_p ())
3323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3324 "mismatched vector sizes %T and %T\n",
3325 vectype_in, vectype_out);
3326 return false;
3329 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3330 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3332 if (dump_enabled_p ())
3333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3334 "mixed mask and nonmask vector types\n");
3335 return false;
3338 /* FORNOW */
3339 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3340 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3341 if (known_eq (nunits_in * 2, nunits_out))
3342 modifier = NARROW;
3343 else if (known_eq (nunits_out, nunits_in))
3344 modifier = NONE;
3345 else if (known_eq (nunits_out * 2, nunits_in))
3346 modifier = WIDEN;
3347 else
3348 return false;
3350 /* We only handle functions that do not read or clobber memory. */
3351 if (gimple_vuse (stmt))
3353 if (dump_enabled_p ())
3354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3355 "function reads from or writes to memory.\n");
3356 return false;
3359 /* For now, we only vectorize functions if a target-specific builtin
3360 is available. TODO -- in some cases, it might be profitable to
3361 insert the calls for pieces of the vector, in order to be able
3362 to vectorize other operations in the loop. */
3363 fndecl = NULL_TREE;
3364 internal_fn ifn = IFN_LAST;
3365 tree callee = gimple_call_fndecl (stmt);
3367 /* First try using an internal function. */
3368 tree_code convert_code = ERROR_MARK;
3369 if (cfn != CFN_LAST
3370 && (modifier == NONE
3371 || (modifier == NARROW
3372 && simple_integer_narrowing (vectype_out, vectype_in,
3373 &convert_code))))
3374 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3375 vectype_in);
3377 /* If that fails, try asking for a target-specific built-in function. */
3378 if (ifn == IFN_LAST)
3380 if (cfn != CFN_LAST)
3381 fndecl = targetm.vectorize.builtin_vectorized_function
3382 (cfn, vectype_out, vectype_in);
3383 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3384 fndecl = targetm.vectorize.builtin_md_vectorized_function
3385 (callee, vectype_out, vectype_in);
3388 if (ifn == IFN_LAST && !fndecl)
3390 if (cfn == CFN_GOMP_SIMD_LANE
3391 && !slp_node
3392 && loop_vinfo
3393 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3394 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3395 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3396 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3398 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3399 { 0, 1, 2, ... vf - 1 } vector. */
3400 gcc_assert (nargs == 0);
3402 else if (modifier == NONE
3403 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3404 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3405 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3406 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3407 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3408 slp_op, vectype_in, cost_vec);
3409 else
3411 if (dump_enabled_p ())
3412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3413 "function is not vectorizable.\n");
3414 return false;
3418 if (slp_node)
3419 ncopies = 1;
3420 else if (modifier == NARROW && ifn == IFN_LAST)
3421 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3422 else
3423 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3425 /* Sanity check: make sure that at least one copy of the vectorized stmt
3426 needs to be generated. */
3427 gcc_assert (ncopies >= 1);
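/* Illustration with assumed numbers: a vectorization factor of 8 and a
   4-element vectype give ncopies == 2, i.e. two vector calls are emitted
   for each scalar call.  */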
3429 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3430 if (!vec_stmt) /* transformation not required. */
3432 if (slp_node)
3433 for (i = 0; i < nargs; ++i)
3434 if (!vect_maybe_update_slp_op_vectype (slp_op[i], vectype_in))
3436 if (dump_enabled_p ())
3437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3438 "incompatible vector types for invariants\n");
3439 return false;
3441 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3442 DUMP_VECT_SCOPE ("vectorizable_call");
3443 vect_model_simple_cost (vinfo, stmt_info,
3444 ncopies, dt, ndts, slp_node, cost_vec);
3445 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3446 record_stmt_cost (cost_vec, ncopies / 2,
3447 vec_promote_demote, stmt_info, 0, vect_body);
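/* If the call takes a mask operand and the loop may be fully masked,
   the block below notes the mask requirement now, during analysis, so
   that the right number of loop masks of this vector type is available
   at transform time (a summary only; details are in
   vect_record_loop_mask).  */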
3449 if (loop_vinfo && mask_opno >= 0)
3451 unsigned int nvectors = (slp_node
3452 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3453 : ncopies);
3454 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3455 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3456 vectype_out, scalar_mask);
3458 return true;
3461 /* Transform. */
3463 if (dump_enabled_p ())
3464 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3466 /* Handle def. */
3467 scalar_dest = gimple_call_lhs (stmt);
3468 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3470 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3472 if (modifier == NONE || ifn != IFN_LAST)
3474 tree prev_res = NULL_TREE;
3475 vargs.safe_grow (nargs, true);
3476 orig_vargs.safe_grow (nargs, true);
3477 auto_vec<vec<tree> > vec_defs (nargs);
3478 for (j = 0; j < ncopies; ++j)
3480 /* Build argument list for the vectorized call. */
3481 if (slp_node)
3483 vec<tree> vec_oprnds0;
3485 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3486 vec_oprnds0 = vec_defs[0];
3488 /* Arguments are ready. Create the new vector stmt. */
3489 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3491 size_t k;
3492 for (k = 0; k < nargs; k++)
3494 vec<tree> vec_oprndsk = vec_defs[k];
3495 vargs[k] = vec_oprndsk[i];
3497 gimple *new_stmt;
3498 if (modifier == NARROW)
3500 /* We don't define any narrowing conditional functions
3501 at present. */
3502 gcc_assert (mask_opno < 0);
3503 tree half_res = make_ssa_name (vectype_in);
3504 gcall *call
3505 = gimple_build_call_internal_vec (ifn, vargs);
3506 gimple_call_set_lhs (call, half_res);
3507 gimple_call_set_nothrow (call, true);
3508 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3509 if ((i & 1) == 0)
3511 prev_res = half_res;
3512 continue;
3514 new_temp = make_ssa_name (vec_dest);
3515 new_stmt = gimple_build_assign (new_temp, convert_code,
3516 prev_res, half_res);
3517 vect_finish_stmt_generation (vinfo, stmt_info,
3518 new_stmt, gsi);
3520 else
3522 if (mask_opno >= 0 && masked_loop_p)
3524 unsigned int vec_num = vec_oprnds0.length ();
3525 /* Always true for SLP. */
3526 gcc_assert (ncopies == 1);
3527 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3528 vectype_out, i);
3529 vargs[mask_opno] = prepare_load_store_mask
3530 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3533 gcall *call;
3534 if (ifn != IFN_LAST)
3535 call = gimple_build_call_internal_vec (ifn, vargs);
3536 else
3537 call = gimple_build_call_vec (fndecl, vargs);
3538 new_temp = make_ssa_name (vec_dest, call);
3539 gimple_call_set_lhs (call, new_temp);
3540 gimple_call_set_nothrow (call, true);
3541 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3542 new_stmt = call;
3544 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3546 continue;
3549 for (i = 0; i < nargs; i++)
3551 op = gimple_call_arg (stmt, i);
3552 if (j == 0)
3554 vec_defs.quick_push (vNULL);
3555 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3556 op, &vec_defs[i],
3557 vectypes[i]);
3559 orig_vargs[i] = vargs[i] = vec_defs[i][j];
3562 if (mask_opno >= 0 && masked_loop_p)
3564 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3565 vectype_out, j);
3566 vargs[mask_opno]
3567 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3568 vargs[mask_opno], gsi);
3571 gimple *new_stmt;
3572 if (cfn == CFN_GOMP_SIMD_LANE)
3574 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3575 tree new_var
3576 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3577 gimple *init_stmt = gimple_build_assign (new_var, cst);
3578 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3579 new_temp = make_ssa_name (vec_dest);
3580 new_stmt = gimple_build_assign (new_temp, new_var);
3581 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3583 else if (modifier == NARROW)
3585 /* We don't define any narrowing conditional functions at
3586 present. */
3587 gcc_assert (mask_opno < 0);
3588 tree half_res = make_ssa_name (vectype_in);
3589 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3590 gimple_call_set_lhs (call, half_res);
3591 gimple_call_set_nothrow (call, true);
3592 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3593 if ((j & 1) == 0)
3595 prev_res = half_res;
3596 continue;
3598 new_temp = make_ssa_name (vec_dest);
3599 new_stmt = gimple_build_assign (new_temp, convert_code,
3600 prev_res, half_res);
3601 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3603 else
3605 gcall *call;
3606 if (ifn != IFN_LAST)
3607 call = gimple_build_call_internal_vec (ifn, vargs);
3608 else
3609 call = gimple_build_call_vec (fndecl, vargs);
3610 new_temp = make_ssa_name (vec_dest, call);
3611 gimple_call_set_lhs (call, new_temp);
3612 gimple_call_set_nothrow (call, true);
3613 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3614 new_stmt = call;
3617 if (j == (modifier == NARROW ? 1 : 0))
3618 *vec_stmt = new_stmt;
3619 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3621 for (i = 0; i < nargs; i++)
3623 vec<tree> vec_oprndsi = vec_defs[i];
3624 vec_oprndsi.release ();
3627 else if (modifier == NARROW)
3629 auto_vec<vec<tree> > vec_defs (nargs);
3630 /* We don't define any narrowing conditional functions at present. */
3631 gcc_assert (mask_opno < 0);
3632 for (j = 0; j < ncopies; ++j)
3634 /* Build argument list for the vectorized call. */
3635 if (j == 0)
3636 vargs.create (nargs * 2);
3637 else
3638 vargs.truncate (0);
3640 if (slp_node)
3642 vec<tree> vec_oprnds0;
3644 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3645 vec_oprnds0 = vec_defs[0];
3647 /* Arguments are ready. Create the new vector stmt. */
3648 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3650 size_t k;
3651 vargs.truncate (0);
3652 for (k = 0; k < nargs; k++)
3654 vec<tree> vec_oprndsk = vec_defs[k];
3655 vargs.quick_push (vec_oprndsk[i]);
3656 vargs.quick_push (vec_oprndsk[i + 1]);
3658 gcall *call;
3659 if (ifn != IFN_LAST)
3660 call = gimple_build_call_internal_vec (ifn, vargs);
3661 else
3662 call = gimple_build_call_vec (fndecl, vargs);
3663 new_temp = make_ssa_name (vec_dest, call);
3664 gimple_call_set_lhs (call, new_temp);
3665 gimple_call_set_nothrow (call, true);
3666 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3667 SLP_TREE_VEC_STMTS (slp_node).quick_push (call);
3669 continue;
3672 for (i = 0; i < nargs; i++)
3674 op = gimple_call_arg (stmt, i);
3675 if (j == 0)
3677 vec_defs.quick_push (vNULL);
3678 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3679 op, &vec_defs[i], vectypes[i]);
3681 vec_oprnd0 = vec_defs[i][2*j];
3682 vec_oprnd1 = vec_defs[i][2*j+1];
3684 vargs.quick_push (vec_oprnd0);
3685 vargs.quick_push (vec_oprnd1);
3688 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3689 new_temp = make_ssa_name (vec_dest, new_stmt);
3690 gimple_call_set_lhs (new_stmt, new_temp);
3691 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3693 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3696 if (!slp_node)
3697 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3699 for (i = 0; i < nargs; i++)
3701 vec<tree> vec_oprndsi = vec_defs[i];
3702 vec_oprndsi.release ();
3705 else
3706 /* No current target implements this case. */
3707 return false;
3709 vargs.release ();
3711 /* The call in STMT might prevent it from being removed in dce.
3712 However, we cannot remove it here because of the way the ssa name
3713 it defines is mapped to the new definition. So just replace the
3714 rhs of the statement with something harmless. */
3716 if (slp_node)
3717 return true;
3719 stmt_info = vect_orig_stmt (stmt_info);
3720 lhs = gimple_get_lhs (stmt_info->stmt);
3722 gassign *new_stmt
3723 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3724 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3726 return true;
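/* Per-argument information collected by vectorizable_simd_clone_call
   below: the argument's vector type and (possibly rewritten) scalar
   operand, its constant linear step if it advances linearly, its
   definition type, the known pointer alignment in bytes, and whether the
   linearity holds only within a single simd lane.  */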
3730 struct simd_call_arg_info
3732 tree vectype;
3733 tree op;
3734 HOST_WIDE_INT linear_step;
3735 enum vect_def_type dt;
3736 unsigned int align;
3737 bool simd_lane_linear;
3740 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3741 is linear within a simd lane (but not within the whole loop), note it in
3742 *ARGINFO. */
3744 static void
3745 vect_simd_lane_linear (tree op, class loop *loop,
3746 struct simd_call_arg_info *arginfo)
3748 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3750 if (!is_gimple_assign (def_stmt)
3751 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3752 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3753 return;
3755 tree base = gimple_assign_rhs1 (def_stmt);
3756 HOST_WIDE_INT linear_step = 0;
3757 tree v = gimple_assign_rhs2 (def_stmt);
3758 while (TREE_CODE (v) == SSA_NAME)
3760 tree t;
3761 def_stmt = SSA_NAME_DEF_STMT (v);
3762 if (is_gimple_assign (def_stmt))
3763 switch (gimple_assign_rhs_code (def_stmt))
3765 case PLUS_EXPR:
3766 t = gimple_assign_rhs2 (def_stmt);
3767 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3768 return;
3769 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3770 v = gimple_assign_rhs1 (def_stmt);
3771 continue;
3772 case MULT_EXPR:
3773 t = gimple_assign_rhs2 (def_stmt);
3774 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3775 return;
3776 linear_step = tree_to_shwi (t);
3777 v = gimple_assign_rhs1 (def_stmt);
3778 continue;
3779 CASE_CONVERT:
3780 t = gimple_assign_rhs1 (def_stmt);
3781 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3782 || (TYPE_PRECISION (TREE_TYPE (v))
3783 < TYPE_PRECISION (TREE_TYPE (t))))
3784 return;
3785 if (!linear_step)
3786 linear_step = 1;
3787 v = t;
3788 continue;
3789 default:
3790 return;
3792 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3793 && loop->simduid
3794 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3795 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3796 == loop->simduid))
3798 if (!linear_step)
3799 linear_step = 1;
3800 arginfo->linear_step = linear_step;
3801 arginfo->op = base;
3802 arginfo->simd_lane_linear = true;
3803 return;
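/* Informal example of what the walk above recognizes: for an address such
   as &a[GOMP_SIMD_LANE () * 4] the chain of MULT/PLUS/conversion stmts
   bottoms out at the IFN_GOMP_SIMD_LANE call, so the invariant base and a
   constant linear_step (4 here, times whatever element scaling appears in
   the chain) are recorded and the argument is marked simd-lane linear.  */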
3808 /* Return the number of elements in vector type VECTYPE, which is associated
3809 with a SIMD clone. At present these vectors always have a constant
3810 length. */
3812 static unsigned HOST_WIDE_INT
3813 simd_clone_subparts (tree vectype)
3815 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3818 /* Function vectorizable_simd_clone_call.
3820 Check if STMT_INFO performs a function call that can be vectorized
3821 by calling a simd clone of the function.
3822 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3823 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3824 Return true if STMT_INFO is vectorizable in this way. */
3826 static bool
3827 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3828 gimple_stmt_iterator *gsi,
3829 gimple **vec_stmt, slp_tree slp_node,
3830 stmt_vector_for_cost *)
3832 tree vec_dest;
3833 tree scalar_dest;
3834 tree op, type;
3835 tree vec_oprnd0 = NULL_TREE;
3836 tree vectype;
3837 poly_uint64 nunits;
3838 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3839 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3840 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3841 tree fndecl, new_temp;
3842 int ncopies, j;
3843 auto_vec<simd_call_arg_info> arginfo;
3844 vec<tree> vargs = vNULL;
3845 size_t i, nargs;
3846 tree lhs, rtype, ratype;
3847 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3849 /* Is STMT a vectorizable call? */
3850 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3851 if (!stmt)
3852 return false;
3854 fndecl = gimple_call_fndecl (stmt);
3855 if (fndecl == NULL_TREE)
3856 return false;
3858 struct cgraph_node *node = cgraph_node::get (fndecl);
3859 if (node == NULL || node->simd_clones == NULL)
3860 return false;
3862 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3863 return false;
3865 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3866 && ! vec_stmt)
3867 return false;
3869 if (gimple_call_lhs (stmt)
3870 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3871 return false;
3873 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3875 vectype = STMT_VINFO_VECTYPE (stmt_info);
3877 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3878 return false;
3880 /* FORNOW */
3881 if (slp_node)
3882 return false;
3884 /* Process function arguments. */
3885 nargs = gimple_call_num_args (stmt);
3887 /* Bail out if the function has zero arguments. */
3888 if (nargs == 0)
3889 return false;
3891 arginfo.reserve (nargs, true);
3893 for (i = 0; i < nargs; i++)
3895 simd_call_arg_info thisarginfo;
3896 affine_iv iv;
3898 thisarginfo.linear_step = 0;
3899 thisarginfo.align = 0;
3900 thisarginfo.op = NULL_TREE;
3901 thisarginfo.simd_lane_linear = false;
3903 op = gimple_call_arg (stmt, i);
3904 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3905 &thisarginfo.vectype)
3906 || thisarginfo.dt == vect_uninitialized_def)
3908 if (dump_enabled_p ())
3909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3910 "use not simple.\n");
3911 return false;
3914 if (thisarginfo.dt == vect_constant_def
3915 || thisarginfo.dt == vect_external_def)
3916 gcc_assert (thisarginfo.vectype == NULL_TREE);
3917 else
3919 gcc_assert (thisarginfo.vectype != NULL_TREE);
3920 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3922 if (dump_enabled_p ())
3923 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3924 "vector mask arguments are not supported\n");
3925 return false;
3929 /* For linear arguments, the analyze phase should have saved
3930 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3931 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3932 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3934 gcc_assert (vec_stmt);
3935 thisarginfo.linear_step
3936 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3937 thisarginfo.op
3938 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3939 thisarginfo.simd_lane_linear
3940 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3941 == boolean_true_node);
3942 /* If the loop has been peeled for alignment, we need to adjust it. */
3943 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3944 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3945 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3947 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3948 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3949 tree opt = TREE_TYPE (thisarginfo.op);
3950 bias = fold_convert (TREE_TYPE (step), bias);
3951 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3952 thisarginfo.op
3953 = fold_build2 (POINTER_TYPE_P (opt)
3954 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3955 thisarginfo.op, bias);
3958 else if (!vec_stmt
3959 && thisarginfo.dt != vect_constant_def
3960 && thisarginfo.dt != vect_external_def
3961 && loop_vinfo
3962 && TREE_CODE (op) == SSA_NAME
3963 && simple_iv (loop, loop_containing_stmt (stmt), op,
3964 &iv, false)
3965 && tree_fits_shwi_p (iv.step))
3967 thisarginfo.linear_step = tree_to_shwi (iv.step);
3968 thisarginfo.op = iv.base;
3970 else if ((thisarginfo.dt == vect_constant_def
3971 || thisarginfo.dt == vect_external_def)
3972 && POINTER_TYPE_P (TREE_TYPE (op)))
3973 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3974 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3975 linear too. */
3976 if (POINTER_TYPE_P (TREE_TYPE (op))
3977 && !thisarginfo.linear_step
3978 && !vec_stmt
3979 && thisarginfo.dt != vect_constant_def
3980 && thisarginfo.dt != vect_external_def
3981 && loop_vinfo
3982 && !slp_node
3983 && TREE_CODE (op) == SSA_NAME)
3984 vect_simd_lane_linear (op, loop, &thisarginfo);
3986 arginfo.quick_push (thisarginfo);
3989 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3990 if (!vf.is_constant ())
3992 if (dump_enabled_p ())
3993 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3994 "not considering SIMD clones; not yet supported"
3995 " for variable-width vectors.\n");
3996 return false;
3999 unsigned int badness = 0;
4000 struct cgraph_node *bestn = NULL;
4001 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4002 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4003 else
4004 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4005 n = n->simdclone->next_clone)
4007 unsigned int this_badness = 0;
4008 unsigned int num_calls;
4009 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4010 || n->simdclone->nargs != nargs)
4011 continue;
4012 if (num_calls != 1)
4013 this_badness += exact_log2 (num_calls) * 4096;
4014 if (n->simdclone->inbranch)
4015 this_badness += 8192;
4016 int target_badness = targetm.simd_clone.usable (n);
4017 if (target_badness < 0)
4018 continue;
4019 this_badness += target_badness * 512;
4020 /* FORNOW: Have to add code to add the mask argument. */
4021 if (n->simdclone->inbranch)
4022 continue;
4023 for (i = 0; i < nargs; i++)
4025 switch (n->simdclone->args[i].arg_type)
4027 case SIMD_CLONE_ARG_TYPE_VECTOR:
4028 if (!useless_type_conversion_p
4029 (n->simdclone->args[i].orig_type,
4030 TREE_TYPE (gimple_call_arg (stmt, i))))
4031 i = -1;
4032 else if (arginfo[i].dt == vect_constant_def
4033 || arginfo[i].dt == vect_external_def
4034 || arginfo[i].linear_step)
4035 this_badness += 64;
4036 break;
4037 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4038 if (arginfo[i].dt != vect_constant_def
4039 && arginfo[i].dt != vect_external_def)
4040 i = -1;
4041 break;
4042 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4043 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4044 if (arginfo[i].dt == vect_constant_def
4045 || arginfo[i].dt == vect_external_def
4046 || (arginfo[i].linear_step
4047 != n->simdclone->args[i].linear_step))
4048 i = -1;
4049 break;
4050 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4051 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4052 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4053 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4054 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4055 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4056 /* FORNOW */
4057 i = -1;
4058 break;
4059 case SIMD_CLONE_ARG_TYPE_MASK:
4060 gcc_unreachable ();
4062 if (i == (size_t) -1)
4063 break;
4064 if (n->simdclone->args[i].alignment > arginfo[i].align)
4066 i = -1;
4067 break;
4069 if (arginfo[i].align)
4070 this_badness += (exact_log2 (arginfo[i].align)
4071 - exact_log2 (n->simdclone->args[i].alignment));
4073 if (i == (size_t) -1)
4074 continue;
4075 if (bestn == NULL || this_badness < badness)
4077 bestn = n;
4078 badness = this_badness;
4082 if (bestn == NULL)
4083 return false;
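/* Recap of the selection above: each candidate clone is scored
   ("badness"), with heavy penalties for needing several calls per vector
   iteration or for being an inbranch clone, a target-supplied penalty,
   and small penalties for argument kinds or alignments that fit poorly;
   the usable clone with the lowest score becomes BESTN.  */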
4085 for (i = 0; i < nargs; i++)
4086 if ((arginfo[i].dt == vect_constant_def
4087 || arginfo[i].dt == vect_external_def)
4088 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4090 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4091 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4092 slp_node);
4093 if (arginfo[i].vectype == NULL
4094 || !constant_multiple_p (bestn->simdclone->simdlen,
4095 simd_clone_subparts (arginfo[i].vectype)))
4096 return false;
4099 fndecl = bestn->decl;
4100 nunits = bestn->simdclone->simdlen;
4101 ncopies = vector_unroll_factor (vf, nunits);
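/* With assumed numbers: vf == 16 and a clone simdlen of 4 give
   ncopies == 4, i.e. four clone invocations replace the sixteen scalar
   calls of one vectorized iteration.  */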
4103 /* If the function isn't const, only allow it in simd loops where the
4104 user has asserted that at least nunits consecutive iterations can be
4105 performed using SIMD instructions. */
4106 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4107 && gimple_vuse (stmt))
4108 return false;
4110 /* Sanity check: make sure that at least one copy of the vectorized stmt
4111 needs to be generated. */
4112 gcc_assert (ncopies >= 1);
4114 if (!vec_stmt) /* transformation not required. */
4116 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4117 for (i = 0; i < nargs; i++)
4118 if ((bestn->simdclone->args[i].arg_type
4119 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4120 || (bestn->simdclone->args[i].arg_type
4121 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4123 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4124 + 1,
4125 true);
4126 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4127 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4128 ? size_type_node : TREE_TYPE (arginfo[i].op);
4129 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4130 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4131 tree sll = arginfo[i].simd_lane_linear
4132 ? boolean_true_node : boolean_false_node;
4133 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4135 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4136 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4137 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4138 dt, slp_node, cost_vec); */
4139 return true;
4142 /* Transform. */
4144 if (dump_enabled_p ())
4145 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4147 /* Handle def. */
4148 scalar_dest = gimple_call_lhs (stmt);
4149 vec_dest = NULL_TREE;
4150 rtype = NULL_TREE;
4151 ratype = NULL_TREE;
4152 if (scalar_dest)
4154 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4155 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4156 if (TREE_CODE (rtype) == ARRAY_TYPE)
4158 ratype = rtype;
4159 rtype = TREE_TYPE (ratype);
4163 auto_vec<vec<tree> > vec_oprnds;
4164 auto_vec<unsigned> vec_oprnds_i;
4165 vec_oprnds.safe_grow_cleared (nargs, true);
4166 vec_oprnds_i.safe_grow_cleared (nargs, true);
4167 for (j = 0; j < ncopies; ++j)
4169 /* Build argument list for the vectorized call. */
4170 if (j == 0)
4171 vargs.create (nargs);
4172 else
4173 vargs.truncate (0);
4175 for (i = 0; i < nargs; i++)
4177 unsigned int k, l, m, o;
4178 tree atype;
4179 op = gimple_call_arg (stmt, i);
4180 switch (bestn->simdclone->args[i].arg_type)
4182 case SIMD_CLONE_ARG_TYPE_VECTOR:
4183 atype = bestn->simdclone->args[i].vector_type;
4184 o = vector_unroll_factor (nunits,
4185 simd_clone_subparts (atype));
4186 for (m = j * o; m < (j + 1) * o; m++)
4188 if (simd_clone_subparts (atype)
4189 < simd_clone_subparts (arginfo[i].vectype))
4191 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4192 k = (simd_clone_subparts (arginfo[i].vectype)
4193 / simd_clone_subparts (atype));
4194 gcc_assert ((k & (k - 1)) == 0);
4195 if (m == 0)
4197 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4198 ncopies * o / k, op,
4199 &vec_oprnds[i]);
4200 vec_oprnds_i[i] = 0;
4201 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4203 else
4205 vec_oprnd0 = arginfo[i].op;
4206 if ((m & (k - 1)) == 0)
4207 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4209 arginfo[i].op = vec_oprnd0;
4210 vec_oprnd0
4211 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4212 bitsize_int (prec),
4213 bitsize_int ((m & (k - 1)) * prec));
4214 gassign *new_stmt
4215 = gimple_build_assign (make_ssa_name (atype),
4216 vec_oprnd0);
4217 vect_finish_stmt_generation (vinfo, stmt_info,
4218 new_stmt, gsi);
4219 vargs.safe_push (gimple_assign_lhs (new_stmt));
4221 else
4223 k = (simd_clone_subparts (atype)
4224 / simd_clone_subparts (arginfo[i].vectype));
4225 gcc_assert ((k & (k - 1)) == 0);
4226 vec<constructor_elt, va_gc> *ctor_elts;
4227 if (k != 1)
4228 vec_alloc (ctor_elts, k);
4229 else
4230 ctor_elts = NULL;
4231 for (l = 0; l < k; l++)
4233 if (m == 0 && l == 0)
4235 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4236 k * o * ncopies,
4237 op,
4238 &vec_oprnds[i]);
4239 vec_oprnds_i[i] = 0;
4240 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4242 else
4243 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4244 arginfo[i].op = vec_oprnd0;
4245 if (k == 1)
4246 break;
4247 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4248 vec_oprnd0);
4250 if (k == 1)
4251 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4252 atype))
4254 vec_oprnd0
4255 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4256 gassign *new_stmt
4257 = gimple_build_assign (make_ssa_name (atype),
4258 vec_oprnd0);
4259 vect_finish_stmt_generation (vinfo, stmt_info,
4260 new_stmt, gsi);
4261 vargs.safe_push (gimple_assign_lhs (new_stmt));
4263 else
4264 vargs.safe_push (vec_oprnd0);
4265 else
4267 vec_oprnd0 = build_constructor (atype, ctor_elts);
4268 gassign *new_stmt
4269 = gimple_build_assign (make_ssa_name (atype),
4270 vec_oprnd0);
4271 vect_finish_stmt_generation (vinfo, stmt_info,
4272 new_stmt, gsi);
4273 vargs.safe_push (gimple_assign_lhs (new_stmt));
4277 break;
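/* Summary of the vector-argument handling above: when the clone expects
   narrower vectors than the loop produces, the loop vectors are sliced
   with BIT_FIELD_REFs; when it expects wider ones, several loop vectors
   are combined with a CONSTRUCTOR (or a lone VIEW_CONVERT_EXPR when the
   element counts already agree).  */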
4278 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4279 vargs.safe_push (op);
4280 break;
4281 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4282 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4283 if (j == 0)
4285 gimple_seq stmts;
4286 arginfo[i].op
4287 = force_gimple_operand (unshare_expr (arginfo[i].op),
4288 &stmts, true, NULL_TREE);
4289 if (stmts != NULL)
4291 basic_block new_bb;
4292 edge pe = loop_preheader_edge (loop);
4293 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4294 gcc_assert (!new_bb);
4296 if (arginfo[i].simd_lane_linear)
4298 vargs.safe_push (arginfo[i].op);
4299 break;
4301 tree phi_res = copy_ssa_name (op);
4302 gphi *new_phi = create_phi_node (phi_res, loop->header);
4303 add_phi_arg (new_phi, arginfo[i].op,
4304 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4305 enum tree_code code
4306 = POINTER_TYPE_P (TREE_TYPE (op))
4307 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4308 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4309 ? sizetype : TREE_TYPE (op);
4310 poly_widest_int cst
4311 = wi::mul (bestn->simdclone->args[i].linear_step,
4312 ncopies * nunits);
4313 tree tcst = wide_int_to_tree (type, cst);
4314 tree phi_arg = copy_ssa_name (op);
4315 gassign *new_stmt
4316 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4317 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4318 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4319 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4320 UNKNOWN_LOCATION);
4321 arginfo[i].op = phi_res;
4322 vargs.safe_push (phi_res);
4324 else
4326 enum tree_code code
4327 = POINTER_TYPE_P (TREE_TYPE (op))
4328 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4329 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4330 ? sizetype : TREE_TYPE (op);
4331 poly_widest_int cst
4332 = wi::mul (bestn->simdclone->args[i].linear_step,
4333 j * nunits);
4334 tree tcst = wide_int_to_tree (type, cst);
4335 new_temp = make_ssa_name (TREE_TYPE (op));
4336 gassign *new_stmt
4337 = gimple_build_assign (new_temp, code,
4338 arginfo[i].op, tcst);
4339 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4340 vargs.safe_push (new_temp);
4342 break;
4343 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4344 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4345 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4346 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4347 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4348 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4349 default:
4350 gcc_unreachable ();
4354 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4355 if (vec_dest)
4357 gcc_assert (ratype
4358 || known_eq (simd_clone_subparts (rtype), nunits));
4359 if (ratype)
4360 new_temp = create_tmp_var (ratype);
4361 else if (useless_type_conversion_p (vectype, rtype))
4362 new_temp = make_ssa_name (vec_dest, new_call);
4363 else
4364 new_temp = make_ssa_name (rtype, new_call);
4365 gimple_call_set_lhs (new_call, new_temp);
4367 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4368 gimple *new_stmt = new_call;
4370 if (vec_dest)
4372 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4374 unsigned int k, l;
4375 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4376 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4377 k = vector_unroll_factor (nunits,
4378 simd_clone_subparts (vectype));
4379 gcc_assert ((k & (k - 1)) == 0);
4380 for (l = 0; l < k; l++)
4382 tree t;
4383 if (ratype)
4385 t = build_fold_addr_expr (new_temp);
4386 t = build2 (MEM_REF, vectype, t,
4387 build_int_cst (TREE_TYPE (t), l * bytes));
4389 else
4390 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4391 bitsize_int (prec), bitsize_int (l * prec));
4392 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4393 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4395 if (j == 0 && l == 0)
4396 *vec_stmt = new_stmt;
4397 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4400 if (ratype)
4401 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4402 continue;
4404 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4406 unsigned int k = (simd_clone_subparts (vectype)
4407 / simd_clone_subparts (rtype));
4408 gcc_assert ((k & (k - 1)) == 0);
4409 if ((j & (k - 1)) == 0)
4410 vec_alloc (ret_ctor_elts, k);
4411 if (ratype)
4413 unsigned int m, o;
4414 o = vector_unroll_factor (nunits,
4415 simd_clone_subparts (rtype));
4416 for (m = 0; m < o; m++)
4418 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4419 size_int (m), NULL_TREE, NULL_TREE);
4420 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4421 tem);
4422 vect_finish_stmt_generation (vinfo, stmt_info,
4423 new_stmt, gsi);
4424 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4425 gimple_assign_lhs (new_stmt));
4427 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4429 else
4430 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4431 if ((j & (k - 1)) != k - 1)
4432 continue;
4433 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4434 new_stmt
4435 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4436 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4438 if ((unsigned) j == k - 1)
4439 *vec_stmt = new_stmt;
4440 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4441 continue;
4443 else if (ratype)
4445 tree t = build_fold_addr_expr (new_temp);
4446 t = build2 (MEM_REF, vectype, t,
4447 build_int_cst (TREE_TYPE (t), 0));
4448 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4449 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4450 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4452 else if (!useless_type_conversion_p (vectype, rtype))
4454 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4455 new_stmt
4456 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4457 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4461 if (j == 0)
4462 *vec_stmt = new_stmt;
4463 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4466 for (i = 0; i < nargs; ++i)
4468 vec<tree> oprndsi = vec_oprnds[i];
4469 oprndsi.release ();
4471 vargs.release ();
4473 /* The call in STMT might prevent it from being removed in dce.
4474 However, we cannot remove it here because of the way the ssa name
4475 it defines is mapped to the new definition. So just replace the
4476 rhs of the statement with something harmless. */
4478 if (slp_node)
4479 return true;
4481 gimple *new_stmt;
4482 if (scalar_dest)
4484 type = TREE_TYPE (scalar_dest);
4485 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4486 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4488 else
4489 new_stmt = gimple_build_nop ();
4490 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4491 unlink_stmt_vdef (stmt);
4493 return true;
4497 /* Function vect_gen_widened_results_half
4499 Create a vector stmt whose code is CODE, whose arity is OP_TYPE and whose
4500 result variable is VEC_DEST; its arguments are VEC_OPRND0 and VEC_OPRND1
4501 (the latter is ignored unless OP_TYPE is binary_op). The new vector stmt
4502 is inserted at GSI.
4503 STMT_INFO is the original scalar stmt that we are vectorizing. */
4506 static gimple *
4507 vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code,
4508 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4509 tree vec_dest, gimple_stmt_iterator *gsi,
4510 stmt_vec_info stmt_info)
4512 gimple *new_stmt;
4513 tree new_temp;
4515 /* Generate half of the widened result: */
4516 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4517 if (op_type != binary_op)
4518 vec_oprnd1 = NULL;
4519 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4520 new_temp = make_ssa_name (vec_dest, new_stmt);
4521 gimple_assign_set_lhs (new_stmt, new_temp);
4522 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4524 return new_stmt;
4528 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4529 For multi-step conversions store the resulting vectors and call the function
4530 recursively. */
4532 static void
4533 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4534 int multi_step_cvt,
4535 stmt_vec_info stmt_info,
4536 vec<tree> &vec_dsts,
4537 gimple_stmt_iterator *gsi,
4538 slp_tree slp_node, enum tree_code code)
4540 unsigned int i;
4541 tree vop0, vop1, new_tmp, vec_dest;
4543 vec_dest = vec_dsts.pop ();
4545 for (i = 0; i < vec_oprnds->length (); i += 2)
4547 /* Create demotion operation. */
4548 vop0 = (*vec_oprnds)[i];
4549 vop1 = (*vec_oprnds)[i + 1];
4550 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4551 new_tmp = make_ssa_name (vec_dest, new_stmt);
4552 gimple_assign_set_lhs (new_stmt, new_tmp);
4553 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4555 if (multi_step_cvt)
4556 /* Store the resulting vector for next recursive call. */
4557 (*vec_oprnds)[i/2] = new_tmp;
4558 else
4560 /* This is the last step of the conversion sequence. Store the
4561 vectors in SLP_NODE or in vector info of the scalar statement
4562 (or in STMT_VINFO_RELATED_STMT chain). */
4563 if (slp_node)
4564 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4565 else
4566 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4570 /* For multi-step demotion operations we first generate demotion operations
4571 from the source type to the intermediate types, and then combine the
4572 results (stored in VEC_OPRNDS) with a further demotion operation to the
4573 destination type. */
4574 if (multi_step_cvt)
4576 /* At each level of recursion we have half of the operands we had at the
4577 previous level. */
4578 vec_oprnds->truncate ((i+1)/2);
4579 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4580 multi_step_cvt - 1,
4581 stmt_info, vec_dsts, gsi,
4582 slp_node, VEC_PACK_TRUNC_EXPR);
4585 vec_dsts.quick_push (vec_dest);
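/* A sketch of the recursion above, with assumed types: demoting V4SI
   operands to a V16QI result takes two levels, first packing pairs of
   V4SI into V8HI vectors and halving the operand list, then packing
   pairs of V8HI into the final V16QI vectors with VEC_PACK_TRUNC_EXPR.  */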
4589 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4590 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4591 STMT_INFO. For multi-step conversions store the resulting vectors and
4592 call the function recursively. */
4594 static void
4595 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4596 vec<tree> *vec_oprnds0,
4597 vec<tree> *vec_oprnds1,
4598 stmt_vec_info stmt_info, tree vec_dest,
4599 gimple_stmt_iterator *gsi,
4600 enum tree_code code1,
4601 enum tree_code code2, int op_type)
4603 int i;
4604 tree vop0, vop1, new_tmp1, new_tmp2;
4605 gimple *new_stmt1, *new_stmt2;
4606 vec<tree> vec_tmp = vNULL;
4608 vec_tmp.create (vec_oprnds0->length () * 2);
4609 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4611 if (op_type == binary_op)
4612 vop1 = (*vec_oprnds1)[i];
4613 else
4614 vop1 = NULL_TREE;
4616 /* Generate the two halves of promotion operation. */
4617 new_stmt1 = vect_gen_widened_results_half (vinfo, code1, vop0, vop1,
4618 op_type, vec_dest, gsi,
4619 stmt_info);
4620 new_stmt2 = vect_gen_widened_results_half (vinfo, code2, vop0, vop1,
4621 op_type, vec_dest, gsi,
4622 stmt_info);
4623 if (is_gimple_call (new_stmt1))
4625 new_tmp1 = gimple_call_lhs (new_stmt1);
4626 new_tmp2 = gimple_call_lhs (new_stmt2);
4628 else
4630 new_tmp1 = gimple_assign_lhs (new_stmt1);
4631 new_tmp2 = gimple_assign_lhs (new_stmt2);
4634 /* Store the results for the next step. */
4635 vec_tmp.quick_push (new_tmp1);
4636 vec_tmp.quick_push (new_tmp2);
4639 vec_oprnds0->release ();
4640 *vec_oprnds0 = vec_tmp;
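/* Typical use, as a sketch: for a widening from V8HI to V4SI, CODE1/CODE2
   are a LO/HI pair (e.g. the VEC_UNPACK_*_EXPR or VEC_WIDEN_MULT_*_EXPR
   codes chosen by supportable_widening_operation), so every input vector
   contributes two result vectors and the operand list doubles for the
   next step.  */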
4643 /* Create vectorized promotion stmts for widening stmts using only half the
4644 potential vector size for input. */
4645 static void
4646 vect_create_half_widening_stmts (vec_info *vinfo,
4647 vec<tree> *vec_oprnds0,
4648 vec<tree> *vec_oprnds1,
4649 stmt_vec_info stmt_info, tree vec_dest,
4650 gimple_stmt_iterator *gsi,
4651 enum tree_code code1,
4652 int op_type)
4654 int i;
4655 tree vop0, vop1;
4656 gimple *new_stmt1;
4657 gimple *new_stmt2;
4658 gimple *new_stmt3;
4659 vec<tree> vec_tmp = vNULL;
4661 vec_tmp.create (vec_oprnds0->length ());
4662 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4664 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4666 gcc_assert (op_type == binary_op);
4667 vop1 = (*vec_oprnds1)[i];
4669 /* Widen the first vector input. */
4670 out_type = TREE_TYPE (vec_dest);
4671 new_tmp1 = make_ssa_name (out_type);
4672 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4673 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4674 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
4676 /* Widen the second vector input. */
4677 new_tmp2 = make_ssa_name (out_type);
4678 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
4679 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
4680 /* Perform the operation with both vector inputs widened. */
4681 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, new_tmp2);
4683 else
4685 /* Perform the operation with just the first vector input widened. */
4686 new_stmt3 = gimple_build_assign (vec_dest, code1, new_tmp1, vop1);
4689 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
4690 gimple_assign_set_lhs (new_stmt3, new_tmp3);
4691 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
4693 /* Store the results for the next step. */
4694 vec_tmp.quick_push (new_tmp3);
4697 vec_oprnds0->release ();
4698 *vec_oprnds0 = vec_tmp;
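/* This half-widening variant is used when the widened result keeps the
   same number of elements as the inputs (nunits_out == nunits_in): the
   inputs are NOP-converted to the wider element type and the operation is
   emitted once, with no LO/HI splitting.  */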
4702 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4703 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4704 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4705 Return true if STMT_INFO is vectorizable in this way. */
4707 static bool
4708 vectorizable_conversion (vec_info *vinfo,
4709 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4710 gimple **vec_stmt, slp_tree slp_node,
4711 stmt_vector_for_cost *cost_vec)
4713 tree vec_dest;
4714 tree scalar_dest;
4715 tree op0, op1 = NULL_TREE;
4716 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4717 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4718 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4719 tree new_temp;
4720 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4721 int ndts = 2;
4722 poly_uint64 nunits_in;
4723 poly_uint64 nunits_out;
4724 tree vectype_out, vectype_in;
4725 int ncopies, i;
4726 tree lhs_type, rhs_type;
4727 enum { NARROW, NONE, WIDEN } modifier;
4728 vec<tree> vec_oprnds0 = vNULL;
4729 vec<tree> vec_oprnds1 = vNULL;
4730 tree vop0;
4731 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4732 int multi_step_cvt = 0;
4733 vec<tree> interm_types = vNULL;
4734 tree intermediate_type, cvt_type = NULL_TREE;
4735 int op_type;
4736 unsigned short fltsz;
4738 /* Is STMT a vectorizable conversion? */
4740 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4741 return false;
4743 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4744 && ! vec_stmt)
4745 return false;
4747 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4748 if (!stmt)
4749 return false;
4751 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4752 return false;
4754 code = gimple_assign_rhs_code (stmt);
4755 if (!CONVERT_EXPR_CODE_P (code)
4756 && code != FIX_TRUNC_EXPR
4757 && code != FLOAT_EXPR
4758 && code != WIDEN_PLUS_EXPR
4759 && code != WIDEN_MINUS_EXPR
4760 && code != WIDEN_MULT_EXPR
4761 && code != WIDEN_LSHIFT_EXPR)
4762 return false;
4764 bool widen_arith = (code == WIDEN_PLUS_EXPR
4765 || code == WIDEN_MINUS_EXPR
4766 || code == WIDEN_MULT_EXPR
4767 || code == WIDEN_LSHIFT_EXPR);
4768 op_type = TREE_CODE_LENGTH (code);
4770 /* Check types of lhs and rhs. */
4771 scalar_dest = gimple_assign_lhs (stmt);
4772 lhs_type = TREE_TYPE (scalar_dest);
4773 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4775 /* Check the operands of the operation. */
4776 slp_tree slp_op0, slp_op1 = NULL;
4777 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4778 0, &op0, &slp_op0, &dt[0], &vectype_in))
4780 if (dump_enabled_p ())
4781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4782 "use not simple.\n");
4783 return false;
4786 rhs_type = TREE_TYPE (op0);
4787 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4788 && !((INTEGRAL_TYPE_P (lhs_type)
4789 && INTEGRAL_TYPE_P (rhs_type))
4790 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4791 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4792 return false;
4794 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4795 && ((INTEGRAL_TYPE_P (lhs_type)
4796 && !type_has_mode_precision_p (lhs_type))
4797 || (INTEGRAL_TYPE_P (rhs_type)
4798 && !type_has_mode_precision_p (rhs_type))))
4800 if (dump_enabled_p ())
4801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4802 "type conversion to/from bit-precision unsupported."
4803 "\n");
4804 return false;
4807 if (op_type == binary_op)
4809 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR
4810 || code == WIDEN_PLUS_EXPR || code == WIDEN_MINUS_EXPR);
4812 op1 = gimple_assign_rhs2 (stmt);
4813 tree vectype1_in;
4814 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
4815 &op1, &slp_op1, &dt[1], &vectype1_in))
4817 if (dump_enabled_p ())
4818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4819 "use not simple.\n");
4820 return false;
4822 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4823 OP1. */
4824 if (!vectype_in)
4825 vectype_in = vectype1_in;
4828 /* If op0 is an external or constant def, infer the vector type
4829 from the scalar type. */
4830 if (!vectype_in)
4831 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4832 if (vec_stmt)
4833 gcc_assert (vectype_in);
4834 if (!vectype_in)
4836 if (dump_enabled_p ())
4837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4838 "no vectype for scalar type %T\n", rhs_type);
4840 return false;
4843 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4844 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4846 if (dump_enabled_p ())
4847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4848 "can't convert between boolean and non "
4849 "boolean vectors %T\n", rhs_type);
4851 return false;
4854 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4855 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4856 if (known_eq (nunits_out, nunits_in))
4857 if (widen_arith)
4858 modifier = WIDEN;
4859 else
4860 modifier = NONE;
4861 else if (multiple_p (nunits_out, nunits_in))
4862 modifier = NARROW;
4863 else
4865 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4866 modifier = WIDEN;
4869 /* Multiple types in SLP are handled by creating the appropriate number of
4870 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4871 case of SLP. */
4872 if (slp_node)
4873 ncopies = 1;
4874 else if (modifier == NARROW)
4875 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4876 else
4877 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4879 /* Sanity check: make sure that at least one copy of the vectorized stmt
4880 needs to be generated. */
4881 gcc_assert (ncopies >= 1);
4883 bool found_mode = false;
4884 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4885 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4886 opt_scalar_mode rhs_mode_iter;
4888 /* Supportable by target? */
4889 switch (modifier)
4891 case NONE:
4892 if (code != FIX_TRUNC_EXPR
4893 && code != FLOAT_EXPR
4894 && !CONVERT_EXPR_CODE_P (code))
4895 return false;
4896 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4897 break;
4898 /* FALLTHRU */
4899 unsupported:
4900 if (dump_enabled_p ())
4901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4902 "conversion not supported by target.\n");
4903 return false;
4905 case WIDEN:
4906 if (known_eq (nunits_in, nunits_out))
4908 if (!supportable_half_widening_operation (code, vectype_out,
4909 vectype_in, &code1))
4910 goto unsupported;
4911 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4912 break;
4914 if (supportable_widening_operation (vinfo, code, stmt_info,
4915 vectype_out, vectype_in, &code1,
4916 &code2, &multi_step_cvt,
4917 &interm_types))
4919 /* Binary widening operation can only be supported directly by the
4920 architecture. */
4921 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4922 break;
4925 if (code != FLOAT_EXPR
4926 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4927 goto unsupported;
4929 fltsz = GET_MODE_SIZE (lhs_mode);
4930 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4932 rhs_mode = rhs_mode_iter.require ();
4933 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4934 break;
4936 cvt_type
4937 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4938 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4939 if (cvt_type == NULL_TREE)
4940 goto unsupported;
4942 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4944 if (!supportable_convert_operation (code, vectype_out,
4945 cvt_type, &codecvt1))
4946 goto unsupported;
4948 else if (!supportable_widening_operation (vinfo, code, stmt_info,
4949 vectype_out, cvt_type,
4950 &codecvt1, &codecvt2,
4951 &multi_step_cvt,
4952 &interm_types))
4953 continue;
4954 else
4955 gcc_assert (multi_step_cvt == 0);
4957 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
4958 cvt_type,
4959 vectype_in, &code1, &code2,
4960 &multi_step_cvt, &interm_types))
4962 found_mode = true;
4963 break;
4967 if (!found_mode)
4968 goto unsupported;
4970 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4971 codecvt2 = ERROR_MARK;
4972 else
4974 multi_step_cvt++;
4975 interm_types.safe_push (cvt_type);
4976 cvt_type = NULL_TREE;
4978 break;
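/* Worked example for the loop above, with assumed types: vectorizing
   (double) short_var has lhs_mode DFmode and rhs_mode HImode; the first
   2x-wider candidate, SImode, gives cvt_type V4SI, so the conversion is
   done as V8HI -> V4SI unpacks followed by widening int-to-float
   conversions from V4SI to V2DF, with V4SI pushed onto interm_types.  */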
4980 case NARROW:
4981 gcc_assert (op_type == unary_op);
4982 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4983 &code1, &multi_step_cvt,
4984 &interm_types))
4985 break;
4987 if (code != FIX_TRUNC_EXPR
4988 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4989 goto unsupported;
4991 cvt_type
4992 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4993 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4994 if (cvt_type == NULL_TREE)
4995 goto unsupported;
4996 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4997 &codecvt1))
4998 goto unsupported;
4999 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5000 &code1, &multi_step_cvt,
5001 &interm_types))
5002 break;
5003 goto unsupported;
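/* The NARROW fallback just above, sketched with assumed types: for
   (short) double_var, FIX_TRUNC_EXPR first converts V2DF to the
   same-sized integer vector V2DI (cvt_type), and those results are then
   packed down, possibly in several steps, to the V8HI destination.  */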
5005 default:
5006 gcc_unreachable ();
5009 if (!vec_stmt) /* transformation not required. */
5011 if (slp_node
5012 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5013 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5015 if (dump_enabled_p ())
5016 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5017 "incompatible vector types for invariants\n");
5018 return false;
5020 DUMP_VECT_SCOPE ("vectorizable_conversion");
5021 if (modifier == NONE)
5023 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5024 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5025 cost_vec);
5027 else if (modifier == NARROW)
5029 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5030 /* The final packing step produces one vector result per copy. */
5031 unsigned int nvectors
5032 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5033 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5034 multi_step_cvt, cost_vec,
5035 widen_arith);
5037 else
5039 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5040 /* The initial unpacking step produces two vector results
5041 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5042 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5043 unsigned int nvectors
5044 = (slp_node
5045 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5046 : ncopies * 2);
5047 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5048 multi_step_cvt, cost_vec,
5049 widen_arith);
5051 interm_types.release ();
5052 return true;
5055 /* Transform. */
5056 if (dump_enabled_p ())
5057 dump_printf_loc (MSG_NOTE, vect_location,
5058 "transform conversion. ncopies = %d.\n", ncopies);
5060 if (op_type == binary_op)
5062 if (CONSTANT_CLASS_P (op0))
5063 op0 = fold_convert (TREE_TYPE (op1), op0);
5064 else if (CONSTANT_CLASS_P (op1))
5065 op1 = fold_convert (TREE_TYPE (op0), op1);
5068 /* In case of multi-step conversion, we first generate conversion operations
5069 to the intermediate types, and then from those types to the final one.
5070 We create vector destinations for the intermediate types (INTERM_TYPES)
5071 received from supportable_*_operation, and store them in the correct order
5072 for future use in vect_create_vectorized_*_stmts (). */
5073 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5074 vec_dest = vect_create_destination_var (scalar_dest,
5075 (cvt_type && modifier == WIDEN)
5076 ? cvt_type : vectype_out);
5077 vec_dsts.quick_push (vec_dest);
5079 if (multi_step_cvt)
5081 for (i = interm_types.length () - 1;
5082 interm_types.iterate (i, &intermediate_type); i--)
5084 vec_dest = vect_create_destination_var (scalar_dest,
5085 intermediate_type);
5086 vec_dsts.quick_push (vec_dest);
5090 if (cvt_type)
5091 vec_dest = vect_create_destination_var (scalar_dest,
5092 modifier == WIDEN
5093 ? vectype_out : cvt_type);
5095 int ninputs = 1;
5096 if (!slp_node)
5098 if (modifier == WIDEN)
5100 else if (modifier == NARROW)
5102 if (multi_step_cvt)
5103 ninputs = vect_pow2 (multi_step_cvt);
5104 ninputs *= 2;
5108 switch (modifier)
5110 case NONE:
5111 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5112 op0, &vec_oprnds0);
5113 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5115 /* Arguments are ready, create the new vector stmt. */
5116 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5117 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5118 new_temp = make_ssa_name (vec_dest, new_stmt);
5119 gimple_assign_set_lhs (new_stmt, new_temp);
5120 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5122 if (slp_node)
5123 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5124 else
5125 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5127 break;
5129 case WIDEN:
5130 /* In case the vectorization factor (VF) is bigger than the number
5131 of elements that we can fit in a vectype (nunits), we have to
5132 generate more than one vector stmt, i.e., we need to "unroll"
5133 the vector stmt by a factor VF/nunits. */
5134 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5135 op0, &vec_oprnds0,
5136 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5137 &vec_oprnds1);
5138 if (code == WIDEN_LSHIFT_EXPR)
5140 int oprnds_size = vec_oprnds0.length ();
5141 vec_oprnds1.create (oprnds_size);
5142 for (i = 0; i < oprnds_size; ++i)
5143 vec_oprnds1.quick_push (op1);
5145 /* Arguments are ready. Create the new vector stmts. */
5146 for (i = multi_step_cvt; i >= 0; i--)
5148 tree this_dest = vec_dsts[i];
5149 enum tree_code c1 = code1, c2 = code2;
5150 if (i == 0 && codecvt2 != ERROR_MARK)
5152 c1 = codecvt1;
5153 c2 = codecvt2;
5155 if (known_eq (nunits_out, nunits_in))
5156 vect_create_half_widening_stmts (vinfo, &vec_oprnds0,
5157 &vec_oprnds1, stmt_info,
5158 this_dest, gsi,
5159 c1, op_type);
5160 else
5161 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5162 &vec_oprnds1, stmt_info,
5163 this_dest, gsi,
5164 c1, c2, op_type);
5167 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5169 gimple *new_stmt;
5170 if (cvt_type)
5172 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5173 new_temp = make_ssa_name (vec_dest);
5174 new_stmt = gimple_build_assign (new_temp, codecvt1, vop0);
5175 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5177 else
5178 new_stmt = SSA_NAME_DEF_STMT (vop0);
5180 if (slp_node)
5181 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5182 else
5183 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5185 break;
5187 case NARROW:
5188 /* In case the vectorization factor (VF) is bigger than the number
5189 of elements that we can fit in a vectype (nunits), we have to
5190 generate more than one vector stmt, i.e., we need to "unroll"
5191 the vector stmt by a factor VF/nunits. */
5192 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5193 op0, &vec_oprnds0);
5194 /* Arguments are ready. Create the new vector stmts. */
5195 if (cvt_type)
5196 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5198 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5199 new_temp = make_ssa_name (vec_dest);
5200 gassign *new_stmt
5201 = gimple_build_assign (new_temp, codecvt1, vop0);
5202 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5203 vec_oprnds0[i] = new_temp;
5206 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5207 multi_step_cvt,
5208 stmt_info, vec_dsts, gsi,
5209 slp_node, code1);
5210 break;
5212 if (!slp_node)
5213 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5215 vec_oprnds0.release ();
5216 vec_oprnds1.release ();
5217 interm_types.release ();
5219 return true;
5222 /* Return true if we can assume from the scalar form of STMT_INFO that
5223 neither the scalar nor the vector forms will generate code. STMT_INFO
5224 is known not to involve a data reference. */
5226 bool
5227 vect_nop_conversion_p (stmt_vec_info stmt_info)
5229 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5230 if (!stmt)
5231 return false;
5233 tree lhs = gimple_assign_lhs (stmt);
5234 tree_code code = gimple_assign_rhs_code (stmt);
5235 tree rhs = gimple_assign_rhs1 (stmt);
5237 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5238 return true;
5240 if (CONVERT_EXPR_CODE_P (code))
5241 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5243 return false;
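/* Note on the predicate above: a cast between int and unsigned int, for
   instance, satisfies tree_nop_conversion_p (same precision and mode), so
   the statement is expected to produce no vector code at all.  */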
5246 /* Function vectorizable_assignment.
5248 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5249 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5250 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5251 Return true if STMT_INFO is vectorizable in this way. */
5253 static bool
5254 vectorizable_assignment (vec_info *vinfo,
5255 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5256 gimple **vec_stmt, slp_tree slp_node,
5257 stmt_vector_for_cost *cost_vec)
5259 tree vec_dest;
5260 tree scalar_dest;
5261 tree op;
5262 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5263 tree new_temp;
5264 enum vect_def_type dt[1] = {vect_unknown_def_type};
5265 int ndts = 1;
5266 int ncopies;
5267 int i;
5268 vec<tree> vec_oprnds = vNULL;
5269 tree vop;
5270 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5271 enum tree_code code;
5272 tree vectype_in;
5274 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5275 return false;
5277 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5278 && ! vec_stmt)
5279 return false;
5281 /* Is vectorizable assignment? */
5282 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5283 if (!stmt)
5284 return false;
5286 scalar_dest = gimple_assign_lhs (stmt);
5287 if (TREE_CODE (scalar_dest) != SSA_NAME)
5288 return false;
5290 if (STMT_VINFO_DATA_REF (stmt_info))
5291 return false;
5293 code = gimple_assign_rhs_code (stmt);
5294 if (!(gimple_assign_single_p (stmt)
5295 || code == PAREN_EXPR
5296 || CONVERT_EXPR_CODE_P (code)))
5297 return false;
5299 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5300 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5302 /* Multiple types in SLP are handled by creating the appropriate number of
5303 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5304 case of SLP. */
5305 if (slp_node)
5306 ncopies = 1;
5307 else
5308 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5310 gcc_assert (ncopies >= 1);
5312 slp_tree slp_op;
5313 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5314 &dt[0], &vectype_in))
5316 if (dump_enabled_p ())
5317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5318 "use not simple.\n");
5319 return false;
5321 if (!vectype_in)
5322 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5324 /* We can handle NOP_EXPR conversions that do not change the number
5325 of elements or the vector size. */
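/* E.g. a conversion from vector(4) int to vector(4) unsigned int is accepted
   and is emitted below as a single VIEW_CONVERT_EXPR, whereas a conversion
   that changes the element width is rejected here and left to
   vectorizable_conversion.  */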
5326 if ((CONVERT_EXPR_CODE_P (code)
5327 || code == VIEW_CONVERT_EXPR)
5328 && (!vectype_in
5329 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5330 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5331 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5332 return false;
5334 if (VECTOR_BOOLEAN_TYPE_P (vectype)
5335 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5337 if (dump_enabled_p ())
5338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5339 "can't convert between boolean and non "
5340 "boolean vectors %T\n", TREE_TYPE (op));
5342 return false;
5345 /* We do not handle bit-precision changes. */
5346 if ((CONVERT_EXPR_CODE_P (code)
5347 || code == VIEW_CONVERT_EXPR)
5348 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5349 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5350 || !type_has_mode_precision_p (TREE_TYPE (op)))
5351 /* But a conversion that does not change the bit-pattern is ok. */
5352 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5353 > TYPE_PRECISION (TREE_TYPE (op)))
5354 && TYPE_UNSIGNED (TREE_TYPE (op))))
5356 if (dump_enabled_p ())
5357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5358 "type conversion to/from bit-precision "
5359 "unsupported.\n");
5360 return false;
5363 if (!vec_stmt) /* transformation not required. */
5365 if (slp_node
5366 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5368 if (dump_enabled_p ())
5369 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5370 "incompatible vector types for invariants\n");
5371 return false;
5373 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5374 DUMP_VECT_SCOPE ("vectorizable_assignment");
5375 if (!vect_nop_conversion_p (stmt_info))
5376 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5377 cost_vec);
5378 return true;
5381 /* Transform. */
5382 if (dump_enabled_p ())
5383 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5385 /* Handle def. */
5386 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5388 /* Handle use. */
5389 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5391 /* Arguments are ready. Create the new vector stmt. */
5392 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5394 if (CONVERT_EXPR_CODE_P (code)
5395 || code == VIEW_CONVERT_EXPR)
5396 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5397 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5398 new_temp = make_ssa_name (vec_dest, new_stmt);
5399 gimple_assign_set_lhs (new_stmt, new_temp);
5400 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5401 if (slp_node)
5402 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5403 else
5404 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5406 if (!slp_node)
5407 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5409 vec_oprnds.release ();
5410 return true;
5414 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5415 either as shift by a scalar or by a vector. */
5417 bool
5418 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5421 machine_mode vec_mode;
5422 optab optab;
5423 int icode;
5424 tree vectype;
5426 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5427 if (!vectype)
5428 return false;
5430 optab = optab_for_tree_code (code, vectype, optab_scalar);
5431 if (!optab
5432 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5434 optab = optab_for_tree_code (code, vectype, optab_vector);
5435 if (!optab
5436 || (optab_handler (optab, TYPE_MODE (vectype))
5437 == CODE_FOR_nothing))
5438 return false;
5441 vec_mode = TYPE_MODE (vectype);
5442 icode = (int) optab_handler (optab, vec_mode);
5443 if (icode == CODE_FOR_nothing)
5444 return false;
5446 return true;
5450 /* Function vectorizable_shift.
5452 Check if STMT_INFO performs a shift operation that can be vectorized.
5453 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5454 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5455 Return true if STMT_INFO is vectorizable in this way. */
5457 static bool
5458 vectorizable_shift (vec_info *vinfo,
5459 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5460 gimple **vec_stmt, slp_tree slp_node,
5461 stmt_vector_for_cost *cost_vec)
5463 tree vec_dest;
5464 tree scalar_dest;
5465 tree op0, op1 = NULL;
5466 tree vec_oprnd1 = NULL_TREE;
5467 tree vectype;
5468 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5469 enum tree_code code;
5470 machine_mode vec_mode;
5471 tree new_temp;
5472 optab optab;
5473 int icode;
5474 machine_mode optab_op2_mode;
5475 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5476 int ndts = 2;
5477 poly_uint64 nunits_in;
5478 poly_uint64 nunits_out;
5479 tree vectype_out;
5480 tree op1_vectype;
5481 int ncopies;
5482 int i;
5483 vec<tree> vec_oprnds0 = vNULL;
5484 vec<tree> vec_oprnds1 = vNULL;
5485 tree vop0, vop1;
5486 unsigned int k;
5487 bool scalar_shift_arg = true;
5488 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5489 bool incompatible_op1_vectype_p = false;
5491 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5492 return false;
5494 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5495 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5496 && ! vec_stmt)
5497 return false;
5499 /* Is STMT a vectorizable binary/unary operation? */
5500 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5501 if (!stmt)
5502 return false;
5504 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5505 return false;
5507 code = gimple_assign_rhs_code (stmt);
5509 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5510 || code == RROTATE_EXPR))
5511 return false;
5513 scalar_dest = gimple_assign_lhs (stmt);
5514 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5515 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5517 if (dump_enabled_p ())
5518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5519 "bit-precision shifts not supported.\n");
5520 return false;
5523 slp_tree slp_op0;
5524 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5525 0, &op0, &slp_op0, &dt[0], &vectype))
5527 if (dump_enabled_p ())
5528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5529 "use not simple.\n");
5530 return false;
5532 /* If op0 is an external or constant def, infer the vector type
5533 from the scalar type. */
5534 if (!vectype)
5535 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5536 if (vec_stmt)
5537 gcc_assert (vectype);
5538 if (!vectype)
5540 if (dump_enabled_p ())
5541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5542 "no vectype for scalar type\n");
5543 return false;
5546 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5547 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5548 if (maybe_ne (nunits_out, nunits_in))
5549 return false;
5551 stmt_vec_info op1_def_stmt_info;
5552 slp_tree slp_op1;
5553 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
5554 &dt[1], &op1_vectype, &op1_def_stmt_info))
5556 if (dump_enabled_p ())
5557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5558 "use not simple.\n");
5559 return false;
5562 /* Multiple types in SLP are handled by creating the appropriate number of
5563 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5564 case of SLP. */
5565 if (slp_node)
5566 ncopies = 1;
5567 else
5568 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5570 gcc_assert (ncopies >= 1);
5572 /* Determine whether the shift amount is a vector or a scalar. If the
5573 shift/rotate amount is a vector, use the vector/vector shift optabs. */
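/* For example, a[i] << b[i] needs a per-lane shift amount and therefore the
   vector/vector optab, while a[i] << n with a loop-invariant N can keep N as
   a scalar operand and use the vector/scalar shift patterns.  */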
5575 if ((dt[1] == vect_internal_def
5576 || dt[1] == vect_induction_def
5577 || dt[1] == vect_nested_cycle)
5578 && !slp_node)
5579 scalar_shift_arg = false;
5580 else if (dt[1] == vect_constant_def
5581 || dt[1] == vect_external_def
5582 || dt[1] == vect_internal_def)
5584 /* In SLP we need to check whether the shift count is the same in all
5585 the scalar stmts; in loops, if it is a constant or invariant, it is
5586 always a scalar shift. */
5587 if (slp_node)
5589 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5590 stmt_vec_info slpstmt_info;
5592 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5594 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5595 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5596 scalar_shift_arg = false;
5599 /* For internal SLP defs we have to make sure we see scalar stmts
5600 for all vector elements.
5601 ??? For different vectors we could resort to a different
5602 scalar shift operand but code-generation below simply always
5603 takes the first. */
5604 if (dt[1] == vect_internal_def
5605 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5606 stmts.length ()))
5607 scalar_shift_arg = false;
5610 /* If the shift amount is computed by a pattern stmt we cannot
5611 use the scalar amount directly thus give up and use a vector
5612 shift. */
5613 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5614 scalar_shift_arg = false;
5616 else
5618 if (dump_enabled_p ())
5619 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5620 "operand mode requires invariant argument.\n");
5621 return false;
5624 /* Vector shifted by vector. */
5625 bool was_scalar_shift_arg = scalar_shift_arg;
5626 if (!scalar_shift_arg)
5628 optab = optab_for_tree_code (code, vectype, optab_vector);
5629 if (dump_enabled_p ())
5630 dump_printf_loc (MSG_NOTE, vect_location,
5631 "vector/vector shift/rotate found.\n");
5633 if (!op1_vectype)
5634 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5635 slp_op1);
5636 incompatible_op1_vectype_p
5637 = (op1_vectype == NULL_TREE
5638 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5639 TYPE_VECTOR_SUBPARTS (vectype))
5640 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5641 if (incompatible_op1_vectype_p
5642 && (!slp_node
5643 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
5644 || slp_op1->refcnt != 1))
5646 if (dump_enabled_p ())
5647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5648 "unusable type for last operand in"
5649 " vector/vector shift/rotate.\n");
5650 return false;
5653 /* See if the machine has a vector shifted by scalar insn and if not
5654 then see if it has a vector shifted by vector insn. */
5655 else
5657 optab = optab_for_tree_code (code, vectype, optab_scalar);
5658 if (optab
5659 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5661 if (dump_enabled_p ())
5662 dump_printf_loc (MSG_NOTE, vect_location,
5663 "vector/scalar shift/rotate found.\n");
5665 else
5667 optab = optab_for_tree_code (code, vectype, optab_vector);
5668 if (optab
5669 && (optab_handler (optab, TYPE_MODE (vectype))
5670 != CODE_FOR_nothing))
5672 scalar_shift_arg = false;
5674 if (dump_enabled_p ())
5675 dump_printf_loc (MSG_NOTE, vect_location,
5676 "vector/vector shift/rotate found.\n");
5678 if (!op1_vectype)
5679 op1_vectype = get_vectype_for_scalar_type (vinfo,
5680 TREE_TYPE (op1),
5681 slp_op1);
5683 /* Unlike the other binary operators, shifts/rotates have
5684 the rhs being int, instead of the same type as the lhs,
5685 so make sure the scalar is the right type if we are
5686 dealing with vectors of long long/long/short/char. */
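/* For instance, with a vector of long long the shift amount usually has
   int type in the IL and must be converted to long long before it can be
   used as the second operand of a vector/vector shift.  */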
5687 incompatible_op1_vectype_p
5688 = (!op1_vectype
5689 || !tree_nop_conversion_p (TREE_TYPE (vectype),
5690 TREE_TYPE (op1)));
5691 if (incompatible_op1_vectype_p
5692 && dt[1] == vect_internal_def)
5694 if (dump_enabled_p ())
5695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5696 "unusable type for last operand in"
5697 " vector/vector shift/rotate.\n");
5698 return false;
5704 /* Supportable by target? */
5705 if (!optab)
5707 if (dump_enabled_p ())
5708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5709 "no optab.\n");
5710 return false;
5712 vec_mode = TYPE_MODE (vectype);
5713 icode = (int) optab_handler (optab, vec_mode);
5714 if (icode == CODE_FOR_nothing)
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5718 "op not supported by target.\n");
5719 return false;
5721 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
5722 if (vect_emulated_vector_p (vectype))
5723 return false;
5725 if (!vec_stmt) /* transformation not required. */
5727 if (slp_node
5728 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
5729 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
5730 && (!incompatible_op1_vectype_p
5731 || dt[1] == vect_constant_def)
5732 && !vect_maybe_update_slp_op_vectype
5733 (slp_op1,
5734 incompatible_op1_vectype_p ? vectype : op1_vectype))))
5736 if (dump_enabled_p ())
5737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5738 "incompatible vector types for invariants\n");
5739 return false;
5741 /* Now adjust the constant shift amount in place. */
5742 if (slp_node
5743 && incompatible_op1_vectype_p
5744 && dt[1] == vect_constant_def)
5746 for (unsigned i = 0;
5747 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
5749 SLP_TREE_SCALAR_OPS (slp_op1)[i]
5750 = fold_convert (TREE_TYPE (vectype),
5751 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
5752 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
5753 == INTEGER_CST));
5756 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5757 DUMP_VECT_SCOPE ("vectorizable_shift");
5758 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
5759 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5760 return true;
5763 /* Transform. */
5765 if (dump_enabled_p ())
5766 dump_printf_loc (MSG_NOTE, vect_location,
5767 "transform binary/unary operation.\n");
5769 if (incompatible_op1_vectype_p && !slp_node)
5771 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
5772 op1 = fold_convert (TREE_TYPE (vectype), op1);
5773 if (dt[1] != vect_constant_def)
5774 op1 = vect_init_vector (vinfo, stmt_info, op1,
5775 TREE_TYPE (vectype), NULL);
5778 /* Handle def. */
5779 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5781 if (scalar_shift_arg && dt[1] != vect_internal_def)
5783 /* Vector shl and shr insn patterns can be defined with scalar
5784 operand 2 (shift operand). In this case, use constant or loop
5785 invariant op1 directly, without extending it to vector mode
5786 first. */
5787 optab_op2_mode = insn_data[icode].operand[2].mode;
5788 if (!VECTOR_MODE_P (optab_op2_mode))
5790 if (dump_enabled_p ())
5791 dump_printf_loc (MSG_NOTE, vect_location,
5792 "operand 1 using scalar mode.\n");
5793 vec_oprnd1 = op1;
5794 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
5795 vec_oprnds1.quick_push (vec_oprnd1);
5796 /* Store vec_oprnd1 for every vector stmt to be created.
5797 We check during the analysis that all the shift arguments
5798 are the same.
5799 TODO: Allow different constants for different vector
5800 stmts generated for an SLP instance. */
5801 for (k = 0;
5802 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
5803 vec_oprnds1.quick_push (vec_oprnd1);
5806 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
5808 if (was_scalar_shift_arg)
5810 /* If the argument was the same in all lanes create
5811 the correctly typed vector shift amount directly. */
5812 op1 = fold_convert (TREE_TYPE (vectype), op1);
5813 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
5814 !loop_vinfo ? gsi : NULL);
5815 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
5816 !loop_vinfo ? gsi : NULL);
5817 vec_oprnds1.create (slp_node->vec_stmts_size);
5818 for (k = 0; k < slp_node->vec_stmts_size; k++)
5819 vec_oprnds1.quick_push (vec_oprnd1);
5821 else if (dt[1] == vect_constant_def)
5822 /* The constant shift amount has been adjusted in place. */
5824 else
5825 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5828 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5829 (a special case for certain kinds of vector shifts); otherwise,
5830 operand 1 should be of a vector type (the usual case). */
5831 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5832 op0, &vec_oprnds0,
5833 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
5835 /* Arguments are ready. Create the new vector stmt. */
5836 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5838 /* For internal defs where we need to use a scalar shift arg
5839 extract the first lane. */
5840 if (scalar_shift_arg && dt[1] == vect_internal_def)
5842 vop1 = vec_oprnds1[0];
5843 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
5844 gassign *new_stmt
5845 = gimple_build_assign (new_temp,
5846 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
5847 vop1,
5848 TYPE_SIZE (TREE_TYPE (new_temp)),
5849 bitsize_zero_node));
5850 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5851 vop1 = new_temp;
5853 else
5854 vop1 = vec_oprnds1[i];
5855 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5856 new_temp = make_ssa_name (vec_dest, new_stmt);
5857 gimple_assign_set_lhs (new_stmt, new_temp);
5858 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5859 if (slp_node)
5860 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5861 else
5862 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5865 if (!slp_node)
5866 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5868 vec_oprnds0.release ();
5869 vec_oprnds1.release ();
5871 return true;
5875 /* Function vectorizable_operation.
5877 Check if STMT_INFO performs a binary, unary or ternary operation that can
5878 be vectorized.
5879 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5880 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5881 Return true if STMT_INFO is vectorizable in this way. */
5883 static bool
5884 vectorizable_operation (vec_info *vinfo,
5885 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5886 gimple **vec_stmt, slp_tree slp_node,
5887 stmt_vector_for_cost *cost_vec)
5889 tree vec_dest;
5890 tree scalar_dest;
5891 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5892 tree vectype;
5893 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5894 enum tree_code code, orig_code;
5895 machine_mode vec_mode;
5896 tree new_temp;
5897 int op_type;
5898 optab optab;
5899 bool target_support_p;
5900 enum vect_def_type dt[3]
5901 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5902 int ndts = 3;
5903 poly_uint64 nunits_in;
5904 poly_uint64 nunits_out;
5905 tree vectype_out;
5906 int ncopies, vec_num;
5907 int i;
5908 vec<tree> vec_oprnds0 = vNULL;
5909 vec<tree> vec_oprnds1 = vNULL;
5910 vec<tree> vec_oprnds2 = vNULL;
5911 tree vop0, vop1, vop2;
5912 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5914 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5915 return false;
5917 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5918 && ! vec_stmt)
5919 return false;
5921 /* Is STMT a vectorizable binary/unary operation? */
5922 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5923 if (!stmt)
5924 return false;
5926 /* Loads and stores are handled in vectorizable_{load,store}. */
5927 if (STMT_VINFO_DATA_REF (stmt_info))
5928 return false;
5930 orig_code = code = gimple_assign_rhs_code (stmt);
5932 /* Shifts are handled in vectorizable_shift. */
5933 if (code == LSHIFT_EXPR
5934 || code == RSHIFT_EXPR
5935 || code == LROTATE_EXPR
5936 || code == RROTATE_EXPR)
5937 return false;
5939 /* Comparisons are handled in vectorizable_comparison. */
5940 if (TREE_CODE_CLASS (code) == tcc_comparison)
5941 return false;
5943 /* Conditions are handled in vectorizable_condition. */
5944 if (code == COND_EXPR)
5945 return false;
5947 /* For pointer addition and subtraction, we should use the normal
5948 plus and minus for the vector operation. */
5949 if (code == POINTER_PLUS_EXPR)
5950 code = PLUS_EXPR;
5951 if (code == POINTER_DIFF_EXPR)
5952 code = MINUS_EXPR;
5954 /* Support only unary, binary or ternary operations. */
5955 op_type = TREE_CODE_LENGTH (code);
5956 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5958 if (dump_enabled_p ())
5959 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5960 "num. args = %d (not unary/binary/ternary op).\n",
5961 op_type);
5962 return false;
5965 scalar_dest = gimple_assign_lhs (stmt);
5966 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5968 /* Most operations cannot handle bit-precision types without extra
5969 truncations. */
5970 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5971 if (!mask_op_p
5972 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5973 /* The exceptions are bitwise binary operations. */
5974 && code != BIT_IOR_EXPR
5975 && code != BIT_XOR_EXPR
5976 && code != BIT_AND_EXPR)
5978 if (dump_enabled_p ())
5979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5980 "bit-precision arithmetic not supported.\n");
5981 return false;
5984 slp_tree slp_op0;
5985 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5986 0, &op0, &slp_op0, &dt[0], &vectype))
5988 if (dump_enabled_p ())
5989 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5990 "use not simple.\n");
5991 return false;
5993 /* If op0 is an external or constant def, infer the vector type
5994 from the scalar type. */
5995 if (!vectype)
5997 /* For a boolean type we cannot determine the vectype from an
5998 invariant value (we don't know whether it is a vector of
5999 booleans or a vector of integers). We use the output
6000 vectype because operations on booleans don't change the
6001 type. */
6002 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6004 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6006 if (dump_enabled_p ())
6007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6008 "not supported operation on bool value.\n");
6009 return false;
6011 vectype = vectype_out;
6013 else
6014 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6015 slp_node);
6017 if (vec_stmt)
6018 gcc_assert (vectype);
6019 if (!vectype)
6021 if (dump_enabled_p ())
6022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6023 "no vectype for scalar type %T\n",
6024 TREE_TYPE (op0));
6026 return false;
6029 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6030 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6031 if (maybe_ne (nunits_out, nunits_in))
6032 return false;
6034 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6035 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6036 if (op_type == binary_op || op_type == ternary_op)
6038 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6039 1, &op1, &slp_op1, &dt[1], &vectype2))
6041 if (dump_enabled_p ())
6042 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6043 "use not simple.\n");
6044 return false;
6047 if (op_type == ternary_op)
6049 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6050 2, &op2, &slp_op2, &dt[2], &vectype3))
6052 if (dump_enabled_p ())
6053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6054 "use not simple.\n");
6055 return false;
6059 /* Multiple types in SLP are handled by creating the appropriate number of
6060 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6061 case of SLP. */
6062 if (slp_node)
6064 ncopies = 1;
6065 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6067 else
6069 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6070 vec_num = 1;
6073 gcc_assert (ncopies >= 1);
6075 /* Reject attempts to combine mask types with nonmask types, e.g. if
6076 we have an AND between a (nonmask) boolean loaded from memory and
6077 a (mask) boolean result of a comparison.
6079 TODO: We could easily fix these cases up using pattern statements. */
6080 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6081 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6082 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6084 if (dump_enabled_p ())
6085 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6086 "mixed mask and nonmask vector types\n");
6087 return false;
6090 /* Supportable by target? */
6092 vec_mode = TYPE_MODE (vectype);
6093 if (code == MULT_HIGHPART_EXPR)
6094 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6095 else
6097 optab = optab_for_tree_code (code, vectype, optab_default);
6098 if (!optab)
6100 if (dump_enabled_p ())
6101 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6102 "no optab.\n");
6103 return false;
6105 target_support_p = (optab_handler (optab, vec_mode)
6106 != CODE_FOR_nothing);
6109 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6110 if (!target_support_p)
6112 if (dump_enabled_p ())
6113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6114 "op not supported by target.\n");
6115 /* Check only during analysis. */
6116 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6117 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6118 return false;
6119 if (dump_enabled_p ())
6120 dump_printf_loc (MSG_NOTE, vect_location,
6121 "proceeding using word mode.\n");
6122 using_emulated_vectors_p = true;
6125 if (using_emulated_vectors_p
6126 && !vect_can_vectorize_without_simd_p (code))
6128 if (dump_enabled_p ())
6129 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6130 return false;
6133 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6134 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6135 internal_fn cond_fn = get_conditional_internal_fn (code);
6137 if (!vec_stmt) /* transformation not required. */
6139 /* If this operation is part of a reduction, a fully-masked loop
6140 should only change the active lanes of the reduction chain,
6141 keeping the inactive lanes as-is. */
6142 if (loop_vinfo
6143 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6144 && reduc_idx >= 0)
6146 if (cond_fn == IFN_LAST
6147 || !direct_internal_fn_supported_p (cond_fn, vectype,
6148 OPTIMIZE_FOR_SPEED))
6150 if (dump_enabled_p ())
6151 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6152 "can't use a fully-masked loop because no"
6153 " conditional operation is available.\n");
6154 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6156 else
6157 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6158 vectype, NULL);
6161 /* Put types on constant and invariant SLP children. */
6162 if (slp_node
6163 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6164 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6165 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6167 if (dump_enabled_p ())
6168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6169 "incompatible vector types for invariants\n");
6170 return false;
6173 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6174 DUMP_VECT_SCOPE ("vectorizable_operation");
6175 vect_model_simple_cost (vinfo, stmt_info,
6176 ncopies, dt, ndts, slp_node, cost_vec);
6177 if (using_emulated_vectors_p)
6179 /* The above vect_model_simple_cost call handles constants
6180 in the prologue and (mis-)costs one of the stmts as
6181 vector stmt. See tree-vect-generic.c:do_plus_minus/do_negate
6182 for the actual lowering that will be applied. */
6183 unsigned n
6184 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6185 switch (code)
6187 case PLUS_EXPR:
6188 n *= 5;
6189 break;
6190 case MINUS_EXPR:
6191 n *= 6;
6192 break;
6193 case NEGATE_EXPR:
6194 n *= 4;
6195 break;
6196 default:;
6198 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info, 0, vect_body);
6200 return true;
6203 /* Transform. */
6205 if (dump_enabled_p ())
6206 dump_printf_loc (MSG_NOTE, vect_location,
6207 "transform binary/unary operation.\n");
6209 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6211 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6212 vectors with unsigned elements, but the result is signed. So, we
6213 need to compute the MINUS_EXPR into vectype temporary and
6214 VIEW_CONVERT_EXPR it into the final vectype_out result. */
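/* E.g. for the difference of two int * pointers, vectype has unsigned
   elements while vectype_out has the signed ptrdiff element type;
   vec_cvt_dest below holds the extra destination used for that final
   VIEW_CONVERT_EXPR.  */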
6215 tree vec_cvt_dest = NULL_TREE;
6216 if (orig_code == POINTER_DIFF_EXPR)
6218 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6219 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6221 /* Handle def. */
6222 else
6223 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6225 /* In case the vectorization factor (VF) is bigger than the number
6226 of elements that we can fit in a vectype (nunits), we have to generate
6227 more than one vector stmt - i.e. - we need to "unroll" the
6228 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6229 from one copy of the vector stmt to the next, in the field
6230 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6231 stages to find the correct vector defs to be used when vectorizing
6232 stmts that use the defs of the current stmt. The example below
6233 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6234 we need to create 4 vectorized stmts):
6236 before vectorization:
6237 RELATED_STMT VEC_STMT
6238 S1: x = memref - -
6239 S2: z = x + 1 - -
6241 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6242 there):
6243 RELATED_STMT VEC_STMT
6244 VS1_0: vx0 = memref0 VS1_1 -
6245 VS1_1: vx1 = memref1 VS1_2 -
6246 VS1_2: vx2 = memref2 VS1_3 -
6247 VS1_3: vx3 = memref3 - -
6248 S1: x = load - VS1_0
6249 S2: z = x + 1 - -
6251 step2: vectorize stmt S2 (done here):
6252 To vectorize stmt S2 we first need to find the relevant vector
6253 def for the first operand 'x'. This is, as usual, obtained from
6254 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6255 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6256 relevant vector def 'vx0'. Having found 'vx0' we can generate
6257 the vector stmt VS2_0, and as usual, record it in the
6258 STMT_VINFO_VEC_STMT of stmt S2.
6259 When creating the second copy (VS2_1), we obtain the relevant vector
6260 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6261 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6262 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6263 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6264 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6265 chain of stmts and pointers:
6266 RELATED_STMT VEC_STMT
6267 VS1_0: vx0 = memref0 VS1_1 -
6268 VS1_1: vx1 = memref1 VS1_2 -
6269 VS1_2: vx2 = memref2 VS1_3 -
6270 VS1_3: vx3 = memref3 - -
6271 S1: x = load - VS1_0
6272 VS2_0: vz0 = vx0 + v1 VS2_1 -
6273 VS2_1: vz1 = vx1 + v1 VS2_2 -
6274 VS2_2: vz2 = vx2 + v1 VS2_3 -
6275 VS2_3: vz3 = vx3 + v1 - -
6276 S2: z = x + 1 - VS2_0 */
6278 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6279 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6280 /* Arguments are ready. Create the new vector stmt. */
6281 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6283 gimple *new_stmt = NULL;
6284 vop1 = ((op_type == binary_op || op_type == ternary_op)
6285 ? vec_oprnds1[i] : NULL_TREE);
6286 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6287 if (masked_loop_p && reduc_idx >= 0)
6289 /* Perform the operation on active elements only and take
6290 inactive elements from the reduction chain input. */
6291 gcc_assert (!vop2);
6292 vop2 = reduc_idx == 1 ? vop1 : vop0;
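/* The call built below has the form COND_<CODE> (mask, vop0, vop1, else),
   where the last operand supplies the value of the inactive lanes.  */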
6293 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6294 vectype, i);
6295 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6296 vop0, vop1, vop2);
6297 new_temp = make_ssa_name (vec_dest, call);
6298 gimple_call_set_lhs (call, new_temp);
6299 gimple_call_set_nothrow (call, true);
6300 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6301 new_stmt = call;
6303 else
6305 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
6306 new_temp = make_ssa_name (vec_dest, new_stmt);
6307 gimple_assign_set_lhs (new_stmt, new_temp);
6308 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6309 if (vec_cvt_dest)
6311 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6312 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6313 new_temp);
6314 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6315 gimple_assign_set_lhs (new_stmt, new_temp);
6316 vect_finish_stmt_generation (vinfo, stmt_info,
6317 new_stmt, gsi);
6320 if (slp_node)
6321 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6322 else
6323 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6326 if (!slp_node)
6327 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6329 vec_oprnds0.release ();
6330 vec_oprnds1.release ();
6331 vec_oprnds2.release ();
6333 return true;
6336 /* A helper function to ensure data reference DR_INFO's base alignment. */
6338 static void
6339 ensure_base_align (dr_vec_info *dr_info)
6341 /* Alignment is only analyzed for the first element of a DR group;
6342 use that to look at the base alignment we need to enforce. */
6343 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
6344 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
6346 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
6348 if (dr_info->base_misaligned)
6350 tree base_decl = dr_info->base_decl;
6352 // We should only be able to increase the alignment of a base object if
6353 // we know what its new alignment should be at compile time.
6354 unsigned HOST_WIDE_INT align_base_to =
6355 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6357 if (decl_in_symtab_p (base_decl))
6358 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6359 else if (DECL_ALIGN (base_decl) < align_base_to)
6361 SET_DECL_ALIGN (base_decl, align_base_to);
6362 DECL_USER_ALIGN (base_decl) = 1;
6364 dr_info->base_misaligned = false;
6369 /* Function get_group_alias_ptr_type.
6371 Return the alias type for the group starting at FIRST_STMT_INFO. */
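/* If the group members do not all share the same alias set, the function
   falls back to ptr_type_node, i.e. an alias type that conflicts with
   everything.  */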
6373 static tree
6374 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6376 struct data_reference *first_dr, *next_dr;
6378 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6379 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6380 while (next_stmt_info)
6382 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6383 if (get_alias_set (DR_REF (first_dr))
6384 != get_alias_set (DR_REF (next_dr)))
6386 if (dump_enabled_p ())
6387 dump_printf_loc (MSG_NOTE, vect_location,
6388 "conflicting alias set types.\n");
6389 return ptr_type_node;
6391 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6393 return reference_alias_ptr_type (DR_REF (first_dr));
6397 /* Function scan_operand_equal_p.
6399 Helper function for check_scan_store. Compare two references
6400 with .GOMP_SIMD_LANE bases. */
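/* The comparison looks at base, bit size and offset, and looks through a
   POINTER_PLUS_EXPR address, a constant scaling MULT_EXPR and widening
   conversions of the lane index, so that equivalent accesses written in
   slightly different forms still compare equal.  */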
6402 static bool
6403 scan_operand_equal_p (tree ref1, tree ref2)
6405 tree ref[2] = { ref1, ref2 };
6406 poly_int64 bitsize[2], bitpos[2];
6407 tree offset[2], base[2];
6408 for (int i = 0; i < 2; ++i)
6410 machine_mode mode;
6411 int unsignedp, reversep, volatilep = 0;
6412 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6413 &offset[i], &mode, &unsignedp,
6414 &reversep, &volatilep);
6415 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6416 return false;
6417 if (TREE_CODE (base[i]) == MEM_REF
6418 && offset[i] == NULL_TREE
6419 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6421 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6422 if (is_gimple_assign (def_stmt)
6423 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6424 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6425 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6427 if (maybe_ne (mem_ref_offset (base[i]), 0))
6428 return false;
6429 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6430 offset[i] = gimple_assign_rhs2 (def_stmt);
6435 if (!operand_equal_p (base[0], base[1], 0))
6436 return false;
6437 if (maybe_ne (bitsize[0], bitsize[1]))
6438 return false;
6439 if (offset[0] != offset[1])
6441 if (!offset[0] || !offset[1])
6442 return false;
6443 if (!operand_equal_p (offset[0], offset[1], 0))
6445 tree step[2];
6446 for (int i = 0; i < 2; ++i)
6448 step[i] = integer_one_node;
6449 if (TREE_CODE (offset[i]) == SSA_NAME)
6451 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6452 if (is_gimple_assign (def_stmt)
6453 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6454 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6455 == INTEGER_CST))
6457 step[i] = gimple_assign_rhs2 (def_stmt);
6458 offset[i] = gimple_assign_rhs1 (def_stmt);
6461 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6463 step[i] = TREE_OPERAND (offset[i], 1);
6464 offset[i] = TREE_OPERAND (offset[i], 0);
6466 tree rhs1 = NULL_TREE;
6467 if (TREE_CODE (offset[i]) == SSA_NAME)
6469 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6470 if (gimple_assign_cast_p (def_stmt))
6471 rhs1 = gimple_assign_rhs1 (def_stmt);
6473 else if (CONVERT_EXPR_P (offset[i]))
6474 rhs1 = TREE_OPERAND (offset[i], 0);
6475 if (rhs1
6476 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6477 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6478 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6479 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6480 offset[i] = rhs1;
6482 if (!operand_equal_p (offset[0], offset[1], 0)
6483 || !operand_equal_p (step[0], step[1], 0))
6484 return false;
6487 return true;
6491 enum scan_store_kind {
6492 /* Normal permutation. */
6493 scan_store_kind_perm,
6495 /* Whole vector left shift permutation with zero init. */
6496 scan_store_kind_lshift_zero,
6498 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6499 scan_store_kind_lshift_cond
6502 /* Function scan_store_can_perm_p.
6504 Verify if we can perform the needed permutations or whole vector shifts.
6505 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
6506 USE_WHOLE_VECTOR is a vector of enum scan_store_kind describing which
6507 operation to do at each step. */
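/* For example, with nunits == 8 the selectors built below are
   { 0, 8, 9, 10, 11, 12, 13, 14 }, { 0, 1, 8, 9, 10, 11, 12, 13 },
   { 0, 1, 2, 3, 8, 9, 10, 11 } and finally { 7, 7, 7, 7, 7, 7, 7, 7 },
   matching the permutations shown in the check_scan_store comment below.  */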
6509 static int
6510 scan_store_can_perm_p (tree vectype, tree init,
6511 vec<enum scan_store_kind> *use_whole_vector = NULL)
6513 enum machine_mode vec_mode = TYPE_MODE (vectype);
6514 unsigned HOST_WIDE_INT nunits;
6515 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6516 return -1;
6517 int units_log2 = exact_log2 (nunits);
6518 if (units_log2 <= 0)
6519 return -1;
6521 int i;
6522 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
6523 for (i = 0; i <= units_log2; ++i)
6525 unsigned HOST_WIDE_INT j, k;
6526 enum scan_store_kind kind = scan_store_kind_perm;
6527 vec_perm_builder sel (nunits, nunits, 1);
6528 sel.quick_grow (nunits);
6529 if (i == units_log2)
6531 for (j = 0; j < nunits; ++j)
6532 sel[j] = nunits - 1;
6534 else
6536 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6537 sel[j] = j;
6538 for (k = 0; j < nunits; ++j, ++k)
6539 sel[j] = nunits + k;
6541 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6542 if (!can_vec_perm_const_p (vec_mode, indices))
6544 if (i == units_log2)
6545 return -1;
6547 if (whole_vector_shift_kind == scan_store_kind_perm)
6549 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6550 return -1;
6551 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6552 /* Whole vector shifts shift in zeros, so if init is an all-zeros
6553 constant, there is no need to do anything further. */
6554 if ((TREE_CODE (init) != INTEGER_CST
6555 && TREE_CODE (init) != REAL_CST)
6556 || !initializer_zerop (init))
6558 tree masktype = truth_type_for (vectype);
6559 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6560 return -1;
6561 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6564 kind = whole_vector_shift_kind;
6566 if (use_whole_vector)
6568 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6569 use_whole_vector->safe_grow_cleared (i, true);
6570 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6571 use_whole_vector->safe_push (kind);
6575 return units_log2;
6579 /* Function check_scan_store.
6581 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6583 static bool
6584 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
6585 enum vect_def_type rhs_dt, bool slp, tree mask,
6586 vect_memory_access_type memory_access_type)
6588 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6589 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6590 tree ref_type;
6592 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6593 if (slp
6594 || mask
6595 || memory_access_type != VMAT_CONTIGUOUS
6596 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6597 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6598 || loop_vinfo == NULL
6599 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6600 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6601 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
6602 || !integer_zerop (DR_INIT (dr_info->dr))
6603 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6604 || !alias_sets_conflict_p (get_alias_set (vectype),
6605 get_alias_set (TREE_TYPE (ref_type))))
6607 if (dump_enabled_p ())
6608 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6609 "unsupported OpenMP scan store.\n");
6610 return false;
6613 /* We need to pattern match code built by OpenMP lowering and simplified
6614 by following optimizations into something we can handle.
6615 #pragma omp simd reduction(inscan,+:r)
6616 for (...)
6618 r += something ();
6619 #pragma omp scan inclusive (r)
6620 use (r);
6622 shall have body with:
6623 // Initialization for input phase, store the reduction initializer:
6624 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6625 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6626 D.2042[_21] = 0;
6627 // Actual input phase:
6629 r.0_5 = D.2042[_20];
6630 _6 = _4 + r.0_5;
6631 D.2042[_20] = _6;
6632 // Initialization for scan phase:
6633 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6634 _26 = D.2043[_25];
6635 _27 = D.2042[_25];
6636 _28 = _26 + _27;
6637 D.2043[_25] = _28;
6638 D.2042[_25] = _28;
6639 // Actual scan phase:
6641 r.1_8 = D.2042[_20];
6643 The "omp simd array" variable D.2042 holds the privatized copy used
6644 inside of the loop and D.2043 is another one that holds copies of
6645 the current original list item. The separate GOMP_SIMD_LANE ifn
6646 kinds are there in order to allow optimizing the initializer store
6647 and combiner sequence, e.g. if it is originally some C++ish user
6648 defined reduction, but allow the vectorizer to pattern recognize it
6649 and turn it into the appropriate vectorized scan.
6651 For exclusive scan, this is slightly different:
6652 #pragma omp simd reduction(inscan,+:r)
6653 for (...)
6655 use (r);
6656 #pragma omp scan exclusive (r)
6657 r += something ();
6659 shall have body with:
6660 // Initialization for input phase, store the reduction initializer:
6661 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6662 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6663 D.2042[_21] = 0;
6664 // Actual input phase:
6666 r.0_5 = D.2042[_20];
6667 _6 = _4 + r.0_5;
6668 D.2042[_20] = _6;
6669 // Initialization for scan phase:
6670 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6671 _26 = D.2043[_25];
6672 D.2044[_25] = _26;
6673 _27 = D.2042[_25];
6674 _28 = _26 + _27;
6675 D.2043[_25] = _28;
6676 // Actual scan phase:
6678 r.1_8 = D.2044[_20];
6679 ... */
6681 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6683 /* Match the D.2042[_21] = 0; store above. Just require that
6684 it is a constant or external definition store. */
6685 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6687 fail_init:
6688 if (dump_enabled_p ())
6689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6690 "unsupported OpenMP scan initializer store.\n");
6691 return false;
6694 if (! loop_vinfo->scan_map)
6695 loop_vinfo->scan_map = new hash_map<tree, tree>;
6696 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6697 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6698 if (cached)
6699 goto fail_init;
6700 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6702 /* These stores can be vectorized normally. */
6703 return true;
6706 if (rhs_dt != vect_internal_def)
6708 fail:
6709 if (dump_enabled_p ())
6710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6711 "unsupported OpenMP scan combiner pattern.\n");
6712 return false;
6715 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6716 tree rhs = gimple_assign_rhs1 (stmt);
6717 if (TREE_CODE (rhs) != SSA_NAME)
6718 goto fail;
6720 gimple *other_store_stmt = NULL;
6721 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6722 bool inscan_var_store
6723 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6725 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6727 if (!inscan_var_store)
6729 use_operand_p use_p;
6730 imm_use_iterator iter;
6731 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6733 gimple *use_stmt = USE_STMT (use_p);
6734 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6735 continue;
6736 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6737 || !is_gimple_assign (use_stmt)
6738 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6739 || other_store_stmt
6740 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6741 goto fail;
6742 other_store_stmt = use_stmt;
6744 if (other_store_stmt == NULL)
6745 goto fail;
6746 rhs = gimple_assign_lhs (other_store_stmt);
6747 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6748 goto fail;
6751 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6753 use_operand_p use_p;
6754 imm_use_iterator iter;
6755 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6757 gimple *use_stmt = USE_STMT (use_p);
6758 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6759 continue;
6760 if (other_store_stmt)
6761 goto fail;
6762 other_store_stmt = use_stmt;
6765 else
6766 goto fail;
6768 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6769 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6770 || !is_gimple_assign (def_stmt)
6771 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6772 goto fail;
6774 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6775 /* For pointer addition, we should use the normal plus for the vector
6776 operation. */
6777 switch (code)
6779 case POINTER_PLUS_EXPR:
6780 code = PLUS_EXPR;
6781 break;
6782 case MULT_HIGHPART_EXPR:
6783 goto fail;
6784 default:
6785 break;
6787 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6788 goto fail;
6790 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6791 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6792 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6793 goto fail;
6795 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6796 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6797 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6798 || !gimple_assign_load_p (load1_stmt)
6799 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6800 || !gimple_assign_load_p (load2_stmt))
6801 goto fail;
6803 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6804 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6805 if (load1_stmt_info == NULL
6806 || load2_stmt_info == NULL
6807 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6808 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6809 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6810 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6811 goto fail;
6813 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6815 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6816 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6817 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6818 goto fail;
6819 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6820 tree lrhs;
6821 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6822 lrhs = rhs1;
6823 else
6824 lrhs = rhs2;
6825 use_operand_p use_p;
6826 imm_use_iterator iter;
6827 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6829 gimple *use_stmt = USE_STMT (use_p);
6830 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6831 continue;
6832 if (other_store_stmt)
6833 goto fail;
6834 other_store_stmt = use_stmt;
6838 if (other_store_stmt == NULL)
6839 goto fail;
6840 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6841 || !gimple_store_p (other_store_stmt))
6842 goto fail;
6844 stmt_vec_info other_store_stmt_info
6845 = loop_vinfo->lookup_stmt (other_store_stmt);
6846 if (other_store_stmt_info == NULL
6847 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6848 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6849 goto fail;
6851 gimple *stmt1 = stmt;
6852 gimple *stmt2 = other_store_stmt;
6853 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6854 std::swap (stmt1, stmt2);
6855 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6856 gimple_assign_rhs1 (load2_stmt)))
6858 std::swap (rhs1, rhs2);
6859 std::swap (load1_stmt, load2_stmt);
6860 std::swap (load1_stmt_info, load2_stmt_info);
6862 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6863 gimple_assign_rhs1 (load1_stmt)))
6864 goto fail;
6866 tree var3 = NULL_TREE;
6867 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6868 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6869 gimple_assign_rhs1 (load2_stmt)))
6870 goto fail;
6871 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6873 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6874 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6875 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6876 goto fail;
6877 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6878 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6879 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6880 || lookup_attribute ("omp simd inscan exclusive",
6881 DECL_ATTRIBUTES (var3)))
6882 goto fail;
6885 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6886 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6887 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6888 goto fail;
6890 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6891 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6892 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6893 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6894 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6895 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6896 goto fail;
6898 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6899 std::swap (var1, var2);
6901 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6903 if (!lookup_attribute ("omp simd inscan exclusive",
6904 DECL_ATTRIBUTES (var1)))
6905 goto fail;
6906 var1 = var3;
6909 if (loop_vinfo->scan_map == NULL)
6910 goto fail;
6911 tree *init = loop_vinfo->scan_map->get (var1);
6912 if (init == NULL)
6913 goto fail;
6915 /* The IL is as expected, now check if we can actually vectorize it.
6916 Inclusive scan:
6917 _26 = D.2043[_25];
6918 _27 = D.2042[_25];
6919 _28 = _26 + _27;
6920 D.2043[_25] = _28;
6921 D.2042[_25] = _28;
6922 should be vectorized as (where _40 is the vectorized rhs
6923 from the D.2042[_21] = 0; store):
6924 _30 = MEM <vector(8) int> [(int *)&D.2043];
6925 _31 = MEM <vector(8) int> [(int *)&D.2042];
6926 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6927 _33 = _31 + _32;
6928 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6929 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6930 _35 = _33 + _34;
6931 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6932 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6933 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6934 _37 = _35 + _36;
6935 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6936 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6937 _38 = _30 + _37;
6938 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6939 MEM <vector(8) int> [(int *)&D.2043] = _39;
6940 MEM <vector(8) int> [(int *)&D.2042] = _38;
6941 Exclusive scan:
6942 _26 = D.2043[_25];
6943 D.2044[_25] = _26;
6944 _27 = D.2042[_25];
6945 _28 = _26 + _27;
6946 D.2043[_25] = _28;
6947 should be vectorized as (where _40 is the vectorized rhs
6948 from the D.2042[_21] = 0; store):
6949 _30 = MEM <vector(8) int> [(int *)&D.2043];
6950 _31 = MEM <vector(8) int> [(int *)&D.2042];
6951 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6952 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6953 _34 = _32 + _33;
6954 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6955 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6956 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6957 _36 = _34 + _35;
6958 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6959 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6960 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6961 _38 = _36 + _37;
6962 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6963 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6964 _39 = _30 + _38;
6965 _50 = _31 + _39;
6966 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6967 MEM <vector(8) int> [(int *)&D.2044] = _39;
6968 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6969 enum machine_mode vec_mode = TYPE_MODE (vectype);
6970 optab optab = optab_for_tree_code (code, vectype, optab_default);
6971 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6972 goto fail;
6974 int units_log2 = scan_store_can_perm_p (vectype, *init);
6975 if (units_log2 == -1)
6976 goto fail;
6978 return true;
6982 /* Function vectorizable_scan_store.
6984 Helper of vectorizable_store, with the same arguments as vectorizable_store.
6985 Handle only the transformation; the checking is done in check_scan_store. */
6987 static bool
6988 vectorizable_scan_store (vec_info *vinfo,
6989 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6990 gimple **vec_stmt, int ncopies)
6992 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6993 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6994 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
6995 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6997 if (dump_enabled_p ())
6998 dump_printf_loc (MSG_NOTE, vect_location,
6999 "transform scan store. ncopies = %d\n", ncopies);
7001 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7002 tree rhs = gimple_assign_rhs1 (stmt);
7003 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7005 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7006 bool inscan_var_store
7007 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7009 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7011 use_operand_p use_p;
7012 imm_use_iterator iter;
7013 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7015 gimple *use_stmt = USE_STMT (use_p);
7016 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7017 continue;
7018 rhs = gimple_assign_lhs (use_stmt);
7019 break;
7023 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7024 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7025 if (code == POINTER_PLUS_EXPR)
7026 code = PLUS_EXPR;
7027 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7028 && commutative_tree_code (code));
7029 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7030 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7031 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7032 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7033 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7034 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7035 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7036 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7037 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7038 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7039 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7041 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7043 std::swap (rhs1, rhs2);
7044 std::swap (var1, var2);
7045 std::swap (load1_dr_info, load2_dr_info);
7048 tree *init = loop_vinfo->scan_map->get (var1);
7049 gcc_assert (init);
7051 unsigned HOST_WIDE_INT nunits;
7052 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7053 gcc_unreachable ();
7054 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7055 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7056 gcc_assert (units_log2 > 0);
7057 auto_vec<tree, 16> perms;
7058 perms.quick_grow (units_log2 + 1);
7059 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7060 for (int i = 0; i <= units_log2; ++i)
7062 unsigned HOST_WIDE_INT j, k;
7063 vec_perm_builder sel (nunits, nunits, 1);
7064 sel.quick_grow (nunits);
7065 if (i == units_log2)
7066 for (j = 0; j < nunits; ++j)
7067 sel[j] = nunits - 1;
7068 else
7070 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7071 sel[j] = j;
7072 for (k = 0; j < nunits; ++j, ++k)
7073 sel[j] = nunits + k;
7075 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7076 if (!use_whole_vector.is_empty ()
7077 && use_whole_vector[i] != scan_store_kind_perm)
7079 if (zero_vec == NULL_TREE)
7080 zero_vec = build_zero_cst (vectype);
7081 if (masktype == NULL_TREE
7082 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7083 masktype = truth_type_for (vectype);
7084 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7086 else
7087 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7090 tree vec_oprnd1 = NULL_TREE;
7091 tree vec_oprnd2 = NULL_TREE;
7092 tree vec_oprnd3 = NULL_TREE;
7093 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7094 tree dataref_offset = build_int_cst (ref_type, 0);
7095 tree bump = vect_get_data_ptr_increment (vinfo, dr_info,
7096 vectype, VMAT_CONTIGUOUS);
7097 tree ldataref_ptr = NULL_TREE;
7098 tree orig = NULL_TREE;
7099 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7100 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7101 auto_vec<tree> vec_oprnds1;
7102 auto_vec<tree> vec_oprnds2;
7103 auto_vec<tree> vec_oprnds3;
7104 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7105 *init, &vec_oprnds1,
7106 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7107 rhs2, &vec_oprnds3);
7108 for (int j = 0; j < ncopies; j++)
7110 vec_oprnd1 = vec_oprnds1[j];
7111 if (ldataref_ptr == NULL)
7112 vec_oprnd2 = vec_oprnds2[j];
7113 vec_oprnd3 = vec_oprnds3[j];
7114 if (j == 0)
7115 orig = vec_oprnd3;
7116 else if (!inscan_var_store)
7117 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7119 if (ldataref_ptr)
7121 vec_oprnd2 = make_ssa_name (vectype);
7122 tree data_ref = fold_build2 (MEM_REF, vectype,
7123 unshare_expr (ldataref_ptr),
7124 dataref_offset);
7125 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7126 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7127 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7128 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7129 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7132 tree v = vec_oprnd2;
7133 for (int i = 0; i < units_log2; ++i)
7135 tree new_temp = make_ssa_name (vectype);
7136 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7137 (zero_vec
7138 && (use_whole_vector[i]
7139 != scan_store_kind_perm))
7140 ? zero_vec : vec_oprnd1, v,
7141 perms[i]);
7142 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7143 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7144 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7146 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7148 /* The whole-vector shift shifted in zero bits, but if *init
7149 is not initializer_zerop, we need to replace those elements
7150 with elements from vec_oprnd1. */
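/* Illustrative sketch: for i == 1 and 8 lanes the mask built below is
   { 0, 0, 1, 1, 1, 1, 1, 1 }, so the VEC_COND_EXPR takes lanes 0 and 1
   (the lanes the shift filled with zeros) from vec_oprnd1 and the
   remaining lanes from new_temp.  */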
7151 tree_vector_builder vb (masktype, nunits, 1);
7152 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7153 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7154 ? boolean_false_node : boolean_true_node);
7156 tree new_temp2 = make_ssa_name (vectype);
7157 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7158 new_temp, vec_oprnd1);
7159 vect_finish_stmt_generation (vinfo, stmt_info,
7160 g, gsi);
7161 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7162 new_temp = new_temp2;
7165 /* For exclusive scan, perform the perms[i] permutation once
7166 more. */
7167 if (i == 0
7168 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7169 && v == vec_oprnd2)
7171 v = new_temp;
7172 --i;
7173 continue;
7176 tree new_temp2 = make_ssa_name (vectype);
7177 g = gimple_build_assign (new_temp2, code, v, new_temp);
7178 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7179 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7181 v = new_temp2;
7184 tree new_temp = make_ssa_name (vectype);
7185 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7186 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7187 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7189 tree last_perm_arg = new_temp;
7190 /* For exclusive scan, new_temp computed above is the exclusive scan
7191 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7192 of the last element into orig. */
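/* Sketch for PLUS_EXPR: last_perm_arg = new_temp + vec_oprnd2 adds the
   per-lane inputs back in, so the last lane then holds the inclusive
   total that is broadcast below.  */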
7193 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7195 last_perm_arg = make_ssa_name (vectype);
7196 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7197 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7198 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7201 orig = make_ssa_name (vectype);
7202 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7203 last_perm_arg, perms[units_log2]);
7204 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7205 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7207 if (!inscan_var_store)
7209 tree data_ref = fold_build2 (MEM_REF, vectype,
7210 unshare_expr (dataref_ptr),
7211 dataref_offset);
7212 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7213 g = gimple_build_assign (data_ref, new_temp);
7214 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7215 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7219 if (inscan_var_store)
7220 for (int j = 0; j < ncopies; j++)
7222 if (j != 0)
7223 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7225 tree data_ref = fold_build2 (MEM_REF, vectype,
7226 unshare_expr (dataref_ptr),
7227 dataref_offset);
7228 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7229 gimple *g = gimple_build_assign (data_ref, orig);
7230 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7231 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7233 return true;
7237 /* Function vectorizable_store.
7239 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7240 that can be vectorized.
7241 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7242 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7243 Return true if STMT_INFO is vectorizable in this way. */
7245 static bool
7246 vectorizable_store (vec_info *vinfo,
7247 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7248 gimple **vec_stmt, slp_tree slp_node,
7249 stmt_vector_for_cost *cost_vec)
7251 tree data_ref;
7252 tree op;
7253 tree vec_oprnd = NULL_TREE;
7254 tree elem_type;
7255 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7256 class loop *loop = NULL;
7257 machine_mode vec_mode;
7258 tree dummy;
7259 enum vect_def_type rhs_dt = vect_unknown_def_type;
7260 enum vect_def_type mask_dt = vect_unknown_def_type;
7261 tree dataref_ptr = NULL_TREE;
7262 tree dataref_offset = NULL_TREE;
7263 gimple *ptr_incr = NULL;
7264 int ncopies;
7265 int j;
7266 stmt_vec_info first_stmt_info;
7267 bool grouped_store;
7268 unsigned int group_size, i;
7269 vec<tree> oprnds = vNULL;
7270 vec<tree> result_chain = vNULL;
7271 vec<tree> vec_oprnds = vNULL;
7272 bool slp = (slp_node != NULL);
7273 unsigned int vec_num;
7274 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7275 tree aggr_type;
7276 gather_scatter_info gs_info;
7277 poly_uint64 vf;
7278 vec_load_store_type vls_type;
7279 tree ref_type;
7281 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7282 return false;
7284 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7285 && ! vec_stmt)
7286 return false;
7288 /* Is vectorizable store? */
7290 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7291 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7293 tree scalar_dest = gimple_assign_lhs (assign);
7294 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7295 && is_pattern_stmt_p (stmt_info))
7296 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7297 if (TREE_CODE (scalar_dest) != ARRAY_REF
7298 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7299 && TREE_CODE (scalar_dest) != INDIRECT_REF
7300 && TREE_CODE (scalar_dest) != COMPONENT_REF
7301 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7302 && TREE_CODE (scalar_dest) != REALPART_EXPR
7303 && TREE_CODE (scalar_dest) != MEM_REF)
7304 return false;
7306 else
7308 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7309 if (!call || !gimple_call_internal_p (call))
7310 return false;
7312 internal_fn ifn = gimple_call_internal_fn (call);
7313 if (!internal_store_fn_p (ifn))
7314 return false;
7316 if (slp_node != NULL)
7318 if (dump_enabled_p ())
7319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7320 "SLP of masked stores not supported.\n");
7321 return false;
7324 int mask_index = internal_fn_mask_index (ifn);
7325 if (mask_index >= 0
7326 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
7327 &mask, NULL, &mask_dt, &mask_vectype))
7328 return false;
7331 op = vect_get_store_rhs (stmt_info);
7333 /* Cannot have hybrid store SLP -- that would mean storing to the
7334 same location twice. */
7335 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7337 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7338 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7340 if (loop_vinfo)
7342 loop = LOOP_VINFO_LOOP (loop_vinfo);
7343 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7345 else
7346 vf = 1;
7348 /* Multiple types in SLP are handled by creating the appropriate number of
7349 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7350 case of SLP. */
7351 if (slp)
7352 ncopies = 1;
7353 else
7354 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7356 gcc_assert (ncopies >= 1);
7358 /* FORNOW. This restriction should be relaxed. */
7359 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7361 if (dump_enabled_p ())
7362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7363 "multiple types in nested loop.\n");
7364 return false;
7367 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
7368 op, &rhs_dt, &rhs_vectype, &vls_type))
7369 return false;
7371 elem_type = TREE_TYPE (vectype);
7372 vec_mode = TYPE_MODE (vectype);
7374 if (!STMT_VINFO_DATA_REF (stmt_info))
7375 return false;
7377 vect_memory_access_type memory_access_type;
7378 enum dr_alignment_support alignment_support_scheme;
7379 int misalignment;
7380 poly_int64 poffset;
7381 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
7382 ncopies, &memory_access_type, &poffset,
7383 &alignment_support_scheme, &misalignment, &gs_info))
7384 return false;
7386 if (mask)
7388 if (memory_access_type == VMAT_CONTIGUOUS)
7390 if (!VECTOR_MODE_P (vec_mode)
7391 || !can_vec_mask_load_store_p (vec_mode,
7392 TYPE_MODE (mask_vectype), false))
7393 return false;
7395 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7396 && (memory_access_type != VMAT_GATHER_SCATTER
7397 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7399 if (dump_enabled_p ())
7400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7401 "unsupported access type for masked store.\n");
7402 return false;
7405 else
7407 /* FORNOW. In some cases can vectorize even if data-type not supported
7408 (e.g. - array initialization with 0). */
7409 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7410 return false;
7413 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7414 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7415 && memory_access_type != VMAT_GATHER_SCATTER
7416 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7417 if (grouped_store)
7419 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7420 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7421 group_size = DR_GROUP_SIZE (first_stmt_info);
7423 else
7425 first_stmt_info = stmt_info;
7426 first_dr_info = dr_info;
7427 group_size = vec_num = 1;
7430 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7432 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
7433 memory_access_type))
7434 return false;
7437 if (!vec_stmt) /* transformation not required. */
7439 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7441 if (loop_vinfo
7442 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
7443 check_load_store_for_partial_vectors (loop_vinfo, vectype, vls_type,
7444 group_size, memory_access_type,
7445 &gs_info, mask);
7447 if (slp_node
7448 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
7449 vectype))
7451 if (dump_enabled_p ())
7452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7453 "incompatible vector types for invariants\n");
7454 return false;
7457 if (dump_enabled_p ()
7458 && memory_access_type != VMAT_ELEMENTWISE
7459 && memory_access_type != VMAT_GATHER_SCATTER
7460 && alignment_support_scheme != dr_aligned)
7461 dump_printf_loc (MSG_NOTE, vect_location,
7462 "Vectorizing an unaligned access.\n");
7464 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7465 vect_model_store_cost (vinfo, stmt_info, ncopies,
7466 memory_access_type, alignment_support_scheme,
7467 misalignment, vls_type, slp_node, cost_vec);
7468 return true;
7470 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7472 /* Transform. */
7474 ensure_base_align (dr_info);
7476 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7478 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7479 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7480 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7481 tree ptr, var, scale, vec_mask;
7482 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7483 tree mask_halfvectype = mask_vectype;
7484 edge pe = loop_preheader_edge (loop);
7485 gimple_seq seq;
7486 basic_block new_bb;
7487 enum { NARROW, NONE, WIDEN } modifier;
7488 poly_uint64 scatter_off_nunits
7489 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7491 if (known_eq (nunits, scatter_off_nunits))
7492 modifier = NONE;
7493 else if (known_eq (nunits * 2, scatter_off_nunits))
7495 modifier = WIDEN;
7497 /* Currently gathers and scatters are only supported for
7498 fixed-length vectors. */
7499 unsigned int count = scatter_off_nunits.to_constant ();
7500 vec_perm_builder sel (count, count, 1);
7501 for (i = 0; i < (unsigned int) count; ++i)
7502 sel.quick_push (i | (count / 2));
7504 vec_perm_indices indices (sel, 1, count);
7505 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7506 indices);
7507 gcc_assert (perm_mask != NULL_TREE);
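/* E.g. for scatter_off_nunits == 8 the selector built above is
   { 4, 5, 6, 7, 4, 5, 6, 7 }; the odd copies below use it to pick up
   the second half of the offset vector.  */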
7509 else if (known_eq (nunits, scatter_off_nunits * 2))
7511 modifier = NARROW;
7513 /* Currently gathers and scatters are only supported for
7514 fixed-length vectors. */
7515 unsigned int count = nunits.to_constant ();
7516 vec_perm_builder sel (count, count, 1);
7517 for (i = 0; i < (unsigned int) count; ++i)
7518 sel.quick_push (i | (count / 2));
7520 vec_perm_indices indices (sel, 2, count);
7521 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7522 gcc_assert (perm_mask != NULL_TREE);
7523 ncopies *= 2;
7525 if (mask)
7526 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7528 else
7529 gcc_unreachable ();
7531 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7532 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7533 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7534 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7535 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7536 scaletype = TREE_VALUE (arglist);
7538 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7539 && TREE_CODE (rettype) == VOID_TYPE);
7541 ptr = fold_convert (ptrtype, gs_info.base);
7542 if (!is_gimple_min_invariant (ptr))
7544 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7545 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7546 gcc_assert (!new_bb);
7549 if (mask == NULL_TREE)
7551 mask_arg = build_int_cst (masktype, -1);
7552 mask_arg = vect_init_vector (vinfo, stmt_info,
7553 mask_arg, masktype, NULL);
7556 scale = build_int_cst (scaletype, gs_info.scale);
7558 auto_vec<tree> vec_oprnds0;
7559 auto_vec<tree> vec_oprnds1;
7560 auto_vec<tree> vec_masks;
7561 if (mask)
7563 tree mask_vectype = truth_type_for (vectype);
7564 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7565 modifier == NARROW
7566 ? ncopies / 2 : ncopies,
7567 mask, &vec_masks, mask_vectype);
7569 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7570 modifier == WIDEN
7571 ? ncopies / 2 : ncopies,
7572 gs_info.offset, &vec_oprnds0);
7573 vect_get_vec_defs_for_operand (vinfo, stmt_info,
7574 modifier == NARROW
7575 ? ncopies / 2 : ncopies,
7576 op, &vec_oprnds1);
7577 for (j = 0; j < ncopies; ++j)
7579 if (modifier == WIDEN)
7581 if (j & 1)
7582 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
7583 perm_mask, stmt_info, gsi);
7584 else
7585 op = vec_oprnd0 = vec_oprnds0[j / 2];
7586 src = vec_oprnd1 = vec_oprnds1[j];
7587 if (mask)
7588 mask_op = vec_mask = vec_masks[j];
7590 else if (modifier == NARROW)
7592 if (j & 1)
7593 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
7594 perm_mask, stmt_info, gsi);
7595 else
7596 src = vec_oprnd1 = vec_oprnds1[j / 2];
7597 op = vec_oprnd0 = vec_oprnds0[j];
7598 if (mask)
7599 mask_op = vec_mask = vec_masks[j / 2];
7601 else
7603 op = vec_oprnd0 = vec_oprnds0[j];
7604 src = vec_oprnd1 = vec_oprnds1[j];
7605 if (mask)
7606 mask_op = vec_mask = vec_masks[j];
7609 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7611 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7612 TYPE_VECTOR_SUBPARTS (srctype)));
7613 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7614 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7615 gassign *new_stmt
7616 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7617 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7618 src = var;
7621 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7623 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7624 TYPE_VECTOR_SUBPARTS (idxtype)));
7625 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7626 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7627 gassign *new_stmt
7628 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7629 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7630 op = var;
7633 if (mask)
7635 tree utype;
7636 mask_arg = mask_op;
7637 if (modifier == NARROW)
7639 var = vect_get_new_ssa_name (mask_halfvectype,
7640 vect_simple_var);
7641 gassign *new_stmt
7642 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7643 : VEC_UNPACK_LO_EXPR,
7644 mask_op);
7645 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7646 mask_arg = var;
7648 tree optype = TREE_TYPE (mask_arg);
7649 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7650 utype = masktype;
7651 else
7652 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7653 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7654 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7655 gassign *new_stmt
7656 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7657 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7658 mask_arg = var;
7659 if (!useless_type_conversion_p (masktype, utype))
7661 gcc_assert (TYPE_PRECISION (utype)
7662 <= TYPE_PRECISION (masktype));
7663 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7664 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7665 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7666 mask_arg = var;
7670 gcall *new_stmt
7671 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7672 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7674 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7676 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7677 return true;
7679 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7680 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
7682 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7683 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7685 if (grouped_store)
7687 /* FORNOW */
7688 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7690 /* We vectorize all the stmts of the interleaving group when we
7691 reach the last stmt in the group. */
7692 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7693 < DR_GROUP_SIZE (first_stmt_info)
7694 && !slp)
7696 *vec_stmt = NULL;
7697 return true;
7700 if (slp)
7702 grouped_store = false;
7703 /* VEC_NUM is the number of vect stmts to be created for this
7704 group. */
7705 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7706 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7707 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7708 == first_stmt_info);
7709 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7710 op = vect_get_store_rhs (first_stmt_info);
7712 else
7713 /* VEC_NUM is the number of vect stmts to be created for this
7714 group. */
7715 vec_num = group_size;
7717 ref_type = get_group_alias_ptr_type (first_stmt_info);
7719 else
7720 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7722 if (dump_enabled_p ())
7723 dump_printf_loc (MSG_NOTE, vect_location,
7724 "transform store. ncopies = %d\n", ncopies);
7726 if (memory_access_type == VMAT_ELEMENTWISE
7727 || memory_access_type == VMAT_STRIDED_SLP)
7729 gimple_stmt_iterator incr_gsi;
7730 bool insert_after;
7731 gimple *incr;
7732 tree offvar;
7733 tree ivstep;
7734 tree running_off;
7735 tree stride_base, stride_step, alias_off;
7736 tree vec_oprnd;
7737 tree dr_offset;
7738 unsigned int g;
7739 /* Checked by get_load_store_type. */
7740 unsigned int const_nunits = nunits.to_constant ();
7742 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7743 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7745 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
7746 stride_base
7747 = fold_build_pointer_plus
7748 (DR_BASE_ADDRESS (first_dr_info->dr),
7749 size_binop (PLUS_EXPR,
7750 convert_to_ptrofftype (dr_offset),
7751 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7752 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7754 /* For a store with loop-invariant (but other than power-of-2)
7755 stride (i.e. not a grouped access) like so:
7757 for (i = 0; i < n; i += stride)
7758 array[i] = ...;
7760 we generate a new induction variable and new stores from
7761 the components of the (vectorized) rhs:
7763 for (j = 0; ; j += VF*stride)
7764 vectemp = ...;
7765 tmp1 = vectemp[0];
7766 array[j] = tmp1;
7767 tmp2 = vectemp[1];
7768 array[j + stride] = tmp2;
7772 unsigned nstores = const_nunits;
7773 unsigned lnel = 1;
7774 tree ltype = elem_type;
7775 tree lvectype = vectype;
7776 if (slp)
7778 if (group_size < const_nunits
7779 && const_nunits % group_size == 0)
7781 nstores = const_nunits / group_size;
7782 lnel = group_size;
7783 ltype = build_vector_type (elem_type, group_size);
7784 lvectype = vectype;
7786 /* First check if vec_extract optab doesn't support extraction
7787 of vector elts directly. */
7788 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7789 machine_mode vmode;
7790 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7791 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7792 group_size).exists (&vmode)
7793 || (convert_optab_handler (vec_extract_optab,
7794 TYPE_MODE (vectype), vmode)
7795 == CODE_FOR_nothing))
7797 /* Try to avoid emitting an extract of vector elements
7798 by performing the extracts using an integer type of the
7799 same size, extracting from a vector of those and then
7800 re-interpreting it as the original vector type if
7801 supported. */
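/* For instance (a sketch): SImode elements with group_size == 2 in a
   V8SI vector give lsize == 64, so we extract four DImode lanes from
   the vector viewed as V4DI and store each as a single 64-bit integer
   instead of two 32-bit ones.  */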
7802 unsigned lsize
7803 = group_size * GET_MODE_BITSIZE (elmode);
7804 unsigned int lnunits = const_nunits / group_size;
7805 /* If we can't construct such a vector fall back to
7806 element extracts from the original vector type and
7807 element size stores. */
7808 if (int_mode_for_size (lsize, 0).exists (&elmode)
7809 && VECTOR_MODE_P (TYPE_MODE (vectype))
7810 && related_vector_mode (TYPE_MODE (vectype), elmode,
7811 lnunits).exists (&vmode)
7812 && (convert_optab_handler (vec_extract_optab,
7813 vmode, elmode)
7814 != CODE_FOR_nothing))
7816 nstores = lnunits;
7817 lnel = group_size;
7818 ltype = build_nonstandard_integer_type (lsize, 1);
7819 lvectype = build_vector_type (ltype, nstores);
7821 /* Else fall back to vector extraction anyway.
7822 Fewer stores are more important than avoiding spilling
7823 of the vector we extract from. Compared to the
7824 construction case in vectorizable_load no store-forwarding
7825 issue exists here for reasonable archs. */
7828 else if (group_size >= const_nunits
7829 && group_size % const_nunits == 0)
7831 nstores = 1;
7832 lnel = const_nunits;
7833 ltype = vectype;
7834 lvectype = vectype;
7836 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7837 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7840 ivstep = stride_step;
7841 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7842 build_int_cst (TREE_TYPE (ivstep), vf));
7844 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7846 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7847 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7848 create_iv (stride_base, ivstep, NULL,
7849 loop, &incr_gsi, insert_after,
7850 &offvar, NULL);
7851 incr = gsi_stmt (incr_gsi);
7853 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7855 alias_off = build_int_cst (ref_type, 0);
7856 stmt_vec_info next_stmt_info = first_stmt_info;
7857 for (g = 0; g < group_size; g++)
7859 running_off = offvar;
7860 if (g)
7862 tree size = TYPE_SIZE_UNIT (ltype);
7863 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7864 size);
7865 tree newoff = copy_ssa_name (running_off, NULL);
7866 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7867 running_off, pos);
7868 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7869 running_off = newoff;
7871 if (!slp)
7872 op = vect_get_store_rhs (next_stmt_info);
7873 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
7874 op, &vec_oprnds);
7875 unsigned int group_el = 0;
7876 unsigned HOST_WIDE_INT
7877 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7878 for (j = 0; j < ncopies; j++)
7880 vec_oprnd = vec_oprnds[j];
7881 /* Pun the vector to extract from if necessary. */
7882 if (lvectype != vectype)
7884 tree tem = make_ssa_name (lvectype);
7885 gimple *pun
7886 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7887 lvectype, vec_oprnd));
7888 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
7889 vec_oprnd = tem;
7891 for (i = 0; i < nstores; i++)
7893 tree newref, newoff;
7894 gimple *incr, *assign;
7895 tree size = TYPE_SIZE (ltype);
7896 /* Extract the i'th component. */
7897 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7898 bitsize_int (i), size);
7899 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7900 size, pos);
7902 elem = force_gimple_operand_gsi (gsi, elem, true,
7903 NULL_TREE, true,
7904 GSI_SAME_STMT);
7906 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7907 group_el * elsz);
7908 newref = build2 (MEM_REF, ltype,
7909 running_off, this_off);
7910 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7912 /* And store it to *running_off. */
7913 assign = gimple_build_assign (newref, elem);
7914 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
7916 group_el += lnel;
7917 if (! slp
7918 || group_el == group_size)
7920 newoff = copy_ssa_name (running_off, NULL);
7921 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7922 running_off, stride_step);
7923 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
7925 running_off = newoff;
7926 group_el = 0;
7928 if (g == group_size - 1
7929 && !slp)
7931 if (j == 0 && i == 0)
7932 *vec_stmt = assign;
7933 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
7937 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7938 vec_oprnds.release ();
7939 if (slp)
7940 break;
7943 return true;
7946 auto_vec<tree> dr_chain (group_size);
7947 oprnds.create (group_size);
7949 gcc_assert (alignment_support_scheme);
7950 vec_loop_masks *loop_masks
7951 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7952 ? &LOOP_VINFO_MASKS (loop_vinfo)
7953 : NULL);
7954 vec_loop_lens *loop_lens
7955 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
7956 ? &LOOP_VINFO_LENS (loop_vinfo)
7957 : NULL);
7959 /* Shouldn't go with length-based approach if fully masked. */
7960 gcc_assert (!loop_lens || !loop_masks);
7962 /* Targets with store-lane instructions must not require explicit
7963 realignment. vect_supportable_dr_alignment always returns either
7964 dr_aligned or dr_unaligned_supported for masked operations. */
7965 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7966 && !mask
7967 && !loop_masks)
7968 || alignment_support_scheme == dr_aligned
7969 || alignment_support_scheme == dr_unaligned_supported);
7971 tree offset = NULL_TREE;
7972 if (!known_eq (poffset, 0))
7973 offset = size_int (poffset);
7975 tree bump;
7976 tree vec_offset = NULL_TREE;
7977 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
7979 aggr_type = NULL_TREE;
7980 bump = NULL_TREE;
7982 else if (memory_access_type == VMAT_GATHER_SCATTER)
7984 aggr_type = elem_type;
7985 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
7986 &bump, &vec_offset);
7988 else
7990 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7991 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7992 else
7993 aggr_type = vectype;
7994 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
7995 memory_access_type);
7998 if (mask)
7999 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8001 /* In case the vectorization factor (VF) is bigger than the number
8002 of elements that we can fit in a vectype (nunits), we have to generate
8003 more than one vector stmt, i.e. we need to "unroll" the
8004 vector stmt by a factor VF/nunits. */
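/* For example (non-SLP), with VF == 8 and 4-element vectors NCOPIES is 2,
   so the loop over J below runs twice.  */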
8006 /* In case of interleaving (non-unit grouped access):
8008 S1: &base + 2 = x2
8009 S2: &base = x0
8010 S3: &base + 1 = x1
8011 S4: &base + 3 = x3
8013 We create vectorized stores starting from the base address (the access of
8014 the first stmt in the chain, S2 in the above example) when the last store stmt
8015 of the chain (S4) is reached:
8017 VS1: &base = vx2
8018 VS2: &base + vec_size*1 = vx0
8019 VS3: &base + vec_size*2 = vx1
8020 VS4: &base + vec_size*3 = vx3
8022 Then permutation statements are generated:
8024 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8025 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8028 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8029 (the order of the data-refs in the output of vect_permute_store_chain
8030 corresponds to the order of scalar stmts in the interleaving chain - see
8031 the documentation of vect_permute_store_chain()).
8033 In case of both multiple types and interleaving, above vector stores and
8034 permutation stmts are created for every copy. The result vector stmts are
8035 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8036 STMT_VINFO_RELATED_STMT for the next copies.
8039 auto_vec<tree> vec_masks;
8040 tree vec_mask = NULL;
8041 auto_vec<tree> vec_offsets;
8042 auto_vec<vec<tree> > gvec_oprnds;
8043 gvec_oprnds.safe_grow_cleared (group_size, true);
8044 for (j = 0; j < ncopies; j++)
8046 gimple *new_stmt;
8047 if (j == 0)
8049 if (slp)
8051 /* Get vectorized arguments for SLP_NODE. */
8052 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8053 op, &vec_oprnds);
8054 vec_oprnd = vec_oprnds[0];
8056 else
8058 /* For interleaved stores we collect vectorized defs for all the
8059 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8060 used as an input to vect_permute_store_chain().
8062 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8063 and OPRNDS are of size 1. */
8064 stmt_vec_info next_stmt_info = first_stmt_info;
8065 for (i = 0; i < group_size; i++)
8067 /* Since gaps are not supported for interleaved stores,
8068 DR_GROUP_SIZE is the exact number of stmts in the chain.
8069 Therefore, NEXT_STMT_INFO can't be NULL_TREE. If there
8070 is no interleaving, DR_GROUP_SIZE is 1,
8071 and only one iteration of the loop will be executed. */
8072 op = vect_get_store_rhs (next_stmt_info);
8073 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8074 ncopies, op, &gvec_oprnds[i]);
8075 vec_oprnd = gvec_oprnds[i][0];
8076 dr_chain.quick_push (gvec_oprnds[i][0]);
8077 oprnds.quick_push (gvec_oprnds[i][0]);
8078 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8080 if (mask)
8082 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8083 mask, &vec_masks, mask_vectype);
8084 vec_mask = vec_masks[0];
8088 /* We should have caught mismatched types earlier. */
8089 gcc_assert (useless_type_conversion_p (vectype,
8090 TREE_TYPE (vec_oprnd)));
8091 bool simd_lane_access_p
8092 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8093 if (simd_lane_access_p
8094 && !loop_masks
8095 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8096 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8097 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8098 && integer_zerop (DR_INIT (first_dr_info->dr))
8099 && alias_sets_conflict_p (get_alias_set (aggr_type),
8100 get_alias_set (TREE_TYPE (ref_type))))
8102 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8103 dataref_offset = build_int_cst (ref_type, 0);
8105 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8107 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8108 &gs_info, &dataref_ptr,
8109 &vec_offsets);
8110 vec_offset = vec_offsets[0];
8112 else
8113 dataref_ptr
8114 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8115 simd_lane_access_p ? loop : NULL,
8116 offset, &dummy, gsi, &ptr_incr,
8117 simd_lane_access_p, bump);
8119 else
8121 /* For interleaved stores we created vectorized defs for all the
8122 defs stored in OPRNDS in the previous iteration (previous copy).
8123 DR_CHAIN is then used as an input to vect_permute_store_chain().
8124 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8125 OPRNDS are of size 1. */
8126 for (i = 0; i < group_size; i++)
8128 vec_oprnd = gvec_oprnds[i][j];
8129 dr_chain[i] = gvec_oprnds[i][j];
8130 oprnds[i] = gvec_oprnds[i][j];
8132 if (mask)
8133 vec_mask = vec_masks[j];
8134 if (dataref_offset)
8135 dataref_offset
8136 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8137 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8138 vec_offset = vec_offsets[j];
8139 else
8140 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8141 stmt_info, bump);
8144 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8146 tree vec_array;
8148 /* Get an array into which we can store the individual vectors. */
8149 vec_array = create_vector_array (vectype, vec_num);
8151 /* Invalidate the current contents of VEC_ARRAY. This should
8152 become an RTL clobber too, which prevents the vector registers
8153 from being upward-exposed. */
8154 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8156 /* Store the individual vectors into the array. */
8157 for (i = 0; i < vec_num; i++)
8159 vec_oprnd = dr_chain[i];
8160 write_vector_array (vinfo, stmt_info,
8161 gsi, vec_oprnd, vec_array, i);
8164 tree final_mask = NULL;
8165 if (loop_masks)
8166 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8167 vectype, j);
8168 if (vec_mask)
8169 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8170 vec_mask, gsi);
8172 gcall *call;
8173 if (final_mask)
8175 /* Emit:
8176 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8177 VEC_ARRAY). */
8178 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8179 tree alias_ptr = build_int_cst (ref_type, align);
8180 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8181 dataref_ptr, alias_ptr,
8182 final_mask, vec_array);
8184 else
8186 /* Emit:
8187 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8188 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8189 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8190 vec_array);
8191 gimple_call_set_lhs (call, data_ref);
8193 gimple_call_set_nothrow (call, true);
8194 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8195 new_stmt = call;
8197 /* Record that VEC_ARRAY is now dead. */
8198 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8200 else
8202 new_stmt = NULL;
8203 if (grouped_store)
8205 if (j == 0)
8206 result_chain.create (group_size);
8207 /* Permute. */
8208 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8209 gsi, &result_chain);
8212 stmt_vec_info next_stmt_info = first_stmt_info;
8213 for (i = 0; i < vec_num; i++)
8215 unsigned misalign;
8216 unsigned HOST_WIDE_INT align;
8218 tree final_mask = NULL_TREE;
8219 if (loop_masks)
8220 final_mask = vect_get_loop_mask (gsi, loop_masks,
8221 vec_num * ncopies,
8222 vectype, vec_num * j + i);
8223 if (vec_mask)
8224 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8225 vec_mask, gsi);
8227 if (memory_access_type == VMAT_GATHER_SCATTER)
8229 tree scale = size_int (gs_info.scale);
8230 gcall *call;
8231 if (final_mask)
8232 call = gimple_build_call_internal
8233 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8234 scale, vec_oprnd, final_mask);
8235 else
8236 call = gimple_build_call_internal
8237 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8238 scale, vec_oprnd);
8239 gimple_call_set_nothrow (call, true);
8240 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8241 new_stmt = call;
8242 break;
8245 if (i > 0)
8246 /* Bump the vector pointer. */
8247 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
8248 gsi, stmt_info, bump);
8250 if (slp)
8251 vec_oprnd = vec_oprnds[i];
8252 else if (grouped_store)
8253 /* For grouped stores vectorized defs are interleaved in
8254 vect_permute_store_chain(). */
8255 vec_oprnd = result_chain[i];
8257 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8258 if (alignment_support_scheme == dr_aligned)
8259 misalign = 0;
8260 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
8262 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
8263 misalign = 0;
8265 else
8266 misalign = misalignment;
8267 if (dataref_offset == NULL_TREE
8268 && TREE_CODE (dataref_ptr) == SSA_NAME)
8269 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8270 misalign);
8271 align = least_bit_hwi (misalign | align);
8273 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8275 tree perm_mask = perm_mask_for_reverse (vectype);
8276 tree perm_dest = vect_create_destination_var
8277 (vect_get_store_rhs (stmt_info), vectype);
8278 tree new_temp = make_ssa_name (perm_dest);
8280 /* Generate the permute statement. */
8281 gimple *perm_stmt
8282 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8283 vec_oprnd, perm_mask);
8284 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8286 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8287 vec_oprnd = new_temp;
8290 /* Arguments are ready. Create the new vector stmt. */
8291 if (final_mask)
8293 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8294 gcall *call
8295 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8296 dataref_ptr, ptr,
8297 final_mask, vec_oprnd);
8298 gimple_call_set_nothrow (call, true);
8299 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8300 new_stmt = call;
8302 else if (loop_lens)
8304 tree final_len
8305 = vect_get_loop_len (loop_vinfo, loop_lens,
8306 vec_num * ncopies, vec_num * j + i);
8307 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
8308 machine_mode vmode = TYPE_MODE (vectype);
8309 opt_machine_mode new_ovmode
8310 = get_len_load_store_mode (vmode, false);
8311 machine_mode new_vmode = new_ovmode.require ();
8312 /* Need conversion if it's wrapped with VnQI. */
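/* Sketch: if the target's len_store pattern only handles byte vectors,
   a V4SI operand is VIEW_CONVERTed to V16QI here before emitting
   IFN_LEN_STORE.  */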
8313 if (vmode != new_vmode)
8315 tree new_vtype
8316 = build_vector_type_for_mode (unsigned_intQI_type_node,
8317 new_vmode);
8318 tree var
8319 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
8320 vec_oprnd
8321 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
8322 gassign *new_stmt
8323 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
8324 vec_oprnd);
8325 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
8326 gsi);
8327 vec_oprnd = var;
8329 gcall *call
8330 = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
8331 ptr, final_len, vec_oprnd);
8332 gimple_call_set_nothrow (call, true);
8333 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8334 new_stmt = call;
8336 else
8338 data_ref = fold_build2 (MEM_REF, vectype,
8339 dataref_ptr,
8340 dataref_offset
8341 ? dataref_offset
8342 : build_int_cst (ref_type, 0));
8343 if (alignment_support_scheme == dr_aligned)
8345 else
8346 TREE_TYPE (data_ref)
8347 = build_aligned_type (TREE_TYPE (data_ref),
8348 align * BITS_PER_UNIT);
8349 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8350 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
8351 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8354 if (slp)
8355 continue;
8357 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8358 if (!next_stmt_info)
8359 break;
8362 if (!slp)
8364 if (j == 0)
8365 *vec_stmt = new_stmt;
8366 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8370 for (i = 0; i < group_size; ++i)
8372 vec<tree> oprndsi = gvec_oprnds[i];
8373 oprndsi.release ();
8375 oprnds.release ();
8376 result_chain.release ();
8377 vec_oprnds.release ();
8379 return true;
8382 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8383 VECTOR_CST mask. No checks are made that the target platform supports the
8384 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8385 vect_gen_perm_mask_checked. */
8387 tree
8388 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8390 tree mask_type;
8392 poly_uint64 nunits = sel.length ();
8393 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8395 mask_type = build_vector_type (ssizetype, nunits);
8396 return vec_perm_indices_to_tree (mask_type, sel);
8399 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8400 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8402 tree
8403 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8405 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8406 return vect_gen_perm_mask_any (vectype, sel);
8409 /* Given vector variables X and Y that were generated for the scalar
8410 STMT_INFO, generate instructions to permute the vector elements of X and Y
8411 using permutation mask MASK_VEC, insert them at *GSI and return the
8412 permuted vector variable. */
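/* For example, with X == { x0, x1, x2, x3 }, Y == { y0, y1, y2, y3 } and
   MASK_VEC == { 0, 4, 1, 5 } the emitted VEC_PERM_EXPR produces
   { x0, y0, x1, y1 }; indices of NUNITS and above select from Y.  */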
8414 static tree
8415 permute_vec_elements (vec_info *vinfo,
8416 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8417 gimple_stmt_iterator *gsi)
8419 tree vectype = TREE_TYPE (x);
8420 tree perm_dest, data_ref;
8421 gimple *perm_stmt;
8423 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8424 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8425 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8426 else
8427 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8428 data_ref = make_ssa_name (perm_dest);
8430 /* Generate the permute statement. */
8431 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8432 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
8434 return data_ref;
8437 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8438 inserting them on the loop's preheader edge. Returns true if we
8439 were successful in doing so (and thus STMT_INFO can then be moved),
8440 otherwise returns false. */
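/* Illustrative example: for an invariant load a[_1] where

     _1 = n_7 + -1;

   and n_7 is defined outside the loop, the definition of _1 is moved to
   the preheader, which then allows the caller to hoist the load itself.  */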
8442 static bool
8443 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8445 ssa_op_iter i;
8446 tree op;
8447 bool any = false;
8449 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8451 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8452 if (!gimple_nop_p (def_stmt)
8453 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8455 /* Make sure we don't need to recurse. While we could do
8456 so in simple cases, for more complex use webs
8457 we don't have an easy way to preserve stmt order to fulfil
8458 dependencies within them. */
8459 tree op2;
8460 ssa_op_iter i2;
8461 if (gimple_code (def_stmt) == GIMPLE_PHI)
8462 return false;
8463 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8465 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8466 if (!gimple_nop_p (def_stmt2)
8467 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8468 return false;
8470 any = true;
8474 if (!any)
8475 return true;
8477 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8479 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8480 if (!gimple_nop_p (def_stmt)
8481 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8483 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8484 gsi_remove (&gsi, false);
8485 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8489 return true;
8492 /* vectorizable_load.
8494 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8495 that can be vectorized.
8496 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8497 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8498 Return true if STMT_INFO is vectorizable in this way. */
8500 static bool
8501 vectorizable_load (vec_info *vinfo,
8502 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8503 gimple **vec_stmt, slp_tree slp_node,
8504 stmt_vector_for_cost *cost_vec)
8506 tree scalar_dest;
8507 tree vec_dest = NULL;
8508 tree data_ref = NULL;
8509 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8510 class loop *loop = NULL;
8511 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8512 bool nested_in_vect_loop = false;
8513 tree elem_type;
8514 tree new_temp;
8515 machine_mode mode;
8516 tree dummy;
8517 tree dataref_ptr = NULL_TREE;
8518 tree dataref_offset = NULL_TREE;
8519 gimple *ptr_incr = NULL;
8520 int ncopies;
8521 int i, j;
8522 unsigned int group_size;
8523 poly_uint64 group_gap_adj;
8524 tree msq = NULL_TREE, lsq;
8525 tree realignment_token = NULL_TREE;
8526 gphi *phi = NULL;
8527 vec<tree> dr_chain = vNULL;
8528 bool grouped_load = false;
8529 stmt_vec_info first_stmt_info;
8530 stmt_vec_info first_stmt_info_for_drptr = NULL;
8531 bool compute_in_loop = false;
8532 class loop *at_loop;
8533 int vec_num;
8534 bool slp = (slp_node != NULL);
8535 bool slp_perm = false;
8536 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8537 poly_uint64 vf;
8538 tree aggr_type;
8539 gather_scatter_info gs_info;
8540 tree ref_type;
8541 enum vect_def_type mask_dt = vect_unknown_def_type;
8543 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8544 return false;
8546 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8547 && ! vec_stmt)
8548 return false;
8550 if (!STMT_VINFO_DATA_REF (stmt_info))
8551 return false;
8553 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8554 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8556 scalar_dest = gimple_assign_lhs (assign);
8557 if (TREE_CODE (scalar_dest) != SSA_NAME)
8558 return false;
8560 tree_code code = gimple_assign_rhs_code (assign);
8561 if (code != ARRAY_REF
8562 && code != BIT_FIELD_REF
8563 && code != INDIRECT_REF
8564 && code != COMPONENT_REF
8565 && code != IMAGPART_EXPR
8566 && code != REALPART_EXPR
8567 && code != MEM_REF
8568 && TREE_CODE_CLASS (code) != tcc_declaration)
8569 return false;
8571 else
8573 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8574 if (!call || !gimple_call_internal_p (call))
8575 return false;
8577 internal_fn ifn = gimple_call_internal_fn (call);
8578 if (!internal_load_fn_p (ifn))
8579 return false;
8581 scalar_dest = gimple_call_lhs (call);
8582 if (!scalar_dest)
8583 return false;
8585 int mask_index = internal_fn_mask_index (ifn);
8586 if (mask_index >= 0
8587 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node,
8588 /* ??? For SLP we only have operands for
8589 the mask operand. */
8590 slp_node ? 0 : mask_index,
8591 &mask, NULL, &mask_dt, &mask_vectype))
8592 return false;
8595 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8596 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8598 if (loop_vinfo)
8600 loop = LOOP_VINFO_LOOP (loop_vinfo);
8601 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8602 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8604 else
8605 vf = 1;
8607 /* Multiple types in SLP are handled by creating the appropriate number of
8608 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8609 case of SLP. */
8610 if (slp)
8611 ncopies = 1;
8612 else
8613 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8615 gcc_assert (ncopies >= 1);
8617 /* FORNOW. This restriction should be relaxed. */
8618 if (nested_in_vect_loop && ncopies > 1)
8620 if (dump_enabled_p ())
8621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8622 "multiple types in nested loop.\n");
8623 return false;
8626 /* Invalidate assumptions made by dependence analysis when vectorization
8627 on the unrolled body effectively re-orders stmts. */
8628 if (ncopies > 1
8629 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8630 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8631 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8633 if (dump_enabled_p ())
8634 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8635 "cannot perform implicit CSE when unrolling "
8636 "with negative dependence distance\n");
8637 return false;
8640 elem_type = TREE_TYPE (vectype);
8641 mode = TYPE_MODE (vectype);
8643 /* FORNOW. In some cases can vectorize even if data-type not supported
8644 (e.g. - data copies). */
8645 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8647 if (dump_enabled_p ())
8648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8649 "Aligned load, but unsupported type.\n");
8650 return false;
8653 /* Check if the load is a part of an interleaving chain. */
8654 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8656 grouped_load = true;
8657 /* FORNOW */
8658 gcc_assert (!nested_in_vect_loop);
8659 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8661 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8662 group_size = DR_GROUP_SIZE (first_stmt_info);
8664 /* Refuse non-SLP vectorization of SLP-only groups. */
8665 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8667 if (dump_enabled_p ())
8668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8669 "cannot vectorize load in non-SLP mode.\n");
8670 return false;
8673 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8675 slp_perm = true;
8677 if (!loop_vinfo)
8679 /* In BB vectorization we may not actually use a loaded vector
8680 accessing elements in excess of DR_GROUP_SIZE. */
8681 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8682 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
8683 unsigned HOST_WIDE_INT nunits;
8684 unsigned j, k, maxk = 0;
8685 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
8686 if (k > maxk)
8687 maxk = k;
8688 tree vectype = SLP_TREE_VECTYPE (slp_node);
8689 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
8690 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
8692 if (dump_enabled_p ())
8693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8694 "BB vectorization with gaps at the end of "
8695 "a load is not supported\n");
8696 return false;
8700 auto_vec<tree> tem;
8701 unsigned n_perms;
8702 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
8703 true, &n_perms))
8705 if (dump_enabled_p ())
8706 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
8707 vect_location,
8708 "unsupported load permutation\n");
8709 return false;
8713 /* Invalidate assumptions made by dependence analysis when vectorization
8714 on the unrolled body effectively re-orders stmts. */
8715 if (!PURE_SLP_STMT (stmt_info)
8716 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8717 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8718 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8720 if (dump_enabled_p ())
8721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8722 "cannot perform implicit CSE when performing "
8723 "group loads with negative dependence distance\n");
8724 return false;
8727 else
8728 group_size = 1;
8730 vect_memory_access_type memory_access_type;
8731 enum dr_alignment_support alignment_support_scheme;
8732 int misalignment;
8733 poly_int64 poffset;
8734 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
8735 ncopies, &memory_access_type, &poffset,
8736 &alignment_support_scheme, &misalignment, &gs_info))
8737 return false;
8739 if (mask)
8741 if (memory_access_type == VMAT_CONTIGUOUS)
8743 machine_mode vec_mode = TYPE_MODE (vectype);
8744 if (!VECTOR_MODE_P (vec_mode)
8745 || !can_vec_mask_load_store_p (vec_mode,
8746 TYPE_MODE (mask_vectype), true))
8747 return false;
8749 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8750 && memory_access_type != VMAT_GATHER_SCATTER)
8752 if (dump_enabled_p ())
8753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8754 "unsupported access type for masked load.\n");
8755 return false;
8757 else if (memory_access_type == VMAT_GATHER_SCATTER
8758 && gs_info.ifn == IFN_LAST
8759 && !gs_info.decl)
8761 if (dump_enabled_p ())
8762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8763 "unsupported masked emulated gather.\n");
8764 return false;
8768 if (!vec_stmt) /* transformation not required. */
8770 if (slp_node
8771 && mask
8772 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8773 mask_vectype))
8775 if (dump_enabled_p ())
8776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8777 "incompatible vector types for invariants\n");
8778 return false;
8781 if (!slp)
8782 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8784 if (loop_vinfo
8785 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8786 check_load_store_for_partial_vectors (loop_vinfo, vectype, VLS_LOAD,
8787 group_size, memory_access_type,
8788 &gs_info, mask);
8790 if (dump_enabled_p ()
8791 && memory_access_type != VMAT_ELEMENTWISE
8792 && memory_access_type != VMAT_GATHER_SCATTER
8793 && alignment_support_scheme != dr_aligned)
8794 dump_printf_loc (MSG_NOTE, vect_location,
8795 "Vectorizing an unaligned access.\n");
8797 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8798 vect_model_load_cost (vinfo, stmt_info, ncopies, vf, memory_access_type,
8799 alignment_support_scheme, misalignment,
8800 &gs_info, slp_node, cost_vec);
8801 return true;
8804 if (!slp)
8805 gcc_assert (memory_access_type
8806 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8808 if (dump_enabled_p ())
8809 dump_printf_loc (MSG_NOTE, vect_location,
8810 "transform load. ncopies = %d\n", ncopies);
8812 /* Transform. */
8814 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8815 ensure_base_align (dr_info);
8817 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8819 vect_build_gather_load_calls (vinfo,
8820 stmt_info, gsi, vec_stmt, &gs_info, mask);
8821 return true;
8824 if (memory_access_type == VMAT_INVARIANT)
8826 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8827 /* If we have versioned for aliasing or the loop doesn't
8828 have any data dependencies that would preclude this,
8829 then we are sure this is a loop invariant load and
8830 thus we can insert it on the preheader edge. */
8831 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8832 && !nested_in_vect_loop
8833 && hoist_defs_of_uses (stmt_info, loop));
8834 if (hoist_p)
8836 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8837 if (dump_enabled_p ())
8838 dump_printf_loc (MSG_NOTE, vect_location,
8839 "hoisting out of the vectorized loop: %G", stmt);
8840 scalar_dest = copy_ssa_name (scalar_dest);
8841 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8842 gsi_insert_on_edge_immediate
8843 (loop_preheader_edge (loop),
8844 gimple_build_assign (scalar_dest, rhs));
8846 /* These copies are all equivalent, but currently the representation
8847 requires a separate STMT_VINFO_VEC_STMT for each one. */
8848 gimple_stmt_iterator gsi2 = *gsi;
8849 gsi_next (&gsi2);
8850 for (j = 0; j < ncopies; j++)
8852 if (hoist_p)
8853 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8854 vectype, NULL);
8855 else
8856 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
8857 vectype, &gsi2);
8858 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8859 if (slp)
8860 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8861 else
8863 if (j == 0)
8864 *vec_stmt = new_stmt;
8865 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8868 return true;
8871 if (memory_access_type == VMAT_ELEMENTWISE
8872 || memory_access_type == VMAT_STRIDED_SLP)
8874 gimple_stmt_iterator incr_gsi;
8875 bool insert_after;
8876 tree offvar;
8877 tree ivstep;
8878 tree running_off;
8879 vec<constructor_elt, va_gc> *v = NULL;
8880 tree stride_base, stride_step, alias_off;
8881 /* Checked by get_load_store_type. */
8882 unsigned int const_nunits = nunits.to_constant ();
8883 unsigned HOST_WIDE_INT cst_offset = 0;
8884 tree dr_offset;
8886 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
8887 gcc_assert (!nested_in_vect_loop);
8889 if (grouped_load)
8891 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8892 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8894 else
8896 first_stmt_info = stmt_info;
8897 first_dr_info = dr_info;
8899 if (slp && grouped_load)
8901 group_size = DR_GROUP_SIZE (first_stmt_info);
8902 ref_type = get_group_alias_ptr_type (first_stmt_info);
8904 else
8906 if (grouped_load)
8907 cst_offset
8908 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8909 * vect_get_place_in_interleaving_chain (stmt_info,
8910 first_stmt_info));
8911 group_size = 1;
8912 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8915 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8916 stride_base
8917 = fold_build_pointer_plus
8918 (DR_BASE_ADDRESS (first_dr_info->dr),
8919 size_binop (PLUS_EXPR,
8920 convert_to_ptrofftype (dr_offset),
8921 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8922 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8924 /* For a load with loop-invariant (but other than power-of-2)
8925 stride (i.e. not a grouped access) like so:
8927 for (i = 0; i < n; i += stride)
8928 ... = array[i];
8930 we generate a new induction variable and new accesses to
8931 form a new vector (or vectors, depending on ncopies):
8933 for (j = 0; ; j += VF*stride)
8934 tmp1 = array[j];
8935 tmp2 = array[j + stride];
8937 vectemp = {tmp1, tmp2, ...}
8940 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8941 build_int_cst (TREE_TYPE (stride_step), vf));
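/* E.g. with a constant VF of 4 the induction variable created below
   advances by 4 * DR_STEP bytes per vector iteration, matching the
   j += VF*stride step in the sketch above (numbers illustrative).  */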
8943 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8945 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8946 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8947 create_iv (stride_base, ivstep, NULL,
8948 loop, &incr_gsi, insert_after,
8949 &offvar, NULL);
8951 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8953 running_off = offvar;
8954 alias_off = build_int_cst (ref_type, 0);
8955 int nloads = const_nunits;
8956 int lnel = 1;
8957 tree ltype = TREE_TYPE (vectype);
8958 tree lvectype = vectype;
8959 auto_vec<tree> dr_chain;
8960 if (memory_access_type == VMAT_STRIDED_SLP)
8962 if (group_size < const_nunits)
8964 /* First check if vec_init optab supports construction from vector
8965 elts directly. Otherwise avoid emitting a constructor of
8966 vector elements by performing the loads using an integer type
8967 of the same size, constructing a vector of those and then
8968 re-interpreting it as the original vector type. This avoids a
8969 huge runtime penalty due to the general inability to perform
8970 store forwarding from smaller stores to a larger load. */
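/* A hedged example (the modes chosen depend on the target): with a V4SI
   vectype and group_size == 2, if the target cannot build a V4SI
   directly from two V2SI halves, vector_vector_composition_type may
   return V2DI with a 64-bit integer piece type, so each pair of SIs is
   loaded as one DI, the two DIs form a V2DI constructor and the result
   is view-converted back to V4SI further below.  */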
8971 tree ptype;
8972 tree vtype
8973 = vector_vector_composition_type (vectype,
8974 const_nunits / group_size,
8975 &ptype);
8976 if (vtype != NULL_TREE)
8978 nloads = const_nunits / group_size;
8979 lnel = group_size;
8980 lvectype = vtype;
8981 ltype = ptype;
8984 else
8986 nloads = 1;
8987 lnel = const_nunits;
8988 ltype = vectype;
8990 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8992 /* Else load the whole vector(1) scalar_type at once if the vectype has just a single element. */
8993 else if (nloads == 1)
8994 ltype = vectype;
8996 if (slp)
8998 /* For SLP permutation support we need to load the whole group,
8999 not only the number of vector stmts the permutation result
9000 fits in. */
9001 if (slp_perm)
9003 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9004 variable VF. */
9005 unsigned int const_vf = vf.to_constant ();
9006 ncopies = CEIL (group_size * const_vf, const_nunits);
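/* Worked instance (numbers illustrative): group_size == 3, a constant
   VF of 4 and const_nunits == 4 give CEIL (3 * 4, 4) == 3 vector
   loads, enough to cover all 12 scalars feeding the permutation.  */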
9007 dr_chain.create (ncopies);
9009 else
9010 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9012 unsigned int group_el = 0;
9013 unsigned HOST_WIDE_INT
9014 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9015 for (j = 0; j < ncopies; j++)
9017 if (nloads > 1)
9018 vec_alloc (v, nloads);
9019 gimple *new_stmt = NULL;
9020 for (i = 0; i < nloads; i++)
9022 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9023 group_el * elsz + cst_offset);
9024 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9025 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9026 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9027 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9028 if (nloads > 1)
9029 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9030 gimple_assign_lhs (new_stmt));
9032 group_el += lnel;
9033 if (! slp
9034 || group_el == group_size)
9036 tree newoff = copy_ssa_name (running_off);
9037 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9038 running_off, stride_step);
9039 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
9041 running_off = newoff;
9042 group_el = 0;
9045 if (nloads > 1)
9047 tree vec_inv = build_constructor (lvectype, v);
9048 new_temp = vect_init_vector (vinfo, stmt_info,
9049 vec_inv, lvectype, gsi);
9050 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9051 if (lvectype != vectype)
9053 new_stmt = gimple_build_assign (make_ssa_name (vectype),
9054 VIEW_CONVERT_EXPR,
9055 build1 (VIEW_CONVERT_EXPR,
9056 vectype, new_temp));
9057 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9061 if (slp)
9063 if (slp_perm)
9064 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
9065 else
9066 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9068 else
9070 if (j == 0)
9071 *vec_stmt = new_stmt;
9072 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9075 if (slp_perm)
9077 unsigned n_perms;
9078 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
9079 false, &n_perms);
9081 return true;
9084 if (memory_access_type == VMAT_GATHER_SCATTER
9085 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9086 grouped_load = false;
9088 if (grouped_load)
9090 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9091 group_size = DR_GROUP_SIZE (first_stmt_info);
9092 /* For SLP vectorization we directly vectorize a subchain
9093 without permutation. */
9094 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9095 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9096 /* For BB vectorization always use the first stmt to base
9097 the data ref pointer on. */
9098 if (bb_vinfo)
9099 first_stmt_info_for_drptr
9100 = vect_find_first_scalar_stmt_in_slp (slp_node);
9102 /* Check if the chain of loads is already vectorized. */
9103 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
9104 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9105 ??? But we can only do so if there is exactly one
9106 as we have no way to get at the rest. Leave the CSE
9107 opportunity alone.
9108 ??? With the group load eventually participating
9109 in multiple different permutations (having multiple
9110 slp nodes which refer to the same group) the CSE
9111 would even be wrong code. See PR56270. */
9112 && !slp)
9114 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9115 return true;
9117 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9118 group_gap_adj = 0;
9120 /* VEC_NUM is the number of vect stmts to be created for this group. */
9121 if (slp)
9123 grouped_load = false;
9124 /* If an SLP permutation is from N elements to N elements,
9125 and if one vector holds a whole number of N, we can load
9126 the inputs to the permutation in the same way as an
9127 unpermuted sequence. In other cases we need to load the
9128 whole group, not only the number of vector stmts the
9129 permutation result fits in. */
9130 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
9131 if (slp_perm
9132 && (group_size != scalar_lanes
9133 || !multiple_p (nunits, group_size)))
9135 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9136 variable VF; see vect_transform_slp_perm_load. */
9137 unsigned int const_vf = vf.to_constant ();
9138 unsigned int const_nunits = nunits.to_constant ();
9139 vec_num = CEIL (group_size * const_vf, const_nunits);
9140 group_gap_adj = vf * group_size - nunits * vec_num;
9142 else
9144 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9145 group_gap_adj
9146 = group_size - scalar_lanes;
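/* E.g. (illustrative): a group of 4 elements of which this SLP node
   uses only 2 lanes gives group_gap_adj == 2; the transform loop
   below then bumps the data pointer past the 2 unused elements once
   the used lanes have been loaded.  */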
9149 else
9150 vec_num = group_size;
9152 ref_type = get_group_alias_ptr_type (first_stmt_info);
9154 else
9156 first_stmt_info = stmt_info;
9157 first_dr_info = dr_info;
9158 group_size = vec_num = 1;
9159 group_gap_adj = 0;
9160 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9163 gcc_assert (alignment_support_scheme);
9164 vec_loop_masks *loop_masks
9165 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9166 ? &LOOP_VINFO_MASKS (loop_vinfo)
9167 : NULL);
9168 vec_loop_lens *loop_lens
9169 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
9170 ? &LOOP_VINFO_LENS (loop_vinfo)
9171 : NULL);
9173 /* Shouldn't go with length-based approach if fully masked. */
9174 gcc_assert (!loop_lens || !loop_masks);
9176 /* Targets with store-lane instructions must not require explicit
9177 realignment. vect_supportable_dr_alignment always returns either
9178 dr_aligned or dr_unaligned_supported for masked operations. */
9179 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9180 && !mask
9181 && !loop_masks)
9182 || alignment_support_scheme == dr_aligned
9183 || alignment_support_scheme == dr_unaligned_supported);
9185 /* In case the vectorization factor (VF) is bigger than the number
9186 of elements that we can fit in a vectype (nunits), we have to generate
9187 more than one vector stmt - i.e - we need to "unroll" the
9188 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9189 from one copy of the vector stmt to the next, in the field
9190 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9191 stages to find the correct vector defs to be used when vectorizing
9192 stmts that use the defs of the current stmt. The example below
9193 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9194 need to create 4 vectorized stmts):
9196 before vectorization:
9197 RELATED_STMT VEC_STMT
9198 S1: x = memref - -
9199 S2: z = x + 1 - -
9201 step 1: vectorize stmt S1:
9202 We first create the vector stmt VS1_0, and, as usual, record a
9203 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9204 Next, we create the vector stmt VS1_1, and record a pointer to
9205 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9206 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9207 stmts and pointers:
9208 RELATED_STMT VEC_STMT
9209 VS1_0: vx0 = memref0 VS1_1 -
9210 VS1_1: vx1 = memref1 VS1_2 -
9211 VS1_2: vx2 = memref2 VS1_3 -
9212 VS1_3: vx3 = memref3 - -
9213 S1: x = load - VS1_0
9214 S2: z = x + 1 - -
9217 /* In case of interleaving (non-unit grouped access):
9219 S1: x2 = &base + 2
9220 S2: x0 = &base
9221 S3: x1 = &base + 1
9222 S4: x3 = &base + 3
9224 Vectorized loads are created in the order of memory accesses
9225 starting from the access of the first stmt of the chain:
9227 VS1: vx0 = &base
9228 VS2: vx1 = &base + vec_size*1
9229 VS3: vx3 = &base + vec_size*2
9230 VS4: vx4 = &base + vec_size*3
9232 Then permutation statements are generated:
9234 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9235 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9238 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9239 (the order of the data-refs in the output of vect_permute_load_chain
9240 corresponds to the order of scalar stmts in the interleaving chain - see
9241 the documentation of vect_permute_load_chain()).
9242 The generation of permutation stmts and recording them in
9243 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9245 In case of both multiple types and interleaving, the vector loads and
9246 permutation stmts above are created for every copy. The result vector
9247 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9248 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9250 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9251 on a target that supports unaligned accesses (dr_unaligned_supported)
9252 we generate the following code:
9253 p = initial_addr;
9254 indx = 0;
9255 loop {
9256 p = p + indx * vectype_size;
9257 vec_dest = *(p);
9258 indx = indx + 1;
9261 Otherwise, the data reference is potentially unaligned on a target that
9262 does not support unaligned accesses (dr_explicit_realign_optimized) -
9263 then generate the following code, in which the data in each iteration is
9264 obtained by two vector loads, one from the previous iteration, and one
9265 from the current iteration:
9266 p1 = initial_addr;
9267 msq_init = *(floor(p1))
9268 p2 = initial_addr + VS - 1;
9269 realignment_token = call target_builtin;
9270 indx = 0;
9271 loop {
9272 p2 = p2 + indx * vectype_size
9273 lsq = *(floor(p2))
9274 vec_dest = realign_load (msq, lsq, realignment_token)
9275 indx = indx + 1;
9276 msq = lsq;
9277 } */
9279 /* If the misalignment remains the same throughout the execution of the
9280 loop, we can create the init_addr and permutation mask at the loop
9281 preheader. Otherwise, it needs to be created inside the loop.
9282 This can only occur when vectorizing memory accesses in the inner-loop
9283 nested within an outer-loop that is being vectorized. */
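/* A hedged example: for an inner-loop access whose outer-loop DR step
   is only 4-byte aligned while the vector size is 16 bytes, the
   misalignment changes from one outer iteration to the next, so the
   realignment data must be computed inside the loop.  */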
9285 if (nested_in_vect_loop
9286 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9287 GET_MODE_SIZE (TYPE_MODE (vectype))))
9289 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9290 compute_in_loop = true;
9293 bool diff_first_stmt_info
9294 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9296 tree offset = NULL_TREE;
9297 if ((alignment_support_scheme == dr_explicit_realign_optimized
9298 || alignment_support_scheme == dr_explicit_realign)
9299 && !compute_in_loop)
9301 /* If we have a different first_stmt_info, we can't set up realignment
9302 here, since we can't guarantee first_stmt_info's DR has been
9303 initialized yet; instead use first_stmt_info_for_drptr's DR, bumping
9304 by the distance from first_stmt_info's DR as below. */
9305 if (!diff_first_stmt_info)
9306 msq = vect_setup_realignment (vinfo,
9307 first_stmt_info, gsi, &realignment_token,
9308 alignment_support_scheme, NULL_TREE,
9309 &at_loop);
9310 if (alignment_support_scheme == dr_explicit_realign_optimized)
9312 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9313 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9314 size_one_node);
9315 gcc_assert (!first_stmt_info_for_drptr);
9318 else
9319 at_loop = loop;
9321 if (!known_eq (poffset, 0))
9322 offset = (offset
9323 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
9324 : size_int (poffset));
9326 tree bump;
9327 tree vec_offset = NULL_TREE;
9328 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9330 aggr_type = NULL_TREE;
9331 bump = NULL_TREE;
9333 else if (memory_access_type == VMAT_GATHER_SCATTER)
9335 aggr_type = elem_type;
9336 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9337 &bump, &vec_offset);
9339 else
9341 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9342 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9343 else
9344 aggr_type = vectype;
9345 bump = vect_get_data_ptr_increment (vinfo, dr_info, aggr_type,
9346 memory_access_type);
9349 vec<tree> vec_offsets = vNULL;
9350 auto_vec<tree> vec_masks;
9351 if (mask)
9352 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
9353 mask, &vec_masks, mask_vectype, NULL_TREE);
9354 tree vec_mask = NULL_TREE;
9355 poly_uint64 group_elt = 0;
9356 for (j = 0; j < ncopies; j++)
9358 /* 1. Create the vector or array pointer update chain. */
9359 if (j == 0)
9361 bool simd_lane_access_p
9362 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9363 if (simd_lane_access_p
9364 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9365 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9366 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9367 && integer_zerop (DR_INIT (first_dr_info->dr))
9368 && alias_sets_conflict_p (get_alias_set (aggr_type),
9369 get_alias_set (TREE_TYPE (ref_type)))
9370 && (alignment_support_scheme == dr_aligned
9371 || alignment_support_scheme == dr_unaligned_supported))
9373 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9374 dataref_offset = build_int_cst (ref_type, 0);
9376 else if (diff_first_stmt_info)
9378 dataref_ptr
9379 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
9380 aggr_type, at_loop, offset, &dummy,
9381 gsi, &ptr_incr, simd_lane_access_p,
9382 bump);
9383 /* Adjust the pointer by the difference to first_stmt. */
9384 data_reference_p ptrdr
9385 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9386 tree diff
9387 = fold_convert (sizetype,
9388 size_binop (MINUS_EXPR,
9389 DR_INIT (first_dr_info->dr),
9390 DR_INIT (ptrdr)));
9391 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9392 stmt_info, diff);
9393 if (alignment_support_scheme == dr_explicit_realign)
9395 msq = vect_setup_realignment (vinfo,
9396 first_stmt_info_for_drptr, gsi,
9397 &realignment_token,
9398 alignment_support_scheme,
9399 dataref_ptr, &at_loop);
9400 gcc_assert (!compute_in_loop);
9403 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9405 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9406 &gs_info, &dataref_ptr,
9407 &vec_offsets);
9409 else
9410 dataref_ptr
9411 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9412 at_loop,
9413 offset, &dummy, gsi, &ptr_incr,
9414 simd_lane_access_p, bump);
9415 if (mask)
9416 vec_mask = vec_masks[0];
9418 else
9420 if (dataref_offset)
9421 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9422 bump);
9423 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9424 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9425 stmt_info, bump);
9426 if (mask)
9427 vec_mask = vec_masks[j];
9430 if (grouped_load || slp_perm)
9431 dr_chain.create (vec_num);
9433 gimple *new_stmt = NULL;
9434 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9436 tree vec_array;
9438 vec_array = create_vector_array (vectype, vec_num);
9440 tree final_mask = NULL_TREE;
9441 if (loop_masks)
9442 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9443 vectype, j);
9444 if (vec_mask)
9445 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9446 vec_mask, gsi);
9448 gcall *call;
9449 if (final_mask)
9451 /* Emit:
9452 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9453 VEC_MASK). */
9454 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
9455 tree alias_ptr = build_int_cst (ref_type, align);
9456 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9457 dataref_ptr, alias_ptr,
9458 final_mask);
9460 else
9462 /* Emit:
9463 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9464 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9465 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9467 gimple_call_set_lhs (call, vec_array);
9468 gimple_call_set_nothrow (call, true);
9469 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9470 new_stmt = call;
9472 /* Extract each vector into an SSA_NAME. */
9473 for (i = 0; i < vec_num; i++)
9475 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
9476 vec_array, i);
9477 dr_chain.quick_push (new_temp);
9480 /* Record the mapping between SSA_NAMEs and statements. */
9481 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
9483 /* Record that VEC_ARRAY is now dead. */
9484 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9486 else
9488 for (i = 0; i < vec_num; i++)
9490 tree final_mask = NULL_TREE;
9491 if (loop_masks
9492 && memory_access_type != VMAT_INVARIANT)
9493 final_mask = vect_get_loop_mask (gsi, loop_masks,
9494 vec_num * ncopies,
9495 vectype, vec_num * j + i);
9496 if (vec_mask)
9497 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9498 vec_mask, gsi);
9500 if (i > 0)
9501 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9502 gsi, stmt_info, bump);
9504 /* 2. Create the vector-load in the loop. */
9505 switch (alignment_support_scheme)
9507 case dr_aligned:
9508 case dr_unaligned_supported:
9510 unsigned int misalign;
9511 unsigned HOST_WIDE_INT align;
9513 if (memory_access_type == VMAT_GATHER_SCATTER
9514 && gs_info.ifn != IFN_LAST)
9516 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9517 vec_offset = vec_offsets[j];
9518 tree zero = build_zero_cst (vectype);
9519 tree scale = size_int (gs_info.scale);
9520 gcall *call;
9521 if (final_mask)
9522 call = gimple_build_call_internal
9523 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9524 vec_offset, scale, zero, final_mask);
9525 else
9526 call = gimple_build_call_internal
9527 (IFN_GATHER_LOAD, 4, dataref_ptr,
9528 vec_offset, scale, zero);
9529 gimple_call_set_nothrow (call, true);
9530 new_stmt = call;
9531 data_ref = NULL_TREE;
9532 break;
9534 else if (memory_access_type == VMAT_GATHER_SCATTER)
9536 /* Emulated gather-scatter. */
9537 gcc_assert (!final_mask);
9538 unsigned HOST_WIDE_INT const_nunits
9539 = nunits.to_constant ();
9540 unsigned HOST_WIDE_INT const_offset_nunits
9541 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9542 .to_constant ();
9543 vec<constructor_elt, va_gc> *ctor_elts;
9544 vec_alloc (ctor_elts, const_nunits);
9545 gimple_seq stmts = NULL;
9546 /* We support offset vectors with more elements
9547 than the data vector for now. */
9548 unsigned HOST_WIDE_INT factor
9549 = const_offset_nunits / const_nunits;
9550 vec_offset = vec_offsets[j / factor];
9551 unsigned elt_offset = (j % factor) * const_nunits;
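/* Illustrative numbers only: with a V2DF data vector
   (const_nunits == 2) and a V4SI offset vector
   (const_offset_nunits == 4), factor == 2, so copies j == 0 and
   j == 1 both use vec_offsets[0], reading its lanes 0-1 resp. 2-3
   via elt_offset.  */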
9552 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9553 tree scale = size_int (gs_info.scale);
9554 align
9555 = get_object_alignment (DR_REF (first_dr_info->dr));
9556 tree ltype = build_aligned_type (TREE_TYPE (vectype),
9557 align);
9558 for (unsigned k = 0; k < const_nunits; ++k)
9560 tree boff = size_binop (MULT_EXPR,
9561 TYPE_SIZE (idx_type),
9562 bitsize_int
9563 (k + elt_offset));
9564 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9565 idx_type, vec_offset,
9566 TYPE_SIZE (idx_type),
9567 boff);
9568 idx = gimple_convert (&stmts, sizetype, idx);
9569 idx = gimple_build (&stmts, MULT_EXPR,
9570 sizetype, idx, scale);
9571 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9572 TREE_TYPE (dataref_ptr),
9573 dataref_ptr, idx);
9574 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9575 tree elt = make_ssa_name (TREE_TYPE (vectype));
9576 tree ref = build2 (MEM_REF, ltype, ptr,
9577 build_int_cst (ref_type, 0));
9578 new_stmt = gimple_build_assign (elt, ref);
9579 gimple_seq_add_stmt (&stmts, new_stmt);
9580 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
9582 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9583 new_stmt = gimple_build_assign (NULL_TREE,
9584 build_constructor
9585 (vectype, ctor_elts));
9586 data_ref = NULL_TREE;
9587 break;
9590 align =
9591 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9592 if (alignment_support_scheme == dr_aligned)
9593 misalign = 0;
9594 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9596 align = dr_alignment
9597 (vect_dr_behavior (vinfo, first_dr_info));
9598 misalign = 0;
9600 else
9601 misalign = misalignment;
9602 if (dataref_offset == NULL_TREE
9603 && TREE_CODE (dataref_ptr) == SSA_NAME)
9604 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9605 align, misalign);
9606 align = least_bit_hwi (misalign | align);
9608 if (final_mask)
9610 tree ptr = build_int_cst (ref_type,
9611 align * BITS_PER_UNIT);
9612 gcall *call
9613 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9614 dataref_ptr, ptr,
9615 final_mask);
9616 gimple_call_set_nothrow (call, true);
9617 new_stmt = call;
9618 data_ref = NULL_TREE;
9620 else if (loop_lens && memory_access_type != VMAT_INVARIANT)
9622 tree final_len
9623 = vect_get_loop_len (loop_vinfo, loop_lens,
9624 vec_num * ncopies,
9625 vec_num * j + i);
9626 tree ptr = build_int_cst (ref_type,
9627 align * BITS_PER_UNIT);
9628 gcall *call
9629 = gimple_build_call_internal (IFN_LEN_LOAD, 3,
9630 dataref_ptr, ptr,
9631 final_len);
9632 gimple_call_set_nothrow (call, true);
9633 new_stmt = call;
9634 data_ref = NULL_TREE;
9636 /* Need conversion if it's wrapped with VnQI. */
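/* A hedged example (whether this triggers depends on the target):
   if IFN_LEN_LOAD is only provided for byte element modes, a V4SF
   load is emitted with a V16QI left-hand side and the result is
   view-converted back to V4SF below.  */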
9637 machine_mode vmode = TYPE_MODE (vectype);
9638 opt_machine_mode new_ovmode
9639 = get_len_load_store_mode (vmode, true);
9640 machine_mode new_vmode = new_ovmode.require ();
9641 if (vmode != new_vmode)
9643 tree qi_type = unsigned_intQI_type_node;
9644 tree new_vtype
9645 = build_vector_type_for_mode (qi_type, new_vmode);
9646 tree var = vect_get_new_ssa_name (new_vtype,
9647 vect_simple_var);
9648 gimple_set_lhs (call, var);
9649 vect_finish_stmt_generation (vinfo, stmt_info, call,
9650 gsi);
9651 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
9652 new_stmt
9653 = gimple_build_assign (vec_dest,
9654 VIEW_CONVERT_EXPR, op);
9657 else
9659 tree ltype = vectype;
9660 tree new_vtype = NULL_TREE;
9661 unsigned HOST_WIDE_INT gap
9662 = DR_GROUP_GAP (first_stmt_info);
9663 unsigned int vect_align
9664 = vect_known_alignment_in_bytes (first_dr_info,
9665 vectype);
9666 unsigned int scalar_dr_size
9667 = vect_get_scalar_dr_size (first_dr_info);
9668 /* If there's no peeling for gaps but we have a gap
9669 with slp loads then load the lower half of the
9670 vector only. See get_group_load_store_type for
9671 when we apply this optimization. */
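/* Sketch of the case handled here (numbers invented): a group of
   4 ints with DR_GROUP_GAP == 2 and a V4SI vectype satisfies
   nunits == group_size and nunits == (group_size - gap) * 2, so,
   provided the gap also covers the known alignment, only the low
   half is loaded and the other half is zero-filled by the
   constructor below.  */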
9672 if (slp
9673 && loop_vinfo
9674 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9675 && gap != 0
9676 && known_eq (nunits, (group_size - gap) * 2)
9677 && known_eq (nunits, group_size)
9678 && gap >= (vect_align / scalar_dr_size))
9680 tree half_vtype;
9681 new_vtype
9682 = vector_vector_composition_type (vectype, 2,
9683 &half_vtype);
9684 if (new_vtype != NULL_TREE)
9685 ltype = half_vtype;
9687 tree offset
9688 = (dataref_offset ? dataref_offset
9689 : build_int_cst (ref_type, 0));
9690 if (ltype != vectype
9691 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9693 unsigned HOST_WIDE_INT gap_offset
9694 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
9695 tree gapcst = build_int_cst (ref_type, gap_offset);
9696 offset = size_binop (PLUS_EXPR, offset, gapcst);
9698 data_ref
9699 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
9700 if (alignment_support_scheme == dr_aligned)
9702 else
9703 TREE_TYPE (data_ref)
9704 = build_aligned_type (TREE_TYPE (data_ref),
9705 align * BITS_PER_UNIT);
9706 if (ltype != vectype)
9708 vect_copy_ref_info (data_ref,
9709 DR_REF (first_dr_info->dr));
9710 tree tem = make_ssa_name (ltype);
9711 new_stmt = gimple_build_assign (tem, data_ref);
9712 vect_finish_stmt_generation (vinfo, stmt_info,
9713 new_stmt, gsi);
9714 data_ref = NULL;
9715 vec<constructor_elt, va_gc> *v;
9716 vec_alloc (v, 2);
9717 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9719 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9720 build_zero_cst (ltype));
9721 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9723 else
9725 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9726 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9727 build_zero_cst (ltype));
9729 gcc_assert (new_vtype != NULL_TREE);
9730 if (new_vtype == vectype)
9731 new_stmt = gimple_build_assign (
9732 vec_dest, build_constructor (vectype, v));
9733 else
9735 tree new_vname = make_ssa_name (new_vtype);
9736 new_stmt = gimple_build_assign (
9737 new_vname, build_constructor (new_vtype, v));
9738 vect_finish_stmt_generation (vinfo, stmt_info,
9739 new_stmt, gsi);
9740 new_stmt = gimple_build_assign (
9741 vec_dest, build1 (VIEW_CONVERT_EXPR, vectype,
9742 new_vname));
9746 break;
9748 case dr_explicit_realign:
9750 tree ptr, bump;
9752 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9754 if (compute_in_loop)
9755 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
9756 &realignment_token,
9757 dr_explicit_realign,
9758 dataref_ptr, NULL);
9760 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9761 ptr = copy_ssa_name (dataref_ptr);
9762 else
9763 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9764 // For explicit realign the target alignment should be
9765 // known at compile time.
9766 unsigned HOST_WIDE_INT align =
9767 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9768 new_stmt = gimple_build_assign
9769 (ptr, BIT_AND_EXPR, dataref_ptr,
9770 build_int_cst
9771 (TREE_TYPE (dataref_ptr),
9772 -(HOST_WIDE_INT) align));
9773 vect_finish_stmt_generation (vinfo, stmt_info,
9774 new_stmt, gsi);
9775 data_ref
9776 = build2 (MEM_REF, vectype, ptr,
9777 build_int_cst (ref_type, 0));
9778 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9779 vec_dest = vect_create_destination_var (scalar_dest,
9780 vectype);
9781 new_stmt = gimple_build_assign (vec_dest, data_ref);
9782 new_temp = make_ssa_name (vec_dest, new_stmt);
9783 gimple_assign_set_lhs (new_stmt, new_temp);
9784 gimple_move_vops (new_stmt, stmt_info->stmt);
9785 vect_finish_stmt_generation (vinfo, stmt_info,
9786 new_stmt, gsi);
9787 msq = new_temp;
9789 bump = size_binop (MULT_EXPR, vs,
9790 TYPE_SIZE_UNIT (elem_type));
9791 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9792 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi,
9793 stmt_info, bump);
9794 new_stmt = gimple_build_assign
9795 (NULL_TREE, BIT_AND_EXPR, ptr,
9796 build_int_cst
9797 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9798 ptr = copy_ssa_name (ptr, new_stmt);
9799 gimple_assign_set_lhs (new_stmt, ptr);
9800 vect_finish_stmt_generation (vinfo, stmt_info,
9801 new_stmt, gsi);
9802 data_ref
9803 = build2 (MEM_REF, vectype, ptr,
9804 build_int_cst (ref_type, 0));
9805 break;
9807 case dr_explicit_realign_optimized:
9809 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9810 new_temp = copy_ssa_name (dataref_ptr);
9811 else
9812 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9813 // We should only be doing this if we know the target
9814 // alignment at compile time.
9815 unsigned HOST_WIDE_INT align =
9816 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9817 new_stmt = gimple_build_assign
9818 (new_temp, BIT_AND_EXPR, dataref_ptr,
9819 build_int_cst (TREE_TYPE (dataref_ptr),
9820 -(HOST_WIDE_INT) align));
9821 vect_finish_stmt_generation (vinfo, stmt_info,
9822 new_stmt, gsi);
9823 data_ref
9824 = build2 (MEM_REF, vectype, new_temp,
9825 build_int_cst (ref_type, 0));
9826 break;
9828 default:
9829 gcc_unreachable ();
9831 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9832 /* DATA_REF is null if we've already built the statement. */
9833 if (data_ref)
9835 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9836 new_stmt = gimple_build_assign (vec_dest, data_ref);
9838 new_temp = make_ssa_name (vec_dest, new_stmt);
9839 gimple_set_lhs (new_stmt, new_temp);
9840 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9842 /* 3. Handle explicit realignment if necessary/supported.
9843 Create in loop:
9844 vec_dest = realign_load (msq, lsq, realignment_token) */
9845 if (alignment_support_scheme == dr_explicit_realign_optimized
9846 || alignment_support_scheme == dr_explicit_realign)
9848 lsq = gimple_assign_lhs (new_stmt);
9849 if (!realignment_token)
9850 realignment_token = dataref_ptr;
9851 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9852 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9853 msq, lsq, realignment_token);
9854 new_temp = make_ssa_name (vec_dest, new_stmt);
9855 gimple_assign_set_lhs (new_stmt, new_temp);
9856 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9858 if (alignment_support_scheme == dr_explicit_realign_optimized)
9860 gcc_assert (phi);
9861 if (i == vec_num - 1 && j == ncopies - 1)
9862 add_phi_arg (phi, lsq,
9863 loop_latch_edge (containing_loop),
9864 UNKNOWN_LOCATION);
9865 msq = lsq;
9869 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9871 tree perm_mask = perm_mask_for_reverse (vectype);
9872 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
9873 perm_mask, stmt_info, gsi);
9874 new_stmt = SSA_NAME_DEF_STMT (new_temp);
9877 /* Collect vector loads and later create their permutation in
9878 vect_transform_grouped_load (). */
9879 if (grouped_load || slp_perm)
9880 dr_chain.quick_push (new_temp);
9882 /* Store vector loads in the corresponding SLP_NODE. */
9883 if (slp && !slp_perm)
9884 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
9886 /* With SLP permutation we load the gaps as well; without it
9887 we need to skip the gaps after we have fully loaded
9888 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9889 group_elt += nunits;
9890 if (maybe_ne (group_gap_adj, 0U)
9891 && !slp_perm
9892 && known_eq (group_elt, group_size - group_gap_adj))
9894 poly_wide_int bump_val
9895 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9896 * group_gap_adj);
9897 if (tree_int_cst_sgn
9898 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9899 bump_val = -bump_val;
9900 tree bump = wide_int_to_tree (sizetype, bump_val);
9901 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9902 gsi, stmt_info, bump);
9903 group_elt = 0;
9906 /* Bump the vector pointer to account for a gap or for excess
9907 elements loaded for a permuted SLP load. */
9908 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9910 poly_wide_int bump_val
9911 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9912 * group_gap_adj);
9913 if (tree_int_cst_sgn
9914 (vect_dr_behavior (vinfo, dr_info)->step) == -1)
9915 bump_val = -bump_val;
9916 tree bump = wide_int_to_tree (sizetype, bump_val);
9917 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9918 stmt_info, bump);
9922 if (slp && !slp_perm)
9923 continue;
9925 if (slp_perm)
9927 unsigned n_perms;
9928 /* For SLP we know we've seen all possible uses of dr_chain so
9929 direct vect_transform_slp_perm_load to DCE the unused parts.
9930 ??? This is a hack to prevent compile-time issues as seen
9931 in PR101120 and friends. */
9932 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
9933 gsi, vf, false, &n_perms,
9934 nullptr, true);
9935 gcc_assert (ok);
9937 else
9939 if (grouped_load)
9941 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9942 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
9943 group_size, gsi);
9944 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9946 else
9948 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9951 dr_chain.release ();
9953 if (!slp)
9954 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
9956 return true;
9959 /* Function vect_is_simple_cond.
9961 Input:
9962 LOOP - the loop that is being vectorized.
9963 COND - Condition that is checked for simple use.
9965 Output:
9966 *COMP_VECTYPE - the vector type for the comparison.
9967 *DTS - The def types for the arguments of the comparison
9969 Returns whether a COND can be vectorized. Checks whether
9970 condition operands are supportable using vect_is_simple_use. */
9972 static bool
9973 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
9974 slp_tree slp_node, tree *comp_vectype,
9975 enum vect_def_type *dts, tree vectype)
9977 tree lhs, rhs;
9978 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9979 slp_tree slp_op;
9981 /* Mask case. */
9982 if (TREE_CODE (cond) == SSA_NAME
9983 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9985 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
9986 &slp_op, &dts[0], comp_vectype)
9987 || !*comp_vectype
9988 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9989 return false;
9990 return true;
9993 if (!COMPARISON_CLASS_P (cond))
9994 return false;
9996 lhs = TREE_OPERAND (cond, 0);
9997 rhs = TREE_OPERAND (cond, 1);
9999 if (TREE_CODE (lhs) == SSA_NAME)
10001 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
10002 &lhs, &slp_op, &dts[0], &vectype1))
10003 return false;
10005 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
10006 || TREE_CODE (lhs) == FIXED_CST)
10007 dts[0] = vect_constant_def;
10008 else
10009 return false;
10011 if (TREE_CODE (rhs) == SSA_NAME)
10013 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
10014 &rhs, &slp_op, &dts[1], &vectype2))
10015 return false;
10017 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
10018 || TREE_CODE (rhs) == FIXED_CST)
10019 dts[1] = vect_constant_def;
10020 else
10021 return false;
10023 if (vectype1 && vectype2
10024 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10025 TYPE_VECTOR_SUBPARTS (vectype2)))
10026 return false;
10028 *comp_vectype = vectype1 ? vectype1 : vectype2;
10029 /* Invariant comparison. */
10030 if (! *comp_vectype)
10032 tree scalar_type = TREE_TYPE (lhs);
10033 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
10034 *comp_vectype = truth_type_for (vectype);
10035 else
10037 /* If we can widen the comparison to match vectype do so. */
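/* E.g. (illustrative, non-SLP case): comparing two shorts while
   VECTYPE is V4SI widens the comparison to a 32-bit integer type so
   that the comparison vector type has the same number of elements
   as VECTYPE.  */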
10038 if (INTEGRAL_TYPE_P (scalar_type)
10039 && !slp_node
10040 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
10041 TYPE_SIZE (TREE_TYPE (vectype))))
10042 scalar_type = build_nonstandard_integer_type
10043 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
10044 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
10045 slp_node);
10049 return true;
10052 /* vectorizable_condition.
10054 Check if STMT_INFO is a conditional modify expression that can be vectorized.
10055 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10056 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
10057 at GSI.
10059 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
10061 Return true if STMT_INFO is vectorizable in this way. */
10063 static bool
10064 vectorizable_condition (vec_info *vinfo,
10065 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10066 gimple **vec_stmt,
10067 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10069 tree scalar_dest = NULL_TREE;
10070 tree vec_dest = NULL_TREE;
10071 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
10072 tree then_clause, else_clause;
10073 tree comp_vectype = NULL_TREE;
10074 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
10075 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
10076 tree vec_compare;
10077 tree new_temp;
10078 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10079 enum vect_def_type dts[4]
10080 = {vect_unknown_def_type, vect_unknown_def_type,
10081 vect_unknown_def_type, vect_unknown_def_type};
10082 int ndts = 4;
10083 int ncopies;
10084 int vec_num;
10085 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10086 int i;
10087 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10088 vec<tree> vec_oprnds0 = vNULL;
10089 vec<tree> vec_oprnds1 = vNULL;
10090 vec<tree> vec_oprnds2 = vNULL;
10091 vec<tree> vec_oprnds3 = vNULL;
10092 tree vec_cmp_type;
10093 bool masked = false;
10095 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10096 return false;
10098 /* Is vectorizable conditional operation? */
10099 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10100 if (!stmt)
10101 return false;
10103 code = gimple_assign_rhs_code (stmt);
10104 if (code != COND_EXPR)
10105 return false;
10107 stmt_vec_info reduc_info = NULL;
10108 int reduc_index = -1;
10109 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
10110 bool for_reduction
10111 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
10112 if (for_reduction)
10114 if (STMT_SLP_TYPE (stmt_info))
10115 return false;
10116 reduc_info = info_for_reduction (vinfo, stmt_info);
10117 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
10118 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
10119 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
10120 || reduc_index != -1);
10122 else
10124 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10125 return false;
10128 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10129 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10131 if (slp_node)
10133 ncopies = 1;
10134 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10136 else
10138 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10139 vec_num = 1;
10142 gcc_assert (ncopies >= 1);
10143 if (for_reduction && ncopies > 1)
10144 return false; /* FORNOW */
10146 cond_expr = gimple_assign_rhs1 (stmt);
10148 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
10149 &comp_vectype, &dts[0], vectype)
10150 || !comp_vectype)
10151 return false;
10153 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
10154 slp_tree then_slp_node, else_slp_node;
10155 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
10156 &then_clause, &then_slp_node, &dts[2], &vectype1))
10157 return false;
10158 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
10159 &else_clause, &else_slp_node, &dts[3], &vectype2))
10160 return false;
10162 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10163 return false;
10165 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10166 return false;
10168 masked = !COMPARISON_CLASS_P (cond_expr);
10169 vec_cmp_type = truth_type_for (comp_vectype);
10171 if (vec_cmp_type == NULL_TREE)
10172 return false;
10174 cond_code = TREE_CODE (cond_expr);
10175 if (!masked)
10177 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10178 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10181 /* For conditional reductions, the "then" value needs to be the candidate
10182 value calculated by this iteration while the "else" value needs to be
10183 the result carried over from previous iterations. If the COND_EXPR
10184 is the other way around, we need to swap it. */
10185 bool must_invert_cmp_result = false;
10186 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10188 if (masked)
10189 must_invert_cmp_result = true;
10190 else
10192 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10193 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10194 if (new_code == ERROR_MARK)
10195 must_invert_cmp_result = true;
10196 else
10198 cond_code = new_code;
10199 /* Make sure we don't accidentally use the old condition. */
10200 cond_expr = NULL_TREE;
10203 std::swap (then_clause, else_clause);
10206 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10208 /* Boolean values may have another representation in vectors
10209 and therefore we prefer bit operations over comparison for
10210 them (which also works for scalar masks). We store opcodes
10211 to use in bitop1 and bitop2. Statement is vectorized as
10212 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10213 depending on bitop1 and bitop2 arity. */
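/* For example, with boolean (mask) operands a > b is emitted as
   tmp = ~b; mask = a & tmp; i.e. bitop1 == BIT_NOT_EXPR applied to
   the second operand and bitop2 == BIT_AND_EXPR combining it with
   the first.  */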
10214 switch (cond_code)
10216 case GT_EXPR:
10217 bitop1 = BIT_NOT_EXPR;
10218 bitop2 = BIT_AND_EXPR;
10219 break;
10220 case GE_EXPR:
10221 bitop1 = BIT_NOT_EXPR;
10222 bitop2 = BIT_IOR_EXPR;
10223 break;
10224 case LT_EXPR:
10225 bitop1 = BIT_NOT_EXPR;
10226 bitop2 = BIT_AND_EXPR;
10227 std::swap (cond_expr0, cond_expr1);
10228 break;
10229 case LE_EXPR:
10230 bitop1 = BIT_NOT_EXPR;
10231 bitop2 = BIT_IOR_EXPR;
10232 std::swap (cond_expr0, cond_expr1);
10233 break;
10234 case NE_EXPR:
10235 bitop1 = BIT_XOR_EXPR;
10236 break;
10237 case EQ_EXPR:
10238 bitop1 = BIT_XOR_EXPR;
10239 bitop2 = BIT_NOT_EXPR;
10240 break;
10241 default:
10242 return false;
10244 cond_code = SSA_NAME;
10247 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10248 && reduction_type == EXTRACT_LAST_REDUCTION
10249 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10251 if (dump_enabled_p ())
10252 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10253 "reduction comparison operation not supported.\n");
10254 return false;
10257 if (!vec_stmt)
10259 if (bitop1 != NOP_EXPR)
10261 machine_mode mode = TYPE_MODE (comp_vectype);
10262 optab optab;
10264 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10265 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10266 return false;
10268 if (bitop2 != NOP_EXPR)
10270 optab = optab_for_tree_code (bitop2, comp_vectype,
10271 optab_default);
10272 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10273 return false;
10277 vect_cost_for_stmt kind = vector_stmt;
10278 if (reduction_type == EXTRACT_LAST_REDUCTION)
10279 /* Count one reduction-like operation per vector. */
10280 kind = vec_to_scalar;
10281 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10282 return false;
10284 if (slp_node
10285 && (!vect_maybe_update_slp_op_vectype
10286 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
10287 || (op_adjust == 1
10288 && !vect_maybe_update_slp_op_vectype
10289 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
10290 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
10291 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
10293 if (dump_enabled_p ())
10294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10295 "incompatible vector types for invariants\n");
10296 return false;
10299 if (loop_vinfo && for_reduction
10300 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10302 if (reduction_type == EXTRACT_LAST_REDUCTION)
10303 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10304 ncopies * vec_num, vectype, NULL);
10305 /* Extra inactive lanes should be safe for vect_nested_cycle. */
10306 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
10308 if (dump_enabled_p ())
10309 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10310 "conditional reduction prevents the use"
10311 " of partial vectors.\n");
10312 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
10316 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10317 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
10318 cost_vec, kind);
10319 return true;
10322 /* Transform. */
10324 /* Handle def. */
10325 scalar_dest = gimple_assign_lhs (stmt);
10326 if (reduction_type != EXTRACT_LAST_REDUCTION)
10327 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10329 bool swap_cond_operands = false;
10331 /* See whether another part of the vectorized code applies a loop
10332 mask to the condition, or to its inverse. */
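/* A hedged example: if the scalar condition here is a > b ? x : y
   and another statement in the loop is masked on a <= b, the lookup
   of the inverted comparison succeeds, so we use a <= b as the
   compare, swap the then/else operands and thereby share a single
   masked comparison.  */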
10334 vec_loop_masks *masks = NULL;
10335 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10337 if (reduction_type == EXTRACT_LAST_REDUCTION)
10338 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10339 else
10341 scalar_cond_masked_key cond (cond_expr, ncopies);
10342 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10343 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10344 else
10346 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10347 cond.code = invert_tree_comparison (cond.code, honor_nans);
10348 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10350 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10351 cond_code = cond.code;
10352 swap_cond_operands = true;
10358 /* Handle cond expr. */
10359 if (masked)
10360 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10361 cond_expr, &vec_oprnds0, comp_vectype,
10362 then_clause, &vec_oprnds2, vectype,
10363 reduction_type != EXTRACT_LAST_REDUCTION
10364 ? else_clause : NULL, &vec_oprnds3, vectype);
10365 else
10366 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10367 cond_expr0, &vec_oprnds0, comp_vectype,
10368 cond_expr1, &vec_oprnds1, comp_vectype,
10369 then_clause, &vec_oprnds2, vectype,
10370 reduction_type != EXTRACT_LAST_REDUCTION
10371 ? else_clause : NULL, &vec_oprnds3, vectype);
10373 /* Arguments are ready. Create the new vector stmt. */
10374 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10376 vec_then_clause = vec_oprnds2[i];
10377 if (reduction_type != EXTRACT_LAST_REDUCTION)
10378 vec_else_clause = vec_oprnds3[i];
10380 if (swap_cond_operands)
10381 std::swap (vec_then_clause, vec_else_clause);
10383 if (masked)
10384 vec_compare = vec_cond_lhs;
10385 else
10387 vec_cond_rhs = vec_oprnds1[i];
10388 if (bitop1 == NOP_EXPR)
10390 gimple_seq stmts = NULL;
10391 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
10392 vec_cond_lhs, vec_cond_rhs);
10393 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
10395 else
10397 new_temp = make_ssa_name (vec_cmp_type);
10398 gassign *new_stmt;
10399 if (bitop1 == BIT_NOT_EXPR)
10400 new_stmt = gimple_build_assign (new_temp, bitop1,
10401 vec_cond_rhs);
10402 else
10403 new_stmt
10404 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10405 vec_cond_rhs);
10406 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10407 if (bitop2 == NOP_EXPR)
10408 vec_compare = new_temp;
10409 else if (bitop2 == BIT_NOT_EXPR)
10411 /* Instead of doing ~x ? y : z do x ? z : y. */
10412 vec_compare = new_temp;
10413 std::swap (vec_then_clause, vec_else_clause);
10415 else
10417 vec_compare = make_ssa_name (vec_cmp_type);
10418 new_stmt
10419 = gimple_build_assign (vec_compare, bitop2,
10420 vec_cond_lhs, new_temp);
10421 vect_finish_stmt_generation (vinfo, stmt_info,
10422 new_stmt, gsi);
10427 /* If we decided to apply a loop mask to the result of the vector
10428 comparison, AND the comparison with the mask now. Later passes
10429 should then be able to reuse the AND results between multiple
10430 vector statements.
10432 For example:
10433 for (int i = 0; i < 100; ++i)
10434 x[i] = y[i] ? z[i] : 10;
10436 results in following optimized GIMPLE:
10438 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10439 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10440 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10441 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10442 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10443 vect_iftmp.11_47, { 10, ... }>;
10445 instead of using masked and unmasked forms of
10446 vec != { 0, ... } (masked in the MASK_LOAD,
10447 unmasked in the VEC_COND_EXPR). */
10449 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10450 in cases where that's necessary. */
10452 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10454 if (!is_gimple_val (vec_compare))
10456 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10457 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10458 vec_compare);
10459 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10460 vec_compare = vec_compare_name;
10463 if (must_invert_cmp_result)
10465 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10466 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10467 BIT_NOT_EXPR,
10468 vec_compare);
10469 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10470 vec_compare = vec_compare_name;
10473 if (masks)
10475 tree loop_mask
10476 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10477 vectype, i);
10478 tree tmp2 = make_ssa_name (vec_cmp_type);
10479 gassign *g
10480 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10481 loop_mask);
10482 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
10483 vec_compare = tmp2;
10487 gimple *new_stmt;
10488 if (reduction_type == EXTRACT_LAST_REDUCTION)
10490 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10491 tree lhs = gimple_get_lhs (old_stmt);
10492 new_stmt = gimple_build_call_internal
10493 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10494 vec_then_clause);
10495 gimple_call_set_lhs (new_stmt, lhs);
10496 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10497 if (old_stmt == gsi_stmt (*gsi))
10498 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
10499 else
10501 /* In this case we're moving the definition to later in the
10502 block. That doesn't matter because the only uses of the
10503 lhs are in phi statements. */
10504 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10505 gsi_remove (&old_gsi, true);
10506 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10509 else
10511 new_temp = make_ssa_name (vec_dest);
10512 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10513 vec_then_clause, vec_else_clause);
10514 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10516 if (slp_node)
10517 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10518 else
10519 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10522 if (!slp_node)
10523 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10525 vec_oprnds0.release ();
10526 vec_oprnds1.release ();
10527 vec_oprnds2.release ();
10528 vec_oprnds3.release ();
10530 return true;
10533 /* vectorizable_comparison.
10535 Check if STMT_INFO is a comparison expression that can be vectorized.
10536 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10537 comparison, put it in VEC_STMT, and insert it at GSI.
10539 Return true if STMT_INFO is vectorizable in this way. */
10541 static bool
10542 vectorizable_comparison (vec_info *vinfo,
10543 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10544 gimple **vec_stmt,
10545 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10547 tree lhs, rhs1, rhs2;
10548 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10549 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10550 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10551 tree new_temp;
10552 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
10553 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10554 int ndts = 2;
10555 poly_uint64 nunits;
10556 int ncopies;
10557 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10558 int i;
10559 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10560 vec<tree> vec_oprnds0 = vNULL;
10561 vec<tree> vec_oprnds1 = vNULL;
10562 tree mask_type;
10563 tree mask;
10565 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10566 return false;
10568 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10569 return false;
10571 mask_type = vectype;
10572 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10574 if (slp_node)
10575 ncopies = 1;
10576 else
10577 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10579 gcc_assert (ncopies >= 1);
10580 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10581 return false;
10583 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10584 if (!stmt)
10585 return false;
10587 code = gimple_assign_rhs_code (stmt);
10589 if (TREE_CODE_CLASS (code) != tcc_comparison)
10590 return false;
10592 slp_tree slp_rhs1, slp_rhs2;
10593 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10594 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
10595 return false;
10597 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
10598 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
10599 return false;
10601 if (vectype1 && vectype2
10602 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10603 TYPE_VECTOR_SUBPARTS (vectype2)))
10604 return false;
10606 vectype = vectype1 ? vectype1 : vectype2;
10608 /* Invariant comparison. */
10609 if (!vectype)
10611 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
10612 vectype = mask_type;
10613 else
10614 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10615 slp_node);
10616 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10617 return false;
10619 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10620 return false;
10622 /* Can't compare mask and non-mask types. */
10623 if (vectype1 && vectype2
10624 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10625 return false;
10627 /* Boolean values may have another representation in vectors
10628 and therefore we prefer bit operations over comparison for
10629 them (which also works for scalar masks). We store opcodes
10630 to use in bitop1 and bitop2. Statement is vectorized as
10631 BITOP2 (rhs1 BITOP1 rhs2) or
10632 rhs1 BITOP2 (BITOP1 rhs2)
10633 depending on bitop1 and bitop2 arity. */
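/* For example (an illustrative sketch of the lowering done below):
   with boolean operands the GT_EXPR case turns
     mask = a > b;
   into
     tmp = ~b;          (bitop1 == BIT_NOT_EXPR)
     mask = a & tmp;    (bitop2 == BIT_AND_EXPR)
   while EQ_EXPR becomes ~(a ^ b) and LT_EXPR/LE_EXPR additionally
   swap the operands via swap_p.  */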
10634 bool swap_p = false;
10635 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10637 if (code == GT_EXPR)
10639 bitop1 = BIT_NOT_EXPR;
10640 bitop2 = BIT_AND_EXPR;
10642 else if (code == GE_EXPR)
10644 bitop1 = BIT_NOT_EXPR;
10645 bitop2 = BIT_IOR_EXPR;
10647 else if (code == LT_EXPR)
10649 bitop1 = BIT_NOT_EXPR;
10650 bitop2 = BIT_AND_EXPR;
10651 swap_p = true;
10653 else if (code == LE_EXPR)
10655 bitop1 = BIT_NOT_EXPR;
10656 bitop2 = BIT_IOR_EXPR;
10657 swap_p = true;
10659 else
10661 bitop1 = BIT_XOR_EXPR;
10662 if (code == EQ_EXPR)
10663 bitop2 = BIT_NOT_EXPR;
10667 if (!vec_stmt)
10669 if (bitop1 == NOP_EXPR)
10671 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10672 return false;
10674 else
10676 machine_mode mode = TYPE_MODE (vectype);
10677 optab optab;
10679 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10680 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10681 return false;
10683 if (bitop2 != NOP_EXPR)
10685 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10686 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10687 return false;
10691 /* Put types on constant and invariant SLP children. */
10692 if (slp_node
10693 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
10694 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
10696 if (dump_enabled_p ())
10697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10698 "incompatible vector types for invariants\n");
10699 return false;
10702 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10703 vect_model_simple_cost (vinfo, stmt_info,
10704 ncopies * (1 + (bitop2 != NOP_EXPR)),
10705 dts, ndts, slp_node, cost_vec);
10706 return true;
10709 /* Transform. */
10711 /* Handle def. */
10712 lhs = gimple_assign_lhs (stmt);
10713 mask = vect_create_destination_var (lhs, mask_type);
10715 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
10716 rhs1, &vec_oprnds0, vectype,
10717 rhs2, &vec_oprnds1, vectype);
10718 if (swap_p)
10719 std::swap (vec_oprnds0, vec_oprnds1);
10721 /* Arguments are ready. Create the new vector stmt. */
10722 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10724 gimple *new_stmt;
10725 vec_rhs2 = vec_oprnds1[i];
10727 new_temp = make_ssa_name (mask);
10728 if (bitop1 == NOP_EXPR)
10730 new_stmt = gimple_build_assign (new_temp, code,
10731 vec_rhs1, vec_rhs2);
10732 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10734 else
10736 if (bitop1 == BIT_NOT_EXPR)
10737 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10738 else
10739 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10740 vec_rhs2);
10741 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10742 if (bitop2 != NOP_EXPR)
10744 tree res = make_ssa_name (mask);
10745 if (bitop2 == BIT_NOT_EXPR)
10746 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10747 else
10748 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10749 new_temp);
10750 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10753 if (slp_node)
10754 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
10755 else
10756 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10759 if (!slp_node)
10760 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10762 vec_oprnds0.release ();
10763 vec_oprnds1.release ();
10765 return true;
10768 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10769 can handle all live statements in the node. Otherwise return true
10770 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10771 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10773 static bool
10774 can_vectorize_live_stmts (vec_info *vinfo,
10775 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10776 slp_tree slp_node, slp_instance slp_node_instance,
10777 bool vec_stmt_p,
10778 stmt_vector_for_cost *cost_vec)
10780 if (slp_node)
10782 stmt_vec_info slp_stmt_info;
10783 unsigned int i;
10784 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10786 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10787 && !vectorizable_live_operation (vinfo,
10788 slp_stmt_info, gsi, slp_node,
10789 slp_node_instance, i,
10790 vec_stmt_p, cost_vec))
10791 return false;
10794 else if (STMT_VINFO_LIVE_P (stmt_info)
10795 && !vectorizable_live_operation (vinfo, stmt_info, gsi,
10796 slp_node, slp_node_instance, -1,
10797 vec_stmt_p, cost_vec))
10798 return false;
10800 return true;
10803 /* Make sure the statement is vectorizable. */
10805 opt_result
10806 vect_analyze_stmt (vec_info *vinfo,
10807 stmt_vec_info stmt_info, bool *need_to_vectorize,
10808 slp_tree node, slp_instance node_instance,
10809 stmt_vector_for_cost *cost_vec)
10811 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
10812 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10813 bool ok;
10814 gimple_seq pattern_def_seq;
10816 if (dump_enabled_p ())
10817 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10818 stmt_info->stmt);
10820 if (gimple_has_volatile_ops (stmt_info->stmt))
10821 return opt_result::failure_at (stmt_info->stmt,
10822 "not vectorized:"
10823 " stmt has volatile operands: %G\n",
10824 stmt_info->stmt);
10826 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10827 && node == NULL
10828 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10830 gimple_stmt_iterator si;
10832 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10834 stmt_vec_info pattern_def_stmt_info
10835 = vinfo->lookup_stmt (gsi_stmt (si));
10836 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10837 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10839 /* Analyze def stmt of STMT if it's a pattern stmt. */
10840 if (dump_enabled_p ())
10841 dump_printf_loc (MSG_NOTE, vect_location,
10842 "==> examining pattern def statement: %G",
10843 pattern_def_stmt_info->stmt);
10845 opt_result res
10846 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
10847 need_to_vectorize, node, node_instance,
10848 cost_vec);
10849 if (!res)
10850 return res;
10855 /* Skip stmts that do not need to be vectorized. In loops this is expected
10856 to include:
10857 - the COND_EXPR which is the loop exit condition
10858 - any LABEL_EXPRs in the loop
10859 - computations that are used only for array indexing or loop control.
10860 In basic blocks we only analyze statements that are a part of some SLP
10861 instance, therefore, all the statements are relevant.
10863 A pattern statement needs to be analyzed instead of the original statement
10864 if the original statement is not relevant. Otherwise, we analyze both
10865 statements. In basic blocks we are called from some SLP instance
10866 traversal; don't analyze pattern stmts instead of the original there,
10867 since the pattern stmts will already be part of an SLP instance. */
10869 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10870 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10871 && !STMT_VINFO_LIVE_P (stmt_info))
10873 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10874 && pattern_stmt_info
10875 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10876 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10878 /* Analyze PATTERN_STMT instead of the original stmt. */
10879 stmt_info = pattern_stmt_info;
10880 if (dump_enabled_p ())
10881 dump_printf_loc (MSG_NOTE, vect_location,
10882 "==> examining pattern statement: %G",
10883 stmt_info->stmt);
10885 else
10887 if (dump_enabled_p ())
10888 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10890 return opt_result::success ();
10893 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10894 && node == NULL
10895 && pattern_stmt_info
10896 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10897 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10899 /* Analyze PATTERN_STMT too. */
10900 if (dump_enabled_p ())
10901 dump_printf_loc (MSG_NOTE, vect_location,
10902 "==> examining pattern statement: %G",
10903 pattern_stmt_info->stmt);
10905 opt_result res
10906 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
10907 node_instance, cost_vec);
10908 if (!res)
10909 return res;
10912 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10914 case vect_internal_def:
10915 break;
10917 case vect_reduction_def:
10918 case vect_nested_cycle:
10919 gcc_assert (!bb_vinfo
10920 && (relevance == vect_used_in_outer
10921 || relevance == vect_used_in_outer_by_reduction
10922 || relevance == vect_used_by_reduction
10923 || relevance == vect_unused_in_scope
10924 || relevance == vect_used_only_live));
10925 break;
10927 case vect_induction_def:
10928 gcc_assert (!bb_vinfo);
10929 break;
10931 case vect_constant_def:
10932 case vect_external_def:
10933 case vect_unknown_def_type:
10934 default:
10935 gcc_unreachable ();
10938 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
10939 if (node)
10940 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
10942 if (STMT_VINFO_RELEVANT_P (stmt_info))
10944 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10945 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10946 || (call && gimple_call_lhs (call) == NULL_TREE));
10947 *need_to_vectorize = true;
10950 if (PURE_SLP_STMT (stmt_info) && !node)
10952 if (dump_enabled_p ())
10953 dump_printf_loc (MSG_NOTE, vect_location,
10954 "handled only by SLP analysis\n");
10955 return opt_result::success ();
10958 ok = true;
10959 if (!bb_vinfo
10960 && (STMT_VINFO_RELEVANT_P (stmt_info)
10961 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10962 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10963 -mveclibabi= takes preference over library functions with
10964 the simd attribute. */
10965 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10966 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
10967 cost_vec)
10968 || vectorizable_conversion (vinfo, stmt_info,
10969 NULL, NULL, node, cost_vec)
10970 || vectorizable_operation (vinfo, stmt_info,
10971 NULL, NULL, node, cost_vec)
10972 || vectorizable_assignment (vinfo, stmt_info,
10973 NULL, NULL, node, cost_vec)
10974 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10975 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10976 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
10977 node, node_instance, cost_vec)
10978 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
10979 NULL, node, cost_vec)
10980 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10981 || vectorizable_condition (vinfo, stmt_info,
10982 NULL, NULL, node, cost_vec)
10983 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
10984 cost_vec)
10985 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
10986 stmt_info, NULL, node));
10987 else
10989 if (bb_vinfo)
10990 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
10991 || vectorizable_simd_clone_call (vinfo, stmt_info,
10992 NULL, NULL, node, cost_vec)
10993 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
10994 cost_vec)
10995 || vectorizable_shift (vinfo, stmt_info,
10996 NULL, NULL, node, cost_vec)
10997 || vectorizable_operation (vinfo, stmt_info,
10998 NULL, NULL, node, cost_vec)
10999 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
11000 cost_vec)
11001 || vectorizable_load (vinfo, stmt_info,
11002 NULL, NULL, node, cost_vec)
11003 || vectorizable_store (vinfo, stmt_info,
11004 NULL, NULL, node, cost_vec)
11005 || vectorizable_condition (vinfo, stmt_info,
11006 NULL, NULL, node, cost_vec)
11007 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
11008 cost_vec)
11009 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
11012 if (node)
11013 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11015 if (!ok)
11016 return opt_result::failure_at (stmt_info->stmt,
11017 "not vectorized:"
11018 " relevant stmt not supported: %G",
11019 stmt_info->stmt);
11021 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
11022 need extra handling, except for vectorizable reductions. */
11023 if (!bb_vinfo
11024 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11025 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
11026 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
11027 stmt_info, NULL, node, node_instance,
11028 false, cost_vec))
11029 return opt_result::failure_at (stmt_info->stmt,
11030 "not vectorized:"
11031 " live stmt not supported: %G",
11032 stmt_info->stmt);
11034 return opt_result::success ();
11038 /* Function vect_transform_stmt.
11040 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
11042 bool
11043 vect_transform_stmt (vec_info *vinfo,
11044 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11045 slp_tree slp_node, slp_instance slp_node_instance)
11047 bool is_store = false;
11048 gimple *vec_stmt = NULL;
11049 bool done;
11051 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
11053 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
11054 if (slp_node)
11055 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
11057 switch (STMT_VINFO_TYPE (stmt_info))
11059 case type_demotion_vec_info_type:
11060 case type_promotion_vec_info_type:
11061 case type_conversion_vec_info_type:
11062 done = vectorizable_conversion (vinfo, stmt_info,
11063 gsi, &vec_stmt, slp_node, NULL);
11064 gcc_assert (done);
11065 break;
11067 case induc_vec_info_type:
11068 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
11069 stmt_info, &vec_stmt, slp_node,
11070 NULL);
11071 gcc_assert (done);
11072 break;
11074 case shift_vec_info_type:
11075 done = vectorizable_shift (vinfo, stmt_info,
11076 gsi, &vec_stmt, slp_node, NULL);
11077 gcc_assert (done);
11078 break;
11080 case op_vec_info_type:
11081 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11082 NULL);
11083 gcc_assert (done);
11084 break;
11086 case assignment_vec_info_type:
11087 done = vectorizable_assignment (vinfo, stmt_info,
11088 gsi, &vec_stmt, slp_node, NULL);
11089 gcc_assert (done);
11090 break;
11092 case load_vec_info_type:
11093 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
11094 NULL);
11095 gcc_assert (done);
11096 break;
11098 case store_vec_info_type:
11099 done = vectorizable_store (vinfo, stmt_info,
11100 gsi, &vec_stmt, slp_node, NULL);
11101 gcc_assert (done);
11102 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
11104 /* In case of interleaving, the whole chain is vectorized when the
11105 last store in the chain is reached. Store stmts before the last
11106 one are skipped, and their vec_stmt_info shouldn't be freed
11107 meanwhile. */
11108 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11109 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11110 is_store = true;
11112 else
11113 is_store = true;
11114 break;
11116 case condition_vec_info_type:
11117 done = vectorizable_condition (vinfo, stmt_info,
11118 gsi, &vec_stmt, slp_node, NULL);
11119 gcc_assert (done);
11120 break;
11122 case comparison_vec_info_type:
11123 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
11124 slp_node, NULL);
11125 gcc_assert (done);
11126 break;
11128 case call_vec_info_type:
11129 done = vectorizable_call (vinfo, stmt_info,
11130 gsi, &vec_stmt, slp_node, NULL);
11131 break;
11133 case call_simd_clone_vec_info_type:
11134 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
11135 slp_node, NULL);
11136 break;
11138 case reduc_vec_info_type:
11139 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
11140 gsi, &vec_stmt, slp_node);
11141 gcc_assert (done);
11142 break;
11144 case cycle_phi_info_type:
11145 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
11146 &vec_stmt, slp_node, slp_node_instance);
11147 gcc_assert (done);
11148 break;
11150 case lc_phi_info_type:
11151 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
11152 stmt_info, &vec_stmt, slp_node);
11153 gcc_assert (done);
11154 break;
11156 case phi_info_type:
11157 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
11158 gcc_assert (done);
11159 break;
11161 default:
11162 if (!STMT_VINFO_LIVE_P (stmt_info))
11164 if (dump_enabled_p ())
11165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11166 "stmt not supported.\n");
11167 gcc_unreachable ();
11169 done = true;
11172 if (!slp_node && vec_stmt)
11173 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
11175 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
11177 /* Handle stmts whose DEF is used outside the loop-nest that is
11178 being vectorized. */
11179 done = can_vectorize_live_stmts (vinfo, stmt_info, gsi, slp_node,
11180 slp_node_instance, true, NULL);
11181 gcc_assert (done);
11184 if (slp_node)
11185 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
11187 return is_store;
11191 /* Remove a group of stores (for SLP or interleaving), free their
11192 stmt_vec_info. */
11194 void
11195 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
11197 stmt_vec_info next_stmt_info = first_stmt_info;
11199 while (next_stmt_info)
11201 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11202 next_stmt_info = vect_orig_stmt (next_stmt_info);
11203 /* Free the attached stmt_vec_info and remove the stmt. */
11204 vinfo->remove_stmt (next_stmt_info);
11205 next_stmt_info = tmp;
11209 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11210 elements of type SCALAR_TYPE, or null if the target doesn't support
11211 such a type.
11213 If NUNITS is zero, return a vector type that contains elements of
11214 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11216 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11217 for this vectorization region and want to "autodetect" the best choice.
11218 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11219 and we want the new type to be interoperable with it. PREVAILING_MODE
11220 in this case can be a scalar integer mode or a vector mode; when it
11221 is a vector mode, the function acts like a tree-level version of
11222 related_vector_mode. */
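/* As an illustration (target-dependent; assume a hypothetical 128-bit
   SIMD target): get_related_vectype_for_scalar_type (VOIDmode,
   integer_type_node, 0) would return "vector(4) int" when the
   preferred SIMD mode for SImode is 128 bits wide, while passing that
   128-bit vector mode as PREVAILING_MODE together with a short
   SCALAR_TYPE and NUNITS == 8 asks for "vector(8) short".  */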
11224 tree
11225 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11226 tree scalar_type, poly_uint64 nunits)
11228 tree orig_scalar_type = scalar_type;
11229 scalar_mode inner_mode;
11230 machine_mode simd_mode;
11231 tree vectype;
11233 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11234 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11235 return NULL_TREE;
11237 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11239 /* For vector types of elements whose mode precision doesn't
11240 match their type's precision we use an element type of mode
11241 precision. The vectorization routines will have to make sure
11242 they support the proper result truncation/extension.
11243 We also make sure to build vector types with INTEGER_TYPE
11244 component type only. */
11245 if (INTEGRAL_TYPE_P (scalar_type)
11246 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11247 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11248 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11249 TYPE_UNSIGNED (scalar_type));
11251 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11252 When the component mode passes the above test, simply use a type
11253 corresponding to that mode. The theory is that any use that
11254 would cause problems with this will disable vectorization anyway. */
11255 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11256 && !INTEGRAL_TYPE_P (scalar_type))
11257 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11259 /* We can't build a vector type of elements with alignment bigger than
11260 their size. */
11261 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11262 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11263 TYPE_UNSIGNED (scalar_type));
11265 /* If we fell back to using the mode, fail if there was
11266 no scalar type for it. */
11267 if (scalar_type == NULL_TREE)
11268 return NULL_TREE;
11270 /* If no prevailing mode was supplied, use the mode the target prefers.
11271 Otherwise look up a vector mode based on the prevailing mode. */
11272 if (prevailing_mode == VOIDmode)
11274 gcc_assert (known_eq (nunits, 0U));
11275 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11276 if (SCALAR_INT_MODE_P (simd_mode))
11278 /* Traditional behavior is not to take the integer mode
11279 literally, but simply to use it as a way of determining
11280 the vector size. It is up to mode_for_vector to decide
11281 what the TYPE_MODE should be.
11283 Note that nunits == 1 is allowed in order to support single
11284 element vector types. */
11285 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11286 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11287 return NULL_TREE;
11290 else if (SCALAR_INT_MODE_P (prevailing_mode)
11291 || !related_vector_mode (prevailing_mode,
11292 inner_mode, nunits).exists (&simd_mode))
11294 /* Fall back to using mode_for_vector, mostly in the hope of being
11295 able to use an integer mode. */
11296 if (known_eq (nunits, 0U)
11297 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11298 return NULL_TREE;
11300 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11301 return NULL_TREE;
11304 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11306 /* In cases where the mode was chosen by mode_for_vector, check that
11307 the target actually supports the chosen mode, or that it at least
11308 allows the vector mode to be replaced by a like-sized integer. */
11309 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11310 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11311 return NULL_TREE;
11313 /* Re-attach the address-space qualifier if we canonicalized the scalar
11314 type. */
11315 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11316 return build_qualified_type
11317 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11319 return vectype;
11322 /* Function get_vectype_for_scalar_type.
11324 Returns the vector type corresponding to SCALAR_TYPE as supported
11325 by the target. If GROUP_SIZE is nonzero and we're performing BB
11326 vectorization, make sure that the number of elements in the vector
11327 is no bigger than GROUP_SIZE. */
11329 tree
11330 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11331 unsigned int group_size)
11333 /* For BB vectorization, we should always have a group size once we've
11334 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11335 are tentative requests during things like early data reference
11336 analysis and pattern recognition. */
11337 if (is_a <bb_vec_info> (vinfo))
11338 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11339 else
11340 group_size = 0;
11342 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11343 scalar_type);
11344 if (vectype && vinfo->vector_mode == VOIDmode)
11345 vinfo->vector_mode = TYPE_MODE (vectype);
11347 /* Register the natural choice of vector type, before the group size
11348 has been applied. */
11349 if (vectype)
11350 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11352 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11353 try again with an explicit number of elements. */
11354 if (vectype
11355 && group_size
11356 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11358 /* Start with the biggest number of units that fits within
11359 GROUP_SIZE and halve it until we find a valid vector type.
11360 Usually either the first attempt will succeed or all will
11361 fail (in the latter case because GROUP_SIZE is too small
11362 for the target), but it's possible that a target could have
11363 a hole between supported vector types.
11365 If GROUP_SIZE is not a power of 2, this has the effect of
11366 trying the largest power of 2 that fits within the group,
11367 even though the group is not a multiple of that vector size.
11368 The BB vectorizer will then try to carve up the group into
11369 smaller pieces. */
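/* For example, with GROUP_SIZE == 6 the loop below first tries
   nunits == 4 and, if no such vector type exists for the target,
   retries with nunits == 2 before giving up.  */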
11370 unsigned int nunits = 1 << floor_log2 (group_size);
11373 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11374 scalar_type, nunits);
11375 nunits /= 2;
11377 while (nunits > 1 && !vectype);
11380 return vectype;
11383 /* Return the vector type corresponding to SCALAR_TYPE as supported
11384 by the target. NODE, if nonnull, is the SLP tree node that will
11385 use the returned vector type. */
11387 tree
11388 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11390 unsigned int group_size = 0;
11391 if (node)
11392 group_size = SLP_TREE_LANES (node);
11393 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11396 /* Function get_mask_type_for_scalar_type.
11398 Returns the mask type corresponding to a result of comparison
11399 of vectors of specified SCALAR_TYPE as supported by target.
11400 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11401 make sure that the number of elements in the vector is no bigger
11402 than GROUP_SIZE. */
11404 tree
11405 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11406 unsigned int group_size)
11408 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11410 if (!vectype)
11411 return NULL;
11413 return truth_type_for (vectype);
11416 /* Function get_same_sized_vectype
11418 Returns a vector type corresponding to SCALAR_TYPE that has the same
11419 size as VECTOR_TYPE, if supported by the target. */
11421 tree
11422 get_same_sized_vectype (tree scalar_type, tree vector_type)
11424 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11425 return truth_type_for (vector_type);
11427 poly_uint64 nunits;
11428 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11429 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11430 return NULL_TREE;
11432 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11433 scalar_type, nunits);
11436 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11437 would not change the chosen vector modes. */
11439 bool
11440 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11442 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11443 i != vinfo->used_vector_modes.end (); ++i)
11444 if (!VECTOR_MODE_P (*i)
11445 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11446 return false;
11447 return true;
11450 /* Function vect_is_simple_use.
11452 Input:
11453 VINFO - the vect info of the loop or basic block that is being vectorized.
11454 OPERAND - operand in the loop or bb.
11455 Output:
11456 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11457 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11458 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11459 the definition could be anywhere in the function
11460 DT - the type of definition
11462 Returns whether a stmt with OPERAND can be vectorized.
11463 For loops, supportable operands are constants, loop invariants, and operands
11464 that are defined by the current iteration of the loop. Unsupportable
11465 operands are those that are defined by a previous iteration of the loop (as
11466 is the case in reduction/induction computations).
11467 For basic blocks, supportable operands are constants and bb invariants.
11468 For now, operands defined outside the basic block are not supported. */
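/* For example (an illustrative loop): in
     for (i = 0; i < n; i++)
       a[i] = b[i] + x;
   the operand holding the loaded b[i] value is vect_internal_def,
   the loop invariant x is vect_external_def and a literal constant
   operand would be vect_constant_def.  */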
11470 bool
11471 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11472 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11474 if (def_stmt_info_out)
11475 *def_stmt_info_out = NULL;
11476 if (def_stmt_out)
11477 *def_stmt_out = NULL;
11478 *dt = vect_unknown_def_type;
11480 if (dump_enabled_p ())
11482 dump_printf_loc (MSG_NOTE, vect_location,
11483 "vect_is_simple_use: operand ");
11484 if (TREE_CODE (operand) == SSA_NAME
11485 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11486 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11487 else
11488 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11491 if (CONSTANT_CLASS_P (operand))
11492 *dt = vect_constant_def;
11493 else if (is_gimple_min_invariant (operand))
11494 *dt = vect_external_def;
11495 else if (TREE_CODE (operand) != SSA_NAME)
11496 *dt = vect_unknown_def_type;
11497 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11498 *dt = vect_external_def;
11499 else
11501 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11502 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11503 if (!stmt_vinfo)
11504 *dt = vect_external_def;
11505 else
11507 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11508 def_stmt = stmt_vinfo->stmt;
11509 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11510 if (def_stmt_info_out)
11511 *def_stmt_info_out = stmt_vinfo;
11513 if (def_stmt_out)
11514 *def_stmt_out = def_stmt;
11517 if (dump_enabled_p ())
11519 dump_printf (MSG_NOTE, ", type of def: ");
11520 switch (*dt)
11522 case vect_uninitialized_def:
11523 dump_printf (MSG_NOTE, "uninitialized\n");
11524 break;
11525 case vect_constant_def:
11526 dump_printf (MSG_NOTE, "constant\n");
11527 break;
11528 case vect_external_def:
11529 dump_printf (MSG_NOTE, "external\n");
11530 break;
11531 case vect_internal_def:
11532 dump_printf (MSG_NOTE, "internal\n");
11533 break;
11534 case vect_induction_def:
11535 dump_printf (MSG_NOTE, "induction\n");
11536 break;
11537 case vect_reduction_def:
11538 dump_printf (MSG_NOTE, "reduction\n");
11539 break;
11540 case vect_double_reduction_def:
11541 dump_printf (MSG_NOTE, "double reduction\n");
11542 break;
11543 case vect_nested_cycle:
11544 dump_printf (MSG_NOTE, "nested cycle\n");
11545 break;
11546 case vect_unknown_def_type:
11547 dump_printf (MSG_NOTE, "unknown\n");
11548 break;
11552 if (*dt == vect_unknown_def_type)
11554 if (dump_enabled_p ())
11555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11556 "Unsupported pattern.\n");
11557 return false;
11560 return true;
11563 /* Function vect_is_simple_use.
11565 Same as vect_is_simple_use but also determines the vector operand
11566 type of OPERAND and stores it to *VECTYPE. If the definition of
11567 OPERAND is vect_uninitialized_def, vect_constant_def or
11568 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11569 is responsible for computing the best suited vector type for the
11570 scalar operand. */
11572 bool
11573 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11574 tree *vectype, stmt_vec_info *def_stmt_info_out,
11575 gimple **def_stmt_out)
11577 stmt_vec_info def_stmt_info;
11578 gimple *def_stmt;
11579 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11580 return false;
11582 if (def_stmt_out)
11583 *def_stmt_out = def_stmt;
11584 if (def_stmt_info_out)
11585 *def_stmt_info_out = def_stmt_info;
11587 /* Now get a vector type if the def is internal, otherwise supply
11588 NULL_TREE and leave it up to the caller to figure out a proper
11589 type for the use stmt. */
11590 if (*dt == vect_internal_def
11591 || *dt == vect_induction_def
11592 || *dt == vect_reduction_def
11593 || *dt == vect_double_reduction_def
11594 || *dt == vect_nested_cycle)
11596 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11597 gcc_assert (*vectype != NULL_TREE);
11598 if (dump_enabled_p ())
11599 dump_printf_loc (MSG_NOTE, vect_location,
11600 "vect_is_simple_use: vectype %T\n", *vectype);
11602 else if (*dt == vect_uninitialized_def
11603 || *dt == vect_constant_def
11604 || *dt == vect_external_def)
11605 *vectype = NULL_TREE;
11606 else
11607 gcc_unreachable ();
11609 return true;
11612 /* Function vect_is_simple_use.
11614 Same as vect_is_simple_use but determines the operand by operand
11615 position OPERAND from either STMT or SLP_NODE, filling in *OP
11616 and *SLP_DEF (when SLP_NODE is not NULL). */
11618 bool
11619 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
11620 unsigned operand, tree *op, slp_tree *slp_def,
11621 enum vect_def_type *dt,
11622 tree *vectype, stmt_vec_info *def_stmt_info_out)
11624 if (slp_node)
11626 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
11627 *slp_def = child;
11628 *vectype = SLP_TREE_VECTYPE (child);
11629 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
11631 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
11632 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
11634 else
11636 if (def_stmt_info_out)
11637 *def_stmt_info_out = NULL;
11638 *op = SLP_TREE_SCALAR_OPS (child)[0];
11639 *dt = SLP_TREE_DEF_TYPE (child);
11640 return true;
11643 else
11645 *slp_def = NULL;
11646 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
11648 if (gimple_assign_rhs_code (ass) == COND_EXPR
11649 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
11651 if (operand < 2)
11652 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
11653 else
11654 *op = gimple_op (ass, operand);
11656 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
11657 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
11658 else
11659 *op = gimple_op (ass, operand + 1);
11661 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
11662 *op = gimple_call_arg (call, operand);
11663 else
11664 gcc_unreachable ();
11665 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
11669 /* If OP is not NULL and is external or constant, update its vector
11670 type with VECTYPE. Returns true if successful or false if not,
11671 for example when conflicting vector types are present. */
11673 bool
11674 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
11676 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
11677 return true;
11678 if (SLP_TREE_VECTYPE (op))
11679 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
11680 SLP_TREE_VECTYPE (op) = vectype;
11681 return true;
11684 /* Function supportable_widening_operation
11686 Check whether an operation represented by the code CODE is a
11687 widening operation that is supported by the target platform in
11688 vector form (i.e., when operating on arguments of type VECTYPE_IN
11689 producing a result of type VECTYPE_OUT).
11691 Widening operations we currently support are NOP (CONVERT), FLOAT,
11692 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11693 are supported by the target platform either directly (via vector
11694 tree-codes), or via target builtins.
11696 Output:
11697 - CODE1 and CODE2 are codes of vector operations to be used when
11698 vectorizing the operation, if available.
11699 - MULTI_STEP_CVT determines the number of required intermediate steps in
11700 case of multi-step conversion (like char->short->int - in that case
11701 MULTI_STEP_CVT will be 1).
11702 - INTERM_TYPES contains the intermediate type required to perform the
11703 widening operation (short in the above example). */
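/* For example (a sketch; the exact codes depend on the target and on
   BYTES_BIG_ENDIAN): a short -> int conversion handled via
   CASE_CONVERT typically reports CODE1 == VEC_UNPACK_LO_EXPR,
   CODE2 == VEC_UNPACK_HI_EXPR and MULTI_STEP_CVT == 0, whereas the
   char -> int case mentioned above needs one intermediate short
   vector type (MULTI_STEP_CVT == 1).  */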
11705 bool
11706 supportable_widening_operation (vec_info *vinfo,
11707 enum tree_code code, stmt_vec_info stmt_info,
11708 tree vectype_out, tree vectype_in,
11709 enum tree_code *code1, enum tree_code *code2,
11710 int *multi_step_cvt,
11711 vec<tree> *interm_types)
11713 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
11714 class loop *vect_loop = NULL;
11715 machine_mode vec_mode;
11716 enum insn_code icode1, icode2;
11717 optab optab1, optab2;
11718 tree vectype = vectype_in;
11719 tree wide_vectype = vectype_out;
11720 enum tree_code c1, c2;
11721 int i;
11722 tree prev_type, intermediate_type;
11723 machine_mode intermediate_mode, prev_mode;
11724 optab optab3, optab4;
11726 *multi_step_cvt = 0;
11727 if (loop_info)
11728 vect_loop = LOOP_VINFO_LOOP (loop_info);
11730 switch (code)
11732 case WIDEN_MULT_EXPR:
11733 /* The result of a vectorized widening operation usually requires
11734 two vectors (because the widened results do not fit into one vector).
11735 The generated vector results would normally be expected to be
11736 generated in the same order as in the original scalar computation,
11737 i.e. if 8 results are generated in each vector iteration, they are
11738 to be organized as follows:
11739 vect1: [res1,res2,res3,res4],
11740 vect2: [res5,res6,res7,res8].
11742 However, in the special case that the result of the widening
11743 operation is used in a reduction computation only, the order doesn't
11744 matter (because when vectorizing a reduction we change the order of
11745 the computation). Some targets can take advantage of this and
11746 generate more efficient code. For example, targets like Altivec,
11747 that support widen_mult using a sequence of {mult_even,mult_odd}
11748 generate the following vectors:
11749 vect1: [res1,res3,res5,res7],
11750 vect2: [res2,res4,res6,res8].
11752 When vectorizing outer-loops, we execute the inner-loop sequentially
11753 (each vectorized inner-loop iteration contributes to VF outer-loop
11754 iterations in parallel). We therefore don't allow changing the
11755 order of the computation in the inner-loop during outer-loop
11756 vectorization. */
11757 /* TODO: Another case in which order doesn't *really* matter is when we
11758 widen and then contract again, e.g. (short)((int)x * y >> 8).
11759 Normally, pack_trunc performs an even/odd permute, whereas the
11760 repack from an even/odd expansion would be an interleave, which
11761 would be significantly simpler for e.g. AVX2. */
11762 /* In any case, in order to avoid duplicating the code below, recurse
11763 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11764 are properly set up for the caller. If we fail, we'll continue with
11765 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11766 if (vect_loop
11767 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11768 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11769 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
11770 stmt_info, vectype_out,
11771 vectype_in, code1, code2,
11772 multi_step_cvt, interm_types))
11774 /* Elements in a vector with vect_used_by_reduction property cannot
11775 be reordered if the use chain with this property does not have the
11776 same operation. One such example is s += a * b, where elements
11777 in a and b cannot be reordered. Here we check if the vector defined
11778 by STMT is only directly used in the reduction statement. */
11779 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11780 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11781 if (use_stmt_info
11782 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11783 return true;
11785 c1 = VEC_WIDEN_MULT_LO_EXPR;
11786 c2 = VEC_WIDEN_MULT_HI_EXPR;
11787 break;
11789 case DOT_PROD_EXPR:
11790 c1 = DOT_PROD_EXPR;
11791 c2 = DOT_PROD_EXPR;
11792 break;
11794 case SAD_EXPR:
11795 c1 = SAD_EXPR;
11796 c2 = SAD_EXPR;
11797 break;
11799 case VEC_WIDEN_MULT_EVEN_EXPR:
11800 /* Support the recursion induced just above. */
11801 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11802 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11803 break;
11805 case WIDEN_LSHIFT_EXPR:
11806 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11807 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11808 break;
11810 case WIDEN_PLUS_EXPR:
11811 c1 = VEC_WIDEN_PLUS_LO_EXPR;
11812 c2 = VEC_WIDEN_PLUS_HI_EXPR;
11813 break;
11815 case WIDEN_MINUS_EXPR:
11816 c1 = VEC_WIDEN_MINUS_LO_EXPR;
11817 c2 = VEC_WIDEN_MINUS_HI_EXPR;
11818 break;
11820 CASE_CONVERT:
11821 c1 = VEC_UNPACK_LO_EXPR;
11822 c2 = VEC_UNPACK_HI_EXPR;
11823 break;
11825 case FLOAT_EXPR:
11826 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11827 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11828 break;
11830 case FIX_TRUNC_EXPR:
11831 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11832 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11833 break;
11835 default:
11836 gcc_unreachable ();
11839 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11840 std::swap (c1, c2);
11842 if (code == FIX_TRUNC_EXPR)
11844 /* The signedness is determined from the output operand. */
11845 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11846 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11848 else if (CONVERT_EXPR_CODE_P (code)
11849 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11850 && VECTOR_BOOLEAN_TYPE_P (vectype)
11851 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11852 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11854 /* If the input and result modes are the same, a different optab
11855 is needed where we pass in the number of units in vectype. */
11856 optab1 = vec_unpacks_sbool_lo_optab;
11857 optab2 = vec_unpacks_sbool_hi_optab;
11859 else
11861 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11862 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11865 if (!optab1 || !optab2)
11866 return false;
11868 vec_mode = TYPE_MODE (vectype);
11869 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11870 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11871 return false;
11873 *code1 = c1;
11874 *code2 = c2;
11876 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11877 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11879 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11880 return true;
11881 /* For scalar masks we may have different boolean
11882 vector types having the same QImode. Thus we
11883 add an additional check on the number of elements. */
11884 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11885 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11886 return true;
11889 /* Check if it's a multi-step conversion that can be done using intermediate
11890 types. */
11892 prev_type = vectype;
11893 prev_mode = vec_mode;
11895 if (!CONVERT_EXPR_CODE_P (code))
11896 return false;
11898 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11899 intermediate steps in the promotion sequence. We try
11900 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11901 not. */
11902 interm_types->create (MAX_INTERM_CVT_STEPS);
11903 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11905 intermediate_mode = insn_data[icode1].operand[0].mode;
11906 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11907 intermediate_type
11908 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11909 else
11910 intermediate_type
11911 = lang_hooks.types.type_for_mode (intermediate_mode,
11912 TYPE_UNSIGNED (prev_type));
11914 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11915 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11916 && intermediate_mode == prev_mode
11917 && SCALAR_INT_MODE_P (prev_mode))
11919 /* If the input and result modes are the same, a different optab
11920 is needed where we pass in the number of units in vectype. */
11921 optab3 = vec_unpacks_sbool_lo_optab;
11922 optab4 = vec_unpacks_sbool_hi_optab;
11924 else
11926 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11927 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11930 if (!optab3 || !optab4
11931 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11932 || insn_data[icode1].operand[0].mode != intermediate_mode
11933 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11934 || insn_data[icode2].operand[0].mode != intermediate_mode
11935 || ((icode1 = optab_handler (optab3, intermediate_mode))
11936 == CODE_FOR_nothing)
11937 || ((icode2 = optab_handler (optab4, intermediate_mode))
11938 == CODE_FOR_nothing))
11939 break;
11941 interm_types->quick_push (intermediate_type);
11942 (*multi_step_cvt)++;
11944 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11945 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11947 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11948 return true;
11949 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11950 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11951 return true;
11954 prev_type = intermediate_type;
11955 prev_mode = intermediate_mode;
11958 interm_types->release ();
11959 return false;
11963 /* Function supportable_narrowing_operation
11965 Check whether an operation represented by the code CODE is a
11966 narrowing operation that is supported by the target platform in
11967 vector form (i.e., when operating on arguments of type VECTYPE_IN
11968 and producing a result of type VECTYPE_OUT).
11970 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11971 and FLOAT. This function checks if these operations are supported by
11972 the target platform directly via vector tree-codes.
11974 Output:
11975 - CODE1 is the code of a vector operation to be used when
11976 vectorizing the operation, if available.
11977 - MULTI_STEP_CVT determines the number of required intermediate steps in
11978 case of multi-step conversion (like int->short->char - in that case
11979 MULTI_STEP_CVT will be 1).
11980 - INTERM_TYPES contains the intermediate type required to perform the
11981 narrowing operation (short in the above example). */
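/* For example (a sketch): the int -> char case mentioned above would
   report CODE1 == VEC_PACK_TRUNC_EXPR, record one intermediate short
   vector type in INTERM_TYPES and set MULTI_STEP_CVT == 1.  */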
11983 bool
11984 supportable_narrowing_operation (enum tree_code code,
11985 tree vectype_out, tree vectype_in,
11986 enum tree_code *code1, int *multi_step_cvt,
11987 vec<tree> *interm_types)
11989 machine_mode vec_mode;
11990 enum insn_code icode1;
11991 optab optab1, interm_optab;
11992 tree vectype = vectype_in;
11993 tree narrow_vectype = vectype_out;
11994 enum tree_code c1;
11995 tree intermediate_type, prev_type;
11996 machine_mode intermediate_mode, prev_mode;
11997 int i;
11998 bool uns;
12000 *multi_step_cvt = 0;
12001 switch (code)
12003 CASE_CONVERT:
12004 c1 = VEC_PACK_TRUNC_EXPR;
12005 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
12006 && VECTOR_BOOLEAN_TYPE_P (vectype)
12007 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
12008 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
12009 optab1 = vec_pack_sbool_trunc_optab;
12010 else
12011 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12012 break;
12014 case FIX_TRUNC_EXPR:
12015 c1 = VEC_PACK_FIX_TRUNC_EXPR;
12016 /* The signedness is determined from the output operand. */
12017 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
12018 break;
12020 case FLOAT_EXPR:
12021 c1 = VEC_PACK_FLOAT_EXPR;
12022 optab1 = optab_for_tree_code (c1, vectype, optab_default);
12023 break;
12025 default:
12026 gcc_unreachable ();
12029 if (!optab1)
12030 return false;
12032 vec_mode = TYPE_MODE (vectype);
12033 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
12034 return false;
12036 *code1 = c1;
12038 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12040 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12041 return true;
12042 /* For scalar masks we may have different boolean
12043 vector types having the same QImode. Thus we
12044 add an additional check on the number of elements. */
12045 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
12046 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12047 return true;
12050 if (code == FLOAT_EXPR)
12051 return false;
12053 /* Check if it's a multi-step conversion that can be done using intermediate
12054 types. */
12055 prev_mode = vec_mode;
12056 prev_type = vectype;
12057 if (code == FIX_TRUNC_EXPR)
12058 uns = TYPE_UNSIGNED (vectype_out);
12059 else
12060 uns = TYPE_UNSIGNED (vectype);
12062 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
12063 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
12064 costly than signed. */
12065 if (code == FIX_TRUNC_EXPR && uns)
12067 enum insn_code icode2;
12069 intermediate_type
12070 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
12071 interm_optab
12072 = optab_for_tree_code (c1, intermediate_type, optab_default);
12073 if (interm_optab != unknown_optab
12074 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
12075 && insn_data[icode1].operand[0].mode
12076 == insn_data[icode2].operand[0].mode)
12078 uns = false;
12079 optab1 = interm_optab;
12080 icode1 = icode2;
12084 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
12085 intermediate steps in the narrowing sequence. We try
12086 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
12087 interm_types->create (MAX_INTERM_CVT_STEPS);
12088 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
12090 intermediate_mode = insn_data[icode1].operand[0].mode;
12091 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12092 intermediate_type
12093 = vect_double_mask_nunits (prev_type, intermediate_mode);
12094 else
12095 intermediate_type
12096 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12097 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12098 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12099 && intermediate_mode == prev_mode
12100 && SCALAR_INT_MODE_P (prev_mode))
12101 interm_optab = vec_pack_sbool_trunc_optab;
12102 else
12103 interm_optab
12104 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12105 optab_default);
12106 if (!interm_optab
12107 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12108 || insn_data[icode1].operand[0].mode != intermediate_mode
12109 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12110 == CODE_FOR_nothing))
12111 break;
12113 interm_types->quick_push (intermediate_type);
12114 (*multi_step_cvt)++;
12116 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12118 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12119 return true;
12120 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12121 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12122 return true;
12125 prev_mode = intermediate_mode;
12126 prev_type = intermediate_type;
12127 optab1 = interm_optab;
12130 interm_types->release ();
12131 return false;
12134 /* Generate and return a vector mask of MASK_TYPE such that
12135 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
12136 Add the statements to SEQ. */
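/* For example, with a 4-lane MASK_TYPE, START_INDEX == 5 and
   END_INDEX == 7, the generated IFN_WHILE_ULT call produces the mask
   { true, true, false, false }.  */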
12138 tree
12139 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
12140 tree end_index, const char *name)
12142 tree cmp_type = TREE_TYPE (start_index);
12143 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12144 cmp_type, mask_type,
12145 OPTIMIZE_FOR_SPEED));
12146 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12147 start_index, end_index,
12148 build_zero_cst (mask_type));
12149 tree tmp;
12150 if (name)
12151 tmp = make_temp_ssa_name (mask_type, NULL, name);
12152 else
12153 tmp = make_ssa_name (mask_type);
12154 gimple_call_set_lhs (call, tmp);
12155 gimple_seq_add_stmt (seq, call);
12156 return tmp;
12159 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12160 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12162 tree
12163 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12164 tree end_index)
12166 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
12167 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
12170 /* Try to compute the vector types required to vectorize STMT_INFO,
12171 returning true on success and false if vectorization isn't possible.
12172 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12173 make sure that the number of elements in the vectors is no bigger
12174 than GROUP_SIZE.
12176 On success:
12178 - Set *STMT_VECTYPE_OUT to:
12179 - NULL_TREE if the statement doesn't need to be vectorized;
12180 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12182 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12183 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12184 statement does not help to determine the overall number of units. */
12186 opt_result
12187 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
12188 tree *stmt_vectype_out,
12189 tree *nunits_vectype_out,
12190 unsigned int group_size)
12192 gimple *stmt = stmt_info->stmt;
12194 /* For BB vectorization, we should always have a group size once we've
12195 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12196 are tentative requests during things like early data reference
12197 analysis and pattern recognition. */
12198 if (is_a <bb_vec_info> (vinfo))
12199 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12200 else
12201 group_size = 0;
12203 *stmt_vectype_out = NULL_TREE;
12204 *nunits_vectype_out = NULL_TREE;
12206 if (gimple_get_lhs (stmt) == NULL_TREE
12207 /* MASK_STORE has no lhs, but is ok. */
12208 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12210 if (is_a <gcall *> (stmt))
12212 /* Ignore calls with no lhs. These must be calls to
12213 #pragma omp simd functions, and what vectorization factor
12214 such a call really needs can't be determined until
12215 vectorizable_simd_clone_call. */
12216 if (dump_enabled_p ())
12217 dump_printf_loc (MSG_NOTE, vect_location,
12218 "defer to SIMD clone analysis.\n");
12219 return opt_result::success ();
12222 return opt_result::failure_at (stmt,
12223 "not vectorized: irregular stmt.%G", stmt);
12226 tree vectype;
12227 tree scalar_type = NULL_TREE;
12228 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12230 vectype = STMT_VINFO_VECTYPE (stmt_info);
12231 if (dump_enabled_p ())
12232 dump_printf_loc (MSG_NOTE, vect_location,
12233 "precomputed vectype: %T\n", vectype);
12235 else if (vect_use_mask_type_p (stmt_info))
12237 unsigned int precision = stmt_info->mask_precision;
12238 scalar_type = build_nonstandard_integer_type (precision, 1);
12239 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12240 if (!vectype)
12241 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12242 " data-type %T\n", scalar_type);
12243 if (dump_enabled_p ())
12244 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12246 else
12248 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12249 scalar_type = TREE_TYPE (DR_REF (dr));
12250 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12251 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12252 else
12253 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12255 if (dump_enabled_p ())
12257 if (group_size)
12258 dump_printf_loc (MSG_NOTE, vect_location,
12259 "get vectype for scalar type (group size %d):"
12260 " %T\n", group_size, scalar_type);
12261 else
12262 dump_printf_loc (MSG_NOTE, vect_location,
12263 "get vectype for scalar type: %T\n", scalar_type);
12265 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12266 if (!vectype)
12267 return opt_result::failure_at (stmt,
12268 "not vectorized:"
12269 " unsupported data-type %T\n",
12270 scalar_type);
12272 if (dump_enabled_p ())
12273 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12276 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
12277 return opt_result::failure_at (stmt,
12278 "not vectorized: vector stmt in loop:%G",
12279 stmt);
12281 *stmt_vectype_out = vectype;
12283 /* Don't try to compute scalar types if the stmt produces a boolean
12284 vector; use the existing vector type instead. */
12285 tree nunits_vectype = vectype;
12286 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12288 /* The number of units is set according to the smallest scalar
12289 type (or the largest vector size, but we only support one
12290 vector size per vectorization). */
12291 scalar_type = vect_get_smallest_scalar_type (stmt_info,
12292 TREE_TYPE (vectype));
12293 if (scalar_type != TREE_TYPE (vectype))
12295 if (dump_enabled_p ())
12296 dump_printf_loc (MSG_NOTE, vect_location,
12297 "get vectype for smallest scalar type: %T\n",
12298 scalar_type);
12299 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12300 group_size);
12301 if (!nunits_vectype)
12302 return opt_result::failure_at
12303 (stmt, "not vectorized: unsupported data-type %T\n",
12304 scalar_type);
12305 if (dump_enabled_p ())
12306 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12307 nunits_vectype);
12311 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12312 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
12313 return opt_result::failure_at (stmt,
12314 "Not vectorized: Incompatible number "
12315 "of vector subparts between %T and %T\n",
12316 nunits_vectype, *stmt_vectype_out);
12318 if (dump_enabled_p ())
12320 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12321 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12322 dump_printf (MSG_NOTE, "\n");
12325 *nunits_vectype_out = nunits_vectype;
12326 return opt_result::success ();
12329 /* Generate and return a statement sequence that sets the vector length LEN:
12331 min_of_start_and_end = min (START_INDEX, END_INDEX);
12332 left_len = END_INDEX - min_of_start_and_end;
12333 rhs = min (left_len, LEN_LIMIT);
12334 LEN = rhs;
12336 Note: the cost of the code generated by this function is modeled
12337 by vect_estimate_min_profitable_iters, so changes here may need
12338 corresponding changes there. */
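/* A worked example: with START_INDEX == 3, END_INDEX == 10 and
   LEN_LIMIT == 4 the generated sequence computes min (3, 10) == 3,
   then 10 - 3 == 7, and finally sets LEN = min (7, 4) == 4.  */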
12340 gimple_seq
12341 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
12343 gimple_seq stmts = NULL;
12344 tree len_type = TREE_TYPE (len);
12345 gcc_assert (TREE_TYPE (start_index) == len_type);
12347 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
12348 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
12349 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
12350 gimple* stmt = gimple_build_assign (len, rhs);
12351 gimple_seq_add_stmt (&stmts, stmt);
12353 return stmts;