[official-gcc.git] / gcc / tree-vect-stmts.cc (blob 337b24c51f953667ad4146002052ce40f34b7529)
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
57 #include "regs.h"
58 #include "attribs.h"
59 #include "optabs-libfuncs.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
64 /* Return the vectorized type for the given statement. */
66 tree
67 stmt_vectype (class _stmt_vec_info *stmt_info)
69 return STMT_VINFO_VECTYPE (stmt_info);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74 bool
75 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
80 class loop* loop;
82 if (!loop_vinfo)
83 return false;
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
87 return (bb->loop_father == loop->inner);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
94 static unsigned
95 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
108 stmt_info_for_cost si
109 = { count, kind, where, stmt_info, node, vectype, misalign };
110 body_cost_vec->safe_push (si);
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
116 unsigned
117 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
126 unsigned
127 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
136 unsigned
137 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, 0, where);
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
149 static tree
150 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
161 static tree
162 read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (vect, new_stmt);
178 gimple_assign_set_lhs (new_stmt, vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
181 return vect_name;
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
188 static void
189 write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
193 tree array_ref;
194 gimple *new_stmt;
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
208 static tree
209 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
211 tree mem_ref;
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
222 static void
223 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
233 /* Function vect_mark_relevant.
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
237 static void
238 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern, in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
271 if (live_p && relevant == vect_unused_in_scope)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
299 worklist->safe_push (stmt_info);
303 /* Function is_simple_and_all_uses_invariant
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
307 bool
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
311 tree op;
312 ssa_op_iter iter;
314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
315 if (!stmt)
316 return false;
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
320 enum vect_def_type dt = vect_uninitialized_def;
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
333 return true;
336 /* Function vect_stmt_relevant_p.
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - it is a control stmt in the loop (other than the exit condition).
346 CHECKME: what other side effects would the vectorizer allow? */
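/* For example (illustrative sketch; the names below are not taken from
   this file), in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + x;   <-- has a vdef (alters memory): relevant
         last = b[i];       <-- def used after the loop: live
       }
     use (last);

   whereas the increment of i on its own is neither relevant nor live.  */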
348 static bool
349 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
350 enum vect_relevant *relevant, bool *live_p)
352 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
353 ssa_op_iter op_iter;
354 imm_use_iterator imm_iter;
355 use_operand_p use_p;
356 def_operand_p def_p;
358 *relevant = vect_unused_in_scope;
359 *live_p = false;
361 /* cond stmt other than loop exit cond. */
362 if (is_ctrl_stmt (stmt_info->stmt)
363 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
364 *relevant = vect_used_in_scope;
366 /* changing memory. */
367 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
368 if (gimple_vdef (stmt_info->stmt)
369 && !gimple_clobber_p (stmt_info->stmt))
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: stmt has vdefs.\n");
374 *relevant = vect_used_in_scope;
377 /* uses outside the loop. */
378 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
380 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
382 basic_block bb = gimple_bb (USE_STMT (use_p));
383 if (!flow_bb_inside_loop_p (loop, bb))
385 if (is_gimple_debug (USE_STMT (use_p)))
386 continue;
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: used out of loop.\n");
392 /* We expect all such uses to be in the loop exit phis
393 (because of loop closed form) */
394 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
395 gcc_assert (bb == single_exit (loop)->dest);
397 *live_p = true;
402 if (*live_p && *relevant == vect_unused_in_scope
403 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_NOTE, vect_location,
407 "vec_stmt_relevant_p: stmt live but not relevant.\n");
408 *relevant = vect_used_only_live;
411 return (*live_p || *relevant);
415 /* Function exist_non_indexing_operands_for_use_p
417 USE is one of the uses attached to STMT_INFO. Check if USE is
418 used in STMT_INFO for anything other than indexing an array. */
420 static bool
421 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
423 tree operand;
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info))
429 return true;
431 /* STMT has a data_ref. FORNOW this means that it is of one of
432 the following forms:
433 -1- ARRAY_REF = var
434 -2- var = ARRAY_REF
435 (This should have been verified in analyze_data_refs).
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
439 for array indexing.
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
444 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
445 if (!assign || !gimple_assign_copy_p (assign))
447 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
448 if (call && gimple_call_internal_p (call))
450 internal_fn ifn = gimple_call_internal_fn (call);
451 int mask_index = internal_fn_mask_index (ifn);
452 if (mask_index >= 0
453 && use == gimple_call_arg (call, mask_index))
454 return true;
455 int stored_value_index = internal_fn_stored_value_index (ifn);
456 if (stored_value_index >= 0
457 && use == gimple_call_arg (call, stored_value_index))
458 return true;
459 if (internal_gather_scatter_fn_p (ifn)
460 && use == gimple_call_arg (call, 1))
461 return true;
463 return false;
466 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
467 return false;
468 operand = gimple_assign_rhs1 (assign);
469 if (TREE_CODE (operand) != SSA_NAME)
470 return false;
472 if (operand == use)
473 return true;
475 return false;
480 /* Function process_use.
482 Inputs:
483 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
484 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
485 that defined USE. This is done by calling mark_relevant and passing it
486 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
487 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
488 be performed.
490 Outputs:
491 Generally, LIVE_P and RELEVANT are used to define the liveness and
492 relevance info of the DEF_STMT of this USE:
493 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
494 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
495 Exceptions:
496 - case 1: If USE is used only for address computations (e.g. array indexing),
497 which does not need to be directly vectorized, then the liveness/relevance
498 of the respective DEF_STMT is left unchanged.
499 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
500 we skip DEF_STMT because it has already been processed.
501 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
502 "relevant" will be modified accordingly.
504 Return true if everything is as expected. Return false otherwise. */
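/* A minimal sketch of the case 3 relevance adjustment performed below
   (how the relevance recorded for the inner-loop use maps onto an
   outer-loop def); the helper name is hypothetical, the enum values are
   the ones used in this file.  */

static enum vect_relevant
outer_def_relevance_sketch (enum vect_relevant relevant, bool nested_cycle)
{
  switch (relevant)
    {
    case vect_unused_in_scope:
      return nested_cycle ? vect_used_in_scope : vect_unused_in_scope;
    case vect_used_in_outer_by_reduction:
      return vect_used_by_reduction;
    case vect_used_in_outer:
      return vect_used_in_scope;
    case vect_used_in_scope:
      return vect_used_in_scope;
    default:
      gcc_unreachable ();
    }
}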
506 static opt_result
507 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
508 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
509 bool force)
511 stmt_vec_info dstmt_vinfo;
512 enum vect_def_type dt;
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
517 return opt_result::success ();
519 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
520 return opt_result::failure_at (stmt_vinfo->stmt,
521 "not vectorized:"
522 " unsupported use in stmt.\n");
524 if (!dstmt_vinfo)
525 return opt_result::success ();
527 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
528 basic_block bb = gimple_bb (stmt_vinfo->stmt);
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
537 && bb->loop_father == def_bb->loop_father)
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
542 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
543 return opt_result::success ();
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
548 d = dstmt_vinfo
549 inner-loop:
550 stmt # use (d)
551 outer-loop-tail-bb:
552 ... */
553 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
559 switch (relevant)
561 case vect_unused_in_scope:
562 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
563 vect_used_in_scope : vect_unused_in_scope;
564 break;
566 case vect_used_in_outer_by_reduction:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_by_reduction;
569 break;
571 case vect_used_in_outer:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
573 relevant = vect_used_in_scope;
574 break;
576 case vect_used_in_scope:
577 break;
579 default:
580 gcc_unreachable ();
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
587 inner-loop:
588 d = dstmt_vinfo
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
590 stmt # use (d) */
591 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE, vect_location,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
597 switch (relevant)
599 case vect_unused_in_scope:
600 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
602 vect_used_in_outer_by_reduction : vect_unused_in_scope;
603 break;
605 case vect_used_by_reduction:
606 case vect_used_only_live:
607 relevant = vect_used_in_outer_by_reduction;
608 break;
610 case vect_used_in_scope:
611 relevant = vect_used_in_outer;
612 break;
614 default:
615 gcc_unreachable ();
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
621 of course. */
622 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
626 loop_latch_edge (bb->loop_father))
627 == use))
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location,
631 "induction value on backedge.\n");
632 return opt_result::success ();
636 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
637 return opt_result::success ();
641 /* Function vect_mark_stmts_to_be_vectorized.
643 Not all stmts in the loop need to be vectorized. For example:
645 for i...
646 for j...
647 1. T0 = i + j
648 2. T1 = a[T0]
650 3. j = j + 1
652 Stmts 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
655 This pass detects such stmts. */
657 opt_result
658 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
660 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
661 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
662 unsigned int nbbs = loop->num_nodes;
663 gimple_stmt_iterator si;
664 unsigned int i;
665 basic_block bb;
666 bool live_p;
667 enum vect_relevant relevant;
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
671 auto_vec<stmt_vec_info, 64> worklist;
673 /* 1. Init worklist. */
674 for (i = 0; i < nbbs; i++)
676 bb = bbs[i];
677 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
679 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
682 phi_info->stmt);
684 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
685 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
687 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
689 if (is_gimple_debug (gsi_stmt (si)))
690 continue;
691 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "init: stmt relevant? %G", stmt_info->stmt);
696 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
697 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
701 /* 2. Process_worklist */
702 while (worklist.length () > 0)
704 use_operand_p use_p;
705 ssa_op_iter iter;
707 stmt_vec_info stmt_vinfo = worklist.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "worklist: examine stmt: %G", stmt_vinfo->stmt);
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
714 of STMT. */
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
730 case vect_reduction_def:
731 gcc_assert (relevant != vect_unused_in_scope);
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
736 return opt_result::failure_at
737 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
738 break;
740 case vect_nested_cycle:
741 if (relevant != vect_unused_in_scope
742 && relevant != vect_used_in_outer_by_reduction
743 && relevant != vect_used_in_outer)
744 return opt_result::failure_at
745 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
746 break;
748 case vect_double_reduction_def:
749 if (relevant != vect_unused_in_scope
750 && relevant != vect_used_by_reduction
751 && relevant != vect_used_only_live)
752 return opt_result::failure_at
753 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
754 break;
756 default:
757 break;
760 if (is_pattern_stmt_p (stmt_vinfo))
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
767 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
768 tree op = gimple_assign_rhs1 (assign);
770 i = 1;
771 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
773 opt_result res
774 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
775 loop_vinfo, relevant, &worklist, false);
776 if (!res)
777 return res;
778 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
779 loop_vinfo, relevant, &worklist, false);
780 if (!res)
781 return res;
782 i = 2;
784 for (; i < gimple_num_ops (assign); i++)
786 op = gimple_op (assign, i);
787 if (TREE_CODE (op) == SSA_NAME)
789 opt_result res
790 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
791 &worklist, false);
792 if (!res)
793 return res;
797 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
799 for (i = 0; i < gimple_call_num_args (call); i++)
801 tree arg = gimple_call_arg (call, i);
802 opt_result res
803 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
804 &worklist, false);
805 if (!res)
806 return res;
810 else
811 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
813 tree op = USE_FROM_PTR (use_p);
814 opt_result res
815 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
816 &worklist, false);
817 if (!res)
818 return res;
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
823 gather_scatter_info gs_info;
824 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
825 gcc_unreachable ();
826 opt_result res
827 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
828 &worklist, true);
829 if (!res)
831 if (fatal)
832 *fatal = false;
833 return res;
836 } /* while worklist */
838 return opt_result::success ();
841 /* Function vect_model_simple_cost.
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
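/* A minimal sketch of the cost split implemented below, using unit
   per-statement costs instead of the target cost model; the helper name
   and parameters are hypothetical.  */

static void
simple_cost_split_sketch (int ncopies, int n_invariant_operands,
                          int *inside_cost, int *prologue_cost)
{
  /* Constant/external operands are broadcast once, before the loop.  */
  *prologue_cost = n_invariant_operands;
  /* The vector operation itself is issued NCOPIES times in the body.  */
  *inside_cost = ncopies;
}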
847 static void
848 vect_model_simple_cost (vec_info *,
849 stmt_vec_info stmt_info, int ncopies,
850 enum vect_def_type *dt,
851 int ndts,
852 slp_tree node,
853 stmt_vector_for_cost *cost_vec,
854 vect_cost_for_stmt kind = vector_stmt)
856 int inside_cost = 0, prologue_cost = 0;
858 gcc_assert (cost_vec != NULL);
860 /* ??? Somehow we need to fix this at the callers. */
861 if (node)
862 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
864 if (!node)
865 /* Cost the "broadcast" of a scalar operand in to a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
867 cost model. */
868 for (int i = 0; i < ndts; i++)
869 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
870 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
871 stmt_info, 0, vect_prologue);
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
875 stmt_info, 0, vect_body);
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost, prologue_cost);
884 /* Model cost for type demotion and promotion operations. PWR is
885 normally zero for single-step promotions and demotions. It will be
886 one if two-step promotion/demotion is required, and so on. NCOPIES
887 is the number of vector results (and thus number of instructions)
888 for the narrowest end of the operation chain. Each additional
889 step doubles the number of instructions required. If WIDEN_ARITH
890 is true the stmt is doing widening arithmetic. */
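/* A minimal sketch of the statement count behind the costing below,
   assuming the same doubling scheme; the helper name is hypothetical.
   Step I of the chain issues NCOPIES << I vector stmts, so the total
   over PWR + 1 steps is NCOPIES * (2^(PWR+1) - 1), e.g. ncopies == 2
   and pwr == 1 gives 2 + 4 = 6.  */

static unsigned
promotion_demotion_stmt_count_sketch (unsigned ncopies, int pwr)
{
  unsigned total = 0;
  for (int i = 0; i < pwr + 1; i++)
    {
      total += ncopies;
      ncopies *= 2;
    }
  return total;
}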
892 static void
893 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
894 enum vect_def_type *dt,
895 unsigned int ncopies, int pwr,
896 stmt_vector_for_cost *cost_vec,
897 bool widen_arith)
899 int i;
900 int inside_cost = 0, prologue_cost = 0;
902 for (i = 0; i < pwr + 1; i++)
904 inside_cost += record_stmt_cost (cost_vec, ncopies,
905 widen_arith
906 ? vector_stmt : vec_promote_demote,
907 stmt_info, 0, vect_body);
908 ncopies *= 2;
911 /* FORNOW: Assuming maximum 2 args per stmts. */
912 for (i = 0; i < 2; i++)
913 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
914 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
915 stmt_info, 0, vect_prologue);
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE, vect_location,
919 "vect_model_promotion_demotion_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost, prologue_cost);
923 /* Returns true if the current function returns DECL. */
925 static bool
926 cfun_returns (tree decl)
928 edge_iterator ei;
929 edge e;
930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
932 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
933 if (!ret)
934 continue;
935 if (gimple_return_retval (ret) == decl)
936 return true;
937 /* We often end up with an aggregate copy to the result decl,
938 handle that case as well. First skip intermediate clobbers
939 though. */
940 gimple *def = ret;
943 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
945 while (gimple_clobber_p (def));
946 if (is_a <gassign *> (def)
947 && gimple_assign_lhs (def) == gimple_return_retval (ret)
948 && gimple_assign_rhs1 (def) == decl)
949 return true;
951 return false;
954 /* Calculate cost of DR's memory access. */
955 void
956 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
957 dr_alignment_support alignment_support_scheme,
958 int misalignment,
959 unsigned int *inside_cost,
960 stmt_vector_for_cost *body_cost_vec)
962 switch (alignment_support_scheme)
964 case dr_aligned:
966 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
967 vector_store, stmt_info, 0,
968 vect_body);
970 if (dump_enabled_p ())
971 dump_printf_loc (MSG_NOTE, vect_location,
972 "vect_model_store_cost: aligned.\n");
973 break;
976 case dr_unaligned_supported:
978 /* Here, we assign an additional cost for the unaligned store. */
979 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
980 unaligned_store, stmt_info,
981 misalignment, vect_body);
982 if (dump_enabled_p ())
983 dump_printf_loc (MSG_NOTE, vect_location,
984 "vect_model_store_cost: unaligned supported by "
985 "hardware.\n");
986 break;
989 case dr_unaligned_unsupported:
991 *inside_cost = VECT_MAX_COST;
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
995 "vect_model_store_cost: unsupported access.\n");
996 break;
999 default:
1000 gcc_unreachable ();
1004 /* Calculate cost of DR's memory access. */
1005 void
1006 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1007 dr_alignment_support alignment_support_scheme,
1008 int misalignment,
1009 bool add_realign_cost, unsigned int *inside_cost,
1010 unsigned int *prologue_cost,
1011 stmt_vector_for_cost *prologue_cost_vec,
1012 stmt_vector_for_cost *body_cost_vec,
1013 bool record_prologue_costs)
1015 switch (alignment_support_scheme)
1017 case dr_aligned:
1019 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1020 stmt_info, 0, vect_body);
1022 if (dump_enabled_p ())
1023 dump_printf_loc (MSG_NOTE, vect_location,
1024 "vect_model_load_cost: aligned.\n");
1026 break;
1028 case dr_unaligned_supported:
1030 /* Here, we assign an additional cost for the unaligned load. */
1031 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1032 unaligned_load, stmt_info,
1033 misalignment, vect_body);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE, vect_location,
1037 "vect_model_load_cost: unaligned supported by "
1038 "hardware.\n");
1040 break;
1042 case dr_explicit_realign:
1044 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1045 vector_load, stmt_info, 0, vect_body);
1046 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1047 vec_perm, stmt_info, 0, vect_body);
1049 /* FIXME: If the misalignment remains fixed across the iterations of
1050 the containing loop, the following cost should be added to the
1051 prologue costs. */
1052 if (targetm.vectorize.builtin_mask_for_load)
1053 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1054 stmt_info, 0, vect_body);
1056 if (dump_enabled_p ())
1057 dump_printf_loc (MSG_NOTE, vect_location,
1058 "vect_model_load_cost: explicit realign\n");
1060 break;
1062 case dr_explicit_realign_optimized:
1064 if (dump_enabled_p ())
1065 dump_printf_loc (MSG_NOTE, vect_location,
1066 "vect_model_load_cost: unaligned software "
1067 "pipelined.\n");
1069 /* Unaligned software pipeline has a load of an address, an initial
1070 load, and possibly a mask operation to "prime" the loop. However,
1071 if this is an access in a group of loads, which provide grouped
1072 access, then the above cost should only be considered for one
1073 access in the group. Inside the loop, there is a load op
1074 and a realignment op. */
1076 if (add_realign_cost && record_prologue_costs)
1078 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1079 vector_stmt, stmt_info,
1080 0, vect_prologue);
1081 if (targetm.vectorize.builtin_mask_for_load)
1082 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1083 vector_stmt, stmt_info,
1084 0, vect_prologue);
1087 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1088 stmt_info, 0, vect_body);
1089 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1090 stmt_info, 0, vect_body);
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_NOTE, vect_location,
1094 "vect_model_load_cost: explicit realign optimized"
1095 "\n");
1097 break;
1100 case dr_unaligned_unsupported:
1102 *inside_cost = VECT_MAX_COST;
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1106 "vect_model_load_cost: unsupported access.\n");
1107 break;
1110 default:
1111 gcc_unreachable ();
1115 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1116 the loop preheader for the vectorized stmt STMT_VINFO. */
1118 static void
1119 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1120 gimple_stmt_iterator *gsi)
1122 if (gsi)
1123 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1124 else
1125 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE, vect_location,
1129 "created new init_stmt: %G", new_stmt);
1132 /* Function vect_init_vector.
1134 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1135 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1136 vector type a vector with all elements equal to VAL is created first.
1137 Place the initialization at GSI if it is not NULL. Otherwise, place the
1138 initialization at the loop preheader.
1139 Return the DEF of INIT_STMT.
1140 It will be used in the vectorization of STMT_INFO. */
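/* For illustration (values chosen here, not taken from a caller): with
   TYPE == V4SI and VAL == 5 the init stmt built below is equivalent to
   cst_ = { 5, 5, 5, 5 }.  When TYPE is a boolean vector type, VAL is
   first canonicalized to all-zeros / all-ones before the splat, and
   with a scalar TYPE and matching types the result is simply
   cst_ = VAL.  */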
1142 tree
1143 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1144 gimple_stmt_iterator *gsi)
1146 gimple *init_stmt;
1147 tree new_temp;
1149 /* We abuse this function to push something to an SSA name with initial 'val'. */
1150 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1152 gcc_assert (VECTOR_TYPE_P (type));
1153 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1155 /* Scalar boolean value should be transformed into
1156 all zeros or all ones value before building a vector. */
1157 if (VECTOR_BOOLEAN_TYPE_P (type))
1159 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1160 tree false_val = build_zero_cst (TREE_TYPE (type));
1162 if (CONSTANT_CLASS_P (val))
1163 val = integer_zerop (val) ? false_val : true_val;
1164 else
1166 new_temp = make_ssa_name (TREE_TYPE (type));
1167 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1168 val, true_val, false_val);
1169 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1170 val = new_temp;
1173 else
1175 gimple_seq stmts = NULL;
1176 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1177 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1178 TREE_TYPE (type), val);
1179 else
1180 /* ??? Condition vectorization expects us to do
1181 promotion of invariant/external defs. */
1182 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1183 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1184 !gsi_end_p (gsi2); )
1186 init_stmt = gsi_stmt (gsi2);
1187 gsi_remove (&gsi2, false);
1188 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1192 val = build_vector_from_val (type, val);
1195 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1196 init_stmt = gimple_build_assign (new_temp, val);
1197 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1198 return new_temp;
1202 /* Function vect_get_vec_defs_for_operand.
1204 OP is an operand in STMT_VINFO. This function returns a vector of
1205 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1207 In the case that OP is an SSA_NAME which is defined in the loop, then
1208 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1210 In case OP is an invariant or constant, a new stmt that creates a vector def
1211 needs to be introduced. VECTYPE may be used to specify a required type for
1212 vector invariant. */
1214 void
1215 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1216 unsigned ncopies,
1217 tree op, vec<tree> *vec_oprnds, tree vectype)
1219 gimple *def_stmt;
1220 enum vect_def_type dt;
1221 bool is_simple_use;
1222 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_get_vec_defs_for_operand: %T\n", op);
1228 stmt_vec_info def_stmt_info;
1229 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1230 &def_stmt_info, &def_stmt);
1231 gcc_assert (is_simple_use);
1232 if (def_stmt && dump_enabled_p ())
1233 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1235 vec_oprnds->create (ncopies);
1236 if (dt == vect_constant_def || dt == vect_external_def)
1238 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1239 tree vector_type;
1241 if (vectype)
1242 vector_type = vectype;
1243 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1244 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1245 vector_type = truth_type_for (stmt_vectype);
1246 else
1247 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1249 gcc_assert (vector_type);
1250 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1251 while (ncopies--)
1252 vec_oprnds->quick_push (vop);
1254 else
1256 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1257 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1258 for (unsigned i = 0; i < ncopies; ++i)
1259 vec_oprnds->quick_push (gimple_get_lhs
1260 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1265 /* Get vectorized definitions for OP0 and OP1. */
1267 void
1268 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1269 unsigned ncopies,
1270 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1271 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1272 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1273 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1275 if (slp_node)
1277 if (op0)
1278 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1279 if (op1)
1280 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1281 if (op2)
1282 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1283 if (op3)
1284 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1286 else
1288 if (op0)
1289 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1290 op0, vec_oprnds0, vectype0);
1291 if (op1)
1292 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1293 op1, vec_oprnds1, vectype1);
1294 if (op2)
1295 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1296 op2, vec_oprnds2, vectype2);
1297 if (op3)
1298 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1299 op3, vec_oprnds3, vectype3);
1303 void
1304 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1305 unsigned ncopies,
1306 tree op0, vec<tree> *vec_oprnds0,
1307 tree op1, vec<tree> *vec_oprnds1,
1308 tree op2, vec<tree> *vec_oprnds2,
1309 tree op3, vec<tree> *vec_oprnds3)
1311 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1312 op0, vec_oprnds0, NULL_TREE,
1313 op1, vec_oprnds1, NULL_TREE,
1314 op2, vec_oprnds2, NULL_TREE,
1315 op3, vec_oprnds3, NULL_TREE);
1318 /* Helper function called by vect_finish_replace_stmt and
1319 vect_finish_stmt_generation. Set the location of the new
1320 statement and create and return a stmt_vec_info for it. */
1322 static void
1323 vect_finish_stmt_generation_1 (vec_info *,
1324 stmt_vec_info stmt_info, gimple *vec_stmt)
1326 if (dump_enabled_p ())
1327 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1329 if (stmt_info)
1331 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1333 /* While EH edges will generally prevent vectorization, stmt might
1334 e.g. be in a must-not-throw region. Ensure newly created stmts
1335 that could throw are part of the same region. */
1336 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1337 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1338 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1340 else
1341 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1344 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1345 which sets the same scalar result as STMT_INFO did. Create and return a
1346 stmt_vec_info for VEC_STMT. */
1348 void
1349 vect_finish_replace_stmt (vec_info *vinfo,
1350 stmt_vec_info stmt_info, gimple *vec_stmt)
1352 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1353 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1355 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1356 gsi_replace (&gsi, vec_stmt, true);
1358 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1361 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1362 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1364 void
1365 vect_finish_stmt_generation (vec_info *vinfo,
1366 stmt_vec_info stmt_info, gimple *vec_stmt,
1367 gimple_stmt_iterator *gsi)
1369 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1371 if (!gsi_end_p (*gsi)
1372 && gimple_has_mem_ops (vec_stmt))
1374 gimple *at_stmt = gsi_stmt (*gsi);
1375 tree vuse = gimple_vuse (at_stmt);
1376 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1378 tree vdef = gimple_vdef (at_stmt);
1379 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1380 gimple_set_modified (vec_stmt, true);
1381 /* If we have an SSA vuse and insert a store, update virtual
1382 SSA form to avoid triggering the renamer. Do so only
1383 if we can easily see all uses - which is what almost always
1384 happens with the way vectorized stmts are inserted. */
1385 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1386 && ((is_gimple_assign (vec_stmt)
1387 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1388 || (is_gimple_call (vec_stmt)
1389 && (!(gimple_call_flags (vec_stmt)
1390 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1391 || (gimple_call_lhs (vec_stmt)
1392 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1394 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1395 gimple_set_vdef (vec_stmt, new_vdef);
1396 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1400 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1401 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1404 /* We want to vectorize a call to combined function CFN with function
1405 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1406 as the types of all inputs. Check whether this is possible using
1407 an internal function, returning its code if so or IFN_LAST if not. */
1409 static internal_fn
1410 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1411 tree vectype_out, tree vectype_in)
1413 internal_fn ifn;
1414 if (internal_fn_p (cfn))
1415 ifn = as_internal_fn (cfn);
1416 else
1417 ifn = associated_internal_fn (fndecl);
1418 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1420 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1421 if (info.vectorizable)
1423 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1424 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1425 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1426 OPTIMIZE_FOR_SPEED))
1427 return ifn;
1430 return IFN_LAST;
1434 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1435 gimple_stmt_iterator *);
1437 /* Check whether a load or store statement in the loop described by
1438 LOOP_VINFO is possible in a loop using partial vectors. This is
1439 testing whether the vectorizer pass has the appropriate support,
1440 as well as whether the target does.
1442 VLS_TYPE says whether the statement is a load or store and VECTYPE
1443 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1444 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1445 says how the load or store is going to be implemented and GROUP_SIZE
1446 is the number of load or store statements in the containing group.
1447 If the access is a gather load or scatter store, GS_INFO describes
1448 its arguments. If the load or store is conditional, SCALAR_MASK is the
1449 condition under which it occurs.
1451 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1452 vectors is not supported, otherwise record the required rgroup control
1453 types. */
1455 static void
1456 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1457 slp_tree slp_node,
1458 vec_load_store_type vls_type,
1459 int group_size,
1460 vect_memory_access_type
1461 memory_access_type,
1462 gather_scatter_info *gs_info,
1463 tree scalar_mask)
1465 /* Invariant loads need no special support. */
1466 if (memory_access_type == VMAT_INVARIANT)
1467 return;
1469 unsigned int nvectors;
1470 if (slp_node)
1471 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1472 else
1473 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1475 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1476 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1477 machine_mode vecmode = TYPE_MODE (vectype);
1478 bool is_load = (vls_type == VLS_LOAD);
1479 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1481 internal_fn ifn
1482 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1483 : vect_store_lanes_supported (vectype, group_size, true));
1484 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1485 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1486 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1487 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1488 scalar_mask);
1489 else
1491 if (dump_enabled_p ())
1492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1493 "can't operate on partial vectors because"
1494 " the target doesn't have an appropriate"
1495 " load/store-lanes instruction.\n");
1496 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1498 return;
1501 if (memory_access_type == VMAT_GATHER_SCATTER)
1503 internal_fn ifn = (is_load
1504 ? IFN_MASK_GATHER_LOAD
1505 : IFN_MASK_SCATTER_STORE);
1506 internal_fn len_ifn = (is_load
1507 ? IFN_MASK_LEN_GATHER_LOAD
1508 : IFN_MASK_LEN_SCATTER_STORE);
1509 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1510 gs_info->memory_type,
1511 gs_info->offset_vectype,
1512 gs_info->scale))
1513 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1514 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1515 gs_info->memory_type,
1516 gs_info->offset_vectype,
1517 gs_info->scale))
1518 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1519 scalar_mask);
1520 else
1522 if (dump_enabled_p ())
1523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1524 "can't operate on partial vectors because"
1525 " the target doesn't have an appropriate"
1526 " gather load or scatter store instruction.\n");
1527 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1529 return;
1532 if (memory_access_type != VMAT_CONTIGUOUS
1533 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1535 /* Element X of the data must come from iteration i * VF + X of the
1536 scalar loop. We need more work to support other mappings. */
1537 if (dump_enabled_p ())
1538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1539 "can't operate on partial vectors because an"
1540 " access isn't contiguous.\n");
1541 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1542 return;
1545 if (!VECTOR_MODE_P (vecmode))
1547 if (dump_enabled_p ())
1548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1549 "can't operate on partial vectors when emulating"
1550 " vector operations.\n");
1551 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1552 return;
1555 /* We might load more scalars than we need for permuting SLP loads.
1556 We checked in get_group_load_store_type that the extra elements
1557 don't leak into a new vector. */
1558 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1560 unsigned int nvectors;
1561 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1562 return nvectors;
1563 gcc_unreachable ();
1566 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1567 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1568 machine_mode mask_mode;
1569 machine_mode vmode;
1570 bool using_partial_vectors_p = false;
1571 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1573 nvectors = group_memory_nvectors (group_size * vf, nunits);
1574 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1575 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1576 using_partial_vectors_p = true;
1578 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1579 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1581 nvectors = group_memory_nvectors (group_size * vf, nunits);
1582 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1583 using_partial_vectors_p = true;
1586 if (!using_partial_vectors_p)
1588 if (dump_enabled_p ())
1589 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1590 "can't operate on partial vectors because the"
1591 " target doesn't have the appropriate partial"
1592 " vectorization load or store.\n");
1593 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1597 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1598 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1599 that needs to be applied to all loads and stores in a vectorized loop.
1600 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1601 otherwise return VEC_MASK & LOOP_MASK.
1603 MASK_TYPE is the type of both masks. If new statements are needed,
1604 insert them before GSI. */
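/* A minimal sketch of the combination rule described above, over plain
   integer bitmasks rather than vector masks; the helper name is
   hypothetical.  */

static unsigned
combine_masks_sketch (unsigned vec_mask, bool have_loop_mask,
                      unsigned loop_mask)
{
  if (!have_loop_mask)
    return vec_mask;
  /* Only lanes active in both the scalar condition and the loop mask
     survive.  */
  return vec_mask & loop_mask;
}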
1606 static tree
1607 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1608 tree vec_mask, gimple_stmt_iterator *gsi)
1610 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1611 if (!loop_mask)
1612 return vec_mask;
1614 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1616 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1617 return vec_mask;
1619 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1620 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1621 vec_mask, loop_mask);
1623 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1624 return and_res;
1627 /* Determine whether we can use a gather load or scatter store to vectorize
1628 strided load or store STMT_INFO by truncating the current offset to a
1629 smaller width. We need to be able to construct an offset vector:
1631 { 0, X, X*2, X*3, ... }
1633 without loss of precision, where X is STMT_INFO's DR_STEP.
1635 Return true if this is possible, describing the gather load or scatter
1636 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
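/* A minimal sketch of the precision reasoning below, in plain integer
   arithmetic; the helper name is hypothetical and it assumes STEP is a
   positive multiple of SCALE and that COUNT * (STEP / SCALE) does not
   overflow.  E.g. step == 4, count == 999 and scale == 4 give a range
   of 999, which needs 10 bits and is rounded up to a 16-bit offset
   type.  */

static unsigned
truncated_offset_bits_sketch (unsigned long step, unsigned long count,
                              unsigned long scale)
{
  unsigned long factor = step / scale;
  unsigned long range = count * factor;   /* largest offset / SCALE */
  unsigned bits = 1;                      /* min precision of RANGE */
  while ((1UL << bits) <= range)
    bits++;
  unsigned width = 1;                     /* next power-of-two width */
  while (width < bits)
    width *= 2;
  return width;
}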
1638 static bool
1639 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1640 loop_vec_info loop_vinfo, bool masked_p,
1641 gather_scatter_info *gs_info)
1643 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1644 data_reference *dr = dr_info->dr;
1645 tree step = DR_STEP (dr);
1646 if (TREE_CODE (step) != INTEGER_CST)
1648 /* ??? Perhaps we could use range information here? */
1649 if (dump_enabled_p ())
1650 dump_printf_loc (MSG_NOTE, vect_location,
1651 "cannot truncate variable step.\n");
1652 return false;
1655 /* Get the number of bits in an element. */
1656 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1657 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1658 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1660 /* Set COUNT to the upper limit on the number of elements - 1.
1661 Start with the maximum vectorization factor. */
1662 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1664 /* Try lowering COUNT to the number of scalar latch iterations. */
1665 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1666 widest_int max_iters;
1667 if (max_loop_iterations (loop, &max_iters)
1668 && max_iters < count)
1669 count = max_iters.to_shwi ();
1671 /* Try scales of 1 and the element size. */
1672 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1673 wi::overflow_type overflow = wi::OVF_NONE;
1674 for (int i = 0; i < 2; ++i)
1676 int scale = scales[i];
1677 widest_int factor;
1678 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1679 continue;
1681 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1682 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1683 if (overflow)
1684 continue;
1685 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1686 unsigned int min_offset_bits = wi::min_precision (range, sign);
1688 /* Find the narrowest viable offset type. */
1689 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1690 tree offset_type = build_nonstandard_integer_type (offset_bits,
1691 sign == UNSIGNED);
1693 /* See whether the target supports the operation with an offset
1694 no narrower than OFFSET_TYPE. */
1695 tree memory_type = TREE_TYPE (DR_REF (dr));
1696 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1697 vectype, memory_type, offset_type, scale,
1698 &gs_info->ifn, &gs_info->offset_vectype)
1699 || gs_info->ifn == IFN_LAST)
1700 continue;
1702 gs_info->decl = NULL_TREE;
1703 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1704 but we don't need to store that here. */
1705 gs_info->base = NULL_TREE;
1706 gs_info->element_type = TREE_TYPE (vectype);
1707 gs_info->offset = fold_convert (offset_type, step);
1708 gs_info->offset_dt = vect_constant_def;
1709 gs_info->scale = scale;
1710 gs_info->memory_type = memory_type;
1711 return true;
1714 if (overflow && dump_enabled_p ())
1715 dump_printf_loc (MSG_NOTE, vect_location,
1716 "truncating gather/scatter offset to %d bits"
1717 " might change its value.\n", element_bits);
1719 return false;
1722 /* Return true if we can use gather/scatter internal functions to
1723 vectorize STMT_INFO, which is a grouped or strided load or store.
1724 MASKED_P is true if load or store is conditional. When returning
1725 true, fill in GS_INFO with the information required to perform the
1726 operation. */
1728 static bool
1729 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1730 loop_vec_info loop_vinfo, bool masked_p,
1731 gather_scatter_info *gs_info)
1733 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1734 || gs_info->ifn == IFN_LAST)
1735 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1736 masked_p, gs_info);
1738 tree old_offset_type = TREE_TYPE (gs_info->offset);
1739 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1741 gcc_assert (TYPE_PRECISION (new_offset_type)
1742 >= TYPE_PRECISION (old_offset_type));
1743 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1745 if (dump_enabled_p ())
1746 dump_printf_loc (MSG_NOTE, vect_location,
1747 "using gather/scatter for strided/grouped access,"
1748 " scale = %d\n", gs_info->scale);
1750 return true;
1753 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1754 elements with a known constant step. Return -1 if that step
1755 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1757 static int
1758 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1760 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1761 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1762 size_zero_node);
1765 /* If the target supports a permute mask that reverses the elements in
1766 a vector of type VECTYPE, return that mask, otherwise return null. */
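/* For instance, assuming an 8-element VECTYPE such as V8SI, the selector
   built below is { 7, 6, 5, 4, 3, 2, 1, 0 }, encoded as the single
   stepped pattern { 7, 6, 5, ... }.  */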
1768 static tree
1769 perm_mask_for_reverse (tree vectype)
1771 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1773 /* The encoding has a single stepped pattern. */
1774 vec_perm_builder sel (nunits, 1, 3);
1775 for (int i = 0; i < 3; ++i)
1776 sel.quick_push (nunits - 1 - i);
1778 vec_perm_indices indices (sel, 1, nunits);
1779 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1780 indices))
1781 return NULL_TREE;
1782 return vect_gen_perm_mask_checked (vectype, indices);
1785 /* A subroutine of get_load_store_type, with a subset of the same
1786 arguments. Handle the case where STMT_INFO is a load or store that
1787 accesses consecutive elements with a negative step. Sets *POFFSET
1788 to the offset to be applied to the DR for the first access. */
1790 static vect_memory_access_type
1791 get_negative_load_store_type (vec_info *vinfo,
1792 stmt_vec_info stmt_info, tree vectype,
1793 vec_load_store_type vls_type,
1794 unsigned int ncopies, poly_int64 *poffset)
1796 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1797 dr_alignment_support alignment_support_scheme;
1799 if (ncopies > 1)
1801 if (dump_enabled_p ())
1802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1803 "multiple types with negative step.\n");
1804 return VMAT_ELEMENTWISE;
1807 /* For backward running DRs the first access in vectype actually is
1808 N-1 elements before the address of the DR. */
1809 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1810 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
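/* For instance, assuming a V4SI vector of 4-byte elements, *POFFSET is
   (-4 + 1) * 4 = -12, i.e. the first vector access starts three elements
   before the address recorded in the DR.  */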
1812 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1813 alignment_support_scheme
1814 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1815 if (alignment_support_scheme != dr_aligned
1816 && alignment_support_scheme != dr_unaligned_supported)
1818 if (dump_enabled_p ())
1819 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1820 "negative step but alignment required.\n");
1821 *poffset = 0;
1822 return VMAT_ELEMENTWISE;
1825 if (vls_type == VLS_STORE_INVARIANT)
1827 if (dump_enabled_p ())
1828 dump_printf_loc (MSG_NOTE, vect_location,
1829 "negative step with invariant source;"
1830 " no permute needed.\n");
1831 return VMAT_CONTIGUOUS_DOWN;
1834 if (!perm_mask_for_reverse (vectype))
1836 if (dump_enabled_p ())
1837 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1838 "negative step and reversing not supported.\n");
1839 *poffset = 0;
1840 return VMAT_ELEMENTWISE;
1843 return VMAT_CONTIGUOUS_REVERSE;
1846 /* STMT_INFO is either a masked or unconditional store. Return the value
1847 being stored. */
1849 tree
1850 vect_get_store_rhs (stmt_vec_info stmt_info)
1852 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1854 gcc_assert (gimple_assign_single_p (assign));
1855 return gimple_assign_rhs1 (assign);
1857 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1859 internal_fn ifn = gimple_call_internal_fn (call);
1860 int index = internal_fn_stored_value_index (ifn);
1861 gcc_assert (index >= 0);
1862 return gimple_call_arg (call, index);
1864 gcc_unreachable ();
1867 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
1869 This function returns a vector type which can be composed from NELTS pieces,
1870 whose type is recorded in PTYPE. VTYPE should be a vector type with the
1871 same vector size as the returned vector. It first checks whether the target
1872 supports a piece-sized vector mode for the construction; if not, it checks
1873 whether a piece-sized scalar mode can be used instead. It returns NULL_TREE
1874 if no suitable composition can be found.
1876 For example, for (vtype=V16QI, nelts=4), we can probably get:
1877 - V16QI with PTYPE V4QI.
1878 - V4SI with PTYPE SI.
1879 - NULL_TREE. */
1881 static tree
1882 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
1884 gcc_assert (VECTOR_TYPE_P (vtype));
1885 gcc_assert (known_gt (nelts, 0U));
1887 machine_mode vmode = TYPE_MODE (vtype);
1888 if (!VECTOR_MODE_P (vmode))
1889 return NULL_TREE;
1891 /* When we are asked to compose the vector from its components let
1892 that happen directly. */
1893 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
1895 *ptype = TREE_TYPE (vtype);
1896 return vtype;
1899 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
1900 unsigned int pbsize;
1901 if (constant_multiple_p (vbsize, nelts, &pbsize))
1903 /* First check if vec_init optab supports construction from
1904 vector pieces directly. */
1905 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
1906 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
1907 machine_mode rmode;
1908 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
1909 && (convert_optab_handler (vec_init_optab, vmode, rmode)
1910 != CODE_FOR_nothing))
1912 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
1913 return vtype;
1916 /* Otherwise check whether an integer type of the same piece size exists
1917 and whether the vec_init optab supports construction from it directly. */
1918 if (int_mode_for_size (pbsize, 0).exists (&elmode)
1919 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
1920 && (convert_optab_handler (vec_init_optab, rmode, elmode)
1921 != CODE_FOR_nothing))
1923 *ptype = build_nonstandard_integer_type (pbsize, 1);
1924 return build_vector_type (*ptype, nelts);
1928 return NULL_TREE;
1931 /* A subroutine of get_load_store_type, with a subset of the same
1932 arguments. Handle the case where STMT_INFO is part of a grouped load
1933 or store.
1935 For stores, the statements in the group are all consecutive
1936 and there is no gap at the end. For loads, the statements in the
1937 group might not be consecutive; there can be gaps between statements
1938 as well as at the end. */
1940 static bool
1941 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
1942 tree vectype, slp_tree slp_node,
1943 bool masked_p, vec_load_store_type vls_type,
1944 vect_memory_access_type *memory_access_type,
1945 poly_int64 *poffset,
1946 dr_alignment_support *alignment_support_scheme,
1947 int *misalignment,
1948 gather_scatter_info *gs_info,
1949 internal_fn *lanes_ifn)
1951 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1952 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1953 stmt_vec_info first_stmt_info;
1954 unsigned int group_size;
1955 unsigned HOST_WIDE_INT gap;
1956 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1958 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1959 group_size = DR_GROUP_SIZE (first_stmt_info);
1960 gap = DR_GROUP_GAP (first_stmt_info);
1962 else
1964 first_stmt_info = stmt_info;
1965 group_size = 1;
1966 gap = 0;
1968 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
1969 bool single_element_p = (stmt_info == first_stmt_info
1970 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
1971 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1973 /* True if the vectorized statements would access beyond the last
1974 statement in the group. */
1975 bool overrun_p = false;
1977 /* True if we can cope with such overrun by peeling for gaps, so that
1978 there is at least one final scalar iteration after the vector loop. */
1979 bool can_overrun_p = (!masked_p
1980 && vls_type == VLS_LOAD
1981 && loop_vinfo
1982 && !loop->inner);
1984 /* There can only be a gap at the end of the group if the stride is
1985 known at compile time. */
1986 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
1988 /* Stores can't yet have gaps. */
1989 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
1991 if (slp_node)
1993 /* For SLP vectorization we directly vectorize a subchain
1994 without permutation. */
1995 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1996 first_dr_info
1997 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
1998 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2000 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2001 separated by the stride, until we have a complete vector.
2002 Fall back to scalar accesses if that isn't possible. */
2003 if (multiple_p (nunits, group_size))
2004 *memory_access_type = VMAT_STRIDED_SLP;
2005 else
2006 *memory_access_type = VMAT_ELEMENTWISE;
2008 else
2010 overrun_p = loop_vinfo && gap != 0;
2011 if (overrun_p && vls_type != VLS_LOAD)
2013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2014 "Grouped store with gaps requires"
2015 " non-consecutive accesses\n");
2016 return false;
2018 /* An overrun is fine if the trailing elements are smaller
2019 than the alignment boundary B. Every vector access will
2020 be a multiple of B and so we are guaranteed to access a
2021 non-gap element in the same B-sized block. */
2022 if (overrun_p
2023 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2024 vectype)
2025 / vect_get_scalar_dr_size (first_dr_info)))
2026 overrun_p = false;
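/* E.g., assuming 4-byte elements, a gap of 1 and a known alignment of
   16 bytes: 1 < 16 / 4, so the trailing access stays within the last
   aligned 16-byte block and the overrun is harmless.  */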
2028 /* If the gap splits the vector in half and the target
2029 can do half-vector operations avoid the epilogue peeling
2030 by simply loading half of the vector only. Usually
2031 the construction with an upper zero half will be elided. */
2032 dr_alignment_support alss;
2033 int misalign = dr_misalignment (first_dr_info, vectype);
2034 tree half_vtype;
2035 if (overrun_p
2036 && !masked_p
2037 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2038 vectype, misalign)))
2039 == dr_aligned
2040 || alss == dr_unaligned_supported)
2041 && known_eq (nunits, (group_size - gap) * 2)
2042 && known_eq (nunits, group_size)
2043 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2044 != NULL_TREE))
2045 overrun_p = false;
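/* As an illustration: with an 8-element vector, GROUP_SIZE == 8 and
   GAP == 4, (GROUP_SIZE - GAP) * 2 equals NUNITS, so if the target can
   compose the vector from two halves we only load the low half and
   avoid peeling for the gap.  */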
2047 if (overrun_p && !can_overrun_p)
2049 if (dump_enabled_p ())
2050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2051 "Peeling for outer loop is not supported\n");
2052 return false;
2054 int cmp = compare_step_with_zero (vinfo, stmt_info);
2055 if (cmp < 0)
2057 if (single_element_p)
2058 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2059 only correct for single element "interleaving" SLP. */
2060 *memory_access_type = get_negative_load_store_type
2061 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2062 else
2064 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2065 separated by the stride, until we have a complete vector.
2066 Fall back to scalar accesses if that isn't possible. */
2067 if (multiple_p (nunits, group_size))
2068 *memory_access_type = VMAT_STRIDED_SLP;
2069 else
2070 *memory_access_type = VMAT_ELEMENTWISE;
2073 else if (cmp == 0 && loop_vinfo)
2075 gcc_assert (vls_type == VLS_LOAD);
2076 *memory_access_type = VMAT_INVARIANT;
2077 /* Invariant accesses perform only component accesses, alignment
2078 is irrelevant for them. */
2079 *alignment_support_scheme = dr_unaligned_supported;
2081 else
2082 *memory_access_type = VMAT_CONTIGUOUS;
2084 /* When we have a contiguous access across loop iterations
2085 but the access in the loop doesn't cover the full vector
2086 we can end up with no gap recorded but still excess
2087 elements accessed, see PR103116. Make sure we peel for
2088 gaps if necessary and sufficient and give up if not.
2090 If there is a combination of the access not covering the full
2091 vector and a gap recorded then we may need to peel twice. */
2092 if (loop_vinfo
2093 && *memory_access_type == VMAT_CONTIGUOUS
2094 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2095 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2096 nunits))
2098 unsigned HOST_WIDE_INT cnunits, cvf;
2099 if (!can_overrun_p
2100 || !nunits.is_constant (&cnunits)
2101 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2102 /* Peeling for gaps assumes that a single scalar iteration
2103 is enough to make sure the last vector iteration doesn't
2104 access excess elements.
2105 ??? Enhancements include peeling multiple iterations
2106 or using masked loads with a static mask. */
2107 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2109 if (dump_enabled_p ())
2110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2111 "peeling for gaps insufficient for "
2112 "access\n");
2113 return false;
2115 overrun_p = true;
2119 else
2121 /* We can always handle this case using elementwise accesses,
2122 but see if something more efficient is available. */
2123 *memory_access_type = VMAT_ELEMENTWISE;
2125 /* If there is a gap at the end of the group then these optimizations
2126 would access excess elements in the last iteration. */
2127 bool would_overrun_p = (gap != 0);
2128 /* An overrun is fine if the trailing elements are smaller than the
2129 alignment boundary B. Every vector access will be a multiple of B
2130 and so we are guaranteed to access a non-gap element in the
2131 same B-sized block. */
2132 if (would_overrun_p
2133 && !masked_p
2134 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2135 / vect_get_scalar_dr_size (first_dr_info)))
2136 would_overrun_p = false;
2138 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2139 && (can_overrun_p || !would_overrun_p)
2140 && compare_step_with_zero (vinfo, stmt_info) > 0)
2142 /* First cope with the degenerate case of a single-element
2143 vector. */
2144 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2147 else
2149 /* Otherwise try using LOAD/STORE_LANES. */
2150 *lanes_ifn
2151 = vls_type == VLS_LOAD
2152 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2153 : vect_store_lanes_supported (vectype, group_size,
2154 masked_p);
2155 if (*lanes_ifn != IFN_LAST)
2157 *memory_access_type = VMAT_LOAD_STORE_LANES;
2158 overrun_p = would_overrun_p;
2161 /* If that fails, try using permuting loads. */
2162 else if (vls_type == VLS_LOAD
2163 ? vect_grouped_load_supported (vectype,
2164 single_element_p,
2165 group_size)
2166 : vect_grouped_store_supported (vectype, group_size))
2168 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2169 overrun_p = would_overrun_p;
2174 /* As a last resort, try using a gather load or scatter store.
2176 ??? Although the code can handle all group sizes correctly,
2177 it probably isn't a win to use separate strided accesses based
2178 on nearby locations. Or, even if it's a win over scalar code,
2179 it might not be a win over vectorizing at a lower VF, if that
2180 allows us to use contiguous accesses. */
2181 if (*memory_access_type == VMAT_ELEMENTWISE
2182 && single_element_p
2183 && loop_vinfo
2184 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2185 masked_p, gs_info))
2186 *memory_access_type = VMAT_GATHER_SCATTER;
2189 if (*memory_access_type == VMAT_GATHER_SCATTER
2190 || *memory_access_type == VMAT_ELEMENTWISE)
2192 *alignment_support_scheme = dr_unaligned_supported;
2193 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2195 else
2197 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2198 *alignment_support_scheme
2199 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2200 *misalignment);
2203 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2205 /* STMT is the leader of the group. Check the operands of all the
2206 stmts of the group. */
2207 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2208 while (next_stmt_info)
2210 tree op = vect_get_store_rhs (next_stmt_info);
2211 enum vect_def_type dt;
2212 if (!vect_is_simple_use (op, vinfo, &dt))
2214 if (dump_enabled_p ())
2215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2216 "use not simple.\n");
2217 return false;
2219 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2223 if (overrun_p)
2225 gcc_assert (can_overrun_p);
2226 if (dump_enabled_p ())
2227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2228 "Data access with gaps requires scalar "
2229 "epilogue loop\n");
2230 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2233 return true;
2236 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2237 if there is a memory access type that the vectorized form can use,
2238 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2239 or scatters, fill in GS_INFO accordingly. In addition
2240 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2241 the target does not support the alignment scheme. *MISALIGNMENT
2242 is set according to the alignment of the access (including
2243 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2245 SLP says whether we're performing SLP rather than loop vectorization.
2246 MASKED_P is true if the statement is conditional on a vectorized mask.
2247 VECTYPE is the vector type that the vectorized statements will use.
2248 NCOPIES is the number of vector statements that will be needed. */
2250 static bool
2251 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2252 tree vectype, slp_tree slp_node,
2253 bool masked_p, vec_load_store_type vls_type,
2254 unsigned int ncopies,
2255 vect_memory_access_type *memory_access_type,
2256 poly_int64 *poffset,
2257 dr_alignment_support *alignment_support_scheme,
2258 int *misalignment,
2259 gather_scatter_info *gs_info,
2260 internal_fn *lanes_ifn)
2262 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2263 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2264 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2265 *poffset = 0;
2266 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2268 *memory_access_type = VMAT_GATHER_SCATTER;
2269 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2270 gcc_unreachable ();
2271 /* When using internal functions, we rely on pattern recognition
2272 to convert the type of the offset to the type that the target
2273 requires, with the result being a call to an internal function.
2274 If that failed for some reason (e.g. because another pattern
2275 took priority), just handle cases in which the offset already
2276 has the right type. */
2277 else if (gs_info->ifn != IFN_LAST
2278 && !is_gimple_call (stmt_info->stmt)
2279 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2280 TREE_TYPE (gs_info->offset_vectype)))
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2284 "%s offset requires a conversion\n",
2285 vls_type == VLS_LOAD ? "gather" : "scatter");
2286 return false;
2288 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2289 &gs_info->offset_dt,
2290 &gs_info->offset_vectype))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 "%s index use not simple.\n",
2295 vls_type == VLS_LOAD ? "gather" : "scatter");
2296 return false;
2298 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2300 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2301 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2302 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2303 (gs_info->offset_vectype),
2304 TYPE_VECTOR_SUBPARTS (vectype)))
2306 if (dump_enabled_p ())
2307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2308 "unsupported vector types for emulated "
2309 "gather.\n");
2310 return false;
2313 /* Gather-scatter accesses perform only component accesses, alignment
2314 is irrelevant for them. */
2315 *alignment_support_scheme = dr_unaligned_supported;
2317 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2319 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2320 masked_p,
2321 vls_type, memory_access_type, poffset,
2322 alignment_support_scheme,
2323 misalignment, gs_info, lanes_ifn))
2324 return false;
2326 else if (STMT_VINFO_STRIDED_P (stmt_info))
2328 gcc_assert (!slp_node);
2329 if (loop_vinfo
2330 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2331 masked_p, gs_info))
2332 *memory_access_type = VMAT_GATHER_SCATTER;
2333 else
2334 *memory_access_type = VMAT_ELEMENTWISE;
2335 /* Alignment is irrelevant here. */
2336 *alignment_support_scheme = dr_unaligned_supported;
2338 else
2340 int cmp = compare_step_with_zero (vinfo, stmt_info);
2341 if (cmp == 0)
2343 gcc_assert (vls_type == VLS_LOAD);
2344 *memory_access_type = VMAT_INVARIANT;
2345 /* Invariant accesses perform only component accesses, alignment
2346 is irrelevant for them. */
2347 *alignment_support_scheme = dr_unaligned_supported;
2349 else
2351 if (cmp < 0)
2352 *memory_access_type = get_negative_load_store_type
2353 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2354 else
2355 *memory_access_type = VMAT_CONTIGUOUS;
2356 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2357 vectype, *poffset);
2358 *alignment_support_scheme
2359 = vect_supportable_dr_alignment (vinfo,
2360 STMT_VINFO_DR_INFO (stmt_info),
2361 vectype, *misalignment);
2365 if ((*memory_access_type == VMAT_ELEMENTWISE
2366 || *memory_access_type == VMAT_STRIDED_SLP)
2367 && !nunits.is_constant ())
2369 if (dump_enabled_p ())
2370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2371 "Not using elementwise accesses due to variable "
2372 "vectorization factor.\n");
2373 return false;
2376 if (*alignment_support_scheme == dr_unaligned_unsupported)
2378 if (dump_enabled_p ())
2379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2380 "unsupported unaligned access\n");
2381 return false;
2384 /* FIXME: At the moment the cost model seems to underestimate the
2385 cost of using elementwise accesses. This check preserves the
2386 traditional behavior until that can be fixed. */
2387 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2388 if (!first_stmt_info)
2389 first_stmt_info = stmt_info;
2390 if (*memory_access_type == VMAT_ELEMENTWISE
2391 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2392 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2393 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2394 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2396 if (dump_enabled_p ())
2397 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2398 "not falling back to elementwise accesses\n");
2399 return false;
2401 return true;
2404 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2405 conditional operation STMT_INFO. When returning true, store the mask
2406 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2407 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2408 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2410 static bool
2411 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2412 slp_tree slp_node, unsigned mask_index,
2413 tree *mask, slp_tree *mask_node,
2414 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2416 enum vect_def_type mask_dt;
2417 tree mask_vectype;
2418 slp_tree mask_node_1;
2419 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2420 mask, &mask_node_1, &mask_dt, &mask_vectype))
2422 if (dump_enabled_p ())
2423 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2424 "mask use not simple.\n");
2425 return false;
2428 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2430 if (dump_enabled_p ())
2431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432 "mask argument is not a boolean.\n");
2433 return false;
2436 /* If the caller is not prepared for adjusting an external/constant
2437 SLP mask vector type fail. */
2438 if (slp_node
2439 && !mask_node
2440 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2442 if (dump_enabled_p ())
2443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2444 "SLP mask argument is not vectorized.\n");
2445 return false;
2448 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2449 if (!mask_vectype)
2450 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2452 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2454 if (dump_enabled_p ())
2455 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2456 "could not find an appropriate vector mask type.\n");
2457 return false;
2460 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2461 TYPE_VECTOR_SUBPARTS (vectype)))
2463 if (dump_enabled_p ())
2464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2465 "vector mask type %T"
2466 " does not match vector data type %T.\n",
2467 mask_vectype, vectype);
2469 return false;
2472 *mask_dt_out = mask_dt;
2473 *mask_vectype_out = mask_vectype;
2474 if (mask_node)
2475 *mask_node = mask_node_1;
2476 return true;
2479 /* Return true if stored value RHS is suitable for vectorizing store
2480 statement STMT_INFO. When returning true, store the type of the
2481 definition in *RHS_DT_OUT, the type of the vectorized store value in
2482 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2484 static bool
2485 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2486 slp_tree slp_node, tree rhs,
2487 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2488 vec_load_store_type *vls_type_out)
2490 /* In the case this is a store from a constant make sure
2491 native_encode_expr can handle it. */
2492 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2494 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2496 "cannot encode constant as a byte sequence.\n");
2497 return false;
2500 int op_no = 0;
2501 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2503 if (gimple_call_internal_p (call)
2504 && internal_store_fn_p (gimple_call_internal_fn (call)))
2505 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2506 if (slp_node)
2507 op_no = vect_slp_child_index_for_operand (call, op_no);
2510 enum vect_def_type rhs_dt;
2511 tree rhs_vectype;
2512 slp_tree slp_op;
2513 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2514 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2516 if (dump_enabled_p ())
2517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2518 "use not simple.\n");
2519 return false;
2522 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2523 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2525 if (dump_enabled_p ())
2526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2527 "incompatible vector types.\n");
2528 return false;
2531 *rhs_dt_out = rhs_dt;
2532 *rhs_vectype_out = rhs_vectype;
2533 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2534 *vls_type_out = VLS_STORE_INVARIANT;
2535 else
2536 *vls_type_out = VLS_STORE;
2537 return true;
2540 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2541 Note that we support masks with floating-point type, in which case the
2542 floats are interpreted as a bitmask. */
2544 static tree
2545 vect_build_all_ones_mask (vec_info *vinfo,
2546 stmt_vec_info stmt_info, tree masktype)
2548 if (TREE_CODE (masktype) == INTEGER_TYPE)
2549 return build_int_cst (masktype, -1);
2550 else if (VECTOR_BOOLEAN_TYPE_P (masktype)
2551 || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2553 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2554 mask = build_vector_from_val (masktype, mask);
2555 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2557 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2559 REAL_VALUE_TYPE r;
2560 long tmp[6];
2561 for (int j = 0; j < 6; ++j)
2562 tmp[j] = -1;
2563 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2564 tree mask = build_real (TREE_TYPE (masktype), r);
2565 mask = build_vector_from_val (masktype, mask);
2566 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2568 gcc_unreachable ();
2571 /* Build an all-zero merge value of type VECTYPE while vectorizing
2572 STMT_INFO as a gather load. */
2574 static tree
2575 vect_build_zero_merge_argument (vec_info *vinfo,
2576 stmt_vec_info stmt_info, tree vectype)
2578 tree merge;
2579 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2580 merge = build_int_cst (TREE_TYPE (vectype), 0);
2581 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2583 REAL_VALUE_TYPE r;
2584 long tmp[6];
2585 for (int j = 0; j < 6; ++j)
2586 tmp[j] = 0;
2587 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2588 merge = build_real (TREE_TYPE (vectype), r);
2590 else
2591 gcc_unreachable ();
2592 merge = build_vector_from_val (vectype, merge);
2593 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2596 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2597 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2598 the gather load operation. If the load is conditional, MASK is the
2599 vectorized condition, otherwise MASK is null. PTR is the base
2600 pointer and OFFSET is the vectorized offset. */
2602 static gimple *
2603 vect_build_one_gather_load_call (vec_info *vinfo, stmt_vec_info stmt_info,
2604 gimple_stmt_iterator *gsi,
2605 gather_scatter_info *gs_info,
2606 tree ptr, tree offset, tree mask)
2608 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2609 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2610 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2611 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2612 /* ptrtype */ arglist = TREE_CHAIN (arglist);
2613 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2614 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2615 tree scaletype = TREE_VALUE (arglist);
2616 tree var;
2617 gcc_checking_assert (types_compatible_p (srctype, rettype)
2618 && (!mask
2619 || TREE_CODE (masktype) == INTEGER_TYPE
2620 || types_compatible_p (srctype, masktype)));
2622 tree op = offset;
2623 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2625 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2626 TYPE_VECTOR_SUBPARTS (idxtype)));
2627 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2628 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2629 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2630 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2631 op = var;
2634 tree src_op = NULL_TREE;
2635 tree mask_op = NULL_TREE;
2636 if (mask)
2638 if (!useless_type_conversion_p (masktype, TREE_TYPE (mask)))
2640 tree utype, optype = TREE_TYPE (mask);
2641 if (VECTOR_TYPE_P (masktype)
2642 || TYPE_MODE (masktype) == TYPE_MODE (optype))
2643 utype = masktype;
2644 else
2645 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2646 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2647 tree mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask);
2648 gassign *new_stmt
2649 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2650 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2651 mask_arg = var;
2652 if (!useless_type_conversion_p (masktype, utype))
2654 gcc_assert (TYPE_PRECISION (utype)
2655 <= TYPE_PRECISION (masktype));
2656 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2657 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2658 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2659 mask_arg = var;
2661 src_op = build_zero_cst (srctype);
2662 mask_op = mask_arg;
2664 else
2666 src_op = mask;
2667 mask_op = mask;
2670 else
2672 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2673 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2676 tree scale = build_int_cst (scaletype, gs_info->scale);
2677 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2678 mask_op, scale);
2680 if (!useless_type_conversion_p (vectype, rettype))
2682 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2683 TYPE_VECTOR_SUBPARTS (rettype)));
2684 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2685 gimple_call_set_lhs (new_stmt, op);
2686 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2687 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2688 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR, op);
2691 return new_stmt;
2694 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2695 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2696 the scatter store operation. If the store is conditional, MASK is the
2697 unvectorized condition, otherwise MASK is null. */
2699 static void
2700 vect_build_scatter_store_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2701 gimple_stmt_iterator *gsi, gimple **vec_stmt,
2702 gather_scatter_info *gs_info, tree mask,
2703 stmt_vector_for_cost *cost_vec)
2705 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
2706 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2707 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2708 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2709 enum { NARROW, NONE, WIDEN } modifier;
2710 poly_uint64 scatter_off_nunits
2711 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2713 /* FIXME: Keep the previous costing approach from vect_model_store_cost,
2714 costing N scalar stores, but this should be tweaked to use
2715 target-specific costs for the related scatter store calls. */
2716 if (cost_vec)
2718 tree op = vect_get_store_rhs (stmt_info);
2719 enum vect_def_type dt;
2720 gcc_assert (vect_is_simple_use (op, vinfo, &dt));
2721 unsigned int inside_cost, prologue_cost = 0;
2722 if (dt == vect_constant_def || dt == vect_external_def)
2723 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
2724 stmt_info, 0, vect_prologue);
2725 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
2726 inside_cost = record_stmt_cost (cost_vec, ncopies * assumed_nunits,
2727 scalar_store, stmt_info, 0, vect_body);
2729 if (dump_enabled_p ())
2730 dump_printf_loc (MSG_NOTE, vect_location,
2731 "vect_model_store_cost: inside_cost = %d, "
2732 "prologue_cost = %d .\n",
2733 inside_cost, prologue_cost);
2734 return;
2737 tree perm_mask = NULL_TREE, mask_halfvectype = NULL_TREE;
2738 if (known_eq (nunits, scatter_off_nunits))
2739 modifier = NONE;
2740 else if (known_eq (nunits * 2, scatter_off_nunits))
2742 modifier = WIDEN;
2744 /* Currently gathers and scatters are only supported for
2745 fixed-length vectors. */
2746 unsigned int count = scatter_off_nunits.to_constant ();
2747 vec_perm_builder sel (count, count, 1);
2748 for (unsigned i = 0; i < (unsigned int) count; ++i)
2749 sel.quick_push (i | (count / 2));
2751 vec_perm_indices indices (sel, 1, count);
2752 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype, indices);
2753 gcc_assert (perm_mask != NULL_TREE);
2755 else if (known_eq (nunits, scatter_off_nunits * 2))
2757 modifier = NARROW;
2759 /* Currently gathers and scatters are only supported for
2760 fixed-length vectors. */
2761 unsigned int count = nunits.to_constant ();
2762 vec_perm_builder sel (count, count, 1);
2763 for (unsigned i = 0; i < (unsigned int) count; ++i)
2764 sel.quick_push (i | (count / 2));
2766 vec_perm_indices indices (sel, 2, count);
2767 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2768 gcc_assert (perm_mask != NULL_TREE);
2769 ncopies *= 2;
2771 if (mask)
2772 mask_halfvectype = truth_type_for (gs_info->offset_vectype);
2774 else
2775 gcc_unreachable ();
2777 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2778 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2779 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2780 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2781 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2782 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2783 tree scaletype = TREE_VALUE (arglist);
2785 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
2786 && TREE_CODE (rettype) == VOID_TYPE);
2788 tree ptr = fold_convert (ptrtype, gs_info->base);
2789 if (!is_gimple_min_invariant (ptr))
2791 gimple_seq seq;
2792 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2793 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2794 edge pe = loop_preheader_edge (loop);
2795 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2796 gcc_assert (!new_bb);
2799 tree mask_arg = NULL_TREE;
2800 if (mask == NULL_TREE)
2802 mask_arg = build_int_cst (masktype, -1);
2803 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
2806 tree scale = build_int_cst (scaletype, gs_info->scale);
2808 auto_vec<tree> vec_oprnds0;
2809 auto_vec<tree> vec_oprnds1;
2810 auto_vec<tree> vec_masks;
2811 if (mask)
2813 tree mask_vectype = truth_type_for (vectype);
2814 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2815 modifier == NARROW ? ncopies / 2 : ncopies,
2816 mask, &vec_masks, mask_vectype);
2818 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2819 modifier == WIDEN ? ncopies / 2 : ncopies,
2820 gs_info->offset, &vec_oprnds0);
2821 tree op = vect_get_store_rhs (stmt_info);
2822 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2823 modifier == NARROW ? ncopies / 2 : ncopies, op,
2824 &vec_oprnds1);
2826 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2827 tree mask_op = NULL_TREE;
2828 tree src, vec_mask;
2829 for (int j = 0; j < ncopies; ++j)
2831 if (modifier == WIDEN)
2833 if (j & 1)
2834 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0, perm_mask,
2835 stmt_info, gsi);
2836 else
2837 op = vec_oprnd0 = vec_oprnds0[j / 2];
2838 src = vec_oprnd1 = vec_oprnds1[j];
2839 if (mask)
2840 mask_op = vec_mask = vec_masks[j];
2842 else if (modifier == NARROW)
2844 if (j & 1)
2845 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
2846 perm_mask, stmt_info, gsi);
2847 else
2848 src = vec_oprnd1 = vec_oprnds1[j / 2];
2849 op = vec_oprnd0 = vec_oprnds0[j];
2850 if (mask)
2851 mask_op = vec_mask = vec_masks[j / 2];
2853 else
2855 op = vec_oprnd0 = vec_oprnds0[j];
2856 src = vec_oprnd1 = vec_oprnds1[j];
2857 if (mask)
2858 mask_op = vec_mask = vec_masks[j];
2861 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
2863 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
2864 TYPE_VECTOR_SUBPARTS (srctype)));
2865 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
2866 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
2867 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
2868 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2869 src = var;
2872 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2874 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2875 TYPE_VECTOR_SUBPARTS (idxtype)));
2876 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2877 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2878 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2879 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2880 op = var;
2883 if (mask)
2885 tree utype;
2886 mask_arg = mask_op;
2887 if (modifier == NARROW)
2889 tree var
2890 = vect_get_new_ssa_name (mask_halfvectype, vect_simple_var);
2891 gassign *new_stmt
2892 = gimple_build_assign (var,
2893 (j & 1) ? VEC_UNPACK_HI_EXPR
2894 : VEC_UNPACK_LO_EXPR,
2895 mask_op);
2896 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2897 mask_arg = var;
2899 tree optype = TREE_TYPE (mask_arg);
2900 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
2901 utype = masktype;
2902 else
2903 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2904 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
2905 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
2906 gassign *new_stmt
2907 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2908 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2909 mask_arg = var;
2910 if (!useless_type_conversion_p (masktype, utype))
2912 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
2913 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
2914 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2915 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2916 mask_arg = var;
2920 gcall *new_stmt
2921 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
2922 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2924 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2926 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2929 /* Prepare the base and offset in GS_INFO for vectorization.
2930 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2931 to the vectorized offset argument for the first copy of STMT_INFO.
2932 STMT_INFO is the statement described by GS_INFO and LOOP is the
2933 containing loop. */
2935 static void
2936 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
2937 class loop *loop, stmt_vec_info stmt_info,
2938 slp_tree slp_node, gather_scatter_info *gs_info,
2939 tree *dataref_ptr, vec<tree> *vec_offset)
2941 gimple_seq stmts = NULL;
2942 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2943 if (stmts != NULL)
2945 basic_block new_bb;
2946 edge pe = loop_preheader_edge (loop);
2947 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2948 gcc_assert (!new_bb);
2950 if (slp_node)
2951 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
2952 else
2954 unsigned ncopies
2955 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
2956 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
2957 gs_info->offset, vec_offset,
2958 gs_info->offset_vectype);
2962 /* Prepare to implement a grouped or strided load or store using
2963 the gather load or scatter store operation described by GS_INFO.
2964 STMT_INFO is the load or store statement.
2966 Set *DATAREF_BUMP to the amount that should be added to the base
2967 address after each copy of the vectorized statement. Set *VEC_OFFSET
2968 to an invariant offset vector in which element I has the value
2969 I * DR_STEP / SCALE. */
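/* For instance, assuming DR_STEP is 16 bytes and SCALE is 4, the offset
   vector is { 0, 4, 8, 12, ... }; combined with the scale, element I then
   addresses BASE + I * 16 as required.  */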
2971 static void
2972 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2973 loop_vec_info loop_vinfo,
2974 gimple_stmt_iterator *gsi,
2975 gather_scatter_info *gs_info,
2976 tree *dataref_bump, tree *vec_offset,
2977 vec_loop_lens *loop_lens)
2979 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2980 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2982 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
2984 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2985 ivtmp_8 = _31 * 16 (step in bytes);
2986 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2987 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2988 tree loop_len
2989 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
2990 tree tmp
2991 = fold_build2 (MULT_EXPR, sizetype,
2992 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2993 loop_len);
2994 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
2995 GSI_SAME_STMT);
2997 else
2999 tree bump
3000 = size_binop (MULT_EXPR,
3001 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3002 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3003 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
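/* E.g., assuming DR_STEP is 8 bytes and a 4-element vector type, the
   bump applied after each copy is 8 * 4 = 32 bytes.  */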
3006 /* The offset given in GS_INFO can have pointer type, so use the element
3007 type of the vector instead. */
3008 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3010 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3011 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3012 ssize_int (gs_info->scale));
3013 step = fold_convert (offset_type, step);
3015 /* Create {0, X, X*2, X*3, ...}. */
3016 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3017 build_zero_cst (offset_type), step);
3018 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3021 /* Prepare the pointer IVs which need to be updated by a variable amount.
3022 That amount is the outcome of .SELECT_VL. In this case, each iteration
3023 may process a flexible number of elements, as long as that number is
3024 at most VF elements.
3026 Return data reference according to SELECT_VL.
3027 If new statements are needed, insert them before GSI. */
3029 static tree
3030 vect_get_loop_variant_data_ptr_increment (
3031 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
3032 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
3033 vect_memory_access_type memory_access_type)
3035 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3036 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3038 /* gather/scatter never reach here. */
3039 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
3041 /* When the SELECT_VL pattern is supported, we adjust the memory
3042 address dynamically by the .SELECT_VL result.
3044 The result of .SELECT_VL is the number of elements to be
3045 processed in each iteration. So the memory address
3046 adjustment operation should be:
3048 addr = addr + .SELECT_VL (ARG..) * step;
3050 tree loop_len
3051 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
3052 tree len_type = TREE_TYPE (loop_len);
3053 /* Since the outcome of .SELECT_VL is a number of elements, scale it
3054 by the step to get a byte size that can be used to adjust the
3055 address pointer IVs by a variable amount. */
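/* For instance, if .SELECT_VL returns 5 elements and STEP is 8 bytes,
   the pointer IVs are bumped by 5 * 8 = 40 bytes in that iteration.  */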
3056 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
3057 wide_int_to_tree (len_type, wi::to_widest (step)));
3058 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
3059 gassign *assign = gimple_build_assign (bump, tmp);
3060 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
3061 return bump;
3064 /* Return the amount that should be added to a vector pointer to move
3065 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3066 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3067 vectorization. */
3069 static tree
3070 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
3071 dr_vec_info *dr_info, tree aggr_type,
3072 vect_memory_access_type memory_access_type,
3073 vec_loop_lens *loop_lens = nullptr)
3075 if (memory_access_type == VMAT_INVARIANT)
3076 return size_zero_node;
3078 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3079 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3080 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
3081 loop_lens, dr_info,
3082 memory_access_type);
3084 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
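/* E.g., for an AGGR_TYPE of V4SI this is 16 bytes; it is negated below
   when the data reference steps backwards.  */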
3085 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3086 if (tree_int_cst_sgn (step) == -1)
3087 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3088 return iv_step;
3091 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3093 static bool
3094 vectorizable_bswap (vec_info *vinfo,
3095 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3096 gimple **vec_stmt, slp_tree slp_node,
3097 slp_tree *slp_op,
3098 tree vectype_in, stmt_vector_for_cost *cost_vec)
3100 tree op, vectype;
3101 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3102 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3103 unsigned ncopies;
3105 op = gimple_call_arg (stmt, 0);
3106 vectype = STMT_VINFO_VECTYPE (stmt_info);
3107 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3109 /* Multiple types in SLP are handled by creating the appropriate number of
3110 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3111 case of SLP. */
3112 if (slp_node)
3113 ncopies = 1;
3114 else
3115 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3117 gcc_assert (ncopies >= 1);
3119 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3120 if (! char_vectype)
3121 return false;
3123 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3124 unsigned word_bytes;
3125 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3126 return false;
3128 /* The encoding uses one stepped pattern for each byte in the word. */
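/* For instance, assuming a bswap32 on a 16-byte vector (WORD_BYTES == 4,
   NUM_BYTES == 16), the selector is { 3, 2, 1, 0, 7, 6, 5, 4, ... },
   reversing the bytes within each 4-byte word.  */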
3129 vec_perm_builder elts (num_bytes, word_bytes, 3);
3130 for (unsigned i = 0; i < 3; ++i)
3131 for (unsigned j = 0; j < word_bytes; ++j)
3132 elts.quick_push ((i + 1) * word_bytes - j - 1);
3134 vec_perm_indices indices (elts, 1, num_bytes);
3135 machine_mode vmode = TYPE_MODE (char_vectype);
3136 if (!can_vec_perm_const_p (vmode, vmode, indices))
3137 return false;
3139 if (! vec_stmt)
3141 if (slp_node
3142 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3144 if (dump_enabled_p ())
3145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3146 "incompatible vector types for invariants\n");
3147 return false;
3150 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3151 DUMP_VECT_SCOPE ("vectorizable_bswap");
3152 record_stmt_cost (cost_vec,
3153 1, vector_stmt, stmt_info, 0, vect_prologue);
3154 record_stmt_cost (cost_vec,
3155 slp_node
3156 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3157 vec_perm, stmt_info, 0, vect_body);
3158 return true;
3161 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3163 /* Transform. */
3164 vec<tree> vec_oprnds = vNULL;
3165 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3166 op, &vec_oprnds);
3167 /* Arguments are ready. Create the new vector stmt. */
3168 unsigned i;
3169 tree vop;
3170 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3172 gimple *new_stmt;
3173 tree tem = make_ssa_name (char_vectype);
3174 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3175 char_vectype, vop));
3176 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3177 tree tem2 = make_ssa_name (char_vectype);
3178 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3179 tem, tem, bswap_vconst);
3180 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3181 tem = make_ssa_name (vectype);
3182 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3183 vectype, tem2));
3184 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3185 if (slp_node)
3186 slp_node->push_vec_def (new_stmt);
3187 else
3188 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3191 if (!slp_node)
3192 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3194 vec_oprnds.release ();
3195 return true;
3198 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3199 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3200 in a single step. On success, store the binary pack code in
3201 *CONVERT_CODE. */
3203 static bool
3204 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3205 code_helper *convert_code)
3207 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3208 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3209 return false;
3211 code_helper code;
3212 int multi_step_cvt = 0;
3213 auto_vec <tree, 8> interm_types;
3214 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3215 &code, &multi_step_cvt, &interm_types)
3216 || multi_step_cvt)
3217 return false;
3219 *convert_code = code;
3220 return true;
3223 /* Function vectorizable_call.
3225 Check if STMT_INFO performs a function call that can be vectorized.
3226 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3227 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3228 Return true if STMT_INFO is vectorizable in this way. */
3230 static bool
3231 vectorizable_call (vec_info *vinfo,
3232 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3233 gimple **vec_stmt, slp_tree slp_node,
3234 stmt_vector_for_cost *cost_vec)
3236 gcall *stmt;
3237 tree vec_dest;
3238 tree scalar_dest;
3239 tree op;
3240 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3241 tree vectype_out, vectype_in;
3242 poly_uint64 nunits_in;
3243 poly_uint64 nunits_out;
3244 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3245 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3246 tree fndecl, new_temp, rhs_type;
3247 enum vect_def_type dt[4]
3248 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3249 vect_unknown_def_type };
3250 tree vectypes[ARRAY_SIZE (dt)] = {};
3251 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3252 int ndts = ARRAY_SIZE (dt);
3253 int ncopies, j;
3254 auto_vec<tree, 8> vargs;
3255 enum { NARROW, NONE, WIDEN } modifier;
3256 size_t i, nargs;
3257 tree lhs;
3259 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3260 return false;
3262 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3263 && ! vec_stmt)
3264 return false;
3266 /* Is STMT_INFO a vectorizable call? */
3267 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3268 if (!stmt)
3269 return false;
3271 if (gimple_call_internal_p (stmt)
3272 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3273 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3274 /* Handled by vectorizable_load and vectorizable_store. */
3275 return false;
3277 if (gimple_call_lhs (stmt) == NULL_TREE
3278 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3279 return false;
3281 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3283 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3285 /* Process function arguments. */
3286 rhs_type = NULL_TREE;
3287 vectype_in = NULL_TREE;
3288 nargs = gimple_call_num_args (stmt);
3290 /* Bail out if the function has more than four arguments; we do not have
3291 interesting builtin functions to vectorize with more than two arguments
3292 except for fma. Having no arguments is not good either. */
3293 if (nargs == 0 || nargs > 4)
3294 return false;
3296 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3297 combined_fn cfn = gimple_call_combined_fn (stmt);
3298 if (cfn == CFN_GOMP_SIMD_LANE)
3300 nargs = 0;
3301 rhs_type = unsigned_type_node;
3304 int mask_opno = -1;
3305 if (internal_fn_p (cfn))
3306 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3308 for (i = 0; i < nargs; i++)
3310 if ((int) i == mask_opno)
3312 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3313 &op, &slp_op[i], &dt[i], &vectypes[i]))
3314 return false;
3315 continue;
3318 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3319 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3321 if (dump_enabled_p ())
3322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3323 "use not simple.\n");
3324 return false;
3327 /* We can only handle calls with arguments of the same type. */
3328 if (rhs_type
3329 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3331 if (dump_enabled_p ())
3332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3333 "argument types differ.\n");
3334 return false;
3336 if (!rhs_type)
3337 rhs_type = TREE_TYPE (op);
3339 if (!vectype_in)
3340 vectype_in = vectypes[i];
3341 else if (vectypes[i]
3342 && !types_compatible_p (vectypes[i], vectype_in))
3344 if (dump_enabled_p ())
3345 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3346 "argument vector types differ.\n");
3347 return false;
3350 /* If all arguments are external or constant defs, infer the vector type
3351 from the scalar type. */
3352 if (!vectype_in)
3353 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3354 if (vec_stmt)
3355 gcc_assert (vectype_in);
3356 if (!vectype_in)
3358 if (dump_enabled_p ())
3359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3360 "no vectype for scalar type %T\n", rhs_type);
3362 return false;
3364 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3365 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3366 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3367 by a pack of the two vectors into an SI vector. We would need
3368 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3369 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3371 if (dump_enabled_p ())
3372 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3373 "mismatched vector sizes %T and %T\n",
3374 vectype_in, vectype_out);
3375 return false;
3378 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3379 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3381 if (dump_enabled_p ())
3382 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3383 "mixed mask and nonmask vector types\n");
3384 return false;
3387 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3389 if (dump_enabled_p ())
3390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3391 "use emulated vector type for call\n");
3392 return false;
3395 /* FORNOW */
3396 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3397 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3398 if (known_eq (nunits_in * 2, nunits_out))
3399 modifier = NARROW;
3400 else if (known_eq (nunits_out, nunits_in))
3401 modifier = NONE;
3402 else if (known_eq (nunits_out * 2, nunits_in))
3403 modifier = WIDEN;
3404 else
3405 return false;
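/* For example, a call taking V4DI arguments and producing a V8SI result
   has NUNITS_IN == 4 and NUNITS_OUT == 8, so MODIFIER is NARROW; the
   mirror-image case gives WIDEN.  */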
3407 /* We only handle functions that do not read or clobber memory. */
3408 if (gimple_vuse (stmt))
3410 if (dump_enabled_p ())
3411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3412 "function reads from or writes to memory.\n");
3413 return false;
3416 /* For now, we only vectorize functions if a target specific builtin
3417 is available. TODO -- in some cases, it might be profitable to
3418 insert the calls for pieces of the vector, in order to be able
3419 to vectorize other operations in the loop. */
3420 fndecl = NULL_TREE;
3421 internal_fn ifn = IFN_LAST;
3422 tree callee = gimple_call_fndecl (stmt);
3424 /* First try using an internal function. */
3425 code_helper convert_code = MAX_TREE_CODES;
3426 if (cfn != CFN_LAST
3427 && (modifier == NONE
3428 || (modifier == NARROW
3429 && simple_integer_narrowing (vectype_out, vectype_in,
3430 &convert_code))))
3431 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3432 vectype_in);
3434 /* If that fails, try asking for a target-specific built-in function. */
3435 if (ifn == IFN_LAST)
3437 if (cfn != CFN_LAST)
3438 fndecl = targetm.vectorize.builtin_vectorized_function
3439 (cfn, vectype_out, vectype_in);
3440 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3441 fndecl = targetm.vectorize.builtin_md_vectorized_function
3442 (callee, vectype_out, vectype_in);
3445 if (ifn == IFN_LAST && !fndecl)
3447 if (cfn == CFN_GOMP_SIMD_LANE
3448 && !slp_node
3449 && loop_vinfo
3450 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3451 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3452 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3453 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3455 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3456 { 0, 1, 2, ... vf - 1 } vector. */
3457 gcc_assert (nargs == 0);
3459 else if (modifier == NONE
3460 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3461 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3462 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3463 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3464 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3465 slp_op, vectype_in, cost_vec);
3466 else
3468 if (dump_enabled_p ())
3469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3470 "function is not vectorizable.\n");
3471 return false;
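/* NCOPIES is the number of vector calls needed per scalar call, i.e. the
   vectorization factor divided by the number of lanes of the chosen vector
   type (always 1 for SLP).  For a NARROW call expanded via a target builtin
   each call consumes two input vectors and produces one output vector, so
   the count is based on VECTYPE_OUT in that case.  */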
3475 if (slp_node)
3476 ncopies = 1;
3477 else if (modifier == NARROW && ifn == IFN_LAST)
3478 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3479 else
3480 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3482 /* Sanity check: make sure that at least one copy of the vectorized stmt
3483 needs to be generated. */
3484 gcc_assert (ncopies >= 1);
3486 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3487 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3488 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3489 int len_opno = internal_fn_len_index (cond_len_fn);
3490 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3491 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3492 if (!vec_stmt) /* transformation not required. */
3494 if (slp_node)
3495 for (i = 0; i < nargs; ++i)
3496 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3497 vectypes[i]
3498 ? vectypes[i] : vectype_in))
3500 if (dump_enabled_p ())
3501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3502 "incompatible vector types for invariants\n");
3503 return false;
3505 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3506 DUMP_VECT_SCOPE ("vectorizable_call");
3507 vect_model_simple_cost (vinfo, stmt_info,
3508 ncopies, dt, ndts, slp_node, cost_vec);
3509 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3510 record_stmt_cost (cost_vec, ncopies / 2,
3511 vec_promote_demote, stmt_info, 0, vect_body);
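/* If the call feeds a reduction or is itself masked, decide whether the
   loop can still use partial vectors: a reduction needs a conditional
   (COND_*) or length (COND_LEN_*) form of the internal function, and when
   one is available we record the loop length or mask it will need.  */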
3513 if (loop_vinfo
3514 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3515 && (reduc_idx >= 0 || mask_opno >= 0))
3517 if (reduc_idx >= 0
3518 && (cond_fn == IFN_LAST
3519 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3520 OPTIMIZE_FOR_SPEED))
3521 && (cond_len_fn == IFN_LAST
3522 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3523 OPTIMIZE_FOR_SPEED)))
3525 if (dump_enabled_p ())
3526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3527 "can't use a fully-masked loop because no"
3528 " conditional operation is available.\n");
3529 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3531 else
3533 unsigned int nvectors
3534 = (slp_node
3535 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3536 : ncopies);
3537 tree scalar_mask = NULL_TREE;
3538 if (mask_opno >= 0)
3539 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3540 if (cond_len_fn != IFN_LAST
3541 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3542 OPTIMIZE_FOR_SPEED))
3543 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3545 else
3546 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3547 scalar_mask);
3550 return true;
3553 /* Transform. */
3555 if (dump_enabled_p ())
3556 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3558 /* Handle def. */
3559 scalar_dest = gimple_call_lhs (stmt);
3560 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3562 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3563 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3564 unsigned int vect_nargs = nargs;
3565 if (len_loop_p)
3567 if (len_opno >= 0)
3569 ifn = cond_len_fn;
3570 /* COND_* -> COND_LEN_* takes two extra arguments: LEN and BIAS. */
3571 vect_nargs += 2;
3573 else if (reduc_idx >= 0)
3574 gcc_unreachable ();
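/* Likewise COND_* takes two extra arguments: the loop mask up front and
   an else value at the end.  */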
3576 else if (masked_loop_p && reduc_idx >= 0)
3578 ifn = cond_fn;
3579 vect_nargs += 2;
3582 if (modifier == NONE || ifn != IFN_LAST)
3584 tree prev_res = NULL_TREE;
3585 vargs.safe_grow (vect_nargs, true);
3586 auto_vec<vec<tree> > vec_defs (nargs);
3587 for (j = 0; j < ncopies; ++j)
3589 /* Build argument list for the vectorized call. */
3590 if (slp_node)
3592 vec<tree> vec_oprnds0;
3594 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3595 vec_oprnds0 = vec_defs[0];
3597 /* Arguments are ready. Create the new vector stmt. */
3598 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3600 int varg = 0;
3601 if (masked_loop_p && reduc_idx >= 0)
3603 unsigned int vec_num = vec_oprnds0.length ();
3604 /* Always true for SLP. */
3605 gcc_assert (ncopies == 1);
3606 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3607 gsi, masks, vec_num,
3608 vectype_out, i);
3610 size_t k;
3611 for (k = 0; k < nargs; k++)
3613 vec<tree> vec_oprndsk = vec_defs[k];
3614 vargs[varg++] = vec_oprndsk[i];
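/* The else value of the conditional call is the reduction input itself;
   the +1 accounts for the loop mask pushed at the front.  */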
3616 if (masked_loop_p && reduc_idx >= 0)
3617 vargs[varg++] = vargs[reduc_idx + 1];
3618 gimple *new_stmt;
3619 if (modifier == NARROW)
3621 /* We don't define any narrowing conditional functions
3622 at present. */
3623 gcc_assert (mask_opno < 0);
3624 tree half_res = make_ssa_name (vectype_in);
3625 gcall *call
3626 = gimple_build_call_internal_vec (ifn, vargs);
3627 gimple_call_set_lhs (call, half_res);
3628 gimple_call_set_nothrow (call, true);
3629 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3630 if ((i & 1) == 0)
3632 prev_res = half_res;
3633 continue;
3635 new_temp = make_ssa_name (vec_dest);
3636 new_stmt = vect_gimple_build (new_temp, convert_code,
3637 prev_res, half_res);
3638 vect_finish_stmt_generation (vinfo, stmt_info,
3639 new_stmt, gsi);
3641 else
3643 if (len_opno >= 0 && len_loop_p)
3645 unsigned int vec_num = vec_oprnds0.length ();
3646 /* Always true for SLP. */
3647 gcc_assert (ncopies == 1);
3648 tree len
3649 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3650 vectype_out, i, 1);
3651 signed char biasval
3652 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3653 tree bias = build_int_cst (intQI_type_node, biasval);
3654 vargs[len_opno] = len;
3655 vargs[len_opno + 1] = bias;
3657 else if (mask_opno >= 0 && masked_loop_p)
3659 unsigned int vec_num = vec_oprnds0.length ();
3660 /* Always true for SLP. */
3661 gcc_assert (ncopies == 1);
3662 tree mask = vect_get_loop_mask (loop_vinfo,
3663 gsi, masks, vec_num,
3664 vectype_out, i);
3665 vargs[mask_opno] = prepare_vec_mask
3666 (loop_vinfo, TREE_TYPE (mask), mask,
3667 vargs[mask_opno], gsi);
3670 gcall *call;
3671 if (ifn != IFN_LAST)
3672 call = gimple_build_call_internal_vec (ifn, vargs);
3673 else
3674 call = gimple_build_call_vec (fndecl, vargs);
3675 new_temp = make_ssa_name (vec_dest, call);
3676 gimple_call_set_lhs (call, new_temp);
3677 gimple_call_set_nothrow (call, true);
3678 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3679 new_stmt = call;
3681 slp_node->push_vec_def (new_stmt);
3683 continue;
3686 int varg = 0;
3687 if (masked_loop_p && reduc_idx >= 0)
3688 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3689 vectype_out, j);
3690 for (i = 0; i < nargs; i++)
3692 op = gimple_call_arg (stmt, i);
3693 if (j == 0)
3695 vec_defs.quick_push (vNULL);
3696 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3697 op, &vec_defs[i],
3698 vectypes[i]);
3700 vargs[varg++] = vec_defs[i][j];
3702 if (masked_loop_p && reduc_idx >= 0)
3703 vargs[varg++] = vargs[reduc_idx + 1];
3705 if (len_opno >= 0 && len_loop_p)
3707 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3708 vectype_out, j, 1);
3709 signed char biasval
3710 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3711 tree bias = build_int_cst (intQI_type_node, biasval);
3712 vargs[len_opno] = len;
3713 vargs[len_opno + 1] = bias;
3715 else if (mask_opno >= 0 && masked_loop_p)
3717 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3718 vectype_out, j);
3719 vargs[mask_opno]
3720 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3721 vargs[mask_opno], gsi);
3724 gimple *new_stmt;
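/* For GOMP_SIMD_LANE copy J is simply the constant lane-index vector
   { J * NUNITS_OUT, J * NUNITS_OUT + 1, ... }.  */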
3725 if (cfn == CFN_GOMP_SIMD_LANE)
3727 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3728 tree new_var
3729 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3730 gimple *init_stmt = gimple_build_assign (new_var, cst);
3731 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3732 new_temp = make_ssa_name (vec_dest);
3733 new_stmt = gimple_build_assign (new_temp, new_var);
3734 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3736 else if (modifier == NARROW)
3738 /* We don't define any narrowing conditional functions at
3739 present. */
3740 gcc_assert (mask_opno < 0);
3741 tree half_res = make_ssa_name (vectype_in);
3742 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3743 gimple_call_set_lhs (call, half_res);
3744 gimple_call_set_nothrow (call, true);
3745 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3746 if ((j & 1) == 0)
3748 prev_res = half_res;
3749 continue;
3751 new_temp = make_ssa_name (vec_dest);
3752 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3753 half_res);
3754 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3756 else
3758 gcall *call;
3759 if (ifn != IFN_LAST)
3760 call = gimple_build_call_internal_vec (ifn, vargs);
3761 else
3762 call = gimple_build_call_vec (fndecl, vargs);
3763 new_temp = make_ssa_name (vec_dest, call);
3764 gimple_call_set_lhs (call, new_temp);
3765 gimple_call_set_nothrow (call, true);
3766 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3767 new_stmt = call;
3770 if (j == (modifier == NARROW ? 1 : 0))
3771 *vec_stmt = new_stmt;
3772 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3774 for (i = 0; i < nargs; i++)
3776 vec<tree> vec_oprndsi = vec_defs[i];
3777 vec_oprndsi.release ();
3780 else if (modifier == NARROW)
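/* NARROW call expanded via a target builtin: each vector call takes two
   input vectors per operand and produces a single vector of narrower
   elements.  */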
3782 auto_vec<vec<tree> > vec_defs (nargs);
3783 /* We don't define any narrowing conditional functions at present. */
3784 gcc_assert (mask_opno < 0);
3785 for (j = 0; j < ncopies; ++j)
3787 /* Build argument list for the vectorized call. */
3788 if (j == 0)
3789 vargs.create (nargs * 2);
3790 else
3791 vargs.truncate (0);
3793 if (slp_node)
3795 vec<tree> vec_oprnds0;
3797 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3798 vec_oprnds0 = vec_defs[0];
3800 /* Arguments are ready. Create the new vector stmt. */
3801 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3803 size_t k;
3804 vargs.truncate (0);
3805 for (k = 0; k < nargs; k++)
3807 vec<tree> vec_oprndsk = vec_defs[k];
3808 vargs.quick_push (vec_oprndsk[i]);
3809 vargs.quick_push (vec_oprndsk[i + 1]);
3811 gcall *call;
3812 if (ifn != IFN_LAST)
3813 call = gimple_build_call_internal_vec (ifn, vargs);
3814 else
3815 call = gimple_build_call_vec (fndecl, vargs);
3816 new_temp = make_ssa_name (vec_dest, call);
3817 gimple_call_set_lhs (call, new_temp);
3818 gimple_call_set_nothrow (call, true);
3819 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3820 slp_node->push_vec_def (call);
3822 continue;
3825 for (i = 0; i < nargs; i++)
3827 op = gimple_call_arg (stmt, i);
3828 if (j == 0)
3830 vec_defs.quick_push (vNULL);
3831 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3832 op, &vec_defs[i], vectypes[i]);
3834 vec_oprnd0 = vec_defs[i][2*j];
3835 vec_oprnd1 = vec_defs[i][2*j+1];
3837 vargs.quick_push (vec_oprnd0);
3838 vargs.quick_push (vec_oprnd1);
3841 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3842 new_temp = make_ssa_name (vec_dest, new_stmt);
3843 gimple_call_set_lhs (new_stmt, new_temp);
3844 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3846 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3849 if (!slp_node)
3850 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3852 for (i = 0; i < nargs; i++)
3854 vec<tree> vec_oprndsi = vec_defs[i];
3855 vec_oprndsi.release ();
3858 else
3859 /* No current target implements this case. */
3860 return false;
3862 vargs.release ();
3864 /* The call in STMT might prevent it from being removed in dce.
3865 However, we cannot remove it here because of the way the ssa name
3866 it defines is mapped to the new definition. So just replace the
3867 rhs of the statement with something harmless. */
3869 if (slp_node)
3870 return true;
3872 stmt_info = vect_orig_stmt (stmt_info);
3873 lhs = gimple_get_lhs (stmt_info->stmt);
3875 gassign *new_stmt
3876 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3877 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3879 return true;
3883 struct simd_call_arg_info
3885 tree vectype;
3886 tree op;
3887 HOST_WIDE_INT linear_step;
3888 enum vect_def_type dt;
3889 unsigned int align;
3890 bool simd_lane_linear;
3893 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3894 is linear within simd lane (but not within whole loop), note it in
3895 *ARGINFO. */
3897 static void
3898 vect_simd_lane_linear (tree op, class loop *loop,
3899 struct simd_call_arg_info *arginfo)
3901 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3903 if (!is_gimple_assign (def_stmt)
3904 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3905 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3906 return;
3908 tree base = gimple_assign_rhs1 (def_stmt);
3909 HOST_WIDE_INT linear_step = 0;
3910 tree v = gimple_assign_rhs2 (def_stmt);
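/* Peel the definition of V: additions of constants are folded into BASE,
   a single multiplication by a constant gives LINEAR_STEP, and conversions
   are looked through, until we reach an IFN_GOMP_SIMD_LANE call for this
   loop's simduid.  The net effect is OP = BASE + simd_lane * LINEAR_STEP.  */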
3911 while (TREE_CODE (v) == SSA_NAME)
3913 tree t;
3914 def_stmt = SSA_NAME_DEF_STMT (v);
3915 if (is_gimple_assign (def_stmt))
3916 switch (gimple_assign_rhs_code (def_stmt))
3918 case PLUS_EXPR:
3919 t = gimple_assign_rhs2 (def_stmt);
3920 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3921 return;
3922 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3923 v = gimple_assign_rhs1 (def_stmt);
3924 continue;
3925 case MULT_EXPR:
3926 t = gimple_assign_rhs2 (def_stmt);
3927 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3928 return;
3929 linear_step = tree_to_shwi (t);
3930 v = gimple_assign_rhs1 (def_stmt);
3931 continue;
3932 CASE_CONVERT:
3933 t = gimple_assign_rhs1 (def_stmt);
3934 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3935 || (TYPE_PRECISION (TREE_TYPE (v))
3936 < TYPE_PRECISION (TREE_TYPE (t))))
3937 return;
3938 if (!linear_step)
3939 linear_step = 1;
3940 v = t;
3941 continue;
3942 default:
3943 return;
3945 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3946 && loop->simduid
3947 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3948 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3949 == loop->simduid))
3951 if (!linear_step)
3952 linear_step = 1;
3953 arginfo->linear_step = linear_step;
3954 arginfo->op = base;
3955 arginfo->simd_lane_linear = true;
3956 return;
3961 /* Function vectorizable_simd_clone_call.
3963 Check if STMT_INFO performs a function call that can be vectorized
3964 by calling a simd clone of the function.
3965 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3966 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3967 Return true if STMT_INFO is vectorizable in this way. */
3969 static bool
3970 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
3971 gimple_stmt_iterator *gsi,
3972 gimple **vec_stmt, slp_tree slp_node,
3973 stmt_vector_for_cost *)
3975 tree vec_dest;
3976 tree scalar_dest;
3977 tree op, type;
3978 tree vec_oprnd0 = NULL_TREE;
3979 tree vectype;
3980 poly_uint64 nunits;
3981 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3982 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3983 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3984 tree fndecl, new_temp;
3985 int ncopies, j;
3986 auto_vec<simd_call_arg_info> arginfo;
3987 vec<tree> vargs = vNULL;
3988 size_t i, nargs;
3989 tree lhs, rtype, ratype;
3990 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3991 int masked_call_offset = 0;
3993 /* Is STMT a vectorizable call? */
3994 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3995 if (!stmt)
3996 return false;
3998 fndecl = gimple_call_fndecl (stmt);
3999 if (fndecl == NULL_TREE
4000 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4002 fndecl = gimple_call_arg (stmt, 0);
4003 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4004 fndecl = TREE_OPERAND (fndecl, 0);
4005 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4006 masked_call_offset = 1;
4008 if (fndecl == NULL_TREE)
4009 return false;
4011 struct cgraph_node *node = cgraph_node::get (fndecl);
4012 if (node == NULL || node->simd_clones == NULL)
4013 return false;
4015 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4016 return false;
4018 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4019 && ! vec_stmt)
4020 return false;
4022 if (gimple_call_lhs (stmt)
4023 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4024 return false;
4026 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4028 vectype = STMT_VINFO_VECTYPE (stmt_info);
4030 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4031 return false;
4033 /* Process function arguments. */
4034 nargs = gimple_call_num_args (stmt) - masked_call_offset;
4036 /* Bail out if the function has zero arguments. */
4037 if (nargs == 0)
4038 return false;
4040 vec<tree>& simd_clone_info = (slp_node ? SLP_TREE_SIMD_CLONE_INFO (slp_node)
4041 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info));
4042 arginfo.reserve (nargs, true);
4043 auto_vec<slp_tree> slp_op;
4044 slp_op.safe_grow_cleared (nargs);
4046 for (i = 0; i < nargs; i++)
4048 simd_call_arg_info thisarginfo;
4049 affine_iv iv;
4051 thisarginfo.linear_step = 0;
4052 thisarginfo.align = 0;
4053 thisarginfo.op = NULL_TREE;
4054 thisarginfo.simd_lane_linear = false;
4056 int op_no = i + masked_call_offset;
4057 if (slp_node)
4058 op_no = vect_slp_child_index_for_operand (stmt, op_no);
4059 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
4060 op_no, &op, &slp_op[i],
4061 &thisarginfo.dt, &thisarginfo.vectype)
4062 || thisarginfo.dt == vect_uninitialized_def)
4064 if (dump_enabled_p ())
4065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4066 "use not simple.\n");
4067 return false;
4070 if (thisarginfo.dt == vect_constant_def
4071 || thisarginfo.dt == vect_external_def)
4073 /* With SLP we determine the vector type of constants/externals
4074 at analysis time, handling conflicts via
4075 vect_maybe_update_slp_op_vectype. At transform time
4076 we have a vector type recorded for SLP. */
4077 gcc_assert (!vec_stmt
4078 || !slp_node
4079 || thisarginfo.vectype != NULL_TREE);
4080 if (!vec_stmt)
4081 thisarginfo.vectype = get_vectype_for_scalar_type (vinfo,
4082 TREE_TYPE (op),
4083 slp_node);
4085 else
4086 gcc_assert (thisarginfo.vectype != NULL_TREE);
4088 /* For linear arguments, the analyze phase should have saved
4089 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
4090 if (i * 3 + 4 <= simd_clone_info.length ()
4091 && simd_clone_info[i * 3 + 2])
4093 gcc_assert (vec_stmt);
4094 thisarginfo.linear_step = tree_to_shwi (simd_clone_info[i * 3 + 2]);
4095 thisarginfo.op = simd_clone_info[i * 3 + 1];
4096 thisarginfo.simd_lane_linear
4097 = (simd_clone_info[i * 3 + 3] == boolean_true_node);
4098 /* If loop has been peeled for alignment, we need to adjust it. */
4099 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4100 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4101 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4103 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4104 tree step = simd_clone_info[i * 3 + 2];
4105 tree opt = TREE_TYPE (thisarginfo.op);
4106 bias = fold_convert (TREE_TYPE (step), bias);
4107 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4108 thisarginfo.op
4109 = fold_build2 (POINTER_TYPE_P (opt)
4110 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4111 thisarginfo.op, bias);
4114 else if (!vec_stmt
4115 && thisarginfo.dt != vect_constant_def
4116 && thisarginfo.dt != vect_external_def
4117 && loop_vinfo
4118 && TREE_CODE (op) == SSA_NAME
4119 && simple_iv (loop, loop_containing_stmt (stmt), op,
4120 &iv, false)
4121 && tree_fits_shwi_p (iv.step))
4123 thisarginfo.linear_step = tree_to_shwi (iv.step);
4124 thisarginfo.op = iv.base;
4126 else if ((thisarginfo.dt == vect_constant_def
4127 || thisarginfo.dt == vect_external_def)
4128 && POINTER_TYPE_P (TREE_TYPE (op)))
4129 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4130 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4131 linear too. */
4132 if (POINTER_TYPE_P (TREE_TYPE (op))
4133 && !thisarginfo.linear_step
4134 && !vec_stmt
4135 && thisarginfo.dt != vect_constant_def
4136 && thisarginfo.dt != vect_external_def
4137 && loop_vinfo
4138 && TREE_CODE (op) == SSA_NAME)
4139 vect_simd_lane_linear (op, loop, &thisarginfo);
4141 arginfo.quick_push (thisarginfo);
4144 poly_uint64 vf = loop_vinfo ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) : 1;
4145 unsigned group_size = slp_node ? SLP_TREE_LANES (slp_node) : 1;
4146 unsigned int badness = 0;
4147 struct cgraph_node *bestn = NULL;
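/* Select the simd clone to use: either the one recorded during analysis,
   or the candidate with the lowest badness.  Needing several calls to cover
   the vectorization factor, being an in-branch (masked) clone,
   target-reported badness and arguments that require extra setup all make
   a clone less attractive.  */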
4148 if (simd_clone_info.exists ())
4149 bestn = cgraph_node::get (simd_clone_info[0]);
4150 else
4151 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4152 n = n->simdclone->next_clone)
4154 unsigned int this_badness = 0;
4155 unsigned int num_calls;
4156 if (!constant_multiple_p (vf * group_size, n->simdclone->simdlen,
4157 &num_calls)
4158 || (!n->simdclone->inbranch && (masked_call_offset > 0))
4159 || nargs != n->simdclone->nargs)
4160 continue;
4161 if (num_calls != 1)
4162 this_badness += exact_log2 (num_calls) * 4096;
4163 if (n->simdclone->inbranch)
4164 this_badness += 8192;
4165 int target_badness = targetm.simd_clone.usable (n);
4166 if (target_badness < 0)
4167 continue;
4168 this_badness += target_badness * 512;
4169 for (i = 0; i < nargs; i++)
4171 switch (n->simdclone->args[i].arg_type)
4173 case SIMD_CLONE_ARG_TYPE_VECTOR:
4174 if (!useless_type_conversion_p
4175 (n->simdclone->args[i].orig_type,
4176 TREE_TYPE (gimple_call_arg (stmt,
4177 i + masked_call_offset))))
4178 i = -1;
4179 else if (arginfo[i].dt == vect_constant_def
4180 || arginfo[i].dt == vect_external_def
4181 || arginfo[i].linear_step)
4182 this_badness += 64;
4183 break;
4184 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4185 if (arginfo[i].dt != vect_constant_def
4186 && arginfo[i].dt != vect_external_def)
4187 i = -1;
4188 break;
4189 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4190 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4191 if (arginfo[i].dt == vect_constant_def
4192 || arginfo[i].dt == vect_external_def
4193 || (arginfo[i].linear_step
4194 != n->simdclone->args[i].linear_step))
4195 i = -1;
4196 break;
4197 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4198 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4199 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4200 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4201 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4202 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4203 /* FORNOW */
4204 i = -1;
4205 break;
4206 case SIMD_CLONE_ARG_TYPE_MASK:
4207 /* While we can create a traditional data vector from
4208 an incoming integer-mode mask, we have no good way to
4209 force the generation of an integer-mode mask from a
4210 traditional boolean vector input. */
4211 if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4212 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4213 i = -1;
4214 else if (!SCALAR_INT_MODE_P (n->simdclone->mask_mode)
4215 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
4216 this_badness += 2048;
4217 break;
4219 if (i == (size_t) -1)
4220 break;
4221 if (n->simdclone->args[i].alignment > arginfo[i].align)
4223 i = -1;
4224 break;
4226 if (arginfo[i].align)
4227 this_badness += (exact_log2 (arginfo[i].align)
4228 - exact_log2 (n->simdclone->args[i].alignment));
4230 if (i == (size_t) -1)
4231 continue;
4232 if (masked_call_offset == 0
4233 && n->simdclone->inbranch
4234 && n->simdclone->nargs > nargs)
4236 gcc_assert (n->simdclone->args[n->simdclone->nargs - 1].arg_type ==
4237 SIMD_CLONE_ARG_TYPE_MASK);
4238 /* Penalize using a masked SIMD clone in a non-masked loop when the call
4239 is not in a branch, as we'd have to construct an all-true mask. */
4240 if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4241 this_badness += 64;
4243 if (bestn == NULL || this_badness < badness)
4245 bestn = n;
4246 badness = this_badness;
4250 if (bestn == NULL)
4251 return false;
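/* With an integer mask mode the clone's SIMDLEN lanes are distributed
   over all of its mask arguments, so count them in order to size each
   mask below.  */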
4253 unsigned int num_mask_args = 0;
4254 if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4255 for (i = 0; i < nargs; i++)
4256 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4257 num_mask_args++;
4259 for (i = 0; i < nargs; i++)
4261 if ((arginfo[i].dt == vect_constant_def
4262 || arginfo[i].dt == vect_external_def)
4263 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4265 tree arg_type = TREE_TYPE (gimple_call_arg (stmt,
4266 i + masked_call_offset));
4267 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4268 slp_node);
4269 if (arginfo[i].vectype == NULL
4270 || !constant_multiple_p (bestn->simdclone->simdlen,
4271 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4272 return false;
4275 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4276 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4278 if (dump_enabled_p ())
4279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4280 "vector mask arguments are not supported.\n");
4281 return false;
4284 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
4286 tree clone_arg_vectype = bestn->simdclone->args[i].vector_type;
4287 if (bestn->simdclone->mask_mode == VOIDmode)
4289 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype),
4290 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4292 /* FORNOW we only have partial support for vector-type masks
4293 that can't hold all of simdlen. */
4294 if (dump_enabled_p ())
4295 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4296 vect_location,
4297 "in-branch vector clones are not yet"
4298 " supported for mismatched vector sizes.\n");
4299 return false;
4302 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4304 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
4305 || maybe_ne (exact_div (bestn->simdclone->simdlen,
4306 num_mask_args),
4307 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4309 /* FORNOW we only have partial support for integer-type masks
4310 that represent the same number of lanes as the
4311 vectorized mask inputs. */
4312 if (dump_enabled_p ())
4313 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4314 vect_location,
4315 "in-branch vector clones are not yet "
4316 "supported for mismatched vector sizes.\n");
4317 return false;
4320 else
4322 if (dump_enabled_p ())
4323 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4324 vect_location,
4325 "in-branch vector clones not supported"
4326 " on this target.\n");
4327 return false;
4332 fndecl = bestn->decl;
4333 nunits = bestn->simdclone->simdlen;
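/* Number of clone calls needed to cover the full vectorization factor
   (times the SLP group size); e.g. (illustrative) VF 16 with a clone
   simdlen of 4 requires four calls.  */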
4334 if (slp_node)
4335 ncopies = vector_unroll_factor (vf * group_size, nunits);
4336 else
4337 ncopies = vector_unroll_factor (vf, nunits);
4339 /* If the function isn't const, only allow it in simd loops where the user
4340 has asserted that at least nunits consecutive iterations can be
4341 performed using SIMD instructions. */
4342 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4343 && gimple_vuse (stmt))
4344 return false;
4346 /* Sanity check: make sure that at least one copy of the vectorized stmt
4347 needs to be generated. */
4348 gcc_assert (ncopies >= 1);
4350 if (!vec_stmt) /* transformation not required. */
4352 if (slp_node)
4353 for (unsigned i = 0; i < nargs; ++i)
4354 if (!vect_maybe_update_slp_op_vectype (slp_op[i], arginfo[i].vectype))
4356 if (dump_enabled_p ())
4357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4358 "incompatible vector types for invariants\n");
4359 return false;
4361 /* When the original call is pure or const but the SIMD ABI dictates
4362 an aggregate return we will have to use a virtual definition and
4363 in a loop eventually even need to add a virtual PHI. That's
4364 not straightforward, so allow this to be fixed up via renaming. */
4365 if (gimple_call_lhs (stmt)
4366 && !gimple_vdef (stmt)
4367 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4368 vinfo->any_known_not_updated_vssa = true;
4369 /* ??? For SLP code-gen we end up inserting after the last
4370 vector argument def rather than at the original call position
4371 so automagic virtual operand updating doesn't work. */
4372 if (gimple_vuse (stmt) && slp_node)
4373 vinfo->any_known_not_updated_vssa = true;
4374 simd_clone_info.safe_push (bestn->decl);
4375 for (i = 0; i < bestn->simdclone->nargs; i++)
4377 switch (bestn->simdclone->args[i].arg_type)
4379 default:
4380 continue;
4381 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4382 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4384 auto &clone_info = STMT_VINFO_SIMD_CLONE_INFO (stmt_info);
4385 clone_info.safe_grow_cleared (i * 3 + 1, true);
4386 clone_info.safe_push (arginfo[i].op);
4387 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4388 ? size_type_node : TREE_TYPE (arginfo[i].op);
4389 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4390 clone_info.safe_push (ls);
4391 tree sll = arginfo[i].simd_lane_linear
4392 ? boolean_true_node : boolean_false_node;
4393 clone_info.safe_push (sll);
4395 break;
4396 case SIMD_CLONE_ARG_TYPE_MASK:
4397 if (loop_vinfo
4398 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4399 vect_record_loop_mask (loop_vinfo,
4400 &LOOP_VINFO_MASKS (loop_vinfo),
4401 ncopies, vectype, op);
4403 break;
4407 if (!bestn->simdclone->inbranch && loop_vinfo)
4409 if (dump_enabled_p ()
4410 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
4411 dump_printf_loc (MSG_NOTE, vect_location,
4412 "can't use a fully-masked loop because a"
4413 " non-masked simd clone was selected.\n");
4414 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
4417 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4418 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4419 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4420 dt, slp_node, cost_vec); */
4421 return true;
4424 /* Transform. */
4426 if (dump_enabled_p ())
4427 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4429 /* Handle def. */
4430 scalar_dest = gimple_call_lhs (stmt);
4431 vec_dest = NULL_TREE;
4432 rtype = NULL_TREE;
4433 ratype = NULL_TREE;
4434 if (scalar_dest)
4436 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4437 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4438 if (TREE_CODE (rtype) == ARRAY_TYPE)
4440 ratype = rtype;
4441 rtype = TREE_TYPE (ratype);
4445 auto_vec<vec<tree> > vec_oprnds;
4446 auto_vec<unsigned> vec_oprnds_i;
4447 vec_oprnds_i.safe_grow_cleared (nargs, true);
4448 if (slp_node)
4450 vec_oprnds.reserve_exact (nargs);
4451 vect_get_slp_defs (vinfo, slp_node, &vec_oprnds);
4453 else
4454 vec_oprnds.safe_grow_cleared (nargs, true);
4455 for (j = 0; j < ncopies; ++j)
4457 poly_uint64 callee_nelements;
4458 poly_uint64 caller_nelements;
4459 /* Build argument list for the vectorized call. */
4460 if (j == 0)
4461 vargs.create (nargs);
4462 else
4463 vargs.truncate (0);
4465 for (i = 0; i < nargs; i++)
4467 unsigned int k, l, m, o;
4468 tree atype;
4469 op = gimple_call_arg (stmt, i + masked_call_offset);
4470 switch (bestn->simdclone->args[i].arg_type)
4472 case SIMD_CLONE_ARG_TYPE_VECTOR:
4473 atype = bestn->simdclone->args[i].vector_type;
4474 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4475 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4476 o = vector_unroll_factor (nunits, callee_nelements);
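/* O is the number of callee-width vectors needed per clone call.  When the
   clone's vectors have fewer lanes than ours we extract pieces with
   BIT_FIELD_REF; when they have more we glue our vectors together with a
   CONSTRUCTOR (or VIEW_CONVERT a single one).  */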
4477 for (m = j * o; m < (j + 1) * o; m++)
4479 if (known_lt (callee_nelements, caller_nelements))
4481 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4482 if (!constant_multiple_p (caller_nelements,
4483 callee_nelements, &k))
4484 gcc_unreachable ();
4486 gcc_assert ((k & (k - 1)) == 0);
4487 if (m == 0)
4489 if (!slp_node)
4490 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4491 ncopies * o / k, op,
4492 &vec_oprnds[i]);
4493 vec_oprnds_i[i] = 0;
4494 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4496 else
4498 vec_oprnd0 = arginfo[i].op;
4499 if ((m & (k - 1)) == 0)
4500 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4502 arginfo[i].op = vec_oprnd0;
4503 vec_oprnd0
4504 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4505 bitsize_int (prec),
4506 bitsize_int ((m & (k - 1)) * prec));
4507 gassign *new_stmt
4508 = gimple_build_assign (make_ssa_name (atype),
4509 vec_oprnd0);
4510 vect_finish_stmt_generation (vinfo, stmt_info,
4511 new_stmt, gsi);
4512 vargs.safe_push (gimple_assign_lhs (new_stmt));
4514 else
4516 if (!constant_multiple_p (callee_nelements,
4517 caller_nelements, &k))
4518 gcc_unreachable ();
4519 gcc_assert ((k & (k - 1)) == 0);
4520 vec<constructor_elt, va_gc> *ctor_elts;
4521 if (k != 1)
4522 vec_alloc (ctor_elts, k);
4523 else
4524 ctor_elts = NULL;
4525 for (l = 0; l < k; l++)
4527 if (m == 0 && l == 0)
4529 if (!slp_node)
4530 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4531 k * o * ncopies,
4533 &vec_oprnds[i]);
4534 vec_oprnds_i[i] = 0;
4535 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4537 else
4538 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4539 arginfo[i].op = vec_oprnd0;
4540 if (k == 1)
4541 break;
4542 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4543 vec_oprnd0);
4545 if (k == 1)
4546 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4547 atype))
4549 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, atype,
4550 vec_oprnd0);
4551 gassign *new_stmt
4552 = gimple_build_assign (make_ssa_name (atype),
4553 vec_oprnd0);
4554 vect_finish_stmt_generation (vinfo, stmt_info,
4555 new_stmt, gsi);
4556 vargs.safe_push (gimple_get_lhs (new_stmt));
4558 else
4559 vargs.safe_push (vec_oprnd0);
4560 else
4562 vec_oprnd0 = build_constructor (atype, ctor_elts);
4563 gassign *new_stmt
4564 = gimple_build_assign (make_ssa_name (atype),
4565 vec_oprnd0);
4566 vect_finish_stmt_generation (vinfo, stmt_info,
4567 new_stmt, gsi);
4568 vargs.safe_push (gimple_assign_lhs (new_stmt));
4572 break;
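/* Mask arguments: with a vector mask mode the boolean mask becomes a data
   vector of ones and zeros via VEC_COND_EXPR; with an integer mask mode the
   boolean vector is view-converted (and converted if necessary) to the
   clone's integer mask type.  */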
4573 case SIMD_CLONE_ARG_TYPE_MASK:
4574 if (bestn->simdclone->mask_mode == VOIDmode)
4576 atype = bestn->simdclone->args[i].vector_type;
4577 tree elt_type = TREE_TYPE (atype);
4578 tree one = fold_convert (elt_type, integer_one_node);
4579 tree zero = fold_convert (elt_type, integer_zero_node);
4580 callee_nelements = TYPE_VECTOR_SUBPARTS (atype);
4581 caller_nelements = TYPE_VECTOR_SUBPARTS (arginfo[i].vectype);
4582 o = vector_unroll_factor (nunits, callee_nelements);
4583 for (m = j * o; m < (j + 1) * o; m++)
4585 if (maybe_lt (callee_nelements, caller_nelements))
4587 /* The mask type has fewer elements than simdlen. */
4589 /* FORNOW */
4590 gcc_unreachable ();
4592 else if (known_eq (callee_nelements, caller_nelements))
4594 /* The SIMD clone function has the same number of
4595 elements as the current function. */
4596 if (m == 0)
4598 if (!slp_node)
4599 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4600 o * ncopies,
4602 &vec_oprnds[i]);
4603 vec_oprnds_i[i] = 0;
4605 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4606 if (loop_vinfo
4607 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4609 vec_loop_masks *loop_masks
4610 = &LOOP_VINFO_MASKS (loop_vinfo);
4611 tree loop_mask
4612 = vect_get_loop_mask (loop_vinfo, gsi,
4613 loop_masks, ncopies,
4614 vectype, j);
4615 vec_oprnd0
4616 = prepare_vec_mask (loop_vinfo,
4617 TREE_TYPE (loop_mask),
4618 loop_mask, vec_oprnd0,
4619 gsi);
4620 loop_vinfo->vec_cond_masked_set.add ({ vec_oprnd0,
4621 loop_mask });
4624 vec_oprnd0
4625 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4626 build_vector_from_val (atype, one),
4627 build_vector_from_val (atype, zero));
4628 gassign *new_stmt
4629 = gimple_build_assign (make_ssa_name (atype),
4630 vec_oprnd0);
4631 vect_finish_stmt_generation (vinfo, stmt_info,
4632 new_stmt, gsi);
4633 vargs.safe_push (gimple_assign_lhs (new_stmt));
4635 else
4637 /* The mask type has more elements than simdlen. */
4639 /* FORNOW */
4640 gcc_unreachable ();
4644 else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
4646 atype = bestn->simdclone->args[i].vector_type;
4647 /* Guess the number of lanes represented by atype. */
4648 poly_uint64 atype_subparts
4649 = exact_div (bestn->simdclone->simdlen,
4650 num_mask_args);
4651 o = vector_unroll_factor (nunits, atype_subparts);
4652 for (m = j * o; m < (j + 1) * o; m++)
4654 if (m == 0)
4656 if (!slp_node)
4657 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4658 o * ncopies,
4660 &vec_oprnds[i]);
4661 vec_oprnds_i[i] = 0;
4663 if (maybe_lt (atype_subparts,
4664 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4666 /* The mask argument has fewer elements than the
4667 input vector. */
4668 /* FORNOW */
4669 gcc_unreachable ();
4671 else if (known_eq (atype_subparts,
4672 TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)))
4674 /* The vector mask argument matches the input
4675 in the number of lanes, but not necessarily
4676 in the mode. */
4677 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4678 tree st = lang_hooks.types.type_for_mode
4679 (TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
4680 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
4681 vec_oprnd0);
4682 gassign *new_stmt
4683 = gimple_build_assign (make_ssa_name (st),
4684 vec_oprnd0);
4685 vect_finish_stmt_generation (vinfo, stmt_info,
4686 new_stmt, gsi);
4687 if (!types_compatible_p (atype, st))
4689 new_stmt
4690 = gimple_build_assign (make_ssa_name (atype),
4691 NOP_EXPR,
4692 gimple_assign_lhs
4693 (new_stmt));
4694 vect_finish_stmt_generation (vinfo, stmt_info,
4695 new_stmt, gsi);
4697 vargs.safe_push (gimple_assign_lhs (new_stmt));
4699 else
4701 /* The mask argument has more elements than the
4702 input vector. */
4703 /* FORNOW */
4704 gcc_unreachable ();
4708 else
4709 gcc_unreachable ();
4710 break;
4711 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4712 vargs.safe_push (op);
4713 break;
4714 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4715 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4716 if (j == 0)
4718 gimple_seq stmts;
4719 arginfo[i].op
4720 = force_gimple_operand (unshare_expr (arginfo[i].op),
4721 &stmts, true, NULL_TREE);
4722 if (stmts != NULL)
4724 basic_block new_bb;
4725 edge pe = loop_preheader_edge (loop);
4726 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4727 gcc_assert (!new_bb);
4729 if (arginfo[i].simd_lane_linear)
4731 vargs.safe_push (arginfo[i].op);
4732 break;
4734 tree phi_res = copy_ssa_name (op);
4735 gphi *new_phi = create_phi_node (phi_res, loop->header);
4736 add_phi_arg (new_phi, arginfo[i].op,
4737 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4738 enum tree_code code
4739 = POINTER_TYPE_P (TREE_TYPE (op))
4740 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4741 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4742 ? sizetype : TREE_TYPE (op);
4743 poly_widest_int cst
4744 = wi::mul (bestn->simdclone->args[i].linear_step,
4745 ncopies * nunits);
4746 tree tcst = wide_int_to_tree (type, cst);
4747 tree phi_arg = copy_ssa_name (op);
4748 gassign *new_stmt
4749 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4750 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4751 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4752 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4753 UNKNOWN_LOCATION);
4754 arginfo[i].op = phi_res;
4755 vargs.safe_push (phi_res);
4757 else
4759 enum tree_code code
4760 = POINTER_TYPE_P (TREE_TYPE (op))
4761 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4762 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4763 ? sizetype : TREE_TYPE (op);
4764 poly_widest_int cst
4765 = wi::mul (bestn->simdclone->args[i].linear_step,
4766 j * nunits);
4767 tree tcst = wide_int_to_tree (type, cst);
4768 new_temp = make_ssa_name (TREE_TYPE (op));
4769 gassign *new_stmt
4770 = gimple_build_assign (new_temp, code,
4771 arginfo[i].op, tcst);
4772 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4773 vargs.safe_push (new_temp);
4775 break;
4776 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4777 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4778 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4779 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4780 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4781 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4782 default:
4783 gcc_unreachable ();
4787 if (masked_call_offset == 0
4788 && bestn->simdclone->inbranch
4789 && bestn->simdclone->nargs > nargs)
4791 unsigned long m, o;
4792 size_t mask_i = bestn->simdclone->nargs - 1;
4793 tree mask;
4794 gcc_assert (bestn->simdclone->args[mask_i].arg_type ==
4795 SIMD_CLONE_ARG_TYPE_MASK);
4797 tree masktype = bestn->simdclone->args[mask_i].vector_type;
4798 callee_nelements = TYPE_VECTOR_SUBPARTS (masktype);
4799 o = vector_unroll_factor (nunits, callee_nelements);
4800 for (m = j * o; m < (j + 1) * o; m++)
4802 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
4804 vec_loop_masks *loop_masks = &LOOP_VINFO_MASKS (loop_vinfo);
4805 mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
4806 ncopies, vectype, j);
4808 else
4809 mask = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
4811 if (!useless_type_conversion_p (TREE_TYPE (mask), masktype))
4813 gassign *new_stmt;
4814 if (bestn->simdclone->mask_mode != VOIDmode)
4816 /* This means we are dealing with integer mask modes.
4817 First convert to an integer type with the same size as
4818 the current vector type. */
4819 unsigned HOST_WIDE_INT intermediate_size
4820 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask)));
4821 tree mid_int_type =
4822 build_nonstandard_integer_type (intermediate_size, 1);
4823 mask = build1 (VIEW_CONVERT_EXPR, mid_int_type, mask);
4824 new_stmt
4825 = gimple_build_assign (make_ssa_name (mid_int_type),
4826 mask);
4827 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4828 /* Then zero-extend to the mask mode. */
4829 mask = fold_build1 (NOP_EXPR, masktype,
4830 gimple_get_lhs (new_stmt));
4832 else
4833 mask = build1 (VIEW_CONVERT_EXPR, masktype, mask);
4835 new_stmt = gimple_build_assign (make_ssa_name (masktype),
4836 mask);
4837 vect_finish_stmt_generation (vinfo, stmt_info,
4838 new_stmt, gsi);
4839 mask = gimple_assign_lhs (new_stmt);
4841 vargs.safe_push (mask);
4845 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4846 if (vec_dest)
4848 gcc_assert (ratype
4849 || known_eq (TYPE_VECTOR_SUBPARTS (rtype), nunits));
4850 if (ratype)
4851 new_temp = create_tmp_var (ratype);
4852 else if (useless_type_conversion_p (vectype, rtype))
4853 new_temp = make_ssa_name (vec_dest, new_call);
4854 else
4855 new_temp = make_ssa_name (rtype, new_call);
4856 gimple_call_set_lhs (new_call, new_temp);
4858 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4859 gimple *new_stmt = new_call;
4861 if (vec_dest)
4863 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype), nunits))
4865 unsigned int k, l;
4866 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4867 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4868 k = vector_unroll_factor (nunits,
4869 TYPE_VECTOR_SUBPARTS (vectype));
4870 gcc_assert ((k & (k - 1)) == 0);
4871 for (l = 0; l < k; l++)
4873 tree t;
4874 if (ratype)
4876 t = build_fold_addr_expr (new_temp);
4877 t = build2 (MEM_REF, vectype, t,
4878 build_int_cst (TREE_TYPE (t), l * bytes));
4880 else
4881 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4882 bitsize_int (prec), bitsize_int (l * prec));
4883 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4884 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4886 if (j == 0 && l == 0)
4887 *vec_stmt = new_stmt;
4888 if (slp_node)
4889 SLP_TREE_VEC_DEFS (slp_node)
4890 .quick_push (gimple_assign_lhs (new_stmt));
4891 else
4892 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4895 if (ratype)
4896 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4897 continue;
4899 else if (!multiple_p (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
4901 unsigned int k;
4902 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype),
4903 TYPE_VECTOR_SUBPARTS (rtype), &k))
4904 gcc_unreachable ();
4905 gcc_assert ((k & (k - 1)) == 0);
4906 if ((j & (k - 1)) == 0)
4907 vec_alloc (ret_ctor_elts, k);
4908 if (ratype)
4910 unsigned int m, o;
4911 o = vector_unroll_factor (nunits,
4912 TYPE_VECTOR_SUBPARTS (rtype));
4913 for (m = 0; m < o; m++)
4915 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4916 size_int (m), NULL_TREE, NULL_TREE);
4917 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4918 tem);
4919 vect_finish_stmt_generation (vinfo, stmt_info,
4920 new_stmt, gsi);
4921 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4922 gimple_assign_lhs (new_stmt));
4924 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4926 else
4927 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4928 if ((j & (k - 1)) != k - 1)
4929 continue;
4930 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4931 new_stmt
4932 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4933 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4935 if ((unsigned) j == k - 1)
4936 *vec_stmt = new_stmt;
4937 if (slp_node)
4938 SLP_TREE_VEC_DEFS (slp_node)
4939 .quick_push (gimple_assign_lhs (new_stmt));
4940 else
4941 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4942 continue;
4944 else if (ratype)
4946 tree t = build_fold_addr_expr (new_temp);
4947 t = build2 (MEM_REF, vectype, t,
4948 build_int_cst (TREE_TYPE (t), 0));
4949 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4950 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4951 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4953 else if (!useless_type_conversion_p (vectype, rtype))
4955 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4956 new_stmt
4957 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4958 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4962 if (j == 0)
4963 *vec_stmt = new_stmt;
4964 if (slp_node)
4965 SLP_TREE_VEC_DEFS (slp_node).quick_push (gimple_get_lhs (new_stmt));
4966 else
4967 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4970 for (i = 0; i < nargs; ++i)
4972 vec<tree> oprndsi = vec_oprnds[i];
4973 oprndsi.release ();
4975 vargs.release ();
4977 /* Mark the clone as no longer being a candidate for GC. */
4978 bestn->gc_candidate = false;
4980 /* The call in STMT might prevent it from being removed in dce.
4981 However, we cannot remove it here because of the way the ssa name
4982 it defines is mapped to the new definition. So just replace the
4983 rhs of the statement with something harmless. */
4985 if (slp_node)
4986 return true;
4988 gimple *new_stmt;
4989 if (scalar_dest)
4991 type = TREE_TYPE (scalar_dest);
4992 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4993 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4995 else
4996 new_stmt = gimple_build_nop ();
4997 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4998 unlink_stmt_vdef (stmt);
5000 return true;
5004 /* Function vect_gen_widened_results_half
5006 Create a vector stmt whose code, number of arguments, and result
5007 variable are CH, OP_TYPE, and VEC_DEST, and whose arguments are
5008 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
5009 In the case that CH wraps an internal function rather than a tree code,
5010 a call to that internal function is built instead of an assignment.
5011 STMT_INFO is the original scalar stmt that we are vectorizing. */
5013 static gimple *
5014 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
5015 tree vec_oprnd0, tree vec_oprnd1, int op_type,
5016 tree vec_dest, gimple_stmt_iterator *gsi,
5017 stmt_vec_info stmt_info)
5019 gimple *new_stmt;
5020 tree new_temp;
5022 /* Generate half of the widened result: */
5023 if (op_type != binary_op)
5024 vec_oprnd1 = NULL;
5025 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
5026 new_temp = make_ssa_name (vec_dest, new_stmt);
5027 gimple_set_lhs (new_stmt, new_temp);
5028 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5030 return new_stmt;
5034 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
5035 For multi-step conversions store the resulting vectors and call the function
5036 recursively. When NARROW_SRC_P is true, there is still a conversion after
5037 narrowing, so don't store the vectors in the SLP_NODE or in the vector info
5038 of the scalar statement (or in the STMT_VINFO_RELATED_STMT chain). */
5040 static void
5041 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
5042 int multi_step_cvt,
5043 stmt_vec_info stmt_info,
5044 vec<tree> &vec_dsts,
5045 gimple_stmt_iterator *gsi,
5046 slp_tree slp_node, code_helper code,
5047 bool narrow_src_p)
5049 unsigned int i;
5050 tree vop0, vop1, new_tmp, vec_dest;
5052 vec_dest = vec_dsts.pop ();
5054 for (i = 0; i < vec_oprnds->length (); i += 2)
5056 /* Create demotion operation. */
5057 vop0 = (*vec_oprnds)[i];
5058 vop1 = (*vec_oprnds)[i + 1];
5059 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
5060 new_tmp = make_ssa_name (vec_dest, new_stmt);
5061 gimple_set_lhs (new_stmt, new_tmp);
5062 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5063 if (multi_step_cvt || narrow_src_p)
5064 /* Store the resulting vector for the next recursive call,
5065 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
5066 (*vec_oprnds)[i/2] = new_tmp;
5067 else
5069 /* This is the last step of the conversion sequence. Store the
5070 vectors in SLP_NODE or in vector info of the scalar statement
5071 (or in STMT_VINFO_RELATED_STMT chain). */
5072 if (slp_node)
5073 slp_node->push_vec_def (new_stmt);
5074 else
5075 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5079 /* For multi-step demotion operations we first generate demotion operations
5080 from the source type to the intermediate types, and then combine the
5081 results (stored in VEC_OPRNDS) in demotion operation to the destination
5082 type. */
5083 if (multi_step_cvt)
5085 /* At each level of recursion we have half of the operands we had at the
5086 previous level. */
5087 vec_oprnds->truncate ((i+1)/2);
5088 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5089 multi_step_cvt - 1,
5090 stmt_info, vec_dsts, gsi,
5091 slp_node, VEC_PACK_TRUNC_EXPR,
5092 narrow_src_p);
5095 vec_dsts.quick_push (vec_dest);
5099 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5100 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5101 STMT_INFO. For multi-step conversions store the resulting vectors and
5102 call the function recursively. */
5104 static void
5105 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5106 vec<tree> *vec_oprnds0,
5107 vec<tree> *vec_oprnds1,
5108 stmt_vec_info stmt_info, tree vec_dest,
5109 gimple_stmt_iterator *gsi,
5110 code_helper ch1,
5111 code_helper ch2, int op_type)
5113 int i;
5114 tree vop0, vop1, new_tmp1, new_tmp2;
5115 gimple *new_stmt1, *new_stmt2;
5116 vec<tree> vec_tmp = vNULL;
5118 vec_tmp.create (vec_oprnds0->length () * 2);
5119 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5121 if (op_type == binary_op)
5122 vop1 = (*vec_oprnds1)[i];
5123 else
5124 vop1 = NULL_TREE;
5126 /* Generate the two halves of promotion operation. */
5127 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5128 op_type, vec_dest, gsi,
5129 stmt_info);
5130 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5131 op_type, vec_dest, gsi,
5132 stmt_info);
5133 if (is_gimple_call (new_stmt1))
5135 new_tmp1 = gimple_call_lhs (new_stmt1);
5136 new_tmp2 = gimple_call_lhs (new_stmt2);
5138 else
5140 new_tmp1 = gimple_assign_lhs (new_stmt1);
5141 new_tmp2 = gimple_assign_lhs (new_stmt2);
5144 /* Store the results for the next step. */
5145 vec_tmp.quick_push (new_tmp1);
5146 vec_tmp.quick_push (new_tmp2);
5149 vec_oprnds0->release ();
5150 *vec_oprnds0 = vec_tmp;
5153 /* Create vectorized promotion stmts for widening stmts using only half the
5154 potential vector size for input. */
5155 static void
5156 vect_create_half_widening_stmts (vec_info *vinfo,
5157 vec<tree> *vec_oprnds0,
5158 vec<tree> *vec_oprnds1,
5159 stmt_vec_info stmt_info, tree vec_dest,
5160 gimple_stmt_iterator *gsi,
5161 code_helper code1,
5162 int op_type)
5164 int i;
5165 tree vop0, vop1;
5166 gimple *new_stmt1;
5167 gimple *new_stmt2;
5168 gimple *new_stmt3;
5169 vec<tree> vec_tmp = vNULL;
5171 vec_tmp.create (vec_oprnds0->length ());
5172 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5174 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5176 gcc_assert (op_type == binary_op);
5177 vop1 = (*vec_oprnds1)[i];
5179 /* Widen the first vector input. */
5180 out_type = TREE_TYPE (vec_dest);
5181 new_tmp1 = make_ssa_name (out_type);
5182 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5183 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5184 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5186 /* Widen the second vector input. */
5187 new_tmp2 = make_ssa_name (out_type);
5188 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5189 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5190 /* Perform the operation with both vector inputs widened. */
5191 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5193 else
5195 /* Perform the operation with the single vector input widened. */
5196 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5199 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5200 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5203 /* Store the results for the next step. */
5204 vec_tmp.quick_push (new_tmp3);
5207 vec_oprnds0->release ();
5208 *vec_oprnds0 = vec_tmp;
5212 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5213 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5214 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5215 Return true if STMT_INFO is vectorizable in this way. */
5217 static bool
5218 vectorizable_conversion (vec_info *vinfo,
5219 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5220 gimple **vec_stmt, slp_tree slp_node,
5221 stmt_vector_for_cost *cost_vec)
5223 tree vec_dest, cvt_op = NULL_TREE;
5224 tree scalar_dest;
5225 tree op0, op1 = NULL_TREE;
5226 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5227 tree_code tc1, tc2;
5228 code_helper code, code1, code2;
5229 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5230 tree new_temp;
5231 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5232 int ndts = 2;
5233 poly_uint64 nunits_in;
5234 poly_uint64 nunits_out;
5235 tree vectype_out, vectype_in;
5236 int ncopies, i;
5237 tree lhs_type, rhs_type;
5238 /* For conversions between floating point and integer, there are two NARROW
5239 cases. NARROW_SRC is for FLOAT_EXPR and means
5240 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5241 This is safe when the range of the source integer fits into the lower
5242 precision. NARROW_DST is for FIX_TRUNC_EXPR and means
5243 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5244 For other conversions that narrow, NARROW_DST is used by
5245 default. */
5246 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5247 vec<tree> vec_oprnds0 = vNULL;
5248 vec<tree> vec_oprnds1 = vNULL;
5249 tree vop0;
5250 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5251 int multi_step_cvt = 0;
5252 vec<tree> interm_types = vNULL;
5253 tree intermediate_type, cvt_type = NULL_TREE;
5254 int op_type;
5255 unsigned short fltsz;
5257 /* Is STMT a vectorizable conversion? */
5259 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5260 return false;
5262 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5263 && ! vec_stmt)
5264 return false;
5266 gimple* stmt = stmt_info->stmt;
5267 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5268 return false;
5270 if (gimple_get_lhs (stmt) == NULL_TREE
5271 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5272 return false;
5274 if (TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5275 return false;
5277 if (is_gimple_assign (stmt))
5279 code = gimple_assign_rhs_code (stmt);
5280 op_type = TREE_CODE_LENGTH ((tree_code) code);
5282 else if (gimple_call_internal_p (stmt))
5284 code = gimple_call_internal_fn (stmt);
5285 op_type = gimple_call_num_args (stmt);
5287 else
5288 return false;
5290 bool widen_arith = (code == WIDEN_MULT_EXPR
5291 || code == WIDEN_LSHIFT_EXPR
5292 || widening_fn_p (code));
5294 if (!widen_arith
5295 && !CONVERT_EXPR_CODE_P (code)
5296 && code != FIX_TRUNC_EXPR
5297 && code != FLOAT_EXPR)
5298 return false;
5300 /* Check types of lhs and rhs. */
5301 scalar_dest = gimple_get_lhs (stmt);
5302 lhs_type = TREE_TYPE (scalar_dest);
5303 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5305 /* Check the operands of the operation. */
5306 slp_tree slp_op0, slp_op1 = NULL;
5307 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5308 0, &op0, &slp_op0, &dt[0], &vectype_in))
5310 if (dump_enabled_p ())
5311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5312 "use not simple.\n");
5313 return false;
5316 rhs_type = TREE_TYPE (op0);
5317 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5318 && !((INTEGRAL_TYPE_P (lhs_type)
5319 && INTEGRAL_TYPE_P (rhs_type))
5320 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5321 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5322 return false;
5324 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5325 && ((INTEGRAL_TYPE_P (lhs_type)
5326 && !type_has_mode_precision_p (lhs_type))
5327 || (INTEGRAL_TYPE_P (rhs_type)
5328 && !type_has_mode_precision_p (rhs_type))))
5330 if (dump_enabled_p ())
5331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5332 "type conversion to/from bit-precision unsupported."
5333 "\n");
5334 return false;
5337 if (op_type == binary_op)
5339 gcc_assert (code == WIDEN_MULT_EXPR
5340 || code == WIDEN_LSHIFT_EXPR
5341 || widening_fn_p (code));
5343 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5344 gimple_call_arg (stmt, 0);
5345 tree vectype1_in;
5346 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5347 &op1, &slp_op1, &dt[1], &vectype1_in))
5349 if (dump_enabled_p ())
5350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5351 "use not simple.\n");
5352 return false;
5354 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5355 OP1. */
5356 if (!vectype_in)
5357 vectype_in = vectype1_in;
5360 /* If op0 is an external or constant def, infer the vector type
5361 from the scalar type. */
5362 if (!vectype_in)
5363 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5364 if (vec_stmt)
5365 gcc_assert (vectype_in);
5366 if (!vectype_in)
5368 if (dump_enabled_p ())
5369 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5370 "no vectype for scalar type %T\n", rhs_type);
5372 return false;
5375 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5376 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5378 if (dump_enabled_p ())
5379 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5380 "can't convert between boolean and non "
5381 "boolean vectors %T\n", rhs_type);
5383 return false;
5386 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5387 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
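/* Classify the conversion by comparing vector subpart counts: more output
   subparts than input subparts means several input vectors are packed into
   one output vector, i.e. the elements get narrower (NARROW_*); fewer
   output subparts means the elements get wider (WIDEN).  */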
5388 if (known_eq (nunits_out, nunits_in))
5389 if (widen_arith)
5390 modifier = WIDEN;
5391 else
5392 modifier = NONE;
5393 else if (multiple_p (nunits_out, nunits_in))
5394 modifier = NARROW_DST;
5395 else
5397 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5398 modifier = WIDEN;
5401 /* Multiple types in SLP are handled by creating the appropriate number of
5402 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5403 case of SLP. */
5404 if (slp_node)
5405 ncopies = 1;
5406 else if (modifier == NARROW_DST)
5407 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5408 else
5409 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5411 /* Sanity check: make sure that at least one copy of the vectorized stmt
5412 needs to be generated. */
5413 gcc_assert (ncopies >= 1);
5415 bool found_mode = false;
5416 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5417 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5418 opt_scalar_mode rhs_mode_iter;
5420 /* Supportable by target? */
5421 switch (modifier)
5423 case NONE:
5424 if (code != FIX_TRUNC_EXPR
5425 && code != FLOAT_EXPR
5426 && !CONVERT_EXPR_CODE_P (code))
5427 return false;
5428 gcc_assert (code.is_tree_code ());
5429 if (supportable_convert_operation ((tree_code) code, vectype_out,
5430 vectype_in, &tc1))
5432 code1 = tc1;
5433 break;
5436 /* For conversions between float and integer types try whether
5437 we can use intermediate signed integer types to support the
5438 conversion. */
5439 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
5440 && (code == FLOAT_EXPR ||
5441 (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5443 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5444 bool float_expr_p = code == FLOAT_EXPR;
5445 unsigned short target_size;
5446 scalar_mode intermediate_mode;
5447 if (demotion)
5449 intermediate_mode = lhs_mode;
5450 target_size = GET_MODE_SIZE (rhs_mode);
5452 else
5454 target_size = GET_MODE_SIZE (lhs_mode);
5455 if (!int_mode_for_size
5456 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5457 goto unsupported;
5459 code1 = float_expr_p ? code : NOP_EXPR;
5460 codecvt1 = float_expr_p ? NOP_EXPR : code;
5461 opt_scalar_mode mode_iter;
5462 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5464 intermediate_mode = mode_iter.require ();
5466 if (GET_MODE_SIZE (intermediate_mode) > target_size)
5467 break;
5469 scalar_mode cvt_mode;
5470 if (!int_mode_for_size
5471 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
5472 break;
5474 cvt_type = build_nonstandard_integer_type
5475 (GET_MODE_BITSIZE (cvt_mode), 0);
5477 /* Check if the intermediate type can hold OP0's range.
5478 When converting from float to integer this is not necessary
5479 because values that do not fit the (smaller) target type are
5480 unspecified anyway. */
5481 if (demotion && float_expr_p)
5483 wide_int op_min_value, op_max_value;
5484 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5485 break;
5487 if (cvt_type == NULL_TREE
5488 || (wi::min_precision (op_max_value, SIGNED)
5489 > TYPE_PRECISION (cvt_type))
5490 || (wi::min_precision (op_min_value, SIGNED)
5491 > TYPE_PRECISION (cvt_type)))
5492 continue;
5495 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5496 /* This should only happen for SLP as long as the loop vectorizer
5497 only supports same-sized vectors. */
5498 if (cvt_type == NULL_TREE
5499 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5500 || !supportable_convert_operation ((tree_code) code1,
5501 vectype_out,
5502 cvt_type, &tc1)
5503 || !supportable_convert_operation ((tree_code) codecvt1,
5504 cvt_type,
5505 vectype_in, &tc2))
5506 continue;
5508 found_mode = true;
5509 break;
5512 if (found_mode)
5514 multi_step_cvt++;
5515 interm_types.safe_push (cvt_type);
5516 cvt_type = NULL_TREE;
5517 code1 = tc1;
5518 codecvt1 = tc2;
5519 break;
5522 /* FALLTHRU */
5523 unsupported:
5524 if (dump_enabled_p ())
5525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5526 "conversion not supported by target.\n");
5527 return false;
5529 case WIDEN:
5530 if (known_eq (nunits_in, nunits_out))
5532 if (!(code.is_tree_code ()
5533 && supportable_half_widening_operation ((tree_code) code,
5534 vectype_out, vectype_in,
5535 &tc1)))
5536 goto unsupported;
5537 code1 = tc1;
5538 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5539 break;
5541 if (supportable_widening_operation (vinfo, code, stmt_info,
5542 vectype_out, vectype_in, &code1,
5543 &code2, &multi_step_cvt,
5544 &interm_types))
5546 /* A binary widening operation can only be supported directly by the
5547 architecture. */
5548 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5549 break;
5552 if (code != FLOAT_EXPR
5553 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5554 goto unsupported;
5556 fltsz = GET_MODE_SIZE (lhs_mode);
5557 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5559 rhs_mode = rhs_mode_iter.require ();
5560 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5561 break;
5563 cvt_type
5564 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5565 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5566 if (cvt_type == NULL_TREE)
5567 goto unsupported;
5569 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5571 tc1 = ERROR_MARK;
5572 gcc_assert (code.is_tree_code ());
5573 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5574 cvt_type, &tc1))
5575 goto unsupported;
5576 codecvt1 = tc1;
5578 else if (!supportable_widening_operation (vinfo, code,
5579 stmt_info, vectype_out,
5580 cvt_type, &codecvt1,
5581 &codecvt2, &multi_step_cvt,
5582 &interm_types))
5583 continue;
5584 else
5585 gcc_assert (multi_step_cvt == 0);
5587 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5588 cvt_type,
5589 vectype_in, &code1,
5590 &code2, &multi_step_cvt,
5591 &interm_types))
5593 found_mode = true;
5594 break;
5598 if (!found_mode)
5599 goto unsupported;
5601 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5602 codecvt2 = ERROR_MARK;
5603 else
5605 multi_step_cvt++;
5606 interm_types.safe_push (cvt_type);
5607 cvt_type = NULL_TREE;
5609 break;
5611 case NARROW_DST:
5612 gcc_assert (op_type == unary_op);
5613 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5614 &code1, &multi_step_cvt,
5615 &interm_types))
5616 break;
5618 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5619 goto unsupported;
5621 if (code == FIX_TRUNC_EXPR)
5623 cvt_type
5624 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5625 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5626 if (cvt_type == NULL_TREE)
5627 goto unsupported;
5628 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5629 &tc1))
5630 codecvt1 = tc1;
5631 else
5632 goto unsupported;
5633 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5634 &code1, &multi_step_cvt,
5635 &interm_types))
5636 break;
5638 /* If op0 can be represented with a low-precision integer,
5639 truncate it to cvt_type and then do FLOAT_EXPR. */
5640 else if (code == FLOAT_EXPR)
5642 wide_int op_min_value, op_max_value;
5643 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5644 goto unsupported;
5646 cvt_type
5647 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5648 if (cvt_type == NULL_TREE
5649 || (wi::min_precision (op_max_value, SIGNED)
5650 > TYPE_PRECISION (cvt_type))
5651 || (wi::min_precision (op_min_value, SIGNED)
5652 > TYPE_PRECISION (cvt_type)))
5653 goto unsupported;
5655 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5656 if (cvt_type == NULL_TREE)
5657 goto unsupported;
5658 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5659 &code1, &multi_step_cvt,
5660 &interm_types))
5661 goto unsupported;
5662 if (supportable_convert_operation ((tree_code) code, vectype_out,
5663 cvt_type, &tc1))
5665 codecvt1 = tc1;
5666 modifier = NARROW_SRC;
5667 break;
5671 goto unsupported;
5673 default:
5674 gcc_unreachable ();
5677 if (!vec_stmt) /* transformation not required. */
5679 if (slp_node
5680 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5681 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5683 if (dump_enabled_p ())
5684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5685 "incompatible vector types for invariants\n");
5686 return false;
5688 DUMP_VECT_SCOPE ("vectorizable_conversion");
5689 if (modifier == NONE)
5691 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5692 vect_model_simple_cost (vinfo, stmt_info,
5693 ncopies * (1 + multi_step_cvt),
5694 dt, ndts, slp_node, cost_vec);
5696 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5698 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5699 /* The final packing step produces one vector result per copy. */
5700 unsigned int nvectors
5701 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5702 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5703 multi_step_cvt, cost_vec,
5704 widen_arith);
5706 else
5708 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5709 /* The initial unpacking step produces two vector results
5710 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5711 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5712 unsigned int nvectors
5713 = (slp_node
5714 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5715 : ncopies * 2);
5716 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5717 multi_step_cvt, cost_vec,
5718 widen_arith);
5720 interm_types.release ();
5721 return true;
5724 /* Transform. */
5725 if (dump_enabled_p ())
5726 dump_printf_loc (MSG_NOTE, vect_location,
5727 "transform conversion. ncopies = %d.\n", ncopies);
5729 if (op_type == binary_op)
5731 if (CONSTANT_CLASS_P (op0))
5732 op0 = fold_convert (TREE_TYPE (op1), op0);
5733 else if (CONSTANT_CLASS_P (op1))
5734 op1 = fold_convert (TREE_TYPE (op0), op1);
5737 /* In case of multi-step conversion, we first generate conversion operations
5738 to the intermediate types, and then from those types to the final one.
5739 We create vector destinations for the intermediate type (TYPES) received
5740 from supportable_*_operation, and store them in the correct order
5741 for future use in vect_create_vectorized_*_stmts (). */
5742 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5743 bool widen_or_narrow_float_p
5744 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5745 vec_dest = vect_create_destination_var (scalar_dest,
5746 widen_or_narrow_float_p
5747 ? cvt_type : vectype_out);
5748 vec_dsts.quick_push (vec_dest);
5750 if (multi_step_cvt)
5752 for (i = interm_types.length () - 1;
5753 interm_types.iterate (i, &intermediate_type); i--)
5755 vec_dest = vect_create_destination_var (scalar_dest,
5756 intermediate_type);
5757 vec_dsts.quick_push (vec_dest);
5761 if (cvt_type)
5762 vec_dest = vect_create_destination_var (scalar_dest,
5763 widen_or_narrow_float_p
5764 ? vectype_out : cvt_type);
5766 int ninputs = 1;
5767 if (!slp_node)
5769 if (modifier == WIDEN)
5771 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
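/* Each narrowing step packs two vectors into one, so a chain with
   MULTI_STEP_CVT intermediate steps consumes 2 * 2^MULTI_STEP_CVT
   input vectors for every final output vector.  */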
5773 if (multi_step_cvt)
5774 ninputs = vect_pow2 (multi_step_cvt);
5775 ninputs *= 2;
5779 switch (modifier)
5781 case NONE:
5782 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5783 op0, &vec_oprnds0);
5784 /* vec_dest is the intermediate-type operand when multi_step_cvt is set. */
5785 if (multi_step_cvt)
5787 cvt_op = vec_dest;
5788 vec_dest = vec_dsts[0];
5791 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5793 /* Arguments are ready, create the new vector stmt. */
5794 gimple* new_stmt;
5795 if (multi_step_cvt)
5797 gcc_assert (multi_step_cvt == 1);
5798 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5799 new_temp = make_ssa_name (cvt_op, new_stmt);
5800 gimple_assign_set_lhs (new_stmt, new_temp);
5801 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5802 vop0 = new_temp;
5804 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5805 new_temp = make_ssa_name (vec_dest, new_stmt);
5806 gimple_set_lhs (new_stmt, new_temp);
5807 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5809 if (slp_node)
5810 slp_node->push_vec_def (new_stmt);
5811 else
5812 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5814 break;
5816 case WIDEN:
5817 /* In case the vectorization factor (VF) is bigger than the number
5818 of elements that we can fit in a vectype (nunits), we have to
5819 generate more than one vector stmt - i.e., we need to "unroll"
5820 the vector stmt by a factor VF/nunits. */
5821 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5822 op0, &vec_oprnds0,
5823 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5824 &vec_oprnds1);
5825 if (code == WIDEN_LSHIFT_EXPR)
5827 int oprnds_size = vec_oprnds0.length ();
5828 vec_oprnds1.create (oprnds_size);
5829 for (i = 0; i < oprnds_size; ++i)
5830 vec_oprnds1.quick_push (op1);
5832 /* Arguments are ready. Create the new vector stmts. */
5833 for (i = multi_step_cvt; i >= 0; i--)
5835 tree this_dest = vec_dsts[i];
5836 code_helper c1 = code1, c2 = code2;
5837 if (i == 0 && codecvt2 != ERROR_MARK)
5839 c1 = codecvt1;
5840 c2 = codecvt2;
5842 if (known_eq (nunits_out, nunits_in))
5843 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5844 stmt_info, this_dest, gsi, c1,
5845 op_type);
5846 else
5847 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5848 &vec_oprnds1, stmt_info,
5849 this_dest, gsi,
5850 c1, c2, op_type);
5853 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5855 gimple *new_stmt;
5856 if (cvt_type)
5858 new_temp = make_ssa_name (vec_dest);
5859 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5860 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5862 else
5863 new_stmt = SSA_NAME_DEF_STMT (vop0);
5865 if (slp_node)
5866 slp_node->push_vec_def (new_stmt);
5867 else
5868 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5870 break;
5872 case NARROW_SRC:
5873 case NARROW_DST:
5874 /* In case the vectorization factor (VF) is bigger than the number
5875 of elements that we can fit in a vectype (nunits), we have to
5876 generate more than one vector stmt - i.e., we need to "unroll"
5877 the vector stmt by a factor VF/nunits. */
5878 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5879 op0, &vec_oprnds0);
5880 /* Arguments are ready. Create the new vector stmts. */
5881 if (cvt_type && modifier == NARROW_DST)
5882 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5884 new_temp = make_ssa_name (vec_dest);
5885 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5886 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5887 vec_oprnds0[i] = new_temp;
5890 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5891 multi_step_cvt,
5892 stmt_info, vec_dsts, gsi,
5893 slp_node, code1,
5894 modifier == NARROW_SRC);
5895 /* After demoting op0 to cvt_type, convert it to dest. */
5896 if (cvt_type && code == FLOAT_EXPR)
5898 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5900 /* Arguments are ready, create the new vector stmt. */
5901 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5902 gimple *new_stmt
5903 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5904 new_temp = make_ssa_name (vec_dest, new_stmt);
5905 gimple_set_lhs (new_stmt, new_temp);
5906 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5908 /* This is the last step of the conversion sequence. Store the
5909 vectors in SLP_NODE or in vector info of the scalar statement
5910 (or in STMT_VINFO_RELATED_STMT chain). */
5911 if (slp_node)
5912 slp_node->push_vec_def (new_stmt);
5913 else
5914 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5917 break;
5919 if (!slp_node)
5920 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5922 vec_oprnds0.release ();
5923 vec_oprnds1.release ();
5924 interm_types.release ();
5926 return true;
5929 /* Return true if we can assume from the scalar form of STMT_INFO that
5930 neither the scalar nor the vector forms will generate code. STMT_INFO
5931 is known not to involve a data reference. */
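/* For example, a conversion between two integer types of the same
   precision (such as int <-> unsigned int) is a no-op for both the
   scalar and the vector form.  */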
5933 bool
5934 vect_nop_conversion_p (stmt_vec_info stmt_info)
5936 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5937 if (!stmt)
5938 return false;
5940 tree lhs = gimple_assign_lhs (stmt);
5941 tree_code code = gimple_assign_rhs_code (stmt);
5942 tree rhs = gimple_assign_rhs1 (stmt);
5944 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5945 return true;
5947 if (CONVERT_EXPR_CODE_P (code))
5948 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5950 return false;
5953 /* Function vectorizable_assignment.
5955 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5956 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5957 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5958 Return true if STMT_INFO is vectorizable in this way. */
5960 static bool
5961 vectorizable_assignment (vec_info *vinfo,
5962 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5963 gimple **vec_stmt, slp_tree slp_node,
5964 stmt_vector_for_cost *cost_vec)
5966 tree vec_dest;
5967 tree scalar_dest;
5968 tree op;
5969 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5970 tree new_temp;
5971 enum vect_def_type dt[1] = {vect_unknown_def_type};
5972 int ndts = 1;
5973 int ncopies;
5974 int i;
5975 vec<tree> vec_oprnds = vNULL;
5976 tree vop;
5977 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5978 enum tree_code code;
5979 tree vectype_in;
5981 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5982 return false;
5984 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5985 && ! vec_stmt)
5986 return false;
5988 /* Is vectorizable assignment? */
5989 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5990 if (!stmt)
5991 return false;
5993 scalar_dest = gimple_assign_lhs (stmt);
5994 if (TREE_CODE (scalar_dest) != SSA_NAME)
5995 return false;
5997 if (STMT_VINFO_DATA_REF (stmt_info))
5998 return false;
6000 code = gimple_assign_rhs_code (stmt);
6001 if (!(gimple_assign_single_p (stmt)
6002 || code == PAREN_EXPR
6003 || CONVERT_EXPR_CODE_P (code)))
6004 return false;
6006 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6007 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6009 /* Multiple types in SLP are handled by creating the appropriate number of
6010 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6011 case of SLP. */
6012 if (slp_node)
6013 ncopies = 1;
6014 else
6015 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6017 gcc_assert (ncopies >= 1);
6019 slp_tree slp_op;
6020 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
6021 &dt[0], &vectype_in))
6023 if (dump_enabled_p ())
6024 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6025 "use not simple.\n");
6026 return false;
6028 if (!vectype_in)
6029 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
6031 /* We can handle NOP_EXPR conversions that do not change the number
6032 of elements or the vector size. */
6033 if ((CONVERT_EXPR_CODE_P (code)
6034 || code == VIEW_CONVERT_EXPR)
6035 && (!vectype_in
6036 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
6037 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
6038 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
6039 return false;
6041 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
6043 if (dump_enabled_p ())
6044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6045 "can't convert between boolean and non "
6046 "boolean vectors %T\n", TREE_TYPE (op));
6048 return false;
6051 /* We do not handle bit-precision changes. */
6052 if ((CONVERT_EXPR_CODE_P (code)
6053 || code == VIEW_CONVERT_EXPR)
6054 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6055 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6056 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
6057 && !type_has_mode_precision_p (TREE_TYPE (op))))
6058 /* But a conversion that does not change the bit-pattern is ok. */
6059 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6060 && INTEGRAL_TYPE_P (TREE_TYPE (op))
6061 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
6062 > TYPE_PRECISION (TREE_TYPE (op)))
6063 && TYPE_UNSIGNED (TREE_TYPE (op))))
6065 if (dump_enabled_p ())
6066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6067 "type conversion to/from bit-precision "
6068 "unsupported.\n");
6069 return false;
6072 if (!vec_stmt) /* transformation not required. */
6074 if (slp_node
6075 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
6077 if (dump_enabled_p ())
6078 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6079 "incompatible vector types for invariants\n");
6080 return false;
6082 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
6083 DUMP_VECT_SCOPE ("vectorizable_assignment");
6084 if (!vect_nop_conversion_p (stmt_info))
6085 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
6086 cost_vec);
6087 return true;
6090 /* Transform. */
6091 if (dump_enabled_p ())
6092 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6094 /* Handle def. */
6095 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6097 /* Handle use. */
6098 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
6100 /* Arguments are ready. Create the new vector stmt. */
6101 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
6103 if (CONVERT_EXPR_CODE_P (code)
6104 || code == VIEW_CONVERT_EXPR)
6105 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6106 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6107 new_temp = make_ssa_name (vec_dest, new_stmt);
6108 gimple_assign_set_lhs (new_stmt, new_temp);
6109 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6110 if (slp_node)
6111 slp_node->push_vec_def (new_stmt);
6112 else
6113 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6115 if (!slp_node)
6116 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6118 vec_oprnds.release ();
6119 return true;
6123 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6124 either as shift by a scalar or by a vector. */
6126 bool
6127 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6130 machine_mode vec_mode;
6131 optab optab;
6132 int icode;
6133 tree vectype;
6135 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6136 if (!vectype)
6137 return false;
6139 optab = optab_for_tree_code (code, vectype, optab_scalar);
6140 if (!optab
6141 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6143 optab = optab_for_tree_code (code, vectype, optab_vector);
6144 if (!optab
6145 || (optab_handler (optab, TYPE_MODE (vectype))
6146 == CODE_FOR_nothing))
6147 return false;
6150 vec_mode = TYPE_MODE (vectype);
6151 icode = (int) optab_handler (optab, vec_mode);
6152 if (icode == CODE_FOR_nothing)
6153 return false;
6155 return true;
6159 /* Function vectorizable_shift.
6161 Check if STMT_INFO performs a shift operation that can be vectorized.
6162 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6163 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6164 Return true if STMT_INFO is vectorizable in this way. */
6166 static bool
6167 vectorizable_shift (vec_info *vinfo,
6168 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6169 gimple **vec_stmt, slp_tree slp_node,
6170 stmt_vector_for_cost *cost_vec)
6172 tree vec_dest;
6173 tree scalar_dest;
6174 tree op0, op1 = NULL;
6175 tree vec_oprnd1 = NULL_TREE;
6176 tree vectype;
6177 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6178 enum tree_code code;
6179 machine_mode vec_mode;
6180 tree new_temp;
6181 optab optab;
6182 int icode;
6183 machine_mode optab_op2_mode;
6184 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6185 int ndts = 2;
6186 poly_uint64 nunits_in;
6187 poly_uint64 nunits_out;
6188 tree vectype_out;
6189 tree op1_vectype;
6190 int ncopies;
6191 int i;
6192 vec<tree> vec_oprnds0 = vNULL;
6193 vec<tree> vec_oprnds1 = vNULL;
6194 tree vop0, vop1;
6195 unsigned int k;
6196 bool scalar_shift_arg = true;
6197 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6198 bool incompatible_op1_vectype_p = false;
6200 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6201 return false;
6203 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6204 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6205 && ! vec_stmt)
6206 return false;
6208 /* Is STMT a vectorizable binary/unary operation? */
6209 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6210 if (!stmt)
6211 return false;
6213 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6214 return false;
6216 code = gimple_assign_rhs_code (stmt);
6218 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6219 || code == RROTATE_EXPR))
6220 return false;
6222 scalar_dest = gimple_assign_lhs (stmt);
6223 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6224 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6226 if (dump_enabled_p ())
6227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6228 "bit-precision shifts not supported.\n");
6229 return false;
6232 slp_tree slp_op0;
6233 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6234 0, &op0, &slp_op0, &dt[0], &vectype))
6236 if (dump_enabled_p ())
6237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6238 "use not simple.\n");
6239 return false;
6241 /* If op0 is an external or constant def, infer the vector type
6242 from the scalar type. */
6243 if (!vectype)
6244 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6245 if (vec_stmt)
6246 gcc_assert (vectype);
6247 if (!vectype)
6249 if (dump_enabled_p ())
6250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6251 "no vectype for scalar type\n");
6252 return false;
6255 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6256 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6257 if (maybe_ne (nunits_out, nunits_in))
6258 return false;
6260 stmt_vec_info op1_def_stmt_info;
6261 slp_tree slp_op1;
6262 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6263 &dt[1], &op1_vectype, &op1_def_stmt_info))
6265 if (dump_enabled_p ())
6266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6267 "use not simple.\n");
6268 return false;
6271 /* Multiple types in SLP are handled by creating the appropriate number of
6272 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6273 case of SLP. */
6274 if (slp_node)
6275 ncopies = 1;
6276 else
6277 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6279 gcc_assert (ncopies >= 1);
6281 /* Determine whether the shift amount is a vector, or scalar. If the
6282 shift/rotate amount is a vector, use the vector/vector shift optabs. */
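/* E.g. "a[i] << 3", or "a[i] << n" with loop-invariant N, can use a
   vector-shifted-by-scalar pattern, whereas "a[i] << b[i]" needs the
   vector/vector form.  */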
6284 if ((dt[1] == vect_internal_def
6285 || dt[1] == vect_induction_def
6286 || dt[1] == vect_nested_cycle)
6287 && !slp_node)
6288 scalar_shift_arg = false;
6289 else if (dt[1] == vect_constant_def
6290 || dt[1] == vect_external_def
6291 || dt[1] == vect_internal_def)
6293 /* In SLP, we need to check whether the shift count is the same
6294 in all stmts; in loops, if it is a constant or invariant, it is
6295 always a scalar shift. */
6296 if (slp_node)
6298 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6299 stmt_vec_info slpstmt_info;
6301 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6303 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6304 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6305 scalar_shift_arg = false;
6308 /* For internal SLP defs we have to make sure we see scalar stmts
6309 for all vector elements.
6310 ??? For different vectors we could resort to a different
6311 scalar shift operand but code-generation below simply always
6312 takes the first. */
6313 if (dt[1] == vect_internal_def
6314 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6315 stmts.length ()))
6316 scalar_shift_arg = false;
6319 /* If the shift amount is computed by a pattern stmt we cannot
6320 use the scalar amount directly thus give up and use a vector
6321 shift. */
6322 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6323 scalar_shift_arg = false;
6325 else
6327 if (dump_enabled_p ())
6328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6329 "operand mode requires invariant argument.\n");
6330 return false;
6333 /* Vector shifted by vector. */
6334 bool was_scalar_shift_arg = scalar_shift_arg;
6335 if (!scalar_shift_arg)
6337 optab = optab_for_tree_code (code, vectype, optab_vector);
6338 if (dump_enabled_p ())
6339 dump_printf_loc (MSG_NOTE, vect_location,
6340 "vector/vector shift/rotate found.\n");
6342 if (!op1_vectype)
6343 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6344 slp_op1);
6345 incompatible_op1_vectype_p
6346 = (op1_vectype == NULL_TREE
6347 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6348 TYPE_VECTOR_SUBPARTS (vectype))
6349 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6350 if (incompatible_op1_vectype_p
6351 && (!slp_node
6352 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6353 || slp_op1->refcnt != 1))
6355 if (dump_enabled_p ())
6356 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6357 "unusable type for last operand in"
6358 " vector/vector shift/rotate.\n");
6359 return false;
6362 /* See if the machine has a vector shifted by scalar insn and if not
6363 then see if it has a vector shifted by vector insn. */
6364 else
6366 optab = optab_for_tree_code (code, vectype, optab_scalar);
6367 if (optab
6368 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6370 if (dump_enabled_p ())
6371 dump_printf_loc (MSG_NOTE, vect_location,
6372 "vector/scalar shift/rotate found.\n");
6374 else
6376 optab = optab_for_tree_code (code, vectype, optab_vector);
6377 if (optab
6378 && (optab_handler (optab, TYPE_MODE (vectype))
6379 != CODE_FOR_nothing))
6381 scalar_shift_arg = false;
6383 if (dump_enabled_p ())
6384 dump_printf_loc (MSG_NOTE, vect_location,
6385 "vector/vector shift/rotate found.\n");
6387 if (!op1_vectype)
6388 op1_vectype = get_vectype_for_scalar_type (vinfo,
6389 TREE_TYPE (op1),
6390 slp_op1);
6392 /* Unlike the other binary operators, shifts/rotates have
6393 the rhs being int, instead of the same type as the lhs,
6394 so make sure the scalar is the right type if we are
6395 dealing with vectors of long long/long/short/char. */
6396 incompatible_op1_vectype_p
6397 = (!op1_vectype
6398 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6399 TREE_TYPE (op1)));
6400 if (incompatible_op1_vectype_p
6401 && dt[1] == vect_internal_def)
6403 if (dump_enabled_p ())
6404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6405 "unusable type for last operand in"
6406 " vector/vector shift/rotate.\n");
6407 return false;
6413 /* Supportable by target? */
6414 if (!optab)
6416 if (dump_enabled_p ())
6417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6418 "no optab.\n");
6419 return false;
6421 vec_mode = TYPE_MODE (vectype);
6422 icode = (int) optab_handler (optab, vec_mode);
6423 if (icode == CODE_FOR_nothing)
6425 if (dump_enabled_p ())
6426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6427 "op not supported by target.\n");
6428 return false;
6430 /* Vector lowering cannot optimize vector shifts using word arithmetic. */
6431 if (vect_emulated_vector_p (vectype))
6432 return false;
6434 if (!vec_stmt) /* transformation not required. */
6436 if (slp_node
6437 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6438 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6439 && (!incompatible_op1_vectype_p
6440 || dt[1] == vect_constant_def)
6441 && !vect_maybe_update_slp_op_vectype
6442 (slp_op1,
6443 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6445 if (dump_enabled_p ())
6446 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6447 "incompatible vector types for invariants\n");
6448 return false;
6450 /* Now adjust the constant shift amount in place. */
6451 if (slp_node
6452 && incompatible_op1_vectype_p
6453 && dt[1] == vect_constant_def)
6455 for (unsigned i = 0;
6456 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6458 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6459 = fold_convert (TREE_TYPE (vectype),
6460 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6461 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6462 == INTEGER_CST));
6465 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6466 DUMP_VECT_SCOPE ("vectorizable_shift");
6467 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6468 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6469 return true;
6472 /* Transform. */
6474 if (dump_enabled_p ())
6475 dump_printf_loc (MSG_NOTE, vect_location,
6476 "transform binary/unary operation.\n");
6478 if (incompatible_op1_vectype_p && !slp_node)
6480 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6481 op1 = fold_convert (TREE_TYPE (vectype), op1);
6482 if (dt[1] != vect_constant_def)
6483 op1 = vect_init_vector (vinfo, stmt_info, op1,
6484 TREE_TYPE (vectype), NULL);
6487 /* Handle def. */
6488 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6490 if (scalar_shift_arg && dt[1] != vect_internal_def)
6492 /* Vector shl and shr insn patterns can be defined with scalar
6493 operand 2 (shift operand). In this case, use constant or loop
6494 invariant op1 directly, without extending it to vector mode
6495 first. */
6496 optab_op2_mode = insn_data[icode].operand[2].mode;
6497 if (!VECTOR_MODE_P (optab_op2_mode))
6499 if (dump_enabled_p ())
6500 dump_printf_loc (MSG_NOTE, vect_location,
6501 "operand 1 using scalar mode.\n");
6502 vec_oprnd1 = op1;
6503 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6504 vec_oprnds1.quick_push (vec_oprnd1);
6505 /* Store vec_oprnd1 for every vector stmt to be created.
6506 We check during the analysis that all the shift arguments
6507 are the same.
6508 TODO: Allow different constants for different vector
6509 stmts generated for an SLP instance. */
6510 for (k = 0;
6511 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6512 vec_oprnds1.quick_push (vec_oprnd1);
6515 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6517 if (was_scalar_shift_arg)
6519 /* If the argument was the same in all lanes, create
6520 the correctly typed vector shift amount directly. */
6521 op1 = fold_convert (TREE_TYPE (vectype), op1);
6522 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6523 !loop_vinfo ? gsi : NULL);
6524 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6525 !loop_vinfo ? gsi : NULL);
6526 vec_oprnds1.create (slp_node->vec_stmts_size);
6527 for (k = 0; k < slp_node->vec_stmts_size; k++)
6528 vec_oprnds1.quick_push (vec_oprnd1);
6530 else if (dt[1] == vect_constant_def)
6531 /* The constant shift amount has been adjusted in place. */
6533 else
6534 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6537 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6538 (a special case for certain kind of vector shifts); otherwise,
6539 operand 1 should be of a vector type (the usual case). */
6540 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6541 op0, &vec_oprnds0,
6542 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6544 /* Arguments are ready. Create the new vector stmt. */
6545 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6547 /* For internal defs where we need to use a scalar shift arg,
6548 extract the first lane. */
6549 if (scalar_shift_arg && dt[1] == vect_internal_def)
6551 vop1 = vec_oprnds1[0];
6552 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6553 gassign *new_stmt
6554 = gimple_build_assign (new_temp,
6555 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6556 vop1,
6557 TYPE_SIZE (TREE_TYPE (new_temp)),
6558 bitsize_zero_node));
6559 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6560 vop1 = new_temp;
6562 else
6563 vop1 = vec_oprnds1[i];
6564 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6565 new_temp = make_ssa_name (vec_dest, new_stmt);
6566 gimple_assign_set_lhs (new_stmt, new_temp);
6567 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6568 if (slp_node)
6569 slp_node->push_vec_def (new_stmt);
6570 else
6571 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6574 if (!slp_node)
6575 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6577 vec_oprnds0.release ();
6578 vec_oprnds1.release ();
6580 return true;
6583 /* Function vectorizable_operation.
6585 Check if STMT_INFO performs a binary, unary or ternary operation that can
6586 be vectorized.
6587 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6588 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6589 Return true if STMT_INFO is vectorizable in this way. */
6591 static bool
6592 vectorizable_operation (vec_info *vinfo,
6593 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6594 gimple **vec_stmt, slp_tree slp_node,
6595 stmt_vector_for_cost *cost_vec)
6597 tree vec_dest;
6598 tree scalar_dest;
6599 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6600 tree vectype;
6601 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6602 enum tree_code code, orig_code;
6603 machine_mode vec_mode;
6604 tree new_temp;
6605 int op_type;
6606 optab optab;
6607 bool target_support_p;
6608 enum vect_def_type dt[3]
6609 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6610 int ndts = 3;
6611 poly_uint64 nunits_in;
6612 poly_uint64 nunits_out;
6613 tree vectype_out;
6614 int ncopies, vec_num;
6615 int i;
6616 vec<tree> vec_oprnds0 = vNULL;
6617 vec<tree> vec_oprnds1 = vNULL;
6618 vec<tree> vec_oprnds2 = vNULL;
6619 tree vop0, vop1, vop2;
6620 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6622 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6623 return false;
6625 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6626 && ! vec_stmt)
6627 return false;
6629 /* Is STMT a vectorizable binary/unary operation? */
6630 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6631 if (!stmt)
6632 return false;
6634 /* Loads and stores are handled in vectorizable_{load,store}. */
6635 if (STMT_VINFO_DATA_REF (stmt_info))
6636 return false;
6638 orig_code = code = gimple_assign_rhs_code (stmt);
6640 /* Shifts are handled in vectorizable_shift. */
6641 if (code == LSHIFT_EXPR
6642 || code == RSHIFT_EXPR
6643 || code == LROTATE_EXPR
6644 || code == RROTATE_EXPR)
6645 return false;
6647 /* Comparisons are handled in vectorizable_comparison. */
6648 if (TREE_CODE_CLASS (code) == tcc_comparison)
6649 return false;
6651 /* Conditions are handled in vectorizable_condition. */
6652 if (code == COND_EXPR)
6653 return false;
6655 /* For pointer addition and subtraction, we should use the normal
6656 plus and minus for the vector operation. */
6657 if (code == POINTER_PLUS_EXPR)
6658 code = PLUS_EXPR;
6659 if (code == POINTER_DIFF_EXPR)
6660 code = MINUS_EXPR;
6662 /* Support only unary or binary operations. */
6663 op_type = TREE_CODE_LENGTH (code);
6664 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6666 if (dump_enabled_p ())
6667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6668 "num. args = %d (not unary/binary/ternary op).\n",
6669 op_type);
6670 return false;
6673 scalar_dest = gimple_assign_lhs (stmt);
6674 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6676 /* Most operations cannot handle bit-precision types without extra
6677 truncations. */
6678 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6679 if (!mask_op_p
6680 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6681 /* Exception are bitwise binary operations. */
6682 && code != BIT_IOR_EXPR
6683 && code != BIT_XOR_EXPR
6684 && code != BIT_AND_EXPR)
6686 if (dump_enabled_p ())
6687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6688 "bit-precision arithmetic not supported.\n");
6689 return false;
6692 slp_tree slp_op0;
6693 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6694 0, &op0, &slp_op0, &dt[0], &vectype))
6696 if (dump_enabled_p ())
6697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6698 "use not simple.\n");
6699 return false;
6701 bool is_invariant = (dt[0] == vect_external_def
6702 || dt[0] == vect_constant_def);
6703 /* If op0 is an external or constant def, infer the vector type
6704 from the scalar type. */
6705 if (!vectype)
6707 /* For boolean type we cannot determine vectype by
6708 invariant value (don't know whether it is a vector
6709 of booleans or vector of integers). We use output
6710 vectype because operations on boolean don't change
6711 type. */
6712 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6714 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6716 if (dump_enabled_p ())
6717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6718 "not supported operation on bool value.\n");
6719 return false;
6721 vectype = vectype_out;
6723 else
6724 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6725 slp_node);
6727 if (vec_stmt)
6728 gcc_assert (vectype);
6729 if (!vectype)
6731 if (dump_enabled_p ())
6732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6733 "no vectype for scalar type %T\n",
6734 TREE_TYPE (op0));
6736 return false;
6739 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6740 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6741 if (maybe_ne (nunits_out, nunits_in))
6742 return false;
6744 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6745 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6746 if (op_type == binary_op || op_type == ternary_op)
6748 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6749 1, &op1, &slp_op1, &dt[1], &vectype2))
6751 if (dump_enabled_p ())
6752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6753 "use not simple.\n");
6754 return false;
6756 is_invariant &= (dt[1] == vect_external_def
6757 || dt[1] == vect_constant_def);
6758 if (vectype2
6759 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6760 return false;
6762 if (op_type == ternary_op)
6764 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6765 2, &op2, &slp_op2, &dt[2], &vectype3))
6767 if (dump_enabled_p ())
6768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6769 "use not simple.\n");
6770 return false;
6772 is_invariant &= (dt[2] == vect_external_def
6773 || dt[2] == vect_constant_def);
6774 if (vectype3
6775 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6776 return false;
6779 /* Multiple types in SLP are handled by creating the appropriate number of
6780 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6781 case of SLP. */
6782 if (slp_node)
6784 ncopies = 1;
6785 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6787 else
6789 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6790 vec_num = 1;
6793 gcc_assert (ncopies >= 1);
6795 /* Reject attempts to combine mask types with nonmask types, e.g. if
6796 we have an AND between a (nonmask) boolean loaded from memory and
6797 a (mask) boolean result of a comparison.
6799 TODO: We could easily fix these cases up using pattern statements. */
6800 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6801 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6802 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6804 if (dump_enabled_p ())
6805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6806 "mixed mask and nonmask vector types\n");
6807 return false;
6810 /* Supportable by target? */
6812 vec_mode = TYPE_MODE (vectype);
6813 if (code == MULT_HIGHPART_EXPR)
6814 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6815 else
6817 optab = optab_for_tree_code (code, vectype, optab_default);
6818 if (!optab)
6820 if (dump_enabled_p ())
6821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6822 "no optab.\n");
6823 return false;
6825 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6826 || optab_libfunc (optab, vec_mode));
6829 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6830 if (!target_support_p || using_emulated_vectors_p)
6832 if (dump_enabled_p ())
6833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6834 "op not supported by target.\n");
6835 /* When vec_mode is not a vector mode and we verified that ops we
6836 do not have to lower (like AND) are natively supported, let
6837 those through even when the mode isn't word_mode. For
6838 ops we do have to lower, the lowering code assumes we are
6839 dealing with word_mode. */
6840 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6841 || !target_support_p)
6842 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6843 /* Check only during analysis. */
6844 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6846 if (dump_enabled_p ())
6847 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6848 return false;
6850 if (dump_enabled_p ())
6851 dump_printf_loc (MSG_NOTE, vect_location,
6852 "proceeding using word mode.\n");
6853 using_emulated_vectors_p = true;
6856 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6857 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6858 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6859 internal_fn cond_fn = get_conditional_internal_fn (code);
6860 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6862 /* If operating on inactive elements could generate spurious traps,
6863 we need to restrict the operation to active lanes. Note that this
6864 specifically doesn't apply to unhoisted invariants, since they
6865 operate on the same value for every lane.
6867 Similarly, if this operation is part of a reduction, a fully-masked
6868 loop should only change the active lanes of the reduction chain,
6869 keeping the inactive lanes as-is. */
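/* For example, an integer division in the loop body could fault if an
   inactive lane happened to contain a zero divisor, and a masked
   reduction must not update the accumulator in inactive lanes.  */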
6870 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6871 || reduc_idx >= 0);
6873 if (!vec_stmt) /* transformation not required. */
6875 if (loop_vinfo
6876 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6877 && mask_out_inactive)
6879 if (cond_len_fn != IFN_LAST
6880 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6881 OPTIMIZE_FOR_SPEED))
6882 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6884 else if (cond_fn != IFN_LAST
6885 && direct_internal_fn_supported_p (cond_fn, vectype,
6886 OPTIMIZE_FOR_SPEED))
6887 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6888 vectype, NULL);
6889 else
6891 if (dump_enabled_p ())
6892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6893 "can't use a fully-masked loop because no"
6894 " conditional operation is available.\n");
6895 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6899 /* Put types on constant and invariant SLP children. */
6900 if (slp_node
6901 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6902 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6903 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6905 if (dump_enabled_p ())
6906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6907 "incompatible vector types for invariants\n");
6908 return false;
6911 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6912 DUMP_VECT_SCOPE ("vectorizable_operation");
6913 vect_model_simple_cost (vinfo, stmt_info,
6914 ncopies, dt, ndts, slp_node, cost_vec);
6915 if (using_emulated_vectors_p)
6917 /* The above vect_model_simple_cost call handles constants
6918 in the prologue and (mis-)costs one of the stmts as
6919 vector stmt. See below for the actual lowering that will
6920 be applied. */
6921 unsigned n
6922 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6923 switch (code)
6925 case PLUS_EXPR:
6926 n *= 5;
6927 break;
6928 case MINUS_EXPR:
6929 n *= 6;
6930 break;
6931 case NEGATE_EXPR:
6932 n *= 4;
6933 break;
6934 default:
6935 /* Bit operations do not have extra cost and are accounted
6936 as vector stmt by vect_model_simple_cost. */
6937 n = 0;
6938 break;
6940 if (n != 0)
6942 /* We also need to materialize two large constants. */
6943 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6944 0, vect_prologue);
6945 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6946 0, vect_body);
6949 return true;
6952 /* Transform. */
6954 if (dump_enabled_p ())
6955 dump_printf_loc (MSG_NOTE, vect_location,
6956 "transform binary/unary operation.\n");
6958 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6959 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6961 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6962 vectors with unsigned elements, but the result is signed. So, we
6963 need to compute the MINUS_EXPR into vectype temporary and
6964 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6965 tree vec_cvt_dest = NULL_TREE;
6966 if (orig_code == POINTER_DIFF_EXPR)
6968 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6969 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6971 /* Handle def. */
6972 else
6973 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6975 /* In case the vectorization factor (VF) is bigger than the number
6976 of elements that we can fit in a vectype (nunits), we have to generate
6977 more than one vector stmt - i.e., we need to "unroll" the
6978 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6979 from one copy of the vector stmt to the next, in the field
6980 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6981 stages to find the correct vector defs to be used when vectorizing
6982 stmts that use the defs of the current stmt. The example below
6983 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6984 we need to create 4 vectorized stmts):
6986 before vectorization:
6987 RELATED_STMT VEC_STMT
6988 S1: x = memref - -
6989 S2: z = x + 1 - -
6991 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6992 there):
6993 RELATED_STMT VEC_STMT
6994 VS1_0: vx0 = memref0 VS1_1 -
6995 VS1_1: vx1 = memref1 VS1_2 -
6996 VS1_2: vx2 = memref2 VS1_3 -
6997 VS1_3: vx3 = memref3 - -
6998 S1: x = load - VS1_0
6999 S2: z = x + 1 - -
7001 step2: vectorize stmt S2 (done here):
7002 To vectorize stmt S2 we first need to find the relevant vector
7003 def for the first operand 'x'. This is, as usual, obtained from
7004 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
7005 that defines 'x' (S1). This way we find the stmt VS1_0, and the
7006 relevant vector def 'vx0'. Having found 'vx0' we can generate
7007 the vector stmt VS2_0, and as usual, record it in the
7008 STMT_VINFO_VEC_STMT of stmt S2.
7009 When creating the second copy (VS2_1), we obtain the relevant vector
7010 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
7011 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
7012 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
7013 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
7014 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
7015 chain of stmts and pointers:
7016 RELATED_STMT VEC_STMT
7017 VS1_0: vx0 = memref0 VS1_1 -
7018 VS1_1: vx1 = memref1 VS1_2 -
7019 VS1_2: vx2 = memref2 VS1_3 -
7020 VS1_3: vx3 = memref3 - -
7021 S1: x = load - VS1_0
7022 VS2_0: vz0 = vx0 + v1 VS2_1 -
7023 VS2_1: vz1 = vx1 + v1 VS2_2 -
7024 VS2_2: vz2 = vx2 + v1 VS2_3 -
7025 VS2_3: vz3 = vx3 + v1 - -
7026 S2: z = x + 1 - VS2_0 */
7028 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
7029 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
7030 /* Arguments are ready. Create the new vector stmt. */
7031 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
7033 gimple *new_stmt = NULL;
7034 vop1 = ((op_type == binary_op || op_type == ternary_op)
7035 ? vec_oprnds1[i] : NULL_TREE);
7036 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
7037 if (using_emulated_vectors_p
7038 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
7040 /* Lower the operation. This follows vector lowering. */
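/* A sketch of the emulation used below: the most-significant bit of each
   element is cleared (or, for the first operand of a subtraction, set)
   before the full-word operation so that carries/borrows cannot cross
   element boundaries, and the correct sign bits are then patched back in
   with an XOR.  Per element this computes
     a + b  as  ((a & low) + (b & low)) ^ ((a ^ b) & high)
     a - b  as  ((a | high) - (b & low)) ^ (~(a ^ b) & high)
      -a    as  (high - (a & low)) ^ (~a & high)
   where LOW has all bits of an element set except its MSB and HIGH has
   only the MSB of each element set.  */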
7041 unsigned int width = vector_element_bits (vectype);
7042 tree inner_type = TREE_TYPE (vectype);
7043 tree word_type
7044 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
7045 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
7046 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
7047 tree high_bits
7048 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
7049 tree wvop0 = make_ssa_name (word_type);
7050 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
7051 build1 (VIEW_CONVERT_EXPR,
7052 word_type, vop0));
7053 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7054 tree result_low, signs;
7055 if (code == PLUS_EXPR || code == MINUS_EXPR)
7057 tree wvop1 = make_ssa_name (word_type);
7058 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
7059 build1 (VIEW_CONVERT_EXPR,
7060 word_type, vop1));
7061 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7062 signs = make_ssa_name (word_type);
7063 new_stmt = gimple_build_assign (signs,
7064 BIT_XOR_EXPR, wvop0, wvop1);
7065 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7066 tree b_low = make_ssa_name (word_type);
7067 new_stmt = gimple_build_assign (b_low,
7068 BIT_AND_EXPR, wvop1, low_bits);
7069 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7070 tree a_low = make_ssa_name (word_type);
7071 if (code == PLUS_EXPR)
7072 new_stmt = gimple_build_assign (a_low,
7073 BIT_AND_EXPR, wvop0, low_bits);
7074 else
7075 new_stmt = gimple_build_assign (a_low,
7076 BIT_IOR_EXPR, wvop0, high_bits);
7077 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7078 if (code == MINUS_EXPR)
7080 new_stmt = gimple_build_assign (NULL_TREE,
7081 BIT_NOT_EXPR, signs);
7082 signs = make_ssa_name (word_type);
7083 gimple_assign_set_lhs (new_stmt, signs);
7084 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7086 new_stmt = gimple_build_assign (NULL_TREE,
7087 BIT_AND_EXPR, signs, high_bits);
7088 signs = make_ssa_name (word_type);
7089 gimple_assign_set_lhs (new_stmt, signs);
7090 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7091 result_low = make_ssa_name (word_type);
7092 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
7093 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7095 else
7097 tree a_low = make_ssa_name (word_type);
7098 new_stmt = gimple_build_assign (a_low,
7099 BIT_AND_EXPR, wvop0, low_bits);
7100 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7101 signs = make_ssa_name (word_type);
7102 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7103 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7104 new_stmt = gimple_build_assign (NULL_TREE,
7105 BIT_AND_EXPR, signs, high_bits);
7106 signs = make_ssa_name (word_type);
7107 gimple_assign_set_lhs (new_stmt, signs);
7108 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7109 result_low = make_ssa_name (word_type);
7110 new_stmt = gimple_build_assign (result_low,
7111 MINUS_EXPR, high_bits, a_low);
7112 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7114 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
7115 signs);
7116 result_low = make_ssa_name (word_type);
7117 gimple_assign_set_lhs (new_stmt, result_low);
7118 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7119 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7120 build1 (VIEW_CONVERT_EXPR,
7121 vectype, result_low));
7122 new_temp = make_ssa_name (vectype);
7123 gimple_assign_set_lhs (new_stmt, new_temp);
7124 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7126 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7128 tree mask;
7129 if (masked_loop_p)
7130 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7131 vec_num * ncopies, vectype, i);
7132 else
7133 /* Dummy mask. */
7134 mask = build_minus_one_cst (truth_type_for (vectype));
7135 auto_vec<tree> vops (6);
7136 vops.quick_push (mask);
7137 vops.quick_push (vop0);
7138 if (vop1)
7139 vops.quick_push (vop1);
7140 if (vop2)
7141 vops.quick_push (vop2);
7142 if (reduc_idx >= 0)
7144 /* Perform the operation on active elements only and take
7145 inactive elements from the reduction chain input. */
7146 gcc_assert (!vop2);
7147 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7149 else
7151 auto else_value = targetm.preferred_else_value
7152 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7153 vops.quick_push (else_value);
7155 if (len_loop_p)
7157 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7158 vec_num * ncopies, vectype, i, 1);
7159 signed char biasval
7160 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7161 tree bias = build_int_cst (intQI_type_node, biasval);
7162 vops.quick_push (len);
7163 vops.quick_push (bias);
7165 gcall *call
7166 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7167 : cond_len_fn,
7168 vops);
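	  /* For illustration only: with the argument order pushed into
	     VOPS above, the call built here has the shape
	       .COND_ADD (mask, vop0, vop1, else_value)
	     for a masked addition, or
	       .COND_LEN_ADD (mask, vop0, vop1, else_value, len, bias)
	     when lengths are used; the else value is the reduction chain
	     input when reduc_idx >= 0.  */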
7169 new_temp = make_ssa_name (vec_dest, call);
7170 gimple_call_set_lhs (call, new_temp);
7171 gimple_call_set_nothrow (call, true);
7172 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7173 new_stmt = call;
7175 else
7177 tree mask = NULL_TREE;
7178 /* When combining two masks, check if either of them is elsewhere
7179 combined with a loop mask; if that's the case we can mark that the
7180 new combined mask doesn't need to be combined with a loop mask again. */
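	  /* A sketch of the rewrite done below when OP0's scalar condition
	     is known to be combined with the loop mask elsewhere:
	       vtmp = vop0 & loop_mask;
	       vres = vtmp & vop1;
	     VRES is then entered into vec_cond_masked_set further down, so
	     it does not get ANDed with the loop mask a second time.  */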
7181 if (masked_loop_p
7182 && code == BIT_AND_EXPR
7183 && VECTOR_BOOLEAN_TYPE_P (vectype))
7185 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7186 ncopies}))
7188 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7189 vec_num * ncopies, vectype, i);
7191 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7192 vop0, gsi);
7195 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7196 ncopies }))
7198 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7199 vec_num * ncopies, vectype, i);
7201 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7202 vop1, gsi);
7206 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7207 new_temp = make_ssa_name (vec_dest, new_stmt);
7208 gimple_assign_set_lhs (new_stmt, new_temp);
7209 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7210 if (using_emulated_vectors_p)
7211 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7213 /* Enter the combined value into the vector cond hash so we don't
7214 AND it with a loop mask again. */
7215 if (mask)
7216 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7219 if (vec_cvt_dest)
7221 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7222 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7223 new_temp);
7224 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7225 gimple_assign_set_lhs (new_stmt, new_temp);
7226 vect_finish_stmt_generation (vinfo, stmt_info,
7227 new_stmt, gsi);
7230 if (slp_node)
7231 slp_node->push_vec_def (new_stmt);
7232 else
7233 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7236 if (!slp_node)
7237 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7239 vec_oprnds0.release ();
7240 vec_oprnds1.release ();
7241 vec_oprnds2.release ();
7243 return true;
7246 /* A helper function to ensure data reference DR_INFO's base alignment. */
7248 static void
7249 ensure_base_align (dr_vec_info *dr_info)
7251 /* Alignment is only analyzed for the first element of a DR group;
7252 use that to look at the base alignment we need to enforce. */
7253 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7254 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7256 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7258 if (dr_info->base_misaligned)
7260 tree base_decl = dr_info->base_decl;
7262 // We should only be able to increase the alignment of a base object if
7263 // we know what its new alignment should be at compile time.
7264 unsigned HOST_WIDE_INT align_base_to =
7265 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7267 if (decl_in_symtab_p (base_decl))
7268 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7269 else if (DECL_ALIGN (base_decl) < align_base_to)
7271 SET_DECL_ALIGN (base_decl, align_base_to);
7272 DECL_USER_ALIGN (base_decl) = 1;
7274 dr_info->base_misaligned = false;
7279 /* Function get_group_alias_ptr_type.
7281 Return the alias pointer type for the group starting at FIRST_STMT_INFO. */
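   /* Illustrative example: if one member of the group is stored through
      an int-based reference and another through a float-based one, their
      alias sets differ and ptr_type_node (alias set 0, conflicting with
      everything) is returned; otherwise the alias pointer type of the
      first reference serves for the whole group.  */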
7283 static tree
7284 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7286 struct data_reference *first_dr, *next_dr;
7288 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7289 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7290 while (next_stmt_info)
7292 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7293 if (get_alias_set (DR_REF (first_dr))
7294 != get_alias_set (DR_REF (next_dr)))
7296 if (dump_enabled_p ())
7297 dump_printf_loc (MSG_NOTE, vect_location,
7298 "conflicting alias set types.\n");
7299 return ptr_type_node;
7301 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7303 return reference_alias_ptr_type (DR_REF (first_dr));
7307 /* Function scan_operand_equal_p.
7309 Helper function for check_scan_store. Compare two references
7310 with .GOMP_SIMD_LANE bases. */
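   /* Illustrative only: the two references are typically the "omp simd
      array" accesses from the check_scan_store examples below, e.g.
      D.2042[_25] seen once as an ARRAY_REF and once through a pointer as
      MEM[&D.2042 + _25 * 4] (assuming 4-byte elements); the code below
      peels the address computation, an optional constant step multiply
      and widening casts so that both forms compare equal.  */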
7312 static bool
7313 scan_operand_equal_p (tree ref1, tree ref2)
7315 tree ref[2] = { ref1, ref2 };
7316 poly_int64 bitsize[2], bitpos[2];
7317 tree offset[2], base[2];
7318 for (int i = 0; i < 2; ++i)
7320 machine_mode mode;
7321 int unsignedp, reversep, volatilep = 0;
7322 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7323 &offset[i], &mode, &unsignedp,
7324 &reversep, &volatilep);
7325 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7326 return false;
7327 if (TREE_CODE (base[i]) == MEM_REF
7328 && offset[i] == NULL_TREE
7329 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7331 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7332 if (is_gimple_assign (def_stmt)
7333 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7334 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7335 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7337 if (maybe_ne (mem_ref_offset (base[i]), 0))
7338 return false;
7339 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7340 offset[i] = gimple_assign_rhs2 (def_stmt);
7345 if (!operand_equal_p (base[0], base[1], 0))
7346 return false;
7347 if (maybe_ne (bitsize[0], bitsize[1]))
7348 return false;
7349 if (offset[0] != offset[1])
7351 if (!offset[0] || !offset[1])
7352 return false;
7353 if (!operand_equal_p (offset[0], offset[1], 0))
7355 tree step[2];
7356 for (int i = 0; i < 2; ++i)
7358 step[i] = integer_one_node;
7359 if (TREE_CODE (offset[i]) == SSA_NAME)
7361 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7362 if (is_gimple_assign (def_stmt)
7363 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7364 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7365 == INTEGER_CST))
7367 step[i] = gimple_assign_rhs2 (def_stmt);
7368 offset[i] = gimple_assign_rhs1 (def_stmt);
7371 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7373 step[i] = TREE_OPERAND (offset[i], 1);
7374 offset[i] = TREE_OPERAND (offset[i], 0);
7376 tree rhs1 = NULL_TREE;
7377 if (TREE_CODE (offset[i]) == SSA_NAME)
7379 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7380 if (gimple_assign_cast_p (def_stmt))
7381 rhs1 = gimple_assign_rhs1 (def_stmt);
7383 else if (CONVERT_EXPR_P (offset[i]))
7384 rhs1 = TREE_OPERAND (offset[i], 0);
7385 if (rhs1
7386 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7387 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7388 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7389 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7390 offset[i] = rhs1;
7392 if (!operand_equal_p (offset[0], offset[1], 0)
7393 || !operand_equal_p (step[0], step[1], 0))
7394 return false;
7397 return true;
7401 enum scan_store_kind {
7402 /* Normal permutation. */
7403 scan_store_kind_perm,
7405 /* Whole vector left shift permutation with zero init. */
7406 scan_store_kind_lshift_zero,
7408 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7409 scan_store_kind_lshift_cond
7412 /* Function scan_store_can_perm_p.
7414 Verify if we can perform the needed permutations or whole vector shifts.
7415 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
7416 USE_WHOLE_VECTOR, if non-NULL, records which scan_store_kind operation
7417 to do at each step. */
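   /* For example, with nunits == 8 the permutations tried are:
	step 0:  { 0, 8, 9, 10, 11, 12, 13, 14 }   shift in one lane
	step 1:  { 0, 1, 8, 9, 10, 11, 12, 13 }    shift in two lanes
	step 2:  { 0, 1, 2, 3, 8, 9, 10, 11 }      shift in four lanes
	final:   { 7, 7, 7, 7, 7, 7, 7, 7 }        broadcast the last lane
      matching the masks in the check_scan_store examples further down
      (an illustration; the first operand supplies the shifted-in lanes).  */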
7419 static int
7420 scan_store_can_perm_p (tree vectype, tree init,
7421 vec<enum scan_store_kind> *use_whole_vector = NULL)
7423 enum machine_mode vec_mode = TYPE_MODE (vectype);
7424 unsigned HOST_WIDE_INT nunits;
7425 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7426 return -1;
7427 int units_log2 = exact_log2 (nunits);
7428 if (units_log2 <= 0)
7429 return -1;
7431 int i;
7432 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7433 for (i = 0; i <= units_log2; ++i)
7435 unsigned HOST_WIDE_INT j, k;
7436 enum scan_store_kind kind = scan_store_kind_perm;
7437 vec_perm_builder sel (nunits, nunits, 1);
7438 sel.quick_grow (nunits);
7439 if (i == units_log2)
7441 for (j = 0; j < nunits; ++j)
7442 sel[j] = nunits - 1;
7444 else
7446 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7447 sel[j] = j;
7448 for (k = 0; j < nunits; ++j, ++k)
7449 sel[j] = nunits + k;
7451 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7452 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7454 if (i == units_log2)
7455 return -1;
7457 if (whole_vector_shift_kind == scan_store_kind_perm)
7459 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7460 return -1;
7461 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7462 /* Whole vector shifts shift in zeros, so if init is an all-zeros
7463 constant, there is no need to do anything further. */
7464 if ((TREE_CODE (init) != INTEGER_CST
7465 && TREE_CODE (init) != REAL_CST)
7466 || !initializer_zerop (init))
7468 tree masktype = truth_type_for (vectype);
7469 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7470 return -1;
7471 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7474 kind = whole_vector_shift_kind;
7476 if (use_whole_vector)
7478 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7479 use_whole_vector->safe_grow_cleared (i, true);
7480 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7481 use_whole_vector->safe_push (kind);
7485 return units_log2;
7489 /* Function check_scan_store.
7491 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7493 static bool
7494 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7495 enum vect_def_type rhs_dt, bool slp, tree mask,
7496 vect_memory_access_type memory_access_type)
7498 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7499 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7500 tree ref_type;
7502 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7503 if (slp
7504 || mask
7505 || memory_access_type != VMAT_CONTIGUOUS
7506 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7507 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7508 || loop_vinfo == NULL
7509 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7510 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7511 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7512 || !integer_zerop (DR_INIT (dr_info->dr))
7513 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7514 || !alias_sets_conflict_p (get_alias_set (vectype),
7515 get_alias_set (TREE_TYPE (ref_type))))
7517 if (dump_enabled_p ())
7518 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7519 "unsupported OpenMP scan store.\n");
7520 return false;
7523 /* We need to pattern match code built by OpenMP lowering and simplified
7524 by following optimizations into something we can handle.
7525 #pragma omp simd reduction(inscan,+:r)
7526 for (...)
7528 r += something ();
7529 #pragma omp scan inclusive (r)
7530 use (r);
7532 shall have body with:
7533 // Initialization for input phase, store the reduction initializer:
7534 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7535 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7536 D.2042[_21] = 0;
7537 // Actual input phase:
7539 r.0_5 = D.2042[_20];
7540 _6 = _4 + r.0_5;
7541 D.2042[_20] = _6;
7542 // Initialization for scan phase:
7543 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7544 _26 = D.2043[_25];
7545 _27 = D.2042[_25];
7546 _28 = _26 + _27;
7547 D.2043[_25] = _28;
7548 D.2042[_25] = _28;
7549 // Actual scan phase:
7551 r.1_8 = D.2042[_20];
7553 The "omp simd array" variable D.2042 holds the privatized copy used
7554 inside of the loop and D.2043 is another one that holds copies of
7555 the current original list item. The separate GOMP_SIMD_LANE ifn
7556 kinds are there in order to allow optimizing the initializer store
7557 and combiner sequence, e.g. if it is originally some C++ish user
7558 defined reduction, but allow the vectorizer to pattern recognize it
7559 and turn it into the appropriate vectorized scan.
7561 For exclusive scan, this is slightly different:
7562 #pragma omp simd reduction(inscan,+:r)
7563 for (...)
7565 use (r);
7566 #pragma omp scan exclusive (r)
7567 r += something ();
7569 shall have body with:
7570 // Initialization for input phase, store the reduction initializer:
7571 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7572 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7573 D.2042[_21] = 0;
7574 // Actual input phase:
7576 r.0_5 = D.2042[_20];
7577 _6 = _4 + r.0_5;
7578 D.2042[_20] = _6;
7579 // Initialization for scan phase:
7580 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7581 _26 = D.2043[_25];
7582 D.2044[_25] = _26;
7583 _27 = D.2042[_25];
7584 _28 = _26 + _27;
7585 D.2043[_25] = _28;
7586 // Actual scan phase:
7588 r.1_8 = D.2044[_20];
7589 ... */
7591 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7593 /* Match the D.2042[_21] = 0; store above. Just require that
7594 it is a constant or external definition store. */
7595 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7597 fail_init:
7598 if (dump_enabled_p ())
7599 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7600 "unsupported OpenMP scan initializer store.\n");
7601 return false;
7604 if (! loop_vinfo->scan_map)
7605 loop_vinfo->scan_map = new hash_map<tree, tree>;
7606 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7607 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7608 if (cached)
7609 goto fail_init;
7610 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7612 /* These stores can be vectorized normally. */
7613 return true;
7616 if (rhs_dt != vect_internal_def)
7618 fail:
7619 if (dump_enabled_p ())
7620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7621 "unsupported OpenMP scan combiner pattern.\n");
7622 return false;
7625 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7626 tree rhs = gimple_assign_rhs1 (stmt);
7627 if (TREE_CODE (rhs) != SSA_NAME)
7628 goto fail;
7630 gimple *other_store_stmt = NULL;
7631 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7632 bool inscan_var_store
7633 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7635 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7637 if (!inscan_var_store)
7639 use_operand_p use_p;
7640 imm_use_iterator iter;
7641 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7643 gimple *use_stmt = USE_STMT (use_p);
7644 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7645 continue;
7646 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7647 || !is_gimple_assign (use_stmt)
7648 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7649 || other_store_stmt
7650 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7651 goto fail;
7652 other_store_stmt = use_stmt;
7654 if (other_store_stmt == NULL)
7655 goto fail;
7656 rhs = gimple_assign_lhs (other_store_stmt);
7657 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7658 goto fail;
7661 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7663 use_operand_p use_p;
7664 imm_use_iterator iter;
7665 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7667 gimple *use_stmt = USE_STMT (use_p);
7668 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7669 continue;
7670 if (other_store_stmt)
7671 goto fail;
7672 other_store_stmt = use_stmt;
7675 else
7676 goto fail;
7678 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7679 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7680 || !is_gimple_assign (def_stmt)
7681 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7682 goto fail;
7684 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7685 /* For pointer addition, we should use the normal plus for the vector
7686 operation. */
7687 switch (code)
7689 case POINTER_PLUS_EXPR:
7690 code = PLUS_EXPR;
7691 break;
7692 case MULT_HIGHPART_EXPR:
7693 goto fail;
7694 default:
7695 break;
7697 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7698 goto fail;
7700 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7701 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7702 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7703 goto fail;
7705 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7706 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7707 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7708 || !gimple_assign_load_p (load1_stmt)
7709 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7710 || !gimple_assign_load_p (load2_stmt))
7711 goto fail;
7713 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7714 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7715 if (load1_stmt_info == NULL
7716 || load2_stmt_info == NULL
7717 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7718 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7719 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7720 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7721 goto fail;
7723 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7725 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7726 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7727 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7728 goto fail;
7729 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7730 tree lrhs;
7731 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7732 lrhs = rhs1;
7733 else
7734 lrhs = rhs2;
7735 use_operand_p use_p;
7736 imm_use_iterator iter;
7737 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7739 gimple *use_stmt = USE_STMT (use_p);
7740 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7741 continue;
7742 if (other_store_stmt)
7743 goto fail;
7744 other_store_stmt = use_stmt;
7748 if (other_store_stmt == NULL)
7749 goto fail;
7750 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7751 || !gimple_store_p (other_store_stmt))
7752 goto fail;
7754 stmt_vec_info other_store_stmt_info
7755 = loop_vinfo->lookup_stmt (other_store_stmt);
7756 if (other_store_stmt_info == NULL
7757 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7758 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7759 goto fail;
7761 gimple *stmt1 = stmt;
7762 gimple *stmt2 = other_store_stmt;
7763 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7764 std::swap (stmt1, stmt2);
7765 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7766 gimple_assign_rhs1 (load2_stmt)))
7768 std::swap (rhs1, rhs2);
7769 std::swap (load1_stmt, load2_stmt);
7770 std::swap (load1_stmt_info, load2_stmt_info);
7772 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7773 gimple_assign_rhs1 (load1_stmt)))
7774 goto fail;
7776 tree var3 = NULL_TREE;
7777 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7778 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7779 gimple_assign_rhs1 (load2_stmt)))
7780 goto fail;
7781 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7783 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7784 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7785 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7786 goto fail;
7787 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7788 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7789 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7790 || lookup_attribute ("omp simd inscan exclusive",
7791 DECL_ATTRIBUTES (var3)))
7792 goto fail;
7795 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7796 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7797 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7798 goto fail;
7800 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7801 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7802 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7803 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7804 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7805 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7806 goto fail;
7808 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7809 std::swap (var1, var2);
7811 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7813 if (!lookup_attribute ("omp simd inscan exclusive",
7814 DECL_ATTRIBUTES (var1)))
7815 goto fail;
7816 var1 = var3;
7819 if (loop_vinfo->scan_map == NULL)
7820 goto fail;
7821 tree *init = loop_vinfo->scan_map->get (var1);
7822 if (init == NULL)
7823 goto fail;
7825 /* The IL is as expected; now check if we can actually vectorize it.
7826 Inclusive scan:
7827 _26 = D.2043[_25];
7828 _27 = D.2042[_25];
7829 _28 = _26 + _27;
7830 D.2043[_25] = _28;
7831 D.2042[_25] = _28;
7832 should be vectorized as (where _40 is the vectorized rhs
7833 from the D.2042[_21] = 0; store):
7834 _30 = MEM <vector(8) int> [(int *)&D.2043];
7835 _31 = MEM <vector(8) int> [(int *)&D.2042];
7836 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7837 _33 = _31 + _32;
7838 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7839 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7840 _35 = _33 + _34;
7841 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7842 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7843 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7844 _37 = _35 + _36;
7845 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7846 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7847 _38 = _30 + _37;
7848 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7849 MEM <vector(8) int> [(int *)&D.2043] = _39;
7850 MEM <vector(8) int> [(int *)&D.2042] = _38;
7851 Exclusive scan:
7852 _26 = D.2043[_25];
7853 D.2044[_25] = _26;
7854 _27 = D.2042[_25];
7855 _28 = _26 + _27;
7856 D.2043[_25] = _28;
7857 should be vectorized as (where _40 is the vectorized rhs
7858 from the D.2042[_21] = 0; store):
7859 _30 = MEM <vector(8) int> [(int *)&D.2043];
7860 _31 = MEM <vector(8) int> [(int *)&D.2042];
7861 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7862 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7863 _34 = _32 + _33;
7864 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7865 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7866 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7867 _36 = _34 + _35;
7868 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7869 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7870 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7871 _38 = _36 + _37;
7872 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7873 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7874 _39 = _30 + _38;
7875 _50 = _31 + _39;
7876 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7877 MEM <vector(8) int> [(int *)&D.2044] = _39;
7878 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7879 enum machine_mode vec_mode = TYPE_MODE (vectype);
7880 optab optab = optab_for_tree_code (code, vectype, optab_default);
7881 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7882 goto fail;
7884 int units_log2 = scan_store_can_perm_p (vectype, *init);
7885 if (units_log2 == -1)
7886 goto fail;
7888 return true;
7892 /* Function vectorizable_scan_store.
7894 Helper of vectorizable_store; arguments are like those of vectorizable_store.
7895 Handle only the transformation; the checking is done in check_scan_store. */
7897 static bool
7898 vectorizable_scan_store (vec_info *vinfo,
7899 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7900 gimple **vec_stmt, int ncopies)
7902 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7903 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7904 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7905 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7907 if (dump_enabled_p ())
7908 dump_printf_loc (MSG_NOTE, vect_location,
7909 "transform scan store. ncopies = %d\n", ncopies);
7911 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7912 tree rhs = gimple_assign_rhs1 (stmt);
7913 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7915 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7916 bool inscan_var_store
7917 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7919 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7921 use_operand_p use_p;
7922 imm_use_iterator iter;
7923 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7925 gimple *use_stmt = USE_STMT (use_p);
7926 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7927 continue;
7928 rhs = gimple_assign_lhs (use_stmt);
7929 break;
7933 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7934 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7935 if (code == POINTER_PLUS_EXPR)
7936 code = PLUS_EXPR;
7937 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7938 && commutative_tree_code (code));
7939 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7940 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7941 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7942 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7943 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7944 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7945 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7946 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7947 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7948 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7949 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7951 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7953 std::swap (rhs1, rhs2);
7954 std::swap (var1, var2);
7955 std::swap (load1_dr_info, load2_dr_info);
7958 tree *init = loop_vinfo->scan_map->get (var1);
7959 gcc_assert (init);
7961 unsigned HOST_WIDE_INT nunits;
7962 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7963 gcc_unreachable ();
7964 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7965 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7966 gcc_assert (units_log2 > 0);
7967 auto_vec<tree, 16> perms;
7968 perms.quick_grow (units_log2 + 1);
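   /* perms[0 .. units_log2-1] hold the shift-style masks for the log2
      scan steps, perms[units_log2] broadcasts the last lane into every
      element; see scan_store_can_perm_p above.  */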
7969 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7970 for (int i = 0; i <= units_log2; ++i)
7972 unsigned HOST_WIDE_INT j, k;
7973 vec_perm_builder sel (nunits, nunits, 1);
7974 sel.quick_grow (nunits);
7975 if (i == units_log2)
7976 for (j = 0; j < nunits; ++j)
7977 sel[j] = nunits - 1;
7978 else
7980 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7981 sel[j] = j;
7982 for (k = 0; j < nunits; ++j, ++k)
7983 sel[j] = nunits + k;
7985 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7986 if (!use_whole_vector.is_empty ()
7987 && use_whole_vector[i] != scan_store_kind_perm)
7989 if (zero_vec == NULL_TREE)
7990 zero_vec = build_zero_cst (vectype);
7991 if (masktype == NULL_TREE
7992 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7993 masktype = truth_type_for (vectype);
7994 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7996 else
7997 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
8000 tree vec_oprnd1 = NULL_TREE;
8001 tree vec_oprnd2 = NULL_TREE;
8002 tree vec_oprnd3 = NULL_TREE;
8003 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
8004 tree dataref_offset = build_int_cst (ref_type, 0);
8005 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
8006 vectype, VMAT_CONTIGUOUS);
8007 tree ldataref_ptr = NULL_TREE;
8008 tree orig = NULL_TREE;
8009 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
8010 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
8011 auto_vec<tree> vec_oprnds1;
8012 auto_vec<tree> vec_oprnds2;
8013 auto_vec<tree> vec_oprnds3;
8014 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
8015 *init, &vec_oprnds1,
8016 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
8017 rhs2, &vec_oprnds3);
8018 for (int j = 0; j < ncopies; j++)
8020 vec_oprnd1 = vec_oprnds1[j];
8021 if (ldataref_ptr == NULL)
8022 vec_oprnd2 = vec_oprnds2[j];
8023 vec_oprnd3 = vec_oprnds3[j];
8024 if (j == 0)
8025 orig = vec_oprnd3;
8026 else if (!inscan_var_store)
8027 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8029 if (ldataref_ptr)
8031 vec_oprnd2 = make_ssa_name (vectype);
8032 tree data_ref = fold_build2 (MEM_REF, vectype,
8033 unshare_expr (ldataref_ptr),
8034 dataref_offset);
8035 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
8036 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
8037 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8038 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8039 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8042 tree v = vec_oprnd2;
8043 for (int i = 0; i < units_log2; ++i)
8045 tree new_temp = make_ssa_name (vectype);
8046 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
8047 (zero_vec
8048 && (use_whole_vector[i]
8049 != scan_store_kind_perm))
8050 ? zero_vec : vec_oprnd1, v,
8051 perms[i]);
8052 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8053 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8054 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8056 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
8058 /* The whole vector shift shifted in zero bits, but if *init
8059 is not initializer_zerop, we need to replace those shifted-in
8060 elements with elements from vec_oprnd1. */
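	      /* E.g. for nunits == 8 and step i == 1 the mask built next
		 is { 0, 0, 1, 1, 1, 1, 1, 1 }, so the two shifted-in
		 lanes take their value from vec_oprnd1 (an illustration,
		 assuming that vector shape).  */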
8061 tree_vector_builder vb (masktype, nunits, 1);
8062 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
8063 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
8064 ? boolean_false_node : boolean_true_node);
8066 tree new_temp2 = make_ssa_name (vectype);
8067 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
8068 new_temp, vec_oprnd1);
8069 vect_finish_stmt_generation (vinfo, stmt_info,
8070 g, gsi);
8071 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8072 new_temp = new_temp2;
8075 /* For exclusive scan, perform the perms[i] permutation once
8076 more. */
8077 if (i == 0
8078 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
8079 && v == vec_oprnd2)
8081 v = new_temp;
8082 --i;
8083 continue;
8086 tree new_temp2 = make_ssa_name (vectype);
8087 g = gimple_build_assign (new_temp2, code, v, new_temp);
8088 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8089 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8091 v = new_temp2;
8094 tree new_temp = make_ssa_name (vectype);
8095 gimple *g = gimple_build_assign (new_temp, code, orig, v);
8096 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8097 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8099 tree last_perm_arg = new_temp;
8100 /* For exclusive scan, new_temp computed above is the exclusive scan
8101 prefix sum. Turn it into inclusive prefix sum for the broadcast
8102 of the last element into orig. */
8103 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8105 last_perm_arg = make_ssa_name (vectype);
8106 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8107 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8108 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8111 orig = make_ssa_name (vectype);
8112 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8113 last_perm_arg, perms[units_log2]);
8114 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8115 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8117 if (!inscan_var_store)
8119 tree data_ref = fold_build2 (MEM_REF, vectype,
8120 unshare_expr (dataref_ptr),
8121 dataref_offset);
8122 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8123 g = gimple_build_assign (data_ref, new_temp);
8124 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8125 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8129 if (inscan_var_store)
8130 for (int j = 0; j < ncopies; j++)
8132 if (j != 0)
8133 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8135 tree data_ref = fold_build2 (MEM_REF, vectype,
8136 unshare_expr (dataref_ptr),
8137 dataref_offset);
8138 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8139 gimple *g = gimple_build_assign (data_ref, orig);
8140 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8141 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8143 return true;
8147 /* Function vectorizable_store.
8149 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
8150 that can be vectorized.
8151 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8152 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8153 Return true if STMT_INFO is vectorizable in this way. */
8155 static bool
8156 vectorizable_store (vec_info *vinfo,
8157 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8158 gimple **vec_stmt, slp_tree slp_node,
8159 stmt_vector_for_cost *cost_vec)
8161 tree data_ref;
8162 tree op;
8163 tree vec_oprnd = NULL_TREE;
8164 tree elem_type;
8165 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8166 class loop *loop = NULL;
8167 machine_mode vec_mode;
8168 tree dummy;
8169 enum vect_def_type rhs_dt = vect_unknown_def_type;
8170 enum vect_def_type mask_dt = vect_unknown_def_type;
8171 tree dataref_ptr = NULL_TREE;
8172 tree dataref_offset = NULL_TREE;
8173 gimple *ptr_incr = NULL;
8174 int ncopies;
8175 int j;
8176 stmt_vec_info first_stmt_info;
8177 bool grouped_store;
8178 unsigned int group_size, i;
8179 bool slp = (slp_node != NULL);
8180 unsigned int vec_num;
8181 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8182 tree aggr_type;
8183 gather_scatter_info gs_info;
8184 poly_uint64 vf;
8185 vec_load_store_type vls_type;
8186 tree ref_type;
8188 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8189 return false;
8191 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8192 && ! vec_stmt)
8193 return false;
8195 /* Is vectorizable store? */
8197 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8198 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8200 tree scalar_dest = gimple_assign_lhs (assign);
8201 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8202 && is_pattern_stmt_p (stmt_info))
8203 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8204 if (TREE_CODE (scalar_dest) != ARRAY_REF
8205 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8206 && TREE_CODE (scalar_dest) != INDIRECT_REF
8207 && TREE_CODE (scalar_dest) != COMPONENT_REF
8208 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8209 && TREE_CODE (scalar_dest) != REALPART_EXPR
8210 && TREE_CODE (scalar_dest) != MEM_REF)
8211 return false;
8213 else
8215 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8216 if (!call || !gimple_call_internal_p (call))
8217 return false;
8219 internal_fn ifn = gimple_call_internal_fn (call);
8220 if (!internal_store_fn_p (ifn))
8221 return false;
8223 int mask_index = internal_fn_mask_index (ifn);
8224 if (mask_index >= 0 && slp_node)
8225 mask_index = vect_slp_child_index_for_operand (call, mask_index);
8226 if (mask_index >= 0
8227 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8228 &mask, NULL, &mask_dt, &mask_vectype))
8229 return false;
8232 op = vect_get_store_rhs (stmt_info);
8234 /* Cannot have hybrid store SLP -- that would mean storing to the
8235 same location twice. */
8236 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8238 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8239 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8241 if (loop_vinfo)
8243 loop = LOOP_VINFO_LOOP (loop_vinfo);
8244 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8246 else
8247 vf = 1;
8249 /* Multiple types in SLP are handled by creating the appropriate number of
8250 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8251 case of SLP. */
8252 if (slp)
8253 ncopies = 1;
8254 else
8255 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8257 gcc_assert (ncopies >= 1);
8259 /* FORNOW. This restriction should be relaxed. */
8260 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8262 if (dump_enabled_p ())
8263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8264 "multiple types in nested loop.\n");
8265 return false;
8268 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8269 op, &rhs_dt, &rhs_vectype, &vls_type))
8270 return false;
8272 elem_type = TREE_TYPE (vectype);
8273 vec_mode = TYPE_MODE (vectype);
8275 if (!STMT_VINFO_DATA_REF (stmt_info))
8276 return false;
8278 vect_memory_access_type memory_access_type;
8279 enum dr_alignment_support alignment_support_scheme;
8280 int misalignment;
8281 poly_int64 poffset;
8282 internal_fn lanes_ifn;
8283 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8284 ncopies, &memory_access_type, &poffset,
8285 &alignment_support_scheme, &misalignment, &gs_info,
8286 &lanes_ifn))
8287 return false;
8289 if (mask)
8291 if (memory_access_type == VMAT_CONTIGUOUS)
8293 if (!VECTOR_MODE_P (vec_mode)
8294 || !can_vec_mask_load_store_p (vec_mode,
8295 TYPE_MODE (mask_vectype), false))
8296 return false;
8298 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8299 && (memory_access_type != VMAT_GATHER_SCATTER
8300 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8302 if (dump_enabled_p ())
8303 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8304 "unsupported access type for masked store.\n");
8305 return false;
8307 else if (memory_access_type == VMAT_GATHER_SCATTER
8308 && gs_info.ifn == IFN_LAST
8309 && !gs_info.decl)
8311 if (dump_enabled_p ())
8312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8313 "unsupported masked emulated scatter.\n");
8314 return false;
8317 else
8319 /* FORNOW. In some cases we can vectorize even if the data-type is not
8320 supported (e.g. array initialization with 0). */
8321 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8322 return false;
8325 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8326 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8327 && memory_access_type != VMAT_GATHER_SCATTER
8328 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8329 if (grouped_store)
8331 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8332 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8333 group_size = DR_GROUP_SIZE (first_stmt_info);
8335 else
8337 first_stmt_info = stmt_info;
8338 first_dr_info = dr_info;
8339 group_size = vec_num = 1;
8342 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8344 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8345 memory_access_type))
8346 return false;
8349 bool costing_p = !vec_stmt;
8350 if (costing_p) /* transformation not required. */
8352 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8354 if (loop_vinfo
8355 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8356 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8357 vls_type, group_size,
8358 memory_access_type, &gs_info,
8359 mask);
8361 if (slp_node
8362 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8363 vectype))
8365 if (dump_enabled_p ())
8366 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8367 "incompatible vector types for invariants\n");
8368 return false;
8371 if (dump_enabled_p ()
8372 && memory_access_type != VMAT_ELEMENTWISE
8373 && memory_access_type != VMAT_GATHER_SCATTER
8374 && alignment_support_scheme != dr_aligned)
8375 dump_printf_loc (MSG_NOTE, vect_location,
8376 "Vectorizing an unaligned access.\n");
8378 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8380 /* As function vect_transform_stmt shows, for interleaving stores
8381 the whole chain is vectorized when the last store in the chain
8382 is reached; the other stores in the group are skipped. So we
8383 want to cost only the last one here, but it's not trivial to
8384 get hold of the last one; since costing the first one is
8385 equivalent, use the first one instead. */
8386 if (grouped_store
8387 && !slp
8388 && first_stmt_info != stmt_info)
8389 return true;
8391 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8393 /* Transform. */
8395 ensure_base_align (dr_info);
8397 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8399 vect_build_scatter_store_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info,
8400 mask, cost_vec);
8401 return true;
8403 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8405 gcc_assert (memory_access_type == VMAT_CONTIGUOUS);
8406 gcc_assert (!slp);
8407 if (costing_p)
8409 unsigned int inside_cost = 0, prologue_cost = 0;
8410 if (vls_type == VLS_STORE_INVARIANT)
8411 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
8412 stmt_info, 0, vect_prologue);
8413 vect_get_store_cost (vinfo, stmt_info, ncopies,
8414 alignment_support_scheme, misalignment,
8415 &inside_cost, cost_vec);
8417 if (dump_enabled_p ())
8418 dump_printf_loc (MSG_NOTE, vect_location,
8419 "vect_model_store_cost: inside_cost = %d, "
8420 "prologue_cost = %d .\n",
8421 inside_cost, prologue_cost);
8423 return true;
8425 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8428 if (grouped_store)
8430 /* FORNOW */
8431 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8433 if (slp)
8435 grouped_store = false;
8436 /* VEC_NUM is the number of vect stmts to be created for this
8437 group. */
8438 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8439 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8440 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8441 == first_stmt_info);
8442 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8443 op = vect_get_store_rhs (first_stmt_info);
8445 else
8446 /* VEC_NUM is the number of vect stmts to be created for this
8447 group. */
8448 vec_num = group_size;
8450 ref_type = get_group_alias_ptr_type (first_stmt_info);
8452 else
8453 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8455 if (!costing_p && dump_enabled_p ())
8456 dump_printf_loc (MSG_NOTE, vect_location, "transform store. ncopies = %d\n",
8457 ncopies);
8459 /* Check if we need to update the prologue cost for an invariant,
8460 and update it accordingly if so. If it's not for an
8461 interleaving store, we can just check vls_type; but if it's
8462 for an interleaving store, we need to check the def_type of
8463 the stored value since the current vls_type is just for
8464 first_stmt_info. */
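  /* E.g. for an invariant store like a[i] = c the stored value needs one
     scalar_to_vec (splat) in the loop preheader; that is what the
     prologue cost recorded below accounts for (an illustrative example).  */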
8465 auto update_prologue_cost = [&](unsigned *prologue_cost, tree store_rhs)
8467 gcc_assert (costing_p);
8468 if (slp)
8469 return;
8470 if (grouped_store)
8472 gcc_assert (store_rhs);
8473 enum vect_def_type cdt;
8474 gcc_assert (vect_is_simple_use (store_rhs, vinfo, &cdt));
8475 if (cdt != vect_constant_def && cdt != vect_external_def)
8476 return;
8478 else if (vls_type != VLS_STORE_INVARIANT)
8479 return;
8480 *prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info,
8481 0, vect_prologue);
8484 if (memory_access_type == VMAT_ELEMENTWISE
8485 || memory_access_type == VMAT_STRIDED_SLP)
8487 unsigned inside_cost = 0, prologue_cost = 0;
8488 gimple_stmt_iterator incr_gsi;
8489 bool insert_after;
8490 gimple *incr;
8491 tree offvar;
8492 tree ivstep;
8493 tree running_off;
8494 tree stride_base, stride_step, alias_off;
8495 tree vec_oprnd = NULL_TREE;
8496 tree dr_offset;
8497 unsigned int g;
8498 /* Checked by get_load_store_type. */
8499 unsigned int const_nunits = nunits.to_constant ();
8501 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8502 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8504 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8505 stride_base
8506 = fold_build_pointer_plus
8507 (DR_BASE_ADDRESS (first_dr_info->dr),
8508 size_binop (PLUS_EXPR,
8509 convert_to_ptrofftype (dr_offset),
8510 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8511 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8513 /* For a store with loop-invariant (but other than power-of-2)
8514 stride (i.e. not a grouped access) like so:
8516 for (i = 0; i < n; i += stride)
8517 array[i] = ...;
8519 we generate a new induction variable and new stores from
8520 the components of the (vectorized) rhs:
8522 for (j = 0; ; j += VF*stride)
8523 vectemp = ...;
8524 tmp1 = vectemp[0];
8525 array[j] = tmp1;
8526 tmp2 = vectemp[1];
8527 array[j + stride] = tmp2;
8531 unsigned nstores = const_nunits;
8532 unsigned lnel = 1;
8533 tree ltype = elem_type;
8534 tree lvectype = vectype;
8535 if (slp)
8537 if (group_size < const_nunits
8538 && const_nunits % group_size == 0)
8540 nstores = const_nunits / group_size;
8541 lnel = group_size;
8542 ltype = build_vector_type (elem_type, group_size);
8543 lvectype = vectype;
8545 /* First check if vec_extract optab doesn't support extraction
8546 of vector elts directly. */
8547 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8548 machine_mode vmode;
8549 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8550 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8551 group_size).exists (&vmode)
8552 || (convert_optab_handler (vec_extract_optab,
8553 TYPE_MODE (vectype), vmode)
8554 == CODE_FOR_nothing))
8556 /* Try to avoid emitting an extract of vector elements
8557 by performing the extracts using an integer type of the
8558 same size, extracting from a vector of those and then
8559 re-interpreting it as the original vector type if
8560 supported. */
8561 unsigned lsize
8562 = group_size * GET_MODE_BITSIZE (elmode);
8563 unsigned int lnunits = const_nunits / group_size;
8564 /* If we can't construct such a vector fall back to
8565 element extracts from the original vector type and
8566 element size stores. */
8567 if (int_mode_for_size (lsize, 0).exists (&elmode)
8568 && VECTOR_MODE_P (TYPE_MODE (vectype))
8569 && related_vector_mode (TYPE_MODE (vectype), elmode,
8570 lnunits).exists (&vmode)
8571 && (convert_optab_handler (vec_extract_optab,
8572 vmode, elmode)
8573 != CODE_FOR_nothing))
8575 nstores = lnunits;
8576 lnel = group_size;
8577 ltype = build_nonstandard_integer_type (lsize, 1);
8578 lvectype = build_vector_type (ltype, nstores);
8580 /* Else fall back to vector extraction anyway.
8581 Fewer stores are more important than avoiding spilling
8582 of the vector we extract from. Compared to the
8583 construction case in vectorizable_load no store-forwarding
8584 issue exists here for reasonable archs. */
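	      /* Illustrative case, assuming the target provides the
		 modes: with float elements, group_size == 2 and
		 const_nunits == 8, lsize is 64, so the V8SF value is
		 viewed as V4DI and four DImode lanes are extracted and
		 stored, i.e. two floats per scalar store instead of
		 eight single-element extracts.  */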
8587 else if (group_size >= const_nunits
8588 && group_size % const_nunits == 0)
8590 int mis_align = dr_misalignment (first_dr_info, vectype);
8591 dr_alignment_support dr_align
8592 = vect_supportable_dr_alignment (vinfo, dr_info, vectype,
8593 mis_align);
8594 if (dr_align == dr_aligned
8595 || dr_align == dr_unaligned_supported)
8597 nstores = 1;
8598 lnel = const_nunits;
8599 ltype = vectype;
8600 lvectype = vectype;
8601 alignment_support_scheme = dr_align;
8602 misalignment = mis_align;
8605 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8606 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8609 if (!costing_p)
8611 ivstep = stride_step;
8612 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8613 build_int_cst (TREE_TYPE (ivstep), vf));
8615 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8617 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8618 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8619 create_iv (stride_base, PLUS_EXPR, ivstep, NULL, loop, &incr_gsi,
8620 insert_after, &offvar, NULL);
8621 incr = gsi_stmt (incr_gsi);
8623 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8626 alias_off = build_int_cst (ref_type, 0);
8627 stmt_vec_info next_stmt_info = first_stmt_info;
8628 auto_vec<tree> vec_oprnds (ncopies);
8629 for (g = 0; g < group_size; g++)
8631 running_off = offvar;
8632 if (!costing_p)
8634 if (g)
8636 tree size = TYPE_SIZE_UNIT (ltype);
8637 tree pos
8638 = fold_build2 (MULT_EXPR, sizetype, size_int (g), size);
8639 tree newoff = copy_ssa_name (running_off, NULL);
8640 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8641 running_off, pos);
8642 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8643 running_off = newoff;
8646 if (!slp)
8647 op = vect_get_store_rhs (next_stmt_info);
8648 if (!costing_p)
8649 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies, op,
8650 &vec_oprnds);
8651 else
8652 update_prologue_cost (&prologue_cost, op);
8653 unsigned int group_el = 0;
8654 unsigned HOST_WIDE_INT
8655 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8656 for (j = 0; j < ncopies; j++)
8658 if (!costing_p)
8660 vec_oprnd = vec_oprnds[j];
8661 /* Pun the vector to extract from if necessary. */
8662 if (lvectype != vectype)
8664 tree tem = make_ssa_name (lvectype);
8665 tree cvt
8666 = build1 (VIEW_CONVERT_EXPR, lvectype, vec_oprnd);
8667 gimple *pun = gimple_build_assign (tem, cvt);
8668 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8669 vec_oprnd = tem;
8672 for (i = 0; i < nstores; i++)
8674 if (costing_p)
8676 /* We only need vector extraction when there is more
8677 than one store. */
8678 if (nstores > 1)
8679 inside_cost
8680 += record_stmt_cost (cost_vec, 1, vec_to_scalar,
8681 stmt_info, 0, vect_body);
8682 /* Treat a single-lane vector type store as a scalar
8683 store to avoid an ICE like PR 110776. */
8684 if (VECTOR_TYPE_P (ltype)
8685 && known_ne (TYPE_VECTOR_SUBPARTS (ltype), 1U))
8686 vect_get_store_cost (vinfo, stmt_info, 1,
8687 alignment_support_scheme,
8688 misalignment, &inside_cost,
8689 cost_vec);
8690 else
8691 inside_cost
8692 += record_stmt_cost (cost_vec, 1, scalar_store,
8693 stmt_info, 0, vect_body);
8694 continue;
8696 tree newref, newoff;
8697 gimple *incr, *assign;
8698 tree size = TYPE_SIZE (ltype);
8699 /* Extract the i'th component. */
8700 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8701 bitsize_int (i), size);
8702 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8703 size, pos);
8705 elem = force_gimple_operand_gsi (gsi, elem, true,
8706 NULL_TREE, true,
8707 GSI_SAME_STMT);
8709 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8710 group_el * elsz);
8711 newref = build2 (MEM_REF, ltype,
8712 running_off, this_off);
8713 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8715 /* And store it to *running_off. */
8716 assign = gimple_build_assign (newref, elem);
8717 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8719 group_el += lnel;
8720 if (! slp
8721 || group_el == group_size)
8723 newoff = copy_ssa_name (running_off, NULL);
8724 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8725 running_off, stride_step);
8726 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8728 running_off = newoff;
8729 group_el = 0;
8731 if (g == group_size - 1
8732 && !slp)
8734 if (j == 0 && i == 0)
8735 *vec_stmt = assign;
8736 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8740 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8741 vec_oprnds.truncate(0);
8742 if (slp)
8743 break;
8746 if (costing_p && dump_enabled_p ())
8747 dump_printf_loc (MSG_NOTE, vect_location,
8748 "vect_model_store_cost: inside_cost = %d, "
8749 "prologue_cost = %d .\n",
8750 inside_cost, prologue_cost);
8752 return true;
8755 gcc_assert (alignment_support_scheme);
8756 vec_loop_masks *loop_masks
8757 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8758 ? &LOOP_VINFO_MASKS (loop_vinfo)
8759 : NULL);
8760 vec_loop_lens *loop_lens
8761 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8762 ? &LOOP_VINFO_LENS (loop_vinfo)
8763 : NULL);
8765 /* Shouldn't go with length-based approach if fully masked. */
8766 gcc_assert (!loop_lens || !loop_masks);
8768 /* Targets with store-lane instructions must not require explicit
8769 realignment. vect_supportable_dr_alignment always returns either
8770 dr_aligned or dr_unaligned_supported for masked operations. */
8771 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8772 && !mask
8773 && !loop_masks)
8774 || alignment_support_scheme == dr_aligned
8775 || alignment_support_scheme == dr_unaligned_supported);
8777 tree offset = NULL_TREE;
8778 if (!known_eq (poffset, 0))
8779 offset = size_int (poffset);
8781 tree bump;
8782 tree vec_offset = NULL_TREE;
8783 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8785 aggr_type = NULL_TREE;
8786 bump = NULL_TREE;
8788 else if (memory_access_type == VMAT_GATHER_SCATTER)
8790 aggr_type = elem_type;
8791 if (!costing_p)
8792 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8793 &bump, &vec_offset, loop_lens);
8795 else
8797 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8798 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8799 else
8800 aggr_type = vectype;
8801 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8802 memory_access_type, loop_lens);
8805 if (mask && !costing_p)
8806 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8808 /* In case the vectorization factor (VF) is bigger than the number
8809 of elements that we can fit in a vectype (nunits), we have to generate
8810 more than one vector stmt, i.e. we need to "unroll" the
8811 vector stmt by a factor VF/nunits. */
8813 /* In case of interleaving (non-unit grouped access):
8815 S1: &base + 2 = x2
8816 S2: &base = x0
8817 S3: &base + 1 = x1
8818 S4: &base + 3 = x3
8820 We create vectorized stores starting from the base address (the access of
8821 the first stmt in the chain, S2 in the above example) when the last store
8822 stmt of the chain (S4) is reached:
8824 VS1: &base = vx2
8825 VS2: &base + vec_size*1 = vx0
8826 VS3: &base + vec_size*2 = vx1
8827 VS4: &base + vec_size*3 = vx3
8829 Then permutation statements are generated:
8831 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8832 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8835 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8836 (the order of the data-refs in the output of vect_permute_store_chain
8837 corresponds to the order of scalar stmts in the interleaving chain - see
8838 the documentation of vect_permute_store_chain()).
8840 In case of both multiple types and interleaving, above vector stores and
8841 permutation stmts are created for every copy. The result vector stmts are
8842 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8843 STMT_VINFO_RELATED_STMT for the next copies.
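/* A minimal illustrative sketch (assuming a group of size 2 and a 4-lane
   vectype, not tied to any particular target):

     for (i = 0; i < n; i++)
       {
         a[2*i]     = x[i];
         a[2*i + 1] = y[i];
       }

   Here the vectorized defs vx = {x0,x1,x2,x3} and vy = {y0,y1,y2,y3} are
   interleaved by vect_permute_store_chain into {x0,y0,x1,y1} and
   {x2,y2,x3,y3}, which are then written with two contiguous vector
   stores.  */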
8846 auto_vec<tree> dr_chain (group_size);
8847 auto_vec<tree> vec_masks;
8848 tree vec_mask = NULL;
8849 auto_delete_vec<auto_vec<tree>> gvec_oprnds (group_size);
8850 for (i = 0; i < group_size; i++)
8851 gvec_oprnds.quick_push (new auto_vec<tree> (ncopies));
8853 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8855 gcc_assert (!slp && grouped_store);
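      /* In the store-lanes scheme the whole interleaved group is written by a
         single target instruction (e.g. AArch64 st2/st3/st4), so no explicit
         permutation statements are needed: the group's vectors are collected
         into an array and passed to an IFN_*STORE_LANES call below.  */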
8856 unsigned inside_cost = 0, prologue_cost = 0;
8857 for (j = 0; j < ncopies; j++)
8859 gimple *new_stmt;
8860 if (j == 0)
8862 /* For interleaved stores we collect vectorized defs for all
8863 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8864 as an input to vect_permute_store_chain(). */
8865 stmt_vec_info next_stmt_info = first_stmt_info;
8866 for (i = 0; i < group_size; i++)
8868 /* Since gaps are not supported for interleaved stores,
8869 DR_GROUP_SIZE is the exact number of stmts in the
8870 chain. Therefore, NEXT_STMT_INFO can't be NULL. */
8871 op = vect_get_store_rhs (next_stmt_info);
8872 if (costing_p)
8873 update_prologue_cost (&prologue_cost, op);
8874 else
8876 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8877 ncopies, op,
8878 gvec_oprnds[i]);
8879 vec_oprnd = (*gvec_oprnds[i])[0];
8880 dr_chain.quick_push (vec_oprnd);
8882 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8885 if (!costing_p)
8887 if (mask)
8889 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8890 mask, &vec_masks,
8891 mask_vectype);
8892 vec_mask = vec_masks[0];
8895 /* We should have caught mismatched types earlier. */
8896 gcc_assert (
8897 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
8898 dataref_ptr
8899 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
8900 aggr_type, NULL, offset, &dummy,
8901 gsi, &ptr_incr, false, bump);
8904 else if (!costing_p)
8906 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8907 /* DR_CHAIN is then used as an input to
8908 vect_permute_store_chain(). */
8909 for (i = 0; i < group_size; i++)
8911 vec_oprnd = (*gvec_oprnds[i])[j];
8912 dr_chain[i] = vec_oprnd;
8914 if (mask)
8915 vec_mask = vec_masks[j];
8916 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8917 stmt_info, bump);
8920 if (costing_p)
8922 for (i = 0; i < vec_num; i++)
8923 vect_get_store_cost (vinfo, stmt_info, 1,
8924 alignment_support_scheme, misalignment,
8925 &inside_cost, cost_vec);
8926 continue;
8929 /* Get an array into which we can store the individual vectors. */
8930 tree vec_array = create_vector_array (vectype, vec_num);
8932 /* Invalidate the current contents of VEC_ARRAY. This should
8933 become an RTL clobber too, which prevents the vector registers
8934 from being upward-exposed. */
8935 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8937 /* Store the individual vectors into the array. */
8938 for (i = 0; i < vec_num; i++)
8940 vec_oprnd = dr_chain[i];
8941 write_vector_array (vinfo, stmt_info, gsi, vec_oprnd, vec_array,
8945 tree final_mask = NULL;
8946 tree final_len = NULL;
8947 tree bias = NULL;
8948 if (loop_masks)
8949 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8950 ncopies, vectype, j);
8951 if (vec_mask)
8952 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
8953 vec_mask, gsi);
8955 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8957 if (loop_lens)
8958 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8959 ncopies, vectype, j, 1);
8960 else
8961 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8962 signed char biasval
8963 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8964 bias = build_int_cst (intQI_type_node, biasval);
8965 if (!final_mask)
8967 mask_vectype = truth_type_for (vectype);
8968 final_mask = build_minus_one_cst (mask_vectype);
8972 gcall *call;
8973 if (final_len && final_mask)
8975 /* Emit:
8976 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8977 LEN, BIAS, VEC_ARRAY). */
8978 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8979 tree alias_ptr = build_int_cst (ref_type, align);
8980 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8981 dataref_ptr, alias_ptr,
8982 final_mask, final_len, bias,
8983 vec_array);
8985 else if (final_mask)
8987 /* Emit:
8988 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8989 VEC_ARRAY). */
8990 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8991 tree alias_ptr = build_int_cst (ref_type, align);
8992 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8993 dataref_ptr, alias_ptr,
8994 final_mask, vec_array);
8996 else
8998 /* Emit:
8999 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
9000 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9001 call = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
9002 gimple_call_set_lhs (call, data_ref);
9004 gimple_call_set_nothrow (call, true);
9005 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9006 new_stmt = call;
9008 /* Record that VEC_ARRAY is now dead. */
9009 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
9010 if (j == 0)
9011 *vec_stmt = new_stmt;
9012 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9015 if (costing_p && dump_enabled_p ())
9016 dump_printf_loc (MSG_NOTE, vect_location,
9017 "vect_model_store_cost: inside_cost = %d, "
9018 "prologue_cost = %d .\n",
9019 inside_cost, prologue_cost);
9021 return true;
9024 if (memory_access_type == VMAT_GATHER_SCATTER)
9026 gcc_assert (!slp && !grouped_store);
9027 auto_vec<tree> vec_offsets;
9028 unsigned int inside_cost = 0, prologue_cost = 0;
9029 for (j = 0; j < ncopies; j++)
9031 gimple *new_stmt;
9032 if (j == 0)
9034 if (costing_p && vls_type == VLS_STORE_INVARIANT)
9035 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
9036 stmt_info, 0, vect_prologue);
9037 else if (!costing_p)
9039 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
9040 DR_CHAIN is of size 1. */
9041 gcc_assert (group_size == 1);
9042 op = vect_get_store_rhs (first_stmt_info);
9043 vect_get_vec_defs_for_operand (vinfo, first_stmt_info,
9044 ncopies, op, gvec_oprnds[0]);
9045 vec_oprnd = (*gvec_oprnds[0])[0];
9046 dr_chain.quick_push (vec_oprnd);
9047 if (mask)
9049 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9050 mask, &vec_masks,
9051 mask_vectype);
9052 vec_mask = vec_masks[0];
9055 /* We should have caught mismatched types earlier. */
9056 gcc_assert (
9057 useless_type_conversion_p (vectype, TREE_TYPE (vec_oprnd)));
9058 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9059 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
9060 slp_node, &gs_info,
9061 &dataref_ptr, &vec_offsets);
9062 else
9063 dataref_ptr
9064 = vect_create_data_ref_ptr (vinfo, first_stmt_info,
9065 aggr_type, NULL, offset,
9066 &dummy, gsi, &ptr_incr, false,
9067 bump);
9070 else if (!costing_p)
9072 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9073 vec_oprnd = (*gvec_oprnds[0])[j];
9074 dr_chain[0] = vec_oprnd;
9075 if (mask)
9076 vec_mask = vec_masks[j];
9077 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9078 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9079 gsi, stmt_info, bump);
9082 new_stmt = NULL;
9083 unsigned HOST_WIDE_INT align;
9084 tree final_mask = NULL_TREE;
9085 tree final_len = NULL_TREE;
9086 tree bias = NULL_TREE;
9087 if (!costing_p)
9089 if (loop_masks)
9090 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9091 ncopies, vectype, j);
9092 if (vec_mask)
9093 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
9094 final_mask, vec_mask, gsi);
9097 if (gs_info.ifn != IFN_LAST)
9099 if (costing_p)
9101 unsigned int cnunits = vect_nunits_for_cost (vectype);
9102 inside_cost
9103 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9104 stmt_info, 0, vect_body);
9105 continue;
9108 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9109 vec_offset = vec_offsets[j];
9110 tree scale = size_int (gs_info.scale);
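	      /* Sketch of the internal call built below:
		   .MASK_SCATTER_STORE (dataref_ptr, vec_offset, scale,
					vec_oprnd, final_mask);
		 the MASK_LEN_ variant takes an additional length and bias
		 operand, the unmasked SCATTER_STORE variant drops the
		 mask.  */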
9112 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9114 if (loop_lens)
9115 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9116 ncopies, vectype, j, 1);
9117 else
9118 final_len = build_int_cst (sizetype,
9119 TYPE_VECTOR_SUBPARTS (vectype));
9120 signed char biasval
9121 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9122 bias = build_int_cst (intQI_type_node, biasval);
9123 if (!final_mask)
9125 mask_vectype = truth_type_for (vectype);
9126 final_mask = build_minus_one_cst (mask_vectype);
9130 gcall *call;
9131 if (final_len && final_mask)
9132 call = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
9133 7, dataref_ptr, vec_offset,
9134 scale, vec_oprnd, final_mask,
9135 final_len, bias);
9136 else if (final_mask)
9137 call
9138 = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
9139 dataref_ptr, vec_offset, scale,
9140 vec_oprnd, final_mask);
9141 else
9142 call = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
9143 dataref_ptr, vec_offset,
9144 scale, vec_oprnd);
9145 gimple_call_set_nothrow (call, true);
9146 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9147 new_stmt = call;
9149 else
9151 /* Emulated scatter. */
9152 gcc_assert (!final_mask);
9153 if (costing_p)
9155 unsigned int cnunits = vect_nunits_for_cost (vectype);
9156 /* For an emulated scatter, N offset vector element extracts
9157 (we assume the scalar scaling and ptr + offset add is
9158 consumed by the store). */
9159 inside_cost
9160 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9161 stmt_info, 0, vect_body);
9162 /* N scalar stores plus extracting the elements. */
9163 inside_cost
9164 += record_stmt_cost (cost_vec, cnunits, vec_to_scalar,
9165 stmt_info, 0, vect_body);
9166 inside_cost
9167 += record_stmt_cost (cost_vec, cnunits, scalar_store,
9168 stmt_info, 0, vect_body);
9169 continue;
9172 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9173 unsigned HOST_WIDE_INT const_offset_nunits
9174 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype).to_constant ();
9175 vec<constructor_elt, va_gc> *ctor_elts;
9176 vec_alloc (ctor_elts, const_nunits);
9177 gimple_seq stmts = NULL;
9178 tree elt_type = TREE_TYPE (vectype);
9179 unsigned HOST_WIDE_INT elt_size
9180 = tree_to_uhwi (TYPE_SIZE (elt_type));
9181 /* We support offset vectors with more elements
9182 than the data vector for now. */
9183 unsigned HOST_WIDE_INT factor
9184 = const_offset_nunits / const_nunits;
9185 vec_offset = vec_offsets[j / factor];
9186 unsigned elt_offset = (j % factor) * const_nunits;
9187 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9188 tree scale = size_int (gs_info.scale);
9189 align = get_object_alignment (DR_REF (first_dr_info->dr));
9190 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
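	      /* For each element K the loop below emits roughly (a sketch
		 with hypothetical SSA names):
		   idx = BIT_FIELD_REF <vec_offset, isize, K * isize>;
		   idx = (sizetype) idx * scale;
		   ptr = (void *) (dataref_ptr + idx);
		   elt = BIT_FIELD_REF <vec_oprnd, esize, K * esize>;
		   MEM[(ltype *) ptr] = elt;  */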
9191 for (unsigned k = 0; k < const_nunits; ++k)
9193 /* Compute the offsetted pointer. */
9194 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9195 bitsize_int (k + elt_offset));
9196 tree idx
9197 = gimple_build (&stmts, BIT_FIELD_REF, idx_type, vec_offset,
9198 TYPE_SIZE (idx_type), boff);
9199 idx = gimple_convert (&stmts, sizetype, idx);
9200 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx, scale);
9201 tree ptr
9202 = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (dataref_ptr),
9203 dataref_ptr, idx);
9204 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9205 /* Extract the element to be stored. */
9206 tree elt
9207 = gimple_build (&stmts, BIT_FIELD_REF, TREE_TYPE (vectype),
9208 vec_oprnd, TYPE_SIZE (elt_type),
9209 bitsize_int (k * elt_size));
9210 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9211 stmts = NULL;
9212 tree ref
9213 = build2 (MEM_REF, ltype, ptr, build_int_cst (ref_type, 0));
9214 new_stmt = gimple_build_assign (ref, elt);
9215 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9218 if (j == 0)
9219 *vec_stmt = new_stmt;
9220 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9223 if (costing_p && dump_enabled_p ())
9224 dump_printf_loc (MSG_NOTE, vect_location,
9225 "vect_model_store_cost: inside_cost = %d, "
9226 "prologue_cost = %d .\n",
9227 inside_cost, prologue_cost);
9229 return true;
9232 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
9233 || memory_access_type == VMAT_CONTIGUOUS_DOWN
9234 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE
9235 || memory_access_type == VMAT_CONTIGUOUS_REVERSE);
9237 unsigned inside_cost = 0, prologue_cost = 0;
9238 auto_vec<tree> result_chain (group_size);
9239 auto_vec<tree, 1> vec_oprnds;
9240 for (j = 0; j < ncopies; j++)
9242 gimple *new_stmt;
9243 if (j == 0)
9245 if (slp && !costing_p)
9247 /* Get vectorized arguments for SLP_NODE. */
9248 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1, op,
9249 &vec_oprnds, mask, &vec_masks);
9250 vec_oprnd = vec_oprnds[0];
9251 if (mask)
9252 vec_mask = vec_masks[0];
9254 else
9256 /* For interleaved stores we collect vectorized defs for all the
9257 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9258 input to vect_permute_store_chain().
9260 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9261 is of size 1. */
9262 stmt_vec_info next_stmt_info = first_stmt_info;
9263 for (i = 0; i < group_size; i++)
9265 /* Since gaps are not supported for interleaved stores,
9266 DR_GROUP_SIZE is the exact number of stmts in the chain.
9267 Therefore, NEXT_STMT_INFO can't be NULL. In case
9268 there is no interleaving, DR_GROUP_SIZE is 1,
9269 and only one iteration of the loop will be executed. */
9270 op = vect_get_store_rhs (next_stmt_info);
9271 if (costing_p)
9272 update_prologue_cost (&prologue_cost, op);
9273 else
9275 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
9276 ncopies, op,
9277 gvec_oprnds[i]);
9278 vec_oprnd = (*gvec_oprnds[i])[0];
9279 dr_chain.quick_push (vec_oprnd);
9281 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9283 if (mask && !costing_p)
9285 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
9286 mask, &vec_masks,
9287 mask_vectype);
9288 vec_mask = vec_masks[0];
9292 /* We should have caught mismatched types earlier. */
9293 gcc_assert (costing_p
9294 || useless_type_conversion_p (vectype,
9295 TREE_TYPE (vec_oprnd)));
9296 bool simd_lane_access_p
9297 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9298 if (!costing_p
9299 && simd_lane_access_p
9300 && !loop_masks
9301 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9302 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9303 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
9304 && integer_zerop (DR_INIT (first_dr_info->dr))
9305 && alias_sets_conflict_p (get_alias_set (aggr_type),
9306 get_alias_set (TREE_TYPE (ref_type))))
9308 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9309 dataref_offset = build_int_cst (ref_type, 0);
9311 else if (!costing_p)
9312 dataref_ptr
9313 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
9314 simd_lane_access_p ? loop : NULL,
9315 offset, &dummy, gsi, &ptr_incr,
9316 simd_lane_access_p, bump);
9318 else if (!costing_p)
9320 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
9321 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9322 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9323 of size 1. */
9324 for (i = 0; i < group_size; i++)
9326 vec_oprnd = (*gvec_oprnds[i])[j];
9327 dr_chain[i] = vec_oprnd;
9329 if (mask)
9330 vec_mask = vec_masks[j];
9331 if (dataref_offset)
9332 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
9333 else
9334 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9335 stmt_info, bump);
9338 new_stmt = NULL;
9339 if (grouped_store)
9341 /* Permute. */
9342 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
9343 if (costing_p)
9345 int group_size = DR_GROUP_SIZE (first_stmt_info);
9346 int nstmts = ceil_log2 (group_size) * group_size;
9347 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
9348 stmt_info, 0, vect_body);
9349 if (dump_enabled_p ())
9350 dump_printf_loc (MSG_NOTE, vect_location,
9351 "vect_model_store_cost: "
9352 "strided group_size = %d .\n",
9353 group_size);
9355 else
9356 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
9357 gsi, &result_chain);
9360 stmt_vec_info next_stmt_info = first_stmt_info;
9361 for (i = 0; i < vec_num; i++)
9363 if (!costing_p)
9365 if (slp)
9366 vec_oprnd = vec_oprnds[i];
9367 else if (grouped_store)
9368 /* For grouped stores vectorized defs are interleaved in
9369 vect_permute_store_chain(). */
9370 vec_oprnd = result_chain[i];
9373 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9375 if (costing_p)
9376 inside_cost += record_stmt_cost (cost_vec, 1, vec_perm,
9377 stmt_info, 0, vect_body);
9378 else
9380 tree perm_mask = perm_mask_for_reverse (vectype);
9381 tree perm_dest = vect_create_destination_var (
9382 vect_get_store_rhs (stmt_info), vectype);
9383 tree new_temp = make_ssa_name (perm_dest);
9385 /* Generate the permute statement. */
9386 gimple *perm_stmt
9387 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9388 vec_oprnd, perm_mask);
9389 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
9390 gsi);
9392 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9393 vec_oprnd = new_temp;
9397 if (costing_p)
9399 vect_get_store_cost (vinfo, stmt_info, 1,
9400 alignment_support_scheme, misalignment,
9401 &inside_cost, cost_vec);
9403 if (!slp)
9405 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9406 if (!next_stmt_info)
9407 break;
9410 continue;
9413 tree final_mask = NULL_TREE;
9414 tree final_len = NULL_TREE;
9415 tree bias = NULL_TREE;
9416 if (loop_masks)
9417 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
9418 vec_num * ncopies, vectype,
9419 vec_num * j + i);
9420 if (slp && vec_mask)
9421 vec_mask = vec_masks[i];
9422 if (vec_mask)
9423 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
9424 vec_mask, gsi);
9426 if (i > 0)
9427 /* Bump the vector pointer. */
9428 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
9429 stmt_info, bump);
9431 unsigned misalign;
9432 unsigned HOST_WIDE_INT align;
9433 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9434 if (alignment_support_scheme == dr_aligned)
9435 misalign = 0;
9436 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9438 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9439 misalign = 0;
9441 else
9442 misalign = misalignment;
9443 if (dataref_offset == NULL_TREE
9444 && TREE_CODE (dataref_ptr) == SSA_NAME)
9445 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9446 misalign);
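	  /* The combination below is the alignment actually known at this
	     point; e.g. ALIGN 16 with MISALIGN 4 yields 4 bytes.  */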
9447 align = least_bit_hwi (misalign | align);
9449 /* Compute the IFN to use when LOOP_LENS or FINAL_MASK is valid. */
9450 machine_mode vmode = TYPE_MODE (vectype);
9451 machine_mode new_vmode = vmode;
9452 internal_fn partial_ifn = IFN_LAST;
9453 if (loop_lens)
9455 opt_machine_mode new_ovmode
9456 = get_len_load_store_mode (vmode, false, &partial_ifn);
9457 new_vmode = new_ovmode.require ();
9458 unsigned factor
9459 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9460 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9461 vec_num * ncopies, vectype,
9462 vec_num * j + i, factor);
9464 else if (final_mask)
9466 if (!can_vec_mask_load_store_p (
9467 vmode, TYPE_MODE (TREE_TYPE (final_mask)), false,
9468 &partial_ifn))
9469 gcc_unreachable ();
9472 if (partial_ifn == IFN_MASK_LEN_STORE)
9474 if (!final_len)
9476 /* Pass VF value to 'len' argument of
9477 MASK_LEN_STORE if LOOP_LENS is invalid. */
9478 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9480 if (!final_mask)
9482 /* Pass all ones value to 'mask' argument of
9483 MASK_LEN_STORE if final_mask is invalid. */
9484 mask_vectype = truth_type_for (vectype);
9485 final_mask = build_minus_one_cst (mask_vectype);
9488 if (final_len)
9490 signed char biasval
9491 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9493 bias = build_int_cst (intQI_type_node, biasval);
9496 /* Arguments are ready. Create the new vector stmt. */
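	  /* Depending on what is available this becomes (sketch)
	       .MASK_LEN_STORE (dataref_ptr, align_ptr, final_mask, final_len,
				bias, vec_oprnd);
	     a .LEN_STORE or .MASK_STORE call, or a plain (possibly
	     misaligned) MEM_REF assignment.  */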
9497 if (final_len)
9499 gcall *call;
9500 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9501 /* Need conversion if it's wrapped with VnQI. */
9502 if (vmode != new_vmode)
9504 tree new_vtype
9505 = build_vector_type_for_mode (unsigned_intQI_type_node,
9506 new_vmode);
9507 tree var = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9508 vec_oprnd = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9509 gassign *new_stmt
9510 = gimple_build_assign (var, VIEW_CONVERT_EXPR, vec_oprnd);
9511 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9512 vec_oprnd = var;
9515 if (partial_ifn == IFN_MASK_LEN_STORE)
9516 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9517 dataref_ptr, ptr, final_mask,
9518 final_len, bias, vec_oprnd);
9519 else
9520 call = gimple_build_call_internal (IFN_LEN_STORE, 5,
9521 dataref_ptr, ptr, final_len,
9522 bias, vec_oprnd);
9523 gimple_call_set_nothrow (call, true);
9524 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9525 new_stmt = call;
9527 else if (final_mask)
9529 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9530 gcall *call
9531 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
9532 ptr, final_mask, vec_oprnd);
9533 gimple_call_set_nothrow (call, true);
9534 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9535 new_stmt = call;
9537 else
9539 data_ref
9540 = fold_build2 (MEM_REF, vectype, dataref_ptr,
9541 dataref_offset ? dataref_offset
9542 : build_int_cst (ref_type, 0));
9543 if (alignment_support_scheme == dr_aligned)
9545 else
9546 TREE_TYPE (data_ref)
9547 = build_aligned_type (TREE_TYPE (data_ref),
9548 align * BITS_PER_UNIT);
9549 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9550 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9551 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9554 if (slp)
9555 continue;
9557 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9558 if (!next_stmt_info)
9559 break;
9561 if (!slp && !costing_p)
9563 if (j == 0)
9564 *vec_stmt = new_stmt;
9565 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9569 if (costing_p)
9571 /* When vectorizing a store into the function result, assign
9572 a penalty if the function returns in a multi-register location.
9573 In this case we assume we'll end up having to spill the
9574 vector result and do piecewise loads as a conservative estimate. */
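      /* For instance (illustrative only): if the scalar function returns an
	 aggregate of four 'long' members in multiple registers, each of the
	 NCOPIES vector results is assumed to be spilled and reloaded with
	 NREGS scalar loads, which is what the costs recorded below
	 approximate.  */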
9575 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
9576 if (base
9577 && (TREE_CODE (base) == RESULT_DECL
9578 || (DECL_P (base) && cfun_returns (base)))
9579 && !aggregate_value_p (base, cfun->decl))
9581 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
9582 /* ??? Handle PARALLEL in some way. */
9583 if (REG_P (reg))
9585 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
9586 /* Assume that a single reg-reg move is possible and cheap,
9587 do not account for vector to gp register move cost. */
9588 if (nregs > 1)
9590 /* Spill. */
9591 prologue_cost
9592 += record_stmt_cost (cost_vec, ncopies, vector_store,
9593 stmt_info, 0, vect_epilogue);
9594 /* Loads. */
9595 prologue_cost
9596 += record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
9597 stmt_info, 0, vect_epilogue);
9601 if (dump_enabled_p ())
9602 dump_printf_loc (MSG_NOTE, vect_location,
9603 "vect_model_store_cost: inside_cost = %d, "
9604 "prologue_cost = %d .\n",
9605 inside_cost, prologue_cost);
9608 return true;
9611 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9612 VECTOR_CST mask. No checks are made that the target platform supports the
9613 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9614 vect_gen_perm_mask_checked. */
9616 tree
9617 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9619 tree mask_type;
9621 poly_uint64 nunits = sel.length ();
9622 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9624 mask_type = build_vector_type (ssizetype, nunits);
9625 return vec_perm_indices_to_tree (mask_type, sel);
9628 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9629 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9631 tree
9632 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9634 machine_mode vmode = TYPE_MODE (vectype);
9635 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9636 return vect_gen_perm_mask_any (vectype, sel);
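/* Illustrative usage sketch (cf. perm_mask_for_reverse earlier in this file):
   a mask that reverses a vector with NUNITS lanes can be built as

     vec_perm_builder sel (nunits, 1, 3);
     for (int i = 0; i < 3; ++i)
       sel.quick_push (nunits - 1 - i);
     vec_perm_indices indices (sel, 1, nunits);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   The result is a VECTOR_CST with ssizetype elements, suitable as the
   selector operand of a VEC_PERM_EXPR.  */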
9639 /* Given vector variables X and Y that were generated for the scalar
9640 STMT_INFO, generate instructions to permute the vector elements of X and Y
9641 using permutation mask MASK_VEC, insert them at *GSI and return the
9642 permuted vector variable. */
9644 static tree
9645 permute_vec_elements (vec_info *vinfo,
9646 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9647 gimple_stmt_iterator *gsi)
9649 tree vectype = TREE_TYPE (x);
9650 tree perm_dest, data_ref;
9651 gimple *perm_stmt;
9653 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9654 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9655 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9656 else
9657 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9658 data_ref = make_ssa_name (perm_dest);
9660 /* Generate the permute statement. */
9661 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9662 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9664 return data_ref;
9667 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9668 inserting them on the loop's preheader edge. Returns true if we
9669 were successful in doing so (and thus STMT_INFO can then be moved),
9670 otherwise returns false. HOIST_P indicates whether we actually want to
9671 hoist the definitions of all SSA uses; it is false when we are only costing. */
9673 static bool
9674 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9676 ssa_op_iter i;
9677 tree op;
9678 bool any = false;
9680 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9682 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9683 if (!gimple_nop_p (def_stmt)
9684 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9686 /* Make sure we don't need to recurse. While we could do
9687 so in simple cases when there are more complex use webs
9688 we don't have an easy way to preserve stmt order to fulfil
9689 dependencies within them. */
9690 tree op2;
9691 ssa_op_iter i2;
9692 if (gimple_code (def_stmt) == GIMPLE_PHI)
9693 return false;
9694 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9696 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9697 if (!gimple_nop_p (def_stmt2)
9698 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9699 return false;
9701 any = true;
9705 if (!any)
9706 return true;
9708 if (!hoist_p)
9709 return true;
9711 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9713 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9714 if (!gimple_nop_p (def_stmt)
9715 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9717 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9718 gsi_remove (&gsi, false);
9719 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9723 return true;
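/* For example (illustrative only), given the invariant load in

     for (i = 0; i < n; ++i)
       a[i] = *(p + 4);

   the statement computing the address p + 4 may sit inside the loop;
   hoist_defs_of_uses verifies that all such definitions feeding the load can
   be moved to the preheader, so that vectorizable_load can emit the scalar
   load there and splat its value into a vector.  */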
9726 /* vectorizable_load.
9728 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9729 that can be vectorized.
9730 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9731 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9732 Return true if STMT_INFO is vectorizable in this way. */
9734 static bool
9735 vectorizable_load (vec_info *vinfo,
9736 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9737 gimple **vec_stmt, slp_tree slp_node,
9738 stmt_vector_for_cost *cost_vec)
9740 tree scalar_dest;
9741 tree vec_dest = NULL;
9742 tree data_ref = NULL;
9743 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9744 class loop *loop = NULL;
9745 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9746 bool nested_in_vect_loop = false;
9747 tree elem_type;
9748 /* Avoid false positive uninitialized warning, see PR110652. */
9749 tree new_temp = NULL_TREE;
9750 machine_mode mode;
9751 tree dummy;
9752 tree dataref_ptr = NULL_TREE;
9753 tree dataref_offset = NULL_TREE;
9754 gimple *ptr_incr = NULL;
9755 int ncopies;
9756 int i, j;
9757 unsigned int group_size;
9758 poly_uint64 group_gap_adj;
9759 tree msq = NULL_TREE, lsq;
9760 tree realignment_token = NULL_TREE;
9761 gphi *phi = NULL;
9762 vec<tree> dr_chain = vNULL;
9763 bool grouped_load = false;
9764 stmt_vec_info first_stmt_info;
9765 stmt_vec_info first_stmt_info_for_drptr = NULL;
9766 bool compute_in_loop = false;
9767 class loop *at_loop;
9768 int vec_num;
9769 bool slp = (slp_node != NULL);
9770 bool slp_perm = false;
9771 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9772 poly_uint64 vf;
9773 tree aggr_type;
9774 gather_scatter_info gs_info;
9775 tree ref_type;
9776 enum vect_def_type mask_dt = vect_unknown_def_type;
9778 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9779 return false;
9781 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9782 && ! vec_stmt)
9783 return false;
9785 if (!STMT_VINFO_DATA_REF (stmt_info))
9786 return false;
9788 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9789 int mask_index = -1;
9790 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9792 scalar_dest = gimple_assign_lhs (assign);
9793 if (TREE_CODE (scalar_dest) != SSA_NAME)
9794 return false;
9796 tree_code code = gimple_assign_rhs_code (assign);
9797 if (code != ARRAY_REF
9798 && code != BIT_FIELD_REF
9799 && code != INDIRECT_REF
9800 && code != COMPONENT_REF
9801 && code != IMAGPART_EXPR
9802 && code != REALPART_EXPR
9803 && code != MEM_REF
9804 && TREE_CODE_CLASS (code) != tcc_declaration)
9805 return false;
9807 else
9809 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9810 if (!call || !gimple_call_internal_p (call))
9811 return false;
9813 internal_fn ifn = gimple_call_internal_fn (call);
9814 if (!internal_load_fn_p (ifn))
9815 return false;
9817 scalar_dest = gimple_call_lhs (call);
9818 if (!scalar_dest)
9819 return false;
9821 mask_index = internal_fn_mask_index (ifn);
9822 if (mask_index >= 0 && slp_node)
9823 mask_index = vect_slp_child_index_for_operand (call, mask_index);
9824 if (mask_index >= 0
9825 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9826 &mask, NULL, &mask_dt, &mask_vectype))
9827 return false;
9830 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9831 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9833 if (loop_vinfo)
9835 loop = LOOP_VINFO_LOOP (loop_vinfo);
9836 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9837 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9839 else
9840 vf = 1;
9842 /* Multiple types in SLP are handled by creating the appropriate number of
9843 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9844 case of SLP. */
9845 if (slp)
9846 ncopies = 1;
9847 else
9848 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9850 gcc_assert (ncopies >= 1);
9852 /* FORNOW. This restriction should be relaxed. */
9853 if (nested_in_vect_loop && ncopies > 1)
9855 if (dump_enabled_p ())
9856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9857 "multiple types in nested loop.\n");
9858 return false;
9861 /* Invalidate assumptions made by dependence analysis when vectorization
9862 on the unrolled body effectively re-orders stmts. */
9863 if (ncopies > 1
9864 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9865 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9866 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9868 if (dump_enabled_p ())
9869 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9870 "cannot perform implicit CSE when unrolling "
9871 "with negative dependence distance\n");
9872 return false;
9875 elem_type = TREE_TYPE (vectype);
9876 mode = TYPE_MODE (vectype);
9878 /* FORNOW. In some cases can vectorize even if data-type not supported
9879 (e.g. - data copies). */
9880 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9882 if (dump_enabled_p ())
9883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9884 "Aligned load, but unsupported type.\n");
9885 return false;
9888 /* Check if the load is a part of an interleaving chain. */
9889 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9891 grouped_load = true;
9892 /* FORNOW */
9893 gcc_assert (!nested_in_vect_loop);
9894 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9896 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9897 group_size = DR_GROUP_SIZE (first_stmt_info);
9899 /* Refuse non-SLP vectorization of SLP-only groups. */
9900 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9902 if (dump_enabled_p ())
9903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9904 "cannot vectorize load in non-SLP mode.\n");
9905 return false;
9908 /* Invalidate assumptions made by dependence analysis when vectorization
9909 on the unrolled body effectively re-orders stmts. */
9910 if (!PURE_SLP_STMT (stmt_info)
9911 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9912 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9913 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9915 if (dump_enabled_p ())
9916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9917 "cannot perform implicit CSE when performing "
9918 "group loads with negative dependence distance\n");
9919 return false;
9922 else
9923 group_size = 1;
9925 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9927 slp_perm = true;
9929 if (!loop_vinfo)
9931 /* In BB vectorization we may not actually use a loaded vector
9932 accessing elements in excess of DR_GROUP_SIZE. */
9933 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9934 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9935 unsigned HOST_WIDE_INT nunits;
9936 unsigned j, k, maxk = 0;
9937 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9938 if (k > maxk)
9939 maxk = k;
9940 tree vectype = SLP_TREE_VECTYPE (slp_node);
9941 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9942 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9944 if (dump_enabled_p ())
9945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9946 "BB vectorization with gaps at the end of "
9947 "a load is not supported\n");
9948 return false;
9952 auto_vec<tree> tem;
9953 unsigned n_perms;
9954 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9955 true, &n_perms))
9957 if (dump_enabled_p ())
9958 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9959 vect_location,
9960 "unsupported load permutation\n");
9961 return false;
9965 vect_memory_access_type memory_access_type;
9966 enum dr_alignment_support alignment_support_scheme;
9967 int misalignment;
9968 poly_int64 poffset;
9969 internal_fn lanes_ifn;
9970 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
9971 ncopies, &memory_access_type, &poffset,
9972 &alignment_support_scheme, &misalignment, &gs_info,
9973 &lanes_ifn))
9974 return false;
9976 if (mask)
9978 if (memory_access_type == VMAT_CONTIGUOUS)
9980 machine_mode vec_mode = TYPE_MODE (vectype);
9981 if (!VECTOR_MODE_P (vec_mode)
9982 || !can_vec_mask_load_store_p (vec_mode,
9983 TYPE_MODE (mask_vectype), true))
9984 return false;
9986 else if (memory_access_type != VMAT_LOAD_STORE_LANES
9987 && memory_access_type != VMAT_GATHER_SCATTER)
9989 if (dump_enabled_p ())
9990 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9991 "unsupported access type for masked load.\n");
9992 return false;
9994 else if (memory_access_type == VMAT_GATHER_SCATTER
9995 && gs_info.ifn == IFN_LAST
9996 && !gs_info.decl)
9998 if (dump_enabled_p ())
9999 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10000 "unsupported masked emulated gather.\n");
10001 return false;
10005 bool costing_p = !vec_stmt;
10007 if (costing_p) /* transformation not required. */
10009 if (slp_node
10010 && mask
10011 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
10012 mask_vectype))
10014 if (dump_enabled_p ())
10015 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10016 "incompatible vector types for invariants\n");
10017 return false;
10020 if (!slp)
10021 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
10023 if (loop_vinfo
10024 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
10025 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
10026 VLS_LOAD, group_size,
10027 memory_access_type, &gs_info,
10028 mask);
10030 if (dump_enabled_p ()
10031 && memory_access_type != VMAT_ELEMENTWISE
10032 && memory_access_type != VMAT_GATHER_SCATTER
10033 && alignment_support_scheme != dr_aligned)
10034 dump_printf_loc (MSG_NOTE, vect_location,
10035 "Vectorizing an unaligned access.\n");
10037 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10038 vinfo->any_known_not_updated_vssa = true;
10040 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
10043 if (!slp)
10044 gcc_assert (memory_access_type
10045 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
10047 if (dump_enabled_p () && !costing_p)
10048 dump_printf_loc (MSG_NOTE, vect_location,
10049 "transform load. ncopies = %d\n", ncopies);
10051 /* Transform. */
10053 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
10054 ensure_base_align (dr_info);
10056 if (memory_access_type == VMAT_INVARIANT)
10058 gcc_assert (!grouped_load && !mask && !bb_vinfo);
10059 /* If we have versioned for aliasing or the loop doesn't
10060 have any data dependencies that would preclude this,
10061 then we are sure this is a loop invariant load and
10062 thus we can insert it on the preheader edge.
10063 TODO: hoist_defs_of_uses should ideally be computed
10064 once at analysis time, remembered and used in the
10065 transform time. */
10066 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
10067 && !nested_in_vect_loop
10068 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
10069 if (costing_p)
10071 enum vect_cost_model_location cost_loc
10072 = hoist_p ? vect_prologue : vect_body;
10073 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
10074 stmt_info, 0, cost_loc);
10075 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
10076 cost_loc);
10077 unsigned int prologue_cost = hoist_p ? cost : 0;
10078 unsigned int inside_cost = hoist_p ? 0 : cost;
10079 if (dump_enabled_p ())
10080 dump_printf_loc (MSG_NOTE, vect_location,
10081 "vect_model_load_cost: inside_cost = %d, "
10082 "prologue_cost = %d .\n",
10083 inside_cost, prologue_cost);
10084 return true;
10086 if (hoist_p)
10088 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
10089 if (dump_enabled_p ())
10090 dump_printf_loc (MSG_NOTE, vect_location,
10091 "hoisting out of the vectorized loop: %G",
10092 (gimple *) stmt);
10093 scalar_dest = copy_ssa_name (scalar_dest);
10094 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
10095 edge pe = loop_preheader_edge (loop);
10096 gphi *vphi = get_virtual_phi (loop->header);
10097 tree vuse;
10098 if (vphi)
10099 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
10100 else
10101 vuse = gimple_vuse (gsi_stmt (*gsi));
10102 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
10103 gimple_set_vuse (new_stmt, vuse);
10104 gsi_insert_on_edge_immediate (pe, new_stmt);
10106 /* These copies are all equivalent. */
10107 if (hoist_p)
10108 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10109 vectype, NULL);
10110 else
10112 gimple_stmt_iterator gsi2 = *gsi;
10113 gsi_next (&gsi2);
10114 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
10115 vectype, &gsi2);
10117 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
10118 if (slp)
10119 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
10120 slp_node->push_vec_def (new_stmt);
10121 else
10123 for (j = 0; j < ncopies; ++j)
10124 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10125 *vec_stmt = new_stmt;
10127 return true;
10130 if (memory_access_type == VMAT_ELEMENTWISE
10131 || memory_access_type == VMAT_STRIDED_SLP)
10133 gimple_stmt_iterator incr_gsi;
10134 bool insert_after;
10135 tree offvar;
10136 tree ivstep;
10137 tree running_off;
10138 vec<constructor_elt, va_gc> *v = NULL;
10139 tree stride_base, stride_step, alias_off;
10140 /* Checked by get_load_store_type. */
10141 unsigned int const_nunits = nunits.to_constant ();
10142 unsigned HOST_WIDE_INT cst_offset = 0;
10143 tree dr_offset;
10144 unsigned int inside_cost = 0;
10146 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
10147 gcc_assert (!nested_in_vect_loop);
10149 if (grouped_load)
10151 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10152 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10154 else
10156 first_stmt_info = stmt_info;
10157 first_dr_info = dr_info;
10160 if (slp && grouped_load)
10162 group_size = DR_GROUP_SIZE (first_stmt_info);
10163 ref_type = get_group_alias_ptr_type (first_stmt_info);
10165 else
10167 if (grouped_load)
10168 cst_offset
10169 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
10170 * vect_get_place_in_interleaving_chain (stmt_info,
10171 first_stmt_info));
10172 group_size = 1;
10173 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
10176 if (!costing_p)
10178 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
10179 stride_base = fold_build_pointer_plus (
10180 DR_BASE_ADDRESS (first_dr_info->dr),
10181 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
10182 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
10183 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
10185 /* For a load with loop-invariant (but other than power-of-2)
10186 stride (i.e. not a grouped access) like so:
10188 for (i = 0; i < n; i += stride)
10189 ... = array[i];
10191 we generate a new induction variable and new accesses to
10192 form a new vector (or vectors, depending on ncopies):
10194 for (j = 0; ; j += VF*stride)
10195 tmp1 = array[j];
10196 tmp2 = array[j + stride];
10198 vectemp = {tmp1, tmp2, ...}
10201 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
10202 build_int_cst (TREE_TYPE (stride_step), vf));
10204 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
10206 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
10207 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
10208 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
10209 loop, &incr_gsi, insert_after,
10210 &offvar, NULL);
10212 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
10215 running_off = offvar;
10216 alias_off = build_int_cst (ref_type, 0);
10217 int nloads = const_nunits;
10218 int lnel = 1;
10219 tree ltype = TREE_TYPE (vectype);
10220 tree lvectype = vectype;
10221 auto_vec<tree> dr_chain;
10222 if (memory_access_type == VMAT_STRIDED_SLP)
10224 if (group_size < const_nunits)
10226 /* First check if vec_init optab supports construction from vector
10227 elts directly. Otherwise avoid emitting a constructor of
10228 vector elements by performing the loads using an integer type
10229 of the same size, constructing a vector of those and then
10230 re-interpreting it as the original vector type. This avoids a
10231 huge runtime penalty due to the general inability to perform
10232 store forwarding from smaller stores to a larger load. */
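	      /* E.g. (a sketch) for a V4SI vectype and group_size == 2 the
		 accesses are done as two 64-bit loads (either 2-element
		 sub-vectors or DImode integers, whichever the target
		 supports), combined into a two-element vector and
		 view-converted back to V4SI.  */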
10233 tree ptype;
10234 tree vtype
10235 = vector_vector_composition_type (vectype,
10236 const_nunits / group_size,
10237 &ptype);
10238 if (vtype != NULL_TREE)
10240 nloads = const_nunits / group_size;
10241 lnel = group_size;
10242 lvectype = vtype;
10243 ltype = ptype;
10246 else
10248 nloads = 1;
10249 lnel = const_nunits;
10250 ltype = vectype;
10252 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
10254 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
10255 else if (nloads == 1)
10256 ltype = vectype;
10258 if (slp)
10260 /* For SLP permutation support we need to load the whole group,
10261 not only the number of vector stmts the permutation result
10262 fits in. */
10263 if (slp_perm)
10265 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10266 variable VF. */
10267 unsigned int const_vf = vf.to_constant ();
10268 ncopies = CEIL (group_size * const_vf, const_nunits);
10269 dr_chain.create (ncopies);
10271 else
10272 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10274 unsigned int group_el = 0;
10275 unsigned HOST_WIDE_INT
10276 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
10277 unsigned int n_groups = 0;
10278 for (j = 0; j < ncopies; j++)
10280 if (nloads > 1 && !costing_p)
10281 vec_alloc (v, nloads);
10282 gimple *new_stmt = NULL;
10283 for (i = 0; i < nloads; i++)
10285 if (costing_p)
10287 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10288 avoid ICE, see PR110776. */
10289 if (VECTOR_TYPE_P (ltype)
10290 && memory_access_type != VMAT_ELEMENTWISE)
10291 vect_get_load_cost (vinfo, stmt_info, 1,
10292 alignment_support_scheme, misalignment,
10293 false, &inside_cost, nullptr, cost_vec,
10294 cost_vec, true);
10295 else
10296 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
10297 stmt_info, 0, vect_body);
10298 continue;
10300 tree this_off = build_int_cst (TREE_TYPE (alias_off),
10301 group_el * elsz + cst_offset);
10302 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
10303 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10304 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
10305 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10306 if (nloads > 1)
10307 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10308 gimple_assign_lhs (new_stmt));
10310 group_el += lnel;
10311 if (! slp
10312 || group_el == group_size)
10314 n_groups++;
10315 /* When doing SLP make sure to not load elements from
10316 the next vector iteration, those will not be accessed
10317 so just use the last element again. See PR107451. */
10318 if (!slp || known_lt (n_groups, vf))
10320 tree newoff = copy_ssa_name (running_off);
10321 gimple *incr
10322 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10323 running_off, stride_step);
10324 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10325 running_off = newoff;
10327 group_el = 0;
10331 if (nloads > 1)
10333 if (costing_p)
10334 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10335 stmt_info, 0, vect_body);
10336 else
10338 tree vec_inv = build_constructor (lvectype, v);
10339 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10340 lvectype, gsi);
10341 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10342 if (lvectype != vectype)
10344 new_stmt
10345 = gimple_build_assign (make_ssa_name (vectype),
10346 VIEW_CONVERT_EXPR,
10347 build1 (VIEW_CONVERT_EXPR,
10348 vectype, new_temp));
10349 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10350 gsi);
10355 if (!costing_p)
10357 if (slp)
10359 if (slp_perm)
10360 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10361 else
10362 slp_node->push_vec_def (new_stmt);
10364 else
10366 if (j == 0)
10367 *vec_stmt = new_stmt;
10368 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10372 if (slp_perm)
10374 unsigned n_perms;
10375 if (costing_p)
10377 unsigned n_loads;
10378 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10379 true, &n_perms, &n_loads);
10380 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10381 first_stmt_info, 0, vect_body);
10383 else
10384 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10385 false, &n_perms);
10388 if (costing_p && dump_enabled_p ())
10389 dump_printf_loc (MSG_NOTE, vect_location,
10390 "vect_model_load_cost: inside_cost = %u, "
10391 "prologue_cost = 0 .\n",
10392 inside_cost);
10394 return true;
10397 if (memory_access_type == VMAT_GATHER_SCATTER
10398 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10399 grouped_load = false;
10401 if (grouped_load
10402 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10404 if (grouped_load)
10406 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10407 group_size = DR_GROUP_SIZE (first_stmt_info);
10409 else
10411 first_stmt_info = stmt_info;
10412 group_size = 1;
10414 /* For SLP vectorization we directly vectorize a subchain
10415 without permutation. */
10416 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10417 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10418 /* For BB vectorization always use the first stmt to base
10419 the data ref pointer on. */
10420 if (bb_vinfo)
10421 first_stmt_info_for_drptr
10422 = vect_find_first_scalar_stmt_in_slp (slp_node);
10424 /* Check if the chain of loads is already vectorized. */
10425 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10426 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10427 ??? But we can only do so if there is exactly one
10428 as we have no way to get at the rest. Leave the CSE
10429 opportunity alone.
10430 ??? With the group load eventually participating
10431 in multiple different permutations (having multiple
10432 slp nodes which refer to the same group) the CSE
10433 would even be wrong code. See PR56270. */
10434 && !slp)
10436 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10437 return true;
10439 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10440 group_gap_adj = 0;
10442 /* VEC_NUM is the number of vect stmts to be created for this group. */
10443 if (slp)
10445 grouped_load = false;
10446 /* If an SLP permutation is from N elements to N elements,
10447 and if one vector holds a whole number of N, we can load
10448 the inputs to the permutation in the same way as an
10449 unpermuted sequence. In other cases we need to load the
10450 whole group, not only the number of vector stmts the
10451 permutation result fits in. */
10452 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10453 if (slp_perm
10454 && (group_size != scalar_lanes
10455 || !multiple_p (nunits, group_size)))
10457 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10458 variable VF; see vect_transform_slp_perm_load. */
10459 unsigned int const_vf = vf.to_constant ();
10460 unsigned int const_nunits = nunits.to_constant ();
10461 vec_num = CEIL (group_size * const_vf, const_nunits);
10462 group_gap_adj = vf * group_size - nunits * vec_num;
10464 else
10466 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10467 group_gap_adj
10468 = group_size - scalar_lanes;
10471 else
10472 vec_num = group_size;
10474 ref_type = get_group_alias_ptr_type (first_stmt_info);
10476 else
10478 first_stmt_info = stmt_info;
10479 first_dr_info = dr_info;
10480 group_size = vec_num = 1;
10481 group_gap_adj = 0;
10482 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10483 if (slp)
10484 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10487 gcc_assert (alignment_support_scheme);
10488 vec_loop_masks *loop_masks
10489 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10490 ? &LOOP_VINFO_MASKS (loop_vinfo)
10491 : NULL);
10492 vec_loop_lens *loop_lens
10493 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10494 ? &LOOP_VINFO_LENS (loop_vinfo)
10495 : NULL);
10497 /* Shouldn't go with length-based approach if fully masked. */
10498 gcc_assert (!loop_lens || !loop_masks);
10500 /* Targets with load-lane instructions must not require explicit
10501 realignment. vect_supportable_dr_alignment always returns either
10502 dr_aligned or dr_unaligned_supported for masked operations. */
10503 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10504 && !mask
10505 && !loop_masks)
10506 || alignment_support_scheme == dr_aligned
10507 || alignment_support_scheme == dr_unaligned_supported);
10509 /* In case the vectorization factor (VF) is bigger than the number
10510 of elements that we can fit in a vectype (nunits), we have to generate
10511 more than one vector stmt, i.e. we need to "unroll" the
10512 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10513 from one copy of the vector stmt to the next, in the field
10514 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10515 stages to find the correct vector defs to be used when vectorizing
10516 stmts that use the defs of the current stmt. The example below
10517 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10518 need to create 4 vectorized stmts):
10520 before vectorization:
10521 RELATED_STMT VEC_STMT
10522 S1: x = memref - -
10523 S2: z = x + 1 - -
10525 step 1: vectorize stmt S1:
10526 We first create the vector stmt VS1_0, and, as usual, record a
10527 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10528 Next, we create the vector stmt VS1_1, and record a pointer to
10529 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10530 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10531 stmts and pointers:
10532 RELATED_STMT VEC_STMT
10533 VS1_0: vx0 = memref0 VS1_1 -
10534 VS1_1: vx1 = memref1 VS1_2 -
10535 VS1_2: vx2 = memref2 VS1_3 -
10536 VS1_3: vx3 = memref3 - -
10537 S1: x = load - VS1_0
10538 S2: z = x + 1 - -
10541 /* In case of interleaving (non-unit grouped access):
10543 S1: x2 = &base + 2
10544 S2: x0 = &base
10545 S3: x1 = &base + 1
10546 S4: x3 = &base + 3
10548 Vectorized loads are created in the order of memory accesses
10549 starting from the access of the first stmt of the chain:
10551 VS1: vx0 = &base
10552 VS2: vx1 = &base + vec_size*1
10553 VS3: vx2 = &base + vec_size*2
10554 VS4: vx3 = &base + vec_size*3
10556 Then permutation statements are generated:
10558 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10559 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10562 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10563 (the order of the data-refs in the output of vect_permute_load_chain
10564 corresponds to the order of scalar stmts in the interleaving chain - see
10565 the documentation of vect_permute_load_chain()).
10566 The generation of permutation stmts and recording them in
10567 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10569 In case of both multiple types and interleaving, the vector loads and
10570 permutation stmts above are created for every copy. The result vector
10571 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10572 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
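   /* Illustrative sketch of such an interleaved load group (group size 2):

	for (i = 0; i < n; ++i)
	  {
	    x[i] = a[2*i];
	    y[i] = a[2*i + 1];
	  }

      Two contiguous vector loads cover {a[2*i], a[2*i+1], ...} and
      VEC_PERM_EXPRs with even/odd selectors extract the X and Y lanes.  */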
10574 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10575 on a target that supports unaligned accesses (dr_unaligned_supported)
10576 we generate the following code:
10577 p = initial_addr;
10578 indx = 0;
10579 loop {
10580 p = p + indx * vectype_size;
10581 vec_dest = *(p);
10582 indx = indx + 1;
10585 Otherwise, the data reference is potentially unaligned on a target that
10586 does not support unaligned accesses (dr_explicit_realign_optimized) -
10587 then generate the following code, in which the data in each iteration is
10588 obtained by two vector loads, one from the previous iteration, and one
10589 from the current iteration:
10590 p1 = initial_addr;
10591 msq_init = *(floor(p1))
10592 p2 = initial_addr + VS - 1;
10593 realignment_token = call target_builtin;
10594 indx = 0;
10595 loop {
10596 p2 = p2 + indx * vectype_size
10597 lsq = *(floor(p2))
10598 vec_dest = realign_load (msq, lsq, realignment_token)
10599 indx = indx + 1;
10600 msq = lsq;
10601 } */
10603 /* If the misalignment remains the same throughout the execution of the
10604 loop, we can create the init_addr and permutation mask at the loop
10605 preheader. Otherwise, it needs to be created inside the loop.
10606 This can only occur when vectorizing memory accesses in the inner-loop
10607 nested within an outer-loop that is being vectorized. */
10609 if (nested_in_vect_loop
10610 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10611 GET_MODE_SIZE (TYPE_MODE (vectype))))
10613 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10614 compute_in_loop = true;
10617 bool diff_first_stmt_info
10618 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10620 tree offset = NULL_TREE;
10621 if ((alignment_support_scheme == dr_explicit_realign_optimized
10622 || alignment_support_scheme == dr_explicit_realign)
10623 && !compute_in_loop)
10625 /* If we have different first_stmt_info, we can't set up realignment
10626 here, since we can't guarantee first_stmt_info DR has been
10627 initialized yet, use first_stmt_info_for_drptr DR by bumping the
10628 distance from first_stmt_info DR instead as below. */
10629 if (!costing_p)
10631 if (!diff_first_stmt_info)
10632 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10633 &realignment_token,
10634 alignment_support_scheme, NULL_TREE,
10635 &at_loop);
10636 if (alignment_support_scheme == dr_explicit_realign_optimized)
10638 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10639 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10640 size_one_node);
10641 gcc_assert (!first_stmt_info_for_drptr);
10645 else
10646 at_loop = loop;
10648 if (!known_eq (poffset, 0))
10649 offset = (offset
10650 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10651 : size_int (poffset));
10653 tree bump;
10654 tree vec_offset = NULL_TREE;
10655 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10657 aggr_type = NULL_TREE;
10658 bump = NULL_TREE;
10660 else if (memory_access_type == VMAT_GATHER_SCATTER)
10662 aggr_type = elem_type;
10663 if (!costing_p)
10664 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10665 &bump, &vec_offset, loop_lens);
10667 else
10669 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10670 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10671 else
10672 aggr_type = vectype;
10673 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10674 memory_access_type, loop_lens);
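/* For VMAT_LOAD_STORE_LANES the aggregate spans the whole group, e.g.
vec_num = 4 and nunits = 4 give a 16-element array filled by a single
lanes load below; otherwise the pointer bump covers one vector. */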
10677 auto_vec<tree> vec_offsets;
10678 auto_vec<tree> vec_masks;
10679 if (mask && !costing_p)
10681 if (slp_node)
10682 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10683 &vec_masks);
10684 else
10685 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10686 &vec_masks, mask_vectype);
10689 tree vec_mask = NULL_TREE;
10690 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10692 gcc_assert (alignment_support_scheme == dr_aligned
10693 || alignment_support_scheme == dr_unaligned_supported);
10694 gcc_assert (grouped_load && !slp);
10696 unsigned int inside_cost = 0, prologue_cost = 0;
10697 for (j = 0; j < ncopies; j++)
10699 if (costing_p)
10701 /* An IFN_LOAD_LANES will load all its vector results,
10702 regardless of which ones we actually need. Account
10703 for the cost of unused results. */
10704 if (first_stmt_info == stmt_info)
10706 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10707 stmt_vec_info next_stmt_info = first_stmt_info;
10710 gaps -= 1;
10711 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10713 while (next_stmt_info);
10714 if (gaps)
10716 if (dump_enabled_p ())
10717 dump_printf_loc (MSG_NOTE, vect_location,
10718 "vect_model_load_cost: %d "
10719 "unused vectors.\n",
10720 gaps);
10721 vect_get_load_cost (vinfo, stmt_info, gaps,
10722 alignment_support_scheme,
10723 misalignment, false, &inside_cost,
10724 &prologue_cost, cost_vec, cost_vec,
10725 true);
10728 vect_get_load_cost (vinfo, stmt_info, 1, alignment_support_scheme,
10729 misalignment, false, &inside_cost,
10730 &prologue_cost, cost_vec, cost_vec, true);
10731 continue;
10734 /* 1. Create the vector or array pointer update chain. */
10735 if (j == 0)
10736 dataref_ptr
10737 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10738 at_loop, offset, &dummy, gsi,
10739 &ptr_incr, false, bump);
10740 else
10742 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10743 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10744 stmt_info, bump);
10746 if (mask)
10747 vec_mask = vec_masks[j];
10749 tree vec_array = create_vector_array (vectype, vec_num);
10751 tree final_mask = NULL_TREE;
10752 tree final_len = NULL_TREE;
10753 tree bias = NULL_TREE;
10754 if (loop_masks)
10755 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10756 ncopies, vectype, j);
10757 if (vec_mask)
10758 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10759 vec_mask, gsi);
10761 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10763 if (loop_lens)
10764 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10765 ncopies, vectype, j, 1);
10766 else
10767 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10768 signed char biasval
10769 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10770 bias = build_int_cst (intQI_type_node, biasval);
10771 if (!final_mask)
10773 mask_vectype = truth_type_for (vectype);
10774 final_mask = build_minus_one_cst (mask_vectype);
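/* The LEN operand counts active elements, BIAS is the target's
partial load/store bias (typically 0 or -1), and an all-ones mask
stands in when no separate mask is needed. */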
10778 gcall *call;
10779 if (final_len && final_mask)
10781 /* Emit:
10782 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10783 VEC_MASK, LEN, BIAS). */
10784 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10785 tree alias_ptr = build_int_cst (ref_type, align);
10786 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10787 dataref_ptr, alias_ptr,
10788 final_mask, final_len, bias);
10790 else if (final_mask)
10792 /* Emit:
10793 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10794 VEC_MASK). */
10795 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10796 tree alias_ptr = build_int_cst (ref_type, align);
10797 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10798 dataref_ptr, alias_ptr,
10799 final_mask);
10801 else
10803 /* Emit:
10804 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10805 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10806 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10808 gimple_call_set_lhs (call, vec_array);
10809 gimple_call_set_nothrow (call, true);
10810 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
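/* The call built above leaves the whole group in VEC_ARRAY, roughly
vec_array = .LOAD_LANES (...), or the MASK/MASK_LEN variant chosen
above. */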
10812 dr_chain.create (vec_num);
10813 /* Extract each vector into an SSA_NAME. */
10814 for (i = 0; i < vec_num; i++)
10816 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10817 vec_array, i);
10818 dr_chain.quick_push (new_temp);
10821 /* Record the mapping between SSA_NAMEs and statements. */
10822 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10824 /* Record that VEC_ARRAY is now dead. */
10825 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10827 dr_chain.release ();
10829 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10832 if (costing_p && dump_enabled_p ())
10833 dump_printf_loc (MSG_NOTE, vect_location,
10834 "vect_model_load_cost: inside_cost = %u, "
10835 "prologue_cost = %u .\n",
10836 inside_cost, prologue_cost);
10838 return true;
10841 if (memory_access_type == VMAT_GATHER_SCATTER)
10843 gcc_assert (alignment_support_scheme == dr_aligned
10844 || alignment_support_scheme == dr_unaligned_supported);
10845 gcc_assert (!grouped_load && !slp_perm);
10847 unsigned int inside_cost = 0, prologue_cost = 0;
10848 for (j = 0; j < ncopies; j++)
10850 /* 1. Create the vector or array pointer update chain. */
10851 if (j == 0 && !costing_p)
10853 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10854 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10855 slp_node, &gs_info, &dataref_ptr,
10856 &vec_offsets);
10857 else
10858 dataref_ptr
10859 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10860 at_loop, offset, &dummy, gsi,
10861 &ptr_incr, false, bump);
10863 else if (!costing_p)
10865 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10866 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10867 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10868 gsi, stmt_info, bump);
10871 if (mask && !costing_p)
10872 vec_mask = vec_masks[j];
10874 gimple *new_stmt = NULL;
10875 for (i = 0; i < vec_num; i++)
10877 tree final_mask = NULL_TREE;
10878 tree final_len = NULL_TREE;
10879 tree bias = NULL_TREE;
10880 if (!costing_p)
10882 if (loop_masks)
10883 final_mask
10884 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10885 vec_num * ncopies, vectype,
10886 vec_num * j + i);
10887 if (vec_mask)
10888 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10889 final_mask, vec_mask, gsi);
10891 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10892 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10893 gsi, stmt_info, bump);
10896 /* 2. Create the vector-load in the loop. */
10897 unsigned HOST_WIDE_INT align;
10898 if (gs_info.ifn != IFN_LAST)
10900 if (costing_p)
10902 unsigned int cnunits = vect_nunits_for_cost (vectype);
10903 inside_cost
10904 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10905 stmt_info, 0, vect_body);
10906 continue;
10908 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10909 vec_offset = vec_offsets[vec_num * j + i];
10910 tree zero = build_zero_cst (vectype);
10911 tree scale = size_int (gs_info.scale);
10913 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10915 if (loop_lens)
10916 final_len
10917 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10918 vec_num * ncopies, vectype,
10919 vec_num * j + i, 1);
10920 else
10921 final_len
10922 = build_int_cst (sizetype,
10923 TYPE_VECTOR_SUBPARTS (vectype));
10924 signed char biasval
10925 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10926 bias = build_int_cst (intQI_type_node, biasval);
10927 if (!final_mask)
10929 mask_vectype = truth_type_for (vectype);
10930 final_mask = build_minus_one_cst (mask_vectype);
10934 gcall *call;
10935 if (final_len && final_mask)
10936 call
10937 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
10938 dataref_ptr, vec_offset,
10939 scale, zero, final_mask,
10940 final_len, bias);
10941 else if (final_mask)
10942 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
10943 dataref_ptr, vec_offset,
10944 scale, zero, final_mask);
10945 else
10946 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
10947 dataref_ptr, vec_offset,
10948 scale, zero);
10949 gimple_call_set_nothrow (call, true);
10950 new_stmt = call;
10951 data_ref = NULL_TREE;
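/* For example, with a mask this ends up roughly as
_N = .MASK_GATHER_LOAD (dataref_ptr, vec_offset, scale, { 0, ... },
final_mask); the lhs is attached when the stmt is finished below. */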
10953 else if (gs_info.decl)
10955 /* The builtin decls path for gather is legacy, x86 only. */
10956 gcc_assert (!final_len && nunits.is_constant ());
10957 if (costing_p)
10959 unsigned int cnunits = vect_nunits_for_cost (vectype);
10960 inside_cost
10961 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10962 stmt_info, 0, vect_body);
10963 continue;
10965 poly_uint64 offset_nunits
10966 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
10967 if (known_eq (nunits, offset_nunits))
10969 new_stmt = vect_build_one_gather_load_call
10970 (vinfo, stmt_info, gsi, &gs_info,
10971 dataref_ptr, vec_offsets[vec_num * j + i],
10972 final_mask);
10973 data_ref = NULL_TREE;
10975 else if (known_eq (nunits, offset_nunits * 2))
10977 /* We have an offset vector with half the number of
10978 lanes but the builtins will produce full vectype
10979 data with just the lower lanes filled. */
10980 new_stmt = vect_build_one_gather_load_call
10981 (vinfo, stmt_info, gsi, &gs_info,
10982 dataref_ptr, vec_offsets[2 * vec_num * j + 2 * i],
10983 final_mask);
10984 tree low = make_ssa_name (vectype);
10985 gimple_set_lhs (new_stmt, low);
10986 vect_finish_stmt_generation (vinfo, stmt_info,
10987 new_stmt, gsi);
10989 /* Now put the upper half of FINAL_MASK into its lower half
for the second gather call. */
10990 if (final_mask
10991 && !SCALAR_INT_MODE_P
10992 (TYPE_MODE (TREE_TYPE (final_mask))))
10994 int count = nunits.to_constant ();
10995 vec_perm_builder sel (count, count, 1);
10996 sel.quick_grow (count);
10997 for (int i = 0; i < count; ++i)
10998 sel[i] = i | (count / 2);
10999 vec_perm_indices indices (sel, 2, count);
11000 tree perm_mask = vect_gen_perm_mask_checked
11001 (TREE_TYPE (final_mask), indices);
11002 new_stmt = gimple_build_assign (NULL_TREE,
11003 VEC_PERM_EXPR,
11004 final_mask,
11005 final_mask,
11006 perm_mask);
11007 final_mask = make_ssa_name (TREE_TYPE (final_mask));
11008 gimple_set_lhs (new_stmt, final_mask);
11009 vect_finish_stmt_generation (vinfo, stmt_info,
11010 new_stmt, gsi);
11012 else if (final_mask)
11014 new_stmt = gimple_build_assign (NULL_TREE,
11015 VEC_UNPACK_HI_EXPR,
11016 final_mask);
11017 final_mask = make_ssa_name
11018 (truth_type_for (gs_info.offset_vectype));
11019 gimple_set_lhs (new_stmt, final_mask);
11020 vect_finish_stmt_generation (vinfo, stmt_info,
11021 new_stmt, gsi);
11024 new_stmt = vect_build_one_gather_load_call
11025 (vinfo, stmt_info, gsi, &gs_info,
11026 dataref_ptr,
11027 vec_offsets[2 * vec_num * j + 2 * i + 1],
11028 final_mask);
11029 tree high = make_ssa_name (vectype);
11030 gimple_set_lhs (new_stmt, high);
11031 vect_finish_stmt_generation (vinfo, stmt_info,
11032 new_stmt, gsi);
11034 /* compose low + high. */
11035 int count = nunits.to_constant ();
11036 vec_perm_builder sel (count, count, 1);
11037 sel.quick_grow (count);
11038 for (int i = 0; i < count; ++i)
11039 sel[i] = i < count / 2 ? i : i + count / 2;
11040 vec_perm_indices indices (sel, 2, count);
11041 tree perm_mask
11042 = vect_gen_perm_mask_checked (vectype, indices);
11043 new_stmt = gimple_build_assign (NULL_TREE,
11044 VEC_PERM_EXPR,
11045 low, high, perm_mask);
11046 data_ref = NULL_TREE;
11048 else if (known_eq (nunits * 2, offset_nunits))
11050 /* We have an offset vector with double the number of
11051 lanes. Select the low/high part accordingly. */
11052 vec_offset = vec_offsets[(vec_num * j + i) / 2];
11053 if ((vec_num * j + i) & 1)
11055 int count = offset_nunits.to_constant ();
11056 vec_perm_builder sel (count, count, 1);
11057 sel.quick_grow (count);
11058 for (int i = 0; i < count; ++i)
11059 sel[i] = i | (count / 2);
11060 vec_perm_indices indices (sel, 2, count);
11061 tree perm_mask = vect_gen_perm_mask_checked
11062 (TREE_TYPE (vec_offset), indices);
11063 new_stmt = gimple_build_assign (NULL_TREE,
11064 VEC_PERM_EXPR,
11065 vec_offset,
11066 vec_offset,
11067 perm_mask);
11068 vec_offset = make_ssa_name (TREE_TYPE (vec_offset));
11069 gimple_set_lhs (new_stmt, vec_offset);
11070 vect_finish_stmt_generation (vinfo, stmt_info,
11071 new_stmt, gsi);
11073 new_stmt = vect_build_one_gather_load_call
11074 (vinfo, stmt_info, gsi, &gs_info,
11075 dataref_ptr, vec_offset, final_mask);
11076 data_ref = NULL_TREE;
11078 else
11079 gcc_unreachable ();
11081 else
11083 /* Emulated gather-scatter. */
11084 gcc_assert (!final_mask);
11085 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
11086 if (costing_p)
11088 /* For emulated gathers N offset vector element extracts (we assume
11089 the scalar scaling and ptr + offset add is consumed by the load). */
11090 inside_cost = record_stmt_cost (cost_vec, const_nunits,
11091 vec_to_scalar, stmt_info,
11092 0, vect_body);
11093 /* N scalar loads plus gathering them into a
11094 vector. */
11095 inside_cost
11096 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
11097 stmt_info, 0, vect_body);
11098 inside_cost
11099 = record_stmt_cost (cost_vec, 1, vec_construct,
11100 stmt_info, 0, vect_body);
11101 continue;
11103 unsigned HOST_WIDE_INT const_offset_nunits
11104 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
11105 .to_constant ();
11106 vec<constructor_elt, va_gc> *ctor_elts;
11107 vec_alloc (ctor_elts, const_nunits);
11108 gimple_seq stmts = NULL;
11109 /* We support offset vectors with more elements
11110 than the data vector for now. */
11111 unsigned HOST_WIDE_INT factor
11112 = const_offset_nunits / const_nunits;
11113 vec_offset = vec_offsets[j / factor];
11114 unsigned elt_offset = (j % factor) * const_nunits;
11115 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
11116 tree scale = size_int (gs_info.scale);
11117 align = get_object_alignment (DR_REF (first_dr_info->dr));
11118 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
11119 for (unsigned k = 0; k < const_nunits; ++k)
11121 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
11122 bitsize_int (k + elt_offset));
11123 tree idx
11124 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
11125 vec_offset, TYPE_SIZE (idx_type), boff);
11126 idx = gimple_convert (&stmts, sizetype, idx);
11127 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
11128 scale);
11129 tree ptr = gimple_build (&stmts, PLUS_EXPR,
11130 TREE_TYPE (dataref_ptr),
11131 dataref_ptr, idx);
11132 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
11133 tree elt = make_ssa_name (TREE_TYPE (vectype));
11134 tree ref = build2 (MEM_REF, ltype, ptr,
11135 build_int_cst (ref_type, 0));
11136 new_stmt = gimple_build_assign (elt, ref);
11137 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
11138 gimple_seq_add_stmt (&stmts, new_stmt);
11139 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
11141 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
11142 new_stmt = gimple_build_assign (
11143 NULL_TREE, build_constructor (vectype, ctor_elts));
11144 data_ref = NULL_TREE;
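/* The emulated gather thus becomes const_nunits scalar loads
elt_k = *(dataref_ptr + off_k * scale) assembled into one vector
by the CONSTRUCTOR above. */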
11147 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11148 /* DATA_REF is null if we've already built the statement. */
11149 if (data_ref)
11151 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11152 new_stmt = gimple_build_assign (vec_dest, data_ref);
11154 new_temp = make_ssa_name (vec_dest, new_stmt);
11155 gimple_set_lhs (new_stmt, new_temp);
11156 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11158 /* Store vector loads in the corresponding SLP_NODE. */
11159 if (slp)
11160 slp_node->push_vec_def (new_stmt);
11163 if (!slp && !costing_p)
11164 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11167 if (!slp && !costing_p)
11168 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11170 if (costing_p && dump_enabled_p ())
11171 dump_printf_loc (MSG_NOTE, vect_location,
11172 "vect_model_load_cost: inside_cost = %u, "
11173 "prologue_cost = %u .\n",
11174 inside_cost, prologue_cost);
11175 return true;
11178 poly_uint64 group_elt = 0;
11179 unsigned int inside_cost = 0, prologue_cost = 0;
11180 for (j = 0; j < ncopies; j++)
11182 /* 1. Create the vector or array pointer update chain. */
11183 if (j == 0 && !costing_p)
11185 bool simd_lane_access_p
11186 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
11187 if (simd_lane_access_p
11188 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
11189 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
11190 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
11191 && integer_zerop (DR_INIT (first_dr_info->dr))
11192 && alias_sets_conflict_p (get_alias_set (aggr_type),
11193 get_alias_set (TREE_TYPE (ref_type)))
11194 && (alignment_support_scheme == dr_aligned
11195 || alignment_support_scheme == dr_unaligned_supported))
11197 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
11198 dataref_offset = build_int_cst (ref_type, 0);
11200 else if (diff_first_stmt_info)
11202 dataref_ptr
11203 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
11204 aggr_type, at_loop, offset, &dummy,
11205 gsi, &ptr_incr, simd_lane_access_p,
11206 bump);
11207 /* Adjust the pointer by the difference to first_stmt. */
11208 data_reference_p ptrdr
11209 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
11210 tree diff
11211 = fold_convert (sizetype,
11212 size_binop (MINUS_EXPR,
11213 DR_INIT (first_dr_info->dr),
11214 DR_INIT (ptrdr)));
11215 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11216 stmt_info, diff);
11217 if (alignment_support_scheme == dr_explicit_realign)
11219 msq = vect_setup_realignment (vinfo,
11220 first_stmt_info_for_drptr, gsi,
11221 &realignment_token,
11222 alignment_support_scheme,
11223 dataref_ptr, &at_loop);
11224 gcc_assert (!compute_in_loop);
11227 else
11228 dataref_ptr
11229 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
11230 at_loop,
11231 offset, &dummy, gsi, &ptr_incr,
11232 simd_lane_access_p, bump);
11233 if (mask)
11234 vec_mask = vec_masks[0];
11236 else if (!costing_p)
11238 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
11239 if (dataref_offset)
11240 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
11241 bump);
11242 else
11243 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11244 stmt_info, bump);
11245 if (mask)
11246 vec_mask = vec_masks[j];
11249 if (grouped_load || slp_perm)
11250 dr_chain.create (vec_num);
11252 gimple *new_stmt = NULL;
11253 for (i = 0; i < vec_num; i++)
11255 tree final_mask = NULL_TREE;
11256 tree final_len = NULL_TREE;
11257 tree bias = NULL_TREE;
11258 if (!costing_p)
11260 if (loop_masks)
11261 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
11262 vec_num * ncopies, vectype,
11263 vec_num * j + i);
11264 if (vec_mask)
11265 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
11266 final_mask, vec_mask, gsi);
11268 if (i > 0)
11269 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
11270 gsi, stmt_info, bump);
11273 /* 2. Create the vector-load in the loop. */
11274 switch (alignment_support_scheme)
11276 case dr_aligned:
11277 case dr_unaligned_supported:
11279 if (costing_p)
11280 break;
11282 unsigned int misalign;
11283 unsigned HOST_WIDE_INT align;
11284 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
11285 if (alignment_support_scheme == dr_aligned)
11286 misalign = 0;
11287 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
11289 align
11290 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
11291 misalign = 0;
11293 else
11294 misalign = misalignment;
11295 if (dataref_offset == NULL_TREE
11296 && TREE_CODE (dataref_ptr) == SSA_NAME)
11297 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
11298 misalign);
11299 align = least_bit_hwi (misalign | align);
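/* least_bit_hwi yields the alignment that is actually guaranteed,
e.g. a target alignment of 16 with misalign 4 gives 4, while
misalign 0 keeps 16. */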
11301 /* Compute which IFN to use when LOOP_LENS or FINAL_MASK is valid. */
11302 machine_mode vmode = TYPE_MODE (vectype);
11303 machine_mode new_vmode = vmode;
11304 internal_fn partial_ifn = IFN_LAST;
11305 if (loop_lens)
11307 opt_machine_mode new_ovmode
11308 = get_len_load_store_mode (vmode, true, &partial_ifn);
11309 new_vmode = new_ovmode.require ();
11310 unsigned factor
11311 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
11312 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
11313 vec_num * ncopies, vectype,
11314 vec_num * j + i, factor);
11316 else if (final_mask)
11318 if (!can_vec_mask_load_store_p (
11319 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
11320 &partial_ifn))
11321 gcc_unreachable ();
11324 if (partial_ifn == IFN_MASK_LEN_LOAD)
11326 if (!final_len)
11328 /* Pass VF value to 'len' argument of
11329 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11330 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11332 if (!final_mask)
11334 /* Pass all ones value to 'mask' argument of
11335 MASK_LEN_LOAD if final_mask is invalid. */
11336 mask_vectype = truth_type_for (vectype);
11337 final_mask = build_minus_one_cst (mask_vectype);
11340 if (final_len)
11342 signed char biasval
11343 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
11345 bias = build_int_cst (intQI_type_node, biasval);
11348 if (final_len)
11350 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11351 gcall *call;
11352 if (partial_ifn == IFN_MASK_LEN_LOAD)
11353 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
11354 dataref_ptr, ptr,
11355 final_mask, final_len,
11356 bias);
11357 else
11358 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
11359 dataref_ptr, ptr,
11360 final_len, bias);
11361 gimple_call_set_nothrow (call, true);
11362 new_stmt = call;
11363 data_ref = NULL_TREE;
11365 /* Need conversion if it's wrapped with VnQI. */
11366 if (vmode != new_vmode)
11368 tree new_vtype = build_vector_type_for_mode (
11369 unsigned_intQI_type_node, new_vmode);
11370 tree var
11371 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
11372 gimple_set_lhs (call, var);
11373 vect_finish_stmt_generation (vinfo, stmt_info, call,
11374 gsi);
11375 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
11376 new_stmt = gimple_build_assign (vec_dest,
11377 VIEW_CONVERT_EXPR, op);
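/* When the target only offers len loads on a VnQI view of the
vector, the data is loaded in that mode with LEN counted in bytes
(hence FACTOR above) and VIEW_CONVERTed back to VECTYPE here. */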
11380 else if (final_mask)
11382 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
11383 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
11384 dataref_ptr, ptr,
11385 final_mask);
11386 gimple_call_set_nothrow (call, true);
11387 new_stmt = call;
11388 data_ref = NULL_TREE;
11390 else
11392 tree ltype = vectype;
11393 tree new_vtype = NULL_TREE;
11394 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
11395 unsigned int vect_align
11396 = vect_known_alignment_in_bytes (first_dr_info, vectype);
11397 unsigned int scalar_dr_size
11398 = vect_get_scalar_dr_size (first_dr_info);
11399 /* If there's no peeling for gaps but we have a gap
11400 with slp loads then load the lower half of the
11401 vector only. See get_group_load_store_type for
11402 when we apply this optimization. */
11403 if (slp
11404 && loop_vinfo
11405 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
11406 && known_eq (nunits, (group_size - gap) * 2)
11407 && known_eq (nunits, group_size)
11408 && gap >= (vect_align / scalar_dr_size))
11410 tree half_vtype;
11411 new_vtype
11412 = vector_vector_composition_type (vectype, 2,
11413 &half_vtype);
11414 if (new_vtype != NULL_TREE)
11415 ltype = half_vtype;
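/* E.g. group_size = 4 with gap = 2 and a 4-element vector only loads
the low 2-element half here; the CONSTRUCTOR below pads the upper
half with zeros (the lower half for a reverse access). */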
11417 tree offset
11418 = (dataref_offset ? dataref_offset
11419 : build_int_cst (ref_type, 0));
11420 if (ltype != vectype
11421 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11423 unsigned HOST_WIDE_INT gap_offset
11424 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
11425 tree gapcst = build_int_cst (ref_type, gap_offset);
11426 offset = size_binop (PLUS_EXPR, offset, gapcst);
11428 data_ref
11429 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
11430 if (alignment_support_scheme == dr_aligned)
11432 else
11433 TREE_TYPE (data_ref)
11434 = build_aligned_type (TREE_TYPE (data_ref),
11435 align * BITS_PER_UNIT);
11436 if (ltype != vectype)
11438 vect_copy_ref_info (data_ref,
11439 DR_REF (first_dr_info->dr));
11440 tree tem = make_ssa_name (ltype);
11441 new_stmt = gimple_build_assign (tem, data_ref);
11442 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
11443 gsi);
11444 data_ref = NULL;
11445 vec<constructor_elt, va_gc> *v;
11446 vec_alloc (v, 2);
11447 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11449 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11450 build_zero_cst (ltype));
11451 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11453 else
11455 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11456 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11457 build_zero_cst (ltype));
11459 gcc_assert (new_vtype != NULL_TREE);
11460 if (new_vtype == vectype)
11461 new_stmt = gimple_build_assign (
11462 vec_dest, build_constructor (vectype, v));
11463 else
11465 tree new_vname = make_ssa_name (new_vtype);
11466 new_stmt = gimple_build_assign (
11467 new_vname, build_constructor (new_vtype, v));
11468 vect_finish_stmt_generation (vinfo, stmt_info,
11469 new_stmt, gsi);
11470 new_stmt = gimple_build_assign (
11471 vec_dest,
11472 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11476 break;
11478 case dr_explicit_realign:
11480 if (costing_p)
11481 break;
11482 tree ptr, bump;
11484 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11486 if (compute_in_loop)
11487 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11488 &realignment_token,
11489 dr_explicit_realign,
11490 dataref_ptr, NULL);
11492 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11493 ptr = copy_ssa_name (dataref_ptr);
11494 else
11495 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11496 // For explicit realign the target alignment should be
11497 // known at compile time.
11498 unsigned HOST_WIDE_INT align
11499 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11500 new_stmt = gimple_build_assign (
11501 ptr, BIT_AND_EXPR, dataref_ptr,
11502 build_int_cst (TREE_TYPE (dataref_ptr),
11503 -(HOST_WIDE_INT) align));
11504 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11505 data_ref
11506 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11507 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11508 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11509 new_stmt = gimple_build_assign (vec_dest, data_ref);
11510 new_temp = make_ssa_name (vec_dest, new_stmt);
11511 gimple_assign_set_lhs (new_stmt, new_temp);
11512 gimple_move_vops (new_stmt, stmt_info->stmt);
11513 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11514 msq = new_temp;
11516 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11517 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11518 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11519 bump);
11520 new_stmt = gimple_build_assign (
11521 NULL_TREE, BIT_AND_EXPR, ptr,
11522 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11523 if (TREE_CODE (ptr) == SSA_NAME)
11524 ptr = copy_ssa_name (ptr, new_stmt);
11525 else
11526 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11527 gimple_assign_set_lhs (new_stmt, ptr);
11528 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11529 data_ref
11530 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11531 break;
11533 case dr_explicit_realign_optimized:
11535 if (costing_p)
11536 break;
11537 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11538 new_temp = copy_ssa_name (dataref_ptr);
11539 else
11540 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11541 // We should only be doing this if we know the target
11542 // alignment at compile time.
11543 unsigned HOST_WIDE_INT align
11544 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11545 new_stmt = gimple_build_assign (
11546 new_temp, BIT_AND_EXPR, dataref_ptr,
11547 build_int_cst (TREE_TYPE (dataref_ptr),
11548 -(HOST_WIDE_INT) align));
11549 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11550 data_ref = build2 (MEM_REF, vectype, new_temp,
11551 build_int_cst (ref_type, 0));
11552 break;
11554 default:
11555 gcc_unreachable ();
11558 /* One common place to cost the above vect load for different
11559 alignment support schemes. */
11560 if (costing_p)
11562 /* For VMAT_CONTIGUOUS_PERMUTE, if it's a grouped load we
11563 only need to take care of the first stmt, whose
11564 stmt_info is first_stmt_info; iterating vec_num times on
11565 it covers the cost for the remaining stmts, consistent
11566 with the transform phase. For the realign prologue cost,
11567 we only need to count it once for the whole group. */
11568 bool first_stmt_info_p = first_stmt_info == stmt_info;
11569 bool add_realign_cost = first_stmt_info_p && i == 0;
11570 if (memory_access_type == VMAT_CONTIGUOUS
11571 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11572 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11573 && (!grouped_load || first_stmt_info_p)))
11574 vect_get_load_cost (vinfo, stmt_info, 1,
11575 alignment_support_scheme, misalignment,
11576 add_realign_cost, &inside_cost,
11577 &prologue_cost, cost_vec, cost_vec, true);
11579 else
11581 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11582 /* DATA_REF is null if we've already built the statement. */
11583 if (data_ref)
11585 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11586 new_stmt = gimple_build_assign (vec_dest, data_ref);
11588 new_temp = make_ssa_name (vec_dest, new_stmt);
11589 gimple_set_lhs (new_stmt, new_temp);
11590 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11593 /* 3. Handle explicit realignment if necessary/supported.
11594 Create in loop:
11595 vec_dest = realign_load (msq, lsq, realignment_token) */
11596 if (!costing_p
11597 && (alignment_support_scheme == dr_explicit_realign_optimized
11598 || alignment_support_scheme == dr_explicit_realign))
11600 lsq = gimple_assign_lhs (new_stmt);
11601 if (!realignment_token)
11602 realignment_token = dataref_ptr;
11603 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11604 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11605 lsq, realignment_token);
11606 new_temp = make_ssa_name (vec_dest, new_stmt);
11607 gimple_assign_set_lhs (new_stmt, new_temp);
11608 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11610 if (alignment_support_scheme == dr_explicit_realign_optimized)
11612 gcc_assert (phi);
11613 if (i == vec_num - 1 && j == ncopies - 1)
11614 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11615 UNKNOWN_LOCATION);
11616 msq = lsq;
11620 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11622 if (costing_p)
11623 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11624 stmt_info, 0, vect_body);
11625 else
11627 tree perm_mask = perm_mask_for_reverse (vectype);
11628 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11629 perm_mask, stmt_info, gsi);
11630 new_stmt = SSA_NAME_DEF_STMT (new_temp);
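/* With a negative step the contiguous load returns the elements in
reverse of the scalar order, so a VEC_PERM_EXPR with the reversal
mask from perm_mask_for_reverse restores that order. */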
11634 /* Collect vector loads and later create their permutation in
11635 vect_transform_grouped_load (). */
11636 if (!costing_p && (grouped_load || slp_perm))
11637 dr_chain.quick_push (new_temp);
11639 /* Store vector loads in the corresponding SLP_NODE. */
11640 if (!costing_p && slp && !slp_perm)
11641 slp_node->push_vec_def (new_stmt);
11643 /* With an SLP permutation we load the gaps as well; without
11644 one we need to skip the gaps after we manage to fully load
11645 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11646 group_elt += nunits;
11647 if (!costing_p
11648 && maybe_ne (group_gap_adj, 0U)
11649 && !slp_perm
11650 && known_eq (group_elt, group_size - group_gap_adj))
11652 poly_wide_int bump_val
11653 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11654 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11655 == -1)
11656 bump_val = -bump_val;
11657 tree bump = wide_int_to_tree (sizetype, bump_val);
11658 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11659 stmt_info, bump);
11660 group_elt = 0;
11663 /* Bump the vector pointer to account for a gap or for excess
11664 elements loaded for a permuted SLP load. */
11665 if (!costing_p
11666 && maybe_ne (group_gap_adj, 0U)
11667 && slp_perm)
11669 poly_wide_int bump_val
11670 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11671 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11672 bump_val = -bump_val;
11673 tree bump = wide_int_to_tree (sizetype, bump_val);
11674 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11675 stmt_info, bump);
11678 if (slp && !slp_perm)
11679 continue;
11681 if (slp_perm)
11683 unsigned n_perms;
11684 /* For SLP we know we've seen all possible uses of dr_chain so
11685 direct vect_transform_slp_perm_load to DCE the unused parts.
11686 ??? This is a hack to prevent compile-time issues as seen
11687 in PR101120 and friends. */
11688 if (costing_p)
11690 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11691 true, &n_perms, nullptr);
11692 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11693 stmt_info, 0, vect_body);
11695 else
11697 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11698 gsi, vf, false, &n_perms,
11699 nullptr, true);
11700 gcc_assert (ok);
11703 else
11705 if (grouped_load)
11707 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11708 /* We assume that the cost of a single load-lanes instruction
11709 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11710 If a grouped access is instead being provided by a
11711 load-and-permute operation, include the cost of the
11712 permutes. */
11713 if (costing_p && first_stmt_info == stmt_info)
11715 /* Uses even and odd extract operations or shuffle
11716 operations for each needed permute. */
11717 int group_size = DR_GROUP_SIZE (first_stmt_info);
11718 int nstmts = ceil_log2 (group_size) * group_size;
11719 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11720 stmt_info, 0, vect_body);
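/* E.g. a group of 4 loads is charged ceil_log2 (4) * 4 = 8 vec_perm
stmts for the even/odd extraction tree. */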
11722 if (dump_enabled_p ())
11723 dump_printf_loc (MSG_NOTE, vect_location,
11724 "vect_model_load_cost:"
11725 "strided group_size = %d .\n",
11726 group_size);
11728 else if (!costing_p)
11730 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11731 group_size, gsi);
11732 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11735 else if (!costing_p)
11736 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11738 dr_chain.release ();
11740 if (!slp && !costing_p)
11741 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11743 if (costing_p)
11745 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11746 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11747 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11748 if (dump_enabled_p ())
11749 dump_printf_loc (MSG_NOTE, vect_location,
11750 "vect_model_load_cost: inside_cost = %u, "
11751 "prologue_cost = %u .\n",
11752 inside_cost, prologue_cost);
11755 return true;
11758 /* Function vect_is_simple_cond.
11760 Input:
11761 LOOP - the loop that is being vectorized.
11762 COND - Condition that is checked for simple use.
11764 Output:
11765 *COMP_VECTYPE - the vector type for the comparison.
11766 *DTS - The def types for the arguments of the comparison
11768 Returns whether a COND can be vectorized. Checks whether
11769 condition operands are supportable using vect_is_simple_use. */
11771 static bool
11772 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11773 slp_tree slp_node, tree *comp_vectype,
11774 enum vect_def_type *dts, tree vectype)
11776 tree lhs, rhs;
11777 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11778 slp_tree slp_op;
11780 /* Mask case. */
11781 if (TREE_CODE (cond) == SSA_NAME
11782 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11784 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11785 &slp_op, &dts[0], comp_vectype)
11786 || !*comp_vectype
11787 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11788 return false;
11789 return true;
11792 if (!COMPARISON_CLASS_P (cond))
11793 return false;
11795 lhs = TREE_OPERAND (cond, 0);
11796 rhs = TREE_OPERAND (cond, 1);
11798 if (TREE_CODE (lhs) == SSA_NAME)
11800 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11801 &lhs, &slp_op, &dts[0], &vectype1))
11802 return false;
11804 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11805 || TREE_CODE (lhs) == FIXED_CST)
11806 dts[0] = vect_constant_def;
11807 else
11808 return false;
11810 if (TREE_CODE (rhs) == SSA_NAME)
11812 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11813 &rhs, &slp_op, &dts[1], &vectype2))
11814 return false;
11816 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11817 || TREE_CODE (rhs) == FIXED_CST)
11818 dts[1] = vect_constant_def;
11819 else
11820 return false;
11822 if (vectype1 && vectype2
11823 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11824 TYPE_VECTOR_SUBPARTS (vectype2)))
11825 return false;
11827 *comp_vectype = vectype1 ? vectype1 : vectype2;
11828 /* Invariant comparison. */
11829 if (! *comp_vectype)
11831 tree scalar_type = TREE_TYPE (lhs);
11832 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11833 *comp_vectype = truth_type_for (vectype);
11834 else
11836 /* If we can widen the comparison to match vectype do so. */
11837 if (INTEGRAL_TYPE_P (scalar_type)
11838 && !slp_node
11839 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11840 TYPE_SIZE (TREE_TYPE (vectype))))
11841 scalar_type = build_nonstandard_integer_type
11842 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
11843 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11844 slp_node);
11848 return true;
11851 /* vectorizable_condition.
11853 Check if STMT_INFO is conditional modify expression that can be vectorized.
11854 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11855 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11856 at GSI.
11858 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11860 Return true if STMT_INFO is vectorizable in this way. */
11862 static bool
11863 vectorizable_condition (vec_info *vinfo,
11864 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11865 gimple **vec_stmt,
11866 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11868 tree scalar_dest = NULL_TREE;
11869 tree vec_dest = NULL_TREE;
11870 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
11871 tree then_clause, else_clause;
11872 tree comp_vectype = NULL_TREE;
11873 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
11874 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
11875 tree vec_compare;
11876 tree new_temp;
11877 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11878 enum vect_def_type dts[4]
11879 = {vect_unknown_def_type, vect_unknown_def_type,
11880 vect_unknown_def_type, vect_unknown_def_type};
11881 int ndts = 4;
11882 int ncopies;
11883 int vec_num;
11884 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11885 int i;
11886 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11887 vec<tree> vec_oprnds0 = vNULL;
11888 vec<tree> vec_oprnds1 = vNULL;
11889 vec<tree> vec_oprnds2 = vNULL;
11890 vec<tree> vec_oprnds3 = vNULL;
11891 tree vec_cmp_type;
11892 bool masked = false;
11894 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11895 return false;
11897 /* Is vectorizable conditional operation? */
11898 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11899 if (!stmt)
11900 return false;
11902 code = gimple_assign_rhs_code (stmt);
11903 if (code != COND_EXPR)
11904 return false;
11906 stmt_vec_info reduc_info = NULL;
11907 int reduc_index = -1;
11908 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
11909 bool for_reduction
11910 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
11911 if (for_reduction)
11913 if (slp_node)
11914 return false;
11915 reduc_info = info_for_reduction (vinfo, stmt_info);
11916 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
11917 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
11918 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
11919 || reduc_index != -1);
11921 else
11923 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11924 return false;
11927 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11928 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11930 if (slp_node)
11932 ncopies = 1;
11933 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
11935 else
11937 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11938 vec_num = 1;
11941 gcc_assert (ncopies >= 1);
11942 if (for_reduction && ncopies > 1)
11943 return false; /* FORNOW */
11945 cond_expr = gimple_assign_rhs1 (stmt);
11947 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
11948 &comp_vectype, &dts[0], vectype)
11949 || !comp_vectype)
11950 return false;
11952 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
11953 slp_tree then_slp_node, else_slp_node;
11954 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
11955 &then_clause, &then_slp_node, &dts[2], &vectype1))
11956 return false;
11957 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
11958 &else_clause, &else_slp_node, &dts[3], &vectype2))
11959 return false;
11961 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
11962 return false;
11964 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
11965 return false;
11967 masked = !COMPARISON_CLASS_P (cond_expr);
11968 vec_cmp_type = truth_type_for (comp_vectype);
11970 if (vec_cmp_type == NULL_TREE)
11971 return false;
11973 cond_code = TREE_CODE (cond_expr);
11974 if (!masked)
11976 cond_expr0 = TREE_OPERAND (cond_expr, 0);
11977 cond_expr1 = TREE_OPERAND (cond_expr, 1);
11980 /* For conditional reductions, the "then" value needs to be the candidate
11981 value calculated by this iteration while the "else" value needs to be
11982 the result carried over from previous iterations. If the COND_EXPR
11983 is the other way around, we need to swap it. */
11984 bool must_invert_cmp_result = false;
11985 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
11987 if (masked)
11988 must_invert_cmp_result = true;
11989 else
11991 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
11992 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
11993 if (new_code == ERROR_MARK)
11994 must_invert_cmp_result = true;
11995 else
11997 cond_code = new_code;
11998 /* Make sure we don't accidentally use the old condition. */
11999 cond_expr = NULL_TREE;
12002 std::swap (then_clause, else_clause);
12005 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
12007 /* Boolean values may have another representation in vectors
12008 and therefore we prefer bit operations over comparison for
12009 them (which also works for scalar masks). We store opcodes
12010 to use in bitop1 and bitop2. Statement is vectorized as
12011 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12012 depending on bitop1 and bitop2 arity. */
12013 switch (cond_code)
12015 case GT_EXPR:
12016 bitop1 = BIT_NOT_EXPR;
12017 bitop2 = BIT_AND_EXPR;
12018 break;
12019 case GE_EXPR:
12020 bitop1 = BIT_NOT_EXPR;
12021 bitop2 = BIT_IOR_EXPR;
12022 break;
12023 case LT_EXPR:
12024 bitop1 = BIT_NOT_EXPR;
12025 bitop2 = BIT_AND_EXPR;
12026 std::swap (cond_expr0, cond_expr1);
12027 break;
12028 case LE_EXPR:
12029 bitop1 = BIT_NOT_EXPR;
12030 bitop2 = BIT_IOR_EXPR;
12031 std::swap (cond_expr0, cond_expr1);
12032 break;
12033 case NE_EXPR:
12034 bitop1 = BIT_XOR_EXPR;
12035 break;
12036 case EQ_EXPR:
12037 bitop1 = BIT_XOR_EXPR;
12038 bitop2 = BIT_NOT_EXPR;
12039 break;
12040 default:
12041 return false;
12043 cond_code = SSA_NAME;
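/* For instance, on boolean operands x > y is computed as x & ~y:
bitop1 = BIT_NOT_EXPR is applied to rhs2 and bitop2 = BIT_AND_EXPR
combines the result with rhs1, i.e. the rhs1 BITOP2 (BITOP1 rhs2)
form described above. */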
12046 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
12047 && reduction_type == EXTRACT_LAST_REDUCTION
12048 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
12050 if (dump_enabled_p ())
12051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12052 "reduction comparison operation not supported.\n");
12053 return false;
12056 if (!vec_stmt)
12058 if (bitop1 != NOP_EXPR)
12060 machine_mode mode = TYPE_MODE (comp_vectype);
12061 optab optab;
12063 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
12064 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12065 return false;
12067 if (bitop2 != NOP_EXPR)
12069 optab = optab_for_tree_code (bitop2, comp_vectype,
12070 optab_default);
12071 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12072 return false;
12076 vect_cost_for_stmt kind = vector_stmt;
12077 if (reduction_type == EXTRACT_LAST_REDUCTION)
12078 /* Count one reduction-like operation per vector. */
12079 kind = vec_to_scalar;
12080 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
12081 && (masked
12082 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
12083 cond_code)
12084 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
12085 ERROR_MARK))))
12086 return false;
12088 if (slp_node
12089 && (!vect_maybe_update_slp_op_vectype
12090 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
12091 || (op_adjust == 1
12092 && !vect_maybe_update_slp_op_vectype
12093 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
12094 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
12095 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
12097 if (dump_enabled_p ())
12098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12099 "incompatible vector types for invariants\n");
12100 return false;
12103 if (loop_vinfo && for_reduction
12104 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
12106 if (reduction_type == EXTRACT_LAST_REDUCTION)
12108 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12109 vectype, OPTIMIZE_FOR_SPEED))
12110 vect_record_loop_len (loop_vinfo,
12111 &LOOP_VINFO_LENS (loop_vinfo),
12112 ncopies * vec_num, vectype, 1);
12113 else
12114 vect_record_loop_mask (loop_vinfo,
12115 &LOOP_VINFO_MASKS (loop_vinfo),
12116 ncopies * vec_num, vectype, NULL);
12118 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12119 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
12121 if (dump_enabled_p ())
12122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12123 "conditional reduction prevents the use"
12124 " of partial vectors.\n");
12125 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
12129 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
12130 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
12131 cost_vec, kind);
12132 return true;
12135 /* Transform. */
12137 /* Handle def. */
12138 scalar_dest = gimple_assign_lhs (stmt);
12139 if (reduction_type != EXTRACT_LAST_REDUCTION)
12140 vec_dest = vect_create_destination_var (scalar_dest, vectype);
12142 bool swap_cond_operands = false;
12144 /* See whether another part of the vectorized code applies a loop
12145 mask to the condition, or to its inverse. */
12147 vec_loop_masks *masks = NULL;
12148 vec_loop_lens *lens = NULL;
12149 if (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
12151 if (reduction_type == EXTRACT_LAST_REDUCTION)
12152 lens = &LOOP_VINFO_LENS (loop_vinfo);
12154 else if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
12156 if (reduction_type == EXTRACT_LAST_REDUCTION)
12157 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12158 else
12160 scalar_cond_masked_key cond (cond_expr, ncopies);
12161 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12162 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12163 else
12165 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
12166 tree_code orig_code = cond.code;
12167 cond.code = invert_tree_comparison (cond.code, honor_nans);
12168 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
12170 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12171 cond_code = cond.code;
12172 swap_cond_operands = true;
12174 else
12176 /* Try the inverse of the current mask. We check if the
12177 inverse mask is live and if so we generate a negate of
12178 the current mask such that we still honor NaNs. */
12179 cond.inverted_p = true;
12180 cond.code = orig_code;
12181 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
12183 masks = &LOOP_VINFO_MASKS (loop_vinfo);
12184 cond_code = cond.code;
12185 swap_cond_operands = true;
12186 must_invert_cmp_result = true;
12193 /* Handle cond expr. */
12194 if (masked)
12195 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12196 cond_expr, &vec_oprnds0, comp_vectype,
12197 then_clause, &vec_oprnds2, vectype,
12198 reduction_type != EXTRACT_LAST_REDUCTION
12199 ? else_clause : NULL, &vec_oprnds3, vectype);
12200 else
12201 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12202 cond_expr0, &vec_oprnds0, comp_vectype,
12203 cond_expr1, &vec_oprnds1, comp_vectype,
12204 then_clause, &vec_oprnds2, vectype,
12205 reduction_type != EXTRACT_LAST_REDUCTION
12206 ? else_clause : NULL, &vec_oprnds3, vectype);
12208 /* Arguments are ready. Create the new vector stmt. */
12209 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
12211 vec_then_clause = vec_oprnds2[i];
12212 if (reduction_type != EXTRACT_LAST_REDUCTION)
12213 vec_else_clause = vec_oprnds3[i];
12215 if (swap_cond_operands)
12216 std::swap (vec_then_clause, vec_else_clause);
12218 if (masked)
12219 vec_compare = vec_cond_lhs;
12220 else
12222 vec_cond_rhs = vec_oprnds1[i];
12223 if (bitop1 == NOP_EXPR)
12225 gimple_seq stmts = NULL;
12226 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
12227 vec_cond_lhs, vec_cond_rhs);
12228 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
12230 else
12232 new_temp = make_ssa_name (vec_cmp_type);
12233 gassign *new_stmt;
12234 if (bitop1 == BIT_NOT_EXPR)
12235 new_stmt = gimple_build_assign (new_temp, bitop1,
12236 vec_cond_rhs);
12237 else
12238 new_stmt
12239 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
12240 vec_cond_rhs);
12241 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12242 if (bitop2 == NOP_EXPR)
12243 vec_compare = new_temp;
12244 else if (bitop2 == BIT_NOT_EXPR
12245 && reduction_type != EXTRACT_LAST_REDUCTION)
12247 /* Instead of doing ~x ? y : z do x ? z : y. */
12248 vec_compare = new_temp;
12249 std::swap (vec_then_clause, vec_else_clause);
12251 else
12253 vec_compare = make_ssa_name (vec_cmp_type);
12254 if (bitop2 == BIT_NOT_EXPR)
12255 new_stmt
12256 = gimple_build_assign (vec_compare, bitop2, new_temp);
12257 else
12258 new_stmt
12259 = gimple_build_assign (vec_compare, bitop2,
12260 vec_cond_lhs, new_temp);
12261 vect_finish_stmt_generation (vinfo, stmt_info,
12262 new_stmt, gsi);
12267 /* If we decided to apply a loop mask to the result of the vector
12268 comparison, AND the comparison with the mask now. Later passes
12269 should then be able to reuse the AND results between multiple
12270 vector statements.
12272 For example:
12273 for (int i = 0; i < 100; ++i)
12274 x[i] = y[i] ? z[i] : 10;
12276 results in following optimized GIMPLE:
12278 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12279 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12280 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12281 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12282 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12283 vect_iftmp.11_47, { 10, ... }>;
12285 instead of using masked and unmasked forms of
12286 vec != { 0, ... } (masked in the MASK_LOAD,
12287 unmasked in the VEC_COND_EXPR). */
12289 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12290 in cases where that's necessary. */
12292 tree len = NULL_TREE, bias = NULL_TREE;
12293 if (masks || lens || reduction_type == EXTRACT_LAST_REDUCTION)
12295 if (!is_gimple_val (vec_compare))
12297 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12298 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12299 vec_compare);
12300 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12301 vec_compare = vec_compare_name;
12304 if (must_invert_cmp_result)
12306 tree vec_compare_name = make_ssa_name (vec_cmp_type);
12307 gassign *new_stmt = gimple_build_assign (vec_compare_name,
12308 BIT_NOT_EXPR,
12309 vec_compare);
12310 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12311 vec_compare = vec_compare_name;
12314 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST,
12315 vectype, OPTIMIZE_FOR_SPEED))
12317 if (lens)
12319 len = vect_get_loop_len (loop_vinfo, gsi, lens,
12320 vec_num * ncopies, vectype, i, 1);
12321 signed char biasval
12322 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
12323 bias = build_int_cst (intQI_type_node, biasval);
12325 else
12327 len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
12328 bias = build_int_cst (intQI_type_node, 0);
12331 if (masks)
12333 tree loop_mask
12334 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
12335 vectype, i);
12336 tree tmp2 = make_ssa_name (vec_cmp_type);
12337 gassign *g
12338 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
12339 loop_mask);
12340 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
12341 vec_compare = tmp2;
12345 gimple *new_stmt;
12346 if (reduction_type == EXTRACT_LAST_REDUCTION)
12348 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
12349 tree lhs = gimple_get_lhs (old_stmt);
12350 if (len)
12351 new_stmt = gimple_build_call_internal
12352 (IFN_LEN_FOLD_EXTRACT_LAST, 5, else_clause, vec_compare,
12353 vec_then_clause, len, bias);
12354 else
12355 new_stmt = gimple_build_call_internal
12356 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
12357 vec_then_clause);
12358 gimple_call_set_lhs (new_stmt, lhs);
12359 SSA_NAME_DEF_STMT (lhs) = new_stmt;
12360 if (old_stmt == gsi_stmt (*gsi))
12361 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
12362 else
12364 /* In this case we're moving the definition to later in the
12365 block. That doesn't matter because the only uses of the
12366 lhs are in phi statements. */
12367 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
12368 gsi_remove (&old_gsi, true);
12369 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12372 else
12374 new_temp = make_ssa_name (vec_dest);
12375 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
12376 vec_then_clause, vec_else_clause);
12377 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12379 if (slp_node)
12380 slp_node->push_vec_def (new_stmt);
12381 else
12382 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12385 if (!slp_node)
12386 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12388 vec_oprnds0.release ();
12389 vec_oprnds1.release ();
12390 vec_oprnds2.release ();
12391 vec_oprnds3.release ();
12393 return true;
12396 /* Helper of vectorizable_comparison.
12398 Check if STMT_INFO is comparison expression CODE that can be vectorized.
12399 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12400 comparison, put it in VEC_STMT, and insert it at GSI.
12402 Return true if STMT_INFO is vectorizable in this way. */
12404 static bool
12405 vectorizable_comparison_1 (vec_info *vinfo, tree vectype,
12406 stmt_vec_info stmt_info, tree_code code,
12407 gimple_stmt_iterator *gsi, gimple **vec_stmt,
12408 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12410 tree lhs, rhs1, rhs2;
12411 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
12412 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
12413 tree new_temp;
12414 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
12415 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
12416 int ndts = 2;
12417 poly_uint64 nunits;
12418 int ncopies;
12419 enum tree_code bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
12420 int i;
12421 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12422 vec<tree> vec_oprnds0 = vNULL;
12423 vec<tree> vec_oprnds1 = vNULL;
12424 tree mask_type;
12425 tree mask;
12427 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12428 return false;
12430 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
12431 return false;
12433 mask_type = vectype;
12434 nunits = TYPE_VECTOR_SUBPARTS (vectype);
12436 if (slp_node)
12437 ncopies = 1;
12438 else
12439 ncopies = vect_get_num_copies (loop_vinfo, vectype);
12441 gcc_assert (ncopies >= 1);
12443 if (TREE_CODE_CLASS (code) != tcc_comparison)
12444 return false;
12446 slp_tree slp_rhs1, slp_rhs2;
12447 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12448 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
12449 return false;
12451 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
12452 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
12453 return false;
12455 if (vectype1 && vectype2
12456 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
12457 TYPE_VECTOR_SUBPARTS (vectype2)))
12458 return false;
12460 vectype = vectype1 ? vectype1 : vectype2;
12462 /* Invariant comparison. */
12463 if (!vectype)
12465 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
12466 vectype = mask_type;
12467 else
12468 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
12469 slp_node);
12470 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
12471 return false;
12473 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
12474 return false;
12476 /* Can't compare mask and non-mask types. */
12477 if (vectype1 && vectype2
12478 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12479 return false;
12481 /* Boolean values may have another representation in vectors
12482 and therefore we prefer bit operations over comparison for
12483 them (which also works for scalar masks). We store opcodes
12484 to use in bitop1 and bitop2. Statement is vectorized as
12485 BITOP2 (rhs1 BITOP1 rhs2) or
12486 rhs1 BITOP2 (BITOP1 rhs2)
12487 depending on bitop1 and bitop2 arity. */
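/* As an illustrative sketch, for one-bit boolean operands a and b the
   opcode choices made just below correspond to:
     a >  b   ->   a & ~b
     a >= b   ->   a | ~b
     a <  b   ->   b & ~a    (operands swapped)
     a <= b   ->   b | ~a    (operands swapped)
     a == b   ->   ~(a ^ b)
     a != b   ->   a ^ b  */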
12488 bool swap_p = false;
12489 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12491 if (code == GT_EXPR)
12493 bitop1 = BIT_NOT_EXPR;
12494 bitop2 = BIT_AND_EXPR;
12496 else if (code == GE_EXPR)
12498 bitop1 = BIT_NOT_EXPR;
12499 bitop2 = BIT_IOR_EXPR;
12501 else if (code == LT_EXPR)
12503 bitop1 = BIT_NOT_EXPR;
12504 bitop2 = BIT_AND_EXPR;
12505 swap_p = true;
12507 else if (code == LE_EXPR)
12509 bitop1 = BIT_NOT_EXPR;
12510 bitop2 = BIT_IOR_EXPR;
12511 swap_p = true;
12513 else
12515 bitop1 = BIT_XOR_EXPR;
12516 if (code == EQ_EXPR)
12517 bitop2 = BIT_NOT_EXPR;
12521 if (!vec_stmt)
12523 if (bitop1 == NOP_EXPR)
12525 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12526 return false;
12528 else
12530 machine_mode mode = TYPE_MODE (vectype);
12531 optab optab;
12533 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12534 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12535 return false;
12537 if (bitop2 != NOP_EXPR)
12539 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12540 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12541 return false;
12545 /* Put types on constant and invariant SLP children. */
12546 if (slp_node
12547 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12548 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12550 if (dump_enabled_p ())
12551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12552 "incompatible vector types for invariants\n");
12553 return false;
12556 vect_model_simple_cost (vinfo, stmt_info,
12557 ncopies * (1 + (bitop2 != NOP_EXPR)),
12558 dts, ndts, slp_node, cost_vec);
12559 return true;
12562 /* Transform. */
12564 /* Handle def. */
12565 lhs = gimple_assign_lhs (STMT_VINFO_STMT (stmt_info));
12566 mask = vect_create_destination_var (lhs, mask_type);
12568 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12569 rhs1, &vec_oprnds0, vectype,
12570 rhs2, &vec_oprnds1, vectype);
12571 if (swap_p)
12572 std::swap (vec_oprnds0, vec_oprnds1);
12574 /* Arguments are ready. Create the new vector stmt. */
12575 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12577 gimple *new_stmt;
12578 vec_rhs2 = vec_oprnds1[i];
12580 new_temp = make_ssa_name (mask);
12581 if (bitop1 == NOP_EXPR)
12583 new_stmt = gimple_build_assign (new_temp, code,
12584 vec_rhs1, vec_rhs2);
12585 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12587 else
12589 if (bitop1 == BIT_NOT_EXPR)
12590 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12591 else
12592 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12593 vec_rhs2);
12594 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12595 if (bitop2 != NOP_EXPR)
12597 tree res = make_ssa_name (mask);
12598 if (bitop2 == BIT_NOT_EXPR)
12599 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12600 else
12601 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12602 new_temp);
12603 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12606 if (slp_node)
12607 slp_node->push_vec_def (new_stmt);
12608 else
12609 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12612 if (!slp_node)
12613 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12615 vec_oprnds0.release ();
12616 vec_oprnds1.release ();
12618 return true;
12621 /* vectorizable_comparison.
12623 Check if STMT_INFO is comparison expression that can be vectorized.
12624 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
12625 comparison, put it in VEC_STMT, and insert it at GSI.
12627 Return true if STMT_INFO is vectorizable in this way. */
12629 static bool
12630 vectorizable_comparison (vec_info *vinfo,
12631 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12632 gimple **vec_stmt,
12633 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
12635 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12637 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
12638 return false;
12640 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
12641 return false;
12643 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
12644 if (!stmt)
12645 return false;
12647 enum tree_code code = gimple_assign_rhs_code (stmt);
12648 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
12649 if (!vectorizable_comparison_1 (vinfo, vectype, stmt_info, code, gsi,
12650 vec_stmt, slp_node, cost_vec))
12651 return false;
12653 if (!vec_stmt)
12654 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12656 return true;
12659 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12660 can handle all live statements in the node. Otherwise return true
12661 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12662 VEC_STMT_P is as for vectorizable_live_operation. */
12664 static bool
12665 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
12666 slp_tree slp_node, slp_instance slp_node_instance,
12667 bool vec_stmt_p,
12668 stmt_vector_for_cost *cost_vec)
12670 if (slp_node)
12672 stmt_vec_info slp_stmt_info;
12673 unsigned int i;
12674 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
12676 if (STMT_VINFO_LIVE_P (slp_stmt_info)
12677 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
12678 slp_node_instance, i,
12679 vec_stmt_p, cost_vec))
12680 return false;
12683 else if (STMT_VINFO_LIVE_P (stmt_info)
12684 && !vectorizable_live_operation (vinfo, stmt_info,
12685 slp_node, slp_node_instance, -1,
12686 vec_stmt_p, cost_vec))
12687 return false;
12689 return true;
12692 /* Make sure the statement is vectorizable. */
12694 opt_result
12695 vect_analyze_stmt (vec_info *vinfo,
12696 stmt_vec_info stmt_info, bool *need_to_vectorize,
12697 slp_tree node, slp_instance node_instance,
12698 stmt_vector_for_cost *cost_vec)
12700 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12701 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
12702 bool ok;
12703 gimple_seq pattern_def_seq;
12705 if (dump_enabled_p ())
12706 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
12707 stmt_info->stmt);
12709 if (gimple_has_volatile_ops (stmt_info->stmt))
12710 return opt_result::failure_at (stmt_info->stmt,
12711 "not vectorized:"
12712 " stmt has volatile operands: %G\n",
12713 stmt_info->stmt);
12715 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12716 && node == NULL
12717 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
12719 gimple_stmt_iterator si;
12721 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
12723 stmt_vec_info pattern_def_stmt_info
12724 = vinfo->lookup_stmt (gsi_stmt (si));
12725 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
12726 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
12728 /* Analyze def stmt of STMT if it's a pattern stmt. */
12729 if (dump_enabled_p ())
12730 dump_printf_loc (MSG_NOTE, vect_location,
12731 "==> examining pattern def statement: %G",
12732 pattern_def_stmt_info->stmt);
12734 opt_result res
12735 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
12736 need_to_vectorize, node, node_instance,
12737 cost_vec);
12738 if (!res)
12739 return res;
12744 /* Skip stmts that do not need to be vectorized. In loops this is expected
12745 to include:
12746 - the COND_EXPR which is the loop exit condition
12747 - any LABEL_EXPRs in the loop
12748 - computations that are used only for array indexing or loop control.
12749 In basic blocks we only analyze statements that are a part of some SLP
12750 instance, therefore, all the statements are relevant.
12752 Pattern statement needs to be analyzed instead of the original statement
12753 if the original statement is not relevant. Otherwise, we analyze both
12754 statements. In basic blocks we are called from some SLP instance
12755 traversal; don't analyze pattern stmts instead, since the pattern
12756 stmts will already be part of an SLP instance. */
12758 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
12759 if (!STMT_VINFO_RELEVANT_P (stmt_info)
12760 && !STMT_VINFO_LIVE_P (stmt_info))
12762 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12763 && pattern_stmt_info
12764 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12765 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12767 /* Analyze PATTERN_STMT instead of the original stmt. */
12768 stmt_info = pattern_stmt_info;
12769 if (dump_enabled_p ())
12770 dump_printf_loc (MSG_NOTE, vect_location,
12771 "==> examining pattern statement: %G",
12772 stmt_info->stmt);
12774 else
12776 if (dump_enabled_p ())
12777 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
12779 return opt_result::success ();
12782 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12783 && node == NULL
12784 && pattern_stmt_info
12785 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12786 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12788 /* Analyze PATTERN_STMT too. */
12789 if (dump_enabled_p ())
12790 dump_printf_loc (MSG_NOTE, vect_location,
12791 "==> examining pattern statement: %G",
12792 pattern_stmt_info->stmt);
12794 opt_result res
12795 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
12796 node_instance, cost_vec);
12797 if (!res)
12798 return res;
12801 switch (STMT_VINFO_DEF_TYPE (stmt_info))
12803 case vect_internal_def:
12804 break;
12806 case vect_reduction_def:
12807 case vect_nested_cycle:
12808 gcc_assert (!bb_vinfo
12809 && (relevance == vect_used_in_outer
12810 || relevance == vect_used_in_outer_by_reduction
12811 || relevance == vect_used_by_reduction
12812 || relevance == vect_unused_in_scope
12813 || relevance == vect_used_only_live));
12814 break;
12816 case vect_induction_def:
12817 case vect_first_order_recurrence:
12818 gcc_assert (!bb_vinfo);
12819 break;
12821 case vect_constant_def:
12822 case vect_external_def:
12823 case vect_unknown_def_type:
12824 default:
12825 gcc_unreachable ();
12828 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12829 if (node)
12830 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
12832 if (STMT_VINFO_RELEVANT_P (stmt_info))
12834 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
12835 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12836 || (call && gimple_call_lhs (call) == NULL_TREE));
12837 *need_to_vectorize = true;
12840 if (PURE_SLP_STMT (stmt_info) && !node)
12842 if (dump_enabled_p ())
12843 dump_printf_loc (MSG_NOTE, vect_location,
12844 "handled only by SLP analysis\n");
12845 return opt_result::success ();
12848 ok = true;
12849 if (!bb_vinfo
12850 && (STMT_VINFO_RELEVANT_P (stmt_info)
12851 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
12852 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12853 -mveclibabi= takes preference over library functions with
12854 the simd attribute. */
12855 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12856 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
12857 cost_vec)
12858 || vectorizable_conversion (vinfo, stmt_info,
12859 NULL, NULL, node, cost_vec)
12860 || vectorizable_operation (vinfo, stmt_info,
12861 NULL, NULL, node, cost_vec)
12862 || vectorizable_assignment (vinfo, stmt_info,
12863 NULL, NULL, node, cost_vec)
12864 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12865 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12866 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12867 node, node_instance, cost_vec)
12868 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
12869 NULL, node, cost_vec)
12870 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12871 || vectorizable_condition (vinfo, stmt_info,
12872 NULL, NULL, node, cost_vec)
12873 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12874 cost_vec)
12875 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12876 stmt_info, NULL, node)
12877 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12878 stmt_info, NULL, node, cost_vec));
12879 else
12881 if (bb_vinfo)
12882 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12883 || vectorizable_simd_clone_call (vinfo, stmt_info,
12884 NULL, NULL, node, cost_vec)
12885 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
12886 cost_vec)
12887 || vectorizable_shift (vinfo, stmt_info,
12888 NULL, NULL, node, cost_vec)
12889 || vectorizable_operation (vinfo, stmt_info,
12890 NULL, NULL, node, cost_vec)
12891 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
12892 cost_vec)
12893 || vectorizable_load (vinfo, stmt_info,
12894 NULL, NULL, node, cost_vec)
12895 || vectorizable_store (vinfo, stmt_info,
12896 NULL, NULL, node, cost_vec)
12897 || vectorizable_condition (vinfo, stmt_info,
12898 NULL, NULL, node, cost_vec)
12899 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12900 cost_vec)
12901 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12904 if (node)
12905 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12907 if (!ok)
12908 return opt_result::failure_at (stmt_info->stmt,
12909 "not vectorized:"
12910 " relevant stmt not supported: %G",
12911 stmt_info->stmt);
12913 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
12914 need extra handling, except for vectorizable reductions. */
12915 if (!bb_vinfo
12916 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
12917 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
12918 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
12919 stmt_info, node, node_instance,
12920 false, cost_vec))
12921 return opt_result::failure_at (stmt_info->stmt,
12922 "not vectorized:"
12923 " live stmt not supported: %G",
12924 stmt_info->stmt);
12926 return opt_result::success ();
12930 /* Function vect_transform_stmt.
12932 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
12934 bool
12935 vect_transform_stmt (vec_info *vinfo,
12936 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12937 slp_tree slp_node, slp_instance slp_node_instance)
12939 bool is_store = false;
12940 gimple *vec_stmt = NULL;
12941 bool done;
12943 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
12945 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12946 if (slp_node)
12947 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
12949 switch (STMT_VINFO_TYPE (stmt_info))
12951 case type_demotion_vec_info_type:
12952 case type_promotion_vec_info_type:
12953 case type_conversion_vec_info_type:
12954 done = vectorizable_conversion (vinfo, stmt_info,
12955 gsi, &vec_stmt, slp_node, NULL);
12956 gcc_assert (done);
12957 break;
12959 case induc_vec_info_type:
12960 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
12961 stmt_info, &vec_stmt, slp_node,
12962 NULL);
12963 gcc_assert (done);
12964 break;
12966 case shift_vec_info_type:
12967 done = vectorizable_shift (vinfo, stmt_info,
12968 gsi, &vec_stmt, slp_node, NULL);
12969 gcc_assert (done);
12970 break;
12972 case op_vec_info_type:
12973 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12974 NULL);
12975 gcc_assert (done);
12976 break;
12978 case assignment_vec_info_type:
12979 done = vectorizable_assignment (vinfo, stmt_info,
12980 gsi, &vec_stmt, slp_node, NULL);
12981 gcc_assert (done);
12982 break;
12984 case load_vec_info_type:
12985 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12986 NULL);
12987 gcc_assert (done);
12988 break;
12990 case store_vec_info_type:
12991 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
12992 && !slp_node
12993 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
12994 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
12995 /* In case of interleaving, the whole chain is vectorized when the
12996 last store in the chain is reached. Store stmts before the last
12997 one are skipped, and their vec_stmt_info shouldn't be freed
12998 meanwhile. */
13000 else
13002 done = vectorizable_store (vinfo, stmt_info,
13003 gsi, &vec_stmt, slp_node, NULL);
13004 gcc_assert (done);
13005 is_store = true;
13007 break;
13009 case condition_vec_info_type:
13010 done = vectorizable_condition (vinfo, stmt_info,
13011 gsi, &vec_stmt, slp_node, NULL);
13012 gcc_assert (done);
13013 break;
13015 case comparison_vec_info_type:
13016 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
13017 slp_node, NULL);
13018 gcc_assert (done);
13019 break;
13021 case call_vec_info_type:
13022 done = vectorizable_call (vinfo, stmt_info,
13023 gsi, &vec_stmt, slp_node, NULL);
13024 break;
13026 case call_simd_clone_vec_info_type:
13027 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
13028 slp_node, NULL);
13029 break;
13031 case reduc_vec_info_type:
13032 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
13033 gsi, &vec_stmt, slp_node);
13034 gcc_assert (done);
13035 break;
13037 case cycle_phi_info_type:
13038 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
13039 &vec_stmt, slp_node, slp_node_instance);
13040 gcc_assert (done);
13041 break;
13043 case lc_phi_info_type:
13044 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
13045 stmt_info, &vec_stmt, slp_node);
13046 gcc_assert (done);
13047 break;
13049 case recurr_info_type:
13050 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
13051 stmt_info, &vec_stmt, slp_node, NULL);
13052 gcc_assert (done);
13053 break;
13055 case phi_info_type:
13056 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
13057 gcc_assert (done);
13058 break;
13060 default:
13061 if (!STMT_VINFO_LIVE_P (stmt_info))
13063 if (dump_enabled_p ())
13064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13065 "stmt not supported.\n");
13066 gcc_unreachable ();
13068 done = true;
13071 if (!slp_node && vec_stmt)
13072 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
13074 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
13076 /* Handle stmts whose DEF is used outside the loop-nest that is
13077 being vectorized. */
13078 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
13079 slp_node_instance, true, NULL);
13080 gcc_assert (done);
13083 if (slp_node)
13084 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
13086 return is_store;
13090 /* Remove a group of stores (for SLP or interleaving), free their
13091 stmt_vec_info. */
13093 void
13094 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
13096 stmt_vec_info next_stmt_info = first_stmt_info;
13098 while (next_stmt_info)
13100 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
13101 next_stmt_info = vect_orig_stmt (next_stmt_info);
13102 /* Free the attached stmt_vec_info and remove the stmt. */
13103 vinfo->remove_stmt (next_stmt_info);
13104 next_stmt_info = tmp;
13108 /* If NUNITS is nonzero, return a vector type that contains NUNITS
13109 elements of type SCALAR_TYPE, or null if the target doesn't support
13110 such a type.
13112 If NUNITS is zero, return a vector type that contains elements of
13113 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13115 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13116 for this vectorization region and want to "autodetect" the best choice.
13117 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13118 and we want the new type to be interoperable with it. PREVAILING_MODE
13119 in this case can be a scalar integer mode or a vector mode; when it
13120 is a vector mode, the function acts like a tree-level version of
13121 related_vector_mode. */
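/* A hedged illustration, assuming a target whose preferred SIMD mode for
   SImode is V4SImode: with PREVAILING_MODE == VOIDmode and NUNITS == 0 a
   32-bit int maps to vector(4) int; once V4SImode is the prevailing mode,
   asking for 8 units of a 16-bit short acts like related_vector_mode and
   yields vector(8) short int if the target provides such a mode, or
   NULL_TREE otherwise.  */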
13123 tree
13124 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
13125 tree scalar_type, poly_uint64 nunits)
13127 tree orig_scalar_type = scalar_type;
13128 scalar_mode inner_mode;
13129 machine_mode simd_mode;
13130 tree vectype;
13132 if ((!INTEGRAL_TYPE_P (scalar_type)
13133 && !POINTER_TYPE_P (scalar_type)
13134 && !SCALAR_FLOAT_TYPE_P (scalar_type))
13135 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
13136 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
13137 return NULL_TREE;
13139 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
13141 /* Interoperability between modes requires one to be a constant multiple
13142 of the other, so that the number of vectors required for each operation
13143 is a compile-time constant. */
13144 if (prevailing_mode != VOIDmode
13145 && !constant_multiple_p (nunits * nbytes,
13146 GET_MODE_SIZE (prevailing_mode))
13147 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
13148 nunits * nbytes))
13149 return NULL_TREE;
13151 /* For vector types of elements whose mode precision doesn't
13152 match their type's precision we use an element type of mode
13153 precision. The vectorization routines will have to make sure
13154 they support the proper result truncation/extension.
13155 We also make sure to build vector types with INTEGER_TYPE
13156 component type only. */
13157 if (INTEGRAL_TYPE_P (scalar_type)
13158 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
13159 || TREE_CODE (scalar_type) != INTEGER_TYPE))
13160 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
13161 TYPE_UNSIGNED (scalar_type));
13163 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13164 When the component mode passes the above test simply use a type
13165 corresponding to that mode. The theory is that any use that
13166 would cause problems with this will disable vectorization anyway. */
13167 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
13168 && !INTEGRAL_TYPE_P (scalar_type))
13169 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
13171 /* We can't build a vector type of elements with alignment bigger than
13172 their size. */
13173 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
13174 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
13175 TYPE_UNSIGNED (scalar_type));
13177 /* If we fell back to using the mode, fail if there was
13178 no scalar type for it. */
13179 if (scalar_type == NULL_TREE)
13180 return NULL_TREE;
13182 /* If no prevailing mode was supplied, use the mode the target prefers.
13183 Otherwise lookup a vector mode based on the prevailing mode. */
13184 if (prevailing_mode == VOIDmode)
13186 gcc_assert (known_eq (nunits, 0U));
13187 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
13188 if (SCALAR_INT_MODE_P (simd_mode))
13190 /* Traditional behavior is not to take the integer mode
13191 literally, but simply to use it as a way of determining
13192 the vector size. It is up to mode_for_vector to decide
13193 what the TYPE_MODE should be.
13195 Note that nunits == 1 is allowed in order to support single
13196 element vector types. */
13197 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
13198 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13199 return NULL_TREE;
13202 else if (SCALAR_INT_MODE_P (prevailing_mode)
13203 || !related_vector_mode (prevailing_mode,
13204 inner_mode, nunits).exists (&simd_mode))
13206 /* Fall back to using mode_for_vector, mostly in the hope of being
13207 able to use an integer mode. */
13208 if (known_eq (nunits, 0U)
13209 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
13210 return NULL_TREE;
13212 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
13213 return NULL_TREE;
13216 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
13218 /* In cases where the mode was chosen by mode_for_vector, check that
13219 the target actually supports the chosen mode, or that it at least
13220 allows the vector mode to be replaced by a like-sized integer. */
13221 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
13222 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
13223 return NULL_TREE;
13225 /* Re-attach the address-space qualifier if we canonicalized the scalar
13226 type. */
13227 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
13228 return build_qualified_type
13229 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
13231 return vectype;
13234 /* Function get_vectype_for_scalar_type.
13236 Returns the vector type corresponding to SCALAR_TYPE as supported
13237 by the target. If GROUP_SIZE is nonzero and we're performing BB
13238 vectorization, make sure that the number of elements in the vector
13239 is no bigger than GROUP_SIZE. */
13241 tree
13242 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
13243 unsigned int group_size)
13245 /* For BB vectorization, we should always have a group size once we've
13246 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13247 are tentative requests during things like early data reference
13248 analysis and pattern recognition. */
13249 if (is_a <bb_vec_info> (vinfo))
13250 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13251 else
13252 group_size = 0;
13254 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13255 scalar_type);
13256 if (vectype && vinfo->vector_mode == VOIDmode)
13257 vinfo->vector_mode = TYPE_MODE (vectype);
13259 /* Register the natural choice of vector type, before the group size
13260 has been applied. */
13261 if (vectype)
13262 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
13264 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
13265 try again with an explicit number of elements. */
13266 if (vectype
13267 && group_size
13268 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
13270 /* Start with the biggest number of units that fits within
13271 GROUP_SIZE and halve it until we find a valid vector type.
13272 Usually either the first attempt will succeed or all will
13273 fail (in the latter case because GROUP_SIZE is too small
13274 for the target), but it's possible that a target could have
13275 a hole between supported vector types.
13277 If GROUP_SIZE is not a power of 2, this has the effect of
13278 trying the largest power of 2 that fits within the group,
13279 even though the group is not a multiple of that vector size.
13280 The BB vectorizer will then try to carve up the group into
13281 smaller pieces. */
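/* For instance, a GROUP_SIZE of 6 starts the search below at 4 elements
   and, if no 4-element vector type exists for SCALAR_TYPE, retries with 2
   before giving up.  */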
13282 unsigned int nunits = 1 << floor_log2 (group_size);
13285 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
13286 scalar_type, nunits);
13287 nunits /= 2;
13289 while (nunits > 1 && !vectype);
13292 return vectype;
13295 /* Return the vector type corresponding to SCALAR_TYPE as supported
13296 by the target. NODE, if nonnull, is the SLP tree node that will
13297 use the returned vector type. */
13299 tree
13300 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
13302 unsigned int group_size = 0;
13303 if (node)
13304 group_size = SLP_TREE_LANES (node);
13305 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13308 /* Function get_mask_type_for_scalar_type.
13310 Returns the mask type corresponding to the result of a comparison
13311 of vectors of the specified SCALAR_TYPE, as supported by the target.
13312 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13313 make sure that the number of elements in the vector is no bigger
13314 than GROUP_SIZE. */
13316 tree
13317 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
13318 unsigned int group_size)
13320 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13322 if (!vectype)
13323 return NULL;
13325 return truth_type_for (vectype);
13328 /* Function get_same_sized_vectype
13330 Returns a vector type corresponding to SCALAR_TYPE with the same
13331 size as VECTOR_TYPE, if supported by the target.
13333 tree
13334 get_same_sized_vectype (tree scalar_type, tree vector_type)
13336 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
13337 return truth_type_for (vector_type);
13339 poly_uint64 nunits;
13340 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
13341 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
13342 return NULL_TREE;
13344 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
13345 scalar_type, nunits);
13348 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
13349 would not change the chosen vector modes. */
13351 bool
13352 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
13354 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
13355 i != vinfo->used_vector_modes.end (); ++i)
13356 if (!VECTOR_MODE_P (*i)
13357 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
13358 return false;
13359 return true;
13362 /* Function vect_is_simple_use.
13364 Input:
13365 VINFO - the vect info of the loop or basic block that is being vectorized.
13366 OPERAND - operand in the loop or bb.
13367 Output:
13368 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
13369 case OPERAND is an SSA_NAME that is defined in the vectorizable region
13370 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
13371 the definition could be anywhere in the function
13372 DT - the type of definition
13374 Returns whether a stmt with OPERAND can be vectorized.
13375 For loops, supportable operands are constants, loop invariants, and operands
13376 that are defined by the current iteration of the loop. Unsupportable
13377 operands are those that are defined by a previous iteration of the loop (as
13378 is the case in reduction/induction computations).
13379 For basic blocks, supportable operands are constants and bb invariants.
13380 For now, operands defined outside the basic block are not supported. */
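/* As a rough illustration: in a loop computing t = a[i] + x, a constant
   operand is classified vect_constant_def, the use of x defined before
   the loop is vect_external_def, and the result of the vectorizable load
   from a[i] is vect_internal_def; an operand whose classification comes
   back as vect_unknown_def_type makes this function return false.  */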
13382 bool
13383 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13384 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
13386 if (def_stmt_info_out)
13387 *def_stmt_info_out = NULL;
13388 if (def_stmt_out)
13389 *def_stmt_out = NULL;
13390 *dt = vect_unknown_def_type;
13392 if (dump_enabled_p ())
13394 dump_printf_loc (MSG_NOTE, vect_location,
13395 "vect_is_simple_use: operand ");
13396 if (TREE_CODE (operand) == SSA_NAME
13397 && !SSA_NAME_IS_DEFAULT_DEF (operand))
13398 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
13399 else
13400 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
13403 if (CONSTANT_CLASS_P (operand))
13404 *dt = vect_constant_def;
13405 else if (is_gimple_min_invariant (operand))
13406 *dt = vect_external_def;
13407 else if (TREE_CODE (operand) != SSA_NAME)
13408 *dt = vect_unknown_def_type;
13409 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
13410 *dt = vect_external_def;
13411 else
13413 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
13414 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
13415 if (!stmt_vinfo)
13416 *dt = vect_external_def;
13417 else
13419 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
13420 def_stmt = stmt_vinfo->stmt;
13421 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
13422 if (def_stmt_info_out)
13423 *def_stmt_info_out = stmt_vinfo;
13425 if (def_stmt_out)
13426 *def_stmt_out = def_stmt;
13429 if (dump_enabled_p ())
13431 dump_printf (MSG_NOTE, ", type of def: ");
13432 switch (*dt)
13434 case vect_uninitialized_def:
13435 dump_printf (MSG_NOTE, "uninitialized\n");
13436 break;
13437 case vect_constant_def:
13438 dump_printf (MSG_NOTE, "constant\n");
13439 break;
13440 case vect_external_def:
13441 dump_printf (MSG_NOTE, "external\n");
13442 break;
13443 case vect_internal_def:
13444 dump_printf (MSG_NOTE, "internal\n");
13445 break;
13446 case vect_induction_def:
13447 dump_printf (MSG_NOTE, "induction\n");
13448 break;
13449 case vect_reduction_def:
13450 dump_printf (MSG_NOTE, "reduction\n");
13451 break;
13452 case vect_double_reduction_def:
13453 dump_printf (MSG_NOTE, "double reduction\n");
13454 break;
13455 case vect_nested_cycle:
13456 dump_printf (MSG_NOTE, "nested cycle\n");
13457 break;
13458 case vect_first_order_recurrence:
13459 dump_printf (MSG_NOTE, "first order recurrence\n");
13460 break;
13461 case vect_unknown_def_type:
13462 dump_printf (MSG_NOTE, "unknown\n");
13463 break;
13467 if (*dt == vect_unknown_def_type)
13469 if (dump_enabled_p ())
13470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
13471 "Unsupported pattern.\n");
13472 return false;
13475 return true;
13478 /* Function vect_is_simple_use.
13480 Same as vect_is_simple_use but also determines the vector operand
13481 type of OPERAND and stores it to *VECTYPE. If the definition of
13482 OPERAND is vect_uninitialized_def, vect_constant_def or
13483 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
13484 is responsible for computing the best suited vector type for the
13485 scalar operand. */
13487 bool
13488 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
13489 tree *vectype, stmt_vec_info *def_stmt_info_out,
13490 gimple **def_stmt_out)
13492 stmt_vec_info def_stmt_info;
13493 gimple *def_stmt;
13494 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
13495 return false;
13497 if (def_stmt_out)
13498 *def_stmt_out = def_stmt;
13499 if (def_stmt_info_out)
13500 *def_stmt_info_out = def_stmt_info;
13502 /* Now get a vector type if the def is internal, otherwise supply
13503 NULL_TREE and leave it up to the caller to figure out a proper
13504 type for the use stmt. */
13505 if (*dt == vect_internal_def
13506 || *dt == vect_induction_def
13507 || *dt == vect_reduction_def
13508 || *dt == vect_double_reduction_def
13509 || *dt == vect_nested_cycle
13510 || *dt == vect_first_order_recurrence)
13512 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
13513 gcc_assert (*vectype != NULL_TREE);
13514 if (dump_enabled_p ())
13515 dump_printf_loc (MSG_NOTE, vect_location,
13516 "vect_is_simple_use: vectype %T\n", *vectype);
13518 else if (*dt == vect_uninitialized_def
13519 || *dt == vect_constant_def
13520 || *dt == vect_external_def)
13521 *vectype = NULL_TREE;
13522 else
13523 gcc_unreachable ();
13525 return true;
13528 /* Function vect_is_simple_use.
13530 Same as vect_is_simple_use but determines the operand by operand
13531 position OPERAND from either STMT or SLP_NODE, filling in *OP
13532 and *SLP_DEF (when SLP_NODE is not NULL). */
13534 bool
13535 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13536 unsigned operand, tree *op, slp_tree *slp_def,
13537 enum vect_def_type *dt,
13538 tree *vectype, stmt_vec_info *def_stmt_info_out)
13540 if (slp_node)
13542 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13543 *slp_def = child;
13544 *vectype = SLP_TREE_VECTYPE (child);
13545 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13547 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13548 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
13550 else
13552 if (def_stmt_info_out)
13553 *def_stmt_info_out = NULL;
13554 *op = SLP_TREE_SCALAR_OPS (child)[0];
13555 *dt = SLP_TREE_DEF_TYPE (child);
13556 return true;
13559 else
13561 *slp_def = NULL;
13562 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
13564 if (gimple_assign_rhs_code (ass) == COND_EXPR
13565 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
13567 if (operand < 2)
13568 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
13569 else
13570 *op = gimple_op (ass, operand);
13572 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
13573 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
13574 else
13575 *op = gimple_op (ass, operand + 1);
13577 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
13578 *op = gimple_call_arg (call, operand);
13579 else
13580 gcc_unreachable ();
13581 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
13585 /* If OP is not NULL and is external or constant, update its vector
13586 type with VECTYPE. Returns true if successful or false if not,
13587 for example when conflicting vector types are present. */
13589 bool
13590 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13592 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13593 return true;
13594 if (SLP_TREE_VECTYPE (op))
13595 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
13596 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13597 should be handled by patterns. Allow vect_constant_def for now. */
13598 if (VECTOR_BOOLEAN_TYPE_P (vectype)
13599 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
13600 return false;
13601 SLP_TREE_VECTYPE (op) = vectype;
13602 return true;
13605 /* Function supportable_widening_operation
13607 Check whether an operation represented by the code CODE is a
13608 widening operation that is supported by the target platform in
13609 vector form (i.e., when operating on arguments of type VECTYPE_IN
13610 producing a result of type VECTYPE_OUT).
13612 Widening operations we currently support are NOP (CONVERT), FLOAT,
13613 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13614 are supported by the target platform either directly (via vector
13615 tree-codes), or via target builtins.
13617 Output:
13618 - CODE1 and CODE2 are codes of vector operations to be used when
13619 vectorizing the operation, if available.
13620 - MULTI_STEP_CVT determines the number of required intermediate steps in
13621 case of multi-step conversion (like char->short->int - in that case
13622 MULTI_STEP_CVT will be 1).
13623 - INTERM_TYPES contains the intermediate type required to perform the
13624 widening operation (short in the above example). */
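/* As a sketch of the expected output for the char->short->int case above,
   assuming 128-bit vectors: *CODE1/*CODE2 would be the LO/HI (or even/odd)
   unpacking variants, *MULTI_STEP_CVT would be 1, and INTERM_TYPES would
   hold the single intermediate vector type of shorts.  */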
13626 bool
13627 supportable_widening_operation (vec_info *vinfo,
13628 code_helper code,
13629 stmt_vec_info stmt_info,
13630 tree vectype_out, tree vectype_in,
13631 code_helper *code1,
13632 code_helper *code2,
13633 int *multi_step_cvt,
13634 vec<tree> *interm_types)
13636 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
13637 class loop *vect_loop = NULL;
13638 machine_mode vec_mode;
13639 enum insn_code icode1, icode2;
13640 optab optab1 = unknown_optab, optab2 = unknown_optab;
13641 tree vectype = vectype_in;
13642 tree wide_vectype = vectype_out;
13643 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
13644 int i;
13645 tree prev_type, intermediate_type;
13646 machine_mode intermediate_mode, prev_mode;
13647 optab optab3, optab4;
13649 *multi_step_cvt = 0;
13650 if (loop_info)
13651 vect_loop = LOOP_VINFO_LOOP (loop_info);
13653 switch (code.safe_as_tree_code ())
13655 case MAX_TREE_CODES:
13656 /* Don't set c1 and c2 if code is not a tree_code. */
13657 break;
13659 case WIDEN_MULT_EXPR:
13660 /* The result of a vectorized widening operation usually requires
13661 two vectors (because the widened results do not fit into one vector).
13662 The generated vector results would normally be expected to be
13663 generated in the same order as in the original scalar computation,
13664 i.e. if 8 results are generated in each vector iteration, they are
13665 to be organized as follows:
13666 vect1: [res1,res2,res3,res4],
13667 vect2: [res5,res6,res7,res8].
13669 However, in the special case that the result of the widening
13670 operation is used in a reduction computation only, the order doesn't
13671 matter (because when vectorizing a reduction we change the order of
13672 the computation). Some targets can take advantage of this and
13673 generate more efficient code. For example, targets like Altivec,
13674 that support widen_mult using a sequence of {mult_even,mult_odd}
13675 generate the following vectors:
13676 vect1: [res1,res3,res5,res7],
13677 vect2: [res2,res4,res6,res8].
13679 When vectorizing outer-loops, we execute the inner-loop sequentially
13680 (each vectorized inner-loop iteration contributes to VF outer-loop
13681 iterations in parallel). We therefore don't allow changing the
13682 order of the computation in the inner-loop during outer-loop
13683 vectorization. */
13684 /* TODO: Another case in which order doesn't *really* matter is when we
13685 widen and then contract again, e.g. (short)((int)x * y >> 8).
13686 Normally, pack_trunc performs an even/odd permute, whereas the
13687 repack from an even/odd expansion would be an interleave, which
13688 would be significantly simpler for e.g. AVX2. */
13689 /* In any case, in order to avoid duplicating the code below, recurse
13690 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13691 are properly set up for the caller. If we fail, we'll continue with
13692 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13693 if (vect_loop
13694 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
13695 && !nested_in_vect_loop_p (vect_loop, stmt_info)
13696 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
13697 stmt_info, vectype_out,
13698 vectype_in, code1,
13699 code2, multi_step_cvt,
13700 interm_types))
13702 /* Elements in a vector with the vect_used_by_reduction property cannot
13703 be reordered if the use chain with this property does not have the
13704 same operation. One such example is s += a * b, where elements
13705 in a and b cannot be reordered. Here we check if the vector defined
13706 by STMT is only directly used in the reduction statement. */
13707 tree lhs = gimple_assign_lhs (stmt_info->stmt);
13708 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
13709 if (use_stmt_info
13710 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
13711 return true;
13713 c1 = VEC_WIDEN_MULT_LO_EXPR;
13714 c2 = VEC_WIDEN_MULT_HI_EXPR;
13715 break;
13717 case DOT_PROD_EXPR:
13718 c1 = DOT_PROD_EXPR;
13719 c2 = DOT_PROD_EXPR;
13720 break;
13722 case SAD_EXPR:
13723 c1 = SAD_EXPR;
13724 c2 = SAD_EXPR;
13725 break;
13727 case VEC_WIDEN_MULT_EVEN_EXPR:
13728 /* Support the recursion induced just above. */
13729 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
13730 c2 = VEC_WIDEN_MULT_ODD_EXPR;
13731 break;
13733 case WIDEN_LSHIFT_EXPR:
13734 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
13735 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
13736 break;
13738 CASE_CONVERT:
13739 c1 = VEC_UNPACK_LO_EXPR;
13740 c2 = VEC_UNPACK_HI_EXPR;
13741 break;
13743 case FLOAT_EXPR:
13744 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
13745 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
13746 break;
13748 case FIX_TRUNC_EXPR:
13749 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
13750 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
13751 break;
13753 default:
13754 gcc_unreachable ();
13757 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
13758 std::swap (c1, c2);
13760 if (code == FIX_TRUNC_EXPR)
13762 /* The signedness is determined from output operand. */
13763 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13764 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13766 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
13767 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13768 && VECTOR_BOOLEAN_TYPE_P (vectype)
13769 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13770 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13772 /* If the input and result modes are the same, a different optab
13773 is needed where we pass in the number of units in vectype. */
13774 optab1 = vec_unpacks_sbool_lo_optab;
13775 optab2 = vec_unpacks_sbool_hi_optab;
13778 vec_mode = TYPE_MODE (vectype);
13779 if (widening_fn_p (code))
13781 /* If this is an internal fn then we must check whether the target
13782 supports either a low-high split or an even-odd split. */
13783 internal_fn ifn = as_internal_fn ((combined_fn) code);
13785 internal_fn lo, hi, even, odd;
13786 lookup_hilo_internal_fn (ifn, &lo, &hi);
13787 *code1 = as_combined_fn (lo);
13788 *code2 = as_combined_fn (hi);
13789 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
13790 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
13792 /* If we don't support low-high, then check for even-odd. */
13793 if (!optab1
13794 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13795 || !optab2
13796 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13798 lookup_evenodd_internal_fn (ifn, &even, &odd);
13799 *code1 = as_combined_fn (even);
13800 *code2 = as_combined_fn (odd);
13801 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
13802 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
13805 else if (code.is_tree_code ())
13807 if (code == FIX_TRUNC_EXPR)
13809 /* The signedness is determined from output operand. */
13810 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13811 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13813 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
13814 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13815 && VECTOR_BOOLEAN_TYPE_P (vectype)
13816 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13817 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13819 /* If the input and result modes are the same, a different optab
13820 is needed where we pass in the number of units in vectype. */
13821 optab1 = vec_unpacks_sbool_lo_optab;
13822 optab2 = vec_unpacks_sbool_hi_optab;
13824 else
13826 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13827 optab2 = optab_for_tree_code (c2, vectype, optab_default);
13829 *code1 = c1;
13830 *code2 = c2;
13833 if (!optab1 || !optab2)
13834 return false;
13836 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13837 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13838 return false;
13841 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13842 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13844 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13845 return true;
13846 /* For scalar masks we may have different boolean
13847 vector types having the same QImode. Thus we
13848 add additional check for elements number. */
13849 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
13850 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13851 return true;
13854 /* Check if it's a multi-step conversion that can be done using intermediate
13855 types. */
13857 prev_type = vectype;
13858 prev_mode = vec_mode;
13860 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
13861 return false;
13863 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13864 intermediate steps in the promotion sequence. We try
13865 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
13866 not. */
13867 interm_types->create (MAX_INTERM_CVT_STEPS);
13868 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13870 intermediate_mode = insn_data[icode1].operand[0].mode;
13871 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13872 intermediate_type
13873 = vect_halve_mask_nunits (prev_type, intermediate_mode);
13874 else if (VECTOR_MODE_P (intermediate_mode))
13876 tree intermediate_element_type
13877 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
13878 TYPE_UNSIGNED (prev_type));
13879 intermediate_type
13880 = build_vector_type_for_mode (intermediate_element_type,
13881 intermediate_mode);
13883 else
13884 intermediate_type
13885 = lang_hooks.types.type_for_mode (intermediate_mode,
13886 TYPE_UNSIGNED (prev_type));
13888 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13889 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13890 && intermediate_mode == prev_mode
13891 && SCALAR_INT_MODE_P (prev_mode))
13893 /* If the input and result modes are the same, a different optab
13894 is needed where we pass in the number of units in vectype. */
13895 optab3 = vec_unpacks_sbool_lo_optab;
13896 optab4 = vec_unpacks_sbool_hi_optab;
13898 else
13900 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
13901 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
13904 if (!optab3 || !optab4
13905 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
13906 || insn_data[icode1].operand[0].mode != intermediate_mode
13907 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
13908 || insn_data[icode2].operand[0].mode != intermediate_mode
13909 || ((icode1 = optab_handler (optab3, intermediate_mode))
13910 == CODE_FOR_nothing)
13911 || ((icode2 = optab_handler (optab4, intermediate_mode))
13912 == CODE_FOR_nothing))
13913 break;
13915 interm_types->quick_push (intermediate_type);
13916 (*multi_step_cvt)++;
13918 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13919 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13921 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13922 return true;
13923 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
13924 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13925 return true;
13928 prev_type = intermediate_type;
13929 prev_mode = intermediate_mode;
13932 interm_types->release ();
13933 return false;
13937 /* Function supportable_narrowing_operation
13939 Check whether an operation represented by the code CODE is a
13940 narrowing operation that is supported by the target platform in
13941 vector form (i.e., when operating on arguments of type VECTYPE_IN
13942 and producing a result of type VECTYPE_OUT).
13944 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
13945 and FLOAT. This function checks if these operations are supported by
13946 the target platform directly via vector tree-codes.
13948 Output:
13949 - CODE1 is the code of a vector operation to be used when
13950 vectorizing the operation, if available.
13951 - MULTI_STEP_CVT determines the number of required intermediate steps in
13952 case of multi-step conversion (like int->short->char - in that case
13953 MULTI_STEP_CVT will be 1).
13954 - INTERM_TYPES contains the intermediate type required to perform the
13955 narrowing operation (short in the above example). */
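/* Correspondingly, a sketch for the int->short->char case above, assuming
   128-bit vectors: *CODE1 would be VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT
   would be 1, and INTERM_TYPES would hold the intermediate vector type of
   shorts.  */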
13957 bool
13958 supportable_narrowing_operation (code_helper code,
13959 tree vectype_out, tree vectype_in,
13960 code_helper *code1, int *multi_step_cvt,
13961 vec<tree> *interm_types)
13963 machine_mode vec_mode;
13964 enum insn_code icode1;
13965 optab optab1, interm_optab;
13966 tree vectype = vectype_in;
13967 tree narrow_vectype = vectype_out;
13968 enum tree_code c1;
13969 tree intermediate_type, prev_type;
13970 machine_mode intermediate_mode, prev_mode;
13971 int i;
13972 unsigned HOST_WIDE_INT n_elts;
13973 bool uns;
13975 if (!code.is_tree_code ())
13976 return false;
13978 *multi_step_cvt = 0;
13979 switch ((tree_code) code)
13981 CASE_CONVERT:
13982 c1 = VEC_PACK_TRUNC_EXPR;
13983 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
13984 && VECTOR_BOOLEAN_TYPE_P (vectype)
13985 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
13986 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
13987 && n_elts < BITS_PER_UNIT)
13988 optab1 = vec_pack_sbool_trunc_optab;
13989 else
13990 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13991 break;
13993 case FIX_TRUNC_EXPR:
13994 c1 = VEC_PACK_FIX_TRUNC_EXPR;
13995 /* The signedness is determined from output operand. */
13996 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13997 break;
13999 case FLOAT_EXPR:
14000 c1 = VEC_PACK_FLOAT_EXPR;
14001 optab1 = optab_for_tree_code (c1, vectype, optab_default);
14002 break;
14004 default:
14005 gcc_unreachable ();
14008 if (!optab1)
14009 return false;
14011 vec_mode = TYPE_MODE (vectype);
14012 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
14013 return false;
14015 *code1 = c1;
14017 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14019 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14020 return true;
14021 /* For scalar masks we may have different boolean
14022 vector types having the same QImode. Thus we
14023 add additional check for elements number. */
14024 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
14025 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14026 return true;
14029 if (code == FLOAT_EXPR)
14030 return false;
14032 /* Check if it's a multi-step conversion that can be done using intermediate
14033 types. */
14034 prev_mode = vec_mode;
14035 prev_type = vectype;
14036 if (code == FIX_TRUNC_EXPR)
14037 uns = TYPE_UNSIGNED (vectype_out);
14038 else
14039 uns = TYPE_UNSIGNED (vectype);
14041 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
14042 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
14043 costly than signed. */
14044 if (code == FIX_TRUNC_EXPR && uns)
14046 enum insn_code icode2;
14048 intermediate_type
14049 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
14050 interm_optab
14051 = optab_for_tree_code (c1, intermediate_type, optab_default);
14052 if (interm_optab != unknown_optab
14053 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
14054 && insn_data[icode1].operand[0].mode
14055 == insn_data[icode2].operand[0].mode)
14057 uns = false;
14058 optab1 = interm_optab;
14059 icode1 = icode2;
14063 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
14064 intermediate steps in the narrowing sequence. We try
14065 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
14066 interm_types->create (MAX_INTERM_CVT_STEPS);
14067 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
14069 intermediate_mode = insn_data[icode1].operand[0].mode;
14070 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
14071 intermediate_type
14072 = vect_double_mask_nunits (prev_type, intermediate_mode);
14073 else
14074 intermediate_type
14075 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
14076 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
14077 && VECTOR_BOOLEAN_TYPE_P (prev_type)
14078 && SCALAR_INT_MODE_P (prev_mode)
14079 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
14080 && n_elts < BITS_PER_UNIT)
14081 interm_optab = vec_pack_sbool_trunc_optab;
14082 else
14083 interm_optab
14084 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
14085 optab_default);
14086 if (!interm_optab
14087 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
14088 || insn_data[icode1].operand[0].mode != intermediate_mode
14089 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
14090 == CODE_FOR_nothing))
14091 break;
14093 interm_types->quick_push (intermediate_type);
14094 (*multi_step_cvt)++;
14096 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
14098 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14099 return true;
14100 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
14101 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
14102 return true;
14105 prev_mode = intermediate_mode;
14106 prev_type = intermediate_type;
14107 optab1 = interm_optab;
14110 interm_types->release ();
14111 return false;
14114 /* Generate and return a vector mask of MASK_TYPE such that
14115 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
14116 Add the statements to SEQ. */
14118 tree
14119 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
14120 tree end_index, const char *name)
14122 tree cmp_type = TREE_TYPE (start_index);
14123 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
14124 cmp_type, mask_type,
14125 OPTIMIZE_FOR_SPEED));
14126 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
14127 start_index, end_index,
14128 build_zero_cst (mask_type));
14129 tree tmp;
14130 if (name)
14131 tmp = make_temp_ssa_name (mask_type, NULL, name);
14132 else
14133 tmp = make_ssa_name (mask_type);
14134 gimple_call_set_lhs (call, tmp);
14135 gimple_seq_add_stmt (seq, call);
14136 return tmp;
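/* A minimal usage sketch for the function above; the helper name, the
   caller-provided iterator and the "loop_mask" label are invented for
   illustration and are not part of the vectorizer.  With a 4-element
   boolean MASK_TYPE, START_INDEX == 6 and END_INDEX == 9 the generated
   IFN_WHILE_ULT call yields the mask { 1, 1, 1, 0 }: element I is
   active while 6 + I < 9.  */

static void
example_emit_loop_mask (gimple_stmt_iterator *gsi, tree mask_type,
			tree start_index, tree end_index)
{
  gimple_seq seq = NULL;
  /* Build mask[I] = (START_INDEX + I < END_INDEX).  */
  tree mask = vect_gen_while (&seq, mask_type, start_index, end_index,
			      "loop_mask");
  /* Insert the mask-generating statements before the current position.  */
  gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
  (void) mask;
}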
14139 /* Generate a vector mask of type MASK_TYPE in which element I is false iff
14140 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
14142 tree
14143 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
14144 tree end_index)
14146 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
14147 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
14150 /* Try to compute the vector types required to vectorize STMT_INFO,
14151 returning true on success and false if vectorization isn't possible.
14152 If GROUP_SIZE is nonzero and we're performing BB vectorization,
14153 make sure that the number of elements in the vectors is no bigger
14154 than GROUP_SIZE.
14156 On success:
14158 - Set *STMT_VECTYPE_OUT to:
14159 - NULL_TREE if the statement doesn't need to be vectorized;
14160 - the equivalent of STMT_VINFO_VECTYPE otherwise.
14162 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
14163 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
14164 statement does not help to determine the overall number of units. */
14166 opt_result
14167 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
14168 tree *stmt_vectype_out,
14169 tree *nunits_vectype_out,
14170 unsigned int group_size)
14172 gimple *stmt = stmt_info->stmt;
14174 /* For BB vectorization, we should always have a group size once we've
14175 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
14176 are tentative requests during things like early data reference
14177 analysis and pattern recognition. */
14178 if (is_a <bb_vec_info> (vinfo))
14179 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
14180 else
14181 group_size = 0;
14183 *stmt_vectype_out = NULL_TREE;
14184 *nunits_vectype_out = NULL_TREE;
14186 if (gimple_get_lhs (stmt) == NULL_TREE
14187 /* MASK_STORE has no lhs, but is ok. */
14188 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
14190 if (is_a <gcall *> (stmt))
14192 /* Ignore calls with no lhs. These must be calls to
14193 #pragma omp simd functions, and the vectorization factor
14194 they really need can't be determined until
14195 vectorizable_simd_clone_call. */
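/* For illustration: a statement such as "foo (x);" where foo is
   declared with "#pragma omp declare simd" has no lhs, yet it may
   still be vectorized later as a SIMD clone call, so it is not
   rejected here.  */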
14196 if (dump_enabled_p ())
14197 dump_printf_loc (MSG_NOTE, vect_location,
14198 "defer to SIMD clone analysis.\n");
14199 return opt_result::success ();
14202 return opt_result::failure_at (stmt,
14203 "not vectorized: irregular stmt.%G", stmt);
14206 tree vectype;
14207 tree scalar_type = NULL_TREE;
14208 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
14210 vectype = STMT_VINFO_VECTYPE (stmt_info);
14211 if (dump_enabled_p ())
14212 dump_printf_loc (MSG_NOTE, vect_location,
14213 "precomputed vectype: %T\n", vectype);
14215 else if (vect_use_mask_type_p (stmt_info))
14217 unsigned int precision = stmt_info->mask_precision;
14218 scalar_type = build_nonstandard_integer_type (precision, 1);
14219 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
14220 if (!vectype)
14221 return opt_result::failure_at (stmt, "not vectorized: unsupported"
14222 " data-type %T\n", scalar_type);
14223 if (dump_enabled_p ())
14224 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
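/* For illustration: with mask_precision == 32 on a 128-bit target,
   SCALAR_TYPE is a 32-bit unsigned integer and VECTYPE a 4-element
   boolean vector, matching the data vectors the mask will control.  */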
14226 else
14228 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
14229 scalar_type = TREE_TYPE (DR_REF (dr));
14230 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
14231 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
14232 else
14233 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
14235 if (dump_enabled_p ())
14237 if (group_size)
14238 dump_printf_loc (MSG_NOTE, vect_location,
14239 "get vectype for scalar type (group size %d):"
14240 " %T\n", group_size, scalar_type);
14241 else
14242 dump_printf_loc (MSG_NOTE, vect_location,
14243 "get vectype for scalar type: %T\n", scalar_type);
14245 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
14246 if (!vectype)
14247 return opt_result::failure_at (stmt,
14248 "not vectorized:"
14249 " unsupported data-type %T\n",
14250 scalar_type);
14252 if (dump_enabled_p ())
14253 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
14256 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
14257 return opt_result::failure_at (stmt,
14258 "not vectorized: vector stmt in loop:%G",
14259 stmt);
14261 *stmt_vectype_out = vectype;
14263 /* Don't try to compute scalar types if the stmt produces a boolean
14264 vector; use the existing vector type instead. */
14265 tree nunits_vectype = vectype;
14266 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
14268 /* The number of units is set according to the smallest scalar
14269 type (or the largest vector size, but we only support one
14270 vector size per vectorization). */
14271 scalar_type = vect_get_smallest_scalar_type (stmt_info,
14272 TREE_TYPE (vectype));
14273 if (scalar_type != TREE_TYPE (vectype))
14275 if (dump_enabled_p ())
14276 dump_printf_loc (MSG_NOTE, vect_location,
14277 "get vectype for smallest scalar type: %T\n",
14278 scalar_type);
14279 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
14280 group_size);
14281 if (!nunits_vectype)
14282 return opt_result::failure_at
14283 (stmt, "not vectorized: unsupported data-type %T\n",
14284 scalar_type);
14285 if (dump_enabled_p ())
14286 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
14287 nunits_vectype);
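/* For illustration: for a widening conversion from short to int with
   *STMT_VECTYPE_OUT == V4SI, the smallest scalar type is short, so
   NUNITS_VECTYPE becomes V8HI and the statement contributes 8 units
   to the vectorization factor computation.  */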
14291 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
14292 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
14293 return opt_result::failure_at (stmt,
14294 "Not vectorized: Incompatible number "
14295 "of vector subparts between %T and %T\n",
14296 nunits_vectype, *stmt_vectype_out);
14298 if (dump_enabled_p ())
14300 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
14301 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
14302 dump_printf (MSG_NOTE, "\n");
14305 *nunits_vectype_out = nunits_vectype;
14306 return opt_result::success ();
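/* A minimal sketch of how a caller might consume the two outputs
   computed above; the helper name is invented for illustration.  */

static bool
example_record_vectype (vec_info *vinfo, stmt_vec_info stmt_info)
{
  tree stmt_vectype, nunits_vectype;
  opt_result res
    = vect_get_vector_types_for_stmt (vinfo, stmt_info, &stmt_vectype,
				      &nunits_vectype, 0);
  if (!res)
    return false;
  /* A null STMT_VECTYPE means the statement needs no vector code of its
     own; NUNITS_VECTYPE, when set, constrains the vectorization factor.  */
  if (stmt_vectype && !STMT_VINFO_VECTYPE (stmt_info))
    STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
  return true;
}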
14309 /* Generate and return a statement sequence that sets the vector length LEN to:
14311 min_of_start_and_end = min (START_INDEX, END_INDEX);
14312 left_len = END_INDEX - min_of_start_and_end;
14313 rhs = min (left_len, LEN_LIMIT);
14314 LEN = rhs;
14316 Note: the cost of the code generated by this function is modeled
14317 by vect_estimate_min_profitable_iters, so changes here may need
14318 corresponding changes there. */
14320 gimple_seq
14321 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
14323 gimple_seq stmts = NULL;
14324 tree len_type = TREE_TYPE (len);
14325 gcc_assert (TREE_TYPE (start_index) == len_type);
14327 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
14328 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
14329 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
14330 gimple* stmt = gimple_build_assign (len, rhs);
14331 gimple_seq_add_stmt (&stmts, stmt);
14333 return stmts;
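/* A worked example of the computation above: with START_INDEX == 3,
   END_INDEX == 10 and LEN_LIMIT == 4 we get min == 3, left_len == 7 and
   LEN == min (7, 4) == 4; once fewer than LEN_LIMIT elements remain
   (say START_INDEX == 8), LEN drops to 2.  The sketch below shows how a
   hypothetical caller could emit the sequence; the helper name and the
   iterator are invented for illustration.  */

static void
example_emit_len (gimple_stmt_iterator *gsi, tree len, tree start_index,
		  tree end_index, tree len_limit)
{
  gimple_seq stmts = vect_gen_len (len, start_index, end_index, len_limit);
  gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
}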