libgomp, testsuite: Do not call nonstandard functions
[official-gcc.git] / gcc / tree-vect-stmts.cc
blob43502dc169fc361ad71c14032e0d7f174cd4347e
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
57 #include "regs.h"
58 #include "attribs.h"
59 #include "optabs-libfuncs.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
64 /* Return the vectorized type for the given statement. */
66 tree
67 stmt_vectype (class _stmt_vec_info *stmt_info)
69 return STMT_VINFO_VECTYPE (stmt_info);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74 bool
75 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
80 class loop* loop;
82 if (!loop_vinfo)
83 return false;
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
87 return (bb->loop_father == loop->inner);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
94 static unsigned
95 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
108 stmt_info_for_cost si
109 = { count, kind, where, stmt_info, node, vectype, misalign };
110 body_cost_vec->safe_push (si);
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
116 unsigned
117 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
126 unsigned
127 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
136 unsigned
137 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, 0, where);
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
149 static tree
150 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
161 static tree
162 read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (vect, new_stmt);
178 gimple_assign_set_lhs (new_stmt, vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
181 return vect_name;
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
188 static void
189 write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
193 tree array_ref;
194 gimple *new_stmt;
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
208 static tree
209 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
211 tree mem_ref;
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
222 static void
223 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
233 /* Function vect_mark_relevant.
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
237 static void
238 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern, in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
271 if (live_p && relevant == vect_unused_in_scope)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
299 worklist->safe_push (stmt_info);
303 /* Function is_simple_and_all_uses_invariant
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
307 bool
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
311 tree op;
312 ssa_op_iter iter;
314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
315 if (!stmt)
316 return false;
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
320 enum vect_def_type dt = vect_uninitialized_def;
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
333 return true;
336 /* Function vect_stmt_relevant_p.
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - control stmts in the loop (except for the exit condition).
346 CHECKME: what other side effects would the vectorizer allow? */
348 static bool
349 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
350 enum vect_relevant *relevant, bool *live_p)
352 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
353 ssa_op_iter op_iter;
354 imm_use_iterator imm_iter;
355 use_operand_p use_p;
356 def_operand_p def_p;
358 *relevant = vect_unused_in_scope;
359 *live_p = false;
361 /* cond stmt other than loop exit cond. */
362 if (is_ctrl_stmt (stmt_info->stmt)
363 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
364 *relevant = vect_used_in_scope;
366 /* changing memory. */
367 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
368 if (gimple_vdef (stmt_info->stmt)
369 && !gimple_clobber_p (stmt_info->stmt))
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: stmt has vdefs.\n");
374 *relevant = vect_used_in_scope;
377 /* uses outside the loop. */
378 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
380 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
382 basic_block bb = gimple_bb (USE_STMT (use_p));
383 if (!flow_bb_inside_loop_p (loop, bb))
385 if (is_gimple_debug (USE_STMT (use_p)))
386 continue;
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: used out of loop.\n");
392 /* We expect all such uses to be in the loop exit phis
393 (because of loop closed form) */
394 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
395 gcc_assert (bb == single_exit (loop)->dest);
397 *live_p = true;
402 if (*live_p && *relevant == vect_unused_in_scope
403 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_NOTE, vect_location,
407 "vec_stmt_relevant_p: stmt live but not relevant.\n");
408 *relevant = vect_used_only_live;
411 return (*live_p || *relevant);
415 /* Function exist_non_indexing_operands_for_use_p
417 USE is one of the uses attached to STMT_INFO. Check if USE is
418 used in STMT_INFO for anything other than indexing an array. */
420 static bool
421 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
423 tree operand;
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info))
429 return true;
431 /* STMT has a data_ref. FORNOW this means that its of one of
432 the following forms:
433 -1- ARRAY_REF = var
434 -2- var = ARRAY_REF
435 (This should have been verified in analyze_data_refs).
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
439 for array indexing.
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
444 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
445 if (!assign || !gimple_assign_copy_p (assign))
447 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
448 if (call && gimple_call_internal_p (call))
450 internal_fn ifn = gimple_call_internal_fn (call);
451 int mask_index = internal_fn_mask_index (ifn);
452 if (mask_index >= 0
453 && use == gimple_call_arg (call, mask_index))
454 return true;
455 int stored_value_index = internal_fn_stored_value_index (ifn);
456 if (stored_value_index >= 0
457 && use == gimple_call_arg (call, stored_value_index))
458 return true;
459 if (internal_gather_scatter_fn_p (ifn)
460 && use == gimple_call_arg (call, 1))
461 return true;
463 return false;
466 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
467 return false;
468 operand = gimple_assign_rhs1 (assign);
469 if (TREE_CODE (operand) != SSA_NAME)
470 return false;
472 if (operand == use)
473 return true;
475 return false;
480 Function process_use.
482 Inputs:
483 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
484 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
485 that defined USE. This is done by calling mark_relevant and passing it
486 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
487 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
488 be performed.
490 Outputs:
491 Generally, LIVE_P and RELEVANT are used to define the liveness and
492 relevance info of the DEF_STMT of this USE:
493 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
494 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
495 Exceptions:
496 - case 1: If USE is used only for address computations (e.g. array indexing),
497 which does not need to be directly vectorized, then the liveness/relevance
498 of the respective DEF_STMT is left unchanged.
499 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
500 we skip DEF_STMT cause it had already been processed.
501 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
502 "relevant" will be modified accordingly.
504 Return true if everything is as expected. Return false otherwise. */
506 static opt_result
507 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
508 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
509 bool force)
511 stmt_vec_info dstmt_vinfo;
512 enum vect_def_type dt;
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
517 return opt_result::success ();
519 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
520 return opt_result::failure_at (stmt_vinfo->stmt,
521 "not vectorized:"
522 " unsupported use in stmt.\n");
524 if (!dstmt_vinfo)
525 return opt_result::success ();
527 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
528 basic_block bb = gimple_bb (stmt_vinfo->stmt);
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
537 && bb->loop_father == def_bb->loop_father)
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
542 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
543 return opt_result::success ();
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
548 d = dstmt_vinfo
549 inner-loop:
550 stmt # use (d)
551 outer-loop-tail-bb:
552 ... */
553 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
559 switch (relevant)
561 case vect_unused_in_scope:
562 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
563 vect_used_in_scope : vect_unused_in_scope;
564 break;
566 case vect_used_in_outer_by_reduction:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_by_reduction;
569 break;
571 case vect_used_in_outer:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
573 relevant = vect_used_in_scope;
574 break;
576 case vect_used_in_scope:
577 break;
579 default:
580 gcc_unreachable ();
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
587 inner-loop:
588 d = dstmt_vinfo
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
590 stmt # use (d) */
591 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE, vect_location,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
597 switch (relevant)
599 case vect_unused_in_scope:
600 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
602 vect_used_in_outer_by_reduction : vect_unused_in_scope;
603 break;
605 case vect_used_by_reduction:
606 case vect_used_only_live:
607 relevant = vect_used_in_outer_by_reduction;
608 break;
610 case vect_used_in_scope:
611 relevant = vect_used_in_outer;
612 break;
614 default:
615 gcc_unreachable ();
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
621 of course. */
622 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
626 loop_latch_edge (bb->loop_father))
627 == use))
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location,
631 "induction value on backedge.\n");
632 return opt_result::success ();
636 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
637 return opt_result::success ();
641 /* Function vect_mark_stmts_to_be_vectorized.
643 Not all stmts in the loop need to be vectorized. For example:
645 for i...
646 for j...
647 1. T0 = i + j
648 2. T1 = a[T0]
650 3. j = j + 1
652 Stmt 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
655 This pass detects such stmts. */
657 opt_result
658 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
660 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
661 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
662 unsigned int nbbs = loop->num_nodes;
663 gimple_stmt_iterator si;
664 unsigned int i;
665 basic_block bb;
666 bool live_p;
667 enum vect_relevant relevant;
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
671 auto_vec<stmt_vec_info, 64> worklist;
673 /* 1. Init worklist. */
674 for (i = 0; i < nbbs; i++)
676 bb = bbs[i];
677 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
679 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
682 phi_info->stmt);
684 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
685 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
687 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
689 if (is_gimple_debug (gsi_stmt (si)))
690 continue;
691 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "init: stmt relevant? %G", stmt_info->stmt);
696 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
697 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
701 /* 2. Process_worklist */
702 while (worklist.length () > 0)
704 use_operand_p use_p;
705 ssa_op_iter iter;
707 stmt_vec_info stmt_vinfo = worklist.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "worklist: examine stmt: %G", stmt_vinfo->stmt);
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
714 of STMT. */
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
730 case vect_reduction_def:
731 gcc_assert (relevant != vect_unused_in_scope);
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
736 return opt_result::failure_at
737 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
738 break;
740 case vect_nested_cycle:
741 if (relevant != vect_unused_in_scope
742 && relevant != vect_used_in_outer_by_reduction
743 && relevant != vect_used_in_outer)
744 return opt_result::failure_at
745 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
746 break;
748 case vect_double_reduction_def:
749 if (relevant != vect_unused_in_scope
750 && relevant != vect_used_by_reduction
751 && relevant != vect_used_only_live)
752 return opt_result::failure_at
753 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
754 break;
756 default:
757 break;
760 if (is_pattern_stmt_p (stmt_vinfo))
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
767 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
768 tree op = gimple_assign_rhs1 (assign);
770 i = 1;
771 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
773 opt_result res
774 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
775 loop_vinfo, relevant, &worklist, false);
776 if (!res)
777 return res;
778 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
779 loop_vinfo, relevant, &worklist, false);
780 if (!res)
781 return res;
782 i = 2;
784 for (; i < gimple_num_ops (assign); i++)
786 op = gimple_op (assign, i);
787 if (TREE_CODE (op) == SSA_NAME)
789 opt_result res
790 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
791 &worklist, false);
792 if (!res)
793 return res;
797 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
799 for (i = 0; i < gimple_call_num_args (call); i++)
801 tree arg = gimple_call_arg (call, i);
802 opt_result res
803 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
804 &worklist, false);
805 if (!res)
806 return res;
810 else
811 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
813 tree op = USE_FROM_PTR (use_p);
814 opt_result res
815 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
816 &worklist, false);
817 if (!res)
818 return res;
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
823 gather_scatter_info gs_info;
824 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
825 gcc_unreachable ();
826 opt_result res
827 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
828 &worklist, true);
829 if (!res)
831 if (fatal)
832 *fatal = false;
833 return res;
836 } /* while worklist */
838 return opt_result::success ();
841 /* Function vect_model_simple_cost.
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
847 static void
848 vect_model_simple_cost (vec_info *,
849 stmt_vec_info stmt_info, int ncopies,
850 enum vect_def_type *dt,
851 int ndts,
852 slp_tree node,
853 stmt_vector_for_cost *cost_vec,
854 vect_cost_for_stmt kind = vector_stmt)
856 int inside_cost = 0, prologue_cost = 0;
858 gcc_assert (cost_vec != NULL);
860 /* ??? Somehow we need to fix this at the callers. */
861 if (node)
862 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
864 if (!node)
865 /* Cost the "broadcast" of a scalar operand in to a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
867 cost model. */
868 for (int i = 0; i < ndts; i++)
869 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
870 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
871 stmt_info, 0, vect_prologue);
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
875 stmt_info, 0, vect_body);
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost, prologue_cost);
884 /* Model cost for type demotion and promotion operations. PWR is
885 normally zero for single-step promotions and demotions. It will be
886 one if two-step promotion/demotion is required, and so on. NCOPIES
887 is the number of vector results (and thus number of instructions)
888 for the narrowest end of the operation chain. Each additional
889 step doubles the number of instructions required. If WIDEN_ARITH
890 is true the stmt is doing widening arithmetic. */
892 static void
893 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
894 enum vect_def_type *dt,
895 unsigned int ncopies, int pwr,
896 stmt_vector_for_cost *cost_vec,
897 bool widen_arith)
899 int i;
900 int inside_cost = 0, prologue_cost = 0;
902 for (i = 0; i < pwr + 1; i++)
904 inside_cost += record_stmt_cost (cost_vec, ncopies,
905 widen_arith
906 ? vector_stmt : vec_promote_demote,
907 stmt_info, 0, vect_body);
908 ncopies *= 2;
911 /* FORNOW: Assuming maximum 2 args per stmts. */
912 for (i = 0; i < 2; i++)
913 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
914 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
915 stmt_info, 0, vect_prologue);
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE, vect_location,
919 "vect_model_promotion_demotion_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost, prologue_cost);
923 /* Returns true if the current function returns DECL. */
925 static bool
926 cfun_returns (tree decl)
928 edge_iterator ei;
929 edge e;
930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
932 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
933 if (!ret)
934 continue;
935 if (gimple_return_retval (ret) == decl)
936 return true;
937 /* We often end up with an aggregate copy to the result decl,
938 handle that case as well. First skip intermediate clobbers
939 though. */
940 gimple *def = ret;
943 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
945 while (gimple_clobber_p (def));
946 if (is_a <gassign *> (def)
947 && gimple_assign_lhs (def) == gimple_return_retval (ret)
948 && gimple_assign_rhs1 (def) == decl)
949 return true;
951 return false;
954 /* Function vect_model_store_cost
956 Models cost for stores. In the case of grouped accesses, one access
957 has the overhead of the grouped access attributed to it. */
959 static void
960 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
961 vect_memory_access_type memory_access_type,
962 gather_scatter_info *gs_info,
963 dr_alignment_support alignment_support_scheme,
964 int misalignment,
965 vec_load_store_type vls_type, slp_tree slp_node,
966 stmt_vector_for_cost *cost_vec)
968 unsigned int inside_cost = 0, prologue_cost = 0;
969 stmt_vec_info first_stmt_info = stmt_info;
970 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
972 /* ??? Somehow we need to fix this at the callers. */
973 if (slp_node)
974 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
976 if (vls_type == VLS_STORE_INVARIANT)
978 if (!slp_node)
979 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
980 stmt_info, 0, vect_prologue);
983 /* Grouped stores update all elements in the group at once,
984 so we want the DR for the first statement. */
985 if (!slp_node && grouped_access_p)
986 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
988 /* True if we should include any once-per-group costs as well as
989 the cost of the statement itself. For SLP we only get called
990 once per group anyhow. */
991 bool first_stmt_p = (first_stmt_info == stmt_info);
993 /* We assume that the cost of a single store-lanes instruction is
994 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
995 access is instead being provided by a permute-and-store operation,
996 include the cost of the permutes. */
997 if (first_stmt_p
998 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1000 /* Uses a high and low interleave or shuffle operations for each
1001 needed permute. */
1002 int group_size = DR_GROUP_SIZE (first_stmt_info);
1003 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1004 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1005 stmt_info, 0, vect_body);
1007 if (dump_enabled_p ())
1008 dump_printf_loc (MSG_NOTE, vect_location,
1009 "vect_model_store_cost: strided group_size = %d .\n",
1010 group_size);
1013 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1014 /* Costs of the stores. */
1015 if (memory_access_type == VMAT_ELEMENTWISE
1016 || memory_access_type == VMAT_GATHER_SCATTER)
1018 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1019 if (memory_access_type == VMAT_GATHER_SCATTER
1020 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1021 /* For emulated scatter N offset vector element extracts
1022 (we assume the scalar scaling and ptr + offset add is consumed by
1023 the load). */
1024 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1025 vec_to_scalar, stmt_info, 0,
1026 vect_body);
1027 /* N scalar stores plus extracting the elements. */
1028 inside_cost += record_stmt_cost (cost_vec,
1029 ncopies * assumed_nunits,
1030 scalar_store, stmt_info, 0, vect_body);
1032 else
1033 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1034 misalignment, &inside_cost, cost_vec);
1036 if (memory_access_type == VMAT_ELEMENTWISE
1037 || memory_access_type == VMAT_STRIDED_SLP
1038 || (memory_access_type == VMAT_GATHER_SCATTER
1039 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1041 /* N scalar stores plus extracting the elements. */
1042 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1043 inside_cost += record_stmt_cost (cost_vec,
1044 ncopies * assumed_nunits,
1045 vec_to_scalar, stmt_info, 0, vect_body);
1048 /* When vectorizing a store into the function result assign
1049 a penalty if the function returns in a multi-register location.
1050 In this case we assume we'll end up with having to spill the
1051 vector result and do piecewise loads as a conservative estimate. */
1052 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1053 if (base
1054 && (TREE_CODE (base) == RESULT_DECL
1055 || (DECL_P (base) && cfun_returns (base)))
1056 && !aggregate_value_p (base, cfun->decl))
1058 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1059 /* ??? Handle PARALLEL in some way. */
1060 if (REG_P (reg))
1062 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1063 /* Assume that a single reg-reg move is possible and cheap,
1064 do not account for vector to gp register move cost. */
1065 if (nregs > 1)
1067 /* Spill. */
1068 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1069 vector_store,
1070 stmt_info, 0, vect_epilogue);
1071 /* Loads. */
1072 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1073 scalar_load,
1074 stmt_info, 0, vect_epilogue);
1079 if (dump_enabled_p ())
1080 dump_printf_loc (MSG_NOTE, vect_location,
1081 "vect_model_store_cost: inside_cost = %d, "
1082 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1086 /* Calculate cost of DR's memory access. */
1087 void
1088 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1089 dr_alignment_support alignment_support_scheme,
1090 int misalignment,
1091 unsigned int *inside_cost,
1092 stmt_vector_for_cost *body_cost_vec)
1094 switch (alignment_support_scheme)
1096 case dr_aligned:
1098 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1099 vector_store, stmt_info, 0,
1100 vect_body);
1102 if (dump_enabled_p ())
1103 dump_printf_loc (MSG_NOTE, vect_location,
1104 "vect_model_store_cost: aligned.\n");
1105 break;
1108 case dr_unaligned_supported:
1110 /* Here, we assign an additional cost for the unaligned store. */
1111 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1112 unaligned_store, stmt_info,
1113 misalignment, vect_body);
1114 if (dump_enabled_p ())
1115 dump_printf_loc (MSG_NOTE, vect_location,
1116 "vect_model_store_cost: unaligned supported by "
1117 "hardware.\n");
1118 break;
1121 case dr_unaligned_unsupported:
1123 *inside_cost = VECT_MAX_COST;
1125 if (dump_enabled_p ())
1126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1127 "vect_model_store_cost: unsupported access.\n");
1128 break;
1131 default:
1132 gcc_unreachable ();
1136 /* Calculate cost of DR's memory access. */
1137 void
1138 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1139 dr_alignment_support alignment_support_scheme,
1140 int misalignment,
1141 bool add_realign_cost, unsigned int *inside_cost,
1142 unsigned int *prologue_cost,
1143 stmt_vector_for_cost *prologue_cost_vec,
1144 stmt_vector_for_cost *body_cost_vec,
1145 bool record_prologue_costs)
1147 switch (alignment_support_scheme)
1149 case dr_aligned:
1151 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1152 stmt_info, 0, vect_body);
1154 if (dump_enabled_p ())
1155 dump_printf_loc (MSG_NOTE, vect_location,
1156 "vect_model_load_cost: aligned.\n");
1158 break;
1160 case dr_unaligned_supported:
1162 /* Here, we assign an additional cost for the unaligned load. */
1163 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1164 unaligned_load, stmt_info,
1165 misalignment, vect_body);
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE, vect_location,
1169 "vect_model_load_cost: unaligned supported by "
1170 "hardware.\n");
1172 break;
1174 case dr_explicit_realign:
1176 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1177 vector_load, stmt_info, 0, vect_body);
1178 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1179 vec_perm, stmt_info, 0, vect_body);
1181 /* FIXME: If the misalignment remains fixed across the iterations of
1182 the containing loop, the following cost should be added to the
1183 prologue costs. */
1184 if (targetm.vectorize.builtin_mask_for_load)
1185 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1186 stmt_info, 0, vect_body);
1188 if (dump_enabled_p ())
1189 dump_printf_loc (MSG_NOTE, vect_location,
1190 "vect_model_load_cost: explicit realign\n");
1192 break;
1194 case dr_explicit_realign_optimized:
1196 if (dump_enabled_p ())
1197 dump_printf_loc (MSG_NOTE, vect_location,
1198 "vect_model_load_cost: unaligned software "
1199 "pipelined.\n");
1201 /* Unaligned software pipeline has a load of an address, an initial
1202 load, and possibly a mask operation to "prime" the loop. However,
1203 if this is an access in a group of loads, which provide grouped
1204 access, then the above cost should only be considered for one
1205 access in the group. Inside the loop, there is a load op
1206 and a realignment op. */
1208 if (add_realign_cost && record_prologue_costs)
1210 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1211 vector_stmt, stmt_info,
1212 0, vect_prologue);
1213 if (targetm.vectorize.builtin_mask_for_load)
1214 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1215 vector_stmt, stmt_info,
1216 0, vect_prologue);
1219 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1220 stmt_info, 0, vect_body);
1221 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1222 stmt_info, 0, vect_body);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_model_load_cost: explicit realign optimized"
1227 "\n");
1229 break;
1232 case dr_unaligned_unsupported:
1234 *inside_cost = VECT_MAX_COST;
1236 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1238 "vect_model_load_cost: unsupported access.\n");
1239 break;
1242 default:
1243 gcc_unreachable ();
1247 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1248 the loop preheader for the vectorized stmt STMT_VINFO. */
1250 static void
1251 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1252 gimple_stmt_iterator *gsi)
1254 if (gsi)
1255 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1256 else
1257 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1259 if (dump_enabled_p ())
1260 dump_printf_loc (MSG_NOTE, vect_location,
1261 "created new init_stmt: %G", new_stmt);
1264 /* Function vect_init_vector.
1266 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1267 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1268 vector type a vector with all elements equal to VAL is created first.
1269 Place the initialization at GSI if it is not NULL. Otherwise, place the
1270 initialization at the loop preheader.
1271 Return the DEF of INIT_STMT.
1272 It will be used in the vectorization of STMT_INFO. */
1274 tree
1275 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1276 gimple_stmt_iterator *gsi)
1278 gimple *init_stmt;
1279 tree new_temp;
1281 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1282 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1284 gcc_assert (VECTOR_TYPE_P (type));
1285 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1287 /* Scalar boolean value should be transformed into
1288 all zeros or all ones value before building a vector. */
1289 if (VECTOR_BOOLEAN_TYPE_P (type))
1291 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1292 tree false_val = build_zero_cst (TREE_TYPE (type));
1294 if (CONSTANT_CLASS_P (val))
1295 val = integer_zerop (val) ? false_val : true_val;
1296 else
1298 new_temp = make_ssa_name (TREE_TYPE (type));
1299 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1300 val, true_val, false_val);
1301 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1302 val = new_temp;
1305 else
1307 gimple_seq stmts = NULL;
1308 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1309 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1310 TREE_TYPE (type), val);
1311 else
1312 /* ??? Condition vectorization expects us to do
1313 promotion of invariant/external defs. */
1314 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1315 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1316 !gsi_end_p (gsi2); )
1318 init_stmt = gsi_stmt (gsi2);
1319 gsi_remove (&gsi2, false);
1320 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1324 val = build_vector_from_val (type, val);
1327 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1328 init_stmt = gimple_build_assign (new_temp, val);
1329 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1330 return new_temp;
1334 /* Function vect_get_vec_defs_for_operand.
1336 OP is an operand in STMT_VINFO. This function returns a vector of
1337 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1339 In the case that OP is an SSA_NAME which is defined in the loop, then
1340 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1342 In case OP is an invariant or constant, a new stmt that creates a vector def
1343 needs to be introduced. VECTYPE may be used to specify a required type for
1344 vector invariant. */
1346 void
1347 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1348 unsigned ncopies,
1349 tree op, vec<tree> *vec_oprnds, tree vectype)
1351 gimple *def_stmt;
1352 enum vect_def_type dt;
1353 bool is_simple_use;
1354 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_NOTE, vect_location,
1358 "vect_get_vec_defs_for_operand: %T\n", op);
1360 stmt_vec_info def_stmt_info;
1361 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1362 &def_stmt_info, &def_stmt);
1363 gcc_assert (is_simple_use);
1364 if (def_stmt && dump_enabled_p ())
1365 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1367 vec_oprnds->create (ncopies);
1368 if (dt == vect_constant_def || dt == vect_external_def)
1370 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1371 tree vector_type;
1373 if (vectype)
1374 vector_type = vectype;
1375 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1376 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1377 vector_type = truth_type_for (stmt_vectype);
1378 else
1379 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1381 gcc_assert (vector_type);
1382 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1383 while (ncopies--)
1384 vec_oprnds->quick_push (vop);
1386 else
1388 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1389 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1390 for (unsigned i = 0; i < ncopies; ++i)
1391 vec_oprnds->quick_push (gimple_get_lhs
1392 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1397 /* Get vectorized definitions for OP0 and OP1. */
1399 void
1400 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1401 unsigned ncopies,
1402 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1403 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1404 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1405 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1407 if (slp_node)
1409 if (op0)
1410 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1411 if (op1)
1412 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1413 if (op2)
1414 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1415 if (op3)
1416 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1418 else
1420 if (op0)
1421 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1422 op0, vec_oprnds0, vectype0);
1423 if (op1)
1424 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1425 op1, vec_oprnds1, vectype1);
1426 if (op2)
1427 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1428 op2, vec_oprnds2, vectype2);
1429 if (op3)
1430 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1431 op3, vec_oprnds3, vectype3);
1435 void
1436 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1437 unsigned ncopies,
1438 tree op0, vec<tree> *vec_oprnds0,
1439 tree op1, vec<tree> *vec_oprnds1,
1440 tree op2, vec<tree> *vec_oprnds2,
1441 tree op3, vec<tree> *vec_oprnds3)
1443 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1444 op0, vec_oprnds0, NULL_TREE,
1445 op1, vec_oprnds1, NULL_TREE,
1446 op2, vec_oprnds2, NULL_TREE,
1447 op3, vec_oprnds3, NULL_TREE);
1450 /* Helper function called by vect_finish_replace_stmt and
1451 vect_finish_stmt_generation. Set the location of the new
1452 statement and create and return a stmt_vec_info for it. */
1454 static void
1455 vect_finish_stmt_generation_1 (vec_info *,
1456 stmt_vec_info stmt_info, gimple *vec_stmt)
1458 if (dump_enabled_p ())
1459 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1461 if (stmt_info)
1463 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1465 /* While EH edges will generally prevent vectorization, stmt might
1466 e.g. be in a must-not-throw region. Ensure newly created stmts
1467 that could throw are part of the same region. */
1468 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1469 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1470 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1472 else
1473 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1476 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1477 which sets the same scalar result as STMT_INFO did. Create and return a
1478 stmt_vec_info for VEC_STMT. */
1480 void
1481 vect_finish_replace_stmt (vec_info *vinfo,
1482 stmt_vec_info stmt_info, gimple *vec_stmt)
1484 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1485 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1487 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1488 gsi_replace (&gsi, vec_stmt, true);
1490 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1493 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1494 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1496 void
1497 vect_finish_stmt_generation (vec_info *vinfo,
1498 stmt_vec_info stmt_info, gimple *vec_stmt,
1499 gimple_stmt_iterator *gsi)
1501 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1503 if (!gsi_end_p (*gsi)
1504 && gimple_has_mem_ops (vec_stmt))
1506 gimple *at_stmt = gsi_stmt (*gsi);
1507 tree vuse = gimple_vuse (at_stmt);
1508 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1510 tree vdef = gimple_vdef (at_stmt);
1511 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1512 gimple_set_modified (vec_stmt, true);
1513 /* If we have an SSA vuse and insert a store, update virtual
1514 SSA form to avoid triggering the renamer. Do so only
1515 if we can easily see all uses - which is what almost always
1516 happens with the way vectorized stmts are inserted. */
1517 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1518 && ((is_gimple_assign (vec_stmt)
1519 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1520 || (is_gimple_call (vec_stmt)
1521 && (!(gimple_call_flags (vec_stmt)
1522 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1523 || (gimple_call_lhs (vec_stmt)
1524 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1526 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1527 gimple_set_vdef (vec_stmt, new_vdef);
1528 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1532 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1533 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1536 /* We want to vectorize a call to combined function CFN with function
1537 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1538 as the types of all inputs. Check whether this is possible using
1539 an internal function, returning its code if so or IFN_LAST if not. */
1541 static internal_fn
1542 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1543 tree vectype_out, tree vectype_in)
1545 internal_fn ifn;
1546 if (internal_fn_p (cfn))
1547 ifn = as_internal_fn (cfn);
1548 else
1549 ifn = associated_internal_fn (fndecl);
1550 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1552 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1553 if (info.vectorizable)
1555 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1556 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1557 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1558 OPTIMIZE_FOR_SPEED))
1559 return ifn;
1562 return IFN_LAST;
1566 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1567 gimple_stmt_iterator *);
1569 /* Check whether a load or store statement in the loop described by
1570 LOOP_VINFO is possible in a loop using partial vectors. This is
1571 testing whether the vectorizer pass has the appropriate support,
1572 as well as whether the target does.
1574 VLS_TYPE says whether the statement is a load or store and VECTYPE
1575 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1576 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1577 says how the load or store is going to be implemented and GROUP_SIZE
1578 is the number of load or store statements in the containing group.
1579 If the access is a gather load or scatter store, GS_INFO describes
1580 its arguments. If the load or store is conditional, SCALAR_MASK is the
1581 condition under which it occurs.
1583 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1584 vectors is not supported, otherwise record the required rgroup control
1585 types. */
1587 static void
1588 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1589 slp_tree slp_node,
1590 vec_load_store_type vls_type,
1591 int group_size,
1592 vect_memory_access_type
1593 memory_access_type,
1594 gather_scatter_info *gs_info,
1595 tree scalar_mask)
1597 /* Invariant loads need no special support. */
1598 if (memory_access_type == VMAT_INVARIANT)
1599 return;
1601 unsigned int nvectors;
1602 if (slp_node)
1603 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1604 else
1605 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1607 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1608 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1609 machine_mode vecmode = TYPE_MODE (vectype);
1610 bool is_load = (vls_type == VLS_LOAD);
1611 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1613 internal_fn ifn
1614 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1615 : vect_store_lanes_supported (vectype, group_size, true));
1616 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1617 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1618 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1619 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1620 scalar_mask);
1621 else
1623 if (dump_enabled_p ())
1624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1625 "can't operate on partial vectors because"
1626 " the target doesn't have an appropriate"
1627 " load/store-lanes instruction.\n");
1628 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1630 return;
1633 if (memory_access_type == VMAT_GATHER_SCATTER)
1635 internal_fn ifn = (is_load
1636 ? IFN_MASK_GATHER_LOAD
1637 : IFN_MASK_SCATTER_STORE);
1638 internal_fn len_ifn = (is_load
1639 ? IFN_MASK_LEN_GATHER_LOAD
1640 : IFN_MASK_LEN_SCATTER_STORE);
1641 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1642 gs_info->memory_type,
1643 gs_info->offset_vectype,
1644 gs_info->scale))
1645 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1646 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1647 gs_info->memory_type,
1648 gs_info->offset_vectype,
1649 gs_info->scale))
1650 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1651 scalar_mask);
1652 else
1654 if (dump_enabled_p ())
1655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1656 "can't operate on partial vectors because"
1657 " the target doesn't have an appropriate"
1658 " gather load or scatter store instruction.\n");
1659 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1661 return;
1664 if (memory_access_type != VMAT_CONTIGUOUS
1665 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1667 /* Element X of the data must come from iteration i * VF + X of the
1668 scalar loop. We need more work to support other mappings. */
1669 if (dump_enabled_p ())
1670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1671 "can't operate on partial vectors because an"
1672 " access isn't contiguous.\n");
1673 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1674 return;
1677 if (!VECTOR_MODE_P (vecmode))
1679 if (dump_enabled_p ())
1680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1681 "can't operate on partial vectors when emulating"
1682 " vector operations.\n");
1683 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1684 return;
1687 /* We might load more scalars than we need for permuting SLP loads.
1688 We checked in get_group_load_store_type that the extra elements
1689 don't leak into a new vector. */
1690 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1692 unsigned int nvectors;
1693 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1694 return nvectors;
1695 gcc_unreachable ();
1698 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1699 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1700 machine_mode mask_mode;
1701 machine_mode vmode;
1702 bool using_partial_vectors_p = false;
1703 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1705 nvectors = group_memory_nvectors (group_size * vf, nunits);
1706 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1707 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1708 using_partial_vectors_p = true;
1710 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1711 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1713 nvectors = group_memory_nvectors (group_size * vf, nunits);
1714 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1715 using_partial_vectors_p = true;
1718 if (!using_partial_vectors_p)
1720 if (dump_enabled_p ())
1721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1722 "can't operate on partial vectors because the"
1723 " target doesn't have the appropriate partial"
1724 " vectorization load or store.\n");
1725 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1729 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1730 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1731 that needs to be applied to all loads and stores in a vectorized loop.
1732 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1733 otherwise return VEC_MASK & LOOP_MASK.
1735 MASK_TYPE is the type of both masks. If new statements are needed,
1736 insert them before GSI. */
1738 static tree
1739 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1740 tree vec_mask, gimple_stmt_iterator *gsi)
1742 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1743 if (!loop_mask)
1744 return vec_mask;
1746 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1748 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1749 return vec_mask;
1751 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1752 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1753 vec_mask, loop_mask);
1755 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1756 return and_res;
1759 /* Determine whether we can use a gather load or scatter store to vectorize
1760 strided load or store STMT_INFO by truncating the current offset to a
1761 smaller width. We need to be able to construct an offset vector:
1763 { 0, X, X*2, X*3, ... }
1765 without loss of precision, where X is STMT_INFO's DR_STEP.
1767 Return true if this is possible, describing the gather load or scatter
1768 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1770 static bool
1771 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1772 loop_vec_info loop_vinfo, bool masked_p,
1773 gather_scatter_info *gs_info)
1775 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1776 data_reference *dr = dr_info->dr;
1777 tree step = DR_STEP (dr);
1778 if (TREE_CODE (step) != INTEGER_CST)
1780 /* ??? Perhaps we could use range information here? */
1781 if (dump_enabled_p ())
1782 dump_printf_loc (MSG_NOTE, vect_location,
1783 "cannot truncate variable step.\n");
1784 return false;
1787 /* Get the number of bits in an element. */
1788 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1789 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1790 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1792 /* Set COUNT to the upper limit on the number of elements - 1.
1793 Start with the maximum vectorization factor. */
1794 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1796 /* Try lowering COUNT to the number of scalar latch iterations. */
1797 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1798 widest_int max_iters;
1799 if (max_loop_iterations (loop, &max_iters)
1800 && max_iters < count)
1801 count = max_iters.to_shwi ();
1803 /* Try scales of 1 and the element size. */
1804 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1805 wi::overflow_type overflow = wi::OVF_NONE;
1806 for (int i = 0; i < 2; ++i)
1808 int scale = scales[i];
1809 widest_int factor;
1810 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1811 continue;
1813 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1814 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1815 if (overflow)
1816 continue;
1817 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1818 unsigned int min_offset_bits = wi::min_precision (range, sign);
1820 /* Find the narrowest viable offset type. */
1821 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1822 tree offset_type = build_nonstandard_integer_type (offset_bits,
1823 sign == UNSIGNED);
1825 /* See whether the target supports the operation with an offset
1826 no narrower than OFFSET_TYPE. */
1827 tree memory_type = TREE_TYPE (DR_REF (dr));
1828 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1829 vectype, memory_type, offset_type, scale,
1830 &gs_info->ifn, &gs_info->offset_vectype)
1831 || gs_info->ifn == IFN_LAST)
1832 continue;
1834 gs_info->decl = NULL_TREE;
1835 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1836 but we don't need to store that here. */
1837 gs_info->base = NULL_TREE;
1838 gs_info->element_type = TREE_TYPE (vectype);
1839 gs_info->offset = fold_convert (offset_type, step);
1840 gs_info->offset_dt = vect_constant_def;
1841 gs_info->scale = scale;
1842 gs_info->memory_type = memory_type;
1843 return true;
1846 if (overflow && dump_enabled_p ())
1847 dump_printf_loc (MSG_NOTE, vect_location,
1848 "truncating gather/scatter offset to %d bits"
1849 " might change its value.\n", element_bits);
1851 return false;
1854 /* Return true if we can use gather/scatter internal functions to
1855 vectorize STMT_INFO, which is a grouped or strided load or store.
1856 MASKED_P is true if load or store is conditional. When returning
1857 true, fill in GS_INFO with the information required to perform the
1858 operation. */
1860 static bool
1861 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1862 loop_vec_info loop_vinfo, bool masked_p,
1863 gather_scatter_info *gs_info)
1865 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1866 || gs_info->ifn == IFN_LAST)
1867 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1868 masked_p, gs_info);
1870 tree old_offset_type = TREE_TYPE (gs_info->offset);
1871 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1873 gcc_assert (TYPE_PRECISION (new_offset_type)
1874 >= TYPE_PRECISION (old_offset_type));
1875 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_NOTE, vect_location,
1879 "using gather/scatter for strided/grouped access,"
1880 " scale = %d\n", gs_info->scale);
1882 return true;
1885 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1886 elements with a known constant step. Return -1 if that step
1887 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1889 static int
1890 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1892 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1893 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1894 size_zero_node);
1897 /* If the target supports a permute mask that reverses the elements in
1898 a vector of type VECTYPE, return that mask, otherwise return null. */
1900 static tree
1901 perm_mask_for_reverse (tree vectype)
1903 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1905 /* The encoding has a single stepped pattern. */
1906 vec_perm_builder sel (nunits, 1, 3);
1907 for (int i = 0; i < 3; ++i)
1908 sel.quick_push (nunits - 1 - i);
1910 vec_perm_indices indices (sel, 1, nunits);
1911 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1912 indices))
1913 return NULL_TREE;
1914 return vect_gen_perm_mask_checked (vectype, indices);
1917 /* A subroutine of get_load_store_type, with a subset of the same
1918 arguments. Handle the case where STMT_INFO is a load or store that
1919 accesses consecutive elements with a negative step. Sets *POFFSET
1920 to the offset to be applied to the DR for the first access. */
1922 static vect_memory_access_type
1923 get_negative_load_store_type (vec_info *vinfo,
1924 stmt_vec_info stmt_info, tree vectype,
1925 vec_load_store_type vls_type,
1926 unsigned int ncopies, poly_int64 *poffset)
1928 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1929 dr_alignment_support alignment_support_scheme;
1931 if (ncopies > 1)
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1935 "multiple types with negative step.\n");
1936 return VMAT_ELEMENTWISE;
1939 /* For backward running DRs the first access in vectype actually is
1940 N-1 elements before the address of the DR. */
1941 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1942 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
1944 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1945 alignment_support_scheme
1946 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1947 if (alignment_support_scheme != dr_aligned
1948 && alignment_support_scheme != dr_unaligned_supported)
1950 if (dump_enabled_p ())
1951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1952 "negative step but alignment required.\n");
1953 *poffset = 0;
1954 return VMAT_ELEMENTWISE;
1957 if (vls_type == VLS_STORE_INVARIANT)
1959 if (dump_enabled_p ())
1960 dump_printf_loc (MSG_NOTE, vect_location,
1961 "negative step with invariant source;"
1962 " no permute needed.\n");
1963 return VMAT_CONTIGUOUS_DOWN;
1966 if (!perm_mask_for_reverse (vectype))
1968 if (dump_enabled_p ())
1969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1970 "negative step and reversing not supported.\n");
1971 *poffset = 0;
1972 return VMAT_ELEMENTWISE;
1975 return VMAT_CONTIGUOUS_REVERSE;
1978 /* STMT_INFO is either a masked or unconditional store. Return the value
1979 being stored. */
1981 tree
1982 vect_get_store_rhs (stmt_vec_info stmt_info)
1984 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1986 gcc_assert (gimple_assign_single_p (assign));
1987 return gimple_assign_rhs1 (assign);
1989 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1991 internal_fn ifn = gimple_call_internal_fn (call);
1992 int index = internal_fn_stored_value_index (ifn);
1993 gcc_assert (index >= 0);
1994 return gimple_call_arg (call, index);
1996 gcc_unreachable ();
1999 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2001 This function returns a vector type which can be composed with NETLS pieces,
2002 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2003 same vector size as the return vector. It checks target whether supports
2004 pieces-size vector mode for construction firstly, if target fails to, check
2005 pieces-size scalar mode for construction further. It returns NULL_TREE if
2006 fails to find the available composition.
2008 For example, for (vtype=V16QI, nelts=4), we can probably get:
2009 - V16QI with PTYPE V4QI.
2010 - V4SI with PTYPE SI.
2011 - NULL_TREE. */
2013 static tree
2014 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2016 gcc_assert (VECTOR_TYPE_P (vtype));
2017 gcc_assert (known_gt (nelts, 0U));
2019 machine_mode vmode = TYPE_MODE (vtype);
2020 if (!VECTOR_MODE_P (vmode))
2021 return NULL_TREE;
2023 /* When we are asked to compose the vector from its components let
2024 that happen directly. */
2025 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
2027 *ptype = TREE_TYPE (vtype);
2028 return vtype;
2031 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2032 unsigned int pbsize;
2033 if (constant_multiple_p (vbsize, nelts, &pbsize))
2035 /* First check if vec_init optab supports construction from
2036 vector pieces directly. */
2037 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2038 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2039 machine_mode rmode;
2040 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2041 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2042 != CODE_FOR_nothing))
2044 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2045 return vtype;
2048 /* Otherwise check if exists an integer type of the same piece size and
2049 if vec_init optab supports construction from it directly. */
2050 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2051 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2052 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2053 != CODE_FOR_nothing))
2055 *ptype = build_nonstandard_integer_type (pbsize, 1);
2056 return build_vector_type (*ptype, nelts);
2060 return NULL_TREE;
2063 /* A subroutine of get_load_store_type, with a subset of the same
2064 arguments. Handle the case where STMT_INFO is part of a grouped load
2065 or store.
2067 For stores, the statements in the group are all consecutive
2068 and there is no gap at the end. For loads, the statements in the
2069 group might not be consecutive; there can be gaps between statements
2070 as well as at the end. */
2072 static bool
2073 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2074 tree vectype, slp_tree slp_node,
2075 bool masked_p, vec_load_store_type vls_type,
2076 vect_memory_access_type *memory_access_type,
2077 poly_int64 *poffset,
2078 dr_alignment_support *alignment_support_scheme,
2079 int *misalignment,
2080 gather_scatter_info *gs_info,
2081 internal_fn *lanes_ifn)
2083 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2084 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2085 stmt_vec_info first_stmt_info;
2086 unsigned int group_size;
2087 unsigned HOST_WIDE_INT gap;
2088 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2090 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2091 group_size = DR_GROUP_SIZE (first_stmt_info);
2092 gap = DR_GROUP_GAP (first_stmt_info);
2094 else
2096 first_stmt_info = stmt_info;
2097 group_size = 1;
2098 gap = 0;
2100 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2101 bool single_element_p = (stmt_info == first_stmt_info
2102 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2103 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2105 /* True if the vectorized statements would access beyond the last
2106 statement in the group. */
2107 bool overrun_p = false;
2109 /* True if we can cope with such overrun by peeling for gaps, so that
2110 there is at least one final scalar iteration after the vector loop. */
2111 bool can_overrun_p = (!masked_p
2112 && vls_type == VLS_LOAD
2113 && loop_vinfo
2114 && !loop->inner);
2116 /* There can only be a gap at the end of the group if the stride is
2117 known at compile time. */
2118 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2120 /* Stores can't yet have gaps. */
2121 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2123 if (slp_node)
2125 /* For SLP vectorization we directly vectorize a subchain
2126 without permutation. */
2127 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2128 first_dr_info
2129 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2130 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2132 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2133 separated by the stride, until we have a complete vector.
2134 Fall back to scalar accesses if that isn't possible. */
2135 if (multiple_p (nunits, group_size))
2136 *memory_access_type = VMAT_STRIDED_SLP;
2137 else
2138 *memory_access_type = VMAT_ELEMENTWISE;
2140 else
2142 overrun_p = loop_vinfo && gap != 0;
2143 if (overrun_p && vls_type != VLS_LOAD)
2145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2146 "Grouped store with gaps requires"
2147 " non-consecutive accesses\n");
2148 return false;
2150 /* An overrun is fine if the trailing elements are smaller
2151 than the alignment boundary B. Every vector access will
2152 be a multiple of B and so we are guaranteed to access a
2153 non-gap element in the same B-sized block. */
2154 if (overrun_p
2155 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2156 vectype)
2157 / vect_get_scalar_dr_size (first_dr_info)))
2158 overrun_p = false;
2160 /* If the gap splits the vector in half and the target
2161 can do half-vector operations avoid the epilogue peeling
2162 by simply loading half of the vector only. Usually
2163 the construction with an upper zero half will be elided. */
2164 dr_alignment_support alss;
2165 int misalign = dr_misalignment (first_dr_info, vectype);
2166 tree half_vtype;
2167 if (overrun_p
2168 && !masked_p
2169 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2170 vectype, misalign)))
2171 == dr_aligned
2172 || alss == dr_unaligned_supported)
2173 && known_eq (nunits, (group_size - gap) * 2)
2174 && known_eq (nunits, group_size)
2175 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2176 != NULL_TREE))
2177 overrun_p = false;
2179 if (overrun_p && !can_overrun_p)
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "Peeling for outer loop is not supported\n");
2184 return false;
2186 int cmp = compare_step_with_zero (vinfo, stmt_info);
2187 if (cmp < 0)
2189 if (single_element_p)
2190 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2191 only correct for single element "interleaving" SLP. */
2192 *memory_access_type = get_negative_load_store_type
2193 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2194 else
2196 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2197 separated by the stride, until we have a complete vector.
2198 Fall back to scalar accesses if that isn't possible. */
2199 if (multiple_p (nunits, group_size))
2200 *memory_access_type = VMAT_STRIDED_SLP;
2201 else
2202 *memory_access_type = VMAT_ELEMENTWISE;
2205 else if (cmp == 0 && loop_vinfo)
2207 gcc_assert (vls_type == VLS_LOAD);
2208 *memory_access_type = VMAT_INVARIANT;
2209 /* Invariant accesses perform only component accesses, alignment
2210 is irrelevant for them. */
2211 *alignment_support_scheme = dr_unaligned_supported;
2213 else
2214 *memory_access_type = VMAT_CONTIGUOUS;
2216 /* When we have a contiguous access across loop iterations
2217 but the access in the loop doesn't cover the full vector
2218 we can end up with no gap recorded but still excess
2219 elements accessed, see PR103116. Make sure we peel for
2220 gaps if necessary and sufficient and give up if not.
2222 If there is a combination of the access not covering the full
2223 vector and a gap recorded then we may need to peel twice. */
2224 if (loop_vinfo
2225 && *memory_access_type == VMAT_CONTIGUOUS
2226 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2227 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2228 nunits))
2230 unsigned HOST_WIDE_INT cnunits, cvf;
2231 if (!can_overrun_p
2232 || !nunits.is_constant (&cnunits)
2233 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2234 /* Peeling for gaps assumes that a single scalar iteration
2235 is enough to make sure the last vector iteration doesn't
2236 access excess elements.
2237 ??? Enhancements include peeling multiple iterations
2238 or using masked loads with a static mask. */
2239 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "peeling for gaps insufficient for "
2244 "access\n");
2245 return false;
2247 overrun_p = true;
2251 else
2253 /* We can always handle this case using elementwise accesses,
2254 but see if something more efficient is available. */
2255 *memory_access_type = VMAT_ELEMENTWISE;
2257 /* If there is a gap at the end of the group then these optimizations
2258 would access excess elements in the last iteration. */
2259 bool would_overrun_p = (gap != 0);
2260 /* An overrun is fine if the trailing elements are smaller than the
2261 alignment boundary B. Every vector access will be a multiple of B
2262 and so we are guaranteed to access a non-gap element in the
2263 same B-sized block. */
2264 if (would_overrun_p
2265 && !masked_p
2266 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2267 / vect_get_scalar_dr_size (first_dr_info)))
2268 would_overrun_p = false;
2270 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2271 && (can_overrun_p || !would_overrun_p)
2272 && compare_step_with_zero (vinfo, stmt_info) > 0)
2274 /* First cope with the degenerate case of a single-element
2275 vector. */
2276 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2279 else
2281 /* Otherwise try using LOAD/STORE_LANES. */
2282 *lanes_ifn
2283 = vls_type == VLS_LOAD
2284 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2285 : vect_store_lanes_supported (vectype, group_size,
2286 masked_p);
2287 if (*lanes_ifn != IFN_LAST)
2289 *memory_access_type = VMAT_LOAD_STORE_LANES;
2290 overrun_p = would_overrun_p;
2293 /* If that fails, try using permuting loads. */
2294 else if (vls_type == VLS_LOAD
2295 ? vect_grouped_load_supported (vectype,
2296 single_element_p,
2297 group_size)
2298 : vect_grouped_store_supported (vectype, group_size))
2300 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2301 overrun_p = would_overrun_p;
2306 /* As a last resort, trying using a gather load or scatter store.
2308 ??? Although the code can handle all group sizes correctly,
2309 it probably isn't a win to use separate strided accesses based
2310 on nearby locations. Or, even if it's a win over scalar code,
2311 it might not be a win over vectorizing at a lower VF, if that
2312 allows us to use contiguous accesses. */
2313 if (*memory_access_type == VMAT_ELEMENTWISE
2314 && single_element_p
2315 && loop_vinfo
2316 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2317 masked_p, gs_info))
2318 *memory_access_type = VMAT_GATHER_SCATTER;
2321 if (*memory_access_type == VMAT_GATHER_SCATTER
2322 || *memory_access_type == VMAT_ELEMENTWISE)
2324 *alignment_support_scheme = dr_unaligned_supported;
2325 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2327 else
2329 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2330 *alignment_support_scheme
2331 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2332 *misalignment);
2335 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2337 /* STMT is the leader of the group. Check the operands of all the
2338 stmts of the group. */
2339 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2340 while (next_stmt_info)
2342 tree op = vect_get_store_rhs (next_stmt_info);
2343 enum vect_def_type dt;
2344 if (!vect_is_simple_use (op, vinfo, &dt))
2346 if (dump_enabled_p ())
2347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2348 "use not simple.\n");
2349 return false;
2351 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2355 if (overrun_p)
2357 gcc_assert (can_overrun_p);
2358 if (dump_enabled_p ())
2359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2360 "Data access with gaps requires scalar "
2361 "epilogue loop\n");
2362 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2365 return true;
2368 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2369 if there is a memory access type that the vectorized form can use,
2370 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2371 or scatters, fill in GS_INFO accordingly. In addition
2372 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2373 the target does not support the alignment scheme. *MISALIGNMENT
2374 is set according to the alignment of the access (including
2375 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2377 SLP says whether we're performing SLP rather than loop vectorization.
2378 MASKED_P is true if the statement is conditional on a vectorized mask.
2379 VECTYPE is the vector type that the vectorized statements will use.
2380 NCOPIES is the number of vector statements that will be needed. */
2382 static bool
2383 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2384 tree vectype, slp_tree slp_node,
2385 bool masked_p, vec_load_store_type vls_type,
2386 unsigned int ncopies,
2387 vect_memory_access_type *memory_access_type,
2388 poly_int64 *poffset,
2389 dr_alignment_support *alignment_support_scheme,
2390 int *misalignment,
2391 gather_scatter_info *gs_info,
2392 internal_fn *lanes_ifn)
2394 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2395 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2396 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2397 *poffset = 0;
2398 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2400 *memory_access_type = VMAT_GATHER_SCATTER;
2401 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2402 gcc_unreachable ();
2403 /* When using internal functions, we rely on pattern recognition
2404 to convert the type of the offset to the type that the target
2405 requires, with the result being a call to an internal function.
2406 If that failed for some reason (e.g. because another pattern
2407 took priority), just handle cases in which the offset already
2408 has the right type. */
2409 else if (gs_info->ifn != IFN_LAST
2410 && !is_gimple_call (stmt_info->stmt)
2411 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2412 TREE_TYPE (gs_info->offset_vectype)))
2414 if (dump_enabled_p ())
2415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2416 "%s offset requires a conversion\n",
2417 vls_type == VLS_LOAD ? "gather" : "scatter");
2418 return false;
2420 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2421 &gs_info->offset_dt,
2422 &gs_info->offset_vectype))
2424 if (dump_enabled_p ())
2425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2426 "%s index use not simple.\n",
2427 vls_type == VLS_LOAD ? "gather" : "scatter");
2428 return false;
2430 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2432 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2433 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2434 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2435 (gs_info->offset_vectype),
2436 TYPE_VECTOR_SUBPARTS (vectype)))
2438 if (dump_enabled_p ())
2439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2440 "unsupported vector types for emulated "
2441 "gather.\n");
2442 return false;
2445 /* Gather-scatter accesses perform only component accesses, alignment
2446 is irrelevant for them. */
2447 *alignment_support_scheme = dr_unaligned_supported;
2449 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2451 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2452 masked_p,
2453 vls_type, memory_access_type, poffset,
2454 alignment_support_scheme,
2455 misalignment, gs_info, lanes_ifn))
2456 return false;
2458 else if (STMT_VINFO_STRIDED_P (stmt_info))
2460 gcc_assert (!slp_node);
2461 if (loop_vinfo
2462 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2463 masked_p, gs_info))
2464 *memory_access_type = VMAT_GATHER_SCATTER;
2465 else
2466 *memory_access_type = VMAT_ELEMENTWISE;
2467 /* Alignment is irrelevant here. */
2468 *alignment_support_scheme = dr_unaligned_supported;
2470 else
2472 int cmp = compare_step_with_zero (vinfo, stmt_info);
2473 if (cmp == 0)
2475 gcc_assert (vls_type == VLS_LOAD);
2476 *memory_access_type = VMAT_INVARIANT;
2477 /* Invariant accesses perform only component accesses, alignment
2478 is irrelevant for them. */
2479 *alignment_support_scheme = dr_unaligned_supported;
2481 else
2483 if (cmp < 0)
2484 *memory_access_type = get_negative_load_store_type
2485 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2486 else
2487 *memory_access_type = VMAT_CONTIGUOUS;
2488 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2489 vectype, *poffset);
2490 *alignment_support_scheme
2491 = vect_supportable_dr_alignment (vinfo,
2492 STMT_VINFO_DR_INFO (stmt_info),
2493 vectype, *misalignment);
2497 if ((*memory_access_type == VMAT_ELEMENTWISE
2498 || *memory_access_type == VMAT_STRIDED_SLP)
2499 && !nunits.is_constant ())
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2503 "Not using elementwise accesses due to variable "
2504 "vectorization factor.\n");
2505 return false;
2508 if (*alignment_support_scheme == dr_unaligned_unsupported)
2510 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2512 "unsupported unaligned access\n");
2513 return false;
2516 /* FIXME: At the moment the cost model seems to underestimate the
2517 cost of using elementwise accesses. This check preserves the
2518 traditional behavior until that can be fixed. */
2519 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2520 if (!first_stmt_info)
2521 first_stmt_info = stmt_info;
2522 if (*memory_access_type == VMAT_ELEMENTWISE
2523 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2524 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2525 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2526 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2528 if (dump_enabled_p ())
2529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2530 "not falling back to elementwise accesses\n");
2531 return false;
2533 return true;
2536 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2537 conditional operation STMT_INFO. When returning true, store the mask
2538 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2539 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2540 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2542 static bool
2543 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2544 slp_tree slp_node, unsigned mask_index,
2545 tree *mask, slp_tree *mask_node,
2546 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2548 enum vect_def_type mask_dt;
2549 tree mask_vectype;
2550 slp_tree mask_node_1;
2551 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2552 mask, &mask_node_1, &mask_dt, &mask_vectype))
2554 if (dump_enabled_p ())
2555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2556 "mask use not simple.\n");
2557 return false;
2560 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2562 if (dump_enabled_p ())
2563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2564 "mask argument is not a boolean.\n");
2565 return false;
2568 /* If the caller is not prepared for adjusting an external/constant
2569 SLP mask vector type fail. */
2570 if (slp_node
2571 && !mask_node
2572 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2574 if (dump_enabled_p ())
2575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2576 "SLP mask argument is not vectorized.\n");
2577 return false;
2580 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2581 if (!mask_vectype)
2582 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2584 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2586 if (dump_enabled_p ())
2587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2588 "could not find an appropriate vector mask type.\n");
2589 return false;
2592 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2593 TYPE_VECTOR_SUBPARTS (vectype)))
2595 if (dump_enabled_p ())
2596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2597 "vector mask type %T"
2598 " does not match vector data type %T.\n",
2599 mask_vectype, vectype);
2601 return false;
2604 *mask_dt_out = mask_dt;
2605 *mask_vectype_out = mask_vectype;
2606 if (mask_node)
2607 *mask_node = mask_node_1;
2608 return true;
2611 /* Return true if stored value RHS is suitable for vectorizing store
2612 statement STMT_INFO. When returning true, store the type of the
2613 definition in *RHS_DT_OUT, the type of the vectorized store value in
2614 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2616 static bool
2617 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2618 slp_tree slp_node, tree rhs,
2619 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2620 vec_load_store_type *vls_type_out)
2622 /* In the case this is a store from a constant make sure
2623 native_encode_expr can handle it. */
2624 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2626 if (dump_enabled_p ())
2627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2628 "cannot encode constant as a byte sequence.\n");
2629 return false;
2632 unsigned op_no = 0;
2633 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2635 if (gimple_call_internal_p (call)
2636 && internal_store_fn_p (gimple_call_internal_fn (call)))
2637 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2640 enum vect_def_type rhs_dt;
2641 tree rhs_vectype;
2642 slp_tree slp_op;
2643 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2644 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2646 if (dump_enabled_p ())
2647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2648 "use not simple.\n");
2649 return false;
2652 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2653 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2655 if (dump_enabled_p ())
2656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2657 "incompatible vector types.\n");
2658 return false;
2661 *rhs_dt_out = rhs_dt;
2662 *rhs_vectype_out = rhs_vectype;
2663 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2664 *vls_type_out = VLS_STORE_INVARIANT;
2665 else
2666 *vls_type_out = VLS_STORE;
2667 return true;
2670 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2671 Note that we support masks with floating-point type, in which case the
2672 floats are interpreted as a bitmask. */
2674 static tree
2675 vect_build_all_ones_mask (vec_info *vinfo,
2676 stmt_vec_info stmt_info, tree masktype)
2678 if (TREE_CODE (masktype) == INTEGER_TYPE)
2679 return build_int_cst (masktype, -1);
2680 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2682 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2683 mask = build_vector_from_val (masktype, mask);
2684 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2686 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2688 REAL_VALUE_TYPE r;
2689 long tmp[6];
2690 for (int j = 0; j < 6; ++j)
2691 tmp[j] = -1;
2692 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2693 tree mask = build_real (TREE_TYPE (masktype), r);
2694 mask = build_vector_from_val (masktype, mask);
2695 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2697 gcc_unreachable ();
2700 /* Build an all-zero merge value of type VECTYPE while vectorizing
2701 STMT_INFO as a gather load. */
2703 static tree
2704 vect_build_zero_merge_argument (vec_info *vinfo,
2705 stmt_vec_info stmt_info, tree vectype)
2707 tree merge;
2708 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2709 merge = build_int_cst (TREE_TYPE (vectype), 0);
2710 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2712 REAL_VALUE_TYPE r;
2713 long tmp[6];
2714 for (int j = 0; j < 6; ++j)
2715 tmp[j] = 0;
2716 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2717 merge = build_real (TREE_TYPE (vectype), r);
2719 else
2720 gcc_unreachable ();
2721 merge = build_vector_from_val (vectype, merge);
2722 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2725 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2726 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2727 the gather load operation. If the load is conditional, MASK is the
2728 unvectorized condition and MASK_DT is its definition type, otherwise
2729 MASK is null. */
2731 static void
2732 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2733 gimple_stmt_iterator *gsi,
2734 gimple **vec_stmt,
2735 gather_scatter_info *gs_info,
2736 tree mask,
2737 stmt_vector_for_cost *cost_vec)
2739 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2740 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2741 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2742 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2743 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2744 edge pe = loop_preheader_edge (loop);
2745 enum { NARROW, NONE, WIDEN } modifier;
2746 poly_uint64 gather_off_nunits
2747 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2749 /* FIXME: Keep the previous costing way in vect_model_load_cost by costing
2750 N scalar loads, but it should be tweaked to use target specific costs
2751 on related gather load calls. */
2752 if (cost_vec)
2754 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
2755 unsigned int inside_cost;
2756 inside_cost = record_stmt_cost (cost_vec, ncopies * assumed_nunits,
2757 scalar_load, stmt_info, 0, vect_body);
2758 if (dump_enabled_p ())
2759 dump_printf_loc (MSG_NOTE, vect_location,
2760 "vect_model_load_cost: inside_cost = %d, "
2761 "prologue_cost = 0 .\n",
2762 inside_cost);
2763 return;
2766 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2767 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2768 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2769 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2770 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2771 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2772 tree scaletype = TREE_VALUE (arglist);
2773 tree real_masktype = masktype;
2774 gcc_checking_assert (types_compatible_p (srctype, rettype)
2775 && (!mask
2776 || TREE_CODE (masktype) == INTEGER_TYPE
2777 || types_compatible_p (srctype, masktype)));
2778 if (mask)
2779 masktype = truth_type_for (srctype);
2781 tree mask_halftype = masktype;
2782 tree perm_mask = NULL_TREE;
2783 tree mask_perm_mask = NULL_TREE;
2784 if (known_eq (nunits, gather_off_nunits))
2785 modifier = NONE;
2786 else if (known_eq (nunits * 2, gather_off_nunits))
2788 modifier = WIDEN;
2790 /* Currently widening gathers and scatters are only supported for
2791 fixed-length vectors. */
2792 int count = gather_off_nunits.to_constant ();
2793 vec_perm_builder sel (count, count, 1);
2794 for (int i = 0; i < count; ++i)
2795 sel.quick_push (i | (count / 2));
2797 vec_perm_indices indices (sel, 1, count);
2798 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2799 indices);
2801 else if (known_eq (nunits, gather_off_nunits * 2))
2803 modifier = NARROW;
2805 /* Currently narrowing gathers and scatters are only supported for
2806 fixed-length vectors. */
2807 int count = nunits.to_constant ();
2808 vec_perm_builder sel (count, count, 1);
2809 sel.quick_grow (count);
2810 for (int i = 0; i < count; ++i)
2811 sel[i] = i < count / 2 ? i : i + count / 2;
2812 vec_perm_indices indices (sel, 2, count);
2813 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2815 ncopies *= 2;
2817 if (mask && VECTOR_TYPE_P (real_masktype))
2819 for (int i = 0; i < count; ++i)
2820 sel[i] = i | (count / 2);
2821 indices.new_vector (sel, 2, count);
2822 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2824 else if (mask)
2825 mask_halftype = truth_type_for (gs_info->offset_vectype);
2827 else
2828 gcc_unreachable ();
2830 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2831 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2833 tree ptr = fold_convert (ptrtype, gs_info->base);
2834 if (!is_gimple_min_invariant (ptr))
2836 gimple_seq seq;
2837 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2838 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2839 gcc_assert (!new_bb);
2842 tree scale = build_int_cst (scaletype, gs_info->scale);
2844 tree vec_oprnd0 = NULL_TREE;
2845 tree vec_mask = NULL_TREE;
2846 tree src_op = NULL_TREE;
2847 tree mask_op = NULL_TREE;
2848 tree prev_res = NULL_TREE;
2850 if (!mask)
2852 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2853 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2856 auto_vec<tree> vec_oprnds0;
2857 auto_vec<tree> vec_masks;
2858 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2859 modifier == WIDEN ? ncopies / 2 : ncopies,
2860 gs_info->offset, &vec_oprnds0);
2861 if (mask)
2862 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2863 modifier == NARROW ? ncopies / 2 : ncopies,
2864 mask, &vec_masks, masktype);
2865 for (int j = 0; j < ncopies; ++j)
2867 tree op, var;
2868 if (modifier == WIDEN && (j & 1))
2869 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2870 perm_mask, stmt_info, gsi);
2871 else
2872 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2874 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2876 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2877 TYPE_VECTOR_SUBPARTS (idxtype)));
2878 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2879 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2880 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2881 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2882 op = var;
2885 if (mask)
2887 if (mask_perm_mask && (j & 1))
2888 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2889 mask_perm_mask, stmt_info, gsi);
2890 else
2892 if (modifier == NARROW)
2894 if ((j & 1) == 0)
2895 vec_mask = vec_masks[j / 2];
2897 else
2898 vec_mask = vec_masks[j];
2900 mask_op = vec_mask;
2901 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2903 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2904 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2905 gcc_assert (known_eq (sub1, sub2));
2906 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2907 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2908 gassign *new_stmt
2909 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2910 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2911 mask_op = var;
2914 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2916 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2917 gassign *new_stmt
2918 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2919 : VEC_UNPACK_LO_EXPR,
2920 mask_op);
2921 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2922 mask_op = var;
2924 src_op = mask_op;
2927 tree mask_arg = mask_op;
2928 if (masktype != real_masktype)
2930 tree utype, optype = TREE_TYPE (mask_op);
2931 if (VECTOR_TYPE_P (real_masktype)
2932 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2933 utype = real_masktype;
2934 else
2935 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2936 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2937 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2938 gassign *new_stmt
2939 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2940 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2941 mask_arg = var;
2942 if (!useless_type_conversion_p (real_masktype, utype))
2944 gcc_assert (TYPE_PRECISION (utype)
2945 <= TYPE_PRECISION (real_masktype));
2946 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2947 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2948 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2949 mask_arg = var;
2951 src_op = build_zero_cst (srctype);
2953 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2954 mask_arg, scale);
2956 if (!useless_type_conversion_p (vectype, rettype))
2958 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2959 TYPE_VECTOR_SUBPARTS (rettype)));
2960 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2961 gimple_call_set_lhs (new_stmt, op);
2962 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2963 var = make_ssa_name (vec_dest);
2964 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2965 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2966 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2968 else
2970 var = make_ssa_name (vec_dest, new_stmt);
2971 gimple_call_set_lhs (new_stmt, var);
2972 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2975 if (modifier == NARROW)
2977 if ((j & 1) == 0)
2979 prev_res = var;
2980 continue;
2982 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2983 stmt_info, gsi);
2984 new_stmt = SSA_NAME_DEF_STMT (var);
2987 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2989 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2992 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2993 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2994 the scatter store operation. If the store is conditional, MASK is the
2995 unvectorized condition, otherwise MASK is null. */
2997 static void
2998 vect_build_scatter_store_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2999 gimple_stmt_iterator *gsi, gimple **vec_stmt,
3000 gather_scatter_info *gs_info, tree mask)
3002 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3003 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3004 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3005 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
3006 enum { NARROW, NONE, WIDEN } modifier;
3007 poly_uint64 scatter_off_nunits
3008 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
3010 tree perm_mask = NULL_TREE, mask_halfvectype = NULL_TREE;
3011 if (known_eq (nunits, scatter_off_nunits))
3012 modifier = NONE;
3013 else if (known_eq (nunits * 2, scatter_off_nunits))
3015 modifier = WIDEN;
3017 /* Currently gathers and scatters are only supported for
3018 fixed-length vectors. */
3019 unsigned int count = scatter_off_nunits.to_constant ();
3020 vec_perm_builder sel (count, count, 1);
3021 for (unsigned i = 0; i < (unsigned int) count; ++i)
3022 sel.quick_push (i | (count / 2));
3024 vec_perm_indices indices (sel, 1, count);
3025 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype, indices);
3026 gcc_assert (perm_mask != NULL_TREE);
3028 else if (known_eq (nunits, scatter_off_nunits * 2))
3030 modifier = NARROW;
3032 /* Currently gathers and scatters are only supported for
3033 fixed-length vectors. */
3034 unsigned int count = nunits.to_constant ();
3035 vec_perm_builder sel (count, count, 1);
3036 for (unsigned i = 0; i < (unsigned int) count; ++i)
3037 sel.quick_push (i | (count / 2));
3039 vec_perm_indices indices (sel, 2, count);
3040 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
3041 gcc_assert (perm_mask != NULL_TREE);
3042 ncopies *= 2;
3044 if (mask)
3045 mask_halfvectype = truth_type_for (gs_info->offset_vectype);
3047 else
3048 gcc_unreachable ();
3050 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
3051 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
3052 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3053 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3054 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3055 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
3056 tree scaletype = TREE_VALUE (arglist);
3058 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
3059 && TREE_CODE (rettype) == VOID_TYPE);
3061 tree ptr = fold_convert (ptrtype, gs_info->base);
3062 if (!is_gimple_min_invariant (ptr))
3064 gimple_seq seq;
3065 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
3066 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3067 edge pe = loop_preheader_edge (loop);
3068 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
3069 gcc_assert (!new_bb);
3072 tree mask_arg = NULL_TREE;
3073 if (mask == NULL_TREE)
3075 mask_arg = build_int_cst (masktype, -1);
3076 mask_arg = vect_init_vector (vinfo, stmt_info, mask_arg, masktype, NULL);
3079 tree scale = build_int_cst (scaletype, gs_info->scale);
3081 auto_vec<tree> vec_oprnds0;
3082 auto_vec<tree> vec_oprnds1;
3083 auto_vec<tree> vec_masks;
3084 if (mask)
3086 tree mask_vectype = truth_type_for (vectype);
3087 vect_get_vec_defs_for_operand (vinfo, stmt_info,
3088 modifier == NARROW ? ncopies / 2 : ncopies,
3089 mask, &vec_masks, mask_vectype);
3091 vect_get_vec_defs_for_operand (vinfo, stmt_info,
3092 modifier == WIDEN ? ncopies / 2 : ncopies,
3093 gs_info->offset, &vec_oprnds0);
3094 tree op = vect_get_store_rhs (stmt_info);
3095 vect_get_vec_defs_for_operand (vinfo, stmt_info,
3096 modifier == NARROW ? ncopies / 2 : ncopies, op,
3097 &vec_oprnds1);
3099 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3100 tree mask_op = NULL_TREE;
3101 tree src, vec_mask;
3102 for (int j = 0; j < ncopies; ++j)
3104 if (modifier == WIDEN)
3106 if (j & 1)
3107 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0, perm_mask,
3108 stmt_info, gsi);
3109 else
3110 op = vec_oprnd0 = vec_oprnds0[j / 2];
3111 src = vec_oprnd1 = vec_oprnds1[j];
3112 if (mask)
3113 mask_op = vec_mask = vec_masks[j];
3115 else if (modifier == NARROW)
3117 if (j & 1)
3118 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
3119 perm_mask, stmt_info, gsi);
3120 else
3121 src = vec_oprnd1 = vec_oprnds1[j / 2];
3122 op = vec_oprnd0 = vec_oprnds0[j];
3123 if (mask)
3124 mask_op = vec_mask = vec_masks[j / 2];
3126 else
3128 op = vec_oprnd0 = vec_oprnds0[j];
3129 src = vec_oprnd1 = vec_oprnds1[j];
3130 if (mask)
3131 mask_op = vec_mask = vec_masks[j];
3134 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
3136 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
3137 TYPE_VECTOR_SUBPARTS (srctype)));
3138 tree var = vect_get_new_ssa_name (srctype, vect_simple_var);
3139 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
3140 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
3141 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3142 src = var;
3145 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
3147 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
3148 TYPE_VECTOR_SUBPARTS (idxtype)));
3149 tree var = vect_get_new_ssa_name (idxtype, vect_simple_var);
3150 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
3151 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
3152 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3153 op = var;
3156 if (mask)
3158 tree utype;
3159 mask_arg = mask_op;
3160 if (modifier == NARROW)
3162 tree var
3163 = vect_get_new_ssa_name (mask_halfvectype, vect_simple_var);
3164 gassign *new_stmt
3165 = gimple_build_assign (var,
3166 (j & 1) ? VEC_UNPACK_HI_EXPR
3167 : VEC_UNPACK_LO_EXPR,
3168 mask_op);
3169 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3170 mask_arg = var;
3172 tree optype = TREE_TYPE (mask_arg);
3173 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
3174 utype = masktype;
3175 else
3176 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
3177 tree var = vect_get_new_ssa_name (utype, vect_scalar_var);
3178 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
3179 gassign *new_stmt
3180 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
3181 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3182 mask_arg = var;
3183 if (!useless_type_conversion_p (masktype, utype))
3185 gcc_assert (TYPE_PRECISION (utype) <= TYPE_PRECISION (masktype));
3186 tree var = vect_get_new_ssa_name (masktype, vect_scalar_var);
3187 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
3188 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3189 mask_arg = var;
3193 gcall *new_stmt
3194 = gimple_build_call (gs_info->decl, 5, ptr, mask_arg, op, src, scale);
3195 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3197 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3199 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3202 /* Prepare the base and offset in GS_INFO for vectorization.
3203 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
3204 to the vectorized offset argument for the first copy of STMT_INFO.
3205 STMT_INFO is the statement described by GS_INFO and LOOP is the
3206 containing loop. */
3208 static void
3209 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3210 class loop *loop, stmt_vec_info stmt_info,
3211 slp_tree slp_node, gather_scatter_info *gs_info,
3212 tree *dataref_ptr, vec<tree> *vec_offset)
3214 gimple_seq stmts = NULL;
3215 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3216 if (stmts != NULL)
3218 basic_block new_bb;
3219 edge pe = loop_preheader_edge (loop);
3220 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3221 gcc_assert (!new_bb);
3223 if (slp_node)
3224 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3225 else
3227 unsigned ncopies
3228 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3229 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3230 gs_info->offset, vec_offset,
3231 gs_info->offset_vectype);
3235 /* Prepare to implement a grouped or strided load or store using
3236 the gather load or scatter store operation described by GS_INFO.
3237 STMT_INFO is the load or store statement.
3239 Set *DATAREF_BUMP to the amount that should be added to the base
3240 address after each copy of the vectorized statement. Set *VEC_OFFSET
3241 to an invariant offset vector in which element I has the value
3242 I * DR_STEP / SCALE. */
3244 static void
3245 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3246 loop_vec_info loop_vinfo,
3247 gimple_stmt_iterator *gsi,
3248 gather_scatter_info *gs_info,
3249 tree *dataref_bump, tree *vec_offset,
3250 vec_loop_lens *loop_lens)
3252 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3253 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3255 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3257 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
3258 ivtmp_8 = _31 * 16 (step in bytes);
3259 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
3260 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
3261 tree loop_len
3262 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
3263 tree tmp
3264 = fold_build2 (MULT_EXPR, sizetype,
3265 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3266 loop_len);
3267 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
3268 GSI_SAME_STMT);
3270 else
3272 tree bump
3273 = size_binop (MULT_EXPR,
3274 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3275 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3276 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3279 /* The offset given in GS_INFO can have pointer type, so use the element
3280 type of the vector instead. */
3281 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3283 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3284 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3285 ssize_int (gs_info->scale));
3286 step = fold_convert (offset_type, step);
3288 /* Create {0, X, X*2, X*3, ...}. */
3289 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3290 build_zero_cst (offset_type), step);
3291 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3294 /* Prepare the pointer IVs which needs to be updated by a variable amount.
3295 Such variable amount is the outcome of .SELECT_VL. In this case, we can
3296 allow each iteration process the flexible number of elements as long as
3297 the number <= vf elments.
3299 Return data reference according to SELECT_VL.
3300 If new statements are needed, insert them before GSI. */
3302 static tree
3303 vect_get_loop_variant_data_ptr_increment (
3304 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
3305 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
3306 vect_memory_access_type memory_access_type)
3308 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3309 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3311 /* gather/scatter never reach here. */
3312 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
3314 /* When we support SELECT_VL pattern, we dynamic adjust
3315 the memory address by .SELECT_VL result.
3317 The result of .SELECT_VL is the number of elements to
3318 be processed of each iteration. So the memory address
3319 adjustment operation should be:
3321 addr = addr + .SELECT_VL (ARG..) * step;
3323 tree loop_len
3324 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
3325 tree len_type = TREE_TYPE (loop_len);
3326 /* Since the outcome of .SELECT_VL is element size, we should adjust
3327 it into bytesize so that it can be used in address pointer variable
3328 amount IVs adjustment. */
3329 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
3330 wide_int_to_tree (len_type, wi::to_widest (step)));
3331 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
3332 gassign *assign = gimple_build_assign (bump, tmp);
3333 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
3334 return bump;
3337 /* Return the amount that should be added to a vector pointer to move
3338 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3339 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3340 vectorization. */
3342 static tree
3343 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
3344 dr_vec_info *dr_info, tree aggr_type,
3345 vect_memory_access_type memory_access_type,
3346 vec_loop_lens *loop_lens = nullptr)
3348 if (memory_access_type == VMAT_INVARIANT)
3349 return size_zero_node;
3351 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3352 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3353 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
3354 loop_lens, dr_info,
3355 memory_access_type);
3357 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3358 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3359 if (tree_int_cst_sgn (step) == -1)
3360 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3361 return iv_step;
3364 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3366 static bool
3367 vectorizable_bswap (vec_info *vinfo,
3368 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3369 gimple **vec_stmt, slp_tree slp_node,
3370 slp_tree *slp_op,
3371 tree vectype_in, stmt_vector_for_cost *cost_vec)
3373 tree op, vectype;
3374 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3375 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3376 unsigned ncopies;
3378 op = gimple_call_arg (stmt, 0);
3379 vectype = STMT_VINFO_VECTYPE (stmt_info);
3380 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3382 /* Multiple types in SLP are handled by creating the appropriate number of
3383 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3384 case of SLP. */
3385 if (slp_node)
3386 ncopies = 1;
3387 else
3388 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3390 gcc_assert (ncopies >= 1);
3392 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3393 if (! char_vectype)
3394 return false;
3396 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3397 unsigned word_bytes;
3398 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3399 return false;
3401 /* The encoding uses one stepped pattern for each byte in the word. */
3402 vec_perm_builder elts (num_bytes, word_bytes, 3);
3403 for (unsigned i = 0; i < 3; ++i)
3404 for (unsigned j = 0; j < word_bytes; ++j)
3405 elts.quick_push ((i + 1) * word_bytes - j - 1);
3407 vec_perm_indices indices (elts, 1, num_bytes);
3408 machine_mode vmode = TYPE_MODE (char_vectype);
3409 if (!can_vec_perm_const_p (vmode, vmode, indices))
3410 return false;
3412 if (! vec_stmt)
3414 if (slp_node
3415 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3417 if (dump_enabled_p ())
3418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3419 "incompatible vector types for invariants\n");
3420 return false;
3423 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3424 DUMP_VECT_SCOPE ("vectorizable_bswap");
3425 record_stmt_cost (cost_vec,
3426 1, vector_stmt, stmt_info, 0, vect_prologue);
3427 record_stmt_cost (cost_vec,
3428 slp_node
3429 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3430 vec_perm, stmt_info, 0, vect_body);
3431 return true;
3434 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3436 /* Transform. */
3437 vec<tree> vec_oprnds = vNULL;
3438 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3439 op, &vec_oprnds);
3440 /* Arguments are ready. create the new vector stmt. */
3441 unsigned i;
3442 tree vop;
3443 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3445 gimple *new_stmt;
3446 tree tem = make_ssa_name (char_vectype);
3447 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3448 char_vectype, vop));
3449 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3450 tree tem2 = make_ssa_name (char_vectype);
3451 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3452 tem, tem, bswap_vconst);
3453 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3454 tem = make_ssa_name (vectype);
3455 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3456 vectype, tem2));
3457 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3458 if (slp_node)
3459 slp_node->push_vec_def (new_stmt);
3460 else
3461 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3464 if (!slp_node)
3465 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3467 vec_oprnds.release ();
3468 return true;
3471 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3472 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3473 in a single step. On success, store the binary pack code in
3474 *CONVERT_CODE. */
3476 static bool
3477 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3478 code_helper *convert_code)
3480 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3481 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3482 return false;
3484 code_helper code;
3485 int multi_step_cvt = 0;
3486 auto_vec <tree, 8> interm_types;
3487 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3488 &code, &multi_step_cvt, &interm_types)
3489 || multi_step_cvt)
3490 return false;
3492 *convert_code = code;
3493 return true;
3496 /* Function vectorizable_call.
3498 Check if STMT_INFO performs a function call that can be vectorized.
3499 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3500 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3501 Return true if STMT_INFO is vectorizable in this way. */
3503 static bool
3504 vectorizable_call (vec_info *vinfo,
3505 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3506 gimple **vec_stmt, slp_tree slp_node,
3507 stmt_vector_for_cost *cost_vec)
3509 gcall *stmt;
3510 tree vec_dest;
3511 tree scalar_dest;
3512 tree op;
3513 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3514 tree vectype_out, vectype_in;
3515 poly_uint64 nunits_in;
3516 poly_uint64 nunits_out;
3517 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3518 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3519 tree fndecl, new_temp, rhs_type;
3520 enum vect_def_type dt[4]
3521 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3522 vect_unknown_def_type };
3523 tree vectypes[ARRAY_SIZE (dt)] = {};
3524 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3525 int ndts = ARRAY_SIZE (dt);
3526 int ncopies, j;
3527 auto_vec<tree, 8> vargs;
3528 enum { NARROW, NONE, WIDEN } modifier;
3529 size_t i, nargs;
3530 tree lhs;
3532 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3533 return false;
3535 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3536 && ! vec_stmt)
3537 return false;
3539 /* Is STMT_INFO a vectorizable call? */
3540 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3541 if (!stmt)
3542 return false;
3544 if (gimple_call_internal_p (stmt)
3545 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3546 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3547 /* Handled by vectorizable_load and vectorizable_store. */
3548 return false;
3550 if (gimple_call_lhs (stmt) == NULL_TREE
3551 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3552 return false;
3554 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3556 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3558 /* Process function arguments. */
3559 rhs_type = NULL_TREE;
3560 vectype_in = NULL_TREE;
3561 nargs = gimple_call_num_args (stmt);
3563 /* Bail out if the function has more than four arguments, we do not have
3564 interesting builtin functions to vectorize with more than two arguments
3565 except for fma. No arguments is also not good. */
3566 if (nargs == 0 || nargs > 4)
3567 return false;
3569 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3570 combined_fn cfn = gimple_call_combined_fn (stmt);
3571 if (cfn == CFN_GOMP_SIMD_LANE)
3573 nargs = 0;
3574 rhs_type = unsigned_type_node;
3577 int mask_opno = -1;
3578 if (internal_fn_p (cfn))
3579 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3581 for (i = 0; i < nargs; i++)
3583 if ((int) i == mask_opno)
3585 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3586 &op, &slp_op[i], &dt[i], &vectypes[i]))
3587 return false;
3588 continue;
3591 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3592 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3594 if (dump_enabled_p ())
3595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3596 "use not simple.\n");
3597 return false;
3600 /* We can only handle calls with arguments of the same type. */
3601 if (rhs_type
3602 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3604 if (dump_enabled_p ())
3605 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3606 "argument types differ.\n");
3607 return false;
3609 if (!rhs_type)
3610 rhs_type = TREE_TYPE (op);
3612 if (!vectype_in)
3613 vectype_in = vectypes[i];
3614 else if (vectypes[i]
3615 && !types_compatible_p (vectypes[i], vectype_in))
3617 if (dump_enabled_p ())
3618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3619 "argument vector types differ.\n");
3620 return false;
3623 /* If all arguments are external or constant defs, infer the vector type
3624 from the scalar type. */
3625 if (!vectype_in)
3626 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3627 if (vec_stmt)
3628 gcc_assert (vectype_in);
3629 if (!vectype_in)
3631 if (dump_enabled_p ())
3632 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3633 "no vectype for scalar type %T\n", rhs_type);
3635 return false;
3637 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3638 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3639 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3640 by a pack of the two vectors into an SI vector. We would need
3641 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3642 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3644 if (dump_enabled_p ())
3645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3646 "mismatched vector sizes %T and %T\n",
3647 vectype_in, vectype_out);
3648 return false;
3651 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3652 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3654 if (dump_enabled_p ())
3655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3656 "mixed mask and nonmask vector types\n");
3657 return false;
3660 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3662 if (dump_enabled_p ())
3663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3664 "use emulated vector type for call\n");
3665 return false;
3668 /* FORNOW */
3669 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3670 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3671 if (known_eq (nunits_in * 2, nunits_out))
3672 modifier = NARROW;
3673 else if (known_eq (nunits_out, nunits_in))
3674 modifier = NONE;
3675 else if (known_eq (nunits_out * 2, nunits_in))
3676 modifier = WIDEN;
3677 else
3678 return false;
3680 /* We only handle functions that do not read or clobber memory. */
3681 if (gimple_vuse (stmt))
3683 if (dump_enabled_p ())
3684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3685 "function reads from or writes to memory.\n");
3686 return false;
3689 /* For now, we only vectorize functions if a target specific builtin
3690 is available. TODO -- in some cases, it might be profitable to
3691 insert the calls for pieces of the vector, in order to be able
3692 to vectorize other operations in the loop. */
3693 fndecl = NULL_TREE;
3694 internal_fn ifn = IFN_LAST;
3695 tree callee = gimple_call_fndecl (stmt);
3697 /* First try using an internal function. */
3698 code_helper convert_code = MAX_TREE_CODES;
3699 if (cfn != CFN_LAST
3700 && (modifier == NONE
3701 || (modifier == NARROW
3702 && simple_integer_narrowing (vectype_out, vectype_in,
3703 &convert_code))))
3704 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3705 vectype_in);
3707 /* If that fails, try asking for a target-specific built-in function. */
3708 if (ifn == IFN_LAST)
3710 if (cfn != CFN_LAST)
3711 fndecl = targetm.vectorize.builtin_vectorized_function
3712 (cfn, vectype_out, vectype_in);
3713 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3714 fndecl = targetm.vectorize.builtin_md_vectorized_function
3715 (callee, vectype_out, vectype_in);
3718 if (ifn == IFN_LAST && !fndecl)
3720 if (cfn == CFN_GOMP_SIMD_LANE
3721 && !slp_node
3722 && loop_vinfo
3723 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3724 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3725 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3726 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3728 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3729 { 0, 1, 2, ... vf - 1 } vector. */
3730 gcc_assert (nargs == 0);
3732 else if (modifier == NONE
3733 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3734 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3735 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3736 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3737 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3738 slp_op, vectype_in, cost_vec);
3739 else
3741 if (dump_enabled_p ())
3742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3743 "function is not vectorizable.\n");
3744 return false;
3748 if (slp_node)
3749 ncopies = 1;
3750 else if (modifier == NARROW && ifn == IFN_LAST)
3751 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3752 else
3753 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3755 /* Sanity check: make sure that at least one copy of the vectorized stmt
3756 needs to be generated. */
3757 gcc_assert (ncopies >= 1);
3759 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3760 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3761 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3762 int len_opno = internal_fn_len_index (cond_len_fn);
3763 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3764 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3765 if (!vec_stmt) /* transformation not required. */
3767 if (slp_node)
3768 for (i = 0; i < nargs; ++i)
3769 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3770 vectypes[i]
3771 ? vectypes[i] : vectype_in))
3773 if (dump_enabled_p ())
3774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3775 "incompatible vector types for invariants\n");
3776 return false;
3778 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3779 DUMP_VECT_SCOPE ("vectorizable_call");
3780 vect_model_simple_cost (vinfo, stmt_info,
3781 ncopies, dt, ndts, slp_node, cost_vec);
3782 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3783 record_stmt_cost (cost_vec, ncopies / 2,
3784 vec_promote_demote, stmt_info, 0, vect_body);
3786 if (loop_vinfo
3787 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3788 && (reduc_idx >= 0 || mask_opno >= 0))
3790 if (reduc_idx >= 0
3791 && (cond_fn == IFN_LAST
3792 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3793 OPTIMIZE_FOR_SPEED))
3794 && (cond_len_fn == IFN_LAST
3795 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3796 OPTIMIZE_FOR_SPEED)))
3798 if (dump_enabled_p ())
3799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3800 "can't use a fully-masked loop because no"
3801 " conditional operation is available.\n");
3802 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3804 else
3806 unsigned int nvectors
3807 = (slp_node
3808 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3809 : ncopies);
3810 tree scalar_mask = NULL_TREE;
3811 if (mask_opno >= 0)
3812 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3813 if (cond_len_fn != IFN_LAST
3814 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3815 OPTIMIZE_FOR_SPEED))
3816 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3818 else
3819 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3820 scalar_mask);
3823 return true;
3826 /* Transform. */
3828 if (dump_enabled_p ())
3829 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3831 /* Handle def. */
3832 scalar_dest = gimple_call_lhs (stmt);
3833 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3835 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3836 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3837 unsigned int vect_nargs = nargs;
3838 if (len_loop_p)
3840 if (len_opno >= 0)
3842 ifn = cond_len_fn;
3843 /* COND_* -> COND_LEN_* takes 2 extra arguments:LEN,BIAS. */
3844 vect_nargs += 2;
3846 else if (reduc_idx >= 0)
3847 gcc_unreachable ();
3849 else if (masked_loop_p && reduc_idx >= 0)
3851 ifn = cond_fn;
3852 vect_nargs += 2;
3855 if (modifier == NONE || ifn != IFN_LAST)
3857 tree prev_res = NULL_TREE;
3858 vargs.safe_grow (vect_nargs, true);
3859 auto_vec<vec<tree> > vec_defs (nargs);
3860 for (j = 0; j < ncopies; ++j)
3862 /* Build argument list for the vectorized call. */
3863 if (slp_node)
3865 vec<tree> vec_oprnds0;
3867 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3868 vec_oprnds0 = vec_defs[0];
3870 /* Arguments are ready. Create the new vector stmt. */
3871 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3873 int varg = 0;
3874 if (masked_loop_p && reduc_idx >= 0)
3876 unsigned int vec_num = vec_oprnds0.length ();
3877 /* Always true for SLP. */
3878 gcc_assert (ncopies == 1);
3879 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3880 gsi, masks, vec_num,
3881 vectype_out, i);
3883 size_t k;
3884 for (k = 0; k < nargs; k++)
3886 vec<tree> vec_oprndsk = vec_defs[k];
3887 vargs[varg++] = vec_oprndsk[i];
3889 if (masked_loop_p && reduc_idx >= 0)
3890 vargs[varg++] = vargs[reduc_idx + 1];
3891 gimple *new_stmt;
3892 if (modifier == NARROW)
3894 /* We don't define any narrowing conditional functions
3895 at present. */
3896 gcc_assert (mask_opno < 0);
3897 tree half_res = make_ssa_name (vectype_in);
3898 gcall *call
3899 = gimple_build_call_internal_vec (ifn, vargs);
3900 gimple_call_set_lhs (call, half_res);
3901 gimple_call_set_nothrow (call, true);
3902 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3903 if ((i & 1) == 0)
3905 prev_res = half_res;
3906 continue;
3908 new_temp = make_ssa_name (vec_dest);
3909 new_stmt = vect_gimple_build (new_temp, convert_code,
3910 prev_res, half_res);
3911 vect_finish_stmt_generation (vinfo, stmt_info,
3912 new_stmt, gsi);
3914 else
3916 if (len_opno >= 0 && len_loop_p)
3918 unsigned int vec_num = vec_oprnds0.length ();
3919 /* Always true for SLP. */
3920 gcc_assert (ncopies == 1);
3921 tree len
3922 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3923 vectype_out, i, 1);
3924 signed char biasval
3925 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3926 tree bias = build_int_cst (intQI_type_node, biasval);
3927 vargs[len_opno] = len;
3928 vargs[len_opno + 1] = bias;
3930 else if (mask_opno >= 0 && masked_loop_p)
3932 unsigned int vec_num = vec_oprnds0.length ();
3933 /* Always true for SLP. */
3934 gcc_assert (ncopies == 1);
3935 tree mask = vect_get_loop_mask (loop_vinfo,
3936 gsi, masks, vec_num,
3937 vectype_out, i);
3938 vargs[mask_opno] = prepare_vec_mask
3939 (loop_vinfo, TREE_TYPE (mask), mask,
3940 vargs[mask_opno], gsi);
3943 gcall *call;
3944 if (ifn != IFN_LAST)
3945 call = gimple_build_call_internal_vec (ifn, vargs);
3946 else
3947 call = gimple_build_call_vec (fndecl, vargs);
3948 new_temp = make_ssa_name (vec_dest, call);
3949 gimple_call_set_lhs (call, new_temp);
3950 gimple_call_set_nothrow (call, true);
3951 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3952 new_stmt = call;
3954 slp_node->push_vec_def (new_stmt);
3956 continue;
3959 int varg = 0;
3960 if (masked_loop_p && reduc_idx >= 0)
3961 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3962 vectype_out, j);
3963 for (i = 0; i < nargs; i++)
3965 op = gimple_call_arg (stmt, i);
3966 if (j == 0)
3968 vec_defs.quick_push (vNULL);
3969 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3970 op, &vec_defs[i],
3971 vectypes[i]);
3973 vargs[varg++] = vec_defs[i][j];
3975 if (masked_loop_p && reduc_idx >= 0)
3976 vargs[varg++] = vargs[reduc_idx + 1];
3978 if (len_opno >= 0 && len_loop_p)
3980 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3981 vectype_out, j, 1);
3982 signed char biasval
3983 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3984 tree bias = build_int_cst (intQI_type_node, biasval);
3985 vargs[len_opno] = len;
3986 vargs[len_opno + 1] = bias;
3988 else if (mask_opno >= 0 && masked_loop_p)
3990 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3991 vectype_out, j);
3992 vargs[mask_opno]
3993 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3994 vargs[mask_opno], gsi);
3997 gimple *new_stmt;
3998 if (cfn == CFN_GOMP_SIMD_LANE)
4000 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
4001 tree new_var
4002 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
4003 gimple *init_stmt = gimple_build_assign (new_var, cst);
4004 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
4005 new_temp = make_ssa_name (vec_dest);
4006 new_stmt = gimple_build_assign (new_temp, new_var);
4007 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4009 else if (modifier == NARROW)
4011 /* We don't define any narrowing conditional functions at
4012 present. */
4013 gcc_assert (mask_opno < 0);
4014 tree half_res = make_ssa_name (vectype_in);
4015 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
4016 gimple_call_set_lhs (call, half_res);
4017 gimple_call_set_nothrow (call, true);
4018 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
4019 if ((j & 1) == 0)
4021 prev_res = half_res;
4022 continue;
4024 new_temp = make_ssa_name (vec_dest);
4025 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
4026 half_res);
4027 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4029 else
4031 gcall *call;
4032 if (ifn != IFN_LAST)
4033 call = gimple_build_call_internal_vec (ifn, vargs);
4034 else
4035 call = gimple_build_call_vec (fndecl, vargs);
4036 new_temp = make_ssa_name (vec_dest, call);
4037 gimple_call_set_lhs (call, new_temp);
4038 gimple_call_set_nothrow (call, true);
4039 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
4040 new_stmt = call;
4043 if (j == (modifier == NARROW ? 1 : 0))
4044 *vec_stmt = new_stmt;
4045 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4047 for (i = 0; i < nargs; i++)
4049 vec<tree> vec_oprndsi = vec_defs[i];
4050 vec_oprndsi.release ();
4053 else if (modifier == NARROW)
4055 auto_vec<vec<tree> > vec_defs (nargs);
4056 /* We don't define any narrowing conditional functions at present. */
4057 gcc_assert (mask_opno < 0);
4058 for (j = 0; j < ncopies; ++j)
4060 /* Build argument list for the vectorized call. */
4061 if (j == 0)
4062 vargs.create (nargs * 2);
4063 else
4064 vargs.truncate (0);
4066 if (slp_node)
4068 vec<tree> vec_oprnds0;
4070 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
4071 vec_oprnds0 = vec_defs[0];
4073 /* Arguments are ready. Create the new vector stmt. */
4074 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
4076 size_t k;
4077 vargs.truncate (0);
4078 for (k = 0; k < nargs; k++)
4080 vec<tree> vec_oprndsk = vec_defs[k];
4081 vargs.quick_push (vec_oprndsk[i]);
4082 vargs.quick_push (vec_oprndsk[i + 1]);
4084 gcall *call;
4085 if (ifn != IFN_LAST)
4086 call = gimple_build_call_internal_vec (ifn, vargs);
4087 else
4088 call = gimple_build_call_vec (fndecl, vargs);
4089 new_temp = make_ssa_name (vec_dest, call);
4090 gimple_call_set_lhs (call, new_temp);
4091 gimple_call_set_nothrow (call, true);
4092 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
4093 slp_node->push_vec_def (call);
4095 continue;
4098 for (i = 0; i < nargs; i++)
4100 op = gimple_call_arg (stmt, i);
4101 if (j == 0)
4103 vec_defs.quick_push (vNULL);
4104 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
4105 op, &vec_defs[i], vectypes[i]);
4107 vec_oprnd0 = vec_defs[i][2*j];
4108 vec_oprnd1 = vec_defs[i][2*j+1];
4110 vargs.quick_push (vec_oprnd0);
4111 vargs.quick_push (vec_oprnd1);
4114 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
4115 new_temp = make_ssa_name (vec_dest, new_stmt);
4116 gimple_call_set_lhs (new_stmt, new_temp);
4117 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4119 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4122 if (!slp_node)
4123 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
4125 for (i = 0; i < nargs; i++)
4127 vec<tree> vec_oprndsi = vec_defs[i];
4128 vec_oprndsi.release ();
4131 else
4132 /* No current target implements this case. */
4133 return false;
4135 vargs.release ();
4137 /* The call in STMT might prevent it from being removed in dce.
4138 We however cannot remove it here, due to the way the ssa name
4139 it defines is mapped to the new definition. So just replace
4140 rhs of the statement with something harmless. */
4142 if (slp_node)
4143 return true;
4145 stmt_info = vect_orig_stmt (stmt_info);
4146 lhs = gimple_get_lhs (stmt_info->stmt);
4148 gassign *new_stmt
4149 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4150 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
4152 return true;
4156 struct simd_call_arg_info
4158 tree vectype;
4159 tree op;
4160 HOST_WIDE_INT linear_step;
4161 enum vect_def_type dt;
4162 unsigned int align;
4163 bool simd_lane_linear;
4166 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
4167 is linear within simd lane (but not within whole loop), note it in
4168 *ARGINFO. */
4170 static void
4171 vect_simd_lane_linear (tree op, class loop *loop,
4172 struct simd_call_arg_info *arginfo)
4174 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
4176 if (!is_gimple_assign (def_stmt)
4177 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
4178 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
4179 return;
4181 tree base = gimple_assign_rhs1 (def_stmt);
4182 HOST_WIDE_INT linear_step = 0;
4183 tree v = gimple_assign_rhs2 (def_stmt);
4184 while (TREE_CODE (v) == SSA_NAME)
4186 tree t;
4187 def_stmt = SSA_NAME_DEF_STMT (v);
4188 if (is_gimple_assign (def_stmt))
4189 switch (gimple_assign_rhs_code (def_stmt))
4191 case PLUS_EXPR:
4192 t = gimple_assign_rhs2 (def_stmt);
4193 if (linear_step || TREE_CODE (t) != INTEGER_CST)
4194 return;
4195 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
4196 v = gimple_assign_rhs1 (def_stmt);
4197 continue;
4198 case MULT_EXPR:
4199 t = gimple_assign_rhs2 (def_stmt);
4200 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
4201 return;
4202 linear_step = tree_to_shwi (t);
4203 v = gimple_assign_rhs1 (def_stmt);
4204 continue;
4205 CASE_CONVERT:
4206 t = gimple_assign_rhs1 (def_stmt);
4207 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
4208 || (TYPE_PRECISION (TREE_TYPE (v))
4209 < TYPE_PRECISION (TREE_TYPE (t))))
4210 return;
4211 if (!linear_step)
4212 linear_step = 1;
4213 v = t;
4214 continue;
4215 default:
4216 return;
4218 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
4219 && loop->simduid
4220 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
4221 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
4222 == loop->simduid))
4224 if (!linear_step)
4225 linear_step = 1;
4226 arginfo->linear_step = linear_step;
4227 arginfo->op = base;
4228 arginfo->simd_lane_linear = true;
4229 return;
4234 /* Return the number of elements in vector type VECTYPE, which is associated
4235 with a SIMD clone. At present these vectors always have a constant
4236 length. */
4238 static unsigned HOST_WIDE_INT
4239 simd_clone_subparts (tree vectype)
4241 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
4244 /* Function vectorizable_simd_clone_call.
4246 Check if STMT_INFO performs a function call that can be vectorized
4247 by calling a simd clone of the function.
4248 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4249 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4250 Return true if STMT_INFO is vectorizable in this way. */
4252 static bool
4253 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
4254 gimple_stmt_iterator *gsi,
4255 gimple **vec_stmt, slp_tree slp_node,
4256 stmt_vector_for_cost *)
4258 tree vec_dest;
4259 tree scalar_dest;
4260 tree op, type;
4261 tree vec_oprnd0 = NULL_TREE;
4262 tree vectype;
4263 poly_uint64 nunits;
4264 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4265 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4266 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
4267 tree fndecl, new_temp;
4268 int ncopies, j;
4269 auto_vec<simd_call_arg_info> arginfo;
4270 vec<tree> vargs = vNULL;
4271 size_t i, nargs;
4272 tree lhs, rtype, ratype;
4273 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
4274 int arg_offset = 0;
4276 /* Is STMT a vectorizable call? */
4277 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4278 if (!stmt)
4279 return false;
4281 fndecl = gimple_call_fndecl (stmt);
4282 if (fndecl == NULL_TREE
4283 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4285 fndecl = gimple_call_arg (stmt, 0);
4286 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4287 fndecl = TREE_OPERAND (fndecl, 0);
4288 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4289 arg_offset = 1;
4291 if (fndecl == NULL_TREE)
4292 return false;
4294 struct cgraph_node *node = cgraph_node::get (fndecl);
4295 if (node == NULL || node->simd_clones == NULL)
4296 return false;
4298 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4299 return false;
4301 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4302 && ! vec_stmt)
4303 return false;
4305 if (gimple_call_lhs (stmt)
4306 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4307 return false;
4309 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4311 vectype = STMT_VINFO_VECTYPE (stmt_info);
4313 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4314 return false;
4316 /* FORNOW */
4317 if (slp_node)
4318 return false;
4320 /* Process function arguments. */
4321 nargs = gimple_call_num_args (stmt) - arg_offset;
4323 /* Bail out if the function has zero arguments. */
4324 if (nargs == 0)
4325 return false;
4327 arginfo.reserve (nargs, true);
4329 for (i = 0; i < nargs; i++)
4331 simd_call_arg_info thisarginfo;
4332 affine_iv iv;
4334 thisarginfo.linear_step = 0;
4335 thisarginfo.align = 0;
4336 thisarginfo.op = NULL_TREE;
4337 thisarginfo.simd_lane_linear = false;
4339 op = gimple_call_arg (stmt, i + arg_offset);
4340 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4341 &thisarginfo.vectype)
4342 || thisarginfo.dt == vect_uninitialized_def)
4344 if (dump_enabled_p ())
4345 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4346 "use not simple.\n");
4347 return false;
4350 if (thisarginfo.dt == vect_constant_def
4351 || thisarginfo.dt == vect_external_def)
4352 gcc_assert (thisarginfo.vectype == NULL_TREE);
4353 else
4354 gcc_assert (thisarginfo.vectype != NULL_TREE);
4356 /* For linear arguments, the analyze phase should have saved
4357 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
4358 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4359 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4361 gcc_assert (vec_stmt);
4362 thisarginfo.linear_step
4363 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4364 thisarginfo.op
4365 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4366 thisarginfo.simd_lane_linear
4367 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4368 == boolean_true_node);
4369 /* If loop has been peeled for alignment, we need to adjust it. */
4370 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4371 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4372 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4374 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4375 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4376 tree opt = TREE_TYPE (thisarginfo.op);
4377 bias = fold_convert (TREE_TYPE (step), bias);
4378 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4379 thisarginfo.op
4380 = fold_build2 (POINTER_TYPE_P (opt)
4381 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4382 thisarginfo.op, bias);
4385 else if (!vec_stmt
4386 && thisarginfo.dt != vect_constant_def
4387 && thisarginfo.dt != vect_external_def
4388 && loop_vinfo
4389 && TREE_CODE (op) == SSA_NAME
4390 && simple_iv (loop, loop_containing_stmt (stmt), op,
4391 &iv, false)
4392 && tree_fits_shwi_p (iv.step))
4394 thisarginfo.linear_step = tree_to_shwi (iv.step);
4395 thisarginfo.op = iv.base;
4397 else if ((thisarginfo.dt == vect_constant_def
4398 || thisarginfo.dt == vect_external_def)
4399 && POINTER_TYPE_P (TREE_TYPE (op)))
4400 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4401 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4402 linear too. */
4403 if (POINTER_TYPE_P (TREE_TYPE (op))
4404 && !thisarginfo.linear_step
4405 && !vec_stmt
4406 && thisarginfo.dt != vect_constant_def
4407 && thisarginfo.dt != vect_external_def
4408 && loop_vinfo
4409 && !slp_node
4410 && TREE_CODE (op) == SSA_NAME)
4411 vect_simd_lane_linear (op, loop, &thisarginfo);
4413 arginfo.quick_push (thisarginfo);
4416 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4417 if (!vf.is_constant ())
4419 if (dump_enabled_p ())
4420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4421 "not considering SIMD clones; not yet supported"
4422 " for variable-width vectors.\n");
4423 return false;
4426 unsigned int badness = 0;
4427 struct cgraph_node *bestn = NULL;
4428 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4429 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4430 else
4431 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4432 n = n->simdclone->next_clone)
4434 unsigned int this_badness = 0;
4435 unsigned int num_calls;
4436 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4437 || n->simdclone->nargs != nargs)
4438 continue;
4439 if (num_calls != 1)
4440 this_badness += exact_log2 (num_calls) * 4096;
4441 if (n->simdclone->inbranch)
4442 this_badness += 8192;
4443 int target_badness = targetm.simd_clone.usable (n);
4444 if (target_badness < 0)
4445 continue;
4446 this_badness += target_badness * 512;
4447 for (i = 0; i < nargs; i++)
4449 switch (n->simdclone->args[i].arg_type)
4451 case SIMD_CLONE_ARG_TYPE_VECTOR:
4452 if (!useless_type_conversion_p
4453 (n->simdclone->args[i].orig_type,
4454 TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
4455 i = -1;
4456 else if (arginfo[i].dt == vect_constant_def
4457 || arginfo[i].dt == vect_external_def
4458 || arginfo[i].linear_step)
4459 this_badness += 64;
4460 break;
4461 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4462 if (arginfo[i].dt != vect_constant_def
4463 && arginfo[i].dt != vect_external_def)
4464 i = -1;
4465 break;
4466 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4467 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4468 if (arginfo[i].dt == vect_constant_def
4469 || arginfo[i].dt == vect_external_def
4470 || (arginfo[i].linear_step
4471 != n->simdclone->args[i].linear_step))
4472 i = -1;
4473 break;
4474 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4475 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4476 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4477 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4478 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4479 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4480 /* FORNOW */
4481 i = -1;
4482 break;
4483 case SIMD_CLONE_ARG_TYPE_MASK:
4484 break;
4486 if (i == (size_t) -1)
4487 break;
4488 if (n->simdclone->args[i].alignment > arginfo[i].align)
4490 i = -1;
4491 break;
4493 if (arginfo[i].align)
4494 this_badness += (exact_log2 (arginfo[i].align)
4495 - exact_log2 (n->simdclone->args[i].alignment));
4497 if (i == (size_t) -1)
4498 continue;
4499 if (bestn == NULL || this_badness < badness)
4501 bestn = n;
4502 badness = this_badness;
4506 if (bestn == NULL)
4507 return false;
4509 for (i = 0; i < nargs; i++)
4511 if ((arginfo[i].dt == vect_constant_def
4512 || arginfo[i].dt == vect_external_def)
4513 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4515 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
4516 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4517 slp_node);
4518 if (arginfo[i].vectype == NULL
4519 || !constant_multiple_p (bestn->simdclone->simdlen,
4520 simd_clone_subparts (arginfo[i].vectype)))
4521 return false;
4524 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4525 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4527 if (dump_enabled_p ())
4528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4529 "vector mask arguments are not supported.\n");
4530 return false;
4533 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4534 && bestn->simdclone->mask_mode == VOIDmode
4535 && (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
4536 != simd_clone_subparts (arginfo[i].vectype)))
4538 /* FORNOW we only have partial support for vector-type masks that
4539 can't hold all of simdlen. */
4540 if (dump_enabled_p ())
4541 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4542 vect_location,
4543 "in-branch vector clones are not yet"
4544 " supported for mismatched vector sizes.\n");
4545 return false;
4547 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4548 && bestn->simdclone->mask_mode != VOIDmode)
4550 /* FORNOW don't support integer-type masks. */
4551 if (dump_enabled_p ())
4552 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4553 vect_location,
4554 "in-branch vector clones are not yet"
4555 " supported for integer mask modes.\n");
4556 return false;
4560 fndecl = bestn->decl;
4561 nunits = bestn->simdclone->simdlen;
4562 ncopies = vector_unroll_factor (vf, nunits);
4564 /* If the function isn't const, only allow it in simd loops where user
4565 has asserted that at least nunits consecutive iterations can be
4566 performed using SIMD instructions. */
4567 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4568 && gimple_vuse (stmt))
4569 return false;
4571 /* Sanity check: make sure that at least one copy of the vectorized stmt
4572 needs to be generated. */
4573 gcc_assert (ncopies >= 1);
4575 if (!vec_stmt) /* transformation not required. */
4577 /* When the original call is pure or const but the SIMD ABI dictates
4578 an aggregate return we will have to use a virtual definition and
4579 in a loop eventually even need to add a virtual PHI. That's
4580 not straight-forward so allow to fix this up via renaming. */
4581 if (gimple_call_lhs (stmt)
4582 && !gimple_vdef (stmt)
4583 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4584 vinfo->any_known_not_updated_vssa = true;
4585 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4586 for (i = 0; i < nargs; i++)
4587 if ((bestn->simdclone->args[i].arg_type
4588 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4589 || (bestn->simdclone->args[i].arg_type
4590 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4592 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4593 + 1,
4594 true);
4595 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4596 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4597 ? size_type_node : TREE_TYPE (arginfo[i].op);
4598 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4599 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4600 tree sll = arginfo[i].simd_lane_linear
4601 ? boolean_true_node : boolean_false_node;
4602 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4604 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4605 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4606 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4607 dt, slp_node, cost_vec); */
4608 return true;
4611 /* Transform. */
4613 if (dump_enabled_p ())
4614 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4616 /* Handle def. */
4617 scalar_dest = gimple_call_lhs (stmt);
4618 vec_dest = NULL_TREE;
4619 rtype = NULL_TREE;
4620 ratype = NULL_TREE;
4621 if (scalar_dest)
4623 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4624 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4625 if (TREE_CODE (rtype) == ARRAY_TYPE)
4627 ratype = rtype;
4628 rtype = TREE_TYPE (ratype);
4632 auto_vec<vec<tree> > vec_oprnds;
4633 auto_vec<unsigned> vec_oprnds_i;
4634 vec_oprnds.safe_grow_cleared (nargs, true);
4635 vec_oprnds_i.safe_grow_cleared (nargs, true);
4636 for (j = 0; j < ncopies; ++j)
4638 /* Build argument list for the vectorized call. */
4639 if (j == 0)
4640 vargs.create (nargs);
4641 else
4642 vargs.truncate (0);
4644 for (i = 0; i < nargs; i++)
4646 unsigned int k, l, m, o;
4647 tree atype;
4648 op = gimple_call_arg (stmt, i + arg_offset);
4649 switch (bestn->simdclone->args[i].arg_type)
4651 case SIMD_CLONE_ARG_TYPE_VECTOR:
4652 atype = bestn->simdclone->args[i].vector_type;
4653 o = vector_unroll_factor (nunits,
4654 simd_clone_subparts (atype));
4655 for (m = j * o; m < (j + 1) * o; m++)
4657 if (simd_clone_subparts (atype)
4658 < simd_clone_subparts (arginfo[i].vectype))
4660 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4661 k = (simd_clone_subparts (arginfo[i].vectype)
4662 / simd_clone_subparts (atype));
4663 gcc_assert ((k & (k - 1)) == 0);
4664 if (m == 0)
4666 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4667 ncopies * o / k, op,
4668 &vec_oprnds[i]);
4669 vec_oprnds_i[i] = 0;
4670 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4672 else
4674 vec_oprnd0 = arginfo[i].op;
4675 if ((m & (k - 1)) == 0)
4676 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4678 arginfo[i].op = vec_oprnd0;
4679 vec_oprnd0
4680 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4681 bitsize_int (prec),
4682 bitsize_int ((m & (k - 1)) * prec));
4683 gassign *new_stmt
4684 = gimple_build_assign (make_ssa_name (atype),
4685 vec_oprnd0);
4686 vect_finish_stmt_generation (vinfo, stmt_info,
4687 new_stmt, gsi);
4688 vargs.safe_push (gimple_assign_lhs (new_stmt));
4690 else
4692 k = (simd_clone_subparts (atype)
4693 / simd_clone_subparts (arginfo[i].vectype));
4694 gcc_assert ((k & (k - 1)) == 0);
4695 vec<constructor_elt, va_gc> *ctor_elts;
4696 if (k != 1)
4697 vec_alloc (ctor_elts, k);
4698 else
4699 ctor_elts = NULL;
4700 for (l = 0; l < k; l++)
4702 if (m == 0 && l == 0)
4704 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4705 k * o * ncopies,
4707 &vec_oprnds[i]);
4708 vec_oprnds_i[i] = 0;
4709 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4711 else
4712 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4713 arginfo[i].op = vec_oprnd0;
4714 if (k == 1)
4715 break;
4716 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4717 vec_oprnd0);
4719 if (k == 1)
4720 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4721 atype))
4723 vec_oprnd0
4724 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4725 gassign *new_stmt
4726 = gimple_build_assign (make_ssa_name (atype),
4727 vec_oprnd0);
4728 vect_finish_stmt_generation (vinfo, stmt_info,
4729 new_stmt, gsi);
4730 vargs.safe_push (gimple_assign_lhs (new_stmt));
4732 else
4733 vargs.safe_push (vec_oprnd0);
4734 else
4736 vec_oprnd0 = build_constructor (atype, ctor_elts);
4737 gassign *new_stmt
4738 = gimple_build_assign (make_ssa_name (atype),
4739 vec_oprnd0);
4740 vect_finish_stmt_generation (vinfo, stmt_info,
4741 new_stmt, gsi);
4742 vargs.safe_push (gimple_assign_lhs (new_stmt));
4746 break;
4747 case SIMD_CLONE_ARG_TYPE_MASK:
4748 atype = bestn->simdclone->args[i].vector_type;
4749 if (bestn->simdclone->mask_mode != VOIDmode)
4751 /* FORNOW: this is disabled above. */
4752 gcc_unreachable ();
4754 else
4756 tree elt_type = TREE_TYPE (atype);
4757 tree one = fold_convert (elt_type, integer_one_node);
4758 tree zero = fold_convert (elt_type, integer_zero_node);
4759 o = vector_unroll_factor (nunits,
4760 simd_clone_subparts (atype));
4761 for (m = j * o; m < (j + 1) * o; m++)
4763 if (simd_clone_subparts (atype)
4764 < simd_clone_subparts (arginfo[i].vectype))
4766 /* The mask type has fewer elements than simdlen. */
4768 /* FORNOW */
4769 gcc_unreachable ();
4771 else if (simd_clone_subparts (atype)
4772 == simd_clone_subparts (arginfo[i].vectype))
4774 /* The SIMD clone function has the same number of
4775 elements as the current function. */
4776 if (m == 0)
4778 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4779 o * ncopies,
4781 &vec_oprnds[i]);
4782 vec_oprnds_i[i] = 0;
4784 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4785 vec_oprnd0
4786 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4787 build_vector_from_val (atype, one),
4788 build_vector_from_val (atype, zero));
4789 gassign *new_stmt
4790 = gimple_build_assign (make_ssa_name (atype),
4791 vec_oprnd0);
4792 vect_finish_stmt_generation (vinfo, stmt_info,
4793 new_stmt, gsi);
4794 vargs.safe_push (gimple_assign_lhs (new_stmt));
4796 else
4798 /* The mask type has more elements than simdlen. */
4800 /* FORNOW */
4801 gcc_unreachable ();
4805 break;
4806 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4807 vargs.safe_push (op);
4808 break;
4809 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4810 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4811 if (j == 0)
4813 gimple_seq stmts;
4814 arginfo[i].op
4815 = force_gimple_operand (unshare_expr (arginfo[i].op),
4816 &stmts, true, NULL_TREE);
4817 if (stmts != NULL)
4819 basic_block new_bb;
4820 edge pe = loop_preheader_edge (loop);
4821 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4822 gcc_assert (!new_bb);
4824 if (arginfo[i].simd_lane_linear)
4826 vargs.safe_push (arginfo[i].op);
4827 break;
4829 tree phi_res = copy_ssa_name (op);
4830 gphi *new_phi = create_phi_node (phi_res, loop->header);
4831 add_phi_arg (new_phi, arginfo[i].op,
4832 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4833 enum tree_code code
4834 = POINTER_TYPE_P (TREE_TYPE (op))
4835 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4836 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4837 ? sizetype : TREE_TYPE (op);
4838 poly_widest_int cst
4839 = wi::mul (bestn->simdclone->args[i].linear_step,
4840 ncopies * nunits);
4841 tree tcst = wide_int_to_tree (type, cst);
4842 tree phi_arg = copy_ssa_name (op);
4843 gassign *new_stmt
4844 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4845 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4846 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4847 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4848 UNKNOWN_LOCATION);
4849 arginfo[i].op = phi_res;
4850 vargs.safe_push (phi_res);
4852 else
4854 enum tree_code code
4855 = POINTER_TYPE_P (TREE_TYPE (op))
4856 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4857 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4858 ? sizetype : TREE_TYPE (op);
4859 poly_widest_int cst
4860 = wi::mul (bestn->simdclone->args[i].linear_step,
4861 j * nunits);
4862 tree tcst = wide_int_to_tree (type, cst);
4863 new_temp = make_ssa_name (TREE_TYPE (op));
4864 gassign *new_stmt
4865 = gimple_build_assign (new_temp, code,
4866 arginfo[i].op, tcst);
4867 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4868 vargs.safe_push (new_temp);
4870 break;
4871 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4872 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4873 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4874 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4875 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4876 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4877 default:
4878 gcc_unreachable ();
4882 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4883 if (vec_dest)
4885 gcc_assert (ratype
4886 || known_eq (simd_clone_subparts (rtype), nunits));
4887 if (ratype)
4888 new_temp = create_tmp_var (ratype);
4889 else if (useless_type_conversion_p (vectype, rtype))
4890 new_temp = make_ssa_name (vec_dest, new_call);
4891 else
4892 new_temp = make_ssa_name (rtype, new_call);
4893 gimple_call_set_lhs (new_call, new_temp);
4895 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4896 gimple *new_stmt = new_call;
4898 if (vec_dest)
4900 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4902 unsigned int k, l;
4903 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4904 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4905 k = vector_unroll_factor (nunits,
4906 simd_clone_subparts (vectype));
4907 gcc_assert ((k & (k - 1)) == 0);
4908 for (l = 0; l < k; l++)
4910 tree t;
4911 if (ratype)
4913 t = build_fold_addr_expr (new_temp);
4914 t = build2 (MEM_REF, vectype, t,
4915 build_int_cst (TREE_TYPE (t), l * bytes));
4917 else
4918 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4919 bitsize_int (prec), bitsize_int (l * prec));
4920 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4921 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4923 if (j == 0 && l == 0)
4924 *vec_stmt = new_stmt;
4925 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4928 if (ratype)
4929 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4930 continue;
4932 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4934 unsigned int k = (simd_clone_subparts (vectype)
4935 / simd_clone_subparts (rtype));
4936 gcc_assert ((k & (k - 1)) == 0);
4937 if ((j & (k - 1)) == 0)
4938 vec_alloc (ret_ctor_elts, k);
4939 if (ratype)
4941 unsigned int m, o;
4942 o = vector_unroll_factor (nunits,
4943 simd_clone_subparts (rtype));
4944 for (m = 0; m < o; m++)
4946 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4947 size_int (m), NULL_TREE, NULL_TREE);
4948 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4949 tem);
4950 vect_finish_stmt_generation (vinfo, stmt_info,
4951 new_stmt, gsi);
4952 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4953 gimple_assign_lhs (new_stmt));
4955 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4957 else
4958 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4959 if ((j & (k - 1)) != k - 1)
4960 continue;
4961 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4962 new_stmt
4963 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4964 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4966 if ((unsigned) j == k - 1)
4967 *vec_stmt = new_stmt;
4968 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4969 continue;
4971 else if (ratype)
4973 tree t = build_fold_addr_expr (new_temp);
4974 t = build2 (MEM_REF, vectype, t,
4975 build_int_cst (TREE_TYPE (t), 0));
4976 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4977 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4978 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4980 else if (!useless_type_conversion_p (vectype, rtype))
4982 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4983 new_stmt
4984 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4985 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4989 if (j == 0)
4990 *vec_stmt = new_stmt;
4991 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4994 for (i = 0; i < nargs; ++i)
4996 vec<tree> oprndsi = vec_oprnds[i];
4997 oprndsi.release ();
4999 vargs.release ();
5001 /* Mark the clone as no longer being a candidate for GC. */
5002 bestn->gc_candidate = false;
5004 /* The call in STMT might prevent it from being removed in dce.
5005 We however cannot remove it here, due to the way the ssa name
5006 it defines is mapped to the new definition. So just replace
5007 rhs of the statement with something harmless. */
5009 if (slp_node)
5010 return true;
5012 gimple *new_stmt;
5013 if (scalar_dest)
5015 type = TREE_TYPE (scalar_dest);
5016 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
5017 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
5019 else
5020 new_stmt = gimple_build_nop ();
5021 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
5022 unlink_stmt_vdef (stmt);
5024 return true;
5028 /* Function vect_gen_widened_results_half
5030 Create a vector stmt whose code, type, number of arguments, and result
5031 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
5032 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
5033 In the case that CODE is a CALL_EXPR, this means that a call to DECL
5034 needs to be created (DECL is a function-decl of a target-builtin).
5035 STMT_INFO is the original scalar stmt that we are vectorizing. */
5037 static gimple *
5038 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
5039 tree vec_oprnd0, tree vec_oprnd1, int op_type,
5040 tree vec_dest, gimple_stmt_iterator *gsi,
5041 stmt_vec_info stmt_info)
5043 gimple *new_stmt;
5044 tree new_temp;
5046 /* Generate half of the widened result: */
5047 if (op_type != binary_op)
5048 vec_oprnd1 = NULL;
5049 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
5050 new_temp = make_ssa_name (vec_dest, new_stmt);
5051 gimple_set_lhs (new_stmt, new_temp);
5052 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5054 return new_stmt;
5058 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
5059 For multi-step conversions store the resulting vectors and call the function
5060 recursively. When NARROW_SRC_P is true, there's still a conversion after
5061 narrowing, don't store the vectors in the SLP_NODE or in vector info of
5062 the scalar statement(or in STMT_VINFO_RELATED_STMT chain). */
5064 static void
5065 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
5066 int multi_step_cvt,
5067 stmt_vec_info stmt_info,
5068 vec<tree> &vec_dsts,
5069 gimple_stmt_iterator *gsi,
5070 slp_tree slp_node, code_helper code,
5071 bool narrow_src_p)
5073 unsigned int i;
5074 tree vop0, vop1, new_tmp, vec_dest;
5076 vec_dest = vec_dsts.pop ();
5078 for (i = 0; i < vec_oprnds->length (); i += 2)
5080 /* Create demotion operation. */
5081 vop0 = (*vec_oprnds)[i];
5082 vop1 = (*vec_oprnds)[i + 1];
5083 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
5084 new_tmp = make_ssa_name (vec_dest, new_stmt);
5085 gimple_set_lhs (new_stmt, new_tmp);
5086 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5087 if (multi_step_cvt || narrow_src_p)
5088 /* Store the resulting vector for next recursive call,
5089 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
5090 (*vec_oprnds)[i/2] = new_tmp;
5091 else
5093 /* This is the last step of the conversion sequence. Store the
5094 vectors in SLP_NODE or in vector info of the scalar statement
5095 (or in STMT_VINFO_RELATED_STMT chain). */
5096 if (slp_node)
5097 slp_node->push_vec_def (new_stmt);
5098 else
5099 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5103 /* For multi-step demotion operations we first generate demotion operations
5104 from the source type to the intermediate types, and then combine the
5105 results (stored in VEC_OPRNDS) in demotion operation to the destination
5106 type. */
5107 if (multi_step_cvt)
5109 /* At each level of recursion we have half of the operands we had at the
5110 previous level. */
5111 vec_oprnds->truncate ((i+1)/2);
5112 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
5113 multi_step_cvt - 1,
5114 stmt_info, vec_dsts, gsi,
5115 slp_node, VEC_PACK_TRUNC_EXPR,
5116 narrow_src_p);
5119 vec_dsts.quick_push (vec_dest);
5123 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
5124 and VEC_OPRNDS1, for a binary operation associated with scalar statement
5125 STMT_INFO. For multi-step conversions store the resulting vectors and
5126 call the function recursively. */
5128 static void
5129 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
5130 vec<tree> *vec_oprnds0,
5131 vec<tree> *vec_oprnds1,
5132 stmt_vec_info stmt_info, tree vec_dest,
5133 gimple_stmt_iterator *gsi,
5134 code_helper ch1,
5135 code_helper ch2, int op_type)
5137 int i;
5138 tree vop0, vop1, new_tmp1, new_tmp2;
5139 gimple *new_stmt1, *new_stmt2;
5140 vec<tree> vec_tmp = vNULL;
5142 vec_tmp.create (vec_oprnds0->length () * 2);
5143 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5145 if (op_type == binary_op)
5146 vop1 = (*vec_oprnds1)[i];
5147 else
5148 vop1 = NULL_TREE;
5150 /* Generate the two halves of promotion operation. */
5151 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
5152 op_type, vec_dest, gsi,
5153 stmt_info);
5154 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
5155 op_type, vec_dest, gsi,
5156 stmt_info);
5157 if (is_gimple_call (new_stmt1))
5159 new_tmp1 = gimple_call_lhs (new_stmt1);
5160 new_tmp2 = gimple_call_lhs (new_stmt2);
5162 else
5164 new_tmp1 = gimple_assign_lhs (new_stmt1);
5165 new_tmp2 = gimple_assign_lhs (new_stmt2);
5168 /* Store the results for the next step. */
5169 vec_tmp.quick_push (new_tmp1);
5170 vec_tmp.quick_push (new_tmp2);
5173 vec_oprnds0->release ();
5174 *vec_oprnds0 = vec_tmp;
5177 /* Create vectorized promotion stmts for widening stmts using only half the
5178 potential vector size for input. */
5179 static void
5180 vect_create_half_widening_stmts (vec_info *vinfo,
5181 vec<tree> *vec_oprnds0,
5182 vec<tree> *vec_oprnds1,
5183 stmt_vec_info stmt_info, tree vec_dest,
5184 gimple_stmt_iterator *gsi,
5185 code_helper code1,
5186 int op_type)
5188 int i;
5189 tree vop0, vop1;
5190 gimple *new_stmt1;
5191 gimple *new_stmt2;
5192 gimple *new_stmt3;
5193 vec<tree> vec_tmp = vNULL;
5195 vec_tmp.create (vec_oprnds0->length ());
5196 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
5198 tree new_tmp1, new_tmp2, new_tmp3, out_type;
5200 gcc_assert (op_type == binary_op);
5201 vop1 = (*vec_oprnds1)[i];
5203 /* Widen the first vector input. */
5204 out_type = TREE_TYPE (vec_dest);
5205 new_tmp1 = make_ssa_name (out_type);
5206 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
5207 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
5208 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5210 /* Widen the second vector input. */
5211 new_tmp2 = make_ssa_name (out_type);
5212 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5213 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5214 /* Perform the operation. With both vector inputs widened. */
5215 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5217 else
5219 /* Perform the operation. With the single vector input widened. */
5220 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5223 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5224 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5225 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5227 /* Store the results for the next step. */
5228 vec_tmp.quick_push (new_tmp3);
5231 vec_oprnds0->release ();
5232 *vec_oprnds0 = vec_tmp;
5236 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5237 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5238 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5239 Return true if STMT_INFO is vectorizable in this way. */
5241 static bool
5242 vectorizable_conversion (vec_info *vinfo,
5243 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5244 gimple **vec_stmt, slp_tree slp_node,
5245 stmt_vector_for_cost *cost_vec)
5247 tree vec_dest, cvt_op = NULL_TREE;
5248 tree scalar_dest;
5249 tree op0, op1 = NULL_TREE;
5250 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5251 tree_code tc1, tc2;
5252 code_helper code, code1, code2;
5253 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5254 tree new_temp;
5255 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5256 int ndts = 2;
5257 poly_uint64 nunits_in;
5258 poly_uint64 nunits_out;
5259 tree vectype_out, vectype_in;
5260 int ncopies, i;
5261 tree lhs_type, rhs_type;
5262 /* For conversions between floating point and integer, there're 2 NARROW
5263 cases. NARROW_SRC is for FLOAT_EXPR, means
5264 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5265 This is safe when the range of the source integer can fit into the lower
5266 precision. NARROW_DST is for FIX_TRUNC_EXPR, means
5267 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> INTEGER.
5268 For other conversions, when there's narrowing, NARROW_DST is used as
5269 default. */
5270 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5271 vec<tree> vec_oprnds0 = vNULL;
5272 vec<tree> vec_oprnds1 = vNULL;
5273 tree vop0;
5274 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5275 int multi_step_cvt = 0;
5276 vec<tree> interm_types = vNULL;
5277 tree intermediate_type, cvt_type = NULL_TREE;
5278 int op_type;
5279 unsigned short fltsz;
5281 /* Is STMT a vectorizable conversion? */
5283 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5284 return false;
5286 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5287 && ! vec_stmt)
5288 return false;
5290 gimple* stmt = stmt_info->stmt;
5291 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5292 return false;
5294 if (gimple_get_lhs (stmt) == NULL_TREE
5295 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5296 return false;
5298 if (TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5299 return false;
5301 if (is_gimple_assign (stmt))
5303 code = gimple_assign_rhs_code (stmt);
5304 op_type = TREE_CODE_LENGTH ((tree_code) code);
5306 else if (gimple_call_internal_p (stmt))
5308 code = gimple_call_internal_fn (stmt);
5309 op_type = gimple_call_num_args (stmt);
5311 else
5312 return false;
5314 bool widen_arith = (code == WIDEN_MULT_EXPR
5315 || code == WIDEN_LSHIFT_EXPR
5316 || widening_fn_p (code));
5318 if (!widen_arith
5319 && !CONVERT_EXPR_CODE_P (code)
5320 && code != FIX_TRUNC_EXPR
5321 && code != FLOAT_EXPR)
5322 return false;
5324 /* Check types of lhs and rhs. */
5325 scalar_dest = gimple_get_lhs (stmt);
5326 lhs_type = TREE_TYPE (scalar_dest);
5327 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5329 /* Check the operands of the operation. */
5330 slp_tree slp_op0, slp_op1 = NULL;
5331 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5332 0, &op0, &slp_op0, &dt[0], &vectype_in))
5334 if (dump_enabled_p ())
5335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5336 "use not simple.\n");
5337 return false;
5340 rhs_type = TREE_TYPE (op0);
5341 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5342 && !((INTEGRAL_TYPE_P (lhs_type)
5343 && INTEGRAL_TYPE_P (rhs_type))
5344 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5345 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5346 return false;
5348 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5349 && ((INTEGRAL_TYPE_P (lhs_type)
5350 && !type_has_mode_precision_p (lhs_type))
5351 || (INTEGRAL_TYPE_P (rhs_type)
5352 && !type_has_mode_precision_p (rhs_type))))
5354 if (dump_enabled_p ())
5355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5356 "type conversion to/from bit-precision unsupported."
5357 "\n");
5358 return false;
5361 if (op_type == binary_op)
5363 gcc_assert (code == WIDEN_MULT_EXPR
5364 || code == WIDEN_LSHIFT_EXPR
5365 || widening_fn_p (code));
5367 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5368 gimple_call_arg (stmt, 0);
5369 tree vectype1_in;
5370 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5371 &op1, &slp_op1, &dt[1], &vectype1_in))
5373 if (dump_enabled_p ())
5374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5375 "use not simple.\n");
5376 return false;
5378 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5379 OP1. */
5380 if (!vectype_in)
5381 vectype_in = vectype1_in;
5384 /* If op0 is an external or constant def, infer the vector type
5385 from the scalar type. */
5386 if (!vectype_in)
5387 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5388 if (vec_stmt)
5389 gcc_assert (vectype_in);
5390 if (!vectype_in)
5392 if (dump_enabled_p ())
5393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5394 "no vectype for scalar type %T\n", rhs_type);
5396 return false;
5399 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5400 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5402 if (dump_enabled_p ())
5403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5404 "can't convert between boolean and non "
5405 "boolean vectors %T\n", rhs_type);
5407 return false;
5410 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5411 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5412 if (known_eq (nunits_out, nunits_in))
5413 if (widen_arith)
5414 modifier = WIDEN;
5415 else
5416 modifier = NONE;
5417 else if (multiple_p (nunits_out, nunits_in))
5418 modifier = NARROW_DST;
5419 else
5421 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5422 modifier = WIDEN;
5425 /* Multiple types in SLP are handled by creating the appropriate number of
5426 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5427 case of SLP. */
5428 if (slp_node)
5429 ncopies = 1;
5430 else if (modifier == NARROW_DST)
5431 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5432 else
5433 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5435 /* Sanity check: make sure that at least one copy of the vectorized stmt
5436 needs to be generated. */
5437 gcc_assert (ncopies >= 1);
5439 bool found_mode = false;
5440 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5441 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5442 opt_scalar_mode rhs_mode_iter;
5444 /* Supportable by target? */
5445 switch (modifier)
5447 case NONE:
5448 if (code != FIX_TRUNC_EXPR
5449 && code != FLOAT_EXPR
5450 && !CONVERT_EXPR_CODE_P (code))
5451 return false;
5452 gcc_assert (code.is_tree_code ());
5453 if (supportable_convert_operation ((tree_code) code, vectype_out,
5454 vectype_in, &tc1))
5456 code1 = tc1;
5457 break;
5460 /* For conversions between float and integer types try whether
5461 we can use intermediate signed integer types to support the
5462 conversion. */
5463 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
5464 && (code == FLOAT_EXPR ||
5465 (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5467 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5468 bool float_expr_p = code == FLOAT_EXPR;
5469 unsigned short target_size;
5470 scalar_mode intermediate_mode;
5471 if (demotion)
5473 intermediate_mode = lhs_mode;
5474 target_size = GET_MODE_SIZE (rhs_mode);
5476 else
5478 target_size = GET_MODE_SIZE (lhs_mode);
5479 if (!int_mode_for_size
5480 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5481 goto unsupported;
5483 code1 = float_expr_p ? code : NOP_EXPR;
5484 codecvt1 = float_expr_p ? NOP_EXPR : code;
5485 opt_scalar_mode mode_iter;
5486 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5488 intermediate_mode = mode_iter.require ();
5490 if (GET_MODE_SIZE (intermediate_mode) > target_size)
5491 break;
5493 scalar_mode cvt_mode;
5494 if (!int_mode_for_size
5495 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
5496 break;
5498 cvt_type = build_nonstandard_integer_type
5499 (GET_MODE_BITSIZE (cvt_mode), 0);
5501 /* Check if the intermediate type can hold OP0's range.
5502 When converting from float to integer this is not necessary
5503 because values that do not fit the (smaller) target type are
5504 unspecified anyway. */
5505 if (demotion && float_expr_p)
5507 wide_int op_min_value, op_max_value;
5508 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5509 break;
5511 if (cvt_type == NULL_TREE
5512 || (wi::min_precision (op_max_value, SIGNED)
5513 > TYPE_PRECISION (cvt_type))
5514 || (wi::min_precision (op_min_value, SIGNED)
5515 > TYPE_PRECISION (cvt_type)))
5516 continue;
5519 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5520 /* This should only happened for SLP as long as loop vectorizer
5521 only supports same-sized vector. */
5522 if (cvt_type == NULL_TREE
5523 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5524 || !supportable_convert_operation ((tree_code) code1,
5525 vectype_out,
5526 cvt_type, &tc1)
5527 || !supportable_convert_operation ((tree_code) codecvt1,
5528 cvt_type,
5529 vectype_in, &tc2))
5530 continue;
5532 found_mode = true;
5533 break;
5536 if (found_mode)
5538 multi_step_cvt++;
5539 interm_types.safe_push (cvt_type);
5540 cvt_type = NULL_TREE;
5541 code1 = tc1;
5542 codecvt1 = tc2;
5543 break;
5546 /* FALLTHRU */
5547 unsupported:
5548 if (dump_enabled_p ())
5549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5550 "conversion not supported by target.\n");
5551 return false;
5553 case WIDEN:
5554 if (known_eq (nunits_in, nunits_out))
5556 if (!(code.is_tree_code ()
5557 && supportable_half_widening_operation ((tree_code) code,
5558 vectype_out, vectype_in,
5559 &tc1)))
5560 goto unsupported;
5561 code1 = tc1;
5562 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5563 break;
5565 if (supportable_widening_operation (vinfo, code, stmt_info,
5566 vectype_out, vectype_in, &code1,
5567 &code2, &multi_step_cvt,
5568 &interm_types))
5570 /* Binary widening operation can only be supported directly by the
5571 architecture. */
5572 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5573 break;
5576 if (code != FLOAT_EXPR
5577 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5578 goto unsupported;
5580 fltsz = GET_MODE_SIZE (lhs_mode);
5581 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5583 rhs_mode = rhs_mode_iter.require ();
5584 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5585 break;
5587 cvt_type
5588 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5589 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5590 if (cvt_type == NULL_TREE)
5591 goto unsupported;
5593 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5595 tc1 = ERROR_MARK;
5596 gcc_assert (code.is_tree_code ());
5597 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5598 cvt_type, &tc1))
5599 goto unsupported;
5600 codecvt1 = tc1;
5602 else if (!supportable_widening_operation (vinfo, code,
5603 stmt_info, vectype_out,
5604 cvt_type, &codecvt1,
5605 &codecvt2, &multi_step_cvt,
5606 &interm_types))
5607 continue;
5608 else
5609 gcc_assert (multi_step_cvt == 0);
5611 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5612 cvt_type,
5613 vectype_in, &code1,
5614 &code2, &multi_step_cvt,
5615 &interm_types))
5617 found_mode = true;
5618 break;
5622 if (!found_mode)
5623 goto unsupported;
5625 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5626 codecvt2 = ERROR_MARK;
5627 else
5629 multi_step_cvt++;
5630 interm_types.safe_push (cvt_type);
5631 cvt_type = NULL_TREE;
5633 break;
5635 case NARROW_DST:
5636 gcc_assert (op_type == unary_op);
5637 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5638 &code1, &multi_step_cvt,
5639 &interm_types))
5640 break;
5642 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5643 goto unsupported;
5645 if (code == FIX_TRUNC_EXPR)
5647 cvt_type
5648 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5649 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5650 if (cvt_type == NULL_TREE)
5651 goto unsupported;
5652 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5653 &tc1))
5654 codecvt1 = tc1;
5655 else
5656 goto unsupported;
5657 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5658 &code1, &multi_step_cvt,
5659 &interm_types))
5660 break;
5662 /* If op0 can be represented with low precision integer,
5663 truncate it to cvt_type and the do FLOAT_EXPR. */
5664 else if (code == FLOAT_EXPR)
5666 wide_int op_min_value, op_max_value;
5667 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5668 goto unsupported;
5670 cvt_type
5671 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5672 if (cvt_type == NULL_TREE
5673 || (wi::min_precision (op_max_value, SIGNED)
5674 > TYPE_PRECISION (cvt_type))
5675 || (wi::min_precision (op_min_value, SIGNED)
5676 > TYPE_PRECISION (cvt_type)))
5677 goto unsupported;
5679 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5680 if (cvt_type == NULL_TREE)
5681 goto unsupported;
5682 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5683 &code1, &multi_step_cvt,
5684 &interm_types))
5685 goto unsupported;
5686 if (supportable_convert_operation ((tree_code) code, vectype_out,
5687 cvt_type, &tc1))
5689 codecvt1 = tc1;
5690 modifier = NARROW_SRC;
5691 break;
5695 goto unsupported;
5697 default:
5698 gcc_unreachable ();
5701 if (!vec_stmt) /* transformation not required. */
5703 if (slp_node
5704 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5705 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5707 if (dump_enabled_p ())
5708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5709 "incompatible vector types for invariants\n");
5710 return false;
5712 DUMP_VECT_SCOPE ("vectorizable_conversion");
5713 if (modifier == NONE)
5715 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5716 vect_model_simple_cost (vinfo, stmt_info,
5717 ncopies * (1 + multi_step_cvt),
5718 dt, ndts, slp_node, cost_vec);
5720 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5722 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5723 /* The final packing step produces one vector result per copy. */
5724 unsigned int nvectors
5725 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5726 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5727 multi_step_cvt, cost_vec,
5728 widen_arith);
5730 else
5732 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5733 /* The initial unpacking step produces two vector results
5734 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5735 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5736 unsigned int nvectors
5737 = (slp_node
5738 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5739 : ncopies * 2);
5740 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5741 multi_step_cvt, cost_vec,
5742 widen_arith);
5744 interm_types.release ();
5745 return true;
5748 /* Transform. */
5749 if (dump_enabled_p ())
5750 dump_printf_loc (MSG_NOTE, vect_location,
5751 "transform conversion. ncopies = %d.\n", ncopies);
5753 if (op_type == binary_op)
5755 if (CONSTANT_CLASS_P (op0))
5756 op0 = fold_convert (TREE_TYPE (op1), op0);
5757 else if (CONSTANT_CLASS_P (op1))
5758 op1 = fold_convert (TREE_TYPE (op0), op1);
5761 /* In case of multi-step conversion, we first generate conversion operations
5762 to the intermediate types, and then from that types to the final one.
5763 We create vector destinations for the intermediate type (TYPES) received
5764 from supportable_*_operation, and store them in the correct order
5765 for future use in vect_create_vectorized_*_stmts (). */
5766 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5767 bool widen_or_narrow_float_p
5768 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5769 vec_dest = vect_create_destination_var (scalar_dest,
5770 widen_or_narrow_float_p
5771 ? cvt_type : vectype_out);
5772 vec_dsts.quick_push (vec_dest);
5774 if (multi_step_cvt)
5776 for (i = interm_types.length () - 1;
5777 interm_types.iterate (i, &intermediate_type); i--)
5779 vec_dest = vect_create_destination_var (scalar_dest,
5780 intermediate_type);
5781 vec_dsts.quick_push (vec_dest);
5785 if (cvt_type)
5786 vec_dest = vect_create_destination_var (scalar_dest,
5787 widen_or_narrow_float_p
5788 ? vectype_out : cvt_type);
5790 int ninputs = 1;
5791 if (!slp_node)
5793 if (modifier == WIDEN)
5795 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5797 if (multi_step_cvt)
5798 ninputs = vect_pow2 (multi_step_cvt);
5799 ninputs *= 2;
5803 switch (modifier)
5805 case NONE:
5806 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5807 op0, &vec_oprnds0);
5808 /* vec_dest is intermediate type operand when multi_step_cvt. */
5809 if (multi_step_cvt)
5811 cvt_op = vec_dest;
5812 vec_dest = vec_dsts[0];
5815 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5817 /* Arguments are ready, create the new vector stmt. */
5818 gimple* new_stmt;
5819 if (multi_step_cvt)
5821 gcc_assert (multi_step_cvt == 1);
5822 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5823 new_temp = make_ssa_name (cvt_op, new_stmt);
5824 gimple_assign_set_lhs (new_stmt, new_temp);
5825 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5826 vop0 = new_temp;
5828 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5829 new_temp = make_ssa_name (vec_dest, new_stmt);
5830 gimple_set_lhs (new_stmt, new_temp);
5831 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5833 if (slp_node)
5834 slp_node->push_vec_def (new_stmt);
5835 else
5836 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5838 break;
5840 case WIDEN:
5841 /* In case the vectorization factor (VF) is bigger than the number
5842 of elements that we can fit in a vectype (nunits), we have to
5843 generate more than one vector stmt - i.e - we need to "unroll"
5844 the vector stmt by a factor VF/nunits. */
5845 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5846 op0, &vec_oprnds0,
5847 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5848 &vec_oprnds1);
5849 if (code == WIDEN_LSHIFT_EXPR)
5851 int oprnds_size = vec_oprnds0.length ();
5852 vec_oprnds1.create (oprnds_size);
5853 for (i = 0; i < oprnds_size; ++i)
5854 vec_oprnds1.quick_push (op1);
5856 /* Arguments are ready. Create the new vector stmts. */
5857 for (i = multi_step_cvt; i >= 0; i--)
5859 tree this_dest = vec_dsts[i];
5860 code_helper c1 = code1, c2 = code2;
5861 if (i == 0 && codecvt2 != ERROR_MARK)
5863 c1 = codecvt1;
5864 c2 = codecvt2;
5866 if (known_eq (nunits_out, nunits_in))
5867 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5868 stmt_info, this_dest, gsi, c1,
5869 op_type);
5870 else
5871 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5872 &vec_oprnds1, stmt_info,
5873 this_dest, gsi,
5874 c1, c2, op_type);
5877 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5879 gimple *new_stmt;
5880 if (cvt_type)
5882 new_temp = make_ssa_name (vec_dest);
5883 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5884 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5886 else
5887 new_stmt = SSA_NAME_DEF_STMT (vop0);
5889 if (slp_node)
5890 slp_node->push_vec_def (new_stmt);
5891 else
5892 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5894 break;
5896 case NARROW_SRC:
5897 case NARROW_DST:
5898 /* In case the vectorization factor (VF) is bigger than the number
5899 of elements that we can fit in a vectype (nunits), we have to
5900 generate more than one vector stmt - i.e - we need to "unroll"
5901 the vector stmt by a factor VF/nunits. */
5902 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5903 op0, &vec_oprnds0);
5904 /* Arguments are ready. Create the new vector stmts. */
5905 if (cvt_type && modifier == NARROW_DST)
5906 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5908 new_temp = make_ssa_name (vec_dest);
5909 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5910 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5911 vec_oprnds0[i] = new_temp;
5914 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5915 multi_step_cvt,
5916 stmt_info, vec_dsts, gsi,
5917 slp_node, code1,
5918 modifier == NARROW_SRC);
5919 /* After demoting op0 to cvt_type, convert it to dest. */
5920 if (cvt_type && code == FLOAT_EXPR)
5922 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5924 /* Arguments are ready, create the new vector stmt. */
5925 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5926 gimple *new_stmt
5927 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5928 new_temp = make_ssa_name (vec_dest, new_stmt);
5929 gimple_set_lhs (new_stmt, new_temp);
5930 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5932 /* This is the last step of the conversion sequence. Store the
5933 vectors in SLP_NODE or in vector info of the scalar statement
5934 (or in STMT_VINFO_RELATED_STMT chain). */
5935 if (slp_node)
5936 slp_node->push_vec_def (new_stmt);
5937 else
5938 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5941 break;
5943 if (!slp_node)
5944 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5946 vec_oprnds0.release ();
5947 vec_oprnds1.release ();
5948 interm_types.release ();
5950 return true;
5953 /* Return true if we can assume from the scalar form of STMT_INFO that
5954 neither the scalar nor the vector forms will generate code. STMT_INFO
5955 is known not to involve a data reference. */
5957 bool
5958 vect_nop_conversion_p (stmt_vec_info stmt_info)
5960 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5961 if (!stmt)
5962 return false;
5964 tree lhs = gimple_assign_lhs (stmt);
5965 tree_code code = gimple_assign_rhs_code (stmt);
5966 tree rhs = gimple_assign_rhs1 (stmt);
5968 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5969 return true;
5971 if (CONVERT_EXPR_CODE_P (code))
5972 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5974 return false;
5977 /* Function vectorizable_assignment.
5979 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5980 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5981 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5982 Return true if STMT_INFO is vectorizable in this way. */
5984 static bool
5985 vectorizable_assignment (vec_info *vinfo,
5986 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5987 gimple **vec_stmt, slp_tree slp_node,
5988 stmt_vector_for_cost *cost_vec)
5990 tree vec_dest;
5991 tree scalar_dest;
5992 tree op;
5993 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5994 tree new_temp;
5995 enum vect_def_type dt[1] = {vect_unknown_def_type};
5996 int ndts = 1;
5997 int ncopies;
5998 int i;
5999 vec<tree> vec_oprnds = vNULL;
6000 tree vop;
6001 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6002 enum tree_code code;
6003 tree vectype_in;
6005 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6006 return false;
6008 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6009 && ! vec_stmt)
6010 return false;
6012 /* Is vectorizable assignment? */
6013 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6014 if (!stmt)
6015 return false;
6017 scalar_dest = gimple_assign_lhs (stmt);
6018 if (TREE_CODE (scalar_dest) != SSA_NAME)
6019 return false;
6021 if (STMT_VINFO_DATA_REF (stmt_info))
6022 return false;
6024 code = gimple_assign_rhs_code (stmt);
6025 if (!(gimple_assign_single_p (stmt)
6026 || code == PAREN_EXPR
6027 || CONVERT_EXPR_CODE_P (code)))
6028 return false;
6030 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6031 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6033 /* Multiple types in SLP are handled by creating the appropriate number of
6034 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6035 case of SLP. */
6036 if (slp_node)
6037 ncopies = 1;
6038 else
6039 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6041 gcc_assert (ncopies >= 1);
6043 slp_tree slp_op;
6044 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
6045 &dt[0], &vectype_in))
6047 if (dump_enabled_p ())
6048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6049 "use not simple.\n");
6050 return false;
6052 if (!vectype_in)
6053 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
6055 /* We can handle NOP_EXPR conversions that do not change the number
6056 of elements or the vector size. */
6057 if ((CONVERT_EXPR_CODE_P (code)
6058 || code == VIEW_CONVERT_EXPR)
6059 && (!vectype_in
6060 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
6061 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
6062 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
6063 return false;
6065 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
6067 if (dump_enabled_p ())
6068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6069 "can't convert between boolean and non "
6070 "boolean vectors %T\n", TREE_TYPE (op));
6072 return false;
6075 /* We do not handle bit-precision changes. */
6076 if ((CONVERT_EXPR_CODE_P (code)
6077 || code == VIEW_CONVERT_EXPR)
6078 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6079 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6080 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
6081 && !type_has_mode_precision_p (TREE_TYPE (op))))
6082 /* But a conversion that does not change the bit-pattern is ok. */
6083 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
6084 && INTEGRAL_TYPE_P (TREE_TYPE (op))
6085 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
6086 > TYPE_PRECISION (TREE_TYPE (op)))
6087 && TYPE_UNSIGNED (TREE_TYPE (op))))
6089 if (dump_enabled_p ())
6090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6091 "type conversion to/from bit-precision "
6092 "unsupported.\n");
6093 return false;
6096 if (!vec_stmt) /* transformation not required. */
6098 if (slp_node
6099 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
6101 if (dump_enabled_p ())
6102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6103 "incompatible vector types for invariants\n");
6104 return false;
6106 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
6107 DUMP_VECT_SCOPE ("vectorizable_assignment");
6108 if (!vect_nop_conversion_p (stmt_info))
6109 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
6110 cost_vec);
6111 return true;
6114 /* Transform. */
6115 if (dump_enabled_p ())
6116 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
6118 /* Handle def. */
6119 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6121 /* Handle use. */
6122 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
6124 /* Arguments are ready. create the new vector stmt. */
6125 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
6127 if (CONVERT_EXPR_CODE_P (code)
6128 || code == VIEW_CONVERT_EXPR)
6129 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
6130 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
6131 new_temp = make_ssa_name (vec_dest, new_stmt);
6132 gimple_assign_set_lhs (new_stmt, new_temp);
6133 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6134 if (slp_node)
6135 slp_node->push_vec_def (new_stmt);
6136 else
6137 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6139 if (!slp_node)
6140 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6142 vec_oprnds.release ();
6143 return true;
6147 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
6148 either as shift by a scalar or by a vector. */
6150 bool
6151 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
6154 machine_mode vec_mode;
6155 optab optab;
6156 int icode;
6157 tree vectype;
6159 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
6160 if (!vectype)
6161 return false;
6163 optab = optab_for_tree_code (code, vectype, optab_scalar);
6164 if (!optab
6165 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
6167 optab = optab_for_tree_code (code, vectype, optab_vector);
6168 if (!optab
6169 || (optab_handler (optab, TYPE_MODE (vectype))
6170 == CODE_FOR_nothing))
6171 return false;
6174 vec_mode = TYPE_MODE (vectype);
6175 icode = (int) optab_handler (optab, vec_mode);
6176 if (icode == CODE_FOR_nothing)
6177 return false;
6179 return true;
6183 /* Function vectorizable_shift.
6185 Check if STMT_INFO performs a shift operation that can be vectorized.
6186 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
6187 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6188 Return true if STMT_INFO is vectorizable in this way. */
6190 static bool
6191 vectorizable_shift (vec_info *vinfo,
6192 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6193 gimple **vec_stmt, slp_tree slp_node,
6194 stmt_vector_for_cost *cost_vec)
6196 tree vec_dest;
6197 tree scalar_dest;
6198 tree op0, op1 = NULL;
6199 tree vec_oprnd1 = NULL_TREE;
6200 tree vectype;
6201 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6202 enum tree_code code;
6203 machine_mode vec_mode;
6204 tree new_temp;
6205 optab optab;
6206 int icode;
6207 machine_mode optab_op2_mode;
6208 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
6209 int ndts = 2;
6210 poly_uint64 nunits_in;
6211 poly_uint64 nunits_out;
6212 tree vectype_out;
6213 tree op1_vectype;
6214 int ncopies;
6215 int i;
6216 vec<tree> vec_oprnds0 = vNULL;
6217 vec<tree> vec_oprnds1 = vNULL;
6218 tree vop0, vop1;
6219 unsigned int k;
6220 bool scalar_shift_arg = true;
6221 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6222 bool incompatible_op1_vectype_p = false;
6224 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6225 return false;
6227 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6228 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6229 && ! vec_stmt)
6230 return false;
6232 /* Is STMT a vectorizable binary/unary operation? */
6233 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6234 if (!stmt)
6235 return false;
6237 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6238 return false;
6240 code = gimple_assign_rhs_code (stmt);
6242 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6243 || code == RROTATE_EXPR))
6244 return false;
6246 scalar_dest = gimple_assign_lhs (stmt);
6247 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6248 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6250 if (dump_enabled_p ())
6251 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6252 "bit-precision shifts not supported.\n");
6253 return false;
6256 slp_tree slp_op0;
6257 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6258 0, &op0, &slp_op0, &dt[0], &vectype))
6260 if (dump_enabled_p ())
6261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6262 "use not simple.\n");
6263 return false;
6265 /* If op0 is an external or constant def, infer the vector type
6266 from the scalar type. */
6267 if (!vectype)
6268 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6269 if (vec_stmt)
6270 gcc_assert (vectype);
6271 if (!vectype)
6273 if (dump_enabled_p ())
6274 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6275 "no vectype for scalar type\n");
6276 return false;
6279 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6280 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6281 if (maybe_ne (nunits_out, nunits_in))
6282 return false;
6284 stmt_vec_info op1_def_stmt_info;
6285 slp_tree slp_op1;
6286 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6287 &dt[1], &op1_vectype, &op1_def_stmt_info))
6289 if (dump_enabled_p ())
6290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6291 "use not simple.\n");
6292 return false;
6295 /* Multiple types in SLP are handled by creating the appropriate number of
6296 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6297 case of SLP. */
6298 if (slp_node)
6299 ncopies = 1;
6300 else
6301 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6303 gcc_assert (ncopies >= 1);
6305 /* Determine whether the shift amount is a vector, or scalar. If the
6306 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6308 if ((dt[1] == vect_internal_def
6309 || dt[1] == vect_induction_def
6310 || dt[1] == vect_nested_cycle)
6311 && !slp_node)
6312 scalar_shift_arg = false;
6313 else if (dt[1] == vect_constant_def
6314 || dt[1] == vect_external_def
6315 || dt[1] == vect_internal_def)
6317 /* In SLP, need to check whether the shift count is the same,
6318 in loops if it is a constant or invariant, it is always
6319 a scalar shift. */
6320 if (slp_node)
6322 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6323 stmt_vec_info slpstmt_info;
6325 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6327 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6328 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6329 scalar_shift_arg = false;
6332 /* For internal SLP defs we have to make sure we see scalar stmts
6333 for all vector elements.
6334 ??? For different vectors we could resort to a different
6335 scalar shift operand but code-generation below simply always
6336 takes the first. */
6337 if (dt[1] == vect_internal_def
6338 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6339 stmts.length ()))
6340 scalar_shift_arg = false;
6343 /* If the shift amount is computed by a pattern stmt we cannot
6344 use the scalar amount directly thus give up and use a vector
6345 shift. */
6346 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6347 scalar_shift_arg = false;
6349 else
6351 if (dump_enabled_p ())
6352 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6353 "operand mode requires invariant argument.\n");
6354 return false;
6357 /* Vector shifted by vector. */
6358 bool was_scalar_shift_arg = scalar_shift_arg;
6359 if (!scalar_shift_arg)
6361 optab = optab_for_tree_code (code, vectype, optab_vector);
6362 if (dump_enabled_p ())
6363 dump_printf_loc (MSG_NOTE, vect_location,
6364 "vector/vector shift/rotate found.\n");
6366 if (!op1_vectype)
6367 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6368 slp_op1);
6369 incompatible_op1_vectype_p
6370 = (op1_vectype == NULL_TREE
6371 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6372 TYPE_VECTOR_SUBPARTS (vectype))
6373 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6374 if (incompatible_op1_vectype_p
6375 && (!slp_node
6376 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6377 || slp_op1->refcnt != 1))
6379 if (dump_enabled_p ())
6380 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6381 "unusable type for last operand in"
6382 " vector/vector shift/rotate.\n");
6383 return false;
6386 /* See if the machine has a vector shifted by scalar insn and if not
6387 then see if it has a vector shifted by vector insn. */
6388 else
6390 optab = optab_for_tree_code (code, vectype, optab_scalar);
6391 if (optab
6392 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6394 if (dump_enabled_p ())
6395 dump_printf_loc (MSG_NOTE, vect_location,
6396 "vector/scalar shift/rotate found.\n");
6398 else
6400 optab = optab_for_tree_code (code, vectype, optab_vector);
6401 if (optab
6402 && (optab_handler (optab, TYPE_MODE (vectype))
6403 != CODE_FOR_nothing))
6405 scalar_shift_arg = false;
6407 if (dump_enabled_p ())
6408 dump_printf_loc (MSG_NOTE, vect_location,
6409 "vector/vector shift/rotate found.\n");
6411 if (!op1_vectype)
6412 op1_vectype = get_vectype_for_scalar_type (vinfo,
6413 TREE_TYPE (op1),
6414 slp_op1);
6416 /* Unlike the other binary operators, shifts/rotates have
6417 the rhs being int, instead of the same type as the lhs,
6418 so make sure the scalar is the right type if we are
6419 dealing with vectors of long long/long/short/char. */
6420 incompatible_op1_vectype_p
6421 = (!op1_vectype
6422 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6423 TREE_TYPE (op1)));
6424 if (incompatible_op1_vectype_p
6425 && dt[1] == vect_internal_def)
6427 if (dump_enabled_p ())
6428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6429 "unusable type for last operand in"
6430 " vector/vector shift/rotate.\n");
6431 return false;
6437 /* Supportable by target? */
6438 if (!optab)
6440 if (dump_enabled_p ())
6441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6442 "no optab.\n");
6443 return false;
6445 vec_mode = TYPE_MODE (vectype);
6446 icode = (int) optab_handler (optab, vec_mode);
6447 if (icode == CODE_FOR_nothing)
6449 if (dump_enabled_p ())
6450 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6451 "op not supported by target.\n");
6452 return false;
6454 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6455 if (vect_emulated_vector_p (vectype))
6456 return false;
6458 if (!vec_stmt) /* transformation not required. */
6460 if (slp_node
6461 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6462 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6463 && (!incompatible_op1_vectype_p
6464 || dt[1] == vect_constant_def)
6465 && !vect_maybe_update_slp_op_vectype
6466 (slp_op1,
6467 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6469 if (dump_enabled_p ())
6470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6471 "incompatible vector types for invariants\n");
6472 return false;
6474 /* Now adjust the constant shift amount in place. */
6475 if (slp_node
6476 && incompatible_op1_vectype_p
6477 && dt[1] == vect_constant_def)
6479 for (unsigned i = 0;
6480 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6482 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6483 = fold_convert (TREE_TYPE (vectype),
6484 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6485 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6486 == INTEGER_CST));
6489 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6490 DUMP_VECT_SCOPE ("vectorizable_shift");
6491 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6492 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6493 return true;
6496 /* Transform. */
6498 if (dump_enabled_p ())
6499 dump_printf_loc (MSG_NOTE, vect_location,
6500 "transform binary/unary operation.\n");
6502 if (incompatible_op1_vectype_p && !slp_node)
6504 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6505 op1 = fold_convert (TREE_TYPE (vectype), op1);
6506 if (dt[1] != vect_constant_def)
6507 op1 = vect_init_vector (vinfo, stmt_info, op1,
6508 TREE_TYPE (vectype), NULL);
6511 /* Handle def. */
6512 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6514 if (scalar_shift_arg && dt[1] != vect_internal_def)
6516 /* Vector shl and shr insn patterns can be defined with scalar
6517 operand 2 (shift operand). In this case, use constant or loop
6518 invariant op1 directly, without extending it to vector mode
6519 first. */
6520 optab_op2_mode = insn_data[icode].operand[2].mode;
6521 if (!VECTOR_MODE_P (optab_op2_mode))
6523 if (dump_enabled_p ())
6524 dump_printf_loc (MSG_NOTE, vect_location,
6525 "operand 1 using scalar mode.\n");
6526 vec_oprnd1 = op1;
6527 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6528 vec_oprnds1.quick_push (vec_oprnd1);
6529 /* Store vec_oprnd1 for every vector stmt to be created.
6530 We check during the analysis that all the shift arguments
6531 are the same.
6532 TODO: Allow different constants for different vector
6533 stmts generated for an SLP instance. */
6534 for (k = 0;
6535 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6536 vec_oprnds1.quick_push (vec_oprnd1);
6539 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6541 if (was_scalar_shift_arg)
6543 /* If the argument was the same in all lanes create
6544 the correctly typed vector shift amount directly. */
6545 op1 = fold_convert (TREE_TYPE (vectype), op1);
6546 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6547 !loop_vinfo ? gsi : NULL);
6548 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6549 !loop_vinfo ? gsi : NULL);
6550 vec_oprnds1.create (slp_node->vec_stmts_size);
6551 for (k = 0; k < slp_node->vec_stmts_size; k++)
6552 vec_oprnds1.quick_push (vec_oprnd1);
6554 else if (dt[1] == vect_constant_def)
6555 /* The constant shift amount has been adjusted in place. */
6557 else
6558 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6561 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
6562 (a special case for certain kind of vector shifts); otherwise,
6563 operand 1 should be of a vector type (the usual case). */
6564 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6565 op0, &vec_oprnds0,
6566 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6568 /* Arguments are ready. Create the new vector stmt. */
6569 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6571 /* For internal defs where we need to use a scalar shift arg
6572 extract the first lane. */
6573 if (scalar_shift_arg && dt[1] == vect_internal_def)
6575 vop1 = vec_oprnds1[0];
6576 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6577 gassign *new_stmt
6578 = gimple_build_assign (new_temp,
6579 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6580 vop1,
6581 TYPE_SIZE (TREE_TYPE (new_temp)),
6582 bitsize_zero_node));
6583 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6584 vop1 = new_temp;
6586 else
6587 vop1 = vec_oprnds1[i];
6588 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6589 new_temp = make_ssa_name (vec_dest, new_stmt);
6590 gimple_assign_set_lhs (new_stmt, new_temp);
6591 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6592 if (slp_node)
6593 slp_node->push_vec_def (new_stmt);
6594 else
6595 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6598 if (!slp_node)
6599 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6601 vec_oprnds0.release ();
6602 vec_oprnds1.release ();
6604 return true;
6607 /* Function vectorizable_operation.
6609 Check if STMT_INFO performs a binary, unary or ternary operation that can
6610 be vectorized.
6611 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6612 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6613 Return true if STMT_INFO is vectorizable in this way. */
6615 static bool
6616 vectorizable_operation (vec_info *vinfo,
6617 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6618 gimple **vec_stmt, slp_tree slp_node,
6619 stmt_vector_for_cost *cost_vec)
6621 tree vec_dest;
6622 tree scalar_dest;
6623 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6624 tree vectype;
6625 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6626 enum tree_code code, orig_code;
6627 machine_mode vec_mode;
6628 tree new_temp;
6629 int op_type;
6630 optab optab;
6631 bool target_support_p;
6632 enum vect_def_type dt[3]
6633 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6634 int ndts = 3;
6635 poly_uint64 nunits_in;
6636 poly_uint64 nunits_out;
6637 tree vectype_out;
6638 int ncopies, vec_num;
6639 int i;
6640 vec<tree> vec_oprnds0 = vNULL;
6641 vec<tree> vec_oprnds1 = vNULL;
6642 vec<tree> vec_oprnds2 = vNULL;
6643 tree vop0, vop1, vop2;
6644 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6646 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6647 return false;
6649 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6650 && ! vec_stmt)
6651 return false;
6653 /* Is STMT a vectorizable binary/unary operation? */
6654 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6655 if (!stmt)
6656 return false;
6658 /* Loads and stores are handled in vectorizable_{load,store}. */
6659 if (STMT_VINFO_DATA_REF (stmt_info))
6660 return false;
6662 orig_code = code = gimple_assign_rhs_code (stmt);
6664 /* Shifts are handled in vectorizable_shift. */
6665 if (code == LSHIFT_EXPR
6666 || code == RSHIFT_EXPR
6667 || code == LROTATE_EXPR
6668 || code == RROTATE_EXPR)
6669 return false;
6671 /* Comparisons are handled in vectorizable_comparison. */
6672 if (TREE_CODE_CLASS (code) == tcc_comparison)
6673 return false;
6675 /* Conditions are handled in vectorizable_condition. */
6676 if (code == COND_EXPR)
6677 return false;
6679 /* For pointer addition and subtraction, we should use the normal
6680 plus and minus for the vector operation. */
6681 if (code == POINTER_PLUS_EXPR)
6682 code = PLUS_EXPR;
6683 if (code == POINTER_DIFF_EXPR)
6684 code = MINUS_EXPR;
6686 /* Support only unary or binary operations. */
6687 op_type = TREE_CODE_LENGTH (code);
6688 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6690 if (dump_enabled_p ())
6691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6692 "num. args = %d (not unary/binary/ternary op).\n",
6693 op_type);
6694 return false;
6697 scalar_dest = gimple_assign_lhs (stmt);
6698 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6700 /* Most operations cannot handle bit-precision types without extra
6701 truncations. */
6702 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6703 if (!mask_op_p
6704 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6705 /* Exception are bitwise binary operations. */
6706 && code != BIT_IOR_EXPR
6707 && code != BIT_XOR_EXPR
6708 && code != BIT_AND_EXPR)
6710 if (dump_enabled_p ())
6711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6712 "bit-precision arithmetic not supported.\n");
6713 return false;
6716 slp_tree slp_op0;
6717 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6718 0, &op0, &slp_op0, &dt[0], &vectype))
6720 if (dump_enabled_p ())
6721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6722 "use not simple.\n");
6723 return false;
6725 bool is_invariant = (dt[0] == vect_external_def
6726 || dt[0] == vect_constant_def);
6727 /* If op0 is an external or constant def, infer the vector type
6728 from the scalar type. */
6729 if (!vectype)
6731 /* For boolean type we cannot determine vectype by
6732 invariant value (don't know whether it is a vector
6733 of booleans or vector of integers). We use output
6734 vectype because operations on boolean don't change
6735 type. */
6736 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6738 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6740 if (dump_enabled_p ())
6741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6742 "not supported operation on bool value.\n");
6743 return false;
6745 vectype = vectype_out;
6747 else
6748 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6749 slp_node);
6751 if (vec_stmt)
6752 gcc_assert (vectype);
6753 if (!vectype)
6755 if (dump_enabled_p ())
6756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6757 "no vectype for scalar type %T\n",
6758 TREE_TYPE (op0));
6760 return false;
6763 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6764 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6765 if (maybe_ne (nunits_out, nunits_in))
6766 return false;
6768 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6769 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6770 if (op_type == binary_op || op_type == ternary_op)
6772 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6773 1, &op1, &slp_op1, &dt[1], &vectype2))
6775 if (dump_enabled_p ())
6776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6777 "use not simple.\n");
6778 return false;
6780 is_invariant &= (dt[1] == vect_external_def
6781 || dt[1] == vect_constant_def);
6782 if (vectype2
6783 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6784 return false;
6786 if (op_type == ternary_op)
6788 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6789 2, &op2, &slp_op2, &dt[2], &vectype3))
6791 if (dump_enabled_p ())
6792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6793 "use not simple.\n");
6794 return false;
6796 is_invariant &= (dt[2] == vect_external_def
6797 || dt[2] == vect_constant_def);
6798 if (vectype3
6799 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6800 return false;
6803 /* Multiple types in SLP are handled by creating the appropriate number of
6804 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6805 case of SLP. */
6806 if (slp_node)
6808 ncopies = 1;
6809 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6811 else
6813 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6814 vec_num = 1;
6817 gcc_assert (ncopies >= 1);
6819 /* Reject attempts to combine mask types with nonmask types, e.g. if
6820 we have an AND between a (nonmask) boolean loaded from memory and
6821 a (mask) boolean result of a comparison.
6823 TODO: We could easily fix these cases up using pattern statements. */
6824 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6825 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6826 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6828 if (dump_enabled_p ())
6829 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6830 "mixed mask and nonmask vector types\n");
6831 return false;
6834 /* Supportable by target? */
6836 vec_mode = TYPE_MODE (vectype);
6837 if (code == MULT_HIGHPART_EXPR)
6838 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6839 else
6841 optab = optab_for_tree_code (code, vectype, optab_default);
6842 if (!optab)
6844 if (dump_enabled_p ())
6845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6846 "no optab.\n");
6847 return false;
6849 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6850 || optab_libfunc (optab, vec_mode));
6853 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6854 if (!target_support_p || using_emulated_vectors_p)
6856 if (dump_enabled_p ())
6857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6858 "op not supported by target.\n");
6859 /* When vec_mode is not a vector mode and we verified ops we
6860 do not have to lower like AND are natively supported let
6861 those through even when the mode isn't word_mode. For
6862 ops we have to lower the lowering code assumes we are
6863 dealing with word_mode. */
6864 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6865 || !target_support_p)
6866 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6867 /* Check only during analysis. */
6868 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6870 if (dump_enabled_p ())
6871 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6872 return false;
6874 if (dump_enabled_p ())
6875 dump_printf_loc (MSG_NOTE, vect_location,
6876 "proceeding using word mode.\n");
6877 using_emulated_vectors_p = true;
6880 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6881 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6882 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6883 internal_fn cond_fn = get_conditional_internal_fn (code);
6884 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6886 /* If operating on inactive elements could generate spurious traps,
6887 we need to restrict the operation to active lanes. Note that this
6888 specifically doesn't apply to unhoisted invariants, since they
6889 operate on the same value for every lane.
6891 Similarly, if this operation is part of a reduction, a fully-masked
6892 loop should only change the active lanes of the reduction chain,
6893 keeping the inactive lanes as-is. */
6894 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6895 || reduc_idx >= 0);
6897 if (!vec_stmt) /* transformation not required. */
6899 if (loop_vinfo
6900 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6901 && mask_out_inactive)
6903 if (cond_len_fn != IFN_LAST
6904 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6905 OPTIMIZE_FOR_SPEED))
6906 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6908 else if (cond_fn != IFN_LAST
6909 && direct_internal_fn_supported_p (cond_fn, vectype,
6910 OPTIMIZE_FOR_SPEED))
6911 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6912 vectype, NULL);
6913 else
6915 if (dump_enabled_p ())
6916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6917 "can't use a fully-masked loop because no"
6918 " conditional operation is available.\n");
6919 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6923 /* Put types on constant and invariant SLP children. */
6924 if (slp_node
6925 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6926 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6927 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6929 if (dump_enabled_p ())
6930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6931 "incompatible vector types for invariants\n");
6932 return false;
6935 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6936 DUMP_VECT_SCOPE ("vectorizable_operation");
6937 vect_model_simple_cost (vinfo, stmt_info,
6938 ncopies, dt, ndts, slp_node, cost_vec);
6939 if (using_emulated_vectors_p)
6941 /* The above vect_model_simple_cost call handles constants
6942 in the prologue and (mis-)costs one of the stmts as
6943 vector stmt. See below for the actual lowering that will
6944 be applied. */
6945 unsigned n
6946 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6947 switch (code)
6949 case PLUS_EXPR:
6950 n *= 5;
6951 break;
6952 case MINUS_EXPR:
6953 n *= 6;
6954 break;
6955 case NEGATE_EXPR:
6956 n *= 4;
6957 break;
6958 default:
6959 /* Bit operations do not have extra cost and are accounted
6960 as vector stmt by vect_model_simple_cost. */
6961 n = 0;
6962 break;
6964 if (n != 0)
6966 /* We also need to materialize two large constants. */
6967 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6968 0, vect_prologue);
6969 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6970 0, vect_body);
6973 return true;
6976 /* Transform. */
6978 if (dump_enabled_p ())
6979 dump_printf_loc (MSG_NOTE, vect_location,
6980 "transform binary/unary operation.\n");
6982 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6983 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6985 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6986 vectors with unsigned elements, but the result is signed. So, we
6987 need to compute the MINUS_EXPR into vectype temporary and
6988 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6989 tree vec_cvt_dest = NULL_TREE;
6990 if (orig_code == POINTER_DIFF_EXPR)
6992 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6993 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6995 /* Handle def. */
6996 else
6997 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6999 /* In case the vectorization factor (VF) is bigger than the number
7000 of elements that we can fit in a vectype (nunits), we have to generate
7001 more than one vector stmt - i.e - we need to "unroll" the
7002 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7003 from one copy of the vector stmt to the next, in the field
7004 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7005 stages to find the correct vector defs to be used when vectorizing
7006 stmts that use the defs of the current stmt. The example below
7007 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
7008 we need to create 4 vectorized stmts):
7010 before vectorization:
7011 RELATED_STMT VEC_STMT
7012 S1: x = memref - -
7013 S2: z = x + 1 - -
7015 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
7016 there):
7017 RELATED_STMT VEC_STMT
7018 VS1_0: vx0 = memref0 VS1_1 -
7019 VS1_1: vx1 = memref1 VS1_2 -
7020 VS1_2: vx2 = memref2 VS1_3 -
7021 VS1_3: vx3 = memref3 - -
7022 S1: x = load - VS1_0
7023 S2: z = x + 1 - -
7025 step2: vectorize stmt S2 (done here):
7026 To vectorize stmt S2 we first need to find the relevant vector
7027 def for the first operand 'x'. This is, as usual, obtained from
7028 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
7029 that defines 'x' (S1). This way we find the stmt VS1_0, and the
7030 relevant vector def 'vx0'. Having found 'vx0' we can generate
7031 the vector stmt VS2_0, and as usual, record it in the
7032 STMT_VINFO_VEC_STMT of stmt S2.
7033 When creating the second copy (VS2_1), we obtain the relevant vector
7034 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
7035 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
7036 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
7037 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
7038 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
7039 chain of stmts and pointers:
7040 RELATED_STMT VEC_STMT
7041 VS1_0: vx0 = memref0 VS1_1 -
7042 VS1_1: vx1 = memref1 VS1_2 -
7043 VS1_2: vx2 = memref2 VS1_3 -
7044 VS1_3: vx3 = memref3 - -
7045 S1: x = load - VS1_0
7046 VS2_0: vz0 = vx0 + v1 VS2_1 -
7047 VS2_1: vz1 = vx1 + v1 VS2_2 -
7048 VS2_2: vz2 = vx2 + v1 VS2_3 -
7049 VS2_3: vz3 = vx3 + v1 - -
7050 S2: z = x + 1 - VS2_0 */
7052 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
7053 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
7054 /* Arguments are ready. Create the new vector stmt. */
7055 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
7057 gimple *new_stmt = NULL;
7058 vop1 = ((op_type == binary_op || op_type == ternary_op)
7059 ? vec_oprnds1[i] : NULL_TREE);
7060 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
7061 if (using_emulated_vectors_p
7062 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
7064 /* Lower the operation. This follows vector lowering. */
7065 unsigned int width = vector_element_bits (vectype);
7066 tree inner_type = TREE_TYPE (vectype);
7067 tree word_type
7068 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
7069 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
7070 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
7071 tree high_bits
7072 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
7073 tree wvop0 = make_ssa_name (word_type);
7074 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
7075 build1 (VIEW_CONVERT_EXPR,
7076 word_type, vop0));
7077 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7078 tree result_low, signs;
7079 if (code == PLUS_EXPR || code == MINUS_EXPR)
7081 tree wvop1 = make_ssa_name (word_type);
7082 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
7083 build1 (VIEW_CONVERT_EXPR,
7084 word_type, vop1));
7085 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7086 signs = make_ssa_name (word_type);
7087 new_stmt = gimple_build_assign (signs,
7088 BIT_XOR_EXPR, wvop0, wvop1);
7089 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7090 tree b_low = make_ssa_name (word_type);
7091 new_stmt = gimple_build_assign (b_low,
7092 BIT_AND_EXPR, wvop1, low_bits);
7093 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7094 tree a_low = make_ssa_name (word_type);
7095 if (code == PLUS_EXPR)
7096 new_stmt = gimple_build_assign (a_low,
7097 BIT_AND_EXPR, wvop0, low_bits);
7098 else
7099 new_stmt = gimple_build_assign (a_low,
7100 BIT_IOR_EXPR, wvop0, high_bits);
7101 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7102 if (code == MINUS_EXPR)
7104 new_stmt = gimple_build_assign (NULL_TREE,
7105 BIT_NOT_EXPR, signs);
7106 signs = make_ssa_name (word_type);
7107 gimple_assign_set_lhs (new_stmt, signs);
7108 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7110 new_stmt = gimple_build_assign (NULL_TREE,
7111 BIT_AND_EXPR, signs, high_bits);
7112 signs = make_ssa_name (word_type);
7113 gimple_assign_set_lhs (new_stmt, signs);
7114 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7115 result_low = make_ssa_name (word_type);
7116 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
7117 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7119 else
7121 tree a_low = make_ssa_name (word_type);
7122 new_stmt = gimple_build_assign (a_low,
7123 BIT_AND_EXPR, wvop0, low_bits);
7124 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7125 signs = make_ssa_name (word_type);
7126 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
7127 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7128 new_stmt = gimple_build_assign (NULL_TREE,
7129 BIT_AND_EXPR, signs, high_bits);
7130 signs = make_ssa_name (word_type);
7131 gimple_assign_set_lhs (new_stmt, signs);
7132 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7133 result_low = make_ssa_name (word_type);
7134 new_stmt = gimple_build_assign (result_low,
7135 MINUS_EXPR, high_bits, a_low);
7136 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7138 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
7139 signs);
7140 result_low = make_ssa_name (word_type);
7141 gimple_assign_set_lhs (new_stmt, result_low);
7142 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7143 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
7144 build1 (VIEW_CONVERT_EXPR,
7145 vectype, result_low));
7146 new_temp = make_ssa_name (vectype);
7147 gimple_assign_set_lhs (new_stmt, new_temp);
7148 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7150 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
7152 tree mask;
7153 if (masked_loop_p)
7154 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7155 vec_num * ncopies, vectype, i);
7156 else
7157 /* Dummy mask. */
7158 mask = build_minus_one_cst (truth_type_for (vectype));
7159 auto_vec<tree> vops (6);
7160 vops.quick_push (mask);
7161 vops.quick_push (vop0);
7162 if (vop1)
7163 vops.quick_push (vop1);
7164 if (vop2)
7165 vops.quick_push (vop2);
7166 if (reduc_idx >= 0)
7168 /* Perform the operation on active elements only and take
7169 inactive elements from the reduction chain input. */
7170 gcc_assert (!vop2);
7171 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
7173 else
7175 auto else_value = targetm.preferred_else_value
7176 (cond_fn, vectype, vops.length () - 1, &vops[1]);
7177 vops.quick_push (else_value);
7179 if (len_loop_p)
7181 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
7182 vec_num * ncopies, vectype, i, 1);
7183 signed char biasval
7184 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
7185 tree bias = build_int_cst (intQI_type_node, biasval);
7186 vops.quick_push (len);
7187 vops.quick_push (bias);
7189 gcall *call
7190 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
7191 : cond_len_fn,
7192 vops);
7193 new_temp = make_ssa_name (vec_dest, call);
7194 gimple_call_set_lhs (call, new_temp);
7195 gimple_call_set_nothrow (call, true);
7196 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
7197 new_stmt = call;
7199 else
7201 tree mask = NULL_TREE;
7202 /* When combining two masks check if either of them is elsewhere
7203 combined with a loop mask, if that's the case we can mark that the
7204 new combined mask doesn't need to be combined with a loop mask. */
7205 if (masked_loop_p
7206 && code == BIT_AND_EXPR
7207 && VECTOR_BOOLEAN_TYPE_P (vectype))
7209 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7210 ncopies}))
7212 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7213 vec_num * ncopies, vectype, i);
7215 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7216 vop0, gsi);
7219 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7220 ncopies }))
7222 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7223 vec_num * ncopies, vectype, i);
7225 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7226 vop1, gsi);
7230 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7231 new_temp = make_ssa_name (vec_dest, new_stmt);
7232 gimple_assign_set_lhs (new_stmt, new_temp);
7233 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7234 if (using_emulated_vectors_p)
7235 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7237 /* Enter the combined value into the vector cond hash so we don't
7238 AND it with a loop mask again. */
7239 if (mask)
7240 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7243 if (vec_cvt_dest)
7245 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7246 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7247 new_temp);
7248 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7249 gimple_assign_set_lhs (new_stmt, new_temp);
7250 vect_finish_stmt_generation (vinfo, stmt_info,
7251 new_stmt, gsi);
7254 if (slp_node)
7255 slp_node->push_vec_def (new_stmt);
7256 else
7257 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7260 if (!slp_node)
7261 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7263 vec_oprnds0.release ();
7264 vec_oprnds1.release ();
7265 vec_oprnds2.release ();
7267 return true;
7270 /* A helper function to ensure data reference DR_INFO's base alignment. */
7272 static void
7273 ensure_base_align (dr_vec_info *dr_info)
7275 /* Alignment is only analyzed for the first element of a DR group,
7276 use that to look at base alignment we need to enforce. */
7277 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7278 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7280 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7282 if (dr_info->base_misaligned)
7284 tree base_decl = dr_info->base_decl;
7286 // We should only be able to increase the alignment of a base object if
7287 // we know what its new alignment should be at compile time.
7288 unsigned HOST_WIDE_INT align_base_to =
7289 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7291 if (decl_in_symtab_p (base_decl))
7292 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7293 else if (DECL_ALIGN (base_decl) < align_base_to)
7295 SET_DECL_ALIGN (base_decl, align_base_to);
7296 DECL_USER_ALIGN (base_decl) = 1;
7298 dr_info->base_misaligned = false;
7303 /* Function get_group_alias_ptr_type.
7305 Return the alias type for the group starting at FIRST_STMT_INFO. */
7307 static tree
7308 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7310 struct data_reference *first_dr, *next_dr;
7312 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7313 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7314 while (next_stmt_info)
7316 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7317 if (get_alias_set (DR_REF (first_dr))
7318 != get_alias_set (DR_REF (next_dr)))
7320 if (dump_enabled_p ())
7321 dump_printf_loc (MSG_NOTE, vect_location,
7322 "conflicting alias set types.\n");
7323 return ptr_type_node;
7325 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7327 return reference_alias_ptr_type (DR_REF (first_dr));
7331 /* Function scan_operand_equal_p.
7333 Helper function for check_scan_store. Compare two references
7334 with .GOMP_SIMD_LANE bases. */
7336 static bool
7337 scan_operand_equal_p (tree ref1, tree ref2)
7339 tree ref[2] = { ref1, ref2 };
7340 poly_int64 bitsize[2], bitpos[2];
7341 tree offset[2], base[2];
7342 for (int i = 0; i < 2; ++i)
7344 machine_mode mode;
7345 int unsignedp, reversep, volatilep = 0;
7346 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7347 &offset[i], &mode, &unsignedp,
7348 &reversep, &volatilep);
7349 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7350 return false;
7351 if (TREE_CODE (base[i]) == MEM_REF
7352 && offset[i] == NULL_TREE
7353 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7355 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7356 if (is_gimple_assign (def_stmt)
7357 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7358 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7359 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7361 if (maybe_ne (mem_ref_offset (base[i]), 0))
7362 return false;
7363 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7364 offset[i] = gimple_assign_rhs2 (def_stmt);
7369 if (!operand_equal_p (base[0], base[1], 0))
7370 return false;
7371 if (maybe_ne (bitsize[0], bitsize[1]))
7372 return false;
7373 if (offset[0] != offset[1])
7375 if (!offset[0] || !offset[1])
7376 return false;
7377 if (!operand_equal_p (offset[0], offset[1], 0))
7379 tree step[2];
7380 for (int i = 0; i < 2; ++i)
7382 step[i] = integer_one_node;
7383 if (TREE_CODE (offset[i]) == SSA_NAME)
7385 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7386 if (is_gimple_assign (def_stmt)
7387 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7388 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7389 == INTEGER_CST))
7391 step[i] = gimple_assign_rhs2 (def_stmt);
7392 offset[i] = gimple_assign_rhs1 (def_stmt);
7395 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7397 step[i] = TREE_OPERAND (offset[i], 1);
7398 offset[i] = TREE_OPERAND (offset[i], 0);
7400 tree rhs1 = NULL_TREE;
7401 if (TREE_CODE (offset[i]) == SSA_NAME)
7403 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7404 if (gimple_assign_cast_p (def_stmt))
7405 rhs1 = gimple_assign_rhs1 (def_stmt);
7407 else if (CONVERT_EXPR_P (offset[i]))
7408 rhs1 = TREE_OPERAND (offset[i], 0);
7409 if (rhs1
7410 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7411 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7412 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7413 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7414 offset[i] = rhs1;
7416 if (!operand_equal_p (offset[0], offset[1], 0)
7417 || !operand_equal_p (step[0], step[1], 0))
7418 return false;
7421 return true;
7425 enum scan_store_kind {
7426 /* Normal permutation. */
7427 scan_store_kind_perm,
7429 /* Whole vector left shift permutation with zero init. */
7430 scan_store_kind_lshift_zero,
7432 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7433 scan_store_kind_lshift_cond
7436 /* Function check_scan_store.
7438 Verify if we can perform the needed permutations or whole vector shifts.
7439 Return -1 on failure, otherwise exact log2 of vectype's nunits.
7440 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
7441 to do at each step. */
7443 static int
7444 scan_store_can_perm_p (tree vectype, tree init,
7445 vec<enum scan_store_kind> *use_whole_vector = NULL)
7447 enum machine_mode vec_mode = TYPE_MODE (vectype);
7448 unsigned HOST_WIDE_INT nunits;
7449 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7450 return -1;
7451 int units_log2 = exact_log2 (nunits);
7452 if (units_log2 <= 0)
7453 return -1;
7455 int i;
7456 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7457 for (i = 0; i <= units_log2; ++i)
7459 unsigned HOST_WIDE_INT j, k;
7460 enum scan_store_kind kind = scan_store_kind_perm;
7461 vec_perm_builder sel (nunits, nunits, 1);
7462 sel.quick_grow (nunits);
7463 if (i == units_log2)
7465 for (j = 0; j < nunits; ++j)
7466 sel[j] = nunits - 1;
7468 else
7470 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7471 sel[j] = j;
7472 for (k = 0; j < nunits; ++j, ++k)
7473 sel[j] = nunits + k;
7475 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7476 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7478 if (i == units_log2)
7479 return -1;
7481 if (whole_vector_shift_kind == scan_store_kind_perm)
7483 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7484 return -1;
7485 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7486 /* Whole vector shifts shift in zeros, so if init is all zero
7487 constant, there is no need to do anything further. */
7488 if ((TREE_CODE (init) != INTEGER_CST
7489 && TREE_CODE (init) != REAL_CST)
7490 || !initializer_zerop (init))
7492 tree masktype = truth_type_for (vectype);
7493 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7494 return -1;
7495 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7498 kind = whole_vector_shift_kind;
7500 if (use_whole_vector)
7502 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7503 use_whole_vector->safe_grow_cleared (i, true);
7504 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7505 use_whole_vector->safe_push (kind);
7509 return units_log2;
7513 /* Function check_scan_store.
7515 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7517 static bool
7518 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7519 enum vect_def_type rhs_dt, bool slp, tree mask,
7520 vect_memory_access_type memory_access_type)
7522 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7523 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7524 tree ref_type;
7526 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7527 if (slp
7528 || mask
7529 || memory_access_type != VMAT_CONTIGUOUS
7530 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7531 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7532 || loop_vinfo == NULL
7533 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7534 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7535 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7536 || !integer_zerop (DR_INIT (dr_info->dr))
7537 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7538 || !alias_sets_conflict_p (get_alias_set (vectype),
7539 get_alias_set (TREE_TYPE (ref_type))))
7541 if (dump_enabled_p ())
7542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7543 "unsupported OpenMP scan store.\n");
7544 return false;
7547 /* We need to pattern match code built by OpenMP lowering and simplified
7548 by following optimizations into something we can handle.
7549 #pragma omp simd reduction(inscan,+:r)
7550 for (...)
7552 r += something ();
7553 #pragma omp scan inclusive (r)
7554 use (r);
7556 shall have body with:
7557 // Initialization for input phase, store the reduction initializer:
7558 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7559 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7560 D.2042[_21] = 0;
7561 // Actual input phase:
7563 r.0_5 = D.2042[_20];
7564 _6 = _4 + r.0_5;
7565 D.2042[_20] = _6;
7566 // Initialization for scan phase:
7567 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7568 _26 = D.2043[_25];
7569 _27 = D.2042[_25];
7570 _28 = _26 + _27;
7571 D.2043[_25] = _28;
7572 D.2042[_25] = _28;
7573 // Actual scan phase:
7575 r.1_8 = D.2042[_20];
7577 The "omp simd array" variable D.2042 holds the privatized copy used
7578 inside of the loop and D.2043 is another one that holds copies of
7579 the current original list item. The separate GOMP_SIMD_LANE ifn
7580 kinds are there in order to allow optimizing the initializer store
7581 and combiner sequence, e.g. if it is originally some C++ish user
7582 defined reduction, but allow the vectorizer to pattern recognize it
7583 and turn into the appropriate vectorized scan.
7585 For exclusive scan, this is slightly different:
7586 #pragma omp simd reduction(inscan,+:r)
7587 for (...)
7589 use (r);
7590 #pragma omp scan exclusive (r)
7591 r += something ();
7593 shall have body with:
7594 // Initialization for input phase, store the reduction initializer:
7595 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7596 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7597 D.2042[_21] = 0;
7598 // Actual input phase:
7600 r.0_5 = D.2042[_20];
7601 _6 = _4 + r.0_5;
7602 D.2042[_20] = _6;
7603 // Initialization for scan phase:
7604 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7605 _26 = D.2043[_25];
7606 D.2044[_25] = _26;
7607 _27 = D.2042[_25];
7608 _28 = _26 + _27;
7609 D.2043[_25] = _28;
7610 // Actual scan phase:
7612 r.1_8 = D.2044[_20];
7613 ... */
7615 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7617 /* Match the D.2042[_21] = 0; store above. Just require that
7618 it is a constant or external definition store. */
7619 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7621 fail_init:
7622 if (dump_enabled_p ())
7623 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7624 "unsupported OpenMP scan initializer store.\n");
7625 return false;
7628 if (! loop_vinfo->scan_map)
7629 loop_vinfo->scan_map = new hash_map<tree, tree>;
7630 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7631 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7632 if (cached)
7633 goto fail_init;
7634 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7636 /* These stores can be vectorized normally. */
7637 return true;
7640 if (rhs_dt != vect_internal_def)
7642 fail:
7643 if (dump_enabled_p ())
7644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7645 "unsupported OpenMP scan combiner pattern.\n");
7646 return false;
7649 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7650 tree rhs = gimple_assign_rhs1 (stmt);
7651 if (TREE_CODE (rhs) != SSA_NAME)
7652 goto fail;
7654 gimple *other_store_stmt = NULL;
7655 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7656 bool inscan_var_store
7657 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7659 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7661 if (!inscan_var_store)
7663 use_operand_p use_p;
7664 imm_use_iterator iter;
7665 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7667 gimple *use_stmt = USE_STMT (use_p);
7668 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7669 continue;
7670 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7671 || !is_gimple_assign (use_stmt)
7672 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7673 || other_store_stmt
7674 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7675 goto fail;
7676 other_store_stmt = use_stmt;
7678 if (other_store_stmt == NULL)
7679 goto fail;
7680 rhs = gimple_assign_lhs (other_store_stmt);
7681 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7682 goto fail;
7685 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7687 use_operand_p use_p;
7688 imm_use_iterator iter;
7689 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7691 gimple *use_stmt = USE_STMT (use_p);
7692 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7693 continue;
7694 if (other_store_stmt)
7695 goto fail;
7696 other_store_stmt = use_stmt;
7699 else
7700 goto fail;
7702 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7703 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7704 || !is_gimple_assign (def_stmt)
7705 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7706 goto fail;
7708 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7709 /* For pointer addition, we should use the normal plus for the vector
7710 operation. */
7711 switch (code)
7713 case POINTER_PLUS_EXPR:
7714 code = PLUS_EXPR;
7715 break;
7716 case MULT_HIGHPART_EXPR:
7717 goto fail;
7718 default:
7719 break;
7721 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7722 goto fail;
7724 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7725 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7726 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7727 goto fail;
7729 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7730 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7731 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7732 || !gimple_assign_load_p (load1_stmt)
7733 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7734 || !gimple_assign_load_p (load2_stmt))
7735 goto fail;
7737 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7738 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7739 if (load1_stmt_info == NULL
7740 || load2_stmt_info == NULL
7741 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7742 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7743 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7744 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7745 goto fail;
7747 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7749 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7750 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7751 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7752 goto fail;
7753 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7754 tree lrhs;
7755 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7756 lrhs = rhs1;
7757 else
7758 lrhs = rhs2;
7759 use_operand_p use_p;
7760 imm_use_iterator iter;
7761 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7763 gimple *use_stmt = USE_STMT (use_p);
7764 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7765 continue;
7766 if (other_store_stmt)
7767 goto fail;
7768 other_store_stmt = use_stmt;
7772 if (other_store_stmt == NULL)
7773 goto fail;
7774 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7775 || !gimple_store_p (other_store_stmt))
7776 goto fail;
7778 stmt_vec_info other_store_stmt_info
7779 = loop_vinfo->lookup_stmt (other_store_stmt);
7780 if (other_store_stmt_info == NULL
7781 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7782 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7783 goto fail;
7785 gimple *stmt1 = stmt;
7786 gimple *stmt2 = other_store_stmt;
7787 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7788 std::swap (stmt1, stmt2);
7789 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7790 gimple_assign_rhs1 (load2_stmt)))
7792 std::swap (rhs1, rhs2);
7793 std::swap (load1_stmt, load2_stmt);
7794 std::swap (load1_stmt_info, load2_stmt_info);
7796 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7797 gimple_assign_rhs1 (load1_stmt)))
7798 goto fail;
7800 tree var3 = NULL_TREE;
7801 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7802 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7803 gimple_assign_rhs1 (load2_stmt)))
7804 goto fail;
7805 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7807 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7808 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7809 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7810 goto fail;
7811 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7812 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7813 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7814 || lookup_attribute ("omp simd inscan exclusive",
7815 DECL_ATTRIBUTES (var3)))
7816 goto fail;
7819 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7820 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7821 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7822 goto fail;
7824 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7825 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7826 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7827 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7828 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7829 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7830 goto fail;
7832 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7833 std::swap (var1, var2);
7835 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7837 if (!lookup_attribute ("omp simd inscan exclusive",
7838 DECL_ATTRIBUTES (var1)))
7839 goto fail;
7840 var1 = var3;
7843 if (loop_vinfo->scan_map == NULL)
7844 goto fail;
7845 tree *init = loop_vinfo->scan_map->get (var1);
7846 if (init == NULL)
7847 goto fail;
7849 /* The IL is as expected, now check if we can actually vectorize it.
7850 Inclusive scan:
7851 _26 = D.2043[_25];
7852 _27 = D.2042[_25];
7853 _28 = _26 + _27;
7854 D.2043[_25] = _28;
7855 D.2042[_25] = _28;
7856 should be vectorized as (where _40 is the vectorized rhs
7857 from the D.2042[_21] = 0; store):
7858 _30 = MEM <vector(8) int> [(int *)&D.2043];
7859 _31 = MEM <vector(8) int> [(int *)&D.2042];
7860 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7861 _33 = _31 + _32;
7862 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7863 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7864 _35 = _33 + _34;
7865 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7866 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7867 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7868 _37 = _35 + _36;
7869 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7870 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7871 _38 = _30 + _37;
7872 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7873 MEM <vector(8) int> [(int *)&D.2043] = _39;
7874 MEM <vector(8) int> [(int *)&D.2042] = _38;
7875 Exclusive scan:
7876 _26 = D.2043[_25];
7877 D.2044[_25] = _26;
7878 _27 = D.2042[_25];
7879 _28 = _26 + _27;
7880 D.2043[_25] = _28;
7881 should be vectorized as (where _40 is the vectorized rhs
7882 from the D.2042[_21] = 0; store):
7883 _30 = MEM <vector(8) int> [(int *)&D.2043];
7884 _31 = MEM <vector(8) int> [(int *)&D.2042];
7885 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7886 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7887 _34 = _32 + _33;
7888 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7889 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7890 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7891 _36 = _34 + _35;
7892 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7893 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7894 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7895 _38 = _36 + _37;
7896 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7897 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7898 _39 = _30 + _38;
7899 _50 = _31 + _39;
7900 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7901 MEM <vector(8) int> [(int *)&D.2044] = _39;
7902 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7903 enum machine_mode vec_mode = TYPE_MODE (vectype);
7904 optab optab = optab_for_tree_code (code, vectype, optab_default);
7905 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7906 goto fail;
7908 int units_log2 = scan_store_can_perm_p (vectype, *init);
7909 if (units_log2 == -1)
7910 goto fail;
7912 return true;
7916 /* Function vectorizable_scan_store.
7918 Helper of vectorizable_score, arguments like on vectorizable_store.
7919 Handle only the transformation, checking is done in check_scan_store. */
7921 static bool
7922 vectorizable_scan_store (vec_info *vinfo,
7923 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7924 gimple **vec_stmt, int ncopies)
7926 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7927 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7928 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7929 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7931 if (dump_enabled_p ())
7932 dump_printf_loc (MSG_NOTE, vect_location,
7933 "transform scan store. ncopies = %d\n", ncopies);
7935 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7936 tree rhs = gimple_assign_rhs1 (stmt);
7937 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7939 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7940 bool inscan_var_store
7941 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7943 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7945 use_operand_p use_p;
7946 imm_use_iterator iter;
7947 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7949 gimple *use_stmt = USE_STMT (use_p);
7950 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7951 continue;
7952 rhs = gimple_assign_lhs (use_stmt);
7953 break;
7957 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7958 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7959 if (code == POINTER_PLUS_EXPR)
7960 code = PLUS_EXPR;
7961 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7962 && commutative_tree_code (code));
7963 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7964 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7965 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7966 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7967 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7968 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7969 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7970 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7971 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7972 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7973 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7975 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7977 std::swap (rhs1, rhs2);
7978 std::swap (var1, var2);
7979 std::swap (load1_dr_info, load2_dr_info);
7982 tree *init = loop_vinfo->scan_map->get (var1);
7983 gcc_assert (init);
7985 unsigned HOST_WIDE_INT nunits;
7986 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7987 gcc_unreachable ();
7988 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7989 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7990 gcc_assert (units_log2 > 0);
7991 auto_vec<tree, 16> perms;
7992 perms.quick_grow (units_log2 + 1);
7993 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7994 for (int i = 0; i <= units_log2; ++i)
7996 unsigned HOST_WIDE_INT j, k;
7997 vec_perm_builder sel (nunits, nunits, 1);
7998 sel.quick_grow (nunits);
7999 if (i == units_log2)
8000 for (j = 0; j < nunits; ++j)
8001 sel[j] = nunits - 1;
8002 else
8004 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
8005 sel[j] = j;
8006 for (k = 0; j < nunits; ++j, ++k)
8007 sel[j] = nunits + k;
8009 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
8010 if (!use_whole_vector.is_empty ()
8011 && use_whole_vector[i] != scan_store_kind_perm)
8013 if (zero_vec == NULL_TREE)
8014 zero_vec = build_zero_cst (vectype);
8015 if (masktype == NULL_TREE
8016 && use_whole_vector[i] == scan_store_kind_lshift_cond)
8017 masktype = truth_type_for (vectype);
8018 perms[i] = vect_gen_perm_mask_any (vectype, indices);
8020 else
8021 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
8024 tree vec_oprnd1 = NULL_TREE;
8025 tree vec_oprnd2 = NULL_TREE;
8026 tree vec_oprnd3 = NULL_TREE;
8027 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
8028 tree dataref_offset = build_int_cst (ref_type, 0);
8029 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
8030 vectype, VMAT_CONTIGUOUS);
8031 tree ldataref_ptr = NULL_TREE;
8032 tree orig = NULL_TREE;
8033 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
8034 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
8035 auto_vec<tree> vec_oprnds1;
8036 auto_vec<tree> vec_oprnds2;
8037 auto_vec<tree> vec_oprnds3;
8038 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
8039 *init, &vec_oprnds1,
8040 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
8041 rhs2, &vec_oprnds3);
8042 for (int j = 0; j < ncopies; j++)
8044 vec_oprnd1 = vec_oprnds1[j];
8045 if (ldataref_ptr == NULL)
8046 vec_oprnd2 = vec_oprnds2[j];
8047 vec_oprnd3 = vec_oprnds3[j];
8048 if (j == 0)
8049 orig = vec_oprnd3;
8050 else if (!inscan_var_store)
8051 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8053 if (ldataref_ptr)
8055 vec_oprnd2 = make_ssa_name (vectype);
8056 tree data_ref = fold_build2 (MEM_REF, vectype,
8057 unshare_expr (ldataref_ptr),
8058 dataref_offset);
8059 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
8060 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
8061 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8062 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8063 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8066 tree v = vec_oprnd2;
8067 for (int i = 0; i < units_log2; ++i)
8069 tree new_temp = make_ssa_name (vectype);
8070 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
8071 (zero_vec
8072 && (use_whole_vector[i]
8073 != scan_store_kind_perm))
8074 ? zero_vec : vec_oprnd1, v,
8075 perms[i]);
8076 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8077 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8078 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8080 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
8082 /* Whole vector shift shifted in zero bits, but if *init
8083 is not initializer_zerop, we need to replace those elements
8084 with elements from vec_oprnd1. */
8085 tree_vector_builder vb (masktype, nunits, 1);
8086 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
8087 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
8088 ? boolean_false_node : boolean_true_node);
8090 tree new_temp2 = make_ssa_name (vectype);
8091 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
8092 new_temp, vec_oprnd1);
8093 vect_finish_stmt_generation (vinfo, stmt_info,
8094 g, gsi);
8095 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8096 new_temp = new_temp2;
8099 /* For exclusive scan, perform the perms[i] permutation once
8100 more. */
8101 if (i == 0
8102 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
8103 && v == vec_oprnd2)
8105 v = new_temp;
8106 --i;
8107 continue;
8110 tree new_temp2 = make_ssa_name (vectype);
8111 g = gimple_build_assign (new_temp2, code, v, new_temp);
8112 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8113 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8115 v = new_temp2;
8118 tree new_temp = make_ssa_name (vectype);
8119 gimple *g = gimple_build_assign (new_temp, code, orig, v);
8120 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8121 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8123 tree last_perm_arg = new_temp;
8124 /* For exclusive scan, new_temp computed above is the exclusive scan
8125 prefix sum. Turn it into inclusive prefix sum for the broadcast
8126 of the last element into orig. */
8127 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
8129 last_perm_arg = make_ssa_name (vectype);
8130 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
8131 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8132 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8135 orig = make_ssa_name (vectype);
8136 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
8137 last_perm_arg, perms[units_log2]);
8138 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8139 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8141 if (!inscan_var_store)
8143 tree data_ref = fold_build2 (MEM_REF, vectype,
8144 unshare_expr (dataref_ptr),
8145 dataref_offset);
8146 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8147 g = gimple_build_assign (data_ref, new_temp);
8148 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8149 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8153 if (inscan_var_store)
8154 for (int j = 0; j < ncopies; j++)
8156 if (j != 0)
8157 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8159 tree data_ref = fold_build2 (MEM_REF, vectype,
8160 unshare_expr (dataref_ptr),
8161 dataref_offset);
8162 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
8163 gimple *g = gimple_build_assign (data_ref, orig);
8164 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
8165 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
8167 return true;
8171 /* Function vectorizable_store.
8173 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
8174 that can be vectorized.
8175 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8176 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8177 Return true if STMT_INFO is vectorizable in this way. */
8179 static bool
8180 vectorizable_store (vec_info *vinfo,
8181 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8182 gimple **vec_stmt, slp_tree slp_node,
8183 stmt_vector_for_cost *cost_vec)
8185 tree data_ref;
8186 tree op;
8187 tree vec_oprnd = NULL_TREE;
8188 tree elem_type;
8189 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
8190 class loop *loop = NULL;
8191 machine_mode vec_mode;
8192 tree dummy;
8193 enum vect_def_type rhs_dt = vect_unknown_def_type;
8194 enum vect_def_type mask_dt = vect_unknown_def_type;
8195 tree dataref_ptr = NULL_TREE;
8196 tree dataref_offset = NULL_TREE;
8197 gimple *ptr_incr = NULL;
8198 int ncopies;
8199 int j;
8200 stmt_vec_info first_stmt_info;
8201 bool grouped_store;
8202 unsigned int group_size, i;
8203 vec<tree> oprnds = vNULL;
8204 vec<tree> result_chain = vNULL;
8205 vec<tree> vec_oprnds = vNULL;
8206 bool slp = (slp_node != NULL);
8207 unsigned int vec_num;
8208 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
8209 tree aggr_type;
8210 gather_scatter_info gs_info;
8211 poly_uint64 vf;
8212 vec_load_store_type vls_type;
8213 tree ref_type;
8215 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8216 return false;
8218 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8219 && ! vec_stmt)
8220 return false;
8222 /* Is vectorizable store? */
8224 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8225 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8227 tree scalar_dest = gimple_assign_lhs (assign);
8228 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8229 && is_pattern_stmt_p (stmt_info))
8230 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8231 if (TREE_CODE (scalar_dest) != ARRAY_REF
8232 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8233 && TREE_CODE (scalar_dest) != INDIRECT_REF
8234 && TREE_CODE (scalar_dest) != COMPONENT_REF
8235 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8236 && TREE_CODE (scalar_dest) != REALPART_EXPR
8237 && TREE_CODE (scalar_dest) != MEM_REF)
8238 return false;
8240 else
8242 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8243 if (!call || !gimple_call_internal_p (call))
8244 return false;
8246 internal_fn ifn = gimple_call_internal_fn (call);
8247 if (!internal_store_fn_p (ifn))
8248 return false;
8250 if (slp_node != NULL)
8252 if (dump_enabled_p ())
8253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8254 "SLP of masked stores not supported.\n");
8255 return false;
8258 int mask_index = internal_fn_mask_index (ifn);
8259 if (mask_index >= 0
8260 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8261 &mask, NULL, &mask_dt, &mask_vectype))
8262 return false;
8265 op = vect_get_store_rhs (stmt_info);
8267 /* Cannot have hybrid store SLP -- that would mean storing to the
8268 same location twice. */
8269 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8271 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8272 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8274 if (loop_vinfo)
8276 loop = LOOP_VINFO_LOOP (loop_vinfo);
8277 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8279 else
8280 vf = 1;
8282 /* Multiple types in SLP are handled by creating the appropriate number of
8283 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8284 case of SLP. */
8285 if (slp)
8286 ncopies = 1;
8287 else
8288 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8290 gcc_assert (ncopies >= 1);
8292 /* FORNOW. This restriction should be relaxed. */
8293 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8295 if (dump_enabled_p ())
8296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8297 "multiple types in nested loop.\n");
8298 return false;
8301 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8302 op, &rhs_dt, &rhs_vectype, &vls_type))
8303 return false;
8305 elem_type = TREE_TYPE (vectype);
8306 vec_mode = TYPE_MODE (vectype);
8308 if (!STMT_VINFO_DATA_REF (stmt_info))
8309 return false;
8311 vect_memory_access_type memory_access_type;
8312 enum dr_alignment_support alignment_support_scheme;
8313 int misalignment;
8314 poly_int64 poffset;
8315 internal_fn lanes_ifn;
8316 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8317 ncopies, &memory_access_type, &poffset,
8318 &alignment_support_scheme, &misalignment, &gs_info,
8319 &lanes_ifn))
8320 return false;
8322 if (mask)
8324 if (memory_access_type == VMAT_CONTIGUOUS)
8326 if (!VECTOR_MODE_P (vec_mode)
8327 || !can_vec_mask_load_store_p (vec_mode,
8328 TYPE_MODE (mask_vectype), false))
8329 return false;
8331 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8332 && (memory_access_type != VMAT_GATHER_SCATTER
8333 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8335 if (dump_enabled_p ())
8336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8337 "unsupported access type for masked store.\n");
8338 return false;
8340 else if (memory_access_type == VMAT_GATHER_SCATTER
8341 && gs_info.ifn == IFN_LAST
8342 && !gs_info.decl)
8344 if (dump_enabled_p ())
8345 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8346 "unsupported masked emulated scatter.\n");
8347 return false;
8350 else
8352 /* FORNOW. In some cases can vectorize even if data-type not supported
8353 (e.g. - array initialization with 0). */
8354 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8355 return false;
8358 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8359 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8360 && memory_access_type != VMAT_GATHER_SCATTER
8361 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8362 if (grouped_store)
8364 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8365 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8366 group_size = DR_GROUP_SIZE (first_stmt_info);
8368 else
8370 first_stmt_info = stmt_info;
8371 first_dr_info = dr_info;
8372 group_size = vec_num = 1;
8375 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8377 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8378 memory_access_type))
8379 return false;
8382 if (!vec_stmt) /* transformation not required. */
8384 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8386 if (loop_vinfo
8387 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8388 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8389 vls_type, group_size,
8390 memory_access_type, &gs_info,
8391 mask);
8393 if (slp_node
8394 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8395 vectype))
8397 if (dump_enabled_p ())
8398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8399 "incompatible vector types for invariants\n");
8400 return false;
8403 if (dump_enabled_p ()
8404 && memory_access_type != VMAT_ELEMENTWISE
8405 && memory_access_type != VMAT_GATHER_SCATTER
8406 && alignment_support_scheme != dr_aligned)
8407 dump_printf_loc (MSG_NOTE, vect_location,
8408 "Vectorizing an unaligned access.\n");
8410 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8411 vect_model_store_cost (vinfo, stmt_info, ncopies,
8412 memory_access_type, &gs_info,
8413 alignment_support_scheme,
8414 misalignment, vls_type, slp_node, cost_vec);
8415 return true;
8417 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8419 /* Transform. */
8421 ensure_base_align (dr_info);
8423 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8425 vect_build_scatter_store_calls (vinfo, stmt_info, gsi, vec_stmt,
8426 &gs_info, mask);
8427 return true;
8429 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8430 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8432 if (grouped_store)
8434 /* FORNOW */
8435 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8437 if (slp)
8439 grouped_store = false;
8440 /* VEC_NUM is the number of vect stmts to be created for this
8441 group. */
8442 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8443 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8444 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8445 == first_stmt_info);
8446 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8447 op = vect_get_store_rhs (first_stmt_info);
8449 else
8450 /* VEC_NUM is the number of vect stmts to be created for this
8451 group. */
8452 vec_num = group_size;
8454 ref_type = get_group_alias_ptr_type (first_stmt_info);
8456 else
8457 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8459 if (dump_enabled_p ())
8460 dump_printf_loc (MSG_NOTE, vect_location,
8461 "transform store. ncopies = %d\n", ncopies);
8463 if (memory_access_type == VMAT_ELEMENTWISE
8464 || memory_access_type == VMAT_STRIDED_SLP)
8466 gimple_stmt_iterator incr_gsi;
8467 bool insert_after;
8468 gimple *incr;
8469 tree offvar;
8470 tree ivstep;
8471 tree running_off;
8472 tree stride_base, stride_step, alias_off;
8473 tree vec_oprnd;
8474 tree dr_offset;
8475 unsigned int g;
8476 /* Checked by get_load_store_type. */
8477 unsigned int const_nunits = nunits.to_constant ();
8479 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8480 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8482 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8483 stride_base
8484 = fold_build_pointer_plus
8485 (DR_BASE_ADDRESS (first_dr_info->dr),
8486 size_binop (PLUS_EXPR,
8487 convert_to_ptrofftype (dr_offset),
8488 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8489 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8491 /* For a store with loop-invariant (but other than power-of-2)
8492 stride (i.e. not a grouped access) like so:
8494 for (i = 0; i < n; i += stride)
8495 array[i] = ...;
8497 we generate a new induction variable and new stores from
8498 the components of the (vectorized) rhs:
8500 for (j = 0; ; j += VF*stride)
8501 vectemp = ...;
8502 tmp1 = vectemp[0];
8503 array[j] = tmp1;
8504 tmp2 = vectemp[1];
8505 array[j + stride] = tmp2;
8509 unsigned nstores = const_nunits;
8510 unsigned lnel = 1;
8511 tree ltype = elem_type;
8512 tree lvectype = vectype;
8513 if (slp)
8515 if (group_size < const_nunits
8516 && const_nunits % group_size == 0)
8518 nstores = const_nunits / group_size;
8519 lnel = group_size;
8520 ltype = build_vector_type (elem_type, group_size);
8521 lvectype = vectype;
8523 /* First check if vec_extract optab doesn't support extraction
8524 of vector elts directly. */
8525 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8526 machine_mode vmode;
8527 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8528 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8529 group_size).exists (&vmode)
8530 || (convert_optab_handler (vec_extract_optab,
8531 TYPE_MODE (vectype), vmode)
8532 == CODE_FOR_nothing))
8534 /* Try to avoid emitting an extract of vector elements
8535 by performing the extracts using an integer type of the
8536 same size, extracting from a vector of those and then
8537 re-interpreting it as the original vector type if
8538 supported. */
8539 unsigned lsize
8540 = group_size * GET_MODE_BITSIZE (elmode);
8541 unsigned int lnunits = const_nunits / group_size;
8542 /* If we can't construct such a vector fall back to
8543 element extracts from the original vector type and
8544 element size stores. */
8545 if (int_mode_for_size (lsize, 0).exists (&elmode)
8546 && VECTOR_MODE_P (TYPE_MODE (vectype))
8547 && related_vector_mode (TYPE_MODE (vectype), elmode,
8548 lnunits).exists (&vmode)
8549 && (convert_optab_handler (vec_extract_optab,
8550 vmode, elmode)
8551 != CODE_FOR_nothing))
8553 nstores = lnunits;
8554 lnel = group_size;
8555 ltype = build_nonstandard_integer_type (lsize, 1);
8556 lvectype = build_vector_type (ltype, nstores);
8558 /* Else fall back to vector extraction anyway.
8559 Fewer stores are more important than avoiding spilling
8560 of the vector we extract from. Compared to the
8561 construction case in vectorizable_load no store-forwarding
8562 issue exists here for reasonable archs. */
8565 else if (group_size >= const_nunits
8566 && group_size % const_nunits == 0)
8568 nstores = 1;
8569 lnel = const_nunits;
8570 ltype = vectype;
8571 lvectype = vectype;
8573 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8574 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8577 ivstep = stride_step;
8578 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8579 build_int_cst (TREE_TYPE (ivstep), vf));
8581 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8583 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8584 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8585 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
8586 loop, &incr_gsi, insert_after,
8587 &offvar, NULL);
8588 incr = gsi_stmt (incr_gsi);
8590 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8592 alias_off = build_int_cst (ref_type, 0);
8593 stmt_vec_info next_stmt_info = first_stmt_info;
8594 for (g = 0; g < group_size; g++)
8596 running_off = offvar;
8597 if (g)
8599 tree size = TYPE_SIZE_UNIT (ltype);
8600 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8601 size);
8602 tree newoff = copy_ssa_name (running_off, NULL);
8603 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8604 running_off, pos);
8605 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8606 running_off = newoff;
8608 if (!slp)
8609 op = vect_get_store_rhs (next_stmt_info);
8610 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8611 op, &vec_oprnds);
8612 unsigned int group_el = 0;
8613 unsigned HOST_WIDE_INT
8614 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8615 for (j = 0; j < ncopies; j++)
8617 vec_oprnd = vec_oprnds[j];
8618 /* Pun the vector to extract from if necessary. */
8619 if (lvectype != vectype)
8621 tree tem = make_ssa_name (lvectype);
8622 gimple *pun
8623 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8624 lvectype, vec_oprnd));
8625 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8626 vec_oprnd = tem;
8628 for (i = 0; i < nstores; i++)
8630 tree newref, newoff;
8631 gimple *incr, *assign;
8632 tree size = TYPE_SIZE (ltype);
8633 /* Extract the i'th component. */
8634 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8635 bitsize_int (i), size);
8636 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8637 size, pos);
8639 elem = force_gimple_operand_gsi (gsi, elem, true,
8640 NULL_TREE, true,
8641 GSI_SAME_STMT);
8643 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8644 group_el * elsz);
8645 newref = build2 (MEM_REF, ltype,
8646 running_off, this_off);
8647 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8649 /* And store it to *running_off. */
8650 assign = gimple_build_assign (newref, elem);
8651 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8653 group_el += lnel;
8654 if (! slp
8655 || group_el == group_size)
8657 newoff = copy_ssa_name (running_off, NULL);
8658 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8659 running_off, stride_step);
8660 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8662 running_off = newoff;
8663 group_el = 0;
8665 if (g == group_size - 1
8666 && !slp)
8668 if (j == 0 && i == 0)
8669 *vec_stmt = assign;
8670 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8674 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8675 vec_oprnds.release ();
8676 if (slp)
8677 break;
8680 return true;
8683 auto_vec<tree> dr_chain (group_size);
8684 oprnds.create (group_size);
8686 gcc_assert (alignment_support_scheme);
8687 vec_loop_masks *loop_masks
8688 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8689 ? &LOOP_VINFO_MASKS (loop_vinfo)
8690 : NULL);
8691 vec_loop_lens *loop_lens
8692 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8693 ? &LOOP_VINFO_LENS (loop_vinfo)
8694 : NULL);
8696 /* Shouldn't go with length-based approach if fully masked. */
8697 gcc_assert (!loop_lens || !loop_masks);
8699 /* Targets with store-lane instructions must not require explicit
8700 realignment. vect_supportable_dr_alignment always returns either
8701 dr_aligned or dr_unaligned_supported for masked operations. */
8702 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8703 && !mask
8704 && !loop_masks)
8705 || alignment_support_scheme == dr_aligned
8706 || alignment_support_scheme == dr_unaligned_supported);
8708 tree offset = NULL_TREE;
8709 if (!known_eq (poffset, 0))
8710 offset = size_int (poffset);
8712 tree bump;
8713 tree vec_offset = NULL_TREE;
8714 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8716 aggr_type = NULL_TREE;
8717 bump = NULL_TREE;
8719 else if (memory_access_type == VMAT_GATHER_SCATTER)
8721 aggr_type = elem_type;
8722 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8723 &bump, &vec_offset, loop_lens);
8725 else
8727 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8728 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8729 else
8730 aggr_type = vectype;
8731 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8732 memory_access_type, loop_lens);
8735 if (mask)
8736 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8738 /* In case the vectorization factor (VF) is bigger than the number
8739 of elements that we can fit in a vectype (nunits), we have to generate
8740 more than one vector stmt - i.e - we need to "unroll" the
8741 vector stmt by a factor VF/nunits. */
8743 /* In case of interleaving (non-unit grouped access):
8745 S1: &base + 2 = x2
8746 S2: &base = x0
8747 S3: &base + 1 = x1
8748 S4: &base + 3 = x3
8750 We create vectorized stores starting from base address (the access of the
8751 first stmt in the chain (S2 in the above example), when the last store stmt
8752 of the chain (S4) is reached:
8754 VS1: &base = vx2
8755 VS2: &base + vec_size*1 = vx0
8756 VS3: &base + vec_size*2 = vx1
8757 VS4: &base + vec_size*3 = vx3
8759 Then permutation statements are generated:
8761 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8762 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8765 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8766 (the order of the data-refs in the output of vect_permute_store_chain
8767 corresponds to the order of scalar stmts in the interleaving chain - see
8768 the documentation of vect_permute_store_chain()).
8770 In case of both multiple types and interleaving, above vector stores and
8771 permutation stmts are created for every copy. The result vector stmts are
8772 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8773 STMT_VINFO_RELATED_STMT for the next copies.
8776 auto_vec<tree> vec_masks;
8777 tree vec_mask = NULL;
8778 auto_vec<tree> vec_offsets;
8779 auto_vec<vec<tree> > gvec_oprnds;
8780 gvec_oprnds.safe_grow_cleared (group_size, true);
8781 for (j = 0; j < ncopies; j++)
8783 gimple *new_stmt;
8784 if (j == 0)
8786 if (slp)
8788 /* Get vectorized arguments for SLP_NODE. */
8789 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8790 op, &vec_oprnds);
8791 vec_oprnd = vec_oprnds[0];
8793 else
8795 /* For interleaved stores we collect vectorized defs for all the
8796 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8797 used as an input to vect_permute_store_chain().
8799 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8800 and OPRNDS are of size 1. */
8801 stmt_vec_info next_stmt_info = first_stmt_info;
8802 for (i = 0; i < group_size; i++)
8804 /* Since gaps are not supported for interleaved stores,
8805 DR_GROUP_SIZE is the exact number of stmts in the chain.
8806 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8807 that there is no interleaving, DR_GROUP_SIZE is 1,
8808 and only one iteration of the loop will be executed. */
8809 op = vect_get_store_rhs (next_stmt_info);
8810 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8811 ncopies, op, &gvec_oprnds[i]);
8812 vec_oprnd = gvec_oprnds[i][0];
8813 dr_chain.quick_push (gvec_oprnds[i][0]);
8814 oprnds.quick_push (gvec_oprnds[i][0]);
8815 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8817 if (mask)
8819 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8820 mask, &vec_masks, mask_vectype);
8821 vec_mask = vec_masks[0];
8825 /* We should have catched mismatched types earlier. */
8826 gcc_assert (useless_type_conversion_p (vectype,
8827 TREE_TYPE (vec_oprnd)));
8828 bool simd_lane_access_p
8829 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8830 if (simd_lane_access_p
8831 && !loop_masks
8832 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8833 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8834 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8835 && integer_zerop (DR_INIT (first_dr_info->dr))
8836 && alias_sets_conflict_p (get_alias_set (aggr_type),
8837 get_alias_set (TREE_TYPE (ref_type))))
8839 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8840 dataref_offset = build_int_cst (ref_type, 0);
8842 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8843 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8844 slp_node, &gs_info, &dataref_ptr,
8845 &vec_offsets);
8846 else
8847 dataref_ptr
8848 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8849 simd_lane_access_p ? loop : NULL,
8850 offset, &dummy, gsi, &ptr_incr,
8851 simd_lane_access_p, bump);
8853 else
8855 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8856 /* For interleaved stores we created vectorized defs for all the
8857 defs stored in OPRNDS in the previous iteration (previous copy).
8858 DR_CHAIN is then used as an input to vect_permute_store_chain().
8859 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8860 OPRNDS are of size 1. */
8861 for (i = 0; i < group_size; i++)
8863 vec_oprnd = gvec_oprnds[i][j];
8864 dr_chain[i] = gvec_oprnds[i][j];
8865 oprnds[i] = gvec_oprnds[i][j];
8867 if (mask)
8868 vec_mask = vec_masks[j];
8869 if (dataref_offset)
8870 dataref_offset
8871 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8872 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8873 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8874 stmt_info, bump);
8877 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8879 tree vec_array;
8881 /* Get an array into which we can store the individual vectors. */
8882 vec_array = create_vector_array (vectype, vec_num);
8884 /* Invalidate the current contents of VEC_ARRAY. This should
8885 become an RTL clobber too, which prevents the vector registers
8886 from being upward-exposed. */
8887 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8889 /* Store the individual vectors into the array. */
8890 for (i = 0; i < vec_num; i++)
8892 vec_oprnd = dr_chain[i];
8893 write_vector_array (vinfo, stmt_info,
8894 gsi, vec_oprnd, vec_array, i);
8897 tree final_mask = NULL;
8898 tree final_len = NULL;
8899 tree bias = NULL;
8900 if (loop_masks)
8901 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8902 ncopies, vectype, j);
8903 if (vec_mask)
8904 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8905 final_mask, vec_mask, gsi);
8907 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8909 if (loop_lens)
8910 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8911 ncopies, vectype, j, 1);
8912 else
8913 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8914 signed char biasval
8915 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8916 bias = build_int_cst (intQI_type_node, biasval);
8917 if (!final_mask)
8919 mask_vectype = truth_type_for (vectype);
8920 final_mask = build_minus_one_cst (mask_vectype);
8924 gcall *call;
8925 if (final_len && final_mask)
8927 /* Emit:
8928 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8929 LEN, BIAS, VEC_ARRAY). */
8930 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8931 tree alias_ptr = build_int_cst (ref_type, align);
8932 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8933 dataref_ptr, alias_ptr,
8934 final_mask, final_len, bias,
8935 vec_array);
8937 else if (final_mask)
8939 /* Emit:
8940 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8941 VEC_ARRAY). */
8942 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8943 tree alias_ptr = build_int_cst (ref_type, align);
8944 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8945 dataref_ptr, alias_ptr,
8946 final_mask, vec_array);
8948 else
8950 /* Emit:
8951 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8952 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8953 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8954 vec_array);
8955 gimple_call_set_lhs (call, data_ref);
8957 gimple_call_set_nothrow (call, true);
8958 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8959 new_stmt = call;
8961 /* Record that VEC_ARRAY is now dead. */
8962 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8964 else
8966 new_stmt = NULL;
8967 if (grouped_store)
8969 if (j == 0)
8970 result_chain.create (group_size);
8971 /* Permute. */
8972 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8973 gsi, &result_chain);
8976 stmt_vec_info next_stmt_info = first_stmt_info;
8977 for (i = 0; i < vec_num; i++)
8979 unsigned misalign;
8980 unsigned HOST_WIDE_INT align;
8982 tree final_mask = NULL_TREE;
8983 tree final_len = NULL_TREE;
8984 tree bias = NULL_TREE;
8985 if (loop_masks)
8986 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8987 vec_num * ncopies,
8988 vectype, vec_num * j + i);
8989 if (vec_mask)
8990 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8991 final_mask, vec_mask, gsi);
8993 if (memory_access_type == VMAT_GATHER_SCATTER
8994 && gs_info.ifn != IFN_LAST)
8996 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8997 vec_offset = vec_offsets[vec_num * j + i];
8998 tree scale = size_int (gs_info.scale);
9000 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9002 if (loop_lens)
9003 final_len
9004 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9005 vec_num * ncopies, vectype,
9006 vec_num * j + i, 1);
9007 else
9008 final_len
9009 = build_int_cst (sizetype,
9010 TYPE_VECTOR_SUBPARTS (vectype));
9011 signed char biasval
9012 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9013 bias = build_int_cst (intQI_type_node, biasval);
9014 if (!final_mask)
9016 mask_vectype = truth_type_for (vectype);
9017 final_mask = build_minus_one_cst (mask_vectype);
9021 gcall *call;
9022 if (final_len && final_mask)
9023 call
9024 = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
9025 7, dataref_ptr, vec_offset,
9026 scale, vec_oprnd, final_mask,
9027 final_len, bias);
9028 else if (final_mask)
9029 call = gimple_build_call_internal
9030 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
9031 scale, vec_oprnd, final_mask);
9032 else
9033 call = gimple_build_call_internal
9034 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
9035 scale, vec_oprnd);
9036 gimple_call_set_nothrow (call, true);
9037 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9038 new_stmt = call;
9039 break;
9041 else if (memory_access_type == VMAT_GATHER_SCATTER)
9043 /* Emulated scatter. */
9044 gcc_assert (!final_mask);
9045 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9046 unsigned HOST_WIDE_INT const_offset_nunits
9047 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9048 .to_constant ();
9049 vec<constructor_elt, va_gc> *ctor_elts;
9050 vec_alloc (ctor_elts, const_nunits);
9051 gimple_seq stmts = NULL;
9052 tree elt_type = TREE_TYPE (vectype);
9053 unsigned HOST_WIDE_INT elt_size
9054 = tree_to_uhwi (TYPE_SIZE (elt_type));
9055 /* We support offset vectors with more elements
9056 than the data vector for now. */
9057 unsigned HOST_WIDE_INT factor
9058 = const_offset_nunits / const_nunits;
9059 vec_offset = vec_offsets[j / factor];
9060 unsigned elt_offset = (j % factor) * const_nunits;
9061 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9062 tree scale = size_int (gs_info.scale);
9063 align = get_object_alignment (DR_REF (first_dr_info->dr));
9064 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
9065 for (unsigned k = 0; k < const_nunits; ++k)
9067 /* Compute the offsetted pointer. */
9068 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9069 bitsize_int (k + elt_offset));
9070 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9071 idx_type, vec_offset,
9072 TYPE_SIZE (idx_type), boff);
9073 idx = gimple_convert (&stmts, sizetype, idx);
9074 idx = gimple_build (&stmts, MULT_EXPR,
9075 sizetype, idx, scale);
9076 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9077 TREE_TYPE (dataref_ptr),
9078 dataref_ptr, idx);
9079 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9080 /* Extract the element to be stored. */
9081 tree elt = gimple_build (&stmts, BIT_FIELD_REF,
9082 TREE_TYPE (vectype), vec_oprnd,
9083 TYPE_SIZE (elt_type),
9084 bitsize_int (k * elt_size));
9085 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9086 stmts = NULL;
9087 tree ref = build2 (MEM_REF, ltype, ptr,
9088 build_int_cst (ref_type, 0));
9089 new_stmt = gimple_build_assign (ref, elt);
9090 vect_finish_stmt_generation (vinfo, stmt_info,
9091 new_stmt, gsi);
9093 break;
9096 if (i > 0)
9097 /* Bump the vector pointer. */
9098 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9099 gsi, stmt_info, bump);
9101 if (slp)
9102 vec_oprnd = vec_oprnds[i];
9103 else if (grouped_store)
9104 /* For grouped stores vectorized defs are interleaved in
9105 vect_permute_store_chain(). */
9106 vec_oprnd = result_chain[i];
9108 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9109 if (alignment_support_scheme == dr_aligned)
9110 misalign = 0;
9111 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9113 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9114 misalign = 0;
9116 else
9117 misalign = misalignment;
9118 if (dataref_offset == NULL_TREE
9119 && TREE_CODE (dataref_ptr) == SSA_NAME)
9120 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9121 misalign);
9122 align = least_bit_hwi (misalign | align);
9124 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9126 tree perm_mask = perm_mask_for_reverse (vectype);
9127 tree perm_dest = vect_create_destination_var
9128 (vect_get_store_rhs (stmt_info), vectype);
9129 tree new_temp = make_ssa_name (perm_dest);
9131 /* Generate the permute statement. */
9132 gimple *perm_stmt
9133 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9134 vec_oprnd, perm_mask);
9135 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9137 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9138 vec_oprnd = new_temp;
9141 /* Compute IFN when LOOP_LENS or final_mask valid. */
9142 machine_mode vmode = TYPE_MODE (vectype);
9143 machine_mode new_vmode = vmode;
9144 internal_fn partial_ifn = IFN_LAST;
9145 if (loop_lens)
9147 opt_machine_mode new_ovmode
9148 = get_len_load_store_mode (vmode, false, &partial_ifn);
9149 new_vmode = new_ovmode.require ();
9150 unsigned factor
9151 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9152 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9153 vec_num * ncopies, vectype,
9154 vec_num * j + i, factor);
9156 else if (final_mask)
9158 if (!can_vec_mask_load_store_p (vmode,
9159 TYPE_MODE (TREE_TYPE (final_mask)),
9160 false, &partial_ifn))
9161 gcc_unreachable ();
9164 if (partial_ifn == IFN_MASK_LEN_STORE)
9166 if (!final_len)
9168 /* Pass VF value to 'len' argument of
9169 MASK_LEN_STORE if LOOP_LENS is invalid. */
9170 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9172 if (!final_mask)
9174 /* Pass all ones value to 'mask' argument of
9175 MASK_LEN_STORE if final_mask is invalid. */
9176 mask_vectype = truth_type_for (vectype);
9177 final_mask = build_minus_one_cst (mask_vectype);
9180 if (final_len)
9182 signed char biasval
9183 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9185 bias = build_int_cst (intQI_type_node, biasval);
9188 /* Arguments are ready. Create the new vector stmt. */
9189 if (final_len)
9191 gcall *call;
9192 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9193 /* Need conversion if it's wrapped with VnQI. */
9194 if (vmode != new_vmode)
9196 tree new_vtype
9197 = build_vector_type_for_mode (unsigned_intQI_type_node,
9198 new_vmode);
9199 tree var
9200 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9201 vec_oprnd
9202 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9203 gassign *new_stmt
9204 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
9205 vec_oprnd);
9206 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
9207 gsi);
9208 vec_oprnd = var;
9211 if (partial_ifn == IFN_MASK_LEN_STORE)
9212 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9213 dataref_ptr, ptr,
9214 final_mask, final_len,
9215 bias, vec_oprnd);
9216 else
9217 call
9218 = gimple_build_call_internal (IFN_LEN_STORE, 5,
9219 dataref_ptr, ptr,
9220 final_len, bias,
9221 vec_oprnd);
9222 gimple_call_set_nothrow (call, true);
9223 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9224 new_stmt = call;
9226 else if (final_mask)
9228 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9229 gcall *call
9230 = gimple_build_call_internal (IFN_MASK_STORE, 4,
9231 dataref_ptr, ptr,
9232 final_mask, vec_oprnd);
9233 gimple_call_set_nothrow (call, true);
9234 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9235 new_stmt = call;
9237 else
9239 data_ref = fold_build2 (MEM_REF, vectype,
9240 dataref_ptr,
9241 dataref_offset
9242 ? dataref_offset
9243 : build_int_cst (ref_type, 0));
9244 if (alignment_support_scheme == dr_aligned)
9246 else
9247 TREE_TYPE (data_ref)
9248 = build_aligned_type (TREE_TYPE (data_ref),
9249 align * BITS_PER_UNIT);
9250 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9251 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9252 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9255 if (slp)
9256 continue;
9258 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9259 if (!next_stmt_info)
9260 break;
9263 if (!slp)
9265 if (j == 0)
9266 *vec_stmt = new_stmt;
9267 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9271 for (i = 0; i < group_size; ++i)
9273 vec<tree> oprndsi = gvec_oprnds[i];
9274 oprndsi.release ();
9276 oprnds.release ();
9277 result_chain.release ();
9278 vec_oprnds.release ();
9280 return true;
9283 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9284 VECTOR_CST mask. No checks are made that the target platform supports the
9285 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9286 vect_gen_perm_mask_checked. */
9288 tree
9289 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9291 tree mask_type;
9293 poly_uint64 nunits = sel.length ();
9294 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9296 mask_type = build_vector_type (ssizetype, nunits);
9297 return vec_perm_indices_to_tree (mask_type, sel);
9300 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9301 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9303 tree
9304 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9306 machine_mode vmode = TYPE_MODE (vectype);
9307 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9308 return vect_gen_perm_mask_any (vectype, sel);
9311 /* Given a vector variable X and Y, that was generated for the scalar
9312 STMT_INFO, generate instructions to permute the vector elements of X and Y
9313 using permutation mask MASK_VEC, insert them at *GSI and return the
9314 permuted vector variable. */
9316 static tree
9317 permute_vec_elements (vec_info *vinfo,
9318 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9319 gimple_stmt_iterator *gsi)
9321 tree vectype = TREE_TYPE (x);
9322 tree perm_dest, data_ref;
9323 gimple *perm_stmt;
9325 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9326 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9327 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9328 else
9329 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9330 data_ref = make_ssa_name (perm_dest);
9332 /* Generate the permute statement. */
9333 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9334 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9336 return data_ref;
9339 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9340 inserting them on the loops preheader edge. Returns true if we
9341 were successful in doing so (and thus STMT_INFO can be moved then),
9342 otherwise returns false. HOIST_P indicates if we want to hoist the
9343 definitions of all SSA uses, it would be false when we are costing. */
9345 static bool
9346 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9348 ssa_op_iter i;
9349 tree op;
9350 bool any = false;
9352 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9354 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9355 if (!gimple_nop_p (def_stmt)
9356 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9358 /* Make sure we don't need to recurse. While we could do
9359 so in simple cases when there are more complex use webs
9360 we don't have an easy way to preserve stmt order to fulfil
9361 dependencies within them. */
9362 tree op2;
9363 ssa_op_iter i2;
9364 if (gimple_code (def_stmt) == GIMPLE_PHI)
9365 return false;
9366 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9368 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9369 if (!gimple_nop_p (def_stmt2)
9370 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9371 return false;
9373 any = true;
9377 if (!any)
9378 return true;
9380 if (!hoist_p)
9381 return true;
9383 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9385 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9386 if (!gimple_nop_p (def_stmt)
9387 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9389 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9390 gsi_remove (&gsi, false);
9391 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9395 return true;
9398 /* vectorizable_load.
9400 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
9401 that can be vectorized.
9402 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9403 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9404 Return true if STMT_INFO is vectorizable in this way. */
9406 static bool
9407 vectorizable_load (vec_info *vinfo,
9408 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9409 gimple **vec_stmt, slp_tree slp_node,
9410 stmt_vector_for_cost *cost_vec)
9412 tree scalar_dest;
9413 tree vec_dest = NULL;
9414 tree data_ref = NULL;
9415 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9416 class loop *loop = NULL;
9417 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9418 bool nested_in_vect_loop = false;
9419 tree elem_type;
9420 /* Avoid false positive uninitialized warning, see PR110652. */
9421 tree new_temp = NULL_TREE;
9422 machine_mode mode;
9423 tree dummy;
9424 tree dataref_ptr = NULL_TREE;
9425 tree dataref_offset = NULL_TREE;
9426 gimple *ptr_incr = NULL;
9427 int ncopies;
9428 int i, j;
9429 unsigned int group_size;
9430 poly_uint64 group_gap_adj;
9431 tree msq = NULL_TREE, lsq;
9432 tree realignment_token = NULL_TREE;
9433 gphi *phi = NULL;
9434 vec<tree> dr_chain = vNULL;
9435 bool grouped_load = false;
9436 stmt_vec_info first_stmt_info;
9437 stmt_vec_info first_stmt_info_for_drptr = NULL;
9438 bool compute_in_loop = false;
9439 class loop *at_loop;
9440 int vec_num;
9441 bool slp = (slp_node != NULL);
9442 bool slp_perm = false;
9443 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9444 poly_uint64 vf;
9445 tree aggr_type;
9446 gather_scatter_info gs_info;
9447 tree ref_type;
9448 enum vect_def_type mask_dt = vect_unknown_def_type;
9450 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9451 return false;
9453 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9454 && ! vec_stmt)
9455 return false;
9457 if (!STMT_VINFO_DATA_REF (stmt_info))
9458 return false;
9460 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9461 int mask_index = -1;
9462 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9464 scalar_dest = gimple_assign_lhs (assign);
9465 if (TREE_CODE (scalar_dest) != SSA_NAME)
9466 return false;
9468 tree_code code = gimple_assign_rhs_code (assign);
9469 if (code != ARRAY_REF
9470 && code != BIT_FIELD_REF
9471 && code != INDIRECT_REF
9472 && code != COMPONENT_REF
9473 && code != IMAGPART_EXPR
9474 && code != REALPART_EXPR
9475 && code != MEM_REF
9476 && TREE_CODE_CLASS (code) != tcc_declaration)
9477 return false;
9479 else
9481 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9482 if (!call || !gimple_call_internal_p (call))
9483 return false;
9485 internal_fn ifn = gimple_call_internal_fn (call);
9486 if (!internal_load_fn_p (ifn))
9487 return false;
9489 scalar_dest = gimple_call_lhs (call);
9490 if (!scalar_dest)
9491 return false;
9493 mask_index = internal_fn_mask_index (ifn);
9494 /* ??? For SLP the mask operand is always last. */
9495 if (mask_index >= 0 && slp_node)
9496 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
9497 if (mask_index >= 0
9498 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9499 &mask, NULL, &mask_dt, &mask_vectype))
9500 return false;
9503 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9504 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9506 if (loop_vinfo)
9508 loop = LOOP_VINFO_LOOP (loop_vinfo);
9509 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9510 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9512 else
9513 vf = 1;
9515 /* Multiple types in SLP are handled by creating the appropriate number of
9516 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9517 case of SLP. */
9518 if (slp)
9519 ncopies = 1;
9520 else
9521 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9523 gcc_assert (ncopies >= 1);
9525 /* FORNOW. This restriction should be relaxed. */
9526 if (nested_in_vect_loop && ncopies > 1)
9528 if (dump_enabled_p ())
9529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9530 "multiple types in nested loop.\n");
9531 return false;
9534 /* Invalidate assumptions made by dependence analysis when vectorization
9535 on the unrolled body effectively re-orders stmts. */
9536 if (ncopies > 1
9537 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9538 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9539 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9541 if (dump_enabled_p ())
9542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9543 "cannot perform implicit CSE when unrolling "
9544 "with negative dependence distance\n");
9545 return false;
9548 elem_type = TREE_TYPE (vectype);
9549 mode = TYPE_MODE (vectype);
9551 /* FORNOW. In some cases can vectorize even if data-type not supported
9552 (e.g. - data copies). */
9553 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9555 if (dump_enabled_p ())
9556 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9557 "Aligned load, but unsupported type.\n");
9558 return false;
9561 /* Check if the load is a part of an interleaving chain. */
9562 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9564 grouped_load = true;
9565 /* FORNOW */
9566 gcc_assert (!nested_in_vect_loop);
9567 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9569 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9570 group_size = DR_GROUP_SIZE (first_stmt_info);
9572 /* Refuse non-SLP vectorization of SLP-only groups. */
9573 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9575 if (dump_enabled_p ())
9576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9577 "cannot vectorize load in non-SLP mode.\n");
9578 return false;
9581 /* Invalidate assumptions made by dependence analysis when vectorization
9582 on the unrolled body effectively re-orders stmts. */
9583 if (!PURE_SLP_STMT (stmt_info)
9584 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9585 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9586 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9588 if (dump_enabled_p ())
9589 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9590 "cannot perform implicit CSE when performing "
9591 "group loads with negative dependence distance\n");
9592 return false;
9595 else
9596 group_size = 1;
9598 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9600 slp_perm = true;
9602 if (!loop_vinfo)
9604 /* In BB vectorization we may not actually use a loaded vector
9605 accessing elements in excess of DR_GROUP_SIZE. */
9606 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9607 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9608 unsigned HOST_WIDE_INT nunits;
9609 unsigned j, k, maxk = 0;
9610 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9611 if (k > maxk)
9612 maxk = k;
9613 tree vectype = SLP_TREE_VECTYPE (slp_node);
9614 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9615 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9617 if (dump_enabled_p ())
9618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9619 "BB vectorization with gaps at the end of "
9620 "a load is not supported\n");
9621 return false;
9625 auto_vec<tree> tem;
9626 unsigned n_perms;
9627 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9628 true, &n_perms))
9630 if (dump_enabled_p ())
9631 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9632 vect_location,
9633 "unsupported load permutation\n");
9634 return false;
9638 vect_memory_access_type memory_access_type;
9639 enum dr_alignment_support alignment_support_scheme;
9640 int misalignment;
9641 poly_int64 poffset;
9642 internal_fn lanes_ifn;
9643 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
9644 ncopies, &memory_access_type, &poffset,
9645 &alignment_support_scheme, &misalignment, &gs_info,
9646 &lanes_ifn))
9647 return false;
9649 if (mask)
9651 if (memory_access_type == VMAT_CONTIGUOUS)
9653 machine_mode vec_mode = TYPE_MODE (vectype);
9654 if (!VECTOR_MODE_P (vec_mode)
9655 || !can_vec_mask_load_store_p (vec_mode,
9656 TYPE_MODE (mask_vectype), true))
9657 return false;
9659 else if (memory_access_type != VMAT_LOAD_STORE_LANES
9660 && memory_access_type != VMAT_GATHER_SCATTER)
9662 if (dump_enabled_p ())
9663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9664 "unsupported access type for masked load.\n");
9665 return false;
9667 else if (memory_access_type == VMAT_GATHER_SCATTER
9668 && gs_info.ifn == IFN_LAST
9669 && !gs_info.decl)
9671 if (dump_enabled_p ())
9672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9673 "unsupported masked emulated gather.\n");
9674 return false;
9678 bool costing_p = !vec_stmt;
9680 if (costing_p) /* transformation not required. */
9682 if (slp_node
9683 && mask
9684 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
9685 mask_vectype))
9687 if (dump_enabled_p ())
9688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9689 "incompatible vector types for invariants\n");
9690 return false;
9693 if (!slp)
9694 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
9696 if (loop_vinfo
9697 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9698 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9699 VLS_LOAD, group_size,
9700 memory_access_type, &gs_info,
9701 mask);
9703 if (dump_enabled_p ()
9704 && memory_access_type != VMAT_ELEMENTWISE
9705 && memory_access_type != VMAT_GATHER_SCATTER
9706 && alignment_support_scheme != dr_aligned)
9707 dump_printf_loc (MSG_NOTE, vect_location,
9708 "Vectorizing an unaligned access.\n");
9710 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9711 vinfo->any_known_not_updated_vssa = true;
9713 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9716 if (!slp)
9717 gcc_assert (memory_access_type
9718 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9720 if (dump_enabled_p () && !costing_p)
9721 dump_printf_loc (MSG_NOTE, vect_location,
9722 "transform load. ncopies = %d\n", ncopies);
9724 /* Transform. */
9726 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9727 ensure_base_align (dr_info);
9729 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9731 vect_build_gather_load_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info,
9732 mask, cost_vec);
9733 return true;
9736 if (memory_access_type == VMAT_INVARIANT)
9738 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9739 /* If we have versioned for aliasing or the loop doesn't
9740 have any data dependencies that would preclude this,
9741 then we are sure this is a loop invariant load and
9742 thus we can insert it on the preheader edge.
9743 TODO: hoist_defs_of_uses should ideally be computed
9744 once at analysis time, remembered and used in the
9745 transform time. */
9746 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9747 && !nested_in_vect_loop
9748 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
9749 if (costing_p)
9751 enum vect_cost_model_location cost_loc
9752 = hoist_p ? vect_prologue : vect_body;
9753 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
9754 stmt_info, 0, cost_loc);
9755 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
9756 cost_loc);
9757 unsigned int prologue_cost = hoist_p ? cost : 0;
9758 unsigned int inside_cost = hoist_p ? 0 : cost;
9759 if (dump_enabled_p ())
9760 dump_printf_loc (MSG_NOTE, vect_location,
9761 "vect_model_load_cost: inside_cost = %d, "
9762 "prologue_cost = %d .\n",
9763 inside_cost, prologue_cost);
9764 return true;
9766 if (hoist_p)
9768 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9769 if (dump_enabled_p ())
9770 dump_printf_loc (MSG_NOTE, vect_location,
9771 "hoisting out of the vectorized loop: %G",
9772 (gimple *) stmt);
9773 scalar_dest = copy_ssa_name (scalar_dest);
9774 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9775 edge pe = loop_preheader_edge (loop);
9776 gphi *vphi = get_virtual_phi (loop->header);
9777 tree vuse;
9778 if (vphi)
9779 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
9780 else
9781 vuse = gimple_vuse (gsi_stmt (*gsi));
9782 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
9783 gimple_set_vuse (new_stmt, vuse);
9784 gsi_insert_on_edge_immediate (pe, new_stmt);
9786 /* These copies are all equivalent. */
9787 if (hoist_p)
9788 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9789 vectype, NULL);
9790 else
9792 gimple_stmt_iterator gsi2 = *gsi;
9793 gsi_next (&gsi2);
9794 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9795 vectype, &gsi2);
9797 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9798 if (slp)
9799 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
9800 slp_node->push_vec_def (new_stmt);
9801 else
9803 for (j = 0; j < ncopies; ++j)
9804 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9805 *vec_stmt = new_stmt;
9807 return true;
9810 if (memory_access_type == VMAT_ELEMENTWISE
9811 || memory_access_type == VMAT_STRIDED_SLP)
9813 gimple_stmt_iterator incr_gsi;
9814 bool insert_after;
9815 tree offvar;
9816 tree ivstep;
9817 tree running_off;
9818 vec<constructor_elt, va_gc> *v = NULL;
9819 tree stride_base, stride_step, alias_off;
9820 /* Checked by get_load_store_type. */
9821 unsigned int const_nunits = nunits.to_constant ();
9822 unsigned HOST_WIDE_INT cst_offset = 0;
9823 tree dr_offset;
9824 unsigned int inside_cost = 0;
9826 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9827 gcc_assert (!nested_in_vect_loop);
9829 if (grouped_load)
9831 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9832 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9834 else
9836 first_stmt_info = stmt_info;
9837 first_dr_info = dr_info;
9840 if (slp && grouped_load)
9842 group_size = DR_GROUP_SIZE (first_stmt_info);
9843 ref_type = get_group_alias_ptr_type (first_stmt_info);
9845 else
9847 if (grouped_load)
9848 cst_offset
9849 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9850 * vect_get_place_in_interleaving_chain (stmt_info,
9851 first_stmt_info));
9852 group_size = 1;
9853 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9856 if (!costing_p)
9858 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9859 stride_base = fold_build_pointer_plus (
9860 DR_BASE_ADDRESS (first_dr_info->dr),
9861 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
9862 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9863 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9865 /* For a load with loop-invariant (but other than power-of-2)
9866 stride (i.e. not a grouped access) like so:
9868 for (i = 0; i < n; i += stride)
9869 ... = array[i];
9871 we generate a new induction variable and new accesses to
9872 form a new vector (or vectors, depending on ncopies):
9874 for (j = 0; ; j += VF*stride)
9875 tmp1 = array[j];
9876 tmp2 = array[j + stride];
9878 vectemp = {tmp1, tmp2, ...}
9881 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9882 build_int_cst (TREE_TYPE (stride_step), vf));
9884 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9886 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9887 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9888 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
9889 loop, &incr_gsi, insert_after,
9890 &offvar, NULL);
9892 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9895 running_off = offvar;
9896 alias_off = build_int_cst (ref_type, 0);
9897 int nloads = const_nunits;
9898 int lnel = 1;
9899 tree ltype = TREE_TYPE (vectype);
9900 tree lvectype = vectype;
9901 auto_vec<tree> dr_chain;
9902 if (memory_access_type == VMAT_STRIDED_SLP)
9904 if (group_size < const_nunits)
9906 /* First check if vec_init optab supports construction from vector
9907 elts directly. Otherwise avoid emitting a constructor of
9908 vector elements by performing the loads using an integer type
9909 of the same size, constructing a vector of those and then
9910 re-interpreting it as the original vector type. This avoids a
9911 huge runtime penalty due to the general inability to perform
9912 store forwarding from smaller stores to a larger load. */
9913 tree ptype;
9914 tree vtype
9915 = vector_vector_composition_type (vectype,
9916 const_nunits / group_size,
9917 &ptype);
9918 if (vtype != NULL_TREE)
9920 nloads = const_nunits / group_size;
9921 lnel = group_size;
9922 lvectype = vtype;
9923 ltype = ptype;
9926 else
9928 nloads = 1;
9929 lnel = const_nunits;
9930 ltype = vectype;
9932 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9934 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9935 else if (nloads == 1)
9936 ltype = vectype;
9938 if (slp)
9940 /* For SLP permutation support we need to load the whole group,
9941 not only the number of vector stmts the permutation result
9942 fits in. */
9943 if (slp_perm)
9945 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9946 variable VF. */
9947 unsigned int const_vf = vf.to_constant ();
9948 ncopies = CEIL (group_size * const_vf, const_nunits);
9949 dr_chain.create (ncopies);
9951 else
9952 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9954 unsigned int group_el = 0;
9955 unsigned HOST_WIDE_INT
9956 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9957 unsigned int n_groups = 0;
9958 for (j = 0; j < ncopies; j++)
9960 if (nloads > 1 && !costing_p)
9961 vec_alloc (v, nloads);
9962 gimple *new_stmt = NULL;
9963 for (i = 0; i < nloads; i++)
9965 if (costing_p)
9967 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
9968 avoid ICE, see PR110776. */
9969 if (VECTOR_TYPE_P (ltype)
9970 && memory_access_type != VMAT_ELEMENTWISE)
9971 vect_get_load_cost (vinfo, stmt_info, 1,
9972 alignment_support_scheme, misalignment,
9973 false, &inside_cost, nullptr, cost_vec,
9974 cost_vec, true);
9975 else
9976 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
9977 stmt_info, 0, vect_body);
9978 continue;
9980 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9981 group_el * elsz + cst_offset);
9982 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9983 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9984 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9985 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9986 if (nloads > 1)
9987 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9988 gimple_assign_lhs (new_stmt));
9990 group_el += lnel;
9991 if (! slp
9992 || group_el == group_size)
9994 n_groups++;
9995 /* When doing SLP make sure to not load elements from
9996 the next vector iteration, those will not be accessed
9997 so just use the last element again. See PR107451. */
9998 if (!slp || known_lt (n_groups, vf))
10000 tree newoff = copy_ssa_name (running_off);
10001 gimple *incr
10002 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10003 running_off, stride_step);
10004 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10005 running_off = newoff;
10007 group_el = 0;
10011 if (nloads > 1)
10013 if (costing_p)
10014 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10015 stmt_info, 0, vect_body);
10016 else
10018 tree vec_inv = build_constructor (lvectype, v);
10019 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10020 lvectype, gsi);
10021 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10022 if (lvectype != vectype)
10024 new_stmt
10025 = gimple_build_assign (make_ssa_name (vectype),
10026 VIEW_CONVERT_EXPR,
10027 build1 (VIEW_CONVERT_EXPR,
10028 vectype, new_temp));
10029 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10030 gsi);
10035 if (!costing_p)
10037 if (slp)
10039 if (slp_perm)
10040 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10041 else
10042 slp_node->push_vec_def (new_stmt);
10044 else
10046 if (j == 0)
10047 *vec_stmt = new_stmt;
10048 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10052 if (slp_perm)
10054 unsigned n_perms;
10055 if (costing_p)
10057 unsigned n_loads;
10058 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10059 true, &n_perms, &n_loads);
10060 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10061 first_stmt_info, 0, vect_body);
10063 else
10064 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10065 false, &n_perms);
10068 if (costing_p && dump_enabled_p ())
10069 dump_printf_loc (MSG_NOTE, vect_location,
10070 "vect_model_load_cost: inside_cost = %u, "
10071 "prologue_cost = 0 .\n",
10072 inside_cost);
10074 return true;
10077 if (memory_access_type == VMAT_GATHER_SCATTER
10078 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10079 grouped_load = false;
10081 if (grouped_load
10082 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10084 if (grouped_load)
10086 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10087 group_size = DR_GROUP_SIZE (first_stmt_info);
10089 else
10091 first_stmt_info = stmt_info;
10092 group_size = 1;
10094 /* For SLP vectorization we directly vectorize a subchain
10095 without permutation. */
10096 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10097 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10098 /* For BB vectorization always use the first stmt to base
10099 the data ref pointer on. */
10100 if (bb_vinfo)
10101 first_stmt_info_for_drptr
10102 = vect_find_first_scalar_stmt_in_slp (slp_node);
10104 /* Check if the chain of loads is already vectorized. */
10105 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10106 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10107 ??? But we can only do so if there is exactly one
10108 as we have no way to get at the rest. Leave the CSE
10109 opportunity alone.
10110 ??? With the group load eventually participating
10111 in multiple different permutations (having multiple
10112 slp nodes which refer to the same group) the CSE
10113 is even wrong code. See PR56270. */
10114 && !slp)
10116 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10117 return true;
10119 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10120 group_gap_adj = 0;
10122 /* VEC_NUM is the number of vect stmts to be created for this group. */
10123 if (slp)
10125 grouped_load = false;
10126 /* If an SLP permutation is from N elements to N elements,
10127 and if one vector holds a whole number of N, we can load
10128 the inputs to the permutation in the same way as an
10129 unpermuted sequence. In other cases we need to load the
10130 whole group, not only the number of vector stmts the
10131 permutation result fits in. */
10132 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10133 if (slp_perm
10134 && (group_size != scalar_lanes
10135 || !multiple_p (nunits, group_size)))
10137 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10138 variable VF; see vect_transform_slp_perm_load. */
10139 unsigned int const_vf = vf.to_constant ();
10140 unsigned int const_nunits = nunits.to_constant ();
10141 vec_num = CEIL (group_size * const_vf, const_nunits);
10142 group_gap_adj = vf * group_size - nunits * vec_num;
10144 else
10146 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10147 group_gap_adj
10148 = group_size - scalar_lanes;
10151 else
10152 vec_num = group_size;
10154 ref_type = get_group_alias_ptr_type (first_stmt_info);
10156 else
10158 first_stmt_info = stmt_info;
10159 first_dr_info = dr_info;
10160 group_size = vec_num = 1;
10161 group_gap_adj = 0;
10162 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10163 if (slp)
10164 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10167 gcc_assert (alignment_support_scheme);
10168 vec_loop_masks *loop_masks
10169 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10170 ? &LOOP_VINFO_MASKS (loop_vinfo)
10171 : NULL);
10172 vec_loop_lens *loop_lens
10173 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10174 ? &LOOP_VINFO_LENS (loop_vinfo)
10175 : NULL);
10177 /* Shouldn't go with length-based approach if fully masked. */
10178 gcc_assert (!loop_lens || !loop_masks);
10180 /* Targets with store-lane instructions must not require explicit
10181 realignment. vect_supportable_dr_alignment always returns either
10182 dr_aligned or dr_unaligned_supported for masked operations. */
10183 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10184 && !mask
10185 && !loop_masks)
10186 || alignment_support_scheme == dr_aligned
10187 || alignment_support_scheme == dr_unaligned_supported);
10189 /* In case the vectorization factor (VF) is bigger than the number
10190 of elements that we can fit in a vectype (nunits), we have to generate
10191 more than one vector stmt - i.e - we need to "unroll" the
10192 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10193 from one copy of the vector stmt to the next, in the field
10194 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10195 stages to find the correct vector defs to be used when vectorizing
10196 stmts that use the defs of the current stmt. The example below
10197 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10198 need to create 4 vectorized stmts):
10200 before vectorization:
10201 RELATED_STMT VEC_STMT
10202 S1: x = memref - -
10203 S2: z = x + 1 - -
10205 step 1: vectorize stmt S1:
10206 We first create the vector stmt VS1_0, and, as usual, record a
10207 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10208 Next, we create the vector stmt VS1_1, and record a pointer to
10209 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10210 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10211 stmts and pointers:
10212 RELATED_STMT VEC_STMT
10213 VS1_0: vx0 = memref0 VS1_1 -
10214 VS1_1: vx1 = memref1 VS1_2 -
10215 VS1_2: vx2 = memref2 VS1_3 -
10216 VS1_3: vx3 = memref3 - -
10217 S1: x = load - VS1_0
10218 S2: z = x + 1 - -
10221 /* In case of interleaving (non-unit grouped access):
10223 S1: x2 = &base + 2
10224 S2: x0 = &base
10225 S3: x1 = &base + 1
10226 S4: x3 = &base + 3
10228 Vectorized loads are created in the order of memory accesses
10229 starting from the access of the first stmt of the chain:
10231 VS1: vx0 = &base
10232 VS2: vx1 = &base + vec_size*1
10233 VS3: vx3 = &base + vec_size*2
10234 VS4: vx4 = &base + vec_size*3
10236 Then permutation statements are generated:
10238 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10239 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10242 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10243 (the order of the data-refs in the output of vect_permute_load_chain
10244 corresponds to the order of scalar stmts in the interleaving chain - see
10245 the documentation of vect_permute_load_chain()).
10246 The generation of permutation stmts and recording them in
10247 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10249 In case of both multiple types and interleaving, the vector loads and
10250 permutation stmts above are created for every copy. The result vector
10251 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10252 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10254 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10255 on a target that supports unaligned accesses (dr_unaligned_supported)
10256 we generate the following code:
10257 p = initial_addr;
10258 indx = 0;
10259 loop {
10260 p = p + indx * vectype_size;
10261 vec_dest = *(p);
10262 indx = indx + 1;
10265 Otherwise, the data reference is potentially unaligned on a target that
10266 does not support unaligned accesses (dr_explicit_realign_optimized) -
10267 then generate the following code, in which the data in each iteration is
10268 obtained by two vector loads, one from the previous iteration, and one
10269 from the current iteration:
10270 p1 = initial_addr;
10271 msq_init = *(floor(p1))
10272 p2 = initial_addr + VS - 1;
10273 realignment_token = call target_builtin;
10274 indx = 0;
10275 loop {
10276 p2 = p2 + indx * vectype_size
10277 lsq = *(floor(p2))
10278 vec_dest = realign_load (msq, lsq, realignment_token)
10279 indx = indx + 1;
10280 msq = lsq;
10281 } */
10283 /* If the misalignment remains the same throughout the execution of the
10284 loop, we can create the init_addr and permutation mask at the loop
10285 preheader. Otherwise, it needs to be created inside the loop.
10286 This can only occur when vectorizing memory accesses in the inner-loop
10287 nested within an outer-loop that is being vectorized. */
10289 if (nested_in_vect_loop
10290 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10291 GET_MODE_SIZE (TYPE_MODE (vectype))))
10293 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10294 compute_in_loop = true;
10297 bool diff_first_stmt_info
10298 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10300 tree offset = NULL_TREE;
10301 if ((alignment_support_scheme == dr_explicit_realign_optimized
10302 || alignment_support_scheme == dr_explicit_realign)
10303 && !compute_in_loop)
10305 /* If we have different first_stmt_info, we can't set up realignment
10306 here, since we can't guarantee first_stmt_info DR has been
10307 initialized yet, use first_stmt_info_for_drptr DR by bumping the
10308 distance from first_stmt_info DR instead as below. */
10309 if (!costing_p)
10311 if (!diff_first_stmt_info)
10312 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10313 &realignment_token,
10314 alignment_support_scheme, NULL_TREE,
10315 &at_loop);
10316 if (alignment_support_scheme == dr_explicit_realign_optimized)
10318 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10319 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10320 size_one_node);
10321 gcc_assert (!first_stmt_info_for_drptr);
10325 else
10326 at_loop = loop;
10328 if (!known_eq (poffset, 0))
10329 offset = (offset
10330 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10331 : size_int (poffset));
10333 tree bump;
10334 tree vec_offset = NULL_TREE;
10335 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10337 aggr_type = NULL_TREE;
10338 bump = NULL_TREE;
10340 else if (memory_access_type == VMAT_GATHER_SCATTER)
10342 aggr_type = elem_type;
10343 if (!costing_p)
10344 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10345 &bump, &vec_offset, loop_lens);
10347 else
10349 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10350 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10351 else
10352 aggr_type = vectype;
10353 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10354 memory_access_type, loop_lens);
10357 auto_vec<tree> vec_offsets;
10358 auto_vec<tree> vec_masks;
10359 if (mask && !costing_p)
10361 if (slp_node)
10362 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10363 &vec_masks);
10364 else
10365 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10366 &vec_masks, mask_vectype);
10369 tree vec_mask = NULL_TREE;
10370 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10372 gcc_assert (alignment_support_scheme == dr_aligned
10373 || alignment_support_scheme == dr_unaligned_supported);
10374 gcc_assert (grouped_load && !slp);
10376 unsigned int inside_cost = 0, prologue_cost = 0;
10377 for (j = 0; j < ncopies; j++)
10379 if (costing_p)
10381 /* An IFN_LOAD_LANES will load all its vector results,
10382 regardless of which ones we actually need. Account
10383 for the cost of unused results. */
10384 if (first_stmt_info == stmt_info)
10386 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10387 stmt_vec_info next_stmt_info = first_stmt_info;
10390 gaps -= 1;
10391 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10393 while (next_stmt_info);
10394 if (gaps)
10396 if (dump_enabled_p ())
10397 dump_printf_loc (MSG_NOTE, vect_location,
10398 "vect_model_load_cost: %d "
10399 "unused vectors.\n",
10400 gaps);
10401 vect_get_load_cost (vinfo, stmt_info, gaps,
10402 alignment_support_scheme,
10403 misalignment, false, &inside_cost,
10404 &prologue_cost, cost_vec, cost_vec,
10405 true);
10408 vect_get_load_cost (vinfo, stmt_info, 1, alignment_support_scheme,
10409 misalignment, false, &inside_cost,
10410 &prologue_cost, cost_vec, cost_vec, true);
10411 continue;
10414 /* 1. Create the vector or array pointer update chain. */
10415 if (j == 0)
10416 dataref_ptr
10417 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10418 at_loop, offset, &dummy, gsi,
10419 &ptr_incr, false, bump);
10420 else
10422 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10423 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10424 stmt_info, bump);
10426 if (mask)
10427 vec_mask = vec_masks[j];
10429 tree vec_array = create_vector_array (vectype, vec_num);
10431 tree final_mask = NULL_TREE;
10432 tree final_len = NULL_TREE;
10433 tree bias = NULL_TREE;
10434 if (loop_masks)
10435 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10436 ncopies, vectype, j);
10437 if (vec_mask)
10438 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10439 vec_mask, gsi);
10441 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10443 if (loop_lens)
10444 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10445 ncopies, vectype, j, 1);
10446 else
10447 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10448 signed char biasval
10449 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10450 bias = build_int_cst (intQI_type_node, biasval);
10451 if (!final_mask)
10453 mask_vectype = truth_type_for (vectype);
10454 final_mask = build_minus_one_cst (mask_vectype);
10458 gcall *call;
10459 if (final_len && final_mask)
10461 /* Emit:
10462 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10463 VEC_MASK, LEN, BIAS). */
10464 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10465 tree alias_ptr = build_int_cst (ref_type, align);
10466 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10467 dataref_ptr, alias_ptr,
10468 final_mask, final_len, bias);
10470 else if (final_mask)
10472 /* Emit:
10473 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10474 VEC_MASK). */
10475 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10476 tree alias_ptr = build_int_cst (ref_type, align);
10477 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10478 dataref_ptr, alias_ptr,
10479 final_mask);
10481 else
10483 /* Emit:
10484 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10485 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10486 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10488 gimple_call_set_lhs (call, vec_array);
10489 gimple_call_set_nothrow (call, true);
10490 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10492 dr_chain.create (vec_num);
10493 /* Extract each vector into an SSA_NAME. */
10494 for (i = 0; i < vec_num; i++)
10496 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10497 vec_array, i);
10498 dr_chain.quick_push (new_temp);
10501 /* Record the mapping between SSA_NAMEs and statements. */
10502 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10504 /* Record that VEC_ARRAY is now dead. */
10505 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10507 dr_chain.release ();
10509 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10512 if (costing_p && dump_enabled_p ())
10513 dump_printf_loc (MSG_NOTE, vect_location,
10514 "vect_model_load_cost: inside_cost = %u, "
10515 "prologue_cost = %u .\n",
10516 inside_cost, prologue_cost);
10518 return true;
10521 if (memory_access_type == VMAT_GATHER_SCATTER)
10523 gcc_assert (alignment_support_scheme == dr_aligned
10524 || alignment_support_scheme == dr_unaligned_supported);
10525 gcc_assert (!grouped_load && !slp_perm);
10527 unsigned int inside_cost = 0, prologue_cost = 0;
10528 for (j = 0; j < ncopies; j++)
10530 /* 1. Create the vector or array pointer update chain. */
10531 if (j == 0 && !costing_p)
10533 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10534 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10535 slp_node, &gs_info, &dataref_ptr,
10536 &vec_offsets);
10537 else
10538 dataref_ptr
10539 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10540 at_loop, offset, &dummy, gsi,
10541 &ptr_incr, false, bump);
10543 else if (!costing_p)
10545 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10546 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10547 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10548 gsi, stmt_info, bump);
10551 if (mask && !costing_p)
10552 vec_mask = vec_masks[j];
10554 gimple *new_stmt = NULL;
10555 for (i = 0; i < vec_num; i++)
10557 tree final_mask = NULL_TREE;
10558 tree final_len = NULL_TREE;
10559 tree bias = NULL_TREE;
10560 if (!costing_p)
10562 if (loop_masks)
10563 final_mask
10564 = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10565 vec_num * ncopies, vectype,
10566 vec_num * j + i);
10567 if (vec_mask)
10568 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10569 final_mask, vec_mask, gsi);
10571 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10572 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10573 gsi, stmt_info, bump);
10576 /* 2. Create the vector-load in the loop. */
10577 unsigned HOST_WIDE_INT align;
10578 if (gs_info.ifn != IFN_LAST)
10580 if (costing_p)
10582 unsigned int cnunits = vect_nunits_for_cost (vectype);
10583 inside_cost
10584 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10585 stmt_info, 0, vect_body);
10586 continue;
10588 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10589 vec_offset = vec_offsets[vec_num * j + i];
10590 tree zero = build_zero_cst (vectype);
10591 tree scale = size_int (gs_info.scale);
10593 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10595 if (loop_lens)
10596 final_len
10597 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10598 vec_num * ncopies, vectype,
10599 vec_num * j + i, 1);
10600 else
10601 final_len
10602 = build_int_cst (sizetype,
10603 TYPE_VECTOR_SUBPARTS (vectype));
10604 signed char biasval
10605 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10606 bias = build_int_cst (intQI_type_node, biasval);
10607 if (!final_mask)
10609 mask_vectype = truth_type_for (vectype);
10610 final_mask = build_minus_one_cst (mask_vectype);
10614 gcall *call;
10615 if (final_len && final_mask)
10616 call
10617 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD, 7,
10618 dataref_ptr, vec_offset,
10619 scale, zero, final_mask,
10620 final_len, bias);
10621 else if (final_mask)
10622 call = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
10623 dataref_ptr, vec_offset,
10624 scale, zero, final_mask);
10625 else
10626 call = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
10627 dataref_ptr, vec_offset,
10628 scale, zero);
10629 gimple_call_set_nothrow (call, true);
10630 new_stmt = call;
10631 data_ref = NULL_TREE;
10633 else
10635 /* Emulated gather-scatter. */
10636 gcc_assert (!final_mask);
10637 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
10638 if (costing_p)
10640 /* For emulated gathers N offset vector element
10641 offset add is consumed by the load). */
10642 inside_cost = record_stmt_cost (cost_vec, const_nunits,
10643 vec_to_scalar, stmt_info,
10644 0, vect_body);
10645 /* N scalar loads plus gathering them into a
10646 vector. */
10647 inside_cost
10648 = record_stmt_cost (cost_vec, const_nunits, scalar_load,
10649 stmt_info, 0, vect_body);
10650 inside_cost
10651 = record_stmt_cost (cost_vec, 1, vec_construct,
10652 stmt_info, 0, vect_body);
10653 continue;
10655 unsigned HOST_WIDE_INT const_offset_nunits
10656 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
10657 .to_constant ();
10658 vec<constructor_elt, va_gc> *ctor_elts;
10659 vec_alloc (ctor_elts, const_nunits);
10660 gimple_seq stmts = NULL;
10661 /* We support offset vectors with more elements
10662 than the data vector for now. */
10663 unsigned HOST_WIDE_INT factor
10664 = const_offset_nunits / const_nunits;
10665 vec_offset = vec_offsets[j / factor];
10666 unsigned elt_offset = (j % factor) * const_nunits;
10667 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
10668 tree scale = size_int (gs_info.scale);
10669 align = get_object_alignment (DR_REF (first_dr_info->dr));
10670 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
10671 for (unsigned k = 0; k < const_nunits; ++k)
10673 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
10674 bitsize_int (k + elt_offset));
10675 tree idx
10676 = gimple_build (&stmts, BIT_FIELD_REF, idx_type,
10677 vec_offset, TYPE_SIZE (idx_type), boff);
10678 idx = gimple_convert (&stmts, sizetype, idx);
10679 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
10680 scale);
10681 tree ptr = gimple_build (&stmts, PLUS_EXPR,
10682 TREE_TYPE (dataref_ptr),
10683 dataref_ptr, idx);
10684 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
10685 tree elt = make_ssa_name (TREE_TYPE (vectype));
10686 tree ref = build2 (MEM_REF, ltype, ptr,
10687 build_int_cst (ref_type, 0));
10688 new_stmt = gimple_build_assign (elt, ref);
10689 gimple_set_vuse (new_stmt, gimple_vuse (gsi_stmt (*gsi)));
10690 gimple_seq_add_stmt (&stmts, new_stmt);
10691 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
10693 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
10694 new_stmt = gimple_build_assign (
10695 NULL_TREE, build_constructor (vectype, ctor_elts));
10696 data_ref = NULL_TREE;
10699 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10700 /* DATA_REF is null if we've already built the statement. */
10701 if (data_ref)
10703 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10704 new_stmt = gimple_build_assign (vec_dest, data_ref);
10706 new_temp = make_ssa_name (vec_dest, new_stmt);
10707 gimple_set_lhs (new_stmt, new_temp);
10708 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10710 /* Store vector loads in the corresponding SLP_NODE. */
10711 if (slp)
10712 slp_node->push_vec_def (new_stmt);
10715 if (!slp && !costing_p)
10716 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10719 if (!slp && !costing_p)
10720 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10722 if (costing_p && dump_enabled_p ())
10723 dump_printf_loc (MSG_NOTE, vect_location,
10724 "vect_model_load_cost: inside_cost = %u, "
10725 "prologue_cost = %u .\n",
10726 inside_cost, prologue_cost);
10727 return true;
10730 poly_uint64 group_elt = 0;
10731 unsigned int inside_cost = 0, prologue_cost = 0;
10732 for (j = 0; j < ncopies; j++)
10734 /* 1. Create the vector or array pointer update chain. */
10735 if (j == 0 && !costing_p)
10737 bool simd_lane_access_p
10738 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
10739 if (simd_lane_access_p
10740 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
10741 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
10742 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
10743 && integer_zerop (DR_INIT (first_dr_info->dr))
10744 && alias_sets_conflict_p (get_alias_set (aggr_type),
10745 get_alias_set (TREE_TYPE (ref_type)))
10746 && (alignment_support_scheme == dr_aligned
10747 || alignment_support_scheme == dr_unaligned_supported))
10749 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
10750 dataref_offset = build_int_cst (ref_type, 0);
10752 else if (diff_first_stmt_info)
10754 dataref_ptr
10755 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
10756 aggr_type, at_loop, offset, &dummy,
10757 gsi, &ptr_incr, simd_lane_access_p,
10758 bump);
10759 /* Adjust the pointer by the difference to first_stmt. */
10760 data_reference_p ptrdr
10761 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
10762 tree diff
10763 = fold_convert (sizetype,
10764 size_binop (MINUS_EXPR,
10765 DR_INIT (first_dr_info->dr),
10766 DR_INIT (ptrdr)));
10767 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10768 stmt_info, diff);
10769 if (alignment_support_scheme == dr_explicit_realign)
10771 msq = vect_setup_realignment (vinfo,
10772 first_stmt_info_for_drptr, gsi,
10773 &realignment_token,
10774 alignment_support_scheme,
10775 dataref_ptr, &at_loop);
10776 gcc_assert (!compute_in_loop);
10779 else
10780 dataref_ptr
10781 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10782 at_loop,
10783 offset, &dummy, gsi, &ptr_incr,
10784 simd_lane_access_p, bump);
10785 if (mask)
10786 vec_mask = vec_masks[0];
10788 else if (!costing_p)
10790 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10791 if (dataref_offset)
10792 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
10793 bump);
10794 else
10795 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10796 stmt_info, bump);
10797 if (mask)
10798 vec_mask = vec_masks[j];
10801 if (grouped_load || slp_perm)
10802 dr_chain.create (vec_num);
10804 gimple *new_stmt = NULL;
10805 for (i = 0; i < vec_num; i++)
10807 tree final_mask = NULL_TREE;
10808 tree final_len = NULL_TREE;
10809 tree bias = NULL_TREE;
10810 if (!costing_p)
10812 if (loop_masks)
10813 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10814 vec_num * ncopies, vectype,
10815 vec_num * j + i);
10816 if (vec_mask)
10817 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10818 final_mask, vec_mask, gsi);
10820 if (i > 0)
10821 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10822 gsi, stmt_info, bump);
10825 /* 2. Create the vector-load in the loop. */
10826 switch (alignment_support_scheme)
10828 case dr_aligned:
10829 case dr_unaligned_supported:
10831 if (costing_p)
10832 break;
10834 unsigned int misalign;
10835 unsigned HOST_WIDE_INT align;
10836 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
10837 if (alignment_support_scheme == dr_aligned)
10838 misalign = 0;
10839 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
10841 align
10842 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
10843 misalign = 0;
10845 else
10846 misalign = misalignment;
10847 if (dataref_offset == NULL_TREE
10848 && TREE_CODE (dataref_ptr) == SSA_NAME)
10849 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
10850 misalign);
10851 align = least_bit_hwi (misalign | align);
10853 /* Compute IFN when LOOP_LENS or final_mask valid. */
10854 machine_mode vmode = TYPE_MODE (vectype);
10855 machine_mode new_vmode = vmode;
10856 internal_fn partial_ifn = IFN_LAST;
10857 if (loop_lens)
10859 opt_machine_mode new_ovmode
10860 = get_len_load_store_mode (vmode, true, &partial_ifn);
10861 new_vmode = new_ovmode.require ();
10862 unsigned factor
10863 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
10864 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10865 vec_num * ncopies, vectype,
10866 vec_num * j + i, factor);
10868 else if (final_mask)
10870 if (!can_vec_mask_load_store_p (
10871 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
10872 &partial_ifn))
10873 gcc_unreachable ();
10876 if (partial_ifn == IFN_MASK_LEN_LOAD)
10878 if (!final_len)
10880 /* Pass VF value to 'len' argument of
10881 MASK_LEN_LOAD if LOOP_LENS is invalid. */
10882 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10884 if (!final_mask)
10886 /* Pass all ones value to 'mask' argument of
10887 MASK_LEN_LOAD if final_mask is invalid. */
10888 mask_vectype = truth_type_for (vectype);
10889 final_mask = build_minus_one_cst (mask_vectype);
10892 if (final_len)
10894 signed char biasval
10895 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10897 bias = build_int_cst (intQI_type_node, biasval);
10900 if (final_len)
10902 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
10903 gcall *call;
10904 if (partial_ifn == IFN_MASK_LEN_LOAD)
10905 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
10906 dataref_ptr, ptr,
10907 final_mask, final_len,
10908 bias);
10909 else
10910 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
10911 dataref_ptr, ptr,
10912 final_len, bias);
10913 gimple_call_set_nothrow (call, true);
10914 new_stmt = call;
10915 data_ref = NULL_TREE;
10917 /* Need conversion if it's wrapped with VnQI. */
10918 if (vmode != new_vmode)
10920 tree new_vtype = build_vector_type_for_mode (
10921 unsigned_intQI_type_node, new_vmode);
10922 tree var
10923 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
10924 gimple_set_lhs (call, var);
10925 vect_finish_stmt_generation (vinfo, stmt_info, call,
10926 gsi);
10927 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
10928 new_stmt = gimple_build_assign (vec_dest,
10929 VIEW_CONVERT_EXPR, op);
10932 else if (final_mask)
10934 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
10935 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
10936 dataref_ptr, ptr,
10937 final_mask);
10938 gimple_call_set_nothrow (call, true);
10939 new_stmt = call;
10940 data_ref = NULL_TREE;
10942 else
10944 tree ltype = vectype;
10945 tree new_vtype = NULL_TREE;
10946 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
10947 unsigned int vect_align
10948 = vect_known_alignment_in_bytes (first_dr_info, vectype);
10949 unsigned int scalar_dr_size
10950 = vect_get_scalar_dr_size (first_dr_info);
10951 /* If there's no peeling for gaps but we have a gap
10952 with slp loads then load the lower half of the
10953 vector only. See get_group_load_store_type for
10954 when we apply this optimization. */
10955 if (slp
10956 && loop_vinfo
10957 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
10958 && known_eq (nunits, (group_size - gap) * 2)
10959 && known_eq (nunits, group_size)
10960 && gap >= (vect_align / scalar_dr_size))
10962 tree half_vtype;
10963 new_vtype
10964 = vector_vector_composition_type (vectype, 2,
10965 &half_vtype);
10966 if (new_vtype != NULL_TREE)
10967 ltype = half_vtype;
10969 tree offset
10970 = (dataref_offset ? dataref_offset
10971 : build_int_cst (ref_type, 0));
10972 if (ltype != vectype
10973 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10975 unsigned HOST_WIDE_INT gap_offset
10976 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
10977 tree gapcst = build_int_cst (ref_type, gap_offset);
10978 offset = size_binop (PLUS_EXPR, offset, gapcst);
10980 data_ref
10981 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
10982 if (alignment_support_scheme == dr_aligned)
10984 else
10985 TREE_TYPE (data_ref)
10986 = build_aligned_type (TREE_TYPE (data_ref),
10987 align * BITS_PER_UNIT);
10988 if (ltype != vectype)
10990 vect_copy_ref_info (data_ref,
10991 DR_REF (first_dr_info->dr));
10992 tree tem = make_ssa_name (ltype);
10993 new_stmt = gimple_build_assign (tem, data_ref);
10994 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10995 gsi);
10996 data_ref = NULL;
10997 vec<constructor_elt, va_gc> *v;
10998 vec_alloc (v, 2);
10999 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11001 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11002 build_zero_cst (ltype));
11003 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11005 else
11007 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
11008 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
11009 build_zero_cst (ltype));
11011 gcc_assert (new_vtype != NULL_TREE);
11012 if (new_vtype == vectype)
11013 new_stmt = gimple_build_assign (
11014 vec_dest, build_constructor (vectype, v));
11015 else
11017 tree new_vname = make_ssa_name (new_vtype);
11018 new_stmt = gimple_build_assign (
11019 new_vname, build_constructor (new_vtype, v));
11020 vect_finish_stmt_generation (vinfo, stmt_info,
11021 new_stmt, gsi);
11022 new_stmt = gimple_build_assign (
11023 vec_dest,
11024 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
11028 break;
11030 case dr_explicit_realign:
11032 if (costing_p)
11033 break;
11034 tree ptr, bump;
11036 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
11038 if (compute_in_loop)
11039 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
11040 &realignment_token,
11041 dr_explicit_realign,
11042 dataref_ptr, NULL);
11044 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11045 ptr = copy_ssa_name (dataref_ptr);
11046 else
11047 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
11048 // For explicit realign the target alignment should be
11049 // known at compile time.
11050 unsigned HOST_WIDE_INT align
11051 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11052 new_stmt = gimple_build_assign (
11053 ptr, BIT_AND_EXPR, dataref_ptr,
11054 build_int_cst (TREE_TYPE (dataref_ptr),
11055 -(HOST_WIDE_INT) align));
11056 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11057 data_ref
11058 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11059 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11060 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11061 new_stmt = gimple_build_assign (vec_dest, data_ref);
11062 new_temp = make_ssa_name (vec_dest, new_stmt);
11063 gimple_assign_set_lhs (new_stmt, new_temp);
11064 gimple_move_vops (new_stmt, stmt_info->stmt);
11065 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11066 msq = new_temp;
11068 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
11069 bump = size_binop (MINUS_EXPR, bump, size_one_node);
11070 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
11071 bump);
11072 new_stmt = gimple_build_assign (
11073 NULL_TREE, BIT_AND_EXPR, ptr,
11074 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11075 if (TREE_CODE (ptr) == SSA_NAME)
11076 ptr = copy_ssa_name (ptr, new_stmt);
11077 else
11078 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11079 gimple_assign_set_lhs (new_stmt, ptr);
11080 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11081 data_ref
11082 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11083 break;
11085 case dr_explicit_realign_optimized:
11087 if (costing_p)
11088 break;
11089 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11090 new_temp = copy_ssa_name (dataref_ptr);
11091 else
11092 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11093 // We should only be doing this if we know the target
11094 // alignment at compile time.
11095 unsigned HOST_WIDE_INT align
11096 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11097 new_stmt = gimple_build_assign (
11098 new_temp, BIT_AND_EXPR, dataref_ptr,
11099 build_int_cst (TREE_TYPE (dataref_ptr),
11100 -(HOST_WIDE_INT) align));
11101 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11102 data_ref = build2 (MEM_REF, vectype, new_temp,
11103 build_int_cst (ref_type, 0));
11104 break;
11106 default:
11107 gcc_unreachable ();
11110 /* One common place to cost the above vect load for different
11111 alignment support schemes. */
11112 if (costing_p)
11114 /* For VMAT_CONTIGUOUS_PERMUTE if it's grouped load, we
11115 only need to take care of the first stmt, whose
11116 stmt_info is first_stmt_info, vec_num iterating on it
11117 will cover the cost for the remaining, it's consistent
11118 with transforming. For the prologue cost for realign,
11119 we only need to count it once for the whole group. */
11120 bool first_stmt_info_p = first_stmt_info == stmt_info;
11121 bool add_realign_cost = first_stmt_info_p && i == 0;
11122 if (memory_access_type == VMAT_CONTIGUOUS
11123 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11124 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11125 && (!grouped_load || first_stmt_info_p)))
11126 vect_get_load_cost (vinfo, stmt_info, 1,
11127 alignment_support_scheme, misalignment,
11128 add_realign_cost, &inside_cost,
11129 &prologue_cost, cost_vec, cost_vec, true);
11131 else
11133 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11134 /* DATA_REF is null if we've already built the statement. */
11135 if (data_ref)
11137 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11138 new_stmt = gimple_build_assign (vec_dest, data_ref);
11140 new_temp = make_ssa_name (vec_dest, new_stmt);
11141 gimple_set_lhs (new_stmt, new_temp);
11142 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11145 /* 3. Handle explicit realignment if necessary/supported.
11146 Create in loop:
11147 vec_dest = realign_load (msq, lsq, realignment_token) */
11148 if (!costing_p
11149 && (alignment_support_scheme == dr_explicit_realign_optimized
11150 || alignment_support_scheme == dr_explicit_realign))
11152 lsq = gimple_assign_lhs (new_stmt);
11153 if (!realignment_token)
11154 realignment_token = dataref_ptr;
11155 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11156 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11157 lsq, realignment_token);
11158 new_temp = make_ssa_name (vec_dest, new_stmt);
11159 gimple_assign_set_lhs (new_stmt, new_temp);
11160 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11162 if (alignment_support_scheme == dr_explicit_realign_optimized)
11164 gcc_assert (phi);
11165 if (i == vec_num - 1 && j == ncopies - 1)
11166 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11167 UNKNOWN_LOCATION);
11168 msq = lsq;
11172 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11174 if (costing_p)
11175 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11176 stmt_info, 0, vect_body);
11177 else
11179 tree perm_mask = perm_mask_for_reverse (vectype);
11180 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11181 perm_mask, stmt_info, gsi);
11182 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11186 /* Collect vector loads and later create their permutation in
11187 vect_transform_grouped_load (). */
11188 if (!costing_p && (grouped_load || slp_perm))
11189 dr_chain.quick_push (new_temp);
11191 /* Store vector loads in the corresponding SLP_NODE. */
11192 if (!costing_p && slp && !slp_perm)
11193 slp_node->push_vec_def (new_stmt);
11195 /* With SLP permutation we load the gaps as well, without
11196 we need to skip the gaps after we manage to fully load
11197 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11198 group_elt += nunits;
11199 if (!costing_p
11200 && maybe_ne (group_gap_adj, 0U)
11201 && !slp_perm
11202 && known_eq (group_elt, group_size - group_gap_adj))
11204 poly_wide_int bump_val
11205 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11206 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11207 == -1)
11208 bump_val = -bump_val;
11209 tree bump = wide_int_to_tree (sizetype, bump_val);
11210 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11211 stmt_info, bump);
11212 group_elt = 0;
11215 /* Bump the vector pointer to account for a gap or for excess
11216 elements loaded for a permuted SLP load. */
11217 if (!costing_p
11218 && maybe_ne (group_gap_adj, 0U)
11219 && slp_perm)
11221 poly_wide_int bump_val
11222 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11223 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11224 bump_val = -bump_val;
11225 tree bump = wide_int_to_tree (sizetype, bump_val);
11226 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11227 stmt_info, bump);
11230 if (slp && !slp_perm)
11231 continue;
11233 if (slp_perm)
11235 unsigned n_perms;
11236 /* For SLP we know we've seen all possible uses of dr_chain so
11237 direct vect_transform_slp_perm_load to DCE the unused parts.
11238 ??? This is a hack to prevent compile-time issues as seen
11239 in PR101120 and friends. */
11240 if (costing_p)
11242 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11243 true, &n_perms, nullptr);
11244 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11245 stmt_info, 0, vect_body);
11247 else
11249 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11250 gsi, vf, false, &n_perms,
11251 nullptr, true);
11252 gcc_assert (ok);
11255 else
11257 if (grouped_load)
11259 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11260 /* We assume that the cost of a single load-lanes instruction
11261 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11262 If a grouped access is instead being provided by a
11263 load-and-permute operation, include the cost of the
11264 permutes. */
11265 if (costing_p && first_stmt_info == stmt_info)
11267 /* Uses an even and odd extract operations or shuffle
11268 operations for each needed permute. */
11269 int group_size = DR_GROUP_SIZE (first_stmt_info);
11270 int nstmts = ceil_log2 (group_size) * group_size;
11271 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11272 stmt_info, 0, vect_body);
11274 if (dump_enabled_p ())
11275 dump_printf_loc (MSG_NOTE, vect_location,
11276 "vect_model_load_cost:"
11277 "strided group_size = %d .\n",
11278 group_size);
11280 else if (!costing_p)
11282 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11283 group_size, gsi);
11284 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11287 else if (!costing_p)
11288 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11290 dr_chain.release ();
11292 if (!slp && !costing_p)
11293 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11295 if (costing_p)
11297 gcc_assert (memory_access_type == VMAT_CONTIGUOUS
11298 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11299 || memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11300 if (dump_enabled_p ())
11301 dump_printf_loc (MSG_NOTE, vect_location,
11302 "vect_model_load_cost: inside_cost = %u, "
11303 "prologue_cost = %u .\n",
11304 inside_cost, prologue_cost);
11307 return true;
11310 /* Function vect_is_simple_cond.
11312 Input:
11313 LOOP - the loop that is being vectorized.
11314 COND - Condition that is checked for simple use.
11316 Output:
11317 *COMP_VECTYPE - the vector type for the comparison.
11318 *DTS - The def types for the arguments of the comparison
11320 Returns whether a COND can be vectorized. Checks whether
11321 condition operands are supportable using vec_is_simple_use. */
11323 static bool
11324 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11325 slp_tree slp_node, tree *comp_vectype,
11326 enum vect_def_type *dts, tree vectype)
11328 tree lhs, rhs;
11329 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11330 slp_tree slp_op;
11332 /* Mask case. */
11333 if (TREE_CODE (cond) == SSA_NAME
11334 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11336 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11337 &slp_op, &dts[0], comp_vectype)
11338 || !*comp_vectype
11339 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11340 return false;
11341 return true;
11344 if (!COMPARISON_CLASS_P (cond))
11345 return false;
11347 lhs = TREE_OPERAND (cond, 0);
11348 rhs = TREE_OPERAND (cond, 1);
11350 if (TREE_CODE (lhs) == SSA_NAME)
11352 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11353 &lhs, &slp_op, &dts[0], &vectype1))
11354 return false;
11356 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11357 || TREE_CODE (lhs) == FIXED_CST)
11358 dts[0] = vect_constant_def;
11359 else
11360 return false;
11362 if (TREE_CODE (rhs) == SSA_NAME)
11364 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11365 &rhs, &slp_op, &dts[1], &vectype2))
11366 return false;
11368 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11369 || TREE_CODE (rhs) == FIXED_CST)
11370 dts[1] = vect_constant_def;
11371 else
11372 return false;
11374 if (vectype1 && vectype2
11375 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11376 TYPE_VECTOR_SUBPARTS (vectype2)))
11377 return false;
11379 *comp_vectype = vectype1 ? vectype1 : vectype2;
11380 /* Invariant comparison. */
11381 if (! *comp_vectype)
11383 tree scalar_type = TREE_TYPE (lhs);
11384 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11385 *comp_vectype = truth_type_for (vectype);
11386 else
11388 /* If we can widen the comparison to match vectype do so. */
11389 if (INTEGRAL_TYPE_P (scalar_type)
11390 && !slp_node
11391 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11392 TYPE_SIZE (TREE_TYPE (vectype))))
11393 scalar_type = build_nonstandard_integer_type
11394 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
11395 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11396 slp_node);
11400 return true;
11403 /* vectorizable_condition.
11405 Check if STMT_INFO is conditional modify expression that can be vectorized.
11406 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11407 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11408 at GSI.
11410 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11412 Return true if STMT_INFO is vectorizable in this way. */
11414 static bool
11415 vectorizable_condition (vec_info *vinfo,
11416 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11417 gimple **vec_stmt,
11418 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11420 tree scalar_dest = NULL_TREE;
11421 tree vec_dest = NULL_TREE;
11422 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
11423 tree then_clause, else_clause;
11424 tree comp_vectype = NULL_TREE;
11425 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
11426 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
11427 tree vec_compare;
11428 tree new_temp;
11429 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11430 enum vect_def_type dts[4]
11431 = {vect_unknown_def_type, vect_unknown_def_type,
11432 vect_unknown_def_type, vect_unknown_def_type};
11433 int ndts = 4;
11434 int ncopies;
11435 int vec_num;
11436 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11437 int i;
11438 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11439 vec<tree> vec_oprnds0 = vNULL;
11440 vec<tree> vec_oprnds1 = vNULL;
11441 vec<tree> vec_oprnds2 = vNULL;
11442 vec<tree> vec_oprnds3 = vNULL;
11443 tree vec_cmp_type;
11444 bool masked = false;
11446 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11447 return false;
11449 /* Is vectorizable conditional operation? */
11450 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11451 if (!stmt)
11452 return false;
11454 code = gimple_assign_rhs_code (stmt);
11455 if (code != COND_EXPR)
11456 return false;
11458 stmt_vec_info reduc_info = NULL;
11459 int reduc_index = -1;
11460 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
11461 bool for_reduction
11462 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
11463 if (for_reduction)
11465 if (slp_node)
11466 return false;
11467 reduc_info = info_for_reduction (vinfo, stmt_info);
11468 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
11469 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
11470 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
11471 || reduc_index != -1);
11473 else
11475 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11476 return false;
11479 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11480 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11482 if (slp_node)
11484 ncopies = 1;
11485 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
11487 else
11489 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11490 vec_num = 1;
11493 gcc_assert (ncopies >= 1);
11494 if (for_reduction && ncopies > 1)
11495 return false; /* FORNOW */
11497 cond_expr = gimple_assign_rhs1 (stmt);
11499 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
11500 &comp_vectype, &dts[0], vectype)
11501 || !comp_vectype)
11502 return false;
11504 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
11505 slp_tree then_slp_node, else_slp_node;
11506 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
11507 &then_clause, &then_slp_node, &dts[2], &vectype1))
11508 return false;
11509 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
11510 &else_clause, &else_slp_node, &dts[3], &vectype2))
11511 return false;
11513 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
11514 return false;
11516 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
11517 return false;
11519 masked = !COMPARISON_CLASS_P (cond_expr);
11520 vec_cmp_type = truth_type_for (comp_vectype);
11522 if (vec_cmp_type == NULL_TREE)
11523 return false;
11525 cond_code = TREE_CODE (cond_expr);
11526 if (!masked)
11528 cond_expr0 = TREE_OPERAND (cond_expr, 0);
11529 cond_expr1 = TREE_OPERAND (cond_expr, 1);
11532 /* For conditional reductions, the "then" value needs to be the candidate
11533 value calculated by this iteration while the "else" value needs to be
11534 the result carried over from previous iterations. If the COND_EXPR
11535 is the other way around, we need to swap it. */
11536 bool must_invert_cmp_result = false;
11537 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
11539 if (masked)
11540 must_invert_cmp_result = true;
11541 else
11543 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
11544 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
11545 if (new_code == ERROR_MARK)
11546 must_invert_cmp_result = true;
11547 else
11549 cond_code = new_code;
11550 /* Make sure we don't accidentally use the old condition. */
11551 cond_expr = NULL_TREE;
11554 std::swap (then_clause, else_clause);
11557 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
11559 /* Boolean values may have another representation in vectors
11560 and therefore we prefer bit operations over comparison for
11561 them (which also works for scalar masks). We store opcodes
11562 to use in bitop1 and bitop2. Statement is vectorized as
11563 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
11564 depending on bitop1 and bitop2 arity. */
11565 switch (cond_code)
11567 case GT_EXPR:
11568 bitop1 = BIT_NOT_EXPR;
11569 bitop2 = BIT_AND_EXPR;
11570 break;
11571 case GE_EXPR:
11572 bitop1 = BIT_NOT_EXPR;
11573 bitop2 = BIT_IOR_EXPR;
11574 break;
11575 case LT_EXPR:
11576 bitop1 = BIT_NOT_EXPR;
11577 bitop2 = BIT_AND_EXPR;
11578 std::swap (cond_expr0, cond_expr1);
11579 break;
11580 case LE_EXPR:
11581 bitop1 = BIT_NOT_EXPR;
11582 bitop2 = BIT_IOR_EXPR;
11583 std::swap (cond_expr0, cond_expr1);
11584 break;
11585 case NE_EXPR:
11586 bitop1 = BIT_XOR_EXPR;
11587 break;
11588 case EQ_EXPR:
11589 bitop1 = BIT_XOR_EXPR;
11590 bitop2 = BIT_NOT_EXPR;
11591 break;
11592 default:
11593 return false;
11595 cond_code = SSA_NAME;
11598 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
11599 && reduction_type == EXTRACT_LAST_REDUCTION
11600 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
11602 if (dump_enabled_p ())
11603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11604 "reduction comparison operation not supported.\n");
11605 return false;
11608 if (!vec_stmt)
11610 if (bitop1 != NOP_EXPR)
11612 machine_mode mode = TYPE_MODE (comp_vectype);
11613 optab optab;
11615 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
11616 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11617 return false;
11619 if (bitop2 != NOP_EXPR)
11621 optab = optab_for_tree_code (bitop2, comp_vectype,
11622 optab_default);
11623 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11624 return false;
11628 vect_cost_for_stmt kind = vector_stmt;
11629 if (reduction_type == EXTRACT_LAST_REDUCTION)
11630 /* Count one reduction-like operation per vector. */
11631 kind = vec_to_scalar;
11632 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
11633 && (masked
11634 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
11635 cond_code)
11636 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
11637 ERROR_MARK))))
11638 return false;
11640 if (slp_node
11641 && (!vect_maybe_update_slp_op_vectype
11642 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
11643 || (op_adjust == 1
11644 && !vect_maybe_update_slp_op_vectype
11645 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
11646 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
11647 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
11649 if (dump_enabled_p ())
11650 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11651 "incompatible vector types for invariants\n");
11652 return false;
11655 if (loop_vinfo && for_reduction
11656 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
11658 if (reduction_type == EXTRACT_LAST_REDUCTION)
11659 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
11660 ncopies * vec_num, vectype, NULL);
11661 /* Extra inactive lanes should be safe for vect_nested_cycle. */
11662 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
11664 if (dump_enabled_p ())
11665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11666 "conditional reduction prevents the use"
11667 " of partial vectors.\n");
11668 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
11672 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
11673 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
11674 cost_vec, kind);
11675 return true;
11678 /* Transform. */
11680 /* Handle def. */
11681 scalar_dest = gimple_assign_lhs (stmt);
11682 if (reduction_type != EXTRACT_LAST_REDUCTION)
11683 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11685 bool swap_cond_operands = false;
11687 /* See whether another part of the vectorized code applies a loop
11688 mask to the condition, or to its inverse. */
11690 vec_loop_masks *masks = NULL;
11691 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
11693 if (reduction_type == EXTRACT_LAST_REDUCTION)
11694 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11695 else
11697 scalar_cond_masked_key cond (cond_expr, ncopies);
11698 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
11699 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11700 else
11702 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
11703 tree_code orig_code = cond.code;
11704 cond.code = invert_tree_comparison (cond.code, honor_nans);
11705 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
11707 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11708 cond_code = cond.code;
11709 swap_cond_operands = true;
11711 else
11713 /* Try the inverse of the current mask. We check if the
11714 inverse mask is live and if so we generate a negate of
11715 the current mask such that we still honor NaNs. */
11716 cond.inverted_p = true;
11717 cond.code = orig_code;
11718 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
11720 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11721 cond_code = cond.code;
11722 swap_cond_operands = true;
11723 must_invert_cmp_result = true;
11730 /* Handle cond expr. */
11731 if (masked)
11732 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11733 cond_expr, &vec_oprnds0, comp_vectype,
11734 then_clause, &vec_oprnds2, vectype,
11735 reduction_type != EXTRACT_LAST_REDUCTION
11736 ? else_clause : NULL, &vec_oprnds3, vectype);
11737 else
11738 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11739 cond_expr0, &vec_oprnds0, comp_vectype,
11740 cond_expr1, &vec_oprnds1, comp_vectype,
11741 then_clause, &vec_oprnds2, vectype,
11742 reduction_type != EXTRACT_LAST_REDUCTION
11743 ? else_clause : NULL, &vec_oprnds3, vectype);
11745 /* Arguments are ready. Create the new vector stmt. */
11746 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
11748 vec_then_clause = vec_oprnds2[i];
11749 if (reduction_type != EXTRACT_LAST_REDUCTION)
11750 vec_else_clause = vec_oprnds3[i];
11752 if (swap_cond_operands)
11753 std::swap (vec_then_clause, vec_else_clause);
11755 if (masked)
11756 vec_compare = vec_cond_lhs;
11757 else
11759 vec_cond_rhs = vec_oprnds1[i];
11760 if (bitop1 == NOP_EXPR)
11762 gimple_seq stmts = NULL;
11763 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
11764 vec_cond_lhs, vec_cond_rhs);
11765 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
11767 else
11769 new_temp = make_ssa_name (vec_cmp_type);
11770 gassign *new_stmt;
11771 if (bitop1 == BIT_NOT_EXPR)
11772 new_stmt = gimple_build_assign (new_temp, bitop1,
11773 vec_cond_rhs);
11774 else
11775 new_stmt
11776 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
11777 vec_cond_rhs);
11778 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11779 if (bitop2 == NOP_EXPR)
11780 vec_compare = new_temp;
11781 else if (bitop2 == BIT_NOT_EXPR
11782 && reduction_type != EXTRACT_LAST_REDUCTION)
11784 /* Instead of doing ~x ? y : z do x ? z : y. */
11785 vec_compare = new_temp;
11786 std::swap (vec_then_clause, vec_else_clause);
11788 else
11790 vec_compare = make_ssa_name (vec_cmp_type);
11791 if (bitop2 == BIT_NOT_EXPR)
11792 new_stmt
11793 = gimple_build_assign (vec_compare, bitop2, new_temp);
11794 else
11795 new_stmt
11796 = gimple_build_assign (vec_compare, bitop2,
11797 vec_cond_lhs, new_temp);
11798 vect_finish_stmt_generation (vinfo, stmt_info,
11799 new_stmt, gsi);
11804 /* If we decided to apply a loop mask to the result of the vector
11805 comparison, AND the comparison with the mask now. Later passes
11806 should then be able to reuse the AND results between mulitple
11807 vector statements.
11809 For example:
11810 for (int i = 0; i < 100; ++i)
11811 x[i] = y[i] ? z[i] : 10;
11813 results in following optimized GIMPLE:
11815 mask__35.8_43 = vect__4.7_41 != { 0, ... };
11816 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
11817 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
11818 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
11819 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
11820 vect_iftmp.11_47, { 10, ... }>;
11822 instead of using a masked and unmasked forms of
11823 vec != { 0, ... } (masked in the MASK_LOAD,
11824 unmasked in the VEC_COND_EXPR). */
11826 /* Force vec_compare to be an SSA_NAME rather than a comparison,
11827 in cases where that's necessary. */
11829 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
11831 if (!is_gimple_val (vec_compare))
11833 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11834 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11835 vec_compare);
11836 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11837 vec_compare = vec_compare_name;
11840 if (must_invert_cmp_result)
11842 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11843 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11844 BIT_NOT_EXPR,
11845 vec_compare);
11846 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11847 vec_compare = vec_compare_name;
11850 if (masks)
11852 tree loop_mask
11853 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
11854 vectype, i);
11855 tree tmp2 = make_ssa_name (vec_cmp_type);
11856 gassign *g
11857 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
11858 loop_mask);
11859 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
11860 vec_compare = tmp2;
11864 gimple *new_stmt;
11865 if (reduction_type == EXTRACT_LAST_REDUCTION)
11867 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
11868 tree lhs = gimple_get_lhs (old_stmt);
11869 new_stmt = gimple_build_call_internal
11870 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
11871 vec_then_clause);
11872 gimple_call_set_lhs (new_stmt, lhs);
11873 SSA_NAME_DEF_STMT (lhs) = new_stmt;
11874 if (old_stmt == gsi_stmt (*gsi))
11875 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
11876 else
11878 /* In this case we're moving the definition to later in the
11879 block. That doesn't matter because the only uses of the
11880 lhs are in phi statements. */
11881 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
11882 gsi_remove (&old_gsi, true);
11883 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11886 else
11888 new_temp = make_ssa_name (vec_dest);
11889 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
11890 vec_then_clause, vec_else_clause);
11891 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11893 if (slp_node)
11894 slp_node->push_vec_def (new_stmt);
11895 else
11896 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11899 if (!slp_node)
11900 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11902 vec_oprnds0.release ();
11903 vec_oprnds1.release ();
11904 vec_oprnds2.release ();
11905 vec_oprnds3.release ();
11907 return true;
11910 /* vectorizable_comparison.
11912 Check if STMT_INFO is comparison expression that can be vectorized.
11913 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11914 comparison, put it in VEC_STMT, and insert it at GSI.
11916 Return true if STMT_INFO is vectorizable in this way. */
11918 static bool
11919 vectorizable_comparison (vec_info *vinfo,
11920 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11921 gimple **vec_stmt,
11922 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11924 tree lhs, rhs1, rhs2;
11925 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11926 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11927 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
11928 tree new_temp;
11929 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11930 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
11931 int ndts = 2;
11932 poly_uint64 nunits;
11933 int ncopies;
11934 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11935 int i;
11936 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11937 vec<tree> vec_oprnds0 = vNULL;
11938 vec<tree> vec_oprnds1 = vNULL;
11939 tree mask_type;
11940 tree mask;
11942 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11943 return false;
11945 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
11946 return false;
11948 mask_type = vectype;
11949 nunits = TYPE_VECTOR_SUBPARTS (vectype);
11951 if (slp_node)
11952 ncopies = 1;
11953 else
11954 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11956 gcc_assert (ncopies >= 1);
11957 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11958 return false;
11960 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11961 if (!stmt)
11962 return false;
11964 code = gimple_assign_rhs_code (stmt);
11966 if (TREE_CODE_CLASS (code) != tcc_comparison)
11967 return false;
11969 slp_tree slp_rhs1, slp_rhs2;
11970 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11971 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
11972 return false;
11974 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11975 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
11976 return false;
11978 if (vectype1 && vectype2
11979 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11980 TYPE_VECTOR_SUBPARTS (vectype2)))
11981 return false;
11983 vectype = vectype1 ? vectype1 : vectype2;
11985 /* Invariant comparison. */
11986 if (!vectype)
11988 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
11989 vectype = mask_type;
11990 else
11991 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
11992 slp_node);
11993 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
11994 return false;
11996 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
11997 return false;
11999 /* Can't compare mask and non-mask types. */
12000 if (vectype1 && vectype2
12001 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
12002 return false;
12004 /* Boolean values may have another representation in vectors
12005 and therefore we prefer bit operations over comparison for
12006 them (which also works for scalar masks). We store opcodes
12007 to use in bitop1 and bitop2. Statement is vectorized as
12008 BITOP2 (rhs1 BITOP1 rhs2) or
12009 rhs1 BITOP2 (BITOP1 rhs2)
12010 depending on bitop1 and bitop2 arity. */
12011 bool swap_p = false;
12012 if (VECTOR_BOOLEAN_TYPE_P (vectype))
12014 if (code == GT_EXPR)
12016 bitop1 = BIT_NOT_EXPR;
12017 bitop2 = BIT_AND_EXPR;
12019 else if (code == GE_EXPR)
12021 bitop1 = BIT_NOT_EXPR;
12022 bitop2 = BIT_IOR_EXPR;
12024 else if (code == LT_EXPR)
12026 bitop1 = BIT_NOT_EXPR;
12027 bitop2 = BIT_AND_EXPR;
12028 swap_p = true;
12030 else if (code == LE_EXPR)
12032 bitop1 = BIT_NOT_EXPR;
12033 bitop2 = BIT_IOR_EXPR;
12034 swap_p = true;
12036 else
12038 bitop1 = BIT_XOR_EXPR;
12039 if (code == EQ_EXPR)
12040 bitop2 = BIT_NOT_EXPR;
12044 if (!vec_stmt)
12046 if (bitop1 == NOP_EXPR)
12048 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
12049 return false;
12051 else
12053 machine_mode mode = TYPE_MODE (vectype);
12054 optab optab;
12056 optab = optab_for_tree_code (bitop1, vectype, optab_default);
12057 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12058 return false;
12060 if (bitop2 != NOP_EXPR)
12062 optab = optab_for_tree_code (bitop2, vectype, optab_default);
12063 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
12064 return false;
12068 /* Put types on constant and invariant SLP children. */
12069 if (slp_node
12070 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
12071 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
12073 if (dump_enabled_p ())
12074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12075 "incompatible vector types for invariants\n");
12076 return false;
12079 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12080 vect_model_simple_cost (vinfo, stmt_info,
12081 ncopies * (1 + (bitop2 != NOP_EXPR)),
12082 dts, ndts, slp_node, cost_vec);
12083 return true;
12086 /* Transform. */
12088 /* Handle def. */
12089 lhs = gimple_assign_lhs (stmt);
12090 mask = vect_create_destination_var (lhs, mask_type);
12092 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12093 rhs1, &vec_oprnds0, vectype,
12094 rhs2, &vec_oprnds1, vectype);
12095 if (swap_p)
12096 std::swap (vec_oprnds0, vec_oprnds1);
12098 /* Arguments are ready. Create the new vector stmt. */
12099 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12101 gimple *new_stmt;
12102 vec_rhs2 = vec_oprnds1[i];
12104 new_temp = make_ssa_name (mask);
12105 if (bitop1 == NOP_EXPR)
12107 new_stmt = gimple_build_assign (new_temp, code,
12108 vec_rhs1, vec_rhs2);
12109 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12111 else
12113 if (bitop1 == BIT_NOT_EXPR)
12114 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12115 else
12116 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12117 vec_rhs2);
12118 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12119 if (bitop2 != NOP_EXPR)
12121 tree res = make_ssa_name (mask);
12122 if (bitop2 == BIT_NOT_EXPR)
12123 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12124 else
12125 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12126 new_temp);
12127 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12130 if (slp_node)
12131 slp_node->push_vec_def (new_stmt);
12132 else
12133 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12136 if (!slp_node)
12137 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12139 vec_oprnds0.release ();
12140 vec_oprnds1.release ();
12142 return true;
12145 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12146 can handle all live statements in the node. Otherwise return true
12147 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12148 VEC_STMT_P is as for vectorizable_live_operation. */
12150 static bool
12151 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
12152 slp_tree slp_node, slp_instance slp_node_instance,
12153 bool vec_stmt_p,
12154 stmt_vector_for_cost *cost_vec)
12156 if (slp_node)
12158 stmt_vec_info slp_stmt_info;
12159 unsigned int i;
12160 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
12162 if (STMT_VINFO_LIVE_P (slp_stmt_info)
12163 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
12164 slp_node_instance, i,
12165 vec_stmt_p, cost_vec))
12166 return false;
12169 else if (STMT_VINFO_LIVE_P (stmt_info)
12170 && !vectorizable_live_operation (vinfo, stmt_info,
12171 slp_node, slp_node_instance, -1,
12172 vec_stmt_p, cost_vec))
12173 return false;
12175 return true;
12178 /* Make sure the statement is vectorizable. */
12180 opt_result
12181 vect_analyze_stmt (vec_info *vinfo,
12182 stmt_vec_info stmt_info, bool *need_to_vectorize,
12183 slp_tree node, slp_instance node_instance,
12184 stmt_vector_for_cost *cost_vec)
12186 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12187 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
12188 bool ok;
12189 gimple_seq pattern_def_seq;
12191 if (dump_enabled_p ())
12192 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
12193 stmt_info->stmt);
12195 if (gimple_has_volatile_ops (stmt_info->stmt))
12196 return opt_result::failure_at (stmt_info->stmt,
12197 "not vectorized:"
12198 " stmt has volatile operands: %G\n",
12199 stmt_info->stmt);
12201 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12202 && node == NULL
12203 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
12205 gimple_stmt_iterator si;
12207 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
12209 stmt_vec_info pattern_def_stmt_info
12210 = vinfo->lookup_stmt (gsi_stmt (si));
12211 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
12212 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
12214 /* Analyze def stmt of STMT if it's a pattern stmt. */
12215 if (dump_enabled_p ())
12216 dump_printf_loc (MSG_NOTE, vect_location,
12217 "==> examining pattern def statement: %G",
12218 pattern_def_stmt_info->stmt);
12220 opt_result res
12221 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
12222 need_to_vectorize, node, node_instance,
12223 cost_vec);
12224 if (!res)
12225 return res;
12230 /* Skip stmts that do not need to be vectorized. In loops this is expected
12231 to include:
12232 - the COND_EXPR which is the loop exit condition
12233 - any LABEL_EXPRs in the loop
12234 - computations that are used only for array indexing or loop control.
12235 In basic blocks we only analyze statements that are a part of some SLP
12236 instance, therefore, all the statements are relevant.
12238 Pattern statement needs to be analyzed instead of the original statement
12239 if the original statement is not relevant. Otherwise, we analyze both
12240 statements. In basic blocks we are called from some SLP instance
12241 traversal, don't analyze pattern stmts instead, the pattern stmts
12242 already will be part of SLP instance. */
12244 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
12245 if (!STMT_VINFO_RELEVANT_P (stmt_info)
12246 && !STMT_VINFO_LIVE_P (stmt_info))
12248 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12249 && pattern_stmt_info
12250 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12251 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12253 /* Analyze PATTERN_STMT instead of the original stmt. */
12254 stmt_info = pattern_stmt_info;
12255 if (dump_enabled_p ())
12256 dump_printf_loc (MSG_NOTE, vect_location,
12257 "==> examining pattern statement: %G",
12258 stmt_info->stmt);
12260 else
12262 if (dump_enabled_p ())
12263 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
12265 return opt_result::success ();
12268 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12269 && node == NULL
12270 && pattern_stmt_info
12271 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12272 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12274 /* Analyze PATTERN_STMT too. */
12275 if (dump_enabled_p ())
12276 dump_printf_loc (MSG_NOTE, vect_location,
12277 "==> examining pattern statement: %G",
12278 pattern_stmt_info->stmt);
12280 opt_result res
12281 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
12282 node_instance, cost_vec);
12283 if (!res)
12284 return res;
12287 switch (STMT_VINFO_DEF_TYPE (stmt_info))
12289 case vect_internal_def:
12290 break;
12292 case vect_reduction_def:
12293 case vect_nested_cycle:
12294 gcc_assert (!bb_vinfo
12295 && (relevance == vect_used_in_outer
12296 || relevance == vect_used_in_outer_by_reduction
12297 || relevance == vect_used_by_reduction
12298 || relevance == vect_unused_in_scope
12299 || relevance == vect_used_only_live));
12300 break;
12302 case vect_induction_def:
12303 case vect_first_order_recurrence:
12304 gcc_assert (!bb_vinfo);
12305 break;
12307 case vect_constant_def:
12308 case vect_external_def:
12309 case vect_unknown_def_type:
12310 default:
12311 gcc_unreachable ();
12314 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12315 if (node)
12316 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
12318 if (STMT_VINFO_RELEVANT_P (stmt_info))
12320 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
12321 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12322 || (call && gimple_call_lhs (call) == NULL_TREE));
12323 *need_to_vectorize = true;
12326 if (PURE_SLP_STMT (stmt_info) && !node)
12328 if (dump_enabled_p ())
12329 dump_printf_loc (MSG_NOTE, vect_location,
12330 "handled only by SLP analysis\n");
12331 return opt_result::success ();
12334 ok = true;
12335 if (!bb_vinfo
12336 && (STMT_VINFO_RELEVANT_P (stmt_info)
12337 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
12338 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12339 -mveclibabi= takes preference over library functions with
12340 the simd attribute. */
12341 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12342 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
12343 cost_vec)
12344 || vectorizable_conversion (vinfo, stmt_info,
12345 NULL, NULL, node, cost_vec)
12346 || vectorizable_operation (vinfo, stmt_info,
12347 NULL, NULL, node, cost_vec)
12348 || vectorizable_assignment (vinfo, stmt_info,
12349 NULL, NULL, node, cost_vec)
12350 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12351 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12352 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12353 node, node_instance, cost_vec)
12354 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
12355 NULL, node, cost_vec)
12356 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12357 || vectorizable_condition (vinfo, stmt_info,
12358 NULL, NULL, node, cost_vec)
12359 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12360 cost_vec)
12361 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12362 stmt_info, NULL, node)
12363 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12364 stmt_info, NULL, node, cost_vec));
12365 else
12367 if (bb_vinfo)
12368 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12369 || vectorizable_simd_clone_call (vinfo, stmt_info,
12370 NULL, NULL, node, cost_vec)
12371 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
12372 cost_vec)
12373 || vectorizable_shift (vinfo, stmt_info,
12374 NULL, NULL, node, cost_vec)
12375 || vectorizable_operation (vinfo, stmt_info,
12376 NULL, NULL, node, cost_vec)
12377 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
12378 cost_vec)
12379 || vectorizable_load (vinfo, stmt_info,
12380 NULL, NULL, node, cost_vec)
12381 || vectorizable_store (vinfo, stmt_info,
12382 NULL, NULL, node, cost_vec)
12383 || vectorizable_condition (vinfo, stmt_info,
12384 NULL, NULL, node, cost_vec)
12385 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12386 cost_vec)
12387 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12390 if (node)
12391 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12393 if (!ok)
12394 return opt_result::failure_at (stmt_info->stmt,
12395 "not vectorized:"
12396 " relevant stmt not supported: %G",
12397 stmt_info->stmt);
12399 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
12400 need extra handling, except for vectorizable reductions. */
12401 if (!bb_vinfo
12402 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
12403 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
12404 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
12405 stmt_info, node, node_instance,
12406 false, cost_vec))
12407 return opt_result::failure_at (stmt_info->stmt,
12408 "not vectorized:"
12409 " live stmt not supported: %G",
12410 stmt_info->stmt);
12412 return opt_result::success ();
12416 /* Function vect_transform_stmt.
12418 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
12420 bool
12421 vect_transform_stmt (vec_info *vinfo,
12422 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12423 slp_tree slp_node, slp_instance slp_node_instance)
12425 bool is_store = false;
12426 gimple *vec_stmt = NULL;
12427 bool done;
12429 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
12431 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12432 if (slp_node)
12433 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
12435 switch (STMT_VINFO_TYPE (stmt_info))
12437 case type_demotion_vec_info_type:
12438 case type_promotion_vec_info_type:
12439 case type_conversion_vec_info_type:
12440 done = vectorizable_conversion (vinfo, stmt_info,
12441 gsi, &vec_stmt, slp_node, NULL);
12442 gcc_assert (done);
12443 break;
12445 case induc_vec_info_type:
12446 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
12447 stmt_info, &vec_stmt, slp_node,
12448 NULL);
12449 gcc_assert (done);
12450 break;
12452 case shift_vec_info_type:
12453 done = vectorizable_shift (vinfo, stmt_info,
12454 gsi, &vec_stmt, slp_node, NULL);
12455 gcc_assert (done);
12456 break;
12458 case op_vec_info_type:
12459 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12460 NULL);
12461 gcc_assert (done);
12462 break;
12464 case assignment_vec_info_type:
12465 done = vectorizable_assignment (vinfo, stmt_info,
12466 gsi, &vec_stmt, slp_node, NULL);
12467 gcc_assert (done);
12468 break;
12470 case load_vec_info_type:
12471 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12472 NULL);
12473 gcc_assert (done);
12474 break;
12476 case store_vec_info_type:
12477 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
12478 && !slp_node
12479 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))
12480 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info))))
12481 /* In case of interleaving, the whole chain is vectorized when the
12482 last store in the chain is reached. Store stmts before the last
12483 one are skipped, and there vec_stmt_info shouldn't be freed
12484 meanwhile. */
12486 else
12488 done = vectorizable_store (vinfo, stmt_info,
12489 gsi, &vec_stmt, slp_node, NULL);
12490 gcc_assert (done);
12491 is_store = true;
12493 break;
12495 case condition_vec_info_type:
12496 done = vectorizable_condition (vinfo, stmt_info,
12497 gsi, &vec_stmt, slp_node, NULL);
12498 gcc_assert (done);
12499 break;
12501 case comparison_vec_info_type:
12502 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
12503 slp_node, NULL);
12504 gcc_assert (done);
12505 break;
12507 case call_vec_info_type:
12508 done = vectorizable_call (vinfo, stmt_info,
12509 gsi, &vec_stmt, slp_node, NULL);
12510 break;
12512 case call_simd_clone_vec_info_type:
12513 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
12514 slp_node, NULL);
12515 break;
12517 case reduc_vec_info_type:
12518 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12519 gsi, &vec_stmt, slp_node);
12520 gcc_assert (done);
12521 break;
12523 case cycle_phi_info_type:
12524 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
12525 &vec_stmt, slp_node, slp_node_instance);
12526 gcc_assert (done);
12527 break;
12529 case lc_phi_info_type:
12530 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12531 stmt_info, &vec_stmt, slp_node);
12532 gcc_assert (done);
12533 break;
12535 case recurr_info_type:
12536 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12537 stmt_info, &vec_stmt, slp_node, NULL);
12538 gcc_assert (done);
12539 break;
12541 case phi_info_type:
12542 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
12543 gcc_assert (done);
12544 break;
12546 default:
12547 if (!STMT_VINFO_LIVE_P (stmt_info))
12549 if (dump_enabled_p ())
12550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12551 "stmt not supported.\n");
12552 gcc_unreachable ();
12554 done = true;
12557 if (!slp_node && vec_stmt)
12558 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
12560 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
12562 /* Handle stmts whose DEF is used outside the loop-nest that is
12563 being vectorized. */
12564 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
12565 slp_node_instance, true, NULL);
12566 gcc_assert (done);
12569 if (slp_node)
12570 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12572 return is_store;
12576 /* Remove a group of stores (for SLP or interleaving), free their
12577 stmt_vec_info. */
12579 void
12580 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
12582 stmt_vec_info next_stmt_info = first_stmt_info;
12584 while (next_stmt_info)
12586 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
12587 next_stmt_info = vect_orig_stmt (next_stmt_info);
12588 /* Free the attached stmt_vec_info and remove the stmt. */
12589 vinfo->remove_stmt (next_stmt_info);
12590 next_stmt_info = tmp;
12594 /* If NUNITS is nonzero, return a vector type that contains NUNITS
12595 elements of type SCALAR_TYPE, or null if the target doesn't support
12596 such a type.
12598 If NUNITS is zero, return a vector type that contains elements of
12599 type SCALAR_TYPE, choosing whichever vector size the target prefers.
12601 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
12602 for this vectorization region and want to "autodetect" the best choice.
12603 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
12604 and we want the new type to be interoperable with it. PREVAILING_MODE
12605 in this case can be a scalar integer mode or a vector mode; when it
12606 is a vector mode, the function acts like a tree-level version of
12607 related_vector_mode. */
12609 tree
12610 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
12611 tree scalar_type, poly_uint64 nunits)
12613 tree orig_scalar_type = scalar_type;
12614 scalar_mode inner_mode;
12615 machine_mode simd_mode;
12616 tree vectype;
12618 if ((!INTEGRAL_TYPE_P (scalar_type)
12619 && !POINTER_TYPE_P (scalar_type)
12620 && !SCALAR_FLOAT_TYPE_P (scalar_type))
12621 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
12622 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
12623 return NULL_TREE;
12625 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
12627 /* Interoperability between modes requires one to be a constant multiple
12628 of the other, so that the number of vectors required for each operation
12629 is a compile-time constant. */
12630 if (prevailing_mode != VOIDmode
12631 && !constant_multiple_p (nunits * nbytes,
12632 GET_MODE_SIZE (prevailing_mode))
12633 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
12634 nunits * nbytes))
12635 return NULL_TREE;
12637 /* For vector types of elements whose mode precision doesn't
12638 match their types precision we use a element type of mode
12639 precision. The vectorization routines will have to make sure
12640 they support the proper result truncation/extension.
12641 We also make sure to build vector types with INTEGER_TYPE
12642 component type only. */
12643 if (INTEGRAL_TYPE_P (scalar_type)
12644 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
12645 || TREE_CODE (scalar_type) != INTEGER_TYPE))
12646 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
12647 TYPE_UNSIGNED (scalar_type));
12649 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
12650 When the component mode passes the above test simply use a type
12651 corresponding to that mode. The theory is that any use that
12652 would cause problems with this will disable vectorization anyway. */
12653 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
12654 && !INTEGRAL_TYPE_P (scalar_type))
12655 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
12657 /* We can't build a vector type of elements with alignment bigger than
12658 their size. */
12659 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
12660 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
12661 TYPE_UNSIGNED (scalar_type));
12663 /* If we felt back to using the mode fail if there was
12664 no scalar type for it. */
12665 if (scalar_type == NULL_TREE)
12666 return NULL_TREE;
12668 /* If no prevailing mode was supplied, use the mode the target prefers.
12669 Otherwise lookup a vector mode based on the prevailing mode. */
12670 if (prevailing_mode == VOIDmode)
12672 gcc_assert (known_eq (nunits, 0U));
12673 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
12674 if (SCALAR_INT_MODE_P (simd_mode))
12676 /* Traditional behavior is not to take the integer mode
12677 literally, but simply to use it as a way of determining
12678 the vector size. It is up to mode_for_vector to decide
12679 what the TYPE_MODE should be.
12681 Note that nunits == 1 is allowed in order to support single
12682 element vector types. */
12683 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
12684 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
12685 return NULL_TREE;
12688 else if (SCALAR_INT_MODE_P (prevailing_mode)
12689 || !related_vector_mode (prevailing_mode,
12690 inner_mode, nunits).exists (&simd_mode))
12692 /* Fall back to using mode_for_vector, mostly in the hope of being
12693 able to use an integer mode. */
12694 if (known_eq (nunits, 0U)
12695 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
12696 return NULL_TREE;
12698 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
12699 return NULL_TREE;
12702 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
12704 /* In cases where the mode was chosen by mode_for_vector, check that
12705 the target actually supports the chosen mode, or that it at least
12706 allows the vector mode to be replaced by a like-sized integer. */
12707 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
12708 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
12709 return NULL_TREE;
12711 /* Re-attach the address-space qualifier if we canonicalized the scalar
12712 type. */
12713 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
12714 return build_qualified_type
12715 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
12717 return vectype;
12720 /* Function get_vectype_for_scalar_type.
12722 Returns the vector type corresponding to SCALAR_TYPE as supported
12723 by the target. If GROUP_SIZE is nonzero and we're performing BB
12724 vectorization, make sure that the number of elements in the vector
12725 is no bigger than GROUP_SIZE. */
12727 tree
12728 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
12729 unsigned int group_size)
12731 /* For BB vectorization, we should always have a group size once we've
12732 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12733 are tentative requests during things like early data reference
12734 analysis and pattern recognition. */
12735 if (is_a <bb_vec_info> (vinfo))
12736 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12737 else
12738 group_size = 0;
12740 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
12741 scalar_type);
12742 if (vectype && vinfo->vector_mode == VOIDmode)
12743 vinfo->vector_mode = TYPE_MODE (vectype);
12745 /* Register the natural choice of vector type, before the group size
12746 has been applied. */
12747 if (vectype)
12748 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
12750 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
12751 try again with an explicit number of elements. */
12752 if (vectype
12753 && group_size
12754 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
12756 /* Start with the biggest number of units that fits within
12757 GROUP_SIZE and halve it until we find a valid vector type.
12758 Usually either the first attempt will succeed or all will
12759 fail (in the latter case because GROUP_SIZE is too small
12760 for the target), but it's possible that a target could have
12761 a hole between supported vector types.
12763 If GROUP_SIZE is not a power of 2, this has the effect of
12764 trying the largest power of 2 that fits within the group,
12765 even though the group is not a multiple of that vector size.
12766 The BB vectorizer will then try to carve up the group into
12767 smaller pieces. */
12768 unsigned int nunits = 1 << floor_log2 (group_size);
12771 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
12772 scalar_type, nunits);
12773 nunits /= 2;
12775 while (nunits > 1 && !vectype);
12778 return vectype;
12781 /* Return the vector type corresponding to SCALAR_TYPE as supported
12782 by the target. NODE, if nonnull, is the SLP tree node that will
12783 use the returned vector type. */
12785 tree
12786 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
12788 unsigned int group_size = 0;
12789 if (node)
12790 group_size = SLP_TREE_LANES (node);
12791 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12794 /* Function get_mask_type_for_scalar_type.
12796 Returns the mask type corresponding to a result of comparison
12797 of vectors of specified SCALAR_TYPE as supported by target.
12798 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12799 make sure that the number of elements in the vector is no bigger
12800 than GROUP_SIZE. */
12802 tree
12803 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
12804 unsigned int group_size)
12806 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12808 if (!vectype)
12809 return NULL;
12811 return truth_type_for (vectype);
12814 /* Function get_same_sized_vectype
12816 Returns a vector type corresponding to SCALAR_TYPE of size
12817 VECTOR_TYPE if supported by the target. */
12819 tree
12820 get_same_sized_vectype (tree scalar_type, tree vector_type)
12822 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12823 return truth_type_for (vector_type);
12825 poly_uint64 nunits;
12826 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
12827 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
12828 return NULL_TREE;
12830 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
12831 scalar_type, nunits);
12834 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
12835 would not change the chosen vector modes. */
12837 bool
12838 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
12840 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
12841 i != vinfo->used_vector_modes.end (); ++i)
12842 if (!VECTOR_MODE_P (*i)
12843 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
12844 return false;
12845 return true;
12848 /* Function vect_is_simple_use.
12850 Input:
12851 VINFO - the vect info of the loop or basic block that is being vectorized.
12852 OPERAND - operand in the loop or bb.
12853 Output:
12854 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
12855 case OPERAND is an SSA_NAME that is defined in the vectorizable region
12856 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
12857 the definition could be anywhere in the function
12858 DT - the type of definition
12860 Returns whether a stmt with OPERAND can be vectorized.
12861 For loops, supportable operands are constants, loop invariants, and operands
12862 that are defined by the current iteration of the loop. Unsupportable
12863 operands are those that are defined by a previous iteration of the loop (as
12864 is the case in reduction/induction computations).
12865 For basic blocks, supportable operands are constants and bb invariants.
12866 For now, operands defined outside the basic block are not supported. */
12868 bool
12869 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12870 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
12872 if (def_stmt_info_out)
12873 *def_stmt_info_out = NULL;
12874 if (def_stmt_out)
12875 *def_stmt_out = NULL;
12876 *dt = vect_unknown_def_type;
12878 if (dump_enabled_p ())
12880 dump_printf_loc (MSG_NOTE, vect_location,
12881 "vect_is_simple_use: operand ");
12882 if (TREE_CODE (operand) == SSA_NAME
12883 && !SSA_NAME_IS_DEFAULT_DEF (operand))
12884 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
12885 else
12886 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
12889 if (CONSTANT_CLASS_P (operand))
12890 *dt = vect_constant_def;
12891 else if (is_gimple_min_invariant (operand))
12892 *dt = vect_external_def;
12893 else if (TREE_CODE (operand) != SSA_NAME)
12894 *dt = vect_unknown_def_type;
12895 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
12896 *dt = vect_external_def;
12897 else
12899 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
12900 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
12901 if (!stmt_vinfo)
12902 *dt = vect_external_def;
12903 else
12905 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
12906 def_stmt = stmt_vinfo->stmt;
12907 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
12908 if (def_stmt_info_out)
12909 *def_stmt_info_out = stmt_vinfo;
12911 if (def_stmt_out)
12912 *def_stmt_out = def_stmt;
12915 if (dump_enabled_p ())
12917 dump_printf (MSG_NOTE, ", type of def: ");
12918 switch (*dt)
12920 case vect_uninitialized_def:
12921 dump_printf (MSG_NOTE, "uninitialized\n");
12922 break;
12923 case vect_constant_def:
12924 dump_printf (MSG_NOTE, "constant\n");
12925 break;
12926 case vect_external_def:
12927 dump_printf (MSG_NOTE, "external\n");
12928 break;
12929 case vect_internal_def:
12930 dump_printf (MSG_NOTE, "internal\n");
12931 break;
12932 case vect_induction_def:
12933 dump_printf (MSG_NOTE, "induction\n");
12934 break;
12935 case vect_reduction_def:
12936 dump_printf (MSG_NOTE, "reduction\n");
12937 break;
12938 case vect_double_reduction_def:
12939 dump_printf (MSG_NOTE, "double reduction\n");
12940 break;
12941 case vect_nested_cycle:
12942 dump_printf (MSG_NOTE, "nested cycle\n");
12943 break;
12944 case vect_first_order_recurrence:
12945 dump_printf (MSG_NOTE, "first order recurrence\n");
12946 break;
12947 case vect_unknown_def_type:
12948 dump_printf (MSG_NOTE, "unknown\n");
12949 break;
12953 if (*dt == vect_unknown_def_type)
12955 if (dump_enabled_p ())
12956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12957 "Unsupported pattern.\n");
12958 return false;
12961 return true;
12964 /* Function vect_is_simple_use.
12966 Same as vect_is_simple_use but also determines the vector operand
12967 type of OPERAND and stores it to *VECTYPE. If the definition of
12968 OPERAND is vect_uninitialized_def, vect_constant_def or
12969 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
12970 is responsible to compute the best suited vector type for the
12971 scalar operand. */
12973 bool
12974 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12975 tree *vectype, stmt_vec_info *def_stmt_info_out,
12976 gimple **def_stmt_out)
12978 stmt_vec_info def_stmt_info;
12979 gimple *def_stmt;
12980 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
12981 return false;
12983 if (def_stmt_out)
12984 *def_stmt_out = def_stmt;
12985 if (def_stmt_info_out)
12986 *def_stmt_info_out = def_stmt_info;
12988 /* Now get a vector type if the def is internal, otherwise supply
12989 NULL_TREE and leave it up to the caller to figure out a proper
12990 type for the use stmt. */
12991 if (*dt == vect_internal_def
12992 || *dt == vect_induction_def
12993 || *dt == vect_reduction_def
12994 || *dt == vect_double_reduction_def
12995 || *dt == vect_nested_cycle
12996 || *dt == vect_first_order_recurrence)
12998 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
12999 gcc_assert (*vectype != NULL_TREE);
13000 if (dump_enabled_p ())
13001 dump_printf_loc (MSG_NOTE, vect_location,
13002 "vect_is_simple_use: vectype %T\n", *vectype);
13004 else if (*dt == vect_uninitialized_def
13005 || *dt == vect_constant_def
13006 || *dt == vect_external_def)
13007 *vectype = NULL_TREE;
13008 else
13009 gcc_unreachable ();
13011 return true;
13014 /* Function vect_is_simple_use.
13016 Same as vect_is_simple_use but determines the operand by operand
13017 position OPERAND from either STMT or SLP_NODE, filling in *OP
13018 and *SLP_DEF (when SLP_NODE is not NULL). */
13020 bool
13021 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
13022 unsigned operand, tree *op, slp_tree *slp_def,
13023 enum vect_def_type *dt,
13024 tree *vectype, stmt_vec_info *def_stmt_info_out)
13026 if (slp_node)
13028 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
13029 *slp_def = child;
13030 *vectype = SLP_TREE_VECTYPE (child);
13031 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
13033 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
13034 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
13036 else
13038 if (def_stmt_info_out)
13039 *def_stmt_info_out = NULL;
13040 *op = SLP_TREE_SCALAR_OPS (child)[0];
13041 *dt = SLP_TREE_DEF_TYPE (child);
13042 return true;
13045 else
13047 *slp_def = NULL;
13048 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
13050 if (gimple_assign_rhs_code (ass) == COND_EXPR
13051 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
13053 if (operand < 2)
13054 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
13055 else
13056 *op = gimple_op (ass, operand);
13058 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
13059 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
13060 else
13061 *op = gimple_op (ass, operand + 1);
13063 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
13064 *op = gimple_call_arg (call, operand);
13065 else
13066 gcc_unreachable ();
13067 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
13071 /* If OP is not NULL and is external or constant update its vector
13072 type with VECTYPE. Returns true if successful or false if not,
13073 for example when conflicting vector types are present. */
13075 bool
13076 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13078 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13079 return true;
13080 if (SLP_TREE_VECTYPE (op))
13081 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
13082 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13083 should be handled by patters. Allow vect_constant_def for now. */
13084 if (VECTOR_BOOLEAN_TYPE_P (vectype)
13085 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
13086 return false;
13087 SLP_TREE_VECTYPE (op) = vectype;
13088 return true;
13091 /* Function supportable_widening_operation
13093 Check whether an operation represented by the code CODE is a
13094 widening operation that is supported by the target platform in
13095 vector form (i.e., when operating on arguments of type VECTYPE_IN
13096 producing a result of type VECTYPE_OUT).
13098 Widening operations we currently support are NOP (CONVERT), FLOAT,
13099 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13100 are supported by the target platform either directly (via vector
13101 tree-codes), or via target builtins.
13103 Output:
13104 - CODE1 and CODE2 are codes of vector operations to be used when
13105 vectorizing the operation, if available.
13106 - MULTI_STEP_CVT determines the number of required intermediate steps in
13107 case of multi-step conversion (like char->short->int - in that case
13108 MULTI_STEP_CVT will be 1).
13109 - INTERM_TYPES contains the intermediate type required to perform the
13110 widening operation (short in the above example). */
13112 bool
13113 supportable_widening_operation (vec_info *vinfo,
13114 code_helper code,
13115 stmt_vec_info stmt_info,
13116 tree vectype_out, tree vectype_in,
13117 code_helper *code1,
13118 code_helper *code2,
13119 int *multi_step_cvt,
13120 vec<tree> *interm_types)
13122 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
13123 class loop *vect_loop = NULL;
13124 machine_mode vec_mode;
13125 enum insn_code icode1, icode2;
13126 optab optab1 = unknown_optab, optab2 = unknown_optab;
13127 tree vectype = vectype_in;
13128 tree wide_vectype = vectype_out;
13129 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
13130 int i;
13131 tree prev_type, intermediate_type;
13132 machine_mode intermediate_mode, prev_mode;
13133 optab optab3, optab4;
13135 *multi_step_cvt = 0;
13136 if (loop_info)
13137 vect_loop = LOOP_VINFO_LOOP (loop_info);
13139 switch (code.safe_as_tree_code ())
13141 case MAX_TREE_CODES:
13142 /* Don't set c1 and c2 if code is not a tree_code. */
13143 break;
13145 case WIDEN_MULT_EXPR:
13146 /* The result of a vectorized widening operation usually requires
13147 two vectors (because the widened results do not fit into one vector).
13148 The generated vector results would normally be expected to be
13149 generated in the same order as in the original scalar computation,
13150 i.e. if 8 results are generated in each vector iteration, they are
13151 to be organized as follows:
13152 vect1: [res1,res2,res3,res4],
13153 vect2: [res5,res6,res7,res8].
13155 However, in the special case that the result of the widening
13156 operation is used in a reduction computation only, the order doesn't
13157 matter (because when vectorizing a reduction we change the order of
13158 the computation). Some targets can take advantage of this and
13159 generate more efficient code. For example, targets like Altivec,
13160 that support widen_mult using a sequence of {mult_even,mult_odd}
13161 generate the following vectors:
13162 vect1: [res1,res3,res5,res7],
13163 vect2: [res2,res4,res6,res8].
13165 When vectorizing outer-loops, we execute the inner-loop sequentially
13166 (each vectorized inner-loop iteration contributes to VF outer-loop
13167 iterations in parallel). We therefore don't allow to change the
13168 order of the computation in the inner-loop during outer-loop
13169 vectorization. */
13170 /* TODO: Another case in which order doesn't *really* matter is when we
13171 widen and then contract again, e.g. (short)((int)x * y >> 8).
13172 Normally, pack_trunc performs an even/odd permute, whereas the
13173 repack from an even/odd expansion would be an interleave, which
13174 would be significantly simpler for e.g. AVX2. */
13175 /* In any case, in order to avoid duplicating the code below, recurse
13176 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13177 are properly set up for the caller. If we fail, we'll continue with
13178 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13179 if (vect_loop
13180 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
13181 && !nested_in_vect_loop_p (vect_loop, stmt_info)
13182 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
13183 stmt_info, vectype_out,
13184 vectype_in, code1,
13185 code2, multi_step_cvt,
13186 interm_types))
13188 /* Elements in a vector with vect_used_by_reduction property cannot
13189 be reordered if the use chain with this property does not have the
13190 same operation. One such an example is s += a * b, where elements
13191 in a and b cannot be reordered. Here we check if the vector defined
13192 by STMT is only directly used in the reduction statement. */
13193 tree lhs = gimple_assign_lhs (stmt_info->stmt);
13194 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
13195 if (use_stmt_info
13196 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
13197 return true;
13199 c1 = VEC_WIDEN_MULT_LO_EXPR;
13200 c2 = VEC_WIDEN_MULT_HI_EXPR;
13201 break;
13203 case DOT_PROD_EXPR:
13204 c1 = DOT_PROD_EXPR;
13205 c2 = DOT_PROD_EXPR;
13206 break;
13208 case SAD_EXPR:
13209 c1 = SAD_EXPR;
13210 c2 = SAD_EXPR;
13211 break;
13213 case VEC_WIDEN_MULT_EVEN_EXPR:
13214 /* Support the recursion induced just above. */
13215 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
13216 c2 = VEC_WIDEN_MULT_ODD_EXPR;
13217 break;
13219 case WIDEN_LSHIFT_EXPR:
13220 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
13221 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
13222 break;
13224 CASE_CONVERT:
13225 c1 = VEC_UNPACK_LO_EXPR;
13226 c2 = VEC_UNPACK_HI_EXPR;
13227 break;
13229 case FLOAT_EXPR:
13230 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
13231 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
13232 break;
13234 case FIX_TRUNC_EXPR:
13235 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
13236 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
13237 break;
13239 default:
13240 gcc_unreachable ();
13243 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
13244 std::swap (c1, c2);
13246 if (code == FIX_TRUNC_EXPR)
13248 /* The signedness is determined from output operand. */
13249 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13250 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13252 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
13253 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13254 && VECTOR_BOOLEAN_TYPE_P (vectype)
13255 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13256 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13258 /* If the input and result modes are the same, a different optab
13259 is needed where we pass in the number of units in vectype. */
13260 optab1 = vec_unpacks_sbool_lo_optab;
13261 optab2 = vec_unpacks_sbool_hi_optab;
13264 vec_mode = TYPE_MODE (vectype);
13265 if (widening_fn_p (code))
13267 /* If this is an internal fn then we must check whether the target
13268 supports either a low-high split or an even-odd split. */
13269 internal_fn ifn = as_internal_fn ((combined_fn) code);
13271 internal_fn lo, hi, even, odd;
13272 lookup_hilo_internal_fn (ifn, &lo, &hi);
13273 *code1 = as_combined_fn (lo);
13274 *code2 = as_combined_fn (hi);
13275 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
13276 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
13278 /* If we don't support low-high, then check for even-odd. */
13279 if (!optab1
13280 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13281 || !optab2
13282 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13284 lookup_evenodd_internal_fn (ifn, &even, &odd);
13285 *code1 = as_combined_fn (even);
13286 *code2 = as_combined_fn (odd);
13287 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
13288 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
13291 else if (code.is_tree_code ())
13293 if (code == FIX_TRUNC_EXPR)
13295 /* The signedness is determined from output operand. */
13296 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13297 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13299 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
13300 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13301 && VECTOR_BOOLEAN_TYPE_P (vectype)
13302 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13303 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13305 /* If the input and result modes are the same, a different optab
13306 is needed where we pass in the number of units in vectype. */
13307 optab1 = vec_unpacks_sbool_lo_optab;
13308 optab2 = vec_unpacks_sbool_hi_optab;
13310 else
13312 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13313 optab2 = optab_for_tree_code (c2, vectype, optab_default);
13315 *code1 = c1;
13316 *code2 = c2;
13319 if (!optab1 || !optab2)
13320 return false;
13322 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13323 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13324 return false;
13327 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13328 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13330 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13331 return true;
13332 /* For scalar masks we may have different boolean
13333 vector types having the same QImode. Thus we
13334 add additional check for elements number. */
13335 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
13336 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13337 return true;
13340 /* Check if it's a multi-step conversion that can be done using intermediate
13341 types. */
13343 prev_type = vectype;
13344 prev_mode = vec_mode;
13346 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
13347 return false;
13349 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13350 intermediate steps in promotion sequence. We try
13351 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
13352 not. */
13353 interm_types->create (MAX_INTERM_CVT_STEPS);
13354 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13356 intermediate_mode = insn_data[icode1].operand[0].mode;
13357 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13358 intermediate_type
13359 = vect_halve_mask_nunits (prev_type, intermediate_mode);
13360 else if (VECTOR_MODE_P (intermediate_mode))
13362 tree intermediate_element_type
13363 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
13364 TYPE_UNSIGNED (prev_type));
13365 intermediate_type
13366 = build_vector_type_for_mode (intermediate_element_type,
13367 intermediate_mode);
13369 else
13370 intermediate_type
13371 = lang_hooks.types.type_for_mode (intermediate_mode,
13372 TYPE_UNSIGNED (prev_type));
13374 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13375 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13376 && intermediate_mode == prev_mode
13377 && SCALAR_INT_MODE_P (prev_mode))
13379 /* If the input and result modes are the same, a different optab
13380 is needed where we pass in the number of units in vectype. */
13381 optab3 = vec_unpacks_sbool_lo_optab;
13382 optab4 = vec_unpacks_sbool_hi_optab;
13384 else
13386 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
13387 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
13390 if (!optab3 || !optab4
13391 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
13392 || insn_data[icode1].operand[0].mode != intermediate_mode
13393 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
13394 || insn_data[icode2].operand[0].mode != intermediate_mode
13395 || ((icode1 = optab_handler (optab3, intermediate_mode))
13396 == CODE_FOR_nothing)
13397 || ((icode2 = optab_handler (optab4, intermediate_mode))
13398 == CODE_FOR_nothing))
13399 break;
13401 interm_types->quick_push (intermediate_type);
13402 (*multi_step_cvt)++;
13404 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13405 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13407 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13408 return true;
13409 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
13410 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13411 return true;
13414 prev_type = intermediate_type;
13415 prev_mode = intermediate_mode;
13418 interm_types->release ();
13419 return false;
13423 /* Function supportable_narrowing_operation
13425 Check whether an operation represented by the code CODE is a
13426 narrowing operation that is supported by the target platform in
13427 vector form (i.e., when operating on arguments of type VECTYPE_IN
13428 and producing a result of type VECTYPE_OUT).
13430 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
13431 and FLOAT. This function checks if these operations are supported by
13432 the target platform directly via vector tree-codes.
13434 Output:
13435 - CODE1 is the code of a vector operation to be used when
13436 vectorizing the operation, if available.
13437 - MULTI_STEP_CVT determines the number of required intermediate steps in
13438 case of multi-step conversion (like int->short->char - in that case
13439 MULTI_STEP_CVT will be 1).
13440 - INTERM_TYPES contains the intermediate type required to perform the
13441 narrowing operation (short in the above example). */
13443 bool
13444 supportable_narrowing_operation (code_helper code,
13445 tree vectype_out, tree vectype_in,
13446 code_helper *code1, int *multi_step_cvt,
13447 vec<tree> *interm_types)
13449 machine_mode vec_mode;
13450 enum insn_code icode1;
13451 optab optab1, interm_optab;
13452 tree vectype = vectype_in;
13453 tree narrow_vectype = vectype_out;
13454 enum tree_code c1;
13455 tree intermediate_type, prev_type;
13456 machine_mode intermediate_mode, prev_mode;
13457 int i;
13458 unsigned HOST_WIDE_INT n_elts;
13459 bool uns;
13461 if (!code.is_tree_code ())
13462 return false;
13464 *multi_step_cvt = 0;
13465 switch ((tree_code) code)
13467 CASE_CONVERT:
13468 c1 = VEC_PACK_TRUNC_EXPR;
13469 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
13470 && VECTOR_BOOLEAN_TYPE_P (vectype)
13471 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
13472 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
13473 && n_elts < BITS_PER_UNIT)
13474 optab1 = vec_pack_sbool_trunc_optab;
13475 else
13476 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13477 break;
13479 case FIX_TRUNC_EXPR:
13480 c1 = VEC_PACK_FIX_TRUNC_EXPR;
13481 /* The signedness is determined from output operand. */
13482 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13483 break;
13485 case FLOAT_EXPR:
13486 c1 = VEC_PACK_FLOAT_EXPR;
13487 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13488 break;
13490 default:
13491 gcc_unreachable ();
13494 if (!optab1)
13495 return false;
13497 vec_mode = TYPE_MODE (vectype);
13498 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
13499 return false;
13501 *code1 = c1;
13503 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
13505 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13506 return true;
13507 /* For scalar masks we may have different boolean
13508 vector types having the same QImode. Thus we
13509 add additional check for elements number. */
13510 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
13511 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
13512 return true;
13515 if (code == FLOAT_EXPR)
13516 return false;
13518 /* Check if it's a multi-step conversion that can be done using intermediate
13519 types. */
13520 prev_mode = vec_mode;
13521 prev_type = vectype;
13522 if (code == FIX_TRUNC_EXPR)
13523 uns = TYPE_UNSIGNED (vectype_out);
13524 else
13525 uns = TYPE_UNSIGNED (vectype);
13527 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
13528 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
13529 costly than signed. */
13530 if (code == FIX_TRUNC_EXPR && uns)
13532 enum insn_code icode2;
13534 intermediate_type
13535 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
13536 interm_optab
13537 = optab_for_tree_code (c1, intermediate_type, optab_default);
13538 if (interm_optab != unknown_optab
13539 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
13540 && insn_data[icode1].operand[0].mode
13541 == insn_data[icode2].operand[0].mode)
13543 uns = false;
13544 optab1 = interm_optab;
13545 icode1 = icode2;
13549 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13550 intermediate steps in promotion sequence. We try
13551 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
13552 interm_types->create (MAX_INTERM_CVT_STEPS);
13553 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13555 intermediate_mode = insn_data[icode1].operand[0].mode;
13556 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13557 intermediate_type
13558 = vect_double_mask_nunits (prev_type, intermediate_mode);
13559 else
13560 intermediate_type
13561 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
13562 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13563 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13564 && SCALAR_INT_MODE_P (prev_mode)
13565 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
13566 && n_elts < BITS_PER_UNIT)
13567 interm_optab = vec_pack_sbool_trunc_optab;
13568 else
13569 interm_optab
13570 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
13571 optab_default);
13572 if (!interm_optab
13573 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
13574 || insn_data[icode1].operand[0].mode != intermediate_mode
13575 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
13576 == CODE_FOR_nothing))
13577 break;
13579 interm_types->quick_push (intermediate_type);
13580 (*multi_step_cvt)++;
13582 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
13584 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13585 return true;
13586 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
13587 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
13588 return true;
13591 prev_mode = intermediate_mode;
13592 prev_type = intermediate_type;
13593 optab1 = interm_optab;
13596 interm_types->release ();
13597 return false;
13600 /* Generate and return a vector mask of MASK_TYPE such that
13601 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
13602 Add the statements to SEQ. */
13604 tree
13605 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
13606 tree end_index, const char *name)
13608 tree cmp_type = TREE_TYPE (start_index);
13609 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
13610 cmp_type, mask_type,
13611 OPTIMIZE_FOR_SPEED));
13612 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
13613 start_index, end_index,
13614 build_zero_cst (mask_type));
13615 tree tmp;
13616 if (name)
13617 tmp = make_temp_ssa_name (mask_type, NULL, name);
13618 else
13619 tmp = make_ssa_name (mask_type);
13620 gimple_call_set_lhs (call, tmp);
13621 gimple_seq_add_stmt (seq, call);
13622 return tmp;
13625 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
13626 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
13628 tree
13629 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
13630 tree end_index)
13632 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
13633 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
13636 /* Try to compute the vector types required to vectorize STMT_INFO,
13637 returning true on success and false if vectorization isn't possible.
13638 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13639 take sure that the number of elements in the vectors is no bigger
13640 than GROUP_SIZE.
13642 On success:
13644 - Set *STMT_VECTYPE_OUT to:
13645 - NULL_TREE if the statement doesn't need to be vectorized;
13646 - the equivalent of STMT_VINFO_VECTYPE otherwise.
13648 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
13649 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
13650 statement does not help to determine the overall number of units. */
13652 opt_result
13653 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
13654 tree *stmt_vectype_out,
13655 tree *nunits_vectype_out,
13656 unsigned int group_size)
13658 gimple *stmt = stmt_info->stmt;
13660 /* For BB vectorization, we should always have a group size once we've
13661 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13662 are tentative requests during things like early data reference
13663 analysis and pattern recognition. */
13664 if (is_a <bb_vec_info> (vinfo))
13665 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13666 else
13667 group_size = 0;
13669 *stmt_vectype_out = NULL_TREE;
13670 *nunits_vectype_out = NULL_TREE;
13672 if (gimple_get_lhs (stmt) == NULL_TREE
13673 /* MASK_STORE has no lhs, but is ok. */
13674 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
13676 if (is_a <gcall *> (stmt))
13678 /* Ignore calls with no lhs. These must be calls to
13679 #pragma omp simd functions, and what vectorization factor
13680 it really needs can't be determined until
13681 vectorizable_simd_clone_call. */
13682 if (dump_enabled_p ())
13683 dump_printf_loc (MSG_NOTE, vect_location,
13684 "defer to SIMD clone analysis.\n");
13685 return opt_result::success ();
13688 return opt_result::failure_at (stmt,
13689 "not vectorized: irregular stmt.%G", stmt);
13692 tree vectype;
13693 tree scalar_type = NULL_TREE;
13694 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
13696 vectype = STMT_VINFO_VECTYPE (stmt_info);
13697 if (dump_enabled_p ())
13698 dump_printf_loc (MSG_NOTE, vect_location,
13699 "precomputed vectype: %T\n", vectype);
13701 else if (vect_use_mask_type_p (stmt_info))
13703 unsigned int precision = stmt_info->mask_precision;
13704 scalar_type = build_nonstandard_integer_type (precision, 1);
13705 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
13706 if (!vectype)
13707 return opt_result::failure_at (stmt, "not vectorized: unsupported"
13708 " data-type %T\n", scalar_type);
13709 if (dump_enabled_p ())
13710 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
13712 else
13714 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
13715 scalar_type = TREE_TYPE (DR_REF (dr));
13716 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
13717 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
13718 else
13719 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
13721 if (dump_enabled_p ())
13723 if (group_size)
13724 dump_printf_loc (MSG_NOTE, vect_location,
13725 "get vectype for scalar type (group size %d):"
13726 " %T\n", group_size, scalar_type);
13727 else
13728 dump_printf_loc (MSG_NOTE, vect_location,
13729 "get vectype for scalar type: %T\n", scalar_type);
13731 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13732 if (!vectype)
13733 return opt_result::failure_at (stmt,
13734 "not vectorized:"
13735 " unsupported data-type %T\n",
13736 scalar_type);
13738 if (dump_enabled_p ())
13739 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
13742 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
13743 return opt_result::failure_at (stmt,
13744 "not vectorized: vector stmt in loop:%G",
13745 stmt);
13747 *stmt_vectype_out = vectype;
13749 /* Don't try to compute scalar types if the stmt produces a boolean
13750 vector; use the existing vector type instead. */
13751 tree nunits_vectype = vectype;
13752 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13754 /* The number of units is set according to the smallest scalar
13755 type (or the largest vector size, but we only support one
13756 vector size per vectorization). */
13757 scalar_type = vect_get_smallest_scalar_type (stmt_info,
13758 TREE_TYPE (vectype));
13759 if (scalar_type != TREE_TYPE (vectype))
13761 if (dump_enabled_p ())
13762 dump_printf_loc (MSG_NOTE, vect_location,
13763 "get vectype for smallest scalar type: %T\n",
13764 scalar_type);
13765 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
13766 group_size);
13767 if (!nunits_vectype)
13768 return opt_result::failure_at
13769 (stmt, "not vectorized: unsupported data-type %T\n",
13770 scalar_type);
13771 if (dump_enabled_p ())
13772 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
13773 nunits_vectype);
13777 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
13778 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
13779 return opt_result::failure_at (stmt,
13780 "Not vectorized: Incompatible number "
13781 "of vector subparts between %T and %T\n",
13782 nunits_vectype, *stmt_vectype_out);
13784 if (dump_enabled_p ())
13786 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
13787 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
13788 dump_printf (MSG_NOTE, "\n");
13791 *nunits_vectype_out = nunits_vectype;
13792 return opt_result::success ();
13795 /* Generate and return statement sequence that sets vector length LEN that is:
13797 min_of_start_and_end = min (START_INDEX, END_INDEX);
13798 left_len = END_INDEX - min_of_start_and_end;
13799 rhs = min (left_len, LEN_LIMIT);
13800 LEN = rhs;
13802 Note: the cost of the code generated by this function is modeled
13803 by vect_estimate_min_profitable_iters, so changes here may need
13804 corresponding changes there. */
13806 gimple_seq
13807 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
13809 gimple_seq stmts = NULL;
13810 tree len_type = TREE_TYPE (len);
13811 gcc_assert (TREE_TYPE (start_index) == len_type);
13813 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
13814 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
13815 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
13816 gimple* stmt = gimple_build_assign (len, rhs);
13817 gimple_seq_add_stmt (&stmts, stmt);
13819 return stmts;