Extend fold_vec_perm to handle VLA vector_cst.
[official-gcc.git] / gcc / tree-vect-stmts.cc
blob: cd8e0a763746f6a3f0255f6a9234532a41e75326
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "gimple-range.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "gimple-fold.h"
57 #include "regs.h"
58 #include "attribs.h"
59 #include "optabs-libfuncs.h"
61 /* For lang_hooks.types.type_for_mode. */
62 #include "langhooks.h"
64 /* Return the vectorized type for the given statement. */
66 tree
67 stmt_vectype (class _stmt_vec_info *stmt_info)
69 return STMT_VINFO_VECTYPE (stmt_info);
72 /* Return TRUE iff the given statement is in an inner loop relative to
73 the loop being vectorized. */
74 bool
75 stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
77 gimple *stmt = STMT_VINFO_STMT (stmt_info);
78 basic_block bb = gimple_bb (stmt);
79 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
80 class loop* loop;
82 if (!loop_vinfo)
83 return false;
85 loop = LOOP_VINFO_LOOP (loop_vinfo);
87 return (bb->loop_father == loop->inner);
90 /* Record the cost of a statement, either by directly informing the
91 target model or by saving it in a vector for later processing.
92 Return a preliminary estimate of the statement's cost. */
94 static unsigned
95 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
96 enum vect_cost_for_stmt kind,
97 stmt_vec_info stmt_info, slp_tree node,
98 tree vectype, int misalign,
99 enum vect_cost_model_location where)
101 if ((kind == vector_load || kind == unaligned_load)
102 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
103 kind = vector_gather_load;
104 if ((kind == vector_store || kind == unaligned_store)
105 && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
106 kind = vector_scatter_store;
108 stmt_info_for_cost si
109 = { count, kind, where, stmt_info, node, vectype, misalign };
110 body_cost_vec->safe_push (si);
112 return (unsigned)
113 (builtin_vectorization_cost (kind, vectype, misalign) * count);
116 unsigned
117 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
118 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
119 tree vectype, int misalign,
120 enum vect_cost_model_location where)
122 return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
123 vectype, misalign, where);
126 unsigned
127 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
128 enum vect_cost_for_stmt kind, slp_tree node,
129 tree vectype, int misalign,
130 enum vect_cost_model_location where)
132 return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
133 vectype, misalign, where);
136 unsigned
137 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
138 enum vect_cost_for_stmt kind,
139 enum vect_cost_model_location where)
141 gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
142 || kind == scalar_stmt);
143 return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
144 NULL_TREE, 0, where);
147 /* Return a variable of type ELEM_TYPE[NELEMS]. */
149 static tree
150 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
152 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
153 "vect_array");
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Return an SSA_NAME for the vector in index N. The reference
158 is part of the vectorization of STMT_INFO and the vector is associated
159 with scalar destination SCALAR_DEST. */
161 static tree
162 read_vector_array (vec_info *vinfo,
163 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
164 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
166 tree vect_type, vect, vect_name, array_ref;
167 gimple *new_stmt;
169 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
170 vect_type = TREE_TYPE (TREE_TYPE (array));
171 vect = vect_create_destination_var (scalar_dest, vect_type);
172 array_ref = build4 (ARRAY_REF, vect_type, array,
173 build_int_cst (size_type_node, n),
174 NULL_TREE, NULL_TREE);
176 new_stmt = gimple_build_assign (vect, array_ref);
177 vect_name = make_ssa_name (vect, new_stmt);
178 gimple_assign_set_lhs (new_stmt, vect_name);
179 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
181 return vect_name;
184 /* ARRAY is an array of vectors created by create_vector_array.
185 Emit code to store SSA_NAME VECT in index N of the array.
186 The store is part of the vectorization of STMT_INFO. */
188 static void
189 write_vector_array (vec_info *vinfo,
190 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
191 tree vect, tree array, unsigned HOST_WIDE_INT n)
193 tree array_ref;
194 gimple *new_stmt;
196 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
197 build_int_cst (size_type_node, n),
198 NULL_TREE, NULL_TREE);
200 new_stmt = gimple_build_assign (array_ref, vect);
201 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
204 /* PTR is a pointer to an array of type TYPE. Return a representation
205 of *PTR. The memory reference replaces those in FIRST_DR
206 (and its group). */
208 static tree
209 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
211 tree mem_ref;
213 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
214 /* Arrays have the same alignment as their type. */
215 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
216 return mem_ref;
219 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
220 Emit the clobber before *GSI. */
222 static void
223 vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
224 gimple_stmt_iterator *gsi, tree var)
226 tree clobber = build_clobber (TREE_TYPE (var));
227 gimple *new_stmt = gimple_build_assign (var, clobber);
228 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
231 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
233 /* Function vect_mark_relevant.
235 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
237 static void
238 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
239 enum vect_relevant relevant, bool live_p)
241 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
242 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
244 if (dump_enabled_p ())
245 dump_printf_loc (MSG_NOTE, vect_location,
246 "mark relevant %d, live %d: %G", relevant, live_p,
247 stmt_info->stmt);
249 /* If this stmt is an original stmt in a pattern, we might need to mark its
250 related pattern stmt instead of the original stmt. However, such stmts
251 may have their own uses that are not in any pattern; in such cases the
252 stmt itself should be marked. */
253 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
255 /* This is the last stmt in a sequence that was detected as a
256 pattern that can potentially be vectorized. Don't mark the stmt
257 as relevant/live because it's not going to be vectorized.
258 Instead mark the pattern-stmt that replaces it. */
260 if (dump_enabled_p ())
261 dump_printf_loc (MSG_NOTE, vect_location,
262 "last stmt in pattern. don't mark"
263 " relevant/live.\n");
265 stmt_vec_info old_stmt_info = stmt_info;
266 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
267 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
268 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
269 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
271 if (live_p && relevant == vect_unused_in_scope)
273 if (dump_enabled_p ())
274 dump_printf_loc (MSG_NOTE, vect_location,
275 "vec_stmt_relevant_p: forcing live pattern stmt "
276 "relevant.\n");
277 relevant = vect_used_only_live;
280 if (dump_enabled_p ())
281 dump_printf_loc (MSG_NOTE, vect_location,
282 "mark relevant %d, live %d: %G", relevant, live_p,
283 stmt_info->stmt);
286 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
287 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
288 STMT_VINFO_RELEVANT (stmt_info) = relevant;
290 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
291 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
293 if (dump_enabled_p ())
294 dump_printf_loc (MSG_NOTE, vect_location,
295 "already marked relevant/live.\n");
296 return;
299 worklist->safe_push (stmt_info);
303 /* Function is_simple_and_all_uses_invariant
305 Return true if STMT_INFO is simple and all uses of it are invariant. */
307 bool
308 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
309 loop_vec_info loop_vinfo)
311 tree op;
312 ssa_op_iter iter;
314 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
315 if (!stmt)
316 return false;
318 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
320 enum vect_def_type dt = vect_uninitialized_def;
322 if (!vect_is_simple_use (op, loop_vinfo, &dt))
324 if (dump_enabled_p ())
325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
326 "use not simple.\n");
327 return false;
330 if (dt != vect_external_def && dt != vect_constant_def)
331 return false;
333 return true;
336 /* Function vect_stmt_relevant_p.
338 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
339 is "relevant for vectorization".
341 A stmt is considered "relevant for vectorization" if:
342 - it has uses outside the loop.
343 - it has vdefs (it alters memory).
344 - it is a control stmt in the loop (except for the exit condition).
346 CHECKME: what other side effects would the vectorizer allow? */
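/* Illustrative example (hypothetical code, not from this file): in

     for (i = 0; i < n; i++)
       {
	 sum += a[i];
	 b[i] = 2 * a[i];
       }
     use (sum);

   the store to b[i] is relevant because it has a vdef, and the
   definition of sum is live because its final value is used after
   the loop.  */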
348 static bool
349 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
350 enum vect_relevant *relevant, bool *live_p)
352 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
353 ssa_op_iter op_iter;
354 imm_use_iterator imm_iter;
355 use_operand_p use_p;
356 def_operand_p def_p;
358 *relevant = vect_unused_in_scope;
359 *live_p = false;
361 /* cond stmt other than loop exit cond. */
362 if (is_ctrl_stmt (stmt_info->stmt)
363 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
364 *relevant = vect_used_in_scope;
366 /* changing memory. */
367 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
368 if (gimple_vdef (stmt_info->stmt)
369 && !gimple_clobber_p (stmt_info->stmt))
371 if (dump_enabled_p ())
372 dump_printf_loc (MSG_NOTE, vect_location,
373 "vec_stmt_relevant_p: stmt has vdefs.\n");
374 *relevant = vect_used_in_scope;
377 /* uses outside the loop. */
378 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
380 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
382 basic_block bb = gimple_bb (USE_STMT (use_p));
383 if (!flow_bb_inside_loop_p (loop, bb))
385 if (is_gimple_debug (USE_STMT (use_p)))
386 continue;
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_NOTE, vect_location,
390 "vec_stmt_relevant_p: used out of loop.\n");
392 /* We expect all such uses to be in the loop exit phis
393 (because of loop closed form) */
394 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
395 gcc_assert (bb == single_exit (loop)->dest);
397 *live_p = true;
402 if (*live_p && *relevant == vect_unused_in_scope
403 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
405 if (dump_enabled_p ())
406 dump_printf_loc (MSG_NOTE, vect_location,
407 "vec_stmt_relevant_p: stmt live but not relevant.\n");
408 *relevant = vect_used_only_live;
411 return (*live_p || *relevant);
415 /* Function exist_non_indexing_operands_for_use_p
417 USE is one of the uses attached to STMT_INFO. Check if USE is
418 used in STMT_INFO for anything other than indexing an array. */
420 static bool
421 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
423 tree operand;
425 /* USE corresponds to some operand in STMT. If there is no data
426 reference in STMT, then any operand that corresponds to USE
427 is not indexing an array. */
428 if (!STMT_VINFO_DATA_REF (stmt_info))
429 return true;
431 /* STMT has a data_ref. FORNOW this means that it is of one of
432 the following forms:
433 -1- ARRAY_REF = var
434 -2- var = ARRAY_REF
435 (This should have been verified in analyze_data_refs).
437 'var' in the second case corresponds to a def, not a use,
438 so USE cannot correspond to any operands that are not used
439 for array indexing.
441 Therefore, all we need to check is if STMT falls into the
442 first case, and whether var corresponds to USE. */
444 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
445 if (!assign || !gimple_assign_copy_p (assign))
447 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
448 if (call && gimple_call_internal_p (call))
450 internal_fn ifn = gimple_call_internal_fn (call);
451 int mask_index = internal_fn_mask_index (ifn);
452 if (mask_index >= 0
453 && use == gimple_call_arg (call, mask_index))
454 return true;
455 int stored_value_index = internal_fn_stored_value_index (ifn);
456 if (stored_value_index >= 0
457 && use == gimple_call_arg (call, stored_value_index))
458 return true;
459 if (internal_gather_scatter_fn_p (ifn)
460 && use == gimple_call_arg (call, 1))
461 return true;
463 return false;
466 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
467 return false;
468 operand = gimple_assign_rhs1 (assign);
469 if (TREE_CODE (operand) != SSA_NAME)
470 return false;
472 if (operand == use)
473 return true;
475 return false;
480 Function process_use.
482 Inputs:
483 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
484 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
485 that defined USE. This is done by calling mark_relevant and passing it
486 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
487 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
488 be performed.
490 Outputs:
491 Generally, LIVE_P and RELEVANT are used to define the liveness and
492 relevance info of the DEF_STMT of this USE:
493 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
494 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
495 Exceptions:
496 - case 1: If USE is used only for address computations (e.g. array indexing),
497 which does not need to be directly vectorized, then the liveness/relevance
498 of the respective DEF_STMT is left unchanged.
499 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
500 we skip DEF_STMT because it has already been processed.
501 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
502 "relevant" will be modified accordingly.
504 Return true if everything is as expected. Return false otherwise. */
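/* Illustrative example (hypothetical code, not from this file): in
   "a[i] = x" the use of i only feeds the address computation, so by
   case 1 the stmt defining i is not marked through this use; the use
   of x, in contrast, does propagate RELEVANT to its defining stmt.  */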
506 static opt_result
507 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
508 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
509 bool force)
511 stmt_vec_info dstmt_vinfo;
512 enum vect_def_type dt;
514 /* case 1: we are only interested in uses that need to be vectorized. Uses
515 that are used for address computation are not considered relevant. */
516 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
517 return opt_result::success ();
519 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
520 return opt_result::failure_at (stmt_vinfo->stmt,
521 "not vectorized:"
522 " unsupported use in stmt.\n");
524 if (!dstmt_vinfo)
525 return opt_result::success ();
527 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
528 basic_block bb = gimple_bb (stmt_vinfo->stmt);
530 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
531 We have to force the stmt live since the epilogue loop needs it to
532 continue computing the reduction. */
533 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
534 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
535 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
536 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
537 && bb->loop_father == def_bb->loop_father)
539 if (dump_enabled_p ())
540 dump_printf_loc (MSG_NOTE, vect_location,
541 "reduc-stmt defining reduc-phi in the same nest.\n");
542 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
543 return opt_result::success ();
546 /* case 3a: outer-loop stmt defining an inner-loop stmt:
547 outer-loop-header-bb:
548 d = dstmt_vinfo
549 inner-loop:
550 stmt # use (d)
551 outer-loop-tail-bb:
552 ... */
553 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
555 if (dump_enabled_p ())
556 dump_printf_loc (MSG_NOTE, vect_location,
557 "outer-loop def-stmt defining inner-loop stmt.\n");
559 switch (relevant)
561 case vect_unused_in_scope:
562 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
563 vect_used_in_scope : vect_unused_in_scope;
564 break;
566 case vect_used_in_outer_by_reduction:
567 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
568 relevant = vect_used_by_reduction;
569 break;
571 case vect_used_in_outer:
572 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
573 relevant = vect_used_in_scope;
574 break;
576 case vect_used_in_scope:
577 break;
579 default:
580 gcc_unreachable ();
584 /* case 3b: inner-loop stmt defining an outer-loop stmt:
585 outer-loop-header-bb:
587 inner-loop:
588 d = dstmt_vinfo
589 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
590 stmt # use (d) */
591 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
593 if (dump_enabled_p ())
594 dump_printf_loc (MSG_NOTE, vect_location,
595 "inner-loop def-stmt defining outer-loop stmt.\n");
597 switch (relevant)
599 case vect_unused_in_scope:
600 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
601 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
602 vect_used_in_outer_by_reduction : vect_unused_in_scope;
603 break;
605 case vect_used_by_reduction:
606 case vect_used_only_live:
607 relevant = vect_used_in_outer_by_reduction;
608 break;
610 case vect_used_in_scope:
611 relevant = vect_used_in_outer;
612 break;
614 default:
615 gcc_unreachable ();
618 /* We are also not interested in uses on loop PHI backedges that are
619 inductions. Otherwise we'll needlessly vectorize the IV increment
620 and cause hybrid SLP for SLP inductions. Unless the PHI is live
621 of course. */
622 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
623 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
624 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
625 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
626 loop_latch_edge (bb->loop_father))
627 == use))
629 if (dump_enabled_p ())
630 dump_printf_loc (MSG_NOTE, vect_location,
631 "induction value on backedge.\n");
632 return opt_result::success ();
636 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
637 return opt_result::success ();
641 /* Function vect_mark_stmts_to_be_vectorized.
643 Not all stmts in the loop need to be vectorized. For example:
645 for i...
646 for j...
647 1. T0 = i + j
648 2. T1 = a[T0]
650 3. j = j + 1
652 Stmt 1 and 3 do not need to be vectorized, because loop control and
653 addressing of vectorized data-refs are handled differently.
655 This pass detects such stmts. */
657 opt_result
658 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
660 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
661 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
662 unsigned int nbbs = loop->num_nodes;
663 gimple_stmt_iterator si;
664 unsigned int i;
665 basic_block bb;
666 bool live_p;
667 enum vect_relevant relevant;
669 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
671 auto_vec<stmt_vec_info, 64> worklist;
673 /* 1. Init worklist. */
674 for (i = 0; i < nbbs; i++)
676 bb = bbs[i];
677 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
679 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
680 if (dump_enabled_p ())
681 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
682 phi_info->stmt);
684 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
685 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
687 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
689 if (is_gimple_debug (gsi_stmt (si)))
690 continue;
691 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_NOTE, vect_location,
694 "init: stmt relevant? %G", stmt_info->stmt);
696 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
697 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
701 /* 2. Process_worklist */
702 while (worklist.length () > 0)
704 use_operand_p use_p;
705 ssa_op_iter iter;
707 stmt_vec_info stmt_vinfo = worklist.pop ();
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_NOTE, vect_location,
710 "worklist: examine stmt: %G", stmt_vinfo->stmt);
712 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
713 (DEF_STMT) as relevant/irrelevant according to the relevance property
714 of STMT. */
715 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
717 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
718 propagated as is to the DEF_STMTs of its USEs.
720 One exception is when STMT has been identified as defining a reduction
721 variable; in this case we set the relevance to vect_used_by_reduction.
722 This is because we distinguish between two kinds of relevant stmts -
723 those that are used by a reduction computation, and those that are
724 (also) used by a regular computation. This allows us later on to
725 identify stmts that are used solely by a reduction, and therefore the
726 order of the results that they produce does not have to be kept. */
728 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
730 case vect_reduction_def:
731 gcc_assert (relevant != vect_unused_in_scope);
732 if (relevant != vect_unused_in_scope
733 && relevant != vect_used_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
736 return opt_result::failure_at
737 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
738 break;
740 case vect_nested_cycle:
741 if (relevant != vect_unused_in_scope
742 && relevant != vect_used_in_outer_by_reduction
743 && relevant != vect_used_in_outer)
744 return opt_result::failure_at
745 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
746 break;
748 case vect_double_reduction_def:
749 if (relevant != vect_unused_in_scope
750 && relevant != vect_used_by_reduction
751 && relevant != vect_used_only_live)
752 return opt_result::failure_at
753 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
754 break;
756 default:
757 break;
760 if (is_pattern_stmt_p (stmt_vinfo))
762 /* Pattern statements are not inserted into the code, so
763 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
764 have to scan the RHS or function arguments instead. */
765 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
767 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
768 tree op = gimple_assign_rhs1 (assign);
770 i = 1;
771 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
773 opt_result res
774 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
775 loop_vinfo, relevant, &worklist, false);
776 if (!res)
777 return res;
778 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
779 loop_vinfo, relevant, &worklist, false);
780 if (!res)
781 return res;
782 i = 2;
784 for (; i < gimple_num_ops (assign); i++)
786 op = gimple_op (assign, i);
787 if (TREE_CODE (op) == SSA_NAME)
789 opt_result res
790 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
791 &worklist, false);
792 if (!res)
793 return res;
797 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
799 for (i = 0; i < gimple_call_num_args (call); i++)
801 tree arg = gimple_call_arg (call, i);
802 opt_result res
803 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
804 &worklist, false);
805 if (!res)
806 return res;
810 else
811 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
813 tree op = USE_FROM_PTR (use_p);
814 opt_result res
815 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
816 &worklist, false);
817 if (!res)
818 return res;
821 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
823 gather_scatter_info gs_info;
824 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
825 gcc_unreachable ();
826 opt_result res
827 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
828 &worklist, true);
829 if (!res)
831 if (fatal)
832 *fatal = false;
833 return res;
836 } /* while worklist */
838 return opt_result::success ();
841 /* Function vect_model_simple_cost.
843 Models cost for simple operations, i.e. those that only emit ncopies of a
844 single op. Right now, this does not account for multiple insns that could
845 be generated for the single vector op. We will handle that shortly. */
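/* Worked example (illustrative): for NCOPIES == 2 and NDTS == 2 where
   one operand is a loop invariant, the non-SLP path below records one
   scalar_to_vec cost in the prologue and two vector_stmt costs in the
   loop body.  */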
847 static void
848 vect_model_simple_cost (vec_info *,
849 stmt_vec_info stmt_info, int ncopies,
850 enum vect_def_type *dt,
851 int ndts,
852 slp_tree node,
853 stmt_vector_for_cost *cost_vec,
854 vect_cost_for_stmt kind = vector_stmt)
856 int inside_cost = 0, prologue_cost = 0;
858 gcc_assert (cost_vec != NULL);
860 /* ??? Somehow we need to fix this at the callers. */
861 if (node)
862 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
864 if (!node)
865 /* Cost the "broadcast" of a scalar operand in to a vector operand.
866 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
867 cost model. */
868 for (int i = 0; i < ndts; i++)
869 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
870 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
871 stmt_info, 0, vect_prologue);
873 /* Pass the inside-of-loop statements to the target-specific cost model. */
874 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
875 stmt_info, 0, vect_body);
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE, vect_location,
879 "vect_model_simple_cost: inside_cost = %d, "
880 "prologue_cost = %d .\n", inside_cost, prologue_cost);
884 /* Model cost for type demotion and promotion operations. PWR is
885 normally zero for single-step promotions and demotions. It will be
886 one if two-step promotion/demotion is required, and so on. NCOPIES
887 is the number of vector results (and thus number of instructions)
888 for the narrowest end of the operation chain. Each additional
889 step doubles the number of instructions required. If WIDEN_ARITH
890 is true the stmt is doing widening arithmetic. */
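/* Worked example (illustrative): for a two-step promotion (PWR == 1)
   with NCOPIES == 2 at the narrow end, the loop below records 2 + 4
   vec_promote_demote (or vector_stmt) costs in the loop body.  */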
892 static void
893 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
894 enum vect_def_type *dt,
895 unsigned int ncopies, int pwr,
896 stmt_vector_for_cost *cost_vec,
897 bool widen_arith)
899 int i;
900 int inside_cost = 0, prologue_cost = 0;
902 for (i = 0; i < pwr + 1; i++)
904 inside_cost += record_stmt_cost (cost_vec, ncopies,
905 widen_arith
906 ? vector_stmt : vec_promote_demote,
907 stmt_info, 0, vect_body);
908 ncopies *= 2;
911 /* FORNOW: Assuming a maximum of 2 args per stmt. */
912 for (i = 0; i < 2; i++)
913 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
914 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
915 stmt_info, 0, vect_prologue);
917 if (dump_enabled_p ())
918 dump_printf_loc (MSG_NOTE, vect_location,
919 "vect_model_promotion_demotion_cost: inside_cost = %d, "
920 "prologue_cost = %d .\n", inside_cost, prologue_cost);
923 /* Returns true if the current function returns DECL. */
925 static bool
926 cfun_returns (tree decl)
928 edge_iterator ei;
929 edge e;
930 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
932 greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
933 if (!ret)
934 continue;
935 if (gimple_return_retval (ret) == decl)
936 return true;
937 /* We often end up with an aggregate copy to the result decl,
938 handle that case as well. First skip intermediate clobbers
939 though. */
940 gimple *def = ret;
943 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
945 while (gimple_clobber_p (def));
946 if (is_a <gassign *> (def)
947 && gimple_assign_lhs (def) == gimple_return_retval (ret)
948 && gimple_assign_rhs1 (def) == decl)
949 return true;
951 return false;
954 /* Function vect_model_store_cost
956 Models cost for stores. In the case of grouped accesses, one access
957 has the overhead of the grouped access attributed to it. */
959 static void
960 vect_model_store_cost (vec_info *vinfo, stmt_vec_info stmt_info, int ncopies,
961 vect_memory_access_type memory_access_type,
962 gather_scatter_info *gs_info,
963 dr_alignment_support alignment_support_scheme,
964 int misalignment,
965 vec_load_store_type vls_type, slp_tree slp_node,
966 stmt_vector_for_cost *cost_vec)
968 unsigned int inside_cost = 0, prologue_cost = 0;
969 stmt_vec_info first_stmt_info = stmt_info;
970 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
972 /* ??? Somehow we need to fix this at the callers. */
973 if (slp_node)
974 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
976 if (vls_type == VLS_STORE_INVARIANT)
978 if (!slp_node)
979 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
980 stmt_info, 0, vect_prologue);
983 /* Grouped stores update all elements in the group at once,
984 so we want the DR for the first statement. */
985 if (!slp_node && grouped_access_p)
986 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
988 /* True if we should include any once-per-group costs as well as
989 the cost of the statement itself. For SLP we only get called
990 once per group anyhow. */
991 bool first_stmt_p = (first_stmt_info == stmt_info);
993 /* We assume that the cost of a single store-lanes instruction is
994 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
995 access is instead being provided by a permute-and-store operation,
996 include the cost of the permutes. */
997 if (first_stmt_p
998 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1000 /* Uses high and low interleave or shuffle operations for each
1001 needed permute. */
1002 int group_size = DR_GROUP_SIZE (first_stmt_info);
1003 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1004 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1005 stmt_info, 0, vect_body);
1007 if (dump_enabled_p ())
1008 dump_printf_loc (MSG_NOTE, vect_location,
1009 "vect_model_store_cost: strided group_size = %d .\n",
1010 group_size);
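/* Worked example (illustrative): for GROUP_SIZE == 4 and NCOPIES == 1
   the formula above counts ceil_log2 (4) * 4 == 8 vec_perm stmts.  */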
1013 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1014 /* Costs of the stores. */
1015 if (memory_access_type == VMAT_ELEMENTWISE
1016 || memory_access_type == VMAT_GATHER_SCATTER)
1018 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1019 if (memory_access_type == VMAT_GATHER_SCATTER
1020 && gs_info->ifn == IFN_LAST && !gs_info->decl)
1021 /* For emulated scatter N offset vector element extracts
1022 (we assume the scalar scaling and ptr + offset add is consumed by
1023 the load). */
1024 inside_cost += record_stmt_cost (cost_vec, ncopies * assumed_nunits,
1025 vec_to_scalar, stmt_info, 0,
1026 vect_body);
1027 /* N scalar stores plus extracting the elements. */
1028 inside_cost += record_stmt_cost (cost_vec,
1029 ncopies * assumed_nunits,
1030 scalar_store, stmt_info, 0, vect_body);
1032 else
1033 vect_get_store_cost (vinfo, stmt_info, ncopies, alignment_support_scheme,
1034 misalignment, &inside_cost, cost_vec);
1036 if (memory_access_type == VMAT_ELEMENTWISE
1037 || memory_access_type == VMAT_STRIDED_SLP
1038 || (memory_access_type == VMAT_GATHER_SCATTER
1039 && gs_info->ifn == IFN_LAST && !gs_info->decl))
1041 /* N scalar stores plus extracting the elements. */
1042 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1043 inside_cost += record_stmt_cost (cost_vec,
1044 ncopies * assumed_nunits,
1045 vec_to_scalar, stmt_info, 0, vect_body);
1048 /* When vectorizing a store into the function result assign
1049 a penalty if the function returns in a multi-register location.
1050 In this case we assume we'll end up with having to spill the
1051 vector result and do piecewise loads as a conservative estimate. */
1052 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1053 if (base
1054 && (TREE_CODE (base) == RESULT_DECL
1055 || (DECL_P (base) && cfun_returns (base)))
1056 && !aggregate_value_p (base, cfun->decl))
1058 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1059 /* ??? Handle PARALLEL in some way. */
1060 if (REG_P (reg))
1062 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1063 /* Assume that a single reg-reg move is possible and cheap,
1064 do not account for vector to gp register move cost. */
1065 if (nregs > 1)
1067 /* Spill. */
1068 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1069 vector_store,
1070 stmt_info, 0, vect_epilogue);
1071 /* Loads. */
1072 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1073 scalar_load,
1074 stmt_info, 0, vect_epilogue);
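/* Worked example (illustrative): if the result is returned in two
   registers (NREGS == 2), each vector copy is charged one vector_store
   for the spill plus two scalar_load costs for the piecewise reload,
   all accounted in the epilogue.  */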
1079 if (dump_enabled_p ())
1080 dump_printf_loc (MSG_NOTE, vect_location,
1081 "vect_model_store_cost: inside_cost = %d, "
1082 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1086 /* Calculate cost of DR's memory access. */
1087 void
1088 vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1089 dr_alignment_support alignment_support_scheme,
1090 int misalignment,
1091 unsigned int *inside_cost,
1092 stmt_vector_for_cost *body_cost_vec)
1094 switch (alignment_support_scheme)
1096 case dr_aligned:
1098 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1099 vector_store, stmt_info, 0,
1100 vect_body);
1102 if (dump_enabled_p ())
1103 dump_printf_loc (MSG_NOTE, vect_location,
1104 "vect_model_store_cost: aligned.\n");
1105 break;
1108 case dr_unaligned_supported:
1110 /* Here, we assign an additional cost for the unaligned store. */
1111 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1112 unaligned_store, stmt_info,
1113 misalignment, vect_body);
1114 if (dump_enabled_p ())
1115 dump_printf_loc (MSG_NOTE, vect_location,
1116 "vect_model_store_cost: unaligned supported by "
1117 "hardware.\n");
1118 break;
1121 case dr_unaligned_unsupported:
1123 *inside_cost = VECT_MAX_COST;
1125 if (dump_enabled_p ())
1126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1127 "vect_model_store_cost: unsupported access.\n");
1128 break;
1131 default:
1132 gcc_unreachable ();
1136 /* Calculate cost of DR's memory access. */
1137 void
1138 vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
1139 dr_alignment_support alignment_support_scheme,
1140 int misalignment,
1141 bool add_realign_cost, unsigned int *inside_cost,
1142 unsigned int *prologue_cost,
1143 stmt_vector_for_cost *prologue_cost_vec,
1144 stmt_vector_for_cost *body_cost_vec,
1145 bool record_prologue_costs)
1147 switch (alignment_support_scheme)
1149 case dr_aligned:
1151 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1152 stmt_info, 0, vect_body);
1154 if (dump_enabled_p ())
1155 dump_printf_loc (MSG_NOTE, vect_location,
1156 "vect_model_load_cost: aligned.\n");
1158 break;
1160 case dr_unaligned_supported:
1162 /* Here, we assign an additional cost for the unaligned load. */
1163 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1164 unaligned_load, stmt_info,
1165 misalignment, vect_body);
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE, vect_location,
1169 "vect_model_load_cost: unaligned supported by "
1170 "hardware.\n");
1172 break;
1174 case dr_explicit_realign:
1176 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1177 vector_load, stmt_info, 0, vect_body);
1178 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1179 vec_perm, stmt_info, 0, vect_body);
1181 /* FIXME: If the misalignment remains fixed across the iterations of
1182 the containing loop, the following cost should be added to the
1183 prologue costs. */
1184 if (targetm.vectorize.builtin_mask_for_load)
1185 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1186 stmt_info, 0, vect_body);
1188 if (dump_enabled_p ())
1189 dump_printf_loc (MSG_NOTE, vect_location,
1190 "vect_model_load_cost: explicit realign\n");
1192 break;
1194 case dr_explicit_realign_optimized:
1196 if (dump_enabled_p ())
1197 dump_printf_loc (MSG_NOTE, vect_location,
1198 "vect_model_load_cost: unaligned software "
1199 "pipelined.\n");
1201 /* Unaligned software pipeline has a load of an address, an initial
1202 load, and possibly a mask operation to "prime" the loop. However,
1203 if this is an access in a group of loads, which provide grouped
1204 access, then the above cost should only be considered for one
1205 access in the group. Inside the loop, there is a load op
1206 and a realignment op. */
1208 if (add_realign_cost && record_prologue_costs)
1210 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1211 vector_stmt, stmt_info,
1212 0, vect_prologue);
1213 if (targetm.vectorize.builtin_mask_for_load)
1214 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1215 vector_stmt, stmt_info,
1216 0, vect_prologue);
1219 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1220 stmt_info, 0, vect_body);
1221 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1222 stmt_info, 0, vect_body);
1224 if (dump_enabled_p ())
1225 dump_printf_loc (MSG_NOTE, vect_location,
1226 "vect_model_load_cost: explicit realign optimized"
1227 "\n");
1229 break;
1232 case dr_unaligned_unsupported:
1234 *inside_cost = VECT_MAX_COST;
1236 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1238 "vect_model_load_cost: unsupported access.\n");
1239 break;
1242 default:
1243 gcc_unreachable ();
1247 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1248 the loop preheader for the vectorized stmt STMT_VINFO. */
1250 static void
1251 vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt,
1252 gimple_stmt_iterator *gsi)
1254 if (gsi)
1255 vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
1256 else
1257 vinfo->insert_on_entry (stmt_vinfo, new_stmt);
1259 if (dump_enabled_p ())
1260 dump_printf_loc (MSG_NOTE, vect_location,
1261 "created new init_stmt: %G", new_stmt);
1264 /* Function vect_init_vector.
1266 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1267 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1268 vector type a vector with all elements equal to VAL is created first.
1269 Place the initialization at GSI if it is not NULL. Otherwise, place the
1270 initialization at the loop preheader.
1271 Return the DEF of INIT_STMT.
1272 It will be used in the vectorization of STMT_INFO. */
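/* GIMPLE sketch (illustrative, SSA names made up): for a scalar
   SSA name x_1 and a four-element vector TYPE this emits roughly

     cst__2 = {x_1, x_1, x_1, x_1};

   either before GSI or on the loop preheader edge, and returns the
   new SSA name.  */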
1274 tree
1275 vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type,
1276 gimple_stmt_iterator *gsi)
1278 gimple *init_stmt;
1279 tree new_temp;
1281 /* We abuse this function to push something to an SSA name with initial 'val'. */
1282 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1284 gcc_assert (VECTOR_TYPE_P (type));
1285 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1287 /* Scalar boolean value should be transformed into
1288 all zeros or all ones value before building a vector. */
1289 if (VECTOR_BOOLEAN_TYPE_P (type))
1291 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1292 tree false_val = build_zero_cst (TREE_TYPE (type));
1294 if (CONSTANT_CLASS_P (val))
1295 val = integer_zerop (val) ? false_val : true_val;
1296 else
1298 new_temp = make_ssa_name (TREE_TYPE (type));
1299 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1300 val, true_val, false_val);
1301 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1302 val = new_temp;
1305 else
1307 gimple_seq stmts = NULL;
1308 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1309 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1310 TREE_TYPE (type), val);
1311 else
1312 /* ??? Condition vectorization expects us to do
1313 promotion of invariant/external defs. */
1314 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1315 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1316 !gsi_end_p (gsi2); )
1318 init_stmt = gsi_stmt (gsi2);
1319 gsi_remove (&gsi2, false);
1320 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1324 val = build_vector_from_val (type, val);
1327 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1328 init_stmt = gimple_build_assign (new_temp, val);
1329 vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
1330 return new_temp;
1334 /* Function vect_get_vec_defs_for_operand.
1336 OP is an operand in STMT_VINFO. This function returns a vector of
1337 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1339 In the case that OP is an SSA_NAME which is defined in the loop, then
1340 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1342 In case OP is an invariant or constant, a new stmt that creates a vector def
1343 needs to be introduced. VECTYPE may be used to specify a required type for
1344 vector invariant. */
1346 void
1347 vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
1348 unsigned ncopies,
1349 tree op, vec<tree> *vec_oprnds, tree vectype)
1351 gimple *def_stmt;
1352 enum vect_def_type dt;
1353 bool is_simple_use;
1354 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
1356 if (dump_enabled_p ())
1357 dump_printf_loc (MSG_NOTE, vect_location,
1358 "vect_get_vec_defs_for_operand: %T\n", op);
1360 stmt_vec_info def_stmt_info;
1361 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1362 &def_stmt_info, &def_stmt);
1363 gcc_assert (is_simple_use);
1364 if (def_stmt && dump_enabled_p ())
1365 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1367 vec_oprnds->create (ncopies);
1368 if (dt == vect_constant_def || dt == vect_external_def)
1370 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1371 tree vector_type;
1373 if (vectype)
1374 vector_type = vectype;
1375 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1376 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1377 vector_type = truth_type_for (stmt_vectype);
1378 else
1379 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1381 gcc_assert (vector_type);
1382 tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
1383 while (ncopies--)
1384 vec_oprnds->quick_push (vop);
1386 else
1388 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
1389 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
1390 for (unsigned i = 0; i < ncopies; ++i)
1391 vec_oprnds->quick_push (gimple_get_lhs
1392 (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
1397 /* Get vectorized definitions for OP0 and OP1. */
1399 void
1400 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1401 unsigned ncopies,
1402 tree op0, vec<tree> *vec_oprnds0, tree vectype0,
1403 tree op1, vec<tree> *vec_oprnds1, tree vectype1,
1404 tree op2, vec<tree> *vec_oprnds2, tree vectype2,
1405 tree op3, vec<tree> *vec_oprnds3, tree vectype3)
1407 if (slp_node)
1409 if (op0)
1410 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
1411 if (op1)
1412 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
1413 if (op2)
1414 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
1415 if (op3)
1416 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
1418 else
1420 if (op0)
1421 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1422 op0, vec_oprnds0, vectype0);
1423 if (op1)
1424 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1425 op1, vec_oprnds1, vectype1);
1426 if (op2)
1427 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1428 op2, vec_oprnds2, vectype2);
1429 if (op3)
1430 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
1431 op3, vec_oprnds3, vectype3);
1435 void
1436 vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
1437 unsigned ncopies,
1438 tree op0, vec<tree> *vec_oprnds0,
1439 tree op1, vec<tree> *vec_oprnds1,
1440 tree op2, vec<tree> *vec_oprnds2,
1441 tree op3, vec<tree> *vec_oprnds3)
1443 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
1444 op0, vec_oprnds0, NULL_TREE,
1445 op1, vec_oprnds1, NULL_TREE,
1446 op2, vec_oprnds2, NULL_TREE,
1447 op3, vec_oprnds3, NULL_TREE);
1450 /* Helper function called by vect_finish_replace_stmt and
1451 vect_finish_stmt_generation. Set the location of the new
1452 statement and create and return a stmt_vec_info for it. */
1454 static void
1455 vect_finish_stmt_generation_1 (vec_info *,
1456 stmt_vec_info stmt_info, gimple *vec_stmt)
1458 if (dump_enabled_p ())
1459 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1461 if (stmt_info)
1463 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1465 /* While EH edges will generally prevent vectorization, stmt might
1466 e.g. be in a must-not-throw region. Ensure newly created stmts
1467 that could throw are part of the same region. */
1468 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1469 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1470 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1472 else
1473 gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
1476 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1477 which sets the same scalar result as STMT_INFO did. Create and return a
1478 stmt_vec_info for VEC_STMT. */
1480 void
1481 vect_finish_replace_stmt (vec_info *vinfo,
1482 stmt_vec_info stmt_info, gimple *vec_stmt)
1484 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1485 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1487 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1488 gsi_replace (&gsi, vec_stmt, true);
1490 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1493 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1494 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1496 void
1497 vect_finish_stmt_generation (vec_info *vinfo,
1498 stmt_vec_info stmt_info, gimple *vec_stmt,
1499 gimple_stmt_iterator *gsi)
1501 gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1503 if (!gsi_end_p (*gsi)
1504 && gimple_has_mem_ops (vec_stmt))
1506 gimple *at_stmt = gsi_stmt (*gsi);
1507 tree vuse = gimple_vuse (at_stmt);
1508 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1510 tree vdef = gimple_vdef (at_stmt);
1511 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1512 gimple_set_modified (vec_stmt, true);
1513 /* If we have an SSA vuse and insert a store, update virtual
1514 SSA form to avoid triggering the renamer. Do so only
1515 if we can easily see all uses - which is what almost always
1516 happens with the way vectorized stmts are inserted. */
1517 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1518 && ((is_gimple_assign (vec_stmt)
1519 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1520 || (is_gimple_call (vec_stmt)
1521 && (!(gimple_call_flags (vec_stmt)
1522 & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
1523 || (gimple_call_lhs (vec_stmt)
1524 && !is_gimple_reg (gimple_call_lhs (vec_stmt)))))))
1526 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1527 gimple_set_vdef (vec_stmt, new_vdef);
1528 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1532 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1533 vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
1536 /* We want to vectorize a call to combined function CFN with function
1537 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1538 as the types of all inputs. Check whether this is possible using
1539 an internal function, returning its code if so or IFN_LAST if not. */
1541 static internal_fn
1542 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1543 tree vectype_out, tree vectype_in)
1545 internal_fn ifn;
1546 if (internal_fn_p (cfn))
1547 ifn = as_internal_fn (cfn);
1548 else
1549 ifn = associated_internal_fn (fndecl);
1550 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1552 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1553 if (info.vectorizable)
1555 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1556 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1557 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1558 OPTIMIZE_FOR_SPEED))
1559 return ifn;
1562 return IFN_LAST;
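/* For example, a sqrt call whose result and argument both have vector
   type V4DF may map to IFN_SQRT via the check above, provided
   direct_internal_fn_supported_p reports a matching optab (an
   illustrative case; support depends on the target).  */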
1566 static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
1567 gimple_stmt_iterator *);
1569 /* Check whether a load or store statement in the loop described by
1570 LOOP_VINFO is possible in a loop using partial vectors. This is
1571 testing whether the vectorizer pass has the appropriate support,
1572 as well as whether the target does.
1574 VLS_TYPE says whether the statement is a load or store and VECTYPE
1575 is the type of the vector being loaded or stored. SLP_NODE is the SLP
1576 node that contains the statement, or null if none. MEMORY_ACCESS_TYPE
1577 says how the load or store is going to be implemented and GROUP_SIZE
1578 is the number of load or store statements in the containing group.
1579 If the access is a gather load or scatter store, GS_INFO describes
1580 its arguments. If the load or store is conditional, SCALAR_MASK is the
1581 condition under which it occurs.
1583 Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
1584 vectors is not supported, otherwise record the required rgroup control
1585 types. */
1587 static void
1588 check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
1589 slp_tree slp_node,
1590 vec_load_store_type vls_type,
1591 int group_size,
1592 vect_memory_access_type
1593 memory_access_type,
1594 gather_scatter_info *gs_info,
1595 tree scalar_mask)
1597 /* Invariant loads need no special support. */
1598 if (memory_access_type == VMAT_INVARIANT)
1599 return;
1601 unsigned int nvectors;
1602 if (slp_node)
1603 nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1604 else
1605 nvectors = vect_get_num_copies (loop_vinfo, vectype);
1607 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1608 vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
1609 machine_mode vecmode = TYPE_MODE (vectype);
1610 bool is_load = (vls_type == VLS_LOAD);
1611 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1613 internal_fn ifn
1614 = (is_load ? vect_load_lanes_supported (vectype, group_size, true)
1615 : vect_store_lanes_supported (vectype, group_size, true));
1616 if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
1617 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1618 else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
1619 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1620 scalar_mask);
1621 else
1623 if (dump_enabled_p ())
1624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1625 "can't operate on partial vectors because"
1626 " the target doesn't have an appropriate"
1627 " load/store-lanes instruction.\n");
1628 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1630 return;
1633 if (memory_access_type == VMAT_GATHER_SCATTER)
1635 internal_fn ifn = (is_load
1636 ? IFN_MASK_GATHER_LOAD
1637 : IFN_MASK_SCATTER_STORE);
1638 internal_fn len_ifn = (is_load
1639 ? IFN_MASK_LEN_GATHER_LOAD
1640 : IFN_MASK_LEN_SCATTER_STORE);
1641 if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
1642 gs_info->memory_type,
1643 gs_info->offset_vectype,
1644 gs_info->scale))
1645 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
1646 else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
1647 gs_info->memory_type,
1648 gs_info->offset_vectype,
1649 gs_info->scale))
1650 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
1651 scalar_mask);
1652 else
1654 if (dump_enabled_p ())
1655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1656 "can't operate on partial vectors because"
1657 " the target doesn't have an appropriate"
1658 " gather load or scatter store instruction.\n");
1659 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1661 return;
1664 if (memory_access_type != VMAT_CONTIGUOUS
1665 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1667 /* Element X of the data must come from iteration i * VF + X of the
1668 scalar loop. We need more work to support other mappings. */
1669 if (dump_enabled_p ())
1670 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1671 "can't operate on partial vectors because an"
1672 " access isn't contiguous.\n");
1673 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1674 return;
1677 if (!VECTOR_MODE_P (vecmode))
1679 if (dump_enabled_p ())
1680 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1681 "can't operate on partial vectors when emulating"
1682 " vector operations.\n");
1683 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1684 return;
1687 /* We might load more scalars than we need for permuting SLP loads.
1688 We checked in get_group_load_store_type that the extra elements
1689 don't leak into a new vector. */
1690 auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
1692 unsigned int nvectors;
1693 if (can_div_away_from_zero_p (size, nunits, &nvectors))
1694 return nvectors;
1695 gcc_unreachable ();
1698 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1699 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1700 machine_mode mask_mode;
1701 machine_mode vmode;
1702 bool using_partial_vectors_p = false;
1703 if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
1705 nvectors = group_memory_nvectors (group_size * vf, nunits);
1706 unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
1707 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
1708 using_partial_vectors_p = true;
1710 else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1711 && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1713 nvectors = group_memory_nvectors (group_size * vf, nunits);
1714 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1715 using_partial_vectors_p = true;
1718 if (!using_partial_vectors_p)
1720 if (dump_enabled_p ())
1721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1722 "can't operate on partial vectors because the"
1723 " target doesn't have the appropriate partial"
1724 " vectorization load or store.\n");
1725 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
1729 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1730 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1731 that needs to be applied to all loads and stores in a vectorized loop.
1732 Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
1733 otherwise return VEC_MASK & LOOP_MASK.
1735 MASK_TYPE is the type of both masks. If new statements are needed,
1736 insert them before GSI. */
1738 static tree
1739 prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
1740 tree vec_mask, gimple_stmt_iterator *gsi)
1742 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1743 if (!loop_mask)
1744 return vec_mask;
1746 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1748 if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
1749 return vec_mask;
1751 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1752 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1753 vec_mask, loop_mask);
1755 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1756 return and_res;
1759 /* Determine whether we can use a gather load or scatter store to vectorize
1760 strided load or store STMT_INFO by truncating the current offset to a
1761 smaller width. We need to be able to construct an offset vector:
1763 { 0, X, X*2, X*3, ... }
1765 without loss of precision, where X is STMT_INFO's DR_STEP.
1767 Return true if this is possible, describing the gather load or scatter
1768 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1770 static bool
1771 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
1772 loop_vec_info loop_vinfo, bool masked_p,
1773 gather_scatter_info *gs_info)
1775 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1776 data_reference *dr = dr_info->dr;
1777 tree step = DR_STEP (dr);
1778 if (TREE_CODE (step) != INTEGER_CST)
1780 /* ??? Perhaps we could use range information here? */
1781 if (dump_enabled_p ())
1782 dump_printf_loc (MSG_NOTE, vect_location,
1783 "cannot truncate variable step.\n");
1784 return false;
1787 /* Get the number of bits in an element. */
1788 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1789 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
1790 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
1792 /* Set COUNT to the upper limit on the number of elements - 1.
1793 Start with the maximum vectorization factor. */
1794 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
1796 /* Try lowering COUNT to the number of scalar latch iterations. */
1797 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1798 widest_int max_iters;
1799 if (max_loop_iterations (loop, &max_iters)
1800 && max_iters < count)
1801 count = max_iters.to_shwi ();
1803 /* Try scales of 1 and the element size. */
1804 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
1805 wi::overflow_type overflow = wi::OVF_NONE;
1806 for (int i = 0; i < 2; ++i)
1808 int scale = scales[i];
1809 widest_int factor;
1810 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
1811 continue;
1813 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1814 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
1815 if (overflow)
1816 continue;
1817 signop sign = range >= 0 ? UNSIGNED : SIGNED;
1818 unsigned int min_offset_bits = wi::min_precision (range, sign);
1820 /* Find the narrowest viable offset type. */
1821 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
1822 tree offset_type = build_nonstandard_integer_type (offset_bits,
1823 sign == UNSIGNED);
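/* For example, a MIN_OFFSET_BITS of 12 rounds up to a 16-bit offset type.  */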
1825 /* See whether the target supports the operation with an offset
1826 no narrower than OFFSET_TYPE. */
1827 tree memory_type = TREE_TYPE (DR_REF (dr));
1828 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
1829 vectype, memory_type, offset_type, scale,
1830 &gs_info->ifn, &gs_info->offset_vectype)
1831 || gs_info->ifn == IFN_LAST)
1832 continue;
1834 gs_info->decl = NULL_TREE;
1835 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1836 but we don't need to store that here. */
1837 gs_info->base = NULL_TREE;
1838 gs_info->element_type = TREE_TYPE (vectype);
1839 gs_info->offset = fold_convert (offset_type, step);
1840 gs_info->offset_dt = vect_constant_def;
1841 gs_info->scale = scale;
1842 gs_info->memory_type = memory_type;
1843 return true;
1846 if (overflow && dump_enabled_p ())
1847 dump_printf_loc (MSG_NOTE, vect_location,
1848 "truncating gather/scatter offset to %d bits"
1849 " might change its value.\n", element_bits);
1851 return false;
1854 /* Return true if we can use gather/scatter internal functions to
1855 vectorize STMT_INFO, which is a grouped or strided load or store.
1856 MASKED_P is true if the load or store is conditional. When returning
1857 true, fill in GS_INFO with the information required to perform the
1858 operation. */
1860 static bool
1861 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
1862 loop_vec_info loop_vinfo, bool masked_p,
1863 gather_scatter_info *gs_info)
1865 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
1866 || gs_info->ifn == IFN_LAST)
1867 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
1868 masked_p, gs_info);
1870 tree old_offset_type = TREE_TYPE (gs_info->offset);
1871 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
1873 gcc_assert (TYPE_PRECISION (new_offset_type)
1874 >= TYPE_PRECISION (old_offset_type));
1875 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_NOTE, vect_location,
1879 "using gather/scatter for strided/grouped access,"
1880 " scale = %d\n", gs_info->scale);
1882 return true;
1885 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1886 elements with a known constant step. Return -1 if that step
1887 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1889 static int
1890 compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
1892 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1893 return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
1894 size_zero_node);
1897 /* If the target supports a permute mask that reverses the elements in
1898 a vector of type VECTYPE, return that mask, otherwise return null. */
1900 static tree
1901 perm_mask_for_reverse (tree vectype)
1903 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1905 /* The encoding has a single stepped pattern. */
1906 vec_perm_builder sel (nunits, 1, 3);
1907 for (int i = 0; i < 3; ++i)
1908 sel.quick_push (nunits - 1 - i);
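/* For example, for V4SI this encodes the selector { 3, 2, 1, 0 }.  */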
1910 vec_perm_indices indices (sel, 1, nunits);
1911 if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
1912 indices))
1913 return NULL_TREE;
1914 return vect_gen_perm_mask_checked (vectype, indices);
1917 /* A subroutine of get_load_store_type, with a subset of the same
1918 arguments. Handle the case where STMT_INFO is a load or store that
1919 accesses consecutive elements with a negative step. Sets *POFFSET
1920 to the offset to be applied to the DR for the first access. */
1922 static vect_memory_access_type
1923 get_negative_load_store_type (vec_info *vinfo,
1924 stmt_vec_info stmt_info, tree vectype,
1925 vec_load_store_type vls_type,
1926 unsigned int ncopies, poly_int64 *poffset)
1928 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1929 dr_alignment_support alignment_support_scheme;
1931 if (ncopies > 1)
1933 if (dump_enabled_p ())
1934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1935 "multiple types with negative step.\n");
1936 return VMAT_ELEMENTWISE;
1939 /* For backward running DRs the first access in vectype actually is
1940 N-1 elements before the address of the DR. */
1941 *poffset = ((-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1942 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype))));
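/* For example, for V4SI this is (-4 + 1) * 4 == -12 bytes.  */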
1944 int misalignment = dr_misalignment (dr_info, vectype, *poffset);
1945 alignment_support_scheme
1946 = vect_supportable_dr_alignment (vinfo, dr_info, vectype, misalignment);
1947 if (alignment_support_scheme != dr_aligned
1948 && alignment_support_scheme != dr_unaligned_supported)
1950 if (dump_enabled_p ())
1951 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1952 "negative step but alignment required.\n");
1953 *poffset = 0;
1954 return VMAT_ELEMENTWISE;
1957 if (vls_type == VLS_STORE_INVARIANT)
1959 if (dump_enabled_p ())
1960 dump_printf_loc (MSG_NOTE, vect_location,
1961 "negative step with invariant source;"
1962 " no permute needed.\n");
1963 return VMAT_CONTIGUOUS_DOWN;
1966 if (!perm_mask_for_reverse (vectype))
1968 if (dump_enabled_p ())
1969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1970 "negative step and reversing not supported.\n");
1971 *poffset = 0;
1972 return VMAT_ELEMENTWISE;
1975 return VMAT_CONTIGUOUS_REVERSE;
1978 /* STMT_INFO is either a masked or unconditional store. Return the value
1979 being stored. */
1981 tree
1982 vect_get_store_rhs (stmt_vec_info stmt_info)
1984 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
1986 gcc_assert (gimple_assign_single_p (assign));
1987 return gimple_assign_rhs1 (assign);
1989 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
1991 internal_fn ifn = gimple_call_internal_fn (call);
1992 int index = internal_fn_stored_value_index (ifn);
1993 gcc_assert (index >= 0);
1994 return gimple_call_arg (call, index);
1996 gcc_unreachable ();
1999 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2001 This function returns a vector type which can be composed of NELTS pieces,
2002 whose type is recorded in PTYPE.  VTYPE should be a vector type and have the
2003 same vector size as the returned vector.  It first checks whether the target
2004 supports a pieces-sized vector mode for the construction; if not, it checks
2005 whether a pieces-sized scalar mode can be used instead.  It returns NULL_TREE
2006 if no suitable composition can be found.
2008 For example, for (vtype=V16QI, nelts=4), we can probably get:
2009 - V16QI with PTYPE V4QI.
2010 - V4SI with PTYPE SI.
2011 - NULL_TREE. */
2013 static tree
2014 vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
2016 gcc_assert (VECTOR_TYPE_P (vtype));
2017 gcc_assert (known_gt (nelts, 0U));
2019 machine_mode vmode = TYPE_MODE (vtype);
2020 if (!VECTOR_MODE_P (vmode))
2021 return NULL_TREE;
2023 /* When we are asked to compose the vector from its components let
2024 that happen directly. */
2025 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype), nelts))
2027 *ptype = TREE_TYPE (vtype);
2028 return vtype;
2031 poly_uint64 vbsize = GET_MODE_BITSIZE (vmode);
2032 unsigned int pbsize;
2033 if (constant_multiple_p (vbsize, nelts, &pbsize))
2035 /* First check if vec_init optab supports construction from
2036 vector pieces directly. */
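/* For the (vtype=V16QI, nelts=4) example above, PBSIZE is 32 bits and this
   looks for a V4QI piece type.  */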
2037 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype));
2038 poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode);
2039 machine_mode rmode;
2040 if (related_vector_mode (vmode, elmode, inelts).exists (&rmode)
2041 && (convert_optab_handler (vec_init_optab, vmode, rmode)
2042 != CODE_FOR_nothing))
2044 *ptype = build_vector_type (TREE_TYPE (vtype), inelts);
2045 return vtype;
2048 /* Otherwise check whether an integer type of the same piece size exists
2049 and whether the vec_init optab supports construction from it directly. */
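/* For the (vtype=V16QI, nelts=4) example above, this tries a 32-bit integer
   piece type, yielding V4SI with PTYPE SI.  */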
2050 if (int_mode_for_size (pbsize, 0).exists (&elmode)
2051 && related_vector_mode (vmode, elmode, nelts).exists (&rmode)
2052 && (convert_optab_handler (vec_init_optab, rmode, elmode)
2053 != CODE_FOR_nothing))
2055 *ptype = build_nonstandard_integer_type (pbsize, 1);
2056 return build_vector_type (*ptype, nelts);
2060 return NULL_TREE;
2063 /* A subroutine of get_load_store_type, with a subset of the same
2064 arguments. Handle the case where STMT_INFO is part of a grouped load
2065 or store.
2067 For stores, the statements in the group are all consecutive
2068 and there is no gap at the end. For loads, the statements in the
2069 group might not be consecutive; there can be gaps between statements
2070 as well as at the end. */
2072 static bool
2073 get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2074 tree vectype, slp_tree slp_node,
2075 bool masked_p, vec_load_store_type vls_type,
2076 vect_memory_access_type *memory_access_type,
2077 poly_int64 *poffset,
2078 dr_alignment_support *alignment_support_scheme,
2079 int *misalignment,
2080 gather_scatter_info *gs_info,
2081 internal_fn *lanes_ifn)
2083 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2084 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2085 stmt_vec_info first_stmt_info;
2086 unsigned int group_size;
2087 unsigned HOST_WIDE_INT gap;
2088 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2090 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2091 group_size = DR_GROUP_SIZE (first_stmt_info);
2092 gap = DR_GROUP_GAP (first_stmt_info);
2094 else
2096 first_stmt_info = stmt_info;
2097 group_size = 1;
2098 gap = 0;
2100 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2101 bool single_element_p = (stmt_info == first_stmt_info
2102 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2103 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2105 /* True if the vectorized statements would access beyond the last
2106 statement in the group. */
2107 bool overrun_p = false;
2109 /* True if we can cope with such overrun by peeling for gaps, so that
2110 there is at least one final scalar iteration after the vector loop. */
2111 bool can_overrun_p = (!masked_p
2112 && vls_type == VLS_LOAD
2113 && loop_vinfo
2114 && !loop->inner);
2116 /* There can only be a gap at the end of the group if the stride is
2117 known at compile time. */
2118 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2120 /* Stores can't yet have gaps. */
2121 gcc_assert (slp_node || vls_type == VLS_LOAD || gap == 0);
2123 if (slp_node)
2125 /* For SLP vectorization we directly vectorize a subchain
2126 without permutation. */
2127 if (! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
2128 first_dr_info
2129 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node)[0]);
2130 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2132 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2133 separated by the stride, until we have a complete vector.
2134 Fall back to scalar accesses if that isn't possible. */
2135 if (multiple_p (nunits, group_size))
2136 *memory_access_type = VMAT_STRIDED_SLP;
2137 else
2138 *memory_access_type = VMAT_ELEMENTWISE;
2140 else
2142 overrun_p = loop_vinfo && gap != 0;
2143 if (overrun_p && vls_type != VLS_LOAD)
2145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2146 "Grouped store with gaps requires"
2147 " non-consecutive accesses\n");
2148 return false;
2150 /* An overrun is fine if the trailing elements are smaller
2151 than the alignment boundary B. Every vector access will
2152 be a multiple of B and so we are guaranteed to access a
2153 non-gap element in the same B-sized block. */
2154 if (overrun_p
2155 && gap < (vect_known_alignment_in_bytes (first_dr_info,
2156 vectype)
2157 / vect_get_scalar_dr_size (first_dr_info)))
2158 overrun_p = false;
2160 /* If the gap splits the vector in half and the target
2161 can do half-vector operations avoid the epilogue peeling
2162 by simply loading half of the vector only. Usually
2163 the construction with an upper zero half will be elided. */
2164 dr_alignment_support alss;
2165 int misalign = dr_misalignment (first_dr_info, vectype);
2166 tree half_vtype;
2167 if (overrun_p
2168 && !masked_p
2169 && (((alss = vect_supportable_dr_alignment (vinfo, first_dr_info,
2170 vectype, misalign)))
2171 == dr_aligned
2172 || alss == dr_unaligned_supported)
2173 && known_eq (nunits, (group_size - gap) * 2)
2174 && known_eq (nunits, group_size)
2175 && (vector_vector_composition_type (vectype, 2, &half_vtype)
2176 != NULL_TREE))
2177 overrun_p = false;
2179 if (overrun_p && !can_overrun_p)
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "Peeling for outer loop is not supported\n");
2184 return false;
2186 int cmp = compare_step_with_zero (vinfo, stmt_info);
2187 if (cmp < 0)
2189 if (single_element_p)
2190 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2191 only correct for single element "interleaving" SLP. */
2192 *memory_access_type = get_negative_load_store_type
2193 (vinfo, stmt_info, vectype, vls_type, 1, poffset);
2194 else
2196 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2197 separated by the stride, until we have a complete vector.
2198 Fall back to scalar accesses if that isn't possible. */
2199 if (multiple_p (nunits, group_size))
2200 *memory_access_type = VMAT_STRIDED_SLP;
2201 else
2202 *memory_access_type = VMAT_ELEMENTWISE;
2205 else if (cmp == 0 && loop_vinfo)
2207 gcc_assert (vls_type == VLS_LOAD);
2208 *memory_access_type = VMAT_INVARIANT;
2209 /* Invariant accesses perform only component accesses, alignment
2210 is irrelevant for them. */
2211 *alignment_support_scheme = dr_unaligned_supported;
2213 else
2214 *memory_access_type = VMAT_CONTIGUOUS;
2216 /* When we have a contiguous access across loop iterations
2217 but the access in the loop doesn't cover the full vector
2218 we can end up with no gap recorded but still excess
2219 elements accessed, see PR103116. Make sure we peel for
2220 gaps if necessary and sufficient and give up if not.
2222 If there is a combination of the access not covering the full
2223 vector and a gap recorded then we may need to peel twice. */
2224 if (loop_vinfo
2225 && *memory_access_type == VMAT_CONTIGUOUS
2226 && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
2227 && !multiple_p (group_size * LOOP_VINFO_VECT_FACTOR (loop_vinfo),
2228 nunits))
2230 unsigned HOST_WIDE_INT cnunits, cvf;
2231 if (!can_overrun_p
2232 || !nunits.is_constant (&cnunits)
2233 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&cvf)
2234 /* Peeling for gaps assumes that a single scalar iteration
2235 is enough to make sure the last vector iteration doesn't
2236 access excess elements.
2237 ??? Enhancements include peeling multiple iterations
2238 or using masked loads with a static mask. */
2239 || (group_size * cvf) % cnunits + group_size - gap < cnunits)
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "peeling for gaps insufficient for "
2244 "access\n");
2245 return false;
2247 overrun_p = true;
2251 else
2253 /* We can always handle this case using elementwise accesses,
2254 but see if something more efficient is available. */
2255 *memory_access_type = VMAT_ELEMENTWISE;
2257 /* If there is a gap at the end of the group then these optimizations
2258 would access excess elements in the last iteration. */
2259 bool would_overrun_p = (gap != 0);
2260 /* An overrun is fine if the trailing elements are smaller than the
2261 alignment boundary B. Every vector access will be a multiple of B
2262 and so we are guaranteed to access a non-gap element in the
2263 same B-sized block. */
2264 if (would_overrun_p
2265 && !masked_p
2266 && gap < (vect_known_alignment_in_bytes (first_dr_info, vectype)
2267 / vect_get_scalar_dr_size (first_dr_info)))
2268 would_overrun_p = false;
2270 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2271 && (can_overrun_p || !would_overrun_p)
2272 && compare_step_with_zero (vinfo, stmt_info) > 0)
2274 /* First cope with the degenerate case of a single-element
2275 vector. */
2276 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2279 else
2281 /* Otherwise try using LOAD/STORE_LANES. */
2282 *lanes_ifn
2283 = vls_type == VLS_LOAD
2284 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2285 : vect_store_lanes_supported (vectype, group_size,
2286 masked_p);
2287 if (*lanes_ifn != IFN_LAST)
2289 *memory_access_type = VMAT_LOAD_STORE_LANES;
2290 overrun_p = would_overrun_p;
2293 /* If that fails, try using permuting loads. */
2294 else if (vls_type == VLS_LOAD
2295 ? vect_grouped_load_supported (vectype,
2296 single_element_p,
2297 group_size)
2298 : vect_grouped_store_supported (vectype, group_size))
2300 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2301 overrun_p = would_overrun_p;
2306 /* As a last resort, try using a gather load or scatter store.
2308 ??? Although the code can handle all group sizes correctly,
2309 it probably isn't a win to use separate strided accesses based
2310 on nearby locations. Or, even if it's a win over scalar code,
2311 it might not be a win over vectorizing at a lower VF, if that
2312 allows us to use contiguous accesses. */
2313 if (*memory_access_type == VMAT_ELEMENTWISE
2314 && single_element_p
2315 && loop_vinfo
2316 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2317 masked_p, gs_info))
2318 *memory_access_type = VMAT_GATHER_SCATTER;
2321 if (*memory_access_type == VMAT_GATHER_SCATTER
2322 || *memory_access_type == VMAT_ELEMENTWISE)
2324 *alignment_support_scheme = dr_unaligned_supported;
2325 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2327 else
2329 *misalignment = dr_misalignment (first_dr_info, vectype, *poffset);
2330 *alignment_support_scheme
2331 = vect_supportable_dr_alignment (vinfo, first_dr_info, vectype,
2332 *misalignment);
2335 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2337 /* STMT is the leader of the group. Check the operands of all the
2338 stmts of the group. */
2339 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2340 while (next_stmt_info)
2342 tree op = vect_get_store_rhs (next_stmt_info);
2343 enum vect_def_type dt;
2344 if (!vect_is_simple_use (op, vinfo, &dt))
2346 if (dump_enabled_p ())
2347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2348 "use not simple.\n");
2349 return false;
2351 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2355 if (overrun_p)
2357 gcc_assert (can_overrun_p);
2358 if (dump_enabled_p ())
2359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2360 "Data access with gaps requires scalar "
2361 "epilogue loop\n");
2362 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2365 return true;
2368 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2369 if there is a memory access type that the vectorized form can use,
2370 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2371 or scatters, fill in GS_INFO accordingly. In addition
2372 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2373 the target does not support the alignment scheme. *MISALIGNMENT
2374 is set according to the alignment of the access (including
2375 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2377 SLP says whether we're performing SLP rather than loop vectorization.
2378 MASKED_P is true if the statement is conditional on a vectorized mask.
2379 VECTYPE is the vector type that the vectorized statements will use.
2380 NCOPIES is the number of vector statements that will be needed. */
2382 static bool
2383 get_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
2384 tree vectype, slp_tree slp_node,
2385 bool masked_p, vec_load_store_type vls_type,
2386 unsigned int ncopies,
2387 vect_memory_access_type *memory_access_type,
2388 poly_int64 *poffset,
2389 dr_alignment_support *alignment_support_scheme,
2390 int *misalignment,
2391 gather_scatter_info *gs_info,
2392 internal_fn *lanes_ifn)
2394 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2395 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2396 *misalignment = DR_MISALIGNMENT_UNKNOWN;
2397 *poffset = 0;
2398 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2400 *memory_access_type = VMAT_GATHER_SCATTER;
2401 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2402 gcc_unreachable ();
2403 /* When using internal functions, we rely on pattern recognition
2404 to convert the type of the offset to the type that the target
2405 requires, with the result being a call to an internal function.
2406 If that failed for some reason (e.g. because another pattern
2407 took priority), just handle cases in which the offset already
2408 has the right type. */
2409 else if (gs_info->ifn != IFN_LAST
2410 && !is_gimple_call (stmt_info->stmt)
2411 && !tree_nop_conversion_p (TREE_TYPE (gs_info->offset),
2412 TREE_TYPE (gs_info->offset_vectype)))
2414 if (dump_enabled_p ())
2415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2416 "%s offset requires a conversion\n",
2417 vls_type == VLS_LOAD ? "gather" : "scatter");
2418 return false;
2420 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2421 &gs_info->offset_dt,
2422 &gs_info->offset_vectype))
2424 if (dump_enabled_p ())
2425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2426 "%s index use not simple.\n",
2427 vls_type == VLS_LOAD ? "gather" : "scatter");
2428 return false;
2430 else if (gs_info->ifn == IFN_LAST && !gs_info->decl)
2432 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()
2433 || !TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype).is_constant ()
2434 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2435 (gs_info->offset_vectype),
2436 TYPE_VECTOR_SUBPARTS (vectype)))
2438 if (dump_enabled_p ())
2439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2440 "unsupported vector types for emulated "
2441 "gather.\n");
2442 return false;
2445 /* Gather-scatter accesses perform only component accesses, alignment
2446 is irrelevant for them. */
2447 *alignment_support_scheme = dr_unaligned_supported;
2449 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info) || slp_node)
2451 if (!get_group_load_store_type (vinfo, stmt_info, vectype, slp_node,
2452 masked_p,
2453 vls_type, memory_access_type, poffset,
2454 alignment_support_scheme,
2455 misalignment, gs_info, lanes_ifn))
2456 return false;
2458 else if (STMT_VINFO_STRIDED_P (stmt_info))
2460 gcc_assert (!slp_node);
2461 if (loop_vinfo
2462 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2463 masked_p, gs_info))
2464 *memory_access_type = VMAT_GATHER_SCATTER;
2465 else
2466 *memory_access_type = VMAT_ELEMENTWISE;
2467 /* Alignment is irrelevant here. */
2468 *alignment_support_scheme = dr_unaligned_supported;
2470 else
2472 int cmp = compare_step_with_zero (vinfo, stmt_info);
2473 if (cmp == 0)
2475 gcc_assert (vls_type == VLS_LOAD);
2476 *memory_access_type = VMAT_INVARIANT;
2477 /* Invariant accesses perform only component accesses, alignment
2478 is irrelevant for them. */
2479 *alignment_support_scheme = dr_unaligned_supported;
2481 else
2483 if (cmp < 0)
2484 *memory_access_type = get_negative_load_store_type
2485 (vinfo, stmt_info, vectype, vls_type, ncopies, poffset);
2486 else
2487 *memory_access_type = VMAT_CONTIGUOUS;
2488 *misalignment = dr_misalignment (STMT_VINFO_DR_INFO (stmt_info),
2489 vectype, *poffset);
2490 *alignment_support_scheme
2491 = vect_supportable_dr_alignment (vinfo,
2492 STMT_VINFO_DR_INFO (stmt_info),
2493 vectype, *misalignment);
2497 if ((*memory_access_type == VMAT_ELEMENTWISE
2498 || *memory_access_type == VMAT_STRIDED_SLP)
2499 && !nunits.is_constant ())
2501 if (dump_enabled_p ())
2502 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2503 "Not using elementwise accesses due to variable "
2504 "vectorization factor.\n");
2505 return false;
2508 if (*alignment_support_scheme == dr_unaligned_unsupported)
2510 if (dump_enabled_p ())
2511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2512 "unsupported unaligned access\n");
2513 return false;
2516 /* FIXME: At the moment the cost model seems to underestimate the
2517 cost of using elementwise accesses. This check preserves the
2518 traditional behavior until that can be fixed. */
2519 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2520 if (!first_stmt_info)
2521 first_stmt_info = stmt_info;
2522 if (*memory_access_type == VMAT_ELEMENTWISE
2523 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2524 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2525 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2526 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2528 if (dump_enabled_p ())
2529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2530 "not falling back to elementwise accesses\n");
2531 return false;
2533 return true;
2536 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2537 conditional operation STMT_INFO. When returning true, store the mask
2538 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2539 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2540 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2542 static bool
2543 vect_check_scalar_mask (vec_info *vinfo, stmt_vec_info stmt_info,
2544 slp_tree slp_node, unsigned mask_index,
2545 tree *mask, slp_tree *mask_node,
2546 vect_def_type *mask_dt_out, tree *mask_vectype_out)
2548 enum vect_def_type mask_dt;
2549 tree mask_vectype;
2550 slp_tree mask_node_1;
2551 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, mask_index,
2552 mask, &mask_node_1, &mask_dt, &mask_vectype))
2554 if (dump_enabled_p ())
2555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2556 "mask use not simple.\n");
2557 return false;
2560 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask)))
2562 if (dump_enabled_p ())
2563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2564 "mask argument is not a boolean.\n");
2565 return false;
2568 /* If the caller is not prepared for adjusting an external/constant
2569 SLP mask vector type fail. */
2570 if (slp_node
2571 && !mask_node
2572 && SLP_TREE_DEF_TYPE (mask_node_1) != vect_internal_def)
2574 if (dump_enabled_p ())
2575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2576 "SLP mask argument is not vectorized.\n");
2577 return false;
2580 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2581 if (!mask_vectype)
2582 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2584 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2586 if (dump_enabled_p ())
2587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2588 "could not find an appropriate vector mask type.\n");
2589 return false;
2592 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2593 TYPE_VECTOR_SUBPARTS (vectype)))
2595 if (dump_enabled_p ())
2596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2597 "vector mask type %T"
2598 " does not match vector data type %T.\n",
2599 mask_vectype, vectype);
2601 return false;
2604 *mask_dt_out = mask_dt;
2605 *mask_vectype_out = mask_vectype;
2606 if (mask_node)
2607 *mask_node = mask_node_1;
2608 return true;
2611 /* Return true if stored value RHS is suitable for vectorizing store
2612 statement STMT_INFO. When returning true, store the type of the
2613 definition in *RHS_DT_OUT, the type of the vectorized store value in
2614 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2616 static bool
2617 vect_check_store_rhs (vec_info *vinfo, stmt_vec_info stmt_info,
2618 slp_tree slp_node, tree rhs,
2619 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2620 vec_load_store_type *vls_type_out)
2622 /* In the case this is a store from a constant make sure
2623 native_encode_expr can handle it. */
2624 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2626 if (dump_enabled_p ())
2627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2628 "cannot encode constant as a byte sequence.\n");
2629 return false;
2632 unsigned op_no = 0;
2633 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2635 if (gimple_call_internal_p (call)
2636 && internal_store_fn_p (gimple_call_internal_fn (call)))
2637 op_no = internal_fn_stored_value_index (gimple_call_internal_fn (call));
2640 enum vect_def_type rhs_dt;
2641 tree rhs_vectype;
2642 slp_tree slp_op;
2643 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, op_no,
2644 &rhs, &slp_op, &rhs_dt, &rhs_vectype))
2646 if (dump_enabled_p ())
2647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2648 "use not simple.\n");
2649 return false;
2652 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2653 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2655 if (dump_enabled_p ())
2656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2657 "incompatible vector types.\n");
2658 return false;
2661 *rhs_dt_out = rhs_dt;
2662 *rhs_vectype_out = rhs_vectype;
2663 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2664 *vls_type_out = VLS_STORE_INVARIANT;
2665 else
2666 *vls_type_out = VLS_STORE;
2667 return true;
2670 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2671 Note that we support masks with floating-point type, in which case the
2672 floats are interpreted as a bitmask. */
2674 static tree
2675 vect_build_all_ones_mask (vec_info *vinfo,
2676 stmt_vec_info stmt_info, tree masktype)
2678 if (TREE_CODE (masktype) == INTEGER_TYPE)
2679 return build_int_cst (masktype, -1);
2680 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2682 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2683 mask = build_vector_from_val (masktype, mask);
2684 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2686 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2688 REAL_VALUE_TYPE r;
2689 long tmp[6];
2690 for (int j = 0; j < 6; ++j)
2691 tmp[j] = -1;
2692 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2693 tree mask = build_real (TREE_TYPE (masktype), r);
2694 mask = build_vector_from_val (masktype, mask);
2695 return vect_init_vector (vinfo, stmt_info, mask, masktype, NULL);
2697 gcc_unreachable ();
2700 /* Build an all-zero merge value of type VECTYPE while vectorizing
2701 STMT_INFO as a gather load. */
2703 static tree
2704 vect_build_zero_merge_argument (vec_info *vinfo,
2705 stmt_vec_info stmt_info, tree vectype)
2707 tree merge;
2708 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2709 merge = build_int_cst (TREE_TYPE (vectype), 0);
2710 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2712 REAL_VALUE_TYPE r;
2713 long tmp[6];
2714 for (int j = 0; j < 6; ++j)
2715 tmp[j] = 0;
2716 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2717 merge = build_real (TREE_TYPE (vectype), r);
2719 else
2720 gcc_unreachable ();
2721 merge = build_vector_from_val (vectype, merge);
2722 return vect_init_vector (vinfo, stmt_info, merge, vectype, NULL);
2725 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2726 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2727 the gather load operation. If the load is conditional, MASK is the
2728 unvectorized condition and MASK_DT is its definition type, otherwise
2729 MASK is null. */
2731 static void
2732 vect_build_gather_load_calls (vec_info *vinfo, stmt_vec_info stmt_info,
2733 gimple_stmt_iterator *gsi,
2734 gimple **vec_stmt,
2735 gather_scatter_info *gs_info,
2736 tree mask,
2737 stmt_vector_for_cost *cost_vec)
2739 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
2740 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2741 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2742 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2743 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2744 edge pe = loop_preheader_edge (loop);
2745 enum { NARROW, NONE, WIDEN } modifier;
2746 poly_uint64 gather_off_nunits
2747 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2749 /* FIXME: Keep the previous costing approach from vect_model_load_cost,
2750 which costs N scalar loads; it should be tweaked to use target-specific
2751 costs for the related gather load calls. */
2752 if (cost_vec)
2754 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
2755 unsigned int inside_cost;
2756 inside_cost = record_stmt_cost (cost_vec, ncopies * assumed_nunits,
2757 scalar_load, stmt_info, 0, vect_body);
2758 if (dump_enabled_p ())
2759 dump_printf_loc (MSG_NOTE, vect_location,
2760 "vect_model_load_cost: inside_cost = %d, "
2761 "prologue_cost = 0 .\n",
2762 inside_cost);
2763 return;
2766 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2767 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2768 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2769 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2770 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2771 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2772 tree scaletype = TREE_VALUE (arglist);
2773 tree real_masktype = masktype;
2774 gcc_checking_assert (types_compatible_p (srctype, rettype)
2775 && (!mask
2776 || TREE_CODE (masktype) == INTEGER_TYPE
2777 || types_compatible_p (srctype, masktype)));
2778 if (mask)
2779 masktype = truth_type_for (srctype);
2781 tree mask_halftype = masktype;
2782 tree perm_mask = NULL_TREE;
2783 tree mask_perm_mask = NULL_TREE;
2784 if (known_eq (nunits, gather_off_nunits))
2785 modifier = NONE;
2786 else if (known_eq (nunits * 2, gather_off_nunits))
2788 modifier = WIDEN;
2790 /* Currently widening gathers and scatters are only supported for
2791 fixed-length vectors. */
2792 int count = gather_off_nunits.to_constant ();
2793 vec_perm_builder sel (count, count, 1);
2794 for (int i = 0; i < count; ++i)
2795 sel.quick_push (i | (count / 2));
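/* For example, with COUNT == 4 the selector is { 2, 3, 2, 3 }, selecting the
   high half of the offset vector.  */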
2797 vec_perm_indices indices (sel, 1, count);
2798 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2799 indices);
2801 else if (known_eq (nunits, gather_off_nunits * 2))
2803 modifier = NARROW;
2805 /* Currently narrowing gathers and scatters are only supported for
2806 fixed-length vectors. */
2807 int count = nunits.to_constant ();
2808 vec_perm_builder sel (count, count, 1);
2809 sel.quick_grow (count);
2810 for (int i = 0; i < count; ++i)
2811 sel[i] = i < count / 2 ? i : i + count / 2;
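/* For example, with COUNT == 4 the selector is { 0, 1, 4, 5 }, i.e. the low
   halves of the two input vectors.  */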
2812 vec_perm_indices indices (sel, 2, count);
2813 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2815 ncopies *= 2;
2817 if (mask && VECTOR_TYPE_P (real_masktype))
2819 for (int i = 0; i < count; ++i)
2820 sel[i] = i | (count / 2);
2821 indices.new_vector (sel, 2, count);
2822 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2824 else if (mask)
2825 mask_halftype = truth_type_for (gs_info->offset_vectype);
2827 else
2828 gcc_unreachable ();
2830 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2831 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2833 tree ptr = fold_convert (ptrtype, gs_info->base);
2834 if (!is_gimple_min_invariant (ptr))
2836 gimple_seq seq;
2837 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2838 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2839 gcc_assert (!new_bb);
2842 tree scale = build_int_cst (scaletype, gs_info->scale);
2844 tree vec_oprnd0 = NULL_TREE;
2845 tree vec_mask = NULL_TREE;
2846 tree src_op = NULL_TREE;
2847 tree mask_op = NULL_TREE;
2848 tree prev_res = NULL_TREE;
2850 if (!mask)
2852 src_op = vect_build_zero_merge_argument (vinfo, stmt_info, rettype);
2853 mask_op = vect_build_all_ones_mask (vinfo, stmt_info, masktype);
2856 auto_vec<tree> vec_oprnds0;
2857 auto_vec<tree> vec_masks;
2858 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2859 modifier == WIDEN ? ncopies / 2 : ncopies,
2860 gs_info->offset, &vec_oprnds0);
2861 if (mask)
2862 vect_get_vec_defs_for_operand (vinfo, stmt_info,
2863 modifier == NARROW ? ncopies / 2 : ncopies,
2864 mask, &vec_masks, masktype);
2865 for (int j = 0; j < ncopies; ++j)
2867 tree op, var;
2868 if (modifier == WIDEN && (j & 1))
2869 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
2870 perm_mask, stmt_info, gsi);
2871 else
2872 op = vec_oprnd0 = vec_oprnds0[modifier == WIDEN ? j / 2 : j];
2874 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2876 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2877 TYPE_VECTOR_SUBPARTS (idxtype)));
2878 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2879 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2880 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2881 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2882 op = var;
2885 if (mask)
2887 if (mask_perm_mask && (j & 1))
2888 mask_op = permute_vec_elements (vinfo, mask_op, mask_op,
2889 mask_perm_mask, stmt_info, gsi);
2890 else
2892 if (modifier == NARROW)
2894 if ((j & 1) == 0)
2895 vec_mask = vec_masks[j / 2];
2897 else
2898 vec_mask = vec_masks[j];
2900 mask_op = vec_mask;
2901 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2903 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2904 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2905 gcc_assert (known_eq (sub1, sub2));
2906 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2907 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2908 gassign *new_stmt
2909 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2910 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2911 mask_op = var;
2914 if (modifier == NARROW && !VECTOR_TYPE_P (real_masktype))
2916 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2917 gassign *new_stmt
2918 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2919 : VEC_UNPACK_LO_EXPR,
2920 mask_op);
2921 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2922 mask_op = var;
2924 src_op = mask_op;
2927 tree mask_arg = mask_op;
2928 if (masktype != real_masktype)
2930 tree utype, optype = TREE_TYPE (mask_op);
2931 if (VECTOR_TYPE_P (real_masktype)
2932 || TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2933 utype = real_masktype;
2934 else
2935 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2936 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2937 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2938 gassign *new_stmt
2939 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2940 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2941 mask_arg = var;
2942 if (!useless_type_conversion_p (real_masktype, utype))
2944 gcc_assert (TYPE_PRECISION (utype)
2945 <= TYPE_PRECISION (real_masktype));
2946 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2947 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2948 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2949 mask_arg = var;
2951 src_op = build_zero_cst (srctype);
2953 gimple *new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2954 mask_arg, scale);
2956 if (!useless_type_conversion_p (vectype, rettype))
2958 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2959 TYPE_VECTOR_SUBPARTS (rettype)));
2960 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2961 gimple_call_set_lhs (new_stmt, op);
2962 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2963 var = make_ssa_name (vec_dest);
2964 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2965 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2966 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2968 else
2970 var = make_ssa_name (vec_dest, new_stmt);
2971 gimple_call_set_lhs (new_stmt, var);
2972 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
2975 if (modifier == NARROW)
2977 if ((j & 1) == 0)
2979 prev_res = var;
2980 continue;
2982 var = permute_vec_elements (vinfo, prev_res, var, perm_mask,
2983 stmt_info, gsi);
2984 new_stmt = SSA_NAME_DEF_STMT (var);
2987 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
2989 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
2992 /* Prepare the base and offset in GS_INFO for vectorization.
2993 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2994 to the vectorized offset argument for the first copy of STMT_INFO.
2995 STMT_INFO is the statement described by GS_INFO and LOOP is the
2996 containing loop. */
2998 static void
2999 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo,
3000 class loop *loop, stmt_vec_info stmt_info,
3001 slp_tree slp_node, gather_scatter_info *gs_info,
3002 tree *dataref_ptr, vec<tree> *vec_offset)
3004 gimple_seq stmts = NULL;
3005 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
3006 if (stmts != NULL)
3008 basic_block new_bb;
3009 edge pe = loop_preheader_edge (loop);
3010 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3011 gcc_assert (!new_bb);
3013 if (slp_node)
3014 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_offset);
3015 else
3017 unsigned ncopies
3018 = vect_get_num_copies (loop_vinfo, gs_info->offset_vectype);
3019 vect_get_vec_defs_for_operand (loop_vinfo, stmt_info, ncopies,
3020 gs_info->offset, vec_offset,
3021 gs_info->offset_vectype);
3025 /* Prepare to implement a grouped or strided load or store using
3026 the gather load or scatter store operation described by GS_INFO.
3027 STMT_INFO is the load or store statement.
3029 Set *DATAREF_BUMP to the amount that should be added to the base
3030 address after each copy of the vectorized statement. Set *VEC_OFFSET
3031 to an invariant offset vector in which element I has the value
3032 I * DR_STEP / SCALE. */
3034 static void
3035 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
3036 loop_vec_info loop_vinfo,
3037 gimple_stmt_iterator *gsi,
3038 gather_scatter_info *gs_info,
3039 tree *dataref_bump, tree *vec_offset,
3040 vec_loop_lens *loop_lens)
3042 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3043 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3045 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3047 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
3048 ivtmp_8 = _31 * 16 (step in bytes);
3049 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
3050 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
3051 tree loop_len
3052 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, vectype, 0, 0);
3053 tree tmp
3054 = fold_build2 (MULT_EXPR, sizetype,
3055 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3056 loop_len);
3057 *dataref_bump = force_gimple_operand_gsi (gsi, tmp, true, NULL_TREE, true,
3058 GSI_SAME_STMT);
3060 else
3062 tree bump
3063 = size_binop (MULT_EXPR,
3064 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
3065 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
3066 *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump);
3069 /* The offset given in GS_INFO can have pointer type, so use the element
3070 type of the vector instead. */
3071 tree offset_type = TREE_TYPE (gs_info->offset_vectype);
3073 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3074 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3075 ssize_int (gs_info->scale));
3076 step = fold_convert (offset_type, step);
3078 /* Create {0, X, X*2, X*3, ...}. */
3079 tree offset = fold_build2 (VEC_SERIES_EXPR, gs_info->offset_vectype,
3080 build_zero_cst (offset_type), step);
3081 *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset);
3084 /* Prepare the pointer IVs which need to be updated by a variable amount.
3085 That variable amount is the outcome of .SELECT_VL.  In this case we can
3086 allow each iteration to process a flexible number of elements, as long
3087 as that number is <= VF elements.
3089 Return the data reference increment computed according to SELECT_VL.
3090 If new statements are needed, insert them before GSI. */
3092 static tree
3093 vect_get_loop_variant_data_ptr_increment (
3094 vec_info *vinfo, tree aggr_type, gimple_stmt_iterator *gsi,
3095 vec_loop_lens *loop_lens, dr_vec_info *dr_info,
3096 vect_memory_access_type memory_access_type)
3098 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3099 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3101 /* Gather/scatter accesses never reach here. */
3102 gcc_assert (memory_access_type != VMAT_GATHER_SCATTER);
3104 /* When we use the SELECT_VL pattern, we adjust the memory address
3105 dynamically by the .SELECT_VL result.
3107 The result of .SELECT_VL is the number of elements to
3108 be processed in each iteration.  So the memory address
3109 adjustment operation should be:
3111 addr = addr + .SELECT_VL (ARG..) * step;
3113 tree loop_len
3114 = vect_get_loop_len (loop_vinfo, gsi, loop_lens, 1, aggr_type, 0, 0);
3115 tree len_type = TREE_TYPE (loop_len);
3116 /* Since the outcome of .SELECT_VL is a number of elements, scale it by
3117 the step to get a byte amount that can be used to adjust the
3118 variable-amount pointer IVs. */
3119 tree tmp = fold_build2 (MULT_EXPR, len_type, loop_len,
3120 wide_int_to_tree (len_type, wi::to_widest (step)));
3121 tree bump = make_temp_ssa_name (len_type, NULL, "ivtmp");
3122 gassign *assign = gimple_build_assign (bump, tmp);
3123 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
3124 return bump;
3127 /* Return the amount that should be added to a vector pointer to move
3128 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3129 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3130 vectorization. */
3132 static tree
3133 vect_get_data_ptr_increment (vec_info *vinfo, gimple_stmt_iterator *gsi,
3134 dr_vec_info *dr_info, tree aggr_type,
3135 vect_memory_access_type memory_access_type,
3136 vec_loop_lens *loop_lens = nullptr)
3138 if (memory_access_type == VMAT_INVARIANT)
3139 return size_zero_node;
3141 loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo);
3142 if (loop_vinfo && LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo))
3143 return vect_get_loop_variant_data_ptr_increment (vinfo, aggr_type, gsi,
3144 loop_lens, dr_info,
3145 memory_access_type);
3147 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3148 tree step = vect_dr_behavior (vinfo, dr_info)->step;
3149 if (tree_int_cst_sgn (step) == -1)
3150 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3151 return iv_step;
3154 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
3156 static bool
3157 vectorizable_bswap (vec_info *vinfo,
3158 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3159 gimple **vec_stmt, slp_tree slp_node,
3160 slp_tree *slp_op,
3161 tree vectype_in, stmt_vector_for_cost *cost_vec)
3163 tree op, vectype;
3164 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3165 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3166 unsigned ncopies;
3168 op = gimple_call_arg (stmt, 0);
3169 vectype = STMT_VINFO_VECTYPE (stmt_info);
3170 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3172 /* Multiple types in SLP are handled by creating the appropriate number of
3173 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3174 case of SLP. */
3175 if (slp_node)
3176 ncopies = 1;
3177 else
3178 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3180 gcc_assert (ncopies >= 1);
3182 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3183 if (! char_vectype)
3184 return false;
3186 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3187 unsigned word_bytes;
3188 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3189 return false;
3191 /* The encoding uses one stepped pattern for each byte in the word. */
3192 vec_perm_builder elts (num_bytes, word_bytes, 3);
3193 for (unsigned i = 0; i < 3; ++i)
3194 for (unsigned j = 0; j < word_bytes; ++j)
3195 elts.quick_push ((i + 1) * word_bytes - j - 1);
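/* For example, with 4-byte words this starts
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... },
   reversing the bytes within each word.  */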
3197 vec_perm_indices indices (elts, 1, num_bytes);
3198 machine_mode vmode = TYPE_MODE (char_vectype);
3199 if (!can_vec_perm_const_p (vmode, vmode, indices))
3200 return false;
3202 if (! vec_stmt)
3204 if (slp_node
3205 && !vect_maybe_update_slp_op_vectype (slp_op[0], vectype_in))
3207 if (dump_enabled_p ())
3208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3209 "incompatible vector types for invariants\n");
3210 return false;
3213 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3214 DUMP_VECT_SCOPE ("vectorizable_bswap");
3215 record_stmt_cost (cost_vec,
3216 1, vector_stmt, stmt_info, 0, vect_prologue);
3217 record_stmt_cost (cost_vec,
3218 slp_node
3219 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies,
3220 vec_perm, stmt_info, 0, vect_body);
3221 return true;
3224 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3226 /* Transform. */
3227 vec<tree> vec_oprnds = vNULL;
3228 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
3229 op, &vec_oprnds);
3230 /* Arguments are ready.  Create the new vector stmt. */
3231 unsigned i;
3232 tree vop;
3233 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3235 gimple *new_stmt;
3236 tree tem = make_ssa_name (char_vectype);
3237 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3238 char_vectype, vop));
3239 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3240 tree tem2 = make_ssa_name (char_vectype);
3241 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3242 tem, tem, bswap_vconst);
3243 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3244 tem = make_ssa_name (vectype);
3245 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3246 vectype, tem2));
3247 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3248 if (slp_node)
3249 slp_node->push_vec_def (new_stmt);
3250 else
3251 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3254 if (!slp_node)
3255 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3257 vec_oprnds.release ();
3258 return true;
3261 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3262 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3263 in a single step. On success, store the binary pack code in
3264 *CONVERT_CODE. */
3266 static bool
3267 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3268 code_helper *convert_code)
3270 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3271 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3272 return false;
3274 code_helper code;
3275 int multi_step_cvt = 0;
3276 auto_vec <tree, 8> interm_types;
3277 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3278 &code, &multi_step_cvt, &interm_types)
3279 || multi_step_cvt)
3280 return false;
3282 *convert_code = code;
3283 return true;
3286 /* Function vectorizable_call.
3288 Check if STMT_INFO performs a function call that can be vectorized.
3289 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3290 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3291 Return true if STMT_INFO is vectorizable in this way. */
3293 static bool
3294 vectorizable_call (vec_info *vinfo,
3295 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3296 gimple **vec_stmt, slp_tree slp_node,
3297 stmt_vector_for_cost *cost_vec)
3299 gcall *stmt;
3300 tree vec_dest;
3301 tree scalar_dest;
3302 tree op;
3303 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3304 tree vectype_out, vectype_in;
3305 poly_uint64 nunits_in;
3306 poly_uint64 nunits_out;
3307 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
3308 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
3309 tree fndecl, new_temp, rhs_type;
3310 enum vect_def_type dt[4]
3311 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3312 vect_unknown_def_type };
3313 tree vectypes[ARRAY_SIZE (dt)] = {};
3314 slp_tree slp_op[ARRAY_SIZE (dt)] = {};
3315 int ndts = ARRAY_SIZE (dt);
3316 int ncopies, j;
3317 auto_vec<tree, 8> vargs;
3318 enum { NARROW, NONE, WIDEN } modifier;
3319 size_t i, nargs;
3320 tree lhs;
3322 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3323 return false;
3325 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3326 && ! vec_stmt)
3327 return false;
3329 /* Is STMT_INFO a vectorizable call? */
3330 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3331 if (!stmt)
3332 return false;
3334 if (gimple_call_internal_p (stmt)
3335 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3336 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3337 /* Handled by vectorizable_load and vectorizable_store. */
3338 return false;
3340 if (gimple_call_lhs (stmt) == NULL_TREE
3341 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3342 return false;
3344 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3346 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3348 /* Process function arguments. */
3349 rhs_type = NULL_TREE;
3350 vectype_in = NULL_TREE;
3351 nargs = gimple_call_num_args (stmt);
3353 /* Bail out if the function has more than four arguments; we do not have
3354 interesting builtin functions to vectorize with more than two arguments
3355 except for fma.  Having no arguments is also not supported. */
3356 if (nargs == 0 || nargs > 4)
3357 return false;
3359 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3360 combined_fn cfn = gimple_call_combined_fn (stmt);
3361 if (cfn == CFN_GOMP_SIMD_LANE)
3363 nargs = 0;
3364 rhs_type = unsigned_type_node;
3367 int mask_opno = -1;
3368 if (internal_fn_p (cfn))
3369 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3371 for (i = 0; i < nargs; i++)
3373 if ((int) i == mask_opno)
3375 if (!vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_opno,
3376 &op, &slp_op[i], &dt[i], &vectypes[i]))
3377 return false;
3378 continue;
3381 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
3382 i, &op, &slp_op[i], &dt[i], &vectypes[i]))
3384 if (dump_enabled_p ())
3385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3386 "use not simple.\n");
3387 return false;
3390 /* We can only handle calls with arguments of the same type. */
3391 if (rhs_type
3392 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3394 if (dump_enabled_p ())
3395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3396 "argument types differ.\n");
3397 return false;
3399 if (!rhs_type)
3400 rhs_type = TREE_TYPE (op);
3402 if (!vectype_in)
3403 vectype_in = vectypes[i];
3404 else if (vectypes[i]
3405 && !types_compatible_p (vectypes[i], vectype_in))
3407 if (dump_enabled_p ())
3408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3409 "argument vector types differ.\n");
3410 return false;
3413 /* If all arguments are external or constant defs, infer the vector type
3414 from the scalar type. */
3415 if (!vectype_in)
3416 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3417 if (vec_stmt)
3418 gcc_assert (vectype_in);
3419 if (!vectype_in)
3421 if (dump_enabled_p ())
3422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3423 "no vectype for scalar type %T\n", rhs_type);
3425 return false;
3427 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3428 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3429 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3430 by a pack of the two vectors into an SI vector. We would need
3431 separate code to handle direct VnDI->VnSI IFN_CTZs. */
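/* A sketch of the packing strategy described above, for 2-element DI
   input vectors (illustrative GIMPLE only, not generated verbatim by
   this file):
     t0 = .CTZ (src0);                 V2DI -> V2DI
     t1 = .CTZ (src1);                 V2DI -> V2DI
     dst = VEC_PACK_TRUNC <t0, t1>;    two V2DI -> one V4SI  */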
3432 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3434 if (dump_enabled_p ())
3435 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3436 "mismatched vector sizes %T and %T\n",
3437 vectype_in, vectype_out);
3438 return false;
3441 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3442 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3444 if (dump_enabled_p ())
3445 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3446 "mixed mask and nonmask vector types\n");
3447 return false;
3450 if (vect_emulated_vector_p (vectype_in) || vect_emulated_vector_p (vectype_out))
3452 if (dump_enabled_p ())
3453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3454 "use emulated vector type for call\n");
3455 return false;
3458 /* FORNOW */
3459 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3460 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3461 if (known_eq (nunits_in * 2, nunits_out))
3462 modifier = NARROW;
3463 else if (known_eq (nunits_out, nunits_in))
3464 modifier = NONE;
3465 else if (known_eq (nunits_out * 2, nunits_in))
3466 modifier = WIDEN;
3467 else
3468 return false;
3470 /* We only handle functions that do not read or clobber memory. */
3471 if (gimple_vuse (stmt))
3473 if (dump_enabled_p ())
3474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3475 "function reads from or writes to memory.\n");
3476 return false;
3479 /* For now, we only vectorize functions if a target specific builtin
3480 is available. TODO -- in some cases, it might be profitable to
3481 insert the calls for pieces of the vector, in order to be able
3482 to vectorize other operations in the loop. */
3483 fndecl = NULL_TREE;
3484 internal_fn ifn = IFN_LAST;
3485 tree callee = gimple_call_fndecl (stmt);
3487 /* First try using an internal function. */
3488 code_helper convert_code = MAX_TREE_CODES;
3489 if (cfn != CFN_LAST
3490 && (modifier == NONE
3491 || (modifier == NARROW
3492 && simple_integer_narrowing (vectype_out, vectype_in,
3493 &convert_code))))
3494 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3495 vectype_in);
3497 /* If that fails, try asking for a target-specific built-in function. */
3498 if (ifn == IFN_LAST)
3500 if (cfn != CFN_LAST)
3501 fndecl = targetm.vectorize.builtin_vectorized_function
3502 (cfn, vectype_out, vectype_in);
3503 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3504 fndecl = targetm.vectorize.builtin_md_vectorized_function
3505 (callee, vectype_out, vectype_in);
3508 if (ifn == IFN_LAST && !fndecl)
3510 if (cfn == CFN_GOMP_SIMD_LANE
3511 && !slp_node
3512 && loop_vinfo
3513 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3514 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3515 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3516 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3518 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3519 { 0, 1, 2, ... vf - 1 } vector. */
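/* For instance, with a vectorization factor of 4 handled in a single
   copy, the lane vector is simply the constant { 0, 1, 2, 3 }; copy J
   starts at J * nunits (see the build_index_vector call in the
   transform code below).  */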
3520 gcc_assert (nargs == 0);
3522 else if (modifier == NONE
3523 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3524 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3525 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)
3526 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP128)))
3527 return vectorizable_bswap (vinfo, stmt_info, gsi, vec_stmt, slp_node,
3528 slp_op, vectype_in, cost_vec);
3529 else
3531 if (dump_enabled_p ())
3532 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3533 "function is not vectorizable.\n");
3534 return false;
3538 if (slp_node)
3539 ncopies = 1;
3540 else if (modifier == NARROW && ifn == IFN_LAST)
3541 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3542 else
3543 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3545 /* Sanity check: make sure that at least one copy of the vectorized stmt
3546 needs to be generated. */
3547 gcc_assert (ncopies >= 1);
3549 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
3550 internal_fn cond_fn = get_conditional_internal_fn (ifn);
3551 internal_fn cond_len_fn = get_len_internal_fn (ifn);
3552 int len_opno = internal_fn_len_index (cond_len_fn);
3553 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3554 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
3555 if (!vec_stmt) /* transformation not required. */
3557 if (slp_node)
3558 for (i = 0; i < nargs; ++i)
3559 if (!vect_maybe_update_slp_op_vectype (slp_op[i],
3560 vectypes[i]
3561 ? vectypes[i] : vectype_in))
3563 if (dump_enabled_p ())
3564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3565 "incompatible vector types for invariants\n");
3566 return false;
3568 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3569 DUMP_VECT_SCOPE ("vectorizable_call");
3570 vect_model_simple_cost (vinfo, stmt_info,
3571 ncopies, dt, ndts, slp_node, cost_vec);
3572 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3573 record_stmt_cost (cost_vec, ncopies / 2,
3574 vec_promote_demote, stmt_info, 0, vect_body);
3576 if (loop_vinfo
3577 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
3578 && (reduc_idx >= 0 || mask_opno >= 0))
3580 if (reduc_idx >= 0
3581 && (cond_fn == IFN_LAST
3582 || !direct_internal_fn_supported_p (cond_fn, vectype_out,
3583 OPTIMIZE_FOR_SPEED))
3584 && (cond_len_fn == IFN_LAST
3585 || !direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3586 OPTIMIZE_FOR_SPEED)))
3588 if (dump_enabled_p ())
3589 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3590 "can't use a fully-masked loop because no"
3591 " conditional operation is available.\n");
3592 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
3594 else
3596 unsigned int nvectors
3597 = (slp_node
3598 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3599 : ncopies);
3600 tree scalar_mask = NULL_TREE;
3601 if (mask_opno >= 0)
3602 scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3603 if (cond_len_fn != IFN_LAST
3604 && direct_internal_fn_supported_p (cond_len_fn, vectype_out,
3605 OPTIMIZE_FOR_SPEED))
3606 vect_record_loop_len (loop_vinfo, lens, nvectors, vectype_out,
3607 1);
3608 else
3609 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out,
3610 scalar_mask);
3613 return true;
3616 /* Transform. */
3618 if (dump_enabled_p ())
3619 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3621 /* Handle def. */
3622 scalar_dest = gimple_call_lhs (stmt);
3623 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3625 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3626 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
3627 unsigned int vect_nargs = nargs;
3628 if (len_loop_p)
3630 if (len_opno >= 0)
3632 ifn = cond_len_fn;
3633 /* COND_* -> COND_LEN_* takes two extra arguments: LEN and BIAS. */
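/* A sketch of the rewrite (argument order as defined by the internal
   function, shown here only for illustration): a masked reduction call
   .COND_ADD (mask, a, b, else) becomes
   .COND_LEN_ADD (mask, a, b, else, len, bias) when the loop uses
   length-based partial vectors.  */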
3634 vect_nargs += 2;
3636 else if (reduc_idx >= 0)
3637 gcc_unreachable ();
3639 else if (masked_loop_p && reduc_idx >= 0)
3641 ifn = cond_fn;
3642 vect_nargs += 2;
3645 if (modifier == NONE || ifn != IFN_LAST)
3647 tree prev_res = NULL_TREE;
3648 vargs.safe_grow (vect_nargs, true);
3649 auto_vec<vec<tree> > vec_defs (nargs);
3650 for (j = 0; j < ncopies; ++j)
3652 /* Build argument list for the vectorized call. */
3653 if (slp_node)
3655 vec<tree> vec_oprnds0;
3657 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3658 vec_oprnds0 = vec_defs[0];
3660 /* Arguments are ready. Create the new vector stmt. */
3661 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3663 int varg = 0;
3664 if (masked_loop_p && reduc_idx >= 0)
3666 unsigned int vec_num = vec_oprnds0.length ();
3667 /* Always true for SLP. */
3668 gcc_assert (ncopies == 1);
3669 vargs[varg++] = vect_get_loop_mask (loop_vinfo,
3670 gsi, masks, vec_num,
3671 vectype_out, i);
3673 size_t k;
3674 for (k = 0; k < nargs; k++)
3676 vec<tree> vec_oprndsk = vec_defs[k];
3677 vargs[varg++] = vec_oprndsk[i];
3679 if (masked_loop_p && reduc_idx >= 0)
3680 vargs[varg++] = vargs[reduc_idx + 1];
3681 gimple *new_stmt;
3682 if (modifier == NARROW)
3684 /* We don't define any narrowing conditional functions
3685 at present. */
3686 gcc_assert (mask_opno < 0);
3687 tree half_res = make_ssa_name (vectype_in);
3688 gcall *call
3689 = gimple_build_call_internal_vec (ifn, vargs);
3690 gimple_call_set_lhs (call, half_res);
3691 gimple_call_set_nothrow (call, true);
3692 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3693 if ((i & 1) == 0)
3695 prev_res = half_res;
3696 continue;
3698 new_temp = make_ssa_name (vec_dest);
3699 new_stmt = vect_gimple_build (new_temp, convert_code,
3700 prev_res, half_res);
3701 vect_finish_stmt_generation (vinfo, stmt_info,
3702 new_stmt, gsi);
3704 else
3706 if (len_opno >= 0 && len_loop_p)
3708 unsigned int vec_num = vec_oprnds0.length ();
3709 /* Always true for SLP. */
3710 gcc_assert (ncopies == 1);
3711 tree len
3712 = vect_get_loop_len (loop_vinfo, gsi, lens, vec_num,
3713 vectype_out, i, 1);
3714 signed char biasval
3715 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3716 tree bias = build_int_cst (intQI_type_node, biasval);
3717 vargs[len_opno] = len;
3718 vargs[len_opno + 1] = bias;
3720 else if (mask_opno >= 0 && masked_loop_p)
3722 unsigned int vec_num = vec_oprnds0.length ();
3723 /* Always true for SLP. */
3724 gcc_assert (ncopies == 1);
3725 tree mask = vect_get_loop_mask (loop_vinfo,
3726 gsi, masks, vec_num,
3727 vectype_out, i);
3728 vargs[mask_opno] = prepare_vec_mask
3729 (loop_vinfo, TREE_TYPE (mask), mask,
3730 vargs[mask_opno], gsi);
3733 gcall *call;
3734 if (ifn != IFN_LAST)
3735 call = gimple_build_call_internal_vec (ifn, vargs);
3736 else
3737 call = gimple_build_call_vec (fndecl, vargs);
3738 new_temp = make_ssa_name (vec_dest, call);
3739 gimple_call_set_lhs (call, new_temp);
3740 gimple_call_set_nothrow (call, true);
3741 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3742 new_stmt = call;
3744 slp_node->push_vec_def (new_stmt);
3746 continue;
3749 int varg = 0;
3750 if (masked_loop_p && reduc_idx >= 0)
3751 vargs[varg++] = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3752 vectype_out, j);
3753 for (i = 0; i < nargs; i++)
3755 op = gimple_call_arg (stmt, i);
3756 if (j == 0)
3758 vec_defs.quick_push (vNULL);
3759 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
3760 op, &vec_defs[i],
3761 vectypes[i]);
3763 vargs[varg++] = vec_defs[i][j];
3765 if (masked_loop_p && reduc_idx >= 0)
3766 vargs[varg++] = vargs[reduc_idx + 1];
3768 if (len_opno >= 0 && len_loop_p)
3770 tree len = vect_get_loop_len (loop_vinfo, gsi, lens, ncopies,
3771 vectype_out, j, 1);
3772 signed char biasval
3773 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
3774 tree bias = build_int_cst (intQI_type_node, biasval);
3775 vargs[len_opno] = len;
3776 vargs[len_opno + 1] = bias;
3778 else if (mask_opno >= 0 && masked_loop_p)
3780 tree mask = vect_get_loop_mask (loop_vinfo, gsi, masks, ncopies,
3781 vectype_out, j);
3782 vargs[mask_opno]
3783 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
3784 vargs[mask_opno], gsi);
3787 gimple *new_stmt;
3788 if (cfn == CFN_GOMP_SIMD_LANE)
3790 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3791 tree new_var
3792 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3793 gimple *init_stmt = gimple_build_assign (new_var, cst);
3794 vect_init_vector_1 (vinfo, stmt_info, init_stmt, NULL);
3795 new_temp = make_ssa_name (vec_dest);
3796 new_stmt = gimple_build_assign (new_temp, new_var);
3797 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3799 else if (modifier == NARROW)
3801 /* We don't define any narrowing conditional functions at
3802 present. */
3803 gcc_assert (mask_opno < 0);
3804 tree half_res = make_ssa_name (vectype_in);
3805 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3806 gimple_call_set_lhs (call, half_res);
3807 gimple_call_set_nothrow (call, true);
3808 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3809 if ((j & 1) == 0)
3811 prev_res = half_res;
3812 continue;
3814 new_temp = make_ssa_name (vec_dest);
3815 new_stmt = vect_gimple_build (new_temp, convert_code, prev_res,
3816 half_res);
3817 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3819 else
3821 gcall *call;
3822 if (ifn != IFN_LAST)
3823 call = gimple_build_call_internal_vec (ifn, vargs);
3824 else
3825 call = gimple_build_call_vec (fndecl, vargs);
3826 new_temp = make_ssa_name (vec_dest, call);
3827 gimple_call_set_lhs (call, new_temp);
3828 gimple_call_set_nothrow (call, true);
3829 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3830 new_stmt = call;
3833 if (j == (modifier == NARROW ? 1 : 0))
3834 *vec_stmt = new_stmt;
3835 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3837 for (i = 0; i < nargs; i++)
3839 vec<tree> vec_oprndsi = vec_defs[i];
3840 vec_oprndsi.release ();
3843 else if (modifier == NARROW)
3845 auto_vec<vec<tree> > vec_defs (nargs);
3846 /* We don't define any narrowing conditional functions at present. */
3847 gcc_assert (mask_opno < 0);
3848 for (j = 0; j < ncopies; ++j)
3850 /* Build argument list for the vectorized call. */
3851 if (j == 0)
3852 vargs.create (nargs * 2);
3853 else
3854 vargs.truncate (0);
3856 if (slp_node)
3858 vec<tree> vec_oprnds0;
3860 vect_get_slp_defs (vinfo, slp_node, &vec_defs);
3861 vec_oprnds0 = vec_defs[0];
3863 /* Arguments are ready. Create the new vector stmt. */
3864 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3866 size_t k;
3867 vargs.truncate (0);
3868 for (k = 0; k < nargs; k++)
3870 vec<tree> vec_oprndsk = vec_defs[k];
3871 vargs.quick_push (vec_oprndsk[i]);
3872 vargs.quick_push (vec_oprndsk[i + 1]);
3874 gcall *call;
3875 if (ifn != IFN_LAST)
3876 call = gimple_build_call_internal_vec (ifn, vargs);
3877 else
3878 call = gimple_build_call_vec (fndecl, vargs);
3879 new_temp = make_ssa_name (vec_dest, call);
3880 gimple_call_set_lhs (call, new_temp);
3881 gimple_call_set_nothrow (call, true);
3882 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
3883 slp_node->push_vec_def (call);
3885 continue;
3888 for (i = 0; i < nargs; i++)
3890 op = gimple_call_arg (stmt, i);
3891 if (j == 0)
3893 vec_defs.quick_push (vNULL);
3894 vect_get_vec_defs_for_operand (vinfo, stmt_info, 2 * ncopies,
3895 op, &vec_defs[i], vectypes[i]);
3897 vec_oprnd0 = vec_defs[i][2*j];
3898 vec_oprnd1 = vec_defs[i][2*j+1];
3900 vargs.quick_push (vec_oprnd0);
3901 vargs.quick_push (vec_oprnd1);
3904 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3905 new_temp = make_ssa_name (vec_dest, new_stmt);
3906 gimple_call_set_lhs (new_stmt, new_temp);
3907 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
3909 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
3912 if (!slp_node)
3913 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
3915 for (i = 0; i < nargs; i++)
3917 vec<tree> vec_oprndsi = vec_defs[i];
3918 vec_oprndsi.release ();
3921 else
3922 /* No current target implements this case. */
3923 return false;
3925 vargs.release ();
3927 /* The call in STMT might prevent it from being removed in dce.
3928 We cannot remove it here, however, because of the way the ssa
3929 name it defines is mapped to the new definition.  So just replace
3930 the rhs of the statement with something harmless. */
3932 if (slp_node)
3933 return true;
3935 stmt_info = vect_orig_stmt (stmt_info);
3936 lhs = gimple_get_lhs (stmt_info->stmt);
3938 gassign *new_stmt
3939 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3940 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3942 return true;
3946 struct simd_call_arg_info
3948 tree vectype;
3949 tree op;
3950 HOST_WIDE_INT linear_step;
3951 enum vect_def_type dt;
3952 unsigned int align;
3953 bool simd_lane_linear;
3956 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3957 is linear within simd lane (but not within whole loop), note it in
3958 *ARGINFO. */
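/* A typical pattern recognized here (a sketch; the names are made up):

     _1 = GOMP_SIMD_LANE (simduid.0_7);
     _2 = _1 * 8;
     op_3 = &base + _2;

   which records &base as the operand and 8 as the linear step.  */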
3960 static void
3961 vect_simd_lane_linear (tree op, class loop *loop,
3962 struct simd_call_arg_info *arginfo)
3964 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3966 if (!is_gimple_assign (def_stmt)
3967 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3968 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3969 return;
3971 tree base = gimple_assign_rhs1 (def_stmt);
3972 HOST_WIDE_INT linear_step = 0;
3973 tree v = gimple_assign_rhs2 (def_stmt);
3974 while (TREE_CODE (v) == SSA_NAME)
3976 tree t;
3977 def_stmt = SSA_NAME_DEF_STMT (v);
3978 if (is_gimple_assign (def_stmt))
3979 switch (gimple_assign_rhs_code (def_stmt))
3981 case PLUS_EXPR:
3982 t = gimple_assign_rhs2 (def_stmt);
3983 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3984 return;
3985 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3986 v = gimple_assign_rhs1 (def_stmt);
3987 continue;
3988 case MULT_EXPR:
3989 t = gimple_assign_rhs2 (def_stmt);
3990 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3991 return;
3992 linear_step = tree_to_shwi (t);
3993 v = gimple_assign_rhs1 (def_stmt);
3994 continue;
3995 CASE_CONVERT:
3996 t = gimple_assign_rhs1 (def_stmt);
3997 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3998 || (TYPE_PRECISION (TREE_TYPE (v))
3999 < TYPE_PRECISION (TREE_TYPE (t))))
4000 return;
4001 if (!linear_step)
4002 linear_step = 1;
4003 v = t;
4004 continue;
4005 default:
4006 return;
4008 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
4009 && loop->simduid
4010 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
4011 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
4012 == loop->simduid))
4014 if (!linear_step)
4015 linear_step = 1;
4016 arginfo->linear_step = linear_step;
4017 arginfo->op = base;
4018 arginfo->simd_lane_linear = true;
4019 return;
4024 /* Return the number of elements in vector type VECTYPE, which is associated
4025 with a SIMD clone. At present these vectors always have a constant
4026 length. */
4028 static unsigned HOST_WIDE_INT
4029 simd_clone_subparts (tree vectype)
4031 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
4034 /* Function vectorizable_simd_clone_call.
4036 Check if STMT_INFO performs a function call that can be vectorized
4037 by calling a simd clone of the function.
4038 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4039 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4040 Return true if STMT_INFO is vectorizable in this way. */
4042 static bool
4043 vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
4044 gimple_stmt_iterator *gsi,
4045 gimple **vec_stmt, slp_tree slp_node,
4046 stmt_vector_for_cost *)
4048 tree vec_dest;
4049 tree scalar_dest;
4050 tree op, type;
4051 tree vec_oprnd0 = NULL_TREE;
4052 tree vectype;
4053 poly_uint64 nunits;
4054 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
4055 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
4056 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
4057 tree fndecl, new_temp;
4058 int ncopies, j;
4059 auto_vec<simd_call_arg_info> arginfo;
4060 vec<tree> vargs = vNULL;
4061 size_t i, nargs;
4062 tree lhs, rtype, ratype;
4063 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
4064 int arg_offset = 0;
4066 /* Is STMT a vectorizable call? */
4067 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4068 if (!stmt)
4069 return false;
4071 fndecl = gimple_call_fndecl (stmt);
4072 if (fndecl == NULL_TREE
4073 && gimple_call_internal_p (stmt, IFN_MASK_CALL))
4075 fndecl = gimple_call_arg (stmt, 0);
4076 gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR);
4077 fndecl = TREE_OPERAND (fndecl, 0);
4078 gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL);
4079 arg_offset = 1;
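/* A sketch of such a call as produced by if-conversion (illustrative
   only): x_1 = .MASK_CALL (&foo, a_2, b_3, cond_4), i.e. the address
   of the scalar function comes first and the remaining operands are
   the original arguments plus the condition.  */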
4081 if (fndecl == NULL_TREE)
4082 return false;
4084 struct cgraph_node *node = cgraph_node::get (fndecl);
4085 if (node == NULL || node->simd_clones == NULL)
4086 return false;
4088 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4089 return false;
4091 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4092 && ! vec_stmt)
4093 return false;
4095 if (gimple_call_lhs (stmt)
4096 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
4097 return false;
4099 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
4101 vectype = STMT_VINFO_VECTYPE (stmt_info);
4103 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
4104 return false;
4106 /* FORNOW */
4107 if (slp_node)
4108 return false;
4110 /* Process function arguments. */
4111 nargs = gimple_call_num_args (stmt) - arg_offset;
4113 /* Bail out if the function has zero arguments. */
4114 if (nargs == 0)
4115 return false;
4117 arginfo.reserve (nargs, true);
4119 for (i = 0; i < nargs; i++)
4121 simd_call_arg_info thisarginfo;
4122 affine_iv iv;
4124 thisarginfo.linear_step = 0;
4125 thisarginfo.align = 0;
4126 thisarginfo.op = NULL_TREE;
4127 thisarginfo.simd_lane_linear = false;
4129 op = gimple_call_arg (stmt, i + arg_offset);
4130 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
4131 &thisarginfo.vectype)
4132 || thisarginfo.dt == vect_uninitialized_def)
4134 if (dump_enabled_p ())
4135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4136 "use not simple.\n");
4137 return false;
4140 if (thisarginfo.dt == vect_constant_def
4141 || thisarginfo.dt == vect_external_def)
4142 gcc_assert (thisarginfo.vectype == NULL_TREE);
4143 else
4144 gcc_assert (thisarginfo.vectype != NULL_TREE);
4146 /* For linear arguments, the analysis phase should have saved
4147 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
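/* The layout used throughout this function: entry 0 of
   STMT_VINFO_SIMD_CLONE_INFO holds the selected clone's decl, and each
   linear argument I contributes three entries at indices I*3 + 1,
   I*3 + 2 and I*3 + 3: the base, the linear step and a boolean saying
   whether the argument is simd-lane linear (see the pushes in the
   analysis-only branch below).  */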
4148 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
4149 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
4151 gcc_assert (vec_stmt);
4152 thisarginfo.linear_step
4153 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
4154 thisarginfo.op
4155 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
4156 thisarginfo.simd_lane_linear
4157 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
4158 == boolean_true_node);
4159 /* If loop has been peeled for alignment, we need to adjust it. */
4160 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
4161 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
4162 if (n1 != n2 && !thisarginfo.simd_lane_linear)
4164 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
4165 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
4166 tree opt = TREE_TYPE (thisarginfo.op);
4167 bias = fold_convert (TREE_TYPE (step), bias);
4168 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
4169 thisarginfo.op
4170 = fold_build2 (POINTER_TYPE_P (opt)
4171 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
4172 thisarginfo.op, bias);
4175 else if (!vec_stmt
4176 && thisarginfo.dt != vect_constant_def
4177 && thisarginfo.dt != vect_external_def
4178 && loop_vinfo
4179 && TREE_CODE (op) == SSA_NAME
4180 && simple_iv (loop, loop_containing_stmt (stmt), op,
4181 &iv, false)
4182 && tree_fits_shwi_p (iv.step))
4184 thisarginfo.linear_step = tree_to_shwi (iv.step);
4185 thisarginfo.op = iv.base;
4187 else if ((thisarginfo.dt == vect_constant_def
4188 || thisarginfo.dt == vect_external_def)
4189 && POINTER_TYPE_P (TREE_TYPE (op)))
4190 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
4191 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4192 linear too. */
4193 if (POINTER_TYPE_P (TREE_TYPE (op))
4194 && !thisarginfo.linear_step
4195 && !vec_stmt
4196 && thisarginfo.dt != vect_constant_def
4197 && thisarginfo.dt != vect_external_def
4198 && loop_vinfo
4199 && !slp_node
4200 && TREE_CODE (op) == SSA_NAME)
4201 vect_simd_lane_linear (op, loop, &thisarginfo);
4203 arginfo.quick_push (thisarginfo);
4206 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4207 if (!vf.is_constant ())
4209 if (dump_enabled_p ())
4210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4211 "not considering SIMD clones; not yet supported"
4212 " for variable-width vectors.\n");
4213 return false;
4216 unsigned int badness = 0;
4217 struct cgraph_node *bestn = NULL;
4218 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4219 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4220 else
4221 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4222 n = n->simdclone->next_clone)
4224 unsigned int this_badness = 0;
4225 unsigned int num_calls;
4226 if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
4227 || n->simdclone->nargs != nargs)
4228 continue;
4229 if (num_calls != 1)
4230 this_badness += exact_log2 (num_calls) * 4096;
4231 if (n->simdclone->inbranch)
4232 this_badness += 8192;
4233 int target_badness = targetm.simd_clone.usable (n);
4234 if (target_badness < 0)
4235 continue;
4236 this_badness += target_badness * 512;
4237 for (i = 0; i < nargs; i++)
4239 switch (n->simdclone->args[i].arg_type)
4241 case SIMD_CLONE_ARG_TYPE_VECTOR:
4242 if (!useless_type_conversion_p
4243 (n->simdclone->args[i].orig_type,
4244 TREE_TYPE (gimple_call_arg (stmt, i + arg_offset))))
4245 i = -1;
4246 else if (arginfo[i].dt == vect_constant_def
4247 || arginfo[i].dt == vect_external_def
4248 || arginfo[i].linear_step)
4249 this_badness += 64;
4250 break;
4251 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4252 if (arginfo[i].dt != vect_constant_def
4253 && arginfo[i].dt != vect_external_def)
4254 i = -1;
4255 break;
4256 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4257 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4258 if (arginfo[i].dt == vect_constant_def
4259 || arginfo[i].dt == vect_external_def
4260 || (arginfo[i].linear_step
4261 != n->simdclone->args[i].linear_step))
4262 i = -1;
4263 break;
4264 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4265 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4266 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4267 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4268 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4269 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4270 /* FORNOW */
4271 i = -1;
4272 break;
4273 case SIMD_CLONE_ARG_TYPE_MASK:
4274 break;
4276 if (i == (size_t) -1)
4277 break;
4278 if (n->simdclone->args[i].alignment > arginfo[i].align)
4280 i = -1;
4281 break;
4283 if (arginfo[i].align)
4284 this_badness += (exact_log2 (arginfo[i].align)
4285 - exact_log2 (n->simdclone->args[i].alignment));
4287 if (i == (size_t) -1)
4288 continue;
4289 if (bestn == NULL || this_badness < badness)
4291 bestn = n;
4292 badness = this_badness;
4296 if (bestn == NULL)
4297 return false;
4299 for (i = 0; i < nargs; i++)
4301 if ((arginfo[i].dt == vect_constant_def
4302 || arginfo[i].dt == vect_external_def)
4303 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4305 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset));
4306 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4307 slp_node);
4308 if (arginfo[i].vectype == NULL
4309 || !constant_multiple_p (bestn->simdclone->simdlen,
4310 simd_clone_subparts (arginfo[i].vectype)))
4311 return false;
4314 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR
4315 && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type))
4317 if (dump_enabled_p ())
4318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4319 "vector mask arguments are not supported.\n");
4320 return false;
4323 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4324 && bestn->simdclone->mask_mode == VOIDmode
4325 && (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
4326 != simd_clone_subparts (arginfo[i].vectype)))
4328 /* FORNOW we only have partial support for vector-type masks that
4329 can't hold all of simdlen. */
4330 if (dump_enabled_p ())
4331 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4332 vect_location,
4333 "in-branch vector clones are not yet"
4334 " supported for mismatched vector sizes.\n");
4335 return false;
4337 if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
4338 && bestn->simdclone->mask_mode != VOIDmode)
4340 /* FORNOW don't support integer-type masks. */
4341 if (dump_enabled_p ())
4342 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4343 vect_location,
4344 "in-branch vector clones are not yet"
4345 " supported for integer mask modes.\n");
4346 return false;
4350 fndecl = bestn->decl;
4351 nunits = bestn->simdclone->simdlen;
4352 ncopies = vector_unroll_factor (vf, nunits);
4354 /* If the function isn't const, only allow it in simd loops where the
4355 user has asserted that at least nunits consecutive iterations can be
4356 performed using SIMD instructions. */
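/* E.g. a loop annotated with "#pragma omp simd safelen(8)" records 8 in
   loop->safelen, asserting that up to 8 consecutive iterations may be
   executed concurrently (an illustrative value only).  */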
4357 if ((loop == NULL || maybe_lt ((unsigned) loop->safelen, nunits))
4358 && gimple_vuse (stmt))
4359 return false;
4361 /* Sanity check: make sure that at least one copy of the vectorized stmt
4362 needs to be generated. */
4363 gcc_assert (ncopies >= 1);
4365 if (!vec_stmt) /* transformation not required. */
4367 /* When the original call is pure or const but the SIMD ABI dictates
4368 an aggregate return we will have to use a virtual definition and
4369 in a loop eventually even need to add a virtual PHI. That's
4370 not straightforward, so allow this to be fixed up via renaming. */
4371 if (gimple_call_lhs (stmt)
4372 && !gimple_vdef (stmt)
4373 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn->decl))) == ARRAY_TYPE)
4374 vinfo->any_known_not_updated_vssa = true;
4375 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4376 for (i = 0; i < nargs; i++)
4377 if ((bestn->simdclone->args[i].arg_type
4378 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4379 || (bestn->simdclone->args[i].arg_type
4380 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4382 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4383 + 1,
4384 true);
4385 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4386 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4387 ? size_type_node : TREE_TYPE (arginfo[i].op);
4388 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4389 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4390 tree sll = arginfo[i].simd_lane_linear
4391 ? boolean_true_node : boolean_false_node;
4392 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4394 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4395 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4396 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4397 dt, slp_node, cost_vec); */
4398 return true;
4401 /* Transform. */
4403 if (dump_enabled_p ())
4404 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4406 /* Handle def. */
4407 scalar_dest = gimple_call_lhs (stmt);
4408 vec_dest = NULL_TREE;
4409 rtype = NULL_TREE;
4410 ratype = NULL_TREE;
4411 if (scalar_dest)
4413 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4414 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4415 if (TREE_CODE (rtype) == ARRAY_TYPE)
4417 ratype = rtype;
4418 rtype = TREE_TYPE (ratype);
4422 auto_vec<vec<tree> > vec_oprnds;
4423 auto_vec<unsigned> vec_oprnds_i;
4424 vec_oprnds.safe_grow_cleared (nargs, true);
4425 vec_oprnds_i.safe_grow_cleared (nargs, true);
4426 for (j = 0; j < ncopies; ++j)
4428 /* Build argument list for the vectorized call. */
4429 if (j == 0)
4430 vargs.create (nargs);
4431 else
4432 vargs.truncate (0);
4434 for (i = 0; i < nargs; i++)
4436 unsigned int k, l, m, o;
4437 tree atype;
4438 op = gimple_call_arg (stmt, i + arg_offset);
4439 switch (bestn->simdclone->args[i].arg_type)
4441 case SIMD_CLONE_ARG_TYPE_VECTOR:
4442 atype = bestn->simdclone->args[i].vector_type;
4443 o = vector_unroll_factor (nunits,
4444 simd_clone_subparts (atype));
4445 for (m = j * o; m < (j + 1) * o; m++)
4447 if (simd_clone_subparts (atype)
4448 < simd_clone_subparts (arginfo[i].vectype))
4450 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4451 k = (simd_clone_subparts (arginfo[i].vectype)
4452 / simd_clone_subparts (atype));
4453 gcc_assert ((k & (k - 1)) == 0);
4454 if (m == 0)
4456 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4457 ncopies * o / k, op,
4458 &vec_oprnds[i]);
4459 vec_oprnds_i[i] = 0;
4460 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4462 else
4464 vec_oprnd0 = arginfo[i].op;
4465 if ((m & (k - 1)) == 0)
4466 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4468 arginfo[i].op = vec_oprnd0;
4469 vec_oprnd0
4470 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4471 bitsize_int (prec),
4472 bitsize_int ((m & (k - 1)) * prec));
4473 gassign *new_stmt
4474 = gimple_build_assign (make_ssa_name (atype),
4475 vec_oprnd0);
4476 vect_finish_stmt_generation (vinfo, stmt_info,
4477 new_stmt, gsi);
4478 vargs.safe_push (gimple_assign_lhs (new_stmt));
4480 else
4482 k = (simd_clone_subparts (atype)
4483 / simd_clone_subparts (arginfo[i].vectype));
4484 gcc_assert ((k & (k - 1)) == 0);
4485 vec<constructor_elt, va_gc> *ctor_elts;
4486 if (k != 1)
4487 vec_alloc (ctor_elts, k);
4488 else
4489 ctor_elts = NULL;
4490 for (l = 0; l < k; l++)
4492 if (m == 0 && l == 0)
4494 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4495 k * o * ncopies,
4496 op,
4497 &vec_oprnds[i]);
4498 vec_oprnds_i[i] = 0;
4499 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4501 else
4502 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4503 arginfo[i].op = vec_oprnd0;
4504 if (k == 1)
4505 break;
4506 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4507 vec_oprnd0);
4509 if (k == 1)
4510 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0),
4511 atype))
4513 vec_oprnd0
4514 = build1 (VIEW_CONVERT_EXPR, atype, vec_oprnd0);
4515 gassign *new_stmt
4516 = gimple_build_assign (make_ssa_name (atype),
4517 vec_oprnd0);
4518 vect_finish_stmt_generation (vinfo, stmt_info,
4519 new_stmt, gsi);
4520 vargs.safe_push (gimple_assign_lhs (new_stmt));
4522 else
4523 vargs.safe_push (vec_oprnd0);
4524 else
4526 vec_oprnd0 = build_constructor (atype, ctor_elts);
4527 gassign *new_stmt
4528 = gimple_build_assign (make_ssa_name (atype),
4529 vec_oprnd0);
4530 vect_finish_stmt_generation (vinfo, stmt_info,
4531 new_stmt, gsi);
4532 vargs.safe_push (gimple_assign_lhs (new_stmt));
4536 break;
4537 case SIMD_CLONE_ARG_TYPE_MASK:
4538 atype = bestn->simdclone->args[i].vector_type;
4539 if (bestn->simdclone->mask_mode != VOIDmode)
4541 /* FORNOW: this is disabled above. */
4542 gcc_unreachable ();
4544 else
4546 tree elt_type = TREE_TYPE (atype);
4547 tree one = fold_convert (elt_type, integer_one_node);
4548 tree zero = fold_convert (elt_type, integer_zero_node);
4549 o = vector_unroll_factor (nunits,
4550 simd_clone_subparts (atype));
4551 for (m = j * o; m < (j + 1) * o; m++)
4553 if (simd_clone_subparts (atype)
4554 < simd_clone_subparts (arginfo[i].vectype))
4556 /* The mask type has fewer elements than simdlen. */
4558 /* FORNOW */
4559 gcc_unreachable ();
4561 else if (simd_clone_subparts (atype)
4562 == simd_clone_subparts (arginfo[i].vectype))
4564 /* The SIMD clone function has the same number of
4565 elements as the caller's vector type. */
4566 if (m == 0)
4568 vect_get_vec_defs_for_operand (vinfo, stmt_info,
4569 o * ncopies,
4570 op,
4571 &vec_oprnds[i]);
4572 vec_oprnds_i[i] = 0;
4574 vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
4575 vec_oprnd0
4576 = build3 (VEC_COND_EXPR, atype, vec_oprnd0,
4577 build_vector_from_val (atype, one),
4578 build_vector_from_val (atype, zero));
4579 gassign *new_stmt
4580 = gimple_build_assign (make_ssa_name (atype),
4581 vec_oprnd0);
4582 vect_finish_stmt_generation (vinfo, stmt_info,
4583 new_stmt, gsi);
4584 vargs.safe_push (gimple_assign_lhs (new_stmt));
4586 else
4588 /* The mask type has more elements than simdlen. */
4590 /* FORNOW */
4591 gcc_unreachable ();
4595 break;
4596 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4597 vargs.safe_push (op);
4598 break;
4599 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4600 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4601 if (j == 0)
4603 gimple_seq stmts;
4604 arginfo[i].op
4605 = force_gimple_operand (unshare_expr (arginfo[i].op),
4606 &stmts, true, NULL_TREE);
4607 if (stmts != NULL)
4609 basic_block new_bb;
4610 edge pe = loop_preheader_edge (loop);
4611 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4612 gcc_assert (!new_bb);
4614 if (arginfo[i].simd_lane_linear)
4616 vargs.safe_push (arginfo[i].op);
4617 break;
4619 tree phi_res = copy_ssa_name (op);
4620 gphi *new_phi = create_phi_node (phi_res, loop->header);
4621 add_phi_arg (new_phi, arginfo[i].op,
4622 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4623 enum tree_code code
4624 = POINTER_TYPE_P (TREE_TYPE (op))
4625 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4626 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4627 ? sizetype : TREE_TYPE (op);
4628 poly_widest_int cst
4629 = wi::mul (bestn->simdclone->args[i].linear_step,
4630 ncopies * nunits);
4631 tree tcst = wide_int_to_tree (type, cst);
4632 tree phi_arg = copy_ssa_name (op);
4633 gassign *new_stmt
4634 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4635 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4636 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4637 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4638 UNKNOWN_LOCATION);
4639 arginfo[i].op = phi_res;
4640 vargs.safe_push (phi_res);
4642 else
4644 enum tree_code code
4645 = POINTER_TYPE_P (TREE_TYPE (op))
4646 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4647 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4648 ? sizetype : TREE_TYPE (op);
4649 poly_widest_int cst
4650 = wi::mul (bestn->simdclone->args[i].linear_step,
4651 j * nunits);
4652 tree tcst = wide_int_to_tree (type, cst);
4653 new_temp = make_ssa_name (TREE_TYPE (op));
4654 gassign *new_stmt
4655 = gimple_build_assign (new_temp, code,
4656 arginfo[i].op, tcst);
4657 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4658 vargs.safe_push (new_temp);
4660 break;
4661 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4662 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4663 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4664 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4665 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4666 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4667 default:
4668 gcc_unreachable ();
4672 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4673 if (vec_dest)
4675 gcc_assert (ratype
4676 || known_eq (simd_clone_subparts (rtype), nunits));
4677 if (ratype)
4678 new_temp = create_tmp_var (ratype);
4679 else if (useless_type_conversion_p (vectype, rtype))
4680 new_temp = make_ssa_name (vec_dest, new_call);
4681 else
4682 new_temp = make_ssa_name (rtype, new_call);
4683 gimple_call_set_lhs (new_call, new_temp);
4685 vect_finish_stmt_generation (vinfo, stmt_info, new_call, gsi);
4686 gimple *new_stmt = new_call;
4688 if (vec_dest)
4690 if (!multiple_p (simd_clone_subparts (vectype), nunits))
4692 unsigned int k, l;
4693 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4694 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4695 k = vector_unroll_factor (nunits,
4696 simd_clone_subparts (vectype));
4697 gcc_assert ((k & (k - 1)) == 0);
4698 for (l = 0; l < k; l++)
4700 tree t;
4701 if (ratype)
4703 t = build_fold_addr_expr (new_temp);
4704 t = build2 (MEM_REF, vectype, t,
4705 build_int_cst (TREE_TYPE (t), l * bytes));
4707 else
4708 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4709 bitsize_int (prec), bitsize_int (l * prec));
4710 new_stmt = gimple_build_assign (make_ssa_name (vectype), t);
4711 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4713 if (j == 0 && l == 0)
4714 *vec_stmt = new_stmt;
4715 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4718 if (ratype)
4719 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4720 continue;
4722 else if (!multiple_p (nunits, simd_clone_subparts (vectype)))
4724 unsigned int k = (simd_clone_subparts (vectype)
4725 / simd_clone_subparts (rtype));
4726 gcc_assert ((k & (k - 1)) == 0);
4727 if ((j & (k - 1)) == 0)
4728 vec_alloc (ret_ctor_elts, k);
4729 if (ratype)
4731 unsigned int m, o;
4732 o = vector_unroll_factor (nunits,
4733 simd_clone_subparts (rtype));
4734 for (m = 0; m < o; m++)
4736 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4737 size_int (m), NULL_TREE, NULL_TREE);
4738 new_stmt = gimple_build_assign (make_ssa_name (rtype),
4739 tem);
4740 vect_finish_stmt_generation (vinfo, stmt_info,
4741 new_stmt, gsi);
4742 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4743 gimple_assign_lhs (new_stmt));
4745 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4747 else
4748 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4749 if ((j & (k - 1)) != k - 1)
4750 continue;
4751 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4752 new_stmt
4753 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4754 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4756 if ((unsigned) j == k - 1)
4757 *vec_stmt = new_stmt;
4758 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4759 continue;
4761 else if (ratype)
4763 tree t = build_fold_addr_expr (new_temp);
4764 t = build2 (MEM_REF, vectype, t,
4765 build_int_cst (TREE_TYPE (t), 0));
4766 new_stmt = gimple_build_assign (make_ssa_name (vec_dest), t);
4767 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4768 vect_clobber_variable (vinfo, stmt_info, gsi, new_temp);
4770 else if (!useless_type_conversion_p (vectype, rtype))
4772 vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
4773 new_stmt
4774 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4775 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4779 if (j == 0)
4780 *vec_stmt = new_stmt;
4781 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4784 for (i = 0; i < nargs; ++i)
4786 vec<tree> oprndsi = vec_oprnds[i];
4787 oprndsi.release ();
4789 vargs.release ();
4791 /* Mark the clone as no longer being a candidate for GC. */
4792 bestn->gc_candidate = false;
4794 /* The call in STMT might prevent it from being removed in dce.
4795 We cannot remove it here, however, because of the way the ssa
4796 name it defines is mapped to the new definition.  So just replace
4797 the rhs of the statement with something harmless. */
4799 if (slp_node)
4800 return true;
4802 gimple *new_stmt;
4803 if (scalar_dest)
4805 type = TREE_TYPE (scalar_dest);
4806 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4807 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4809 else
4810 new_stmt = gimple_build_nop ();
4811 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4812 unlink_stmt_vdef (stmt);
4814 return true;
4818 /* Function vect_gen_widened_results_half
4820 Create a vector stmt whose code is CH, whose number of arguments is
4821 OP_TYPE and whose result variable is VEC_DEST; its arguments are
4822 VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
4823 When CH names an internal or built-in function rather than a tree code,
4824 a call to that function is created instead of a plain assignment.
4825 STMT_INFO is the original scalar stmt that we are vectorizing. */
4827 static gimple *
4828 vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
4829 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4830 tree vec_dest, gimple_stmt_iterator *gsi,
4831 stmt_vec_info stmt_info)
4833 gimple *new_stmt;
4834 tree new_temp;
4836 /* Generate half of the widened result: */
4837 if (op_type != binary_op)
4838 vec_oprnd1 = NULL;
4839 new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
4840 new_temp = make_ssa_name (vec_dest, new_stmt);
4841 gimple_set_lhs (new_stmt, new_temp);
4842 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4844 return new_stmt;
4848 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4849 For multi-step conversions store the resulting vectors and call the function
4850 recursively. When NARROW_SRC_P is true, there's still a conversion after
4851 narrowing, don't store the vectors in the SLP_NODE or in vector info of
4852 the scalar statement(or in STMT_VINFO_RELATED_STMT chain). */
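/* A sketch for a plain integer truncation: narrowing four V4SI operands
   to a single V16QI result proceeds pairwise,
     t0 = VEC_PACK_TRUNC <v0, v1>;   t1 = VEC_PACK_TRUNC <v2, v3>;   (V8HI)
     r  = VEC_PACK_TRUNC <t0, t1>;                                   (V16QI)
   which is what the recursion below implements by halving the operand
   list at each level.  */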
4854 static void
4855 vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds,
4856 int multi_step_cvt,
4857 stmt_vec_info stmt_info,
4858 vec<tree> &vec_dsts,
4859 gimple_stmt_iterator *gsi,
4860 slp_tree slp_node, code_helper code,
4861 bool narrow_src_p)
4863 unsigned int i;
4864 tree vop0, vop1, new_tmp, vec_dest;
4866 vec_dest = vec_dsts.pop ();
4868 for (i = 0; i < vec_oprnds->length (); i += 2)
4870 /* Create demotion operation. */
4871 vop0 = (*vec_oprnds)[i];
4872 vop1 = (*vec_oprnds)[i + 1];
4873 gimple *new_stmt = vect_gimple_build (vec_dest, code, vop0, vop1);
4874 new_tmp = make_ssa_name (vec_dest, new_stmt);
4875 gimple_set_lhs (new_stmt, new_tmp);
4876 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
4877 if (multi_step_cvt || narrow_src_p)
4878 /* Store the resulting vector for next recursive call,
4879 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
4880 (*vec_oprnds)[i/2] = new_tmp;
4881 else
4883 /* This is the last step of the conversion sequence. Store the
4884 vectors in SLP_NODE or in vector info of the scalar statement
4885 (or in STMT_VINFO_RELATED_STMT chain). */
4886 if (slp_node)
4887 slp_node->push_vec_def (new_stmt);
4888 else
4889 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
4893 /* For multi-step demotion operations we first generate demotion operations
4894 from the source type to the intermediate types, and then combine the
4895 results (stored in VEC_OPRNDS) in demotion operation to the destination
4896 type. */
4897 if (multi_step_cvt)
4899 /* At each level of recursion we have half of the operands we had at the
4900 previous level. */
4901 vec_oprnds->truncate ((i+1)/2);
4902 vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds,
4903 multi_step_cvt - 1,
4904 stmt_info, vec_dsts, gsi,
4905 slp_node, VEC_PACK_TRUNC_EXPR,
4906 narrow_src_p);
4909 vec_dsts.quick_push (vec_dest);
4913 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4914 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4915 STMT_INFO. For multi-step conversions store the resulting vectors and
4916 call the function recursively. */
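/* For example, promoting V8HI operands to V4SI results uses a LO/HI
   pair of codes such as VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR (or the
   corresponding widen-mult codes), each producing the widened form of
   half of the input elements; CH1 and CH2 carry those two codes (an
   illustrative pairing, the exact codes come from
   supportable_widening_operation).  */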
4918 static void
4919 vect_create_vectorized_promotion_stmts (vec_info *vinfo,
4920 vec<tree> *vec_oprnds0,
4921 vec<tree> *vec_oprnds1,
4922 stmt_vec_info stmt_info, tree vec_dest,
4923 gimple_stmt_iterator *gsi,
4924 code_helper ch1,
4925 code_helper ch2, int op_type)
4927 int i;
4928 tree vop0, vop1, new_tmp1, new_tmp2;
4929 gimple *new_stmt1, *new_stmt2;
4930 vec<tree> vec_tmp = vNULL;
4932 vec_tmp.create (vec_oprnds0->length () * 2);
4933 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4935 if (op_type == binary_op)
4936 vop1 = (*vec_oprnds1)[i];
4937 else
4938 vop1 = NULL_TREE;
4940 /* Generate the two halves of promotion operation. */
4941 new_stmt1 = vect_gen_widened_results_half (vinfo, ch1, vop0, vop1,
4942 op_type, vec_dest, gsi,
4943 stmt_info);
4944 new_stmt2 = vect_gen_widened_results_half (vinfo, ch2, vop0, vop1,
4945 op_type, vec_dest, gsi,
4946 stmt_info);
4947 if (is_gimple_call (new_stmt1))
4949 new_tmp1 = gimple_call_lhs (new_stmt1);
4950 new_tmp2 = gimple_call_lhs (new_stmt2);
4952 else
4954 new_tmp1 = gimple_assign_lhs (new_stmt1);
4955 new_tmp2 = gimple_assign_lhs (new_stmt2);
4958 /* Store the results for the next step. */
4959 vec_tmp.quick_push (new_tmp1);
4960 vec_tmp.quick_push (new_tmp2);
4963 vec_oprnds0->release ();
4964 *vec_oprnds0 = vec_tmp;
4967 /* Create vectorized promotion stmts for widening stmts using only half the
4968 potential vector size for input. */
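/* For instance, a widening operation with V4SI inputs and a V4DI
   destination (same number of lanes on both sides) is emitted as
   NOP_EXPR conversions of the inputs to V4DI followed by the plain V4DI
   operation, instead of a LO/HI widening pair (illustrative modes
   only).  */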
4969 static void
4970 vect_create_half_widening_stmts (vec_info *vinfo,
4971 vec<tree> *vec_oprnds0,
4972 vec<tree> *vec_oprnds1,
4973 stmt_vec_info stmt_info, tree vec_dest,
4974 gimple_stmt_iterator *gsi,
4975 code_helper code1,
4976 int op_type)
4978 int i;
4979 tree vop0, vop1;
4980 gimple *new_stmt1;
4981 gimple *new_stmt2;
4982 gimple *new_stmt3;
4983 vec<tree> vec_tmp = vNULL;
4985 vec_tmp.create (vec_oprnds0->length ());
4986 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4988 tree new_tmp1, new_tmp2, new_tmp3, out_type;
4990 gcc_assert (op_type == binary_op);
4991 vop1 = (*vec_oprnds1)[i];
4993 /* Widen the first vector input. */
4994 out_type = TREE_TYPE (vec_dest);
4995 new_tmp1 = make_ssa_name (out_type);
4996 new_stmt1 = gimple_build_assign (new_tmp1, NOP_EXPR, vop0);
4997 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt1, gsi);
4998 if (VECTOR_TYPE_P (TREE_TYPE (vop1)))
5000 /* Widen the second vector input. */
5001 new_tmp2 = make_ssa_name (out_type);
5002 new_stmt2 = gimple_build_assign (new_tmp2, NOP_EXPR, vop1);
5003 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt2, gsi);
5004 /* Perform the operation with both vector inputs widened. */
5005 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, new_tmp2);
5007 else
5009 /* Perform the operation with the single vector input widened. */
5010 new_stmt3 = vect_gimple_build (vec_dest, code1, new_tmp1, vop1);
5013 new_tmp3 = make_ssa_name (vec_dest, new_stmt3);
5014 gimple_assign_set_lhs (new_stmt3, new_tmp3);
5015 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt3, gsi);
5017 /* Store the results for the next step. */
5018 vec_tmp.quick_push (new_tmp3);
5021 vec_oprnds0->release ();
5022 *vec_oprnds0 = vec_tmp;
5026 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
5027 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5028 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5029 Return true if STMT_INFO is vectorizable in this way. */
5031 static bool
5032 vectorizable_conversion (vec_info *vinfo,
5033 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5034 gimple **vec_stmt, slp_tree slp_node,
5035 stmt_vector_for_cost *cost_vec)
5037 tree vec_dest, cvt_op = NULL_TREE;
5038 tree scalar_dest;
5039 tree op0, op1 = NULL_TREE;
5040 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5041 tree_code tc1, tc2;
5042 code_helper code, code1, code2;
5043 code_helper codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
5044 tree new_temp;
5045 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5046 int ndts = 2;
5047 poly_uint64 nunits_in;
5048 poly_uint64 nunits_out;
5049 tree vectype_out, vectype_in;
5050 int ncopies, i;
5051 tree lhs_type, rhs_type;
5052 /* For conversions between floating point and integer, there are two NARROW
5053 cases. NARROW_SRC is for FLOAT_EXPR, means
5054 integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
5055 This is safe when the range of the source integer can fit into the lower
5056 precision. NARROW_DST is for FIX_TRUNC_EXPR, means
5057 floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
5058 For other conversions, when there's narrowing, NARROW_DST is used as
5059 default. */
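/* Concrete illustrations of the two cases: "float f = (float) l" for a
   long L whose value range is known to fit in int can use NARROW_SRC
   (long --NOP--> int --FLOAT_EXPR--> float), while "short s = (short) d"
   for a double D uses NARROW_DST
   (double --FIX_TRUNC_EXPR--> int --NOP--> short).  */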
5060 enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier;
5061 vec<tree> vec_oprnds0 = vNULL;
5062 vec<tree> vec_oprnds1 = vNULL;
5063 tree vop0;
5064 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5065 int multi_step_cvt = 0;
5066 vec<tree> interm_types = vNULL;
5067 tree intermediate_type, cvt_type = NULL_TREE;
5068 int op_type;
5069 unsigned short fltsz;
5071 /* Is STMT a vectorizable conversion? */
5073 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5074 return false;
5076 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5077 && ! vec_stmt)
5078 return false;
5080 gimple* stmt = stmt_info->stmt;
5081 if (!(is_gimple_assign (stmt) || is_gimple_call (stmt)))
5082 return false;
5084 if (gimple_get_lhs (stmt) == NULL_TREE
5085 || TREE_CODE (gimple_get_lhs (stmt)) != SSA_NAME)
5086 return false;
5091 if (is_gimple_assign (stmt))
5093 code = gimple_assign_rhs_code (stmt);
5094 op_type = TREE_CODE_LENGTH ((tree_code) code);
5096 else if (gimple_call_internal_p (stmt))
5098 code = gimple_call_internal_fn (stmt);
5099 op_type = gimple_call_num_args (stmt);
5101 else
5102 return false;
5104 bool widen_arith = (code == WIDEN_MULT_EXPR
5105 || code == WIDEN_LSHIFT_EXPR
5106 || widening_fn_p (code));
5108 if (!widen_arith
5109 && !CONVERT_EXPR_CODE_P (code)
5110 && code != FIX_TRUNC_EXPR
5111 && code != FLOAT_EXPR)
5112 return false;
5114 /* Check types of lhs and rhs. */
5115 scalar_dest = gimple_get_lhs (stmt);
5116 lhs_type = TREE_TYPE (scalar_dest);
5117 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5119 /* Check the operands of the operation. */
5120 slp_tree slp_op0, slp_op1 = NULL;
5121 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
5122 0, &op0, &slp_op0, &dt[0], &vectype_in))
5124 if (dump_enabled_p ())
5125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5126 "use not simple.\n");
5127 return false;
5130 rhs_type = TREE_TYPE (op0);
5131 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
5132 && !((INTEGRAL_TYPE_P (lhs_type)
5133 && INTEGRAL_TYPE_P (rhs_type))
5134 || (SCALAR_FLOAT_TYPE_P (lhs_type)
5135 && SCALAR_FLOAT_TYPE_P (rhs_type))))
5136 return false;
5138 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5139 && ((INTEGRAL_TYPE_P (lhs_type)
5140 && !type_has_mode_precision_p (lhs_type))
5141 || (INTEGRAL_TYPE_P (rhs_type)
5142 && !type_has_mode_precision_p (rhs_type))))
5144 if (dump_enabled_p ())
5145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5146 "type conversion to/from bit-precision unsupported."
5147 "\n");
5148 return false;
5151 if (op_type == binary_op)
5153 gcc_assert (code == WIDEN_MULT_EXPR
5154 || code == WIDEN_LSHIFT_EXPR
5155 || widening_fn_p (code));
5157 op1 = is_gimple_assign (stmt) ? gimple_assign_rhs2 (stmt) :
5158 gimple_call_arg (stmt, 0);
5159 tree vectype1_in;
5160 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
5161 &op1, &slp_op1, &dt[1], &vectype1_in))
5163 if (dump_enabled_p ())
5164 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5165 "use not simple.\n");
5166 return false;
5168 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5169 OP1. */
5170 if (!vectype_in)
5171 vectype_in = vectype1_in;
5174 /* If op0 is an external or constant def, infer the vector type
5175 from the scalar type. */
5176 if (!vectype_in)
5177 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
5178 if (vec_stmt)
5179 gcc_assert (vectype_in);
5180 if (!vectype_in)
5182 if (dump_enabled_p ())
5183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5184 "no vectype for scalar type %T\n", rhs_type);
5186 return false;
5189 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
5190 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
5192 if (dump_enabled_p ())
5193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5194 "can't convert between boolean and non "
5195 "boolean vectors %T\n", rhs_type);
5197 return false;
5200 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
5201 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5202 if (known_eq (nunits_out, nunits_in))
5203 if (widen_arith)
5204 modifier = WIDEN;
5205 else
5206 modifier = NONE;
5207 else if (multiple_p (nunits_out, nunits_in))
5208 modifier = NARROW_DST;
5209 else
5211 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
5212 modifier = WIDEN;
5215 /* Multiple types in SLP are handled by creating the appropriate number of
5216 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5217 case of SLP. */
5218 if (slp_node)
5219 ncopies = 1;
5220 else if (modifier == NARROW_DST)
5221 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
5222 else
5223 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5225 /* Sanity check: make sure that at least one copy of the vectorized stmt
5226 needs to be generated. */
5227 gcc_assert (ncopies >= 1);
5229 bool found_mode = false;
5230 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
5231 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
5232 opt_scalar_mode rhs_mode_iter;
5234 /* Supportable by target? */
5235 switch (modifier)
5237 case NONE:
5238 if (code != FIX_TRUNC_EXPR
5239 && code != FLOAT_EXPR
5240 && !CONVERT_EXPR_CODE_P (code))
5241 return false;
5242 gcc_assert (code.is_tree_code ());
5243 if (supportable_convert_operation ((tree_code) code, vectype_out,
5244 vectype_in, &tc1))
5246 code1 = tc1;
5247 break;
5250 /* For conversions between float and integer types try whether
5251 we can use intermediate signed integer types to support the
5252 conversion. */
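/* Illustrative only: a FIX_TRUNC_EXPR from double to signed char may be
   carried out as double --FIX_TRUNC_EXPR--> int --NOP--> signed char,
   and a FLOAT_EXPR from a wide integer to a narrow float as
   wide int --NOP--> narrower int --FLOAT_EXPR--> float, the latter only
   when the operand's value range is known to fit in the narrower
   integer (checked below).  */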
5253 if (GET_MODE_SIZE (lhs_mode) != GET_MODE_SIZE (rhs_mode)
5254 && (code == FLOAT_EXPR ||
5255 (code == FIX_TRUNC_EXPR && !flag_trapping_math)))
5257 bool demotion = GET_MODE_SIZE (rhs_mode) > GET_MODE_SIZE (lhs_mode);
5258 bool float_expr_p = code == FLOAT_EXPR;
5259 unsigned short target_size;
5260 scalar_mode intermediate_mode;
5261 if (demotion)
5263 intermediate_mode = lhs_mode;
5264 target_size = GET_MODE_SIZE (rhs_mode);
5266 else
5268 target_size = GET_MODE_SIZE (lhs_mode);
5269 if (!int_mode_for_size
5270 (GET_MODE_BITSIZE (rhs_mode), 0).exists (&intermediate_mode))
5271 goto unsupported;
5273 code1 = float_expr_p ? code : NOP_EXPR;
5274 codecvt1 = float_expr_p ? NOP_EXPR : code;
5275 opt_scalar_mode mode_iter;
5276 FOR_EACH_2XWIDER_MODE (mode_iter, intermediate_mode)
5278 intermediate_mode = mode_iter.require ();
5280 if (GET_MODE_SIZE (intermediate_mode) > target_size)
5281 break;
5283 scalar_mode cvt_mode;
5284 if (!int_mode_for_size
5285 (GET_MODE_BITSIZE (intermediate_mode), 0).exists (&cvt_mode))
5286 break;
5288 cvt_type = build_nonstandard_integer_type
5289 (GET_MODE_BITSIZE (cvt_mode), 0);
5291 /* Check if the intermediate type can hold OP0's range.
5292 When converting from float to integer this is not necessary
5293 because values that do not fit the (smaller) target type are
5294 unspecified anyway. */
5295 if (demotion && float_expr_p)
5297 wide_int op_min_value, op_max_value;
5298 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5299 break;
5301 if (cvt_type == NULL_TREE
5302 || (wi::min_precision (op_max_value, SIGNED)
5303 > TYPE_PRECISION (cvt_type))
5304 || (wi::min_precision (op_min_value, SIGNED)
5305 > TYPE_PRECISION (cvt_type)))
5306 continue;
5309 cvt_type = get_vectype_for_scalar_type (vinfo, cvt_type, slp_node);
5310 /* This should only happen for SLP, as the loop vectorizer
5311 only supports same-sized vectors. */
5312 if (cvt_type == NULL_TREE
5313 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type), nunits_in)
5314 || !supportable_convert_operation ((tree_code) code1,
5315 vectype_out,
5316 cvt_type, &tc1)
5317 || !supportable_convert_operation ((tree_code) codecvt1,
5318 cvt_type,
5319 vectype_in, &tc2))
5320 continue;
5322 found_mode = true;
5323 break;
5326 if (found_mode)
5328 multi_step_cvt++;
5329 interm_types.safe_push (cvt_type);
5330 cvt_type = NULL_TREE;
5331 code1 = tc1;
5332 codecvt1 = tc2;
5333 break;
5336 /* FALLTHRU */
5337 unsupported:
5338 if (dump_enabled_p ())
5339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5340 "conversion not supported by target.\n");
5341 return false;
5343 case WIDEN:
5344 if (known_eq (nunits_in, nunits_out))
5346 if (!(code.is_tree_code ()
5347 && supportable_half_widening_operation ((tree_code) code,
5348 vectype_out, vectype_in,
5349 &tc1)))
5350 goto unsupported;
5351 code1 = tc1;
5352 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5353 break;
5355 if (supportable_widening_operation (vinfo, code, stmt_info,
5356 vectype_out, vectype_in, &code1,
5357 &code2, &multi_step_cvt,
5358 &interm_types))
5360 /* Binary widening operation can only be supported directly by the
5361 architecture. */
5362 gcc_assert (!(multi_step_cvt && op_type == binary_op));
5363 break;
5366 if (code != FLOAT_EXPR
5367 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
5368 goto unsupported;
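/* A possible instance of the search below (types are only an
   example): a short -> double FLOAT_EXPR can be split into a widening
   short -> int step followed by a widening int -> double FLOAT_EXPR,
   with the intermediate integer vector type recorded in INTERM_TYPES
   and MULTI_STEP_CVT incremented accordingly.  */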
5370 fltsz = GET_MODE_SIZE (lhs_mode);
5371 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
5373 rhs_mode = rhs_mode_iter.require ();
5374 if (GET_MODE_SIZE (rhs_mode) > fltsz)
5375 break;
5377 cvt_type
5378 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5379 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5380 if (cvt_type == NULL_TREE)
5381 goto unsupported;
5383 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5385 tc1 = ERROR_MARK;
5386 gcc_assert (code.is_tree_code ());
5387 if (!supportable_convert_operation ((tree_code) code, vectype_out,
5388 cvt_type, &tc1))
5389 goto unsupported;
5390 codecvt1 = tc1;
5392 else if (!supportable_widening_operation (vinfo, code,
5393 stmt_info, vectype_out,
5394 cvt_type, &codecvt1,
5395 &codecvt2, &multi_step_cvt,
5396 &interm_types))
5397 continue;
5398 else
5399 gcc_assert (multi_step_cvt == 0);
5401 if (supportable_widening_operation (vinfo, NOP_EXPR, stmt_info,
5402 cvt_type,
5403 vectype_in, &code1,
5404 &code2, &multi_step_cvt,
5405 &interm_types))
5407 found_mode = true;
5408 break;
5412 if (!found_mode)
5413 goto unsupported;
5415 if (GET_MODE_SIZE (rhs_mode) == fltsz)
5416 codecvt2 = ERROR_MARK;
5417 else
5419 multi_step_cvt++;
5420 interm_types.safe_push (cvt_type);
5421 cvt_type = NULL_TREE;
5423 break;
5425 case NARROW_DST:
5426 gcc_assert (op_type == unary_op);
5427 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
5428 &code1, &multi_step_cvt,
5429 &interm_types))
5430 break;
5432 if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
5433 goto unsupported;
5435 if (code == FIX_TRUNC_EXPR)
5437 cvt_type
5438 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
5439 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
5440 if (cvt_type == NULL_TREE)
5441 goto unsupported;
5442 if (supportable_convert_operation ((tree_code) code, cvt_type, vectype_in,
5443 &tc1))
5444 codecvt1 = tc1;
5445 else
5446 goto unsupported;
5447 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
5448 &code1, &multi_step_cvt,
5449 &interm_types))
5450 break;
5452 /* If op0 can be represented with a lower-precision integer,
5453 truncate it to cvt_type and then do FLOAT_EXPR. */
5454 else if (code == FLOAT_EXPR)
5456 wide_int op_min_value, op_max_value;
5457 if (!vect_get_range_info (op0, &op_min_value, &op_max_value))
5458 goto unsupported;
5460 cvt_type
5461 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0);
5462 if (cvt_type == NULL_TREE
5463 || (wi::min_precision (op_max_value, SIGNED)
5464 > TYPE_PRECISION (cvt_type))
5465 || (wi::min_precision (op_min_value, SIGNED)
5466 > TYPE_PRECISION (cvt_type)))
5467 goto unsupported;
5469 cvt_type = get_same_sized_vectype (cvt_type, vectype_out);
5470 if (cvt_type == NULL_TREE)
5471 goto unsupported;
5472 if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in,
5473 &code1, &multi_step_cvt,
5474 &interm_types))
5475 goto unsupported;
5476 if (supportable_convert_operation ((tree_code) code, vectype_out,
5477 cvt_type, &tc1))
5479 codecvt1 = tc1;
5480 modifier = NARROW_SRC;
5481 break;
5485 goto unsupported;
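/* Two illustrative cases of the fallbacks above (example types only):
   a double -> short FIX_TRUNC_EXPR may be emitted as a same-width
   double -> 64-bit-integer fix-trunc followed by a possibly
   multi-step integer narrowing to short, while a long -> float
   FLOAT_EXPR whose source values are known from range information to
   fit in int can first be narrowed long -> int and then converted
   with FLOAT_EXPR, switching MODIFIER to NARROW_SRC.  */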
5487 default:
5488 gcc_unreachable ();
5491 if (!vec_stmt) /* transformation not required. */
5493 if (slp_node
5494 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype_in)
5495 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype_in)))
5497 if (dump_enabled_p ())
5498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5499 "incompatible vector types for invariants\n");
5500 return false;
5502 DUMP_VECT_SCOPE ("vectorizable_conversion");
5503 if (modifier == NONE)
5505 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
5506 vect_model_simple_cost (vinfo, stmt_info,
5507 ncopies * (1 + multi_step_cvt),
5508 dt, ndts, slp_node, cost_vec);
5510 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5512 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
5513 /* The final packing step produces one vector result per copy. */
5514 unsigned int nvectors
5515 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
5516 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5517 multi_step_cvt, cost_vec,
5518 widen_arith);
5520 else
5522 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
5523 /* The initial unpacking step produces two vector results
5524 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5525 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5526 unsigned int nvectors
5527 = (slp_node
5528 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5529 : ncopies * 2);
5530 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5531 multi_step_cvt, cost_vec,
5532 widen_arith);
5534 interm_types.release ();
5535 return true;
5538 /* Transform. */
5539 if (dump_enabled_p ())
5540 dump_printf_loc (MSG_NOTE, vect_location,
5541 "transform conversion. ncopies = %d.\n", ncopies);
5543 if (op_type == binary_op)
5545 if (CONSTANT_CLASS_P (op0))
5546 op0 = fold_convert (TREE_TYPE (op1), op0);
5547 else if (CONSTANT_CLASS_P (op1))
5548 op1 = fold_convert (TREE_TYPE (op0), op1);
5551 /* In case of multi-step conversion, we first generate conversion operations
5552 to the intermediate types, and then from those types to the final one.
5553 We create vector destinations for the intermediate type (TYPES) received
5554 from supportable_*_operation, and store them in the correct order
5555 for future use in vect_create_vectorized_*_stmts (). */
5556 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5557 bool widen_or_narrow_float_p
5558 = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
5559 vec_dest = vect_create_destination_var (scalar_dest,
5560 widen_or_narrow_float_p
5561 ? cvt_type : vectype_out);
5562 vec_dsts.quick_push (vec_dest);
5564 if (multi_step_cvt)
5566 for (i = interm_types.length () - 1;
5567 interm_types.iterate (i, &intermediate_type); i--)
5569 vec_dest = vect_create_destination_var (scalar_dest,
5570 intermediate_type);
5571 vec_dsts.quick_push (vec_dest);
5575 if (cvt_type)
5576 vec_dest = vect_create_destination_var (scalar_dest,
5577 widen_or_narrow_float_p
5578 ? vectype_out : cvt_type);
5580 int ninputs = 1;
5581 if (!slp_node)
5583 if (modifier == WIDEN)
5585 else if (modifier == NARROW_SRC || modifier == NARROW_DST)
5587 if (multi_step_cvt)
5588 ninputs = vect_pow2 (multi_step_cvt);
5589 ninputs *= 2;
5593 switch (modifier)
5595 case NONE:
5596 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
5597 op0, &vec_oprnds0);
5598 /* vec_dest is the intermediate-type operand when MULTI_STEP_CVT is non-zero. */
5599 if (multi_step_cvt)
5601 cvt_op = vec_dest;
5602 vec_dest = vec_dsts[0];
5605 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5607 /* Arguments are ready, create the new vector stmt. */
5608 gimple* new_stmt;
5609 if (multi_step_cvt)
5611 gcc_assert (multi_step_cvt == 1);
5612 new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
5613 new_temp = make_ssa_name (cvt_op, new_stmt);
5614 gimple_assign_set_lhs (new_stmt, new_temp);
5615 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5616 vop0 = new_temp;
5618 new_stmt = vect_gimple_build (vec_dest, code1, vop0);
5619 new_temp = make_ssa_name (vec_dest, new_stmt);
5620 gimple_set_lhs (new_stmt, new_temp);
5621 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5623 if (slp_node)
5624 slp_node->push_vec_def (new_stmt);
5625 else
5626 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5628 break;
5630 case WIDEN:
5631 /* In case the vectorization factor (VF) is bigger than the number
5632 of elements that we can fit in a vectype (nunits), we have to
5633 generate more than one vector stmt, i.e., we need to "unroll"
5634 the vector stmt by a factor VF/nunits. */
5635 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5636 op0, &vec_oprnds0,
5637 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
5638 &vec_oprnds1);
5639 if (code == WIDEN_LSHIFT_EXPR)
5641 int oprnds_size = vec_oprnds0.length ();
5642 vec_oprnds1.create (oprnds_size);
5643 for (i = 0; i < oprnds_size; ++i)
5644 vec_oprnds1.quick_push (op1);
5646 /* Arguments are ready. Create the new vector stmts. */
5647 for (i = multi_step_cvt; i >= 0; i--)
5649 tree this_dest = vec_dsts[i];
5650 code_helper c1 = code1, c2 = code2;
5651 if (i == 0 && codecvt2 != ERROR_MARK)
5653 c1 = codecvt1;
5654 c2 = codecvt2;
5656 if (known_eq (nunits_out, nunits_in))
5657 vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
5658 stmt_info, this_dest, gsi, c1,
5659 op_type);
5660 else
5661 vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
5662 &vec_oprnds1, stmt_info,
5663 this_dest, gsi,
5664 c1, c2, op_type);
5667 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5669 gimple *new_stmt;
5670 if (cvt_type)
5672 new_temp = make_ssa_name (vec_dest);
5673 new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5674 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5676 else
5677 new_stmt = SSA_NAME_DEF_STMT (vop0);
5679 if (slp_node)
5680 slp_node->push_vec_def (new_stmt);
5681 else
5682 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5684 break;
5686 case NARROW_SRC:
5687 case NARROW_DST:
5688 /* In case the vectorization factor (VF) is bigger than the number
5689 of elements that we can fit in a vectype (nunits), we have to
5690 generate more than one vector stmt, i.e., we need to "unroll"
5691 the vector stmt by a factor VF/nunits. */
5692 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
5693 op0, &vec_oprnds0);
5694 /* Arguments are ready. Create the new vector stmts. */
5695 if (cvt_type && modifier == NARROW_DST)
5696 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5698 new_temp = make_ssa_name (vec_dest);
5699 gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
5700 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5701 vec_oprnds0[i] = new_temp;
5704 vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
5705 multi_step_cvt,
5706 stmt_info, vec_dsts, gsi,
5707 slp_node, code1,
5708 modifier == NARROW_SRC);
5709 /* After demoting op0 to cvt_type, convert it to dest. */
5710 if (cvt_type && code == FLOAT_EXPR)
5712 for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++)
5714 /* Arguments are ready, create the new vector stmt. */
5715 gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
5716 gimple *new_stmt
5717 = vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
5718 new_temp = make_ssa_name (vec_dest, new_stmt);
5719 gimple_set_lhs (new_stmt, new_temp);
5720 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5722 /* This is the last step of the conversion sequence. Store the
5723 vectors in SLP_NODE or in vector info of the scalar statement
5724 (or in STMT_VINFO_RELATED_STMT chain). */
5725 if (slp_node)
5726 slp_node->push_vec_def (new_stmt);
5727 else
5728 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5731 break;
5733 if (!slp_node)
5734 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5736 vec_oprnds0.release ();
5737 vec_oprnds1.release ();
5738 interm_types.release ();
5740 return true;
5743 /* Return true if we can assume from the scalar form of STMT_INFO that
5744 neither the scalar nor the vector forms will generate code. STMT_INFO
5745 is known not to involve a data reference. */
5747 bool
5748 vect_nop_conversion_p (stmt_vec_info stmt_info)
5750 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5751 if (!stmt)
5752 return false;
5754 tree lhs = gimple_assign_lhs (stmt);
5755 tree_code code = gimple_assign_rhs_code (stmt);
5756 tree rhs = gimple_assign_rhs1 (stmt);
5758 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5759 return true;
5761 if (CONVERT_EXPR_CODE_P (code))
5762 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5764 return false;
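/* For instance, a plain SSA copy, a VIEW_CONVERT_EXPR, or a
   conversion such as int -> unsigned int that only changes the sign
   of a same-precision type is a nop here: neither the scalar nor the
   vector form needs any code.  */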
5767 /* Function vectorizable_assignment.
5769 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5770 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5771 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5772 Return true if STMT_INFO is vectorizable in this way. */
5774 static bool
5775 vectorizable_assignment (vec_info *vinfo,
5776 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5777 gimple **vec_stmt, slp_tree slp_node,
5778 stmt_vector_for_cost *cost_vec)
5780 tree vec_dest;
5781 tree scalar_dest;
5782 tree op;
5783 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5784 tree new_temp;
5785 enum vect_def_type dt[1] = {vect_unknown_def_type};
5786 int ndts = 1;
5787 int ncopies;
5788 int i;
5789 vec<tree> vec_oprnds = vNULL;
5790 tree vop;
5791 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
5792 enum tree_code code;
5793 tree vectype_in;
5795 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5796 return false;
5798 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5799 && ! vec_stmt)
5800 return false;
5802 /* Is vectorizable assignment? */
5803 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5804 if (!stmt)
5805 return false;
5807 scalar_dest = gimple_assign_lhs (stmt);
5808 if (TREE_CODE (scalar_dest) != SSA_NAME)
5809 return false;
5811 if (STMT_VINFO_DATA_REF (stmt_info))
5812 return false;
5814 code = gimple_assign_rhs_code (stmt);
5815 if (!(gimple_assign_single_p (stmt)
5816 || code == PAREN_EXPR
5817 || CONVERT_EXPR_CODE_P (code)))
5818 return false;
5820 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5821 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5823 /* Multiple types in SLP are handled by creating the appropriate number of
5824 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5825 case of SLP. */
5826 if (slp_node)
5827 ncopies = 1;
5828 else
5829 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5831 gcc_assert (ncopies >= 1);
5833 slp_tree slp_op;
5834 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
5835 &dt[0], &vectype_in))
5837 if (dump_enabled_p ())
5838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5839 "use not simple.\n");
5840 return false;
5842 if (!vectype_in)
5843 vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);
5845 /* We can handle NOP_EXPR conversions that do not change the number
5846 of elements or the vector size. */
5847 if ((CONVERT_EXPR_CODE_P (code)
5848 || code == VIEW_CONVERT_EXPR)
5849 && (!vectype_in
5850 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5851 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5852 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5853 return false;
5855 if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
5857 if (dump_enabled_p ())
5858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5859 "can't convert between boolean and non "
5860 "boolean vectors %T\n", TREE_TYPE (op));
5862 return false;
5865 /* We do not handle bit-precision changes. */
5866 if ((CONVERT_EXPR_CODE_P (code)
5867 || code == VIEW_CONVERT_EXPR)
5868 && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5869 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5870 || (INTEGRAL_TYPE_P (TREE_TYPE (op))
5871 && !type_has_mode_precision_p (TREE_TYPE (op))))
5872 /* But a conversion that does not change the bit-pattern is ok. */
5873 && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5874 && INTEGRAL_TYPE_P (TREE_TYPE (op))
5875 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5876 > TYPE_PRECISION (TREE_TYPE (op)))
5877 && TYPE_UNSIGNED (TREE_TYPE (op))))
5879 if (dump_enabled_p ())
5880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5881 "type conversion to/from bit-precision "
5882 "unsupported.\n");
5883 return false;
5886 if (!vec_stmt) /* transformation not required. */
5888 if (slp_node
5889 && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
5891 if (dump_enabled_p ())
5892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5893 "incompatible vector types for invariants\n");
5894 return false;
5896 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5897 DUMP_VECT_SCOPE ("vectorizable_assignment");
5898 if (!vect_nop_conversion_p (stmt_info))
5899 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
5900 cost_vec);
5901 return true;
5904 /* Transform. */
5905 if (dump_enabled_p ())
5906 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5908 /* Handle def. */
5909 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5911 /* Handle use. */
5912 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);
5914 /* Arguments are ready. Create the new vector stmt. */
5915 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
5917 if (CONVERT_EXPR_CODE_P (code)
5918 || code == VIEW_CONVERT_EXPR)
5919 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5920 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5921 new_temp = make_ssa_name (vec_dest, new_stmt);
5922 gimple_assign_set_lhs (new_stmt, new_temp);
5923 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
5924 if (slp_node)
5925 slp_node->push_vec_def (new_stmt);
5926 else
5927 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
5929 if (!slp_node)
5930 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
5932 vec_oprnds.release ();
5933 return true;
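/* A short illustration: for a copy such as 'b_1 = (unsigned int) a_2'
   where both types have the same width, each vector statement is just
   a VIEW_CONVERT_EXPR of the operand into the destination vector
   type, so the whole assignment vectorizes to plain vector copies.  */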
5937 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5938 either as shift by a scalar or by a vector. */
5940 bool
5941 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5944 machine_mode vec_mode;
5945 optab optab;
5946 int icode;
5947 tree vectype;
5949 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5950 if (!vectype)
5951 return false;
5953 optab = optab_for_tree_code (code, vectype, optab_scalar);
5954 if (!optab
5955 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5957 optab = optab_for_tree_code (code, vectype, optab_vector);
5958 if (!optab
5959 || (optab_handler (optab, TYPE_MODE (vectype))
5960 == CODE_FOR_nothing))
5961 return false;
5964 vec_mode = TYPE_MODE (vectype);
5965 icode = (int) optab_handler (optab, vec_mode);
5966 if (icode == CODE_FOR_nothing)
5967 return false;
5969 return true;
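/* Note for callers (informal): this only answers whether some form of
   the shift is available, either via the vector/scalar optab or the
   vector/vector optab; it does not record which of the two forms
   vectorizable_shift will eventually pick.  */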
5973 /* Function vectorizable_shift.
5975 Check if STMT_INFO performs a shift operation that can be vectorized.
5976 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5977 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5978 Return true if STMT_INFO is vectorizable in this way. */
5980 static bool
5981 vectorizable_shift (vec_info *vinfo,
5982 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5983 gimple **vec_stmt, slp_tree slp_node,
5984 stmt_vector_for_cost *cost_vec)
5986 tree vec_dest;
5987 tree scalar_dest;
5988 tree op0, op1 = NULL;
5989 tree vec_oprnd1 = NULL_TREE;
5990 tree vectype;
5991 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
5992 enum tree_code code;
5993 machine_mode vec_mode;
5994 tree new_temp;
5995 optab optab;
5996 int icode;
5997 machine_mode optab_op2_mode;
5998 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5999 int ndts = 2;
6000 poly_uint64 nunits_in;
6001 poly_uint64 nunits_out;
6002 tree vectype_out;
6003 tree op1_vectype;
6004 int ncopies;
6005 int i;
6006 vec<tree> vec_oprnds0 = vNULL;
6007 vec<tree> vec_oprnds1 = vNULL;
6008 tree vop0, vop1;
6009 unsigned int k;
6010 bool scalar_shift_arg = true;
6011 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6012 bool incompatible_op1_vectype_p = false;
6014 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6015 return false;
6017 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6018 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
6019 && ! vec_stmt)
6020 return false;
6022 /* Is STMT a vectorizable binary/unary operation? */
6023 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6024 if (!stmt)
6025 return false;
6027 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6028 return false;
6030 code = gimple_assign_rhs_code (stmt);
6032 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
6033 || code == RROTATE_EXPR))
6034 return false;
6036 scalar_dest = gimple_assign_lhs (stmt);
6037 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6038 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
6040 if (dump_enabled_p ())
6041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6042 "bit-precision shifts not supported.\n");
6043 return false;
6046 slp_tree slp_op0;
6047 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6048 0, &op0, &slp_op0, &dt[0], &vectype))
6050 if (dump_enabled_p ())
6051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6052 "use not simple.\n");
6053 return false;
6055 /* If op0 is an external or constant def, infer the vector type
6056 from the scalar type. */
6057 if (!vectype)
6058 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
6059 if (vec_stmt)
6060 gcc_assert (vectype);
6061 if (!vectype)
6063 if (dump_enabled_p ())
6064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6065 "no vectype for scalar type\n");
6066 return false;
6069 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6070 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6071 if (maybe_ne (nunits_out, nunits_in))
6072 return false;
6074 stmt_vec_info op1_def_stmt_info;
6075 slp_tree slp_op1;
6076 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
6077 &dt[1], &op1_vectype, &op1_def_stmt_info))
6079 if (dump_enabled_p ())
6080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6081 "use not simple.\n");
6082 return false;
6085 /* Multiple types in SLP are handled by creating the appropriate number of
6086 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6087 case of SLP. */
6088 if (slp_node)
6089 ncopies = 1;
6090 else
6091 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6093 gcc_assert (ncopies >= 1);
6095 /* Determine whether the shift amount is a vector, or scalar. If the
6096 shift/rotate amount is a vector, use the vector/vector shift optabs. */
6098 if ((dt[1] == vect_internal_def
6099 || dt[1] == vect_induction_def
6100 || dt[1] == vect_nested_cycle)
6101 && !slp_node)
6102 scalar_shift_arg = false;
6103 else if (dt[1] == vect_constant_def
6104 || dt[1] == vect_external_def
6105 || dt[1] == vect_internal_def)
6107 /* In SLP, we need to check whether the shift count is the same
6108 for all statements; in loops, if it is a constant or invariant,
6109 it is always a scalar shift. */
6110 if (slp_node)
6112 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
6113 stmt_vec_info slpstmt_info;
6115 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
6117 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
6118 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
6119 scalar_shift_arg = false;
6122 /* For internal SLP defs we have to make sure we see scalar stmts
6123 for all vector elements.
6124 ??? For different vectors we could resort to a different
6125 scalar shift operand but code-generation below simply always
6126 takes the first. */
6127 if (dt[1] == vect_internal_def
6128 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
6129 stmts.length ()))
6130 scalar_shift_arg = false;
6133 /* If the shift amount is computed by a pattern stmt we cannot
6134 use the scalar amount directly thus give up and use a vector
6135 shift. */
6136 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
6137 scalar_shift_arg = false;
6139 else
6141 if (dump_enabled_p ())
6142 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6143 "operand mode requires invariant argument.\n");
6144 return false;
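/* Illustrative examples (not from any particular test case): for
   'x[i] << 3' the shift amount is invariant and SCALAR_SHIFT_ARG
   stays true, so the vector/scalar optab can be used; for
   'x[i] << y[i]' the amount is a vect_internal_def and, outside of
   SLP, SCALAR_SHIFT_ARG is cleared so the vector/vector optab is
   required.  */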
6147 /* Vector shifted by vector. */
6148 bool was_scalar_shift_arg = scalar_shift_arg;
6149 if (!scalar_shift_arg)
6151 optab = optab_for_tree_code (code, vectype, optab_vector);
6152 if (dump_enabled_p ())
6153 dump_printf_loc (MSG_NOTE, vect_location,
6154 "vector/vector shift/rotate found.\n");
6156 if (!op1_vectype)
6157 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
6158 slp_op1);
6159 incompatible_op1_vectype_p
6160 = (op1_vectype == NULL_TREE
6161 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
6162 TYPE_VECTOR_SUBPARTS (vectype))
6163 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
6164 if (incompatible_op1_vectype_p
6165 && (!slp_node
6166 || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
6167 || slp_op1->refcnt != 1))
6169 if (dump_enabled_p ())
6170 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6171 "unusable type for last operand in"
6172 " vector/vector shift/rotate.\n");
6173 return false;
6176 /* See if the machine has a vector shifted by scalar insn and if not
6177 then see if it has a vector shifted by vector insn. */
6178 else
6180 optab = optab_for_tree_code (code, vectype, optab_scalar);
6181 if (optab
6182 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
6184 if (dump_enabled_p ())
6185 dump_printf_loc (MSG_NOTE, vect_location,
6186 "vector/scalar shift/rotate found.\n");
6188 else
6190 optab = optab_for_tree_code (code, vectype, optab_vector);
6191 if (optab
6192 && (optab_handler (optab, TYPE_MODE (vectype))
6193 != CODE_FOR_nothing))
6195 scalar_shift_arg = false;
6197 if (dump_enabled_p ())
6198 dump_printf_loc (MSG_NOTE, vect_location,
6199 "vector/vector shift/rotate found.\n");
6201 if (!op1_vectype)
6202 op1_vectype = get_vectype_for_scalar_type (vinfo,
6203 TREE_TYPE (op1),
6204 slp_op1);
6206 /* Unlike the other binary operators, shifts/rotates have
6207 an rhs of type int rather than the same type as the lhs,
6208 so make sure the scalar has the right type when we are
6209 dealing with vectors of long long/long/short/char. */
6210 incompatible_op1_vectype_p
6211 = (!op1_vectype
6212 || !tree_nop_conversion_p (TREE_TYPE (vectype),
6213 TREE_TYPE (op1)));
6214 if (incompatible_op1_vectype_p
6215 && dt[1] == vect_internal_def)
6217 if (dump_enabled_p ())
6218 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6219 "unusable type for last operand in"
6220 " vector/vector shift/rotate.\n");
6221 return false;
6227 /* Supportable by target? */
6228 if (!optab)
6230 if (dump_enabled_p ())
6231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6232 "no optab.\n");
6233 return false;
6235 vec_mode = TYPE_MODE (vectype);
6236 icode = (int) optab_handler (optab, vec_mode);
6237 if (icode == CODE_FOR_nothing)
6239 if (dump_enabled_p ())
6240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6241 "op not supported by target.\n");
6242 return false;
6244 /* vector lowering cannot optimize vector shifts using word arithmetic. */
6245 if (vect_emulated_vector_p (vectype))
6246 return false;
6248 if (!vec_stmt) /* transformation not required. */
6250 if (slp_node
6251 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6252 || ((!scalar_shift_arg || dt[1] == vect_internal_def)
6253 && (!incompatible_op1_vectype_p
6254 || dt[1] == vect_constant_def)
6255 && !vect_maybe_update_slp_op_vectype
6256 (slp_op1,
6257 incompatible_op1_vectype_p ? vectype : op1_vectype))))
6259 if (dump_enabled_p ())
6260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6261 "incompatible vector types for invariants\n");
6262 return false;
6264 /* Now adjust the constant shift amount in place. */
6265 if (slp_node
6266 && incompatible_op1_vectype_p
6267 && dt[1] == vect_constant_def)
6269 for (unsigned i = 0;
6270 i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
6272 SLP_TREE_SCALAR_OPS (slp_op1)[i]
6273 = fold_convert (TREE_TYPE (vectype),
6274 SLP_TREE_SCALAR_OPS (slp_op1)[i]);
6275 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
6276 == INTEGER_CST));
6279 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
6280 DUMP_VECT_SCOPE ("vectorizable_shift");
6281 vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
6282 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
6283 return true;
6286 /* Transform. */
6288 if (dump_enabled_p ())
6289 dump_printf_loc (MSG_NOTE, vect_location,
6290 "transform binary/unary operation.\n");
6292 if (incompatible_op1_vectype_p && !slp_node)
6294 gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
6295 op1 = fold_convert (TREE_TYPE (vectype), op1);
6296 if (dt[1] != vect_constant_def)
6297 op1 = vect_init_vector (vinfo, stmt_info, op1,
6298 TREE_TYPE (vectype), NULL);
6301 /* Handle def. */
6302 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6304 if (scalar_shift_arg && dt[1] != vect_internal_def)
6306 /* Vector shl and shr insn patterns can be defined with scalar
6307 operand 2 (shift operand). In this case, use constant or loop
6308 invariant op1 directly, without extending it to vector mode
6309 first. */
6310 optab_op2_mode = insn_data[icode].operand[2].mode;
6311 if (!VECTOR_MODE_P (optab_op2_mode))
6313 if (dump_enabled_p ())
6314 dump_printf_loc (MSG_NOTE, vect_location,
6315 "operand 1 using scalar mode.\n");
6316 vec_oprnd1 = op1;
6317 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
6318 vec_oprnds1.quick_push (vec_oprnd1);
6319 /* Store vec_oprnd1 for every vector stmt to be created.
6320 We check during the analysis that all the shift arguments
6321 are the same.
6322 TODO: Allow different constants for different vector
6323 stmts generated for an SLP instance. */
6324 for (k = 0;
6325 k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
6326 vec_oprnds1.quick_push (vec_oprnd1);
6329 else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
6331 if (was_scalar_shift_arg)
6333 /* If the argument was the same in all lanes, create
6334 the correctly typed vector shift amount directly. */
6335 op1 = fold_convert (TREE_TYPE (vectype), op1);
6336 op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
6337 !loop_vinfo ? gsi : NULL);
6338 vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
6339 !loop_vinfo ? gsi : NULL);
6340 vec_oprnds1.create (slp_node->vec_stmts_size);
6341 for (k = 0; k < slp_node->vec_stmts_size; k++)
6342 vec_oprnds1.quick_push (vec_oprnd1);
6344 else if (dt[1] == vect_constant_def)
6345 /* The constant shift amount has been adjusted in place. */
6347 else
6348 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
6351 /* vec_oprnd1 is available if operand 1 should be of a scalar type
6352 (a special case for certain kinds of vector shifts); otherwise,
6353 operand 1 should be of a vector type (the usual case). */
6354 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6355 op0, &vec_oprnds0,
6356 vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);
6358 /* Arguments are ready. Create the new vector stmt. */
6359 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6361 /* For internal defs where we need to use a scalar shift arg,
6362 extract the first lane. */
6363 if (scalar_shift_arg && dt[1] == vect_internal_def)
6365 vop1 = vec_oprnds1[0];
6366 new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
6367 gassign *new_stmt
6368 = gimple_build_assign (new_temp,
6369 build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
6370 vop1,
6371 TYPE_SIZE (TREE_TYPE (new_temp)),
6372 bitsize_zero_node));
6373 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6374 vop1 = new_temp;
6376 else
6377 vop1 = vec_oprnds1[i];
6378 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
6379 new_temp = make_ssa_name (vec_dest, new_stmt);
6380 gimple_assign_set_lhs (new_stmt, new_temp);
6381 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6382 if (slp_node)
6383 slp_node->push_vec_def (new_stmt);
6384 else
6385 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
6388 if (!slp_node)
6389 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
6391 vec_oprnds0.release ();
6392 vec_oprnds1.release ();
6394 return true;
6397 /* Function vectorizable_operation.
6399 Check if STMT_INFO performs a binary, unary or ternary operation that can
6400 be vectorized.
6401 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6402 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6403 Return true if STMT_INFO is vectorizable in this way. */
6405 static bool
6406 vectorizable_operation (vec_info *vinfo,
6407 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
6408 gimple **vec_stmt, slp_tree slp_node,
6409 stmt_vector_for_cost *cost_vec)
6411 tree vec_dest;
6412 tree scalar_dest;
6413 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
6414 tree vectype;
6415 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
6416 enum tree_code code, orig_code;
6417 machine_mode vec_mode;
6418 tree new_temp;
6419 int op_type;
6420 optab optab;
6421 bool target_support_p;
6422 enum vect_def_type dt[3]
6423 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
6424 int ndts = 3;
6425 poly_uint64 nunits_in;
6426 poly_uint64 nunits_out;
6427 tree vectype_out;
6428 int ncopies, vec_num;
6429 int i;
6430 vec<tree> vec_oprnds0 = vNULL;
6431 vec<tree> vec_oprnds1 = vNULL;
6432 vec<tree> vec_oprnds2 = vNULL;
6433 tree vop0, vop1, vop2;
6434 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
6436 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6437 return false;
6439 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6440 && ! vec_stmt)
6441 return false;
6443 /* Is STMT a vectorizable binary/unary operation? */
6444 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
6445 if (!stmt)
6446 return false;
6448 /* Loads and stores are handled in vectorizable_{load,store}. */
6449 if (STMT_VINFO_DATA_REF (stmt_info))
6450 return false;
6452 orig_code = code = gimple_assign_rhs_code (stmt);
6454 /* Shifts are handled in vectorizable_shift. */
6455 if (code == LSHIFT_EXPR
6456 || code == RSHIFT_EXPR
6457 || code == LROTATE_EXPR
6458 || code == RROTATE_EXPR)
6459 return false;
6461 /* Comparisons are handled in vectorizable_comparison. */
6462 if (TREE_CODE_CLASS (code) == tcc_comparison)
6463 return false;
6465 /* Conditions are handled in vectorizable_condition. */
6466 if (code == COND_EXPR)
6467 return false;
6469 /* For pointer addition and subtraction, we should use the normal
6470 plus and minus for the vector operation. */
6471 if (code == POINTER_PLUS_EXPR)
6472 code = PLUS_EXPR;
6473 if (code == POINTER_DIFF_EXPR)
6474 code = MINUS_EXPR;
6476 /* Support only unary or binary operations. */
6477 op_type = TREE_CODE_LENGTH (code);
6478 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
6480 if (dump_enabled_p ())
6481 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6482 "num. args = %d (not unary/binary/ternary op).\n",
6483 op_type);
6484 return false;
6487 scalar_dest = gimple_assign_lhs (stmt);
6488 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
6490 /* Most operations cannot handle bit-precision types without extra
6491 truncations. */
6492 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
6493 if (!mask_op_p
6494 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
6495 /* Exception are bitwise binary operations. */
6496 && code != BIT_IOR_EXPR
6497 && code != BIT_XOR_EXPR
6498 && code != BIT_AND_EXPR)
6500 if (dump_enabled_p ())
6501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6502 "bit-precision arithmetic not supported.\n");
6503 return false;
6506 slp_tree slp_op0;
6507 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6508 0, &op0, &slp_op0, &dt[0], &vectype))
6510 if (dump_enabled_p ())
6511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6512 "use not simple.\n");
6513 return false;
6515 bool is_invariant = (dt[0] == vect_external_def
6516 || dt[0] == vect_constant_def);
6517 /* If op0 is an external or constant def, infer the vector type
6518 from the scalar type. */
6519 if (!vectype)
6521 /* For a boolean type we cannot determine the vectype from an
6522 invariant value (we don't know whether it is a vector
6523 of booleans or a vector of integers). We use the output
6524 vectype because operations on booleans don't change
6525 the type. */
6526 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6528 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6530 if (dump_enabled_p ())
6531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6532 "not supported operation on bool value.\n");
6533 return false;
6535 vectype = vectype_out;
6537 else
6538 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6539 slp_node);
6541 if (vec_stmt)
6542 gcc_assert (vectype);
6543 if (!vectype)
6545 if (dump_enabled_p ())
6546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6547 "no vectype for scalar type %T\n",
6548 TREE_TYPE (op0));
6550 return false;
6553 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6554 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6555 if (maybe_ne (nunits_out, nunits_in))
6556 return false;
6558 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6559 slp_tree slp_op1 = NULL, slp_op2 = NULL;
6560 if (op_type == binary_op || op_type == ternary_op)
6562 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6563 1, &op1, &slp_op1, &dt[1], &vectype2))
6565 if (dump_enabled_p ())
6566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6567 "use not simple.\n");
6568 return false;
6570 is_invariant &= (dt[1] == vect_external_def
6571 || dt[1] == vect_constant_def);
6572 if (vectype2
6573 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
6574 return false;
6576 if (op_type == ternary_op)
6578 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
6579 2, &op2, &slp_op2, &dt[2], &vectype3))
6581 if (dump_enabled_p ())
6582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6583 "use not simple.\n");
6584 return false;
6586 is_invariant &= (dt[2] == vect_external_def
6587 || dt[2] == vect_constant_def);
6588 if (vectype3
6589 && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
6590 return false;
6593 /* Multiple types in SLP are handled by creating the appropriate number of
6594 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6595 case of SLP. */
6596 if (slp_node)
6598 ncopies = 1;
6599 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6601 else
6603 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6604 vec_num = 1;
6607 gcc_assert (ncopies >= 1);
6609 /* Reject attempts to combine mask types with nonmask types, e.g. if
6610 we have an AND between a (nonmask) boolean loaded from memory and
6611 a (mask) boolean result of a comparison.
6613 TODO: We could easily fix these cases up using pattern statements. */
6614 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6615 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6616 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6618 if (dump_enabled_p ())
6619 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6620 "mixed mask and nonmask vector types\n");
6621 return false;
6624 /* Supportable by target? */
6626 vec_mode = TYPE_MODE (vectype);
6627 if (code == MULT_HIGHPART_EXPR)
6628 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6629 else
6631 optab = optab_for_tree_code (code, vectype, optab_default);
6632 if (!optab)
6634 if (dump_enabled_p ())
6635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6636 "no optab.\n");
6637 return false;
6639 target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
6640 || optab_libfunc (optab, vec_mode));
6643 bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
6644 if (!target_support_p || using_emulated_vectors_p)
6646 if (dump_enabled_p ())
6647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6648 "op not supported by target.\n");
6649 /* When vec_mode is not a vector mode and we have verified that
6650 ops we do not have to lower (like AND) are natively supported,
6651 let those through even when the mode isn't word_mode. For ops
6652 we do have to lower, the lowering code assumes we are dealing
6653 with word_mode. */
6654 if ((((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
6655 || !target_support_p)
6656 && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
6657 /* Check only during analysis. */
6658 || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
6660 if (dump_enabled_p ())
6661 dump_printf (MSG_NOTE, "using word mode not possible.\n");
6662 return false;
6664 if (dump_enabled_p ())
6665 dump_printf_loc (MSG_NOTE, vect_location,
6666 "proceeding using word mode.\n");
6667 using_emulated_vectors_p = true;
6670 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6671 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6672 vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
6673 internal_fn cond_fn = get_conditional_internal_fn (code);
6674 internal_fn cond_len_fn = get_conditional_len_internal_fn (code);
6676 /* If operating on inactive elements could generate spurious traps,
6677 we need to restrict the operation to active lanes. Note that this
6678 specifically doesn't apply to unhoisted invariants, since they
6679 operate on the same value for every lane.
6681 Similarly, if this operation is part of a reduction, a fully-masked
6682 loop should only change the active lanes of the reduction chain,
6683 keeping the inactive lanes as-is. */
6684 bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
6685 || reduc_idx >= 0);
6687 if (!vec_stmt) /* transformation not required. */
6689 if (loop_vinfo
6690 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
6691 && mask_out_inactive)
6693 if (cond_len_fn != IFN_LAST
6694 && direct_internal_fn_supported_p (cond_len_fn, vectype,
6695 OPTIMIZE_FOR_SPEED))
6696 vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
6698 else if (cond_fn != IFN_LAST
6699 && direct_internal_fn_supported_p (cond_fn, vectype,
6700 OPTIMIZE_FOR_SPEED))
6701 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6702 vectype, NULL);
6703 else
6705 if (dump_enabled_p ())
6706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6707 "can't use a fully-masked loop because no"
6708 " conditional operation is available.\n");
6709 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
6713 /* Put types on constant and invariant SLP children. */
6714 if (slp_node
6715 && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
6716 || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
6717 || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
6719 if (dump_enabled_p ())
6720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6721 "incompatible vector types for invariants\n");
6722 return false;
6725 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6726 DUMP_VECT_SCOPE ("vectorizable_operation");
6727 vect_model_simple_cost (vinfo, stmt_info,
6728 ncopies, dt, ndts, slp_node, cost_vec);
6729 if (using_emulated_vectors_p)
6731 /* The above vect_model_simple_cost call handles constants
6732 in the prologue and (mis-)costs one of the stmts as
6733 vector stmt. See below for the actual lowering that will
6734 be applied. */
6735 unsigned n
6736 = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
6737 switch (code)
6739 case PLUS_EXPR:
6740 n *= 5;
6741 break;
6742 case MINUS_EXPR:
6743 n *= 6;
6744 break;
6745 case NEGATE_EXPR:
6746 n *= 4;
6747 break;
6748 default:
6749 /* Bit operations do not have extra cost and are accounted
6750 as vector stmt by vect_model_simple_cost. */
6751 n = 0;
6752 break;
6754 if (n != 0)
6756 /* We also need to materialize two large constants. */
6757 record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
6758 0, vect_prologue);
6759 record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
6760 0, vect_body);
6763 return true;
6766 /* Transform. */
6768 if (dump_enabled_p ())
6769 dump_printf_loc (MSG_NOTE, vect_location,
6770 "transform binary/unary operation.\n");
6772 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6773 bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);
6775 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6776 vectors with unsigned elements, but the result is signed. So, we
6777 need to compute the MINUS_EXPR into a vectype temporary and
6778 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6779 tree vec_cvt_dest = NULL_TREE;
6780 if (orig_code == POINTER_DIFF_EXPR)
6782 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6783 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6785 /* Handle def. */
6786 else
6787 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6789 /* In case the vectorization factor (VF) is bigger than the number
6790 of elements that we can fit in a vectype (nunits), we have to generate
6791 more than one vector stmt, i.e., we need to "unroll" the
6792 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6793 from one copy of the vector stmt to the next, in the field
6794 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6795 stages to find the correct vector defs to be used when vectorizing
6796 stmts that use the defs of the current stmt. The example below
6797 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6798 we need to create 4 vectorized stmts):
6800 before vectorization:
6801 RELATED_STMT VEC_STMT
6802 S1: x = memref - -
6803 S2: z = x + 1 - -
6805 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6806 there):
6807 RELATED_STMT VEC_STMT
6808 VS1_0: vx0 = memref0 VS1_1 -
6809 VS1_1: vx1 = memref1 VS1_2 -
6810 VS1_2: vx2 = memref2 VS1_3 -
6811 VS1_3: vx3 = memref3 - -
6812 S1: x = load - VS1_0
6813 S2: z = x + 1 - -
6815 step2: vectorize stmt S2 (done here):
6816 To vectorize stmt S2 we first need to find the relevant vector
6817 def for the first operand 'x'. This is, as usual, obtained from
6818 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6819 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6820 relevant vector def 'vx0'. Having found 'vx0' we can generate
6821 the vector stmt VS2_0, and as usual, record it in the
6822 STMT_VINFO_VEC_STMT of stmt S2.
6823 When creating the second copy (VS2_1), we obtain the relevant vector
6824 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6825 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6826 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6827 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6828 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6829 chain of stmts and pointers:
6830 RELATED_STMT VEC_STMT
6831 VS1_0: vx0 = memref0 VS1_1 -
6832 VS1_1: vx1 = memref1 VS1_2 -
6833 VS1_2: vx2 = memref2 VS1_3 -
6834 VS1_3: vx3 = memref3 - -
6835 S1: x = load - VS1_0
6836 VS2_0: vz0 = vx0 + v1 VS2_1 -
6837 VS2_1: vz1 = vx1 + v1 VS2_2 -
6838 VS2_2: vz2 = vx2 + v1 VS2_3 -
6839 VS2_3: vz3 = vx3 + v1 - -
6840 S2: z = x + 1 - VS2_0 */
6842 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
6843 op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
6844 /* Arguments are ready. Create the new vector stmt. */
6845 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6847 gimple *new_stmt = NULL;
6848 vop1 = ((op_type == binary_op || op_type == ternary_op)
6849 ? vec_oprnds1[i] : NULL_TREE);
6850 vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
6851 if (using_emulated_vectors_p
6852 && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
6854 /* Lower the operation. This follows vector lowering. */
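/* Rough sketch of the trick used below (mirroring the generic vector
   lowering): the vector is reinterpreted as one wide integer, the
   per-element most-significant bits are handled separately (cleared
   for addition, pre-set for subtraction) so that carries or borrows
   cannot cross element boundaries within the word, and the correct
   sign bits are then patched back in with XOR.  */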
6855 unsigned int width = vector_element_bits (vectype);
6856 tree inner_type = TREE_TYPE (vectype);
6857 tree word_type
6858 = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
6859 HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
6860 tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
6861 tree high_bits
6862 = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
6863 tree wvop0 = make_ssa_name (word_type);
6864 new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
6865 build1 (VIEW_CONVERT_EXPR,
6866 word_type, vop0));
6867 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6868 tree result_low, signs;
6869 if (code == PLUS_EXPR || code == MINUS_EXPR)
6871 tree wvop1 = make_ssa_name (word_type);
6872 new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
6873 build1 (VIEW_CONVERT_EXPR,
6874 word_type, vop1));
6875 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6876 signs = make_ssa_name (word_type);
6877 new_stmt = gimple_build_assign (signs,
6878 BIT_XOR_EXPR, wvop0, wvop1);
6879 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6880 tree b_low = make_ssa_name (word_type);
6881 new_stmt = gimple_build_assign (b_low,
6882 BIT_AND_EXPR, wvop1, low_bits);
6883 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6884 tree a_low = make_ssa_name (word_type);
6885 if (code == PLUS_EXPR)
6886 new_stmt = gimple_build_assign (a_low,
6887 BIT_AND_EXPR, wvop0, low_bits);
6888 else
6889 new_stmt = gimple_build_assign (a_low,
6890 BIT_IOR_EXPR, wvop0, high_bits);
6891 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6892 if (code == MINUS_EXPR)
6894 new_stmt = gimple_build_assign (NULL_TREE,
6895 BIT_NOT_EXPR, signs);
6896 signs = make_ssa_name (word_type);
6897 gimple_assign_set_lhs (new_stmt, signs);
6898 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6900 new_stmt = gimple_build_assign (NULL_TREE,
6901 BIT_AND_EXPR, signs, high_bits);
6902 signs = make_ssa_name (word_type);
6903 gimple_assign_set_lhs (new_stmt, signs);
6904 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6905 result_low = make_ssa_name (word_type);
6906 new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
6907 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6909 else
6911 tree a_low = make_ssa_name (word_type);
6912 new_stmt = gimple_build_assign (a_low,
6913 BIT_AND_EXPR, wvop0, low_bits);
6914 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6915 signs = make_ssa_name (word_type);
6916 new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
6917 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6918 new_stmt = gimple_build_assign (NULL_TREE,
6919 BIT_AND_EXPR, signs, high_bits);
6920 signs = make_ssa_name (word_type);
6921 gimple_assign_set_lhs (new_stmt, signs);
6922 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6923 result_low = make_ssa_name (word_type);
6924 new_stmt = gimple_build_assign (result_low,
6925 MINUS_EXPR, high_bits, a_low);
6926 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6928 new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
6929 signs);
6930 result_low = make_ssa_name (word_type);
6931 gimple_assign_set_lhs (new_stmt, result_low);
6932 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
6933 new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
6934 build1 (VIEW_CONVERT_EXPR,
6935 vectype, result_low));
6936 new_temp = make_ssa_name (vectype);
6937 gimple_assign_set_lhs (new_stmt, new_temp);
6938 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
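/* For partially-vectorized loops the branch below emits a conditional
   internal function instead of a plain assignment; e.g. (illustrative
   GIMPLE only) a PLUS_EXPR becomes something like
   _t = .COND_ADD (loop_mask, vop0, vop1, else_val);
   or its length-based .COND_LEN_* variant, so that inactive lanes
   neither trap nor disturb a reduction chain.  */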
6940 else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
6942 tree mask;
6943 if (masked_loop_p)
6944 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
6945 vec_num * ncopies, vectype, i);
6946 else
6947 /* Dummy mask. */
6948 mask = build_minus_one_cst (truth_type_for (vectype));
6949 auto_vec<tree> vops (6);
6950 vops.quick_push (mask);
6951 vops.quick_push (vop0);
6952 if (vop1)
6953 vops.quick_push (vop1);
6954 if (vop2)
6955 vops.quick_push (vop2);
6956 if (reduc_idx >= 0)
6958 /* Perform the operation on active elements only and take
6959 inactive elements from the reduction chain input. */
6960 gcc_assert (!vop2);
6961 vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
6963 else
6965 auto else_value = targetm.preferred_else_value
6966 (cond_fn, vectype, vops.length () - 1, &vops[1]);
6967 vops.quick_push (else_value);
6969 if (len_loop_p)
6971 tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
6972 vec_num * ncopies, vectype, i, 1);
6973 signed char biasval
6974 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
6975 tree bias = build_int_cst (intQI_type_node, biasval);
6976 vops.quick_push (len);
6977 vops.quick_push (bias);
6979 gcall *call
6980 = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
6981 : cond_len_fn,
6982 vops);
6983 new_temp = make_ssa_name (vec_dest, call);
6984 gimple_call_set_lhs (call, new_temp);
6985 gimple_call_set_nothrow (call, true);
6986 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
6987 new_stmt = call;
6989 else
6991 tree mask = NULL_TREE;
6992 /* When combining two masks, check whether either of them is elsewhere
6993 combined with a loop mask; if that's the case, we can mark the
6994 new combined mask as not needing to be combined with a loop mask. */
6995 if (masked_loop_p
6996 && code == BIT_AND_EXPR
6997 && VECTOR_BOOLEAN_TYPE_P (vectype))
6999 if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
7000 ncopies}))
7002 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7003 vec_num * ncopies, vectype, i);
7005 vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7006 vop0, gsi);
7009 if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
7010 ncopies }))
7012 mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
7013 vec_num * ncopies, vectype, i);
7015 vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
7016 vop1, gsi);
7020 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
7021 new_temp = make_ssa_name (vec_dest, new_stmt);
7022 gimple_assign_set_lhs (new_stmt, new_temp);
7023 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
7024 if (using_emulated_vectors_p)
7025 suppress_warning (new_stmt, OPT_Wvector_operation_performance);
7027 /* Enter the combined value into the vector cond hash so we don't
7028 AND it with a loop mask again. */
7029 if (mask)
7030 loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
7033 if (vec_cvt_dest)
7035 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
7036 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
7037 new_temp);
7038 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
7039 gimple_assign_set_lhs (new_stmt, new_temp);
7040 vect_finish_stmt_generation (vinfo, stmt_info,
7041 new_stmt, gsi);
7044 if (slp_node)
7045 slp_node->push_vec_def (new_stmt);
7046 else
7047 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
7050 if (!slp_node)
7051 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7053 vec_oprnds0.release ();
7054 vec_oprnds1.release ();
7055 vec_oprnds2.release ();
7057 return true;
7060 /* A helper function to ensure data reference DR_INFO's base alignment. */
7062 static void
7063 ensure_base_align (dr_vec_info *dr_info)
7065 /* Alignment is only analyzed for the first element of a DR group;
7066 use that to determine the base alignment we need to enforce. */
7067 if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
7068 dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));
7070 gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);
7072 if (dr_info->base_misaligned)
7074 tree base_decl = dr_info->base_decl;
7076 // We should only be able to increase the alignment of a base object if
7077 // we know what its new alignment should be at compile time.
7078 unsigned HOST_WIDE_INT align_base_to =
7079 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
7081 if (decl_in_symtab_p (base_decl))
7082 symtab_node::get (base_decl)->increase_alignment (align_base_to);
7083 else if (DECL_ALIGN (base_decl) < align_base_to)
7085 SET_DECL_ALIGN (base_decl, align_base_to);
7086 DECL_USER_ALIGN (base_decl) = 1;
7088 dr_info->base_misaligned = false;
7093 /* Function get_group_alias_ptr_type.
7095 Return the alias type for the group starting at FIRST_STMT_INFO. */
7097 static tree
7098 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
7100 struct data_reference *first_dr, *next_dr;
7102 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
7103 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
7104 while (next_stmt_info)
7106 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
7107 if (get_alias_set (DR_REF (first_dr))
7108 != get_alias_set (DR_REF (next_dr)))
7110 if (dump_enabled_p ())
7111 dump_printf_loc (MSG_NOTE, vect_location,
7112 "conflicting alias set types.\n");
7113 return ptr_type_node;
7115 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7117 return reference_alias_ptr_type (DR_REF (first_dr));
7121 /* Function scan_operand_equal_p.
7123 Helper function for check_scan_store. Compare two references
7124 with .GOMP_SIMD_LANE bases. */
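/* A sketch of the expected shapes (they depend on earlier lowering):
   D.2042[_25] may reach this function either as an ARRAY_REF or as a
   MEM_REF whose base is _x = &D.2042 p+ _25 * 4; both spellings are
   meant to compare equal here.  */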
7126 static bool
7127 scan_operand_equal_p (tree ref1, tree ref2)
7129 tree ref[2] = { ref1, ref2 };
7130 poly_int64 bitsize[2], bitpos[2];
7131 tree offset[2], base[2];
7132 for (int i = 0; i < 2; ++i)
7134 machine_mode mode;
7135 int unsignedp, reversep, volatilep = 0;
7136 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
7137 &offset[i], &mode, &unsignedp,
7138 &reversep, &volatilep);
7139 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
7140 return false;
7141 if (TREE_CODE (base[i]) == MEM_REF
7142 && offset[i] == NULL_TREE
7143 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
7145 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
7146 if (is_gimple_assign (def_stmt)
7147 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
7148 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
7149 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
7151 if (maybe_ne (mem_ref_offset (base[i]), 0))
7152 return false;
7153 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
7154 offset[i] = gimple_assign_rhs2 (def_stmt);
7159 if (!operand_equal_p (base[0], base[1], 0))
7160 return false;
7161 if (maybe_ne (bitsize[0], bitsize[1]))
7162 return false;
7163 if (offset[0] != offset[1])
7165 if (!offset[0] || !offset[1])
7166 return false;
7167 if (!operand_equal_p (offset[0], offset[1], 0))
7169 tree step[2];
7170 for (int i = 0; i < 2; ++i)
7172 step[i] = integer_one_node;
7173 if (TREE_CODE (offset[i]) == SSA_NAME)
7175 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7176 if (is_gimple_assign (def_stmt)
7177 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
7178 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
7179 == INTEGER_CST))
7181 step[i] = gimple_assign_rhs2 (def_stmt);
7182 offset[i] = gimple_assign_rhs1 (def_stmt);
7185 else if (TREE_CODE (offset[i]) == MULT_EXPR)
7187 step[i] = TREE_OPERAND (offset[i], 1);
7188 offset[i] = TREE_OPERAND (offset[i], 0);
7190 tree rhs1 = NULL_TREE;
7191 if (TREE_CODE (offset[i]) == SSA_NAME)
7193 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
7194 if (gimple_assign_cast_p (def_stmt))
7195 rhs1 = gimple_assign_rhs1 (def_stmt);
7197 else if (CONVERT_EXPR_P (offset[i]))
7198 rhs1 = TREE_OPERAND (offset[i], 0);
7199 if (rhs1
7200 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
7201 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
7202 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
7203 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
7204 offset[i] = rhs1;
7206 if (!operand_equal_p (offset[0], offset[1], 0)
7207 || !operand_equal_p (step[0], step[1], 0))
7208 return false;
7211 return true;
7215 enum scan_store_kind {
7216 /* Normal permutation. */
7217 scan_store_kind_perm,
7219 /* Whole vector left shift permutation with zero init. */
7220 scan_store_kind_lshift_zero,
7222 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7223 scan_store_kind_lshift_cond
7226 /* Function scan_store_can_perm_p.
7228 Verify if we can perform the needed permutations or whole vector shifts.
7229 Return -1 on failure, otherwise the exact log2 of vectype's nunits.
7230 USE_WHOLE_VECTOR, if non-NULL, records which scan_store_kind operation
7231 to use at each step (it stays empty if plain permutations suffice). */
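/* For example (an illustrative case assuming an 8-element vector), the
   selectors tried for i = 0 .. units_log2 are
     i == 0: { 0, 8, 9, 10, 11, 12, 13, 14 }
     i == 1: { 0, 1, 8, 9, 10, 11, 12, 13 }
     i == 2: { 0, 1, 2, 3, 8, 9, 10, 11 }
     i == 3: { 7, 7, 7, 7, 7, 7, 7, 7 }
   matching the VEC_PERM_EXPRs in the check_scan_store comment below.  */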
7233 static int
7234 scan_store_can_perm_p (tree vectype, tree init,
7235 vec<enum scan_store_kind> *use_whole_vector = NULL)
7237 enum machine_mode vec_mode = TYPE_MODE (vectype);
7238 unsigned HOST_WIDE_INT nunits;
7239 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7240 return -1;
7241 int units_log2 = exact_log2 (nunits);
7242 if (units_log2 <= 0)
7243 return -1;
7245 int i;
7246 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
7247 for (i = 0; i <= units_log2; ++i)
7249 unsigned HOST_WIDE_INT j, k;
7250 enum scan_store_kind kind = scan_store_kind_perm;
7251 vec_perm_builder sel (nunits, nunits, 1);
7252 sel.quick_grow (nunits);
7253 if (i == units_log2)
7255 for (j = 0; j < nunits; ++j)
7256 sel[j] = nunits - 1;
7258 else
7260 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7261 sel[j] = j;
7262 for (k = 0; j < nunits; ++j, ++k)
7263 sel[j] = nunits + k;
7265 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7266 if (!can_vec_perm_const_p (vec_mode, vec_mode, indices))
7268 if (i == units_log2)
7269 return -1;
7271 if (whole_vector_shift_kind == scan_store_kind_perm)
7273 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
7274 return -1;
7275 whole_vector_shift_kind = scan_store_kind_lshift_zero;
7276 /* Whole vector shifts shift in zeros, so if init is all zero
7277 constant, there is no need to do anything further. */
7278 if ((TREE_CODE (init) != INTEGER_CST
7279 && TREE_CODE (init) != REAL_CST)
7280 || !initializer_zerop (init))
7282 tree masktype = truth_type_for (vectype);
7283 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
7284 return -1;
7285 whole_vector_shift_kind = scan_store_kind_lshift_cond;
7288 kind = whole_vector_shift_kind;
7290 if (use_whole_vector)
7292 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
7293 use_whole_vector->safe_grow_cleared (i, true);
7294 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
7295 use_whole_vector->safe_push (kind);
7299 return units_log2;
7303 /* Function check_scan_store.
7305 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7307 static bool
7308 check_scan_store (vec_info *vinfo, stmt_vec_info stmt_info, tree vectype,
7309 enum vect_def_type rhs_dt, bool slp, tree mask,
7310 vect_memory_access_type memory_access_type)
7312 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7313 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7314 tree ref_type;
7316 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
7317 if (slp
7318 || mask
7319 || memory_access_type != VMAT_CONTIGUOUS
7320 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
7321 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
7322 || loop_vinfo == NULL
7323 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7324 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
7325 || !integer_zerop (get_dr_vinfo_offset (vinfo, dr_info))
7326 || !integer_zerop (DR_INIT (dr_info->dr))
7327 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
7328 || !alias_sets_conflict_p (get_alias_set (vectype),
7329 get_alias_set (TREE_TYPE (ref_type))))
7331 if (dump_enabled_p ())
7332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7333 "unsupported OpenMP scan store.\n");
7334 return false;
7337 /* We need to pattern match code built by OpenMP lowering and simplified
7338 by subsequent optimizations into something we can handle.
7339 #pragma omp simd reduction(inscan,+:r)
7340 for (...)
7342 r += something ();
7343 #pragma omp scan inclusive (r)
7344 use (r);
7346 shall have body with:
7347 // Initialization for input phase, store the reduction initializer:
7348 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7349 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7350 D.2042[_21] = 0;
7351 // Actual input phase:
7353 r.0_5 = D.2042[_20];
7354 _6 = _4 + r.0_5;
7355 D.2042[_20] = _6;
7356 // Initialization for scan phase:
7357 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7358 _26 = D.2043[_25];
7359 _27 = D.2042[_25];
7360 _28 = _26 + _27;
7361 D.2043[_25] = _28;
7362 D.2042[_25] = _28;
7363 // Actual scan phase:
7365 r.1_8 = D.2042[_20];
7367 The "omp simd array" variable D.2042 holds the privatized copy used
7368 inside of the loop and D.2043 is another one that holds copies of
7369 the current original list item. The separate GOMP_SIMD_LANE ifn
7370 kinds are there in order to allow optimizing the initializer store
7371 and combiner sequence, e.g. if it is originally some C++ish user-defined
7372 reduction, but still allow the vectorizer to pattern recognize it
7373 and turn it into the appropriate vectorized scan.
7375 For exclusive scan, this is slightly different:
7376 #pragma omp simd reduction(inscan,+:r)
7377 for (...)
7379 use (r);
7380 #pragma omp scan exclusive (r)
7381 r += something ();
7383 shall have body with:
7384 // Initialization for input phase, store the reduction initializer:
7385 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7386 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7387 D.2042[_21] = 0;
7388 // Actual input phase:
7390 r.0_5 = D.2042[_20];
7391 _6 = _4 + r.0_5;
7392 D.2042[_20] = _6;
7393 // Initialization for scan phase:
7394 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7395 _26 = D.2043[_25];
7396 D.2044[_25] = _26;
7397 _27 = D.2042[_25];
7398 _28 = _26 + _27;
7399 D.2043[_25] = _28;
7400 // Actual scan phase:
7402 r.1_8 = D.2044[_20];
7403 ... */
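/* In the checks below STMT_VINFO_SIMD_LANE_ACCESS_P is relied on to
   distinguish the stores above: 2 for the initializer store
   (D.2042[_21] = 0), 3 for the inclusive scan combiner stores and 4 for
   the exclusive scan variant, which additionally stores into D.2044.  */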
7405 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
7407 /* Match the D.2042[_21] = 0; store above. Just require that
7408 it is a constant or external definition store. */
7409 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
7411 fail_init:
7412 if (dump_enabled_p ())
7413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7414 "unsupported OpenMP scan initializer store.\n");
7415 return false;
7418 if (! loop_vinfo->scan_map)
7419 loop_vinfo->scan_map = new hash_map<tree, tree>;
7420 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7421 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
7422 if (cached)
7423 goto fail_init;
7424 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
7426 /* These stores can be vectorized normally. */
7427 return true;
7430 if (rhs_dt != vect_internal_def)
7432 fail:
7433 if (dump_enabled_p ())
7434 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7435 "unsupported OpenMP scan combiner pattern.\n");
7436 return false;
7439 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7440 tree rhs = gimple_assign_rhs1 (stmt);
7441 if (TREE_CODE (rhs) != SSA_NAME)
7442 goto fail;
7444 gimple *other_store_stmt = NULL;
7445 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7446 bool inscan_var_store
7447 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7449 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7451 if (!inscan_var_store)
7453 use_operand_p use_p;
7454 imm_use_iterator iter;
7455 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7457 gimple *use_stmt = USE_STMT (use_p);
7458 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7459 continue;
7460 if (gimple_bb (use_stmt) != gimple_bb (stmt)
7461 || !is_gimple_assign (use_stmt)
7462 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
7463 || other_store_stmt
7464 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
7465 goto fail;
7466 other_store_stmt = use_stmt;
7468 if (other_store_stmt == NULL)
7469 goto fail;
7470 rhs = gimple_assign_lhs (other_store_stmt);
7471 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
7472 goto fail;
7475 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
7477 use_operand_p use_p;
7478 imm_use_iterator iter;
7479 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7481 gimple *use_stmt = USE_STMT (use_p);
7482 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7483 continue;
7484 if (other_store_stmt)
7485 goto fail;
7486 other_store_stmt = use_stmt;
7489 else
7490 goto fail;
7492 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7493 if (gimple_bb (def_stmt) != gimple_bb (stmt)
7494 || !is_gimple_assign (def_stmt)
7495 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
7496 goto fail;
7498 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7499 /* For pointer addition, we should use the normal plus for the vector
7500 operation. */
7501 switch (code)
7503 case POINTER_PLUS_EXPR:
7504 code = PLUS_EXPR;
7505 break;
7506 case MULT_HIGHPART_EXPR:
7507 goto fail;
7508 default:
7509 break;
7511 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
7512 goto fail;
7514 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7515 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7516 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
7517 goto fail;
7519 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7520 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7521 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
7522 || !gimple_assign_load_p (load1_stmt)
7523 || gimple_bb (load2_stmt) != gimple_bb (stmt)
7524 || !gimple_assign_load_p (load2_stmt))
7525 goto fail;
7527 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7528 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7529 if (load1_stmt_info == NULL
7530 || load2_stmt_info == NULL
7531 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
7532 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
7533 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
7534 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7535 goto fail;
7537 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
7539 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7540 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
7541 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
7542 goto fail;
7543 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7544 tree lrhs;
7545 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7546 lrhs = rhs1;
7547 else
7548 lrhs = rhs2;
7549 use_operand_p use_p;
7550 imm_use_iterator iter;
7551 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
7553 gimple *use_stmt = USE_STMT (use_p);
7554 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
7555 continue;
7556 if (other_store_stmt)
7557 goto fail;
7558 other_store_stmt = use_stmt;
7562 if (other_store_stmt == NULL)
7563 goto fail;
7564 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
7565 || !gimple_store_p (other_store_stmt))
7566 goto fail;
7568 stmt_vec_info other_store_stmt_info
7569 = loop_vinfo->lookup_stmt (other_store_stmt);
7570 if (other_store_stmt_info == NULL
7571 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
7572 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
7573 goto fail;
7575 gimple *stmt1 = stmt;
7576 gimple *stmt2 = other_store_stmt;
7577 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7578 std::swap (stmt1, stmt2);
7579 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
7580 gimple_assign_rhs1 (load2_stmt)))
7582 std::swap (rhs1, rhs2);
7583 std::swap (load1_stmt, load2_stmt);
7584 std::swap (load1_stmt_info, load2_stmt_info);
7586 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
7587 gimple_assign_rhs1 (load1_stmt)))
7588 goto fail;
7590 tree var3 = NULL_TREE;
7591 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
7592 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
7593 gimple_assign_rhs1 (load2_stmt)))
7594 goto fail;
7595 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7597 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7598 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
7599 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
7600 goto fail;
7601 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7602 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
7603 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
7604 || lookup_attribute ("omp simd inscan exclusive",
7605 DECL_ATTRIBUTES (var3)))
7606 goto fail;
7609 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
7610 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
7611 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
7612 goto fail;
7614 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7615 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
7616 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
7617 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
7618 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7619 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
7620 goto fail;
7622 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7623 std::swap (var1, var2);
7625 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7627 if (!lookup_attribute ("omp simd inscan exclusive",
7628 DECL_ATTRIBUTES (var1)))
7629 goto fail;
7630 var1 = var3;
7633 if (loop_vinfo->scan_map == NULL)
7634 goto fail;
7635 tree *init = loop_vinfo->scan_map->get (var1);
7636 if (init == NULL)
7637 goto fail;
7639 /* The IL is as expected; now check whether we can actually vectorize it.
7640 Inclusive scan:
7641 _26 = D.2043[_25];
7642 _27 = D.2042[_25];
7643 _28 = _26 + _27;
7644 D.2043[_25] = _28;
7645 D.2042[_25] = _28;
7646 should be vectorized as (where _40 is the vectorized rhs
7647 from the D.2042[_21] = 0; store):
7648 _30 = MEM <vector(8) int> [(int *)&D.2043];
7649 _31 = MEM <vector(8) int> [(int *)&D.2042];
7650 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7651 _33 = _31 + _32;
7652 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
7653 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7654 _35 = _33 + _34;
7655 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7656 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
7657 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7658 _37 = _35 + _36;
7659 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7660 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
7661 _38 = _30 + _37;
7662 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7663 MEM <vector(8) int> [(int *)&D.2043] = _39;
7664 MEM <vector(8) int> [(int *)&D.2042] = _38;
7665 Exclusive scan:
7666 _26 = D.2043[_25];
7667 D.2044[_25] = _26;
7668 _27 = D.2042[_25];
7669 _28 = _26 + _27;
7670 D.2043[_25] = _28;
7671 should be vectorized as (where _40 is the vectorized rhs
7672 from the D.2042[_21] = 0; store):
7673 _30 = MEM <vector(8) int> [(int *)&D.2043];
7674 _31 = MEM <vector(8) int> [(int *)&D.2042];
7675 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7676 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
7677 _34 = _32 + _33;
7678 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
7679 // _31[3]+_31[4], ... _31[5]+.._31[6] };
7680 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
7681 _36 = _34 + _35;
7682 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7683 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
7684 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
7685 _38 = _36 + _37;
7686 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
7687 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
7688 _39 = _30 + _38;
7689 _50 = _31 + _39;
7690 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
7691 MEM <vector(8) int> [(int *)&D.2044] = _39;
7692 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7693 enum machine_mode vec_mode = TYPE_MODE (vectype);
7694 optab optab = optab_for_tree_code (code, vectype, optab_default);
7695 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7696 goto fail;
7698 int units_log2 = scan_store_can_perm_p (vectype, *init);
7699 if (units_log2 == -1)
7700 goto fail;
7702 return true;
7706 /* Function vectorizable_scan_store.
7708 Helper of vectorizable_store; arguments are as for vectorizable_store.
7709 Handle only the transformation; the checking is done in check_scan_store. */
7711 static bool
7712 vectorizable_scan_store (vec_info *vinfo,
7713 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7714 gimple **vec_stmt, int ncopies)
7716 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7717 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7718 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7719 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7721 if (dump_enabled_p ())
7722 dump_printf_loc (MSG_NOTE, vect_location,
7723 "transform scan store. ncopies = %d\n", ncopies);
7725 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7726 tree rhs = gimple_assign_rhs1 (stmt);
7727 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7729 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7730 bool inscan_var_store
7731 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7733 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7735 use_operand_p use_p;
7736 imm_use_iterator iter;
7737 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7739 gimple *use_stmt = USE_STMT (use_p);
7740 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7741 continue;
7742 rhs = gimple_assign_lhs (use_stmt);
7743 break;
7747 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7748 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7749 if (code == POINTER_PLUS_EXPR)
7750 code = PLUS_EXPR;
7751 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7752 && commutative_tree_code (code));
7753 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7754 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7755 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7756 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7757 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7758 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7759 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7760 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7761 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7762 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7763 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7765 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7767 std::swap (rhs1, rhs2);
7768 std::swap (var1, var2);
7769 std::swap (load1_dr_info, load2_dr_info);
7772 tree *init = loop_vinfo->scan_map->get (var1);
7773 gcc_assert (init);
7775 unsigned HOST_WIDE_INT nunits;
7776 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7777 gcc_unreachable ();
7778 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7779 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7780 gcc_assert (units_log2 > 0);
7781 auto_vec<tree, 16> perms;
7782 perms.quick_grow (units_log2 + 1);
7783 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
7784 for (int i = 0; i <= units_log2; ++i)
7786 unsigned HOST_WIDE_INT j, k;
7787 vec_perm_builder sel (nunits, nunits, 1);
7788 sel.quick_grow (nunits);
7789 if (i == units_log2)
7790 for (j = 0; j < nunits; ++j)
7791 sel[j] = nunits - 1;
7792 else
7794 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7795 sel[j] = j;
7796 for (k = 0; j < nunits; ++j, ++k)
7797 sel[j] = nunits + k;
7799 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7800 if (!use_whole_vector.is_empty ()
7801 && use_whole_vector[i] != scan_store_kind_perm)
7803 if (zero_vec == NULL_TREE)
7804 zero_vec = build_zero_cst (vectype);
7805 if (masktype == NULL_TREE
7806 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7807 masktype = truth_type_for (vectype);
7808 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7810 else
7811 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7814 tree vec_oprnd1 = NULL_TREE;
7815 tree vec_oprnd2 = NULL_TREE;
7816 tree vec_oprnd3 = NULL_TREE;
7817 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7818 tree dataref_offset = build_int_cst (ref_type, 0);
7819 tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
7820 vectype, VMAT_CONTIGUOUS);
7821 tree ldataref_ptr = NULL_TREE;
7822 tree orig = NULL_TREE;
7823 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7824 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7825 auto_vec<tree> vec_oprnds1;
7826 auto_vec<tree> vec_oprnds2;
7827 auto_vec<tree> vec_oprnds3;
7828 vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
7829 *init, &vec_oprnds1,
7830 ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
7831 rhs2, &vec_oprnds3);
7832 for (int j = 0; j < ncopies; j++)
7834 vec_oprnd1 = vec_oprnds1[j];
7835 if (ldataref_ptr == NULL)
7836 vec_oprnd2 = vec_oprnds2[j];
7837 vec_oprnd3 = vec_oprnds3[j];
7838 if (j == 0)
7839 orig = vec_oprnd3;
7840 else if (!inscan_var_store)
7841 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7843 if (ldataref_ptr)
7845 vec_oprnd2 = make_ssa_name (vectype);
7846 tree data_ref = fold_build2 (MEM_REF, vectype,
7847 unshare_expr (ldataref_ptr),
7848 dataref_offset);
7849 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7850 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7851 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7852 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7853 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7856 tree v = vec_oprnd2;
7857 for (int i = 0; i < units_log2; ++i)
7859 tree new_temp = make_ssa_name (vectype);
7860 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7861 (zero_vec
7862 && (use_whole_vector[i]
7863 != scan_store_kind_perm))
7864 ? zero_vec : vec_oprnd1, v,
7865 perms[i]);
7866 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7867 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7868 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
7870 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7872 /* The whole-vector shift shifted in zeros, but if *init
7873 is not initializer_zerop, we need to replace those elements
7874 with elements from vec_oprnd1. */
7875 tree_vector_builder vb (masktype, nunits, 1);
7876 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7877 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7878 ? boolean_false_node : boolean_true_node);
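	      /* E.g. with 8 elements and i == 1 this builds the mask
		 { 0, 0, 1, 1, 1, 1, 1, 1 }: the first 1 << i lanes take their
		 value from vec_oprnd1 (the vectorized *init), the remaining
		 lanes keep the shifted value.  */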
7880 tree new_temp2 = make_ssa_name (vectype);
7881 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7882 new_temp, vec_oprnd1);
7883 vect_finish_stmt_generation (vinfo, stmt_info,
7884 g, gsi);
7885 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7886 new_temp = new_temp2;
7889 /* For exclusive scan, perform the perms[i] permutation once
7890 more. */
7891 if (i == 0
7892 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7893 && v == vec_oprnd2)
7895 v = new_temp;
7896 --i;
7897 continue;
7900 tree new_temp2 = make_ssa_name (vectype);
7901 g = gimple_build_assign (new_temp2, code, v, new_temp);
7902 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7903 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7905 v = new_temp2;
7908 tree new_temp = make_ssa_name (vectype);
7909 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7910 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7911 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7913 tree last_perm_arg = new_temp;
7914 /* For exclusive scan, new_temp computed above is the exclusive scan
7915 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7916 of the last element into orig. */
7917 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7919 last_perm_arg = make_ssa_name (vectype);
7920 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7921 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7922 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7925 orig = make_ssa_name (vectype);
7926 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7927 last_perm_arg, perms[units_log2]);
7928 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7929 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7931 if (!inscan_var_store)
7933 tree data_ref = fold_build2 (MEM_REF, vectype,
7934 unshare_expr (dataref_ptr),
7935 dataref_offset);
7936 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7937 g = gimple_build_assign (data_ref, new_temp);
7938 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7939 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7943 if (inscan_var_store)
7944 for (int j = 0; j < ncopies; j++)
7946 if (j != 0)
7947 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7949 tree data_ref = fold_build2 (MEM_REF, vectype,
7950 unshare_expr (dataref_ptr),
7951 dataref_offset);
7952 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7953 gimple *g = gimple_build_assign (data_ref, orig);
7954 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
7955 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
7957 return true;
7961 /* Function vectorizable_store.
7963 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7964 that can be vectorized.
7965 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7966 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7967 Return true if STMT_INFO is vectorizable in this way. */
7969 static bool
7970 vectorizable_store (vec_info *vinfo,
7971 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7972 gimple **vec_stmt, slp_tree slp_node,
7973 stmt_vector_for_cost *cost_vec)
7975 tree data_ref;
7976 tree op;
7977 tree vec_oprnd = NULL_TREE;
7978 tree elem_type;
7979 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7980 class loop *loop = NULL;
7981 machine_mode vec_mode;
7982 tree dummy;
7983 enum vect_def_type rhs_dt = vect_unknown_def_type;
7984 enum vect_def_type mask_dt = vect_unknown_def_type;
7985 tree dataref_ptr = NULL_TREE;
7986 tree dataref_offset = NULL_TREE;
7987 gimple *ptr_incr = NULL;
7988 int ncopies;
7989 int j;
7990 stmt_vec_info first_stmt_info;
7991 bool grouped_store;
7992 unsigned int group_size, i;
7993 vec<tree> oprnds = vNULL;
7994 vec<tree> result_chain = vNULL;
7995 vec<tree> vec_oprnds = vNULL;
7996 bool slp = (slp_node != NULL);
7997 unsigned int vec_num;
7998 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
7999 tree aggr_type;
8000 gather_scatter_info gs_info;
8001 poly_uint64 vf;
8002 vec_load_store_type vls_type;
8003 tree ref_type;
8005 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8006 return false;
8008 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8009 && ! vec_stmt)
8010 return false;
8012 /* Is vectorizable store? */
8014 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8015 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8017 tree scalar_dest = gimple_assign_lhs (assign);
8018 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
8019 && is_pattern_stmt_p (stmt_info))
8020 scalar_dest = TREE_OPERAND (scalar_dest, 0);
8021 if (TREE_CODE (scalar_dest) != ARRAY_REF
8022 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
8023 && TREE_CODE (scalar_dest) != INDIRECT_REF
8024 && TREE_CODE (scalar_dest) != COMPONENT_REF
8025 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
8026 && TREE_CODE (scalar_dest) != REALPART_EXPR
8027 && TREE_CODE (scalar_dest) != MEM_REF)
8028 return false;
8030 else
8032 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8033 if (!call || !gimple_call_internal_p (call))
8034 return false;
8036 internal_fn ifn = gimple_call_internal_fn (call);
8037 if (!internal_store_fn_p (ifn))
8038 return false;
8040 if (slp_node != NULL)
8042 if (dump_enabled_p ())
8043 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8044 "SLP of masked stores not supported.\n");
8045 return false;
8048 int mask_index = internal_fn_mask_index (ifn);
8049 if (mask_index >= 0
8050 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
8051 &mask, NULL, &mask_dt, &mask_vectype))
8052 return false;
8055 op = vect_get_store_rhs (stmt_info);
8057 /* Cannot have hybrid store SLP -- that would mean storing to the
8058 same location twice. */
8059 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
8061 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
8062 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8064 if (loop_vinfo)
8066 loop = LOOP_VINFO_LOOP (loop_vinfo);
8067 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8069 else
8070 vf = 1;
8072 /* Multiple types in SLP are handled by creating the appropriate number of
8073 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8074 case of SLP. */
8075 if (slp)
8076 ncopies = 1;
8077 else
8078 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8080 gcc_assert (ncopies >= 1);
8082 /* FORNOW. This restriction should be relaxed. */
8083 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
8085 if (dump_enabled_p ())
8086 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8087 "multiple types in nested loop.\n");
8088 return false;
8091 if (!vect_check_store_rhs (vinfo, stmt_info, slp_node,
8092 op, &rhs_dt, &rhs_vectype, &vls_type))
8093 return false;
8095 elem_type = TREE_TYPE (vectype);
8096 vec_mode = TYPE_MODE (vectype);
8098 if (!STMT_VINFO_DATA_REF (stmt_info))
8099 return false;
8101 vect_memory_access_type memory_access_type;
8102 enum dr_alignment_support alignment_support_scheme;
8103 int misalignment;
8104 poly_int64 poffset;
8105 internal_fn lanes_ifn;
8106 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, vls_type,
8107 ncopies, &memory_access_type, &poffset,
8108 &alignment_support_scheme, &misalignment, &gs_info,
8109 &lanes_ifn))
8110 return false;
8112 if (mask)
8114 if (memory_access_type == VMAT_CONTIGUOUS)
8116 if (!VECTOR_MODE_P (vec_mode)
8117 || !can_vec_mask_load_store_p (vec_mode,
8118 TYPE_MODE (mask_vectype), false))
8119 return false;
8121 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8122 && (memory_access_type != VMAT_GATHER_SCATTER
8123 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
8125 if (dump_enabled_p ())
8126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8127 "unsupported access type for masked store.\n");
8128 return false;
8130 else if (memory_access_type == VMAT_GATHER_SCATTER
8131 && gs_info.ifn == IFN_LAST
8132 && !gs_info.decl)
8134 if (dump_enabled_p ())
8135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8136 "unsupported masked emulated scatter.\n");
8137 return false;
8140 else
8142 /* FORNOW. In some cases we can vectorize even if the data-type is not
8143 supported (e.g. array initialization with 0). */
8144 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
8145 return false;
8148 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8149 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
8150 && memory_access_type != VMAT_GATHER_SCATTER
8151 && (slp || memory_access_type != VMAT_CONTIGUOUS));
8152 if (grouped_store)
8154 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8155 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8156 group_size = DR_GROUP_SIZE (first_stmt_info);
8158 else
8160 first_stmt_info = stmt_info;
8161 first_dr_info = dr_info;
8162 group_size = vec_num = 1;
8165 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
8167 if (!check_scan_store (vinfo, stmt_info, vectype, rhs_dt, slp, mask,
8168 memory_access_type))
8169 return false;
8172 if (!vec_stmt) /* transformation not required. */
8174 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8176 if (loop_vinfo
8177 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
8178 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
8179 vls_type, group_size,
8180 memory_access_type, &gs_info,
8181 mask);
8183 if (slp_node
8184 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
8185 vectype))
8187 if (dump_enabled_p ())
8188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8189 "incompatible vector types for invariants\n");
8190 return false;
8193 if (dump_enabled_p ()
8194 && memory_access_type != VMAT_ELEMENTWISE
8195 && memory_access_type != VMAT_GATHER_SCATTER
8196 && alignment_support_scheme != dr_aligned)
8197 dump_printf_loc (MSG_NOTE, vect_location,
8198 "Vectorizing an unaligned access.\n");
8200 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
8201 vect_model_store_cost (vinfo, stmt_info, ncopies,
8202 memory_access_type, &gs_info,
8203 alignment_support_scheme,
8204 misalignment, vls_type, slp_node, cost_vec);
8205 return true;
8207 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8209 /* Transform. */
8211 ensure_base_align (dr_info);
8213 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8215 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
8216 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
8217 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
8218 tree ptr, var, scale, vec_mask;
8219 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
8220 tree mask_halfvectype = mask_vectype;
8221 edge pe = loop_preheader_edge (loop);
8222 gimple_seq seq;
8223 basic_block new_bb;
8224 enum { NARROW, NONE, WIDEN } modifier;
8225 poly_uint64 scatter_off_nunits
8226 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
8228 if (known_eq (nunits, scatter_off_nunits))
8229 modifier = NONE;
8230 else if (known_eq (nunits * 2, scatter_off_nunits))
8232 modifier = WIDEN;
8234 /* Currently gathers and scatters are only supported for
8235 fixed-length vectors. */
8236 unsigned int count = scatter_off_nunits.to_constant ();
8237 vec_perm_builder sel (count, count, 1);
8238 for (i = 0; i < (unsigned int) count; ++i)
8239 sel.quick_push (i | (count / 2));
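	      /* A concrete instance (assuming count == 8): the selector is
		 { 4, 5, 6, 7, 4, 5, 6, 7 }, i.e. for odd-numbered copies the
		 high half of the wider offset vector is moved into the low
		 half.  */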
8241 vec_perm_indices indices (sel, 1, count);
8242 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
8243 indices);
8244 gcc_assert (perm_mask != NULL_TREE);
8246 else if (known_eq (nunits, scatter_off_nunits * 2))
8248 modifier = NARROW;
8250 /* Currently gathers and scatters are only supported for
8251 fixed-length vectors. */
8252 unsigned int count = nunits.to_constant ();
8253 vec_perm_builder sel (count, count, 1);
8254 for (i = 0; i < (unsigned int) count; ++i)
8255 sel.quick_push (i | (count / 2));
8257 vec_perm_indices indices (sel, 2, count);
8258 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
8259 gcc_assert (perm_mask != NULL_TREE);
8260 ncopies *= 2;
8262 if (mask)
8263 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
8265 else
8266 gcc_unreachable ();
8268 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
8269 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
8270 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
8271 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
8272 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
8273 scaletype = TREE_VALUE (arglist);
8275 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
8276 && TREE_CODE (rettype) == VOID_TYPE);
8278 ptr = fold_convert (ptrtype, gs_info.base);
8279 if (!is_gimple_min_invariant (ptr))
8281 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
8282 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
8283 gcc_assert (!new_bb);
8286 if (mask == NULL_TREE)
8288 mask_arg = build_int_cst (masktype, -1);
8289 mask_arg = vect_init_vector (vinfo, stmt_info,
8290 mask_arg, masktype, NULL);
8293 scale = build_int_cst (scaletype, gs_info.scale);
8295 auto_vec<tree> vec_oprnds0;
8296 auto_vec<tree> vec_oprnds1;
8297 auto_vec<tree> vec_masks;
8298 if (mask)
8300 tree mask_vectype = truth_type_for (vectype);
8301 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8302 modifier == NARROW
8303 ? ncopies / 2 : ncopies,
8304 mask, &vec_masks, mask_vectype);
8306 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8307 modifier == WIDEN
8308 ? ncopies / 2 : ncopies,
8309 gs_info.offset, &vec_oprnds0);
8310 vect_get_vec_defs_for_operand (vinfo, stmt_info,
8311 modifier == NARROW
8312 ? ncopies / 2 : ncopies,
8313 op, &vec_oprnds1);
8314 for (j = 0; j < ncopies; ++j)
8316 if (modifier == WIDEN)
8318 if (j & 1)
8319 op = permute_vec_elements (vinfo, vec_oprnd0, vec_oprnd0,
8320 perm_mask, stmt_info, gsi);
8321 else
8322 op = vec_oprnd0 = vec_oprnds0[j / 2];
8323 src = vec_oprnd1 = vec_oprnds1[j];
8324 if (mask)
8325 mask_op = vec_mask = vec_masks[j];
8327 else if (modifier == NARROW)
8329 if (j & 1)
8330 src = permute_vec_elements (vinfo, vec_oprnd1, vec_oprnd1,
8331 perm_mask, stmt_info, gsi);
8332 else
8333 src = vec_oprnd1 = vec_oprnds1[j / 2];
8334 op = vec_oprnd0 = vec_oprnds0[j];
8335 if (mask)
8336 mask_op = vec_mask = vec_masks[j / 2];
8338 else
8340 op = vec_oprnd0 = vec_oprnds0[j];
8341 src = vec_oprnd1 = vec_oprnds1[j];
8342 if (mask)
8343 mask_op = vec_mask = vec_masks[j];
8346 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
8348 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
8349 TYPE_VECTOR_SUBPARTS (srctype)));
8350 var = vect_get_new_ssa_name (srctype, vect_simple_var);
8351 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
8352 gassign *new_stmt
8353 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
8354 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8355 src = var;
8358 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
8360 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
8361 TYPE_VECTOR_SUBPARTS (idxtype)));
8362 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
8363 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
8364 gassign *new_stmt
8365 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
8366 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8367 op = var;
8370 if (mask)
8372 tree utype;
8373 mask_arg = mask_op;
8374 if (modifier == NARROW)
8376 var = vect_get_new_ssa_name (mask_halfvectype,
8377 vect_simple_var);
8378 gassign *new_stmt
8379 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
8380 : VEC_UNPACK_LO_EXPR,
8381 mask_op);
8382 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8383 mask_arg = var;
8385 tree optype = TREE_TYPE (mask_arg);
8386 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
8387 utype = masktype;
8388 else
8389 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
8390 var = vect_get_new_ssa_name (utype, vect_scalar_var);
8391 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
8392 gassign *new_stmt
8393 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
8394 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8395 mask_arg = var;
8396 if (!useless_type_conversion_p (masktype, utype))
8398 gcc_assert (TYPE_PRECISION (utype)
8399 <= TYPE_PRECISION (masktype));
8400 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
8401 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
8402 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8403 mask_arg = var;
8407 gcall *new_stmt
8408 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
8409 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
8411 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
8413 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
8414 return true;
8416 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
8417 return vectorizable_scan_store (vinfo, stmt_info, gsi, vec_stmt, ncopies);
8419 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8420 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
8422 if (grouped_store)
8424 /* FORNOW */
8425 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
8427 /* We vectorize all the stmts of the interleaving group when we
8428 reach the last stmt in the group. */
8429 if (DR_GROUP_STORE_COUNT (first_stmt_info)
8430 < DR_GROUP_SIZE (first_stmt_info)
8431 && !slp)
8433 *vec_stmt = NULL;
8434 return true;
8437 if (slp)
8439 grouped_store = false;
8440 /* VEC_NUM is the number of vect stmts to be created for this
8441 group. */
8442 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8443 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
8444 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
8445 == first_stmt_info);
8446 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8447 op = vect_get_store_rhs (first_stmt_info);
8449 else
8450 /* VEC_NUM is the number of vect stmts to be created for this
8451 group. */
8452 vec_num = group_size;
8454 ref_type = get_group_alias_ptr_type (first_stmt_info);
8456 else
8457 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
8459 if (dump_enabled_p ())
8460 dump_printf_loc (MSG_NOTE, vect_location,
8461 "transform store. ncopies = %d\n", ncopies);
8463 if (memory_access_type == VMAT_ELEMENTWISE
8464 || memory_access_type == VMAT_STRIDED_SLP)
8466 gimple_stmt_iterator incr_gsi;
8467 bool insert_after;
8468 gimple *incr;
8469 tree offvar;
8470 tree ivstep;
8471 tree running_off;
8472 tree stride_base, stride_step, alias_off;
8473 tree vec_oprnd;
8474 tree dr_offset;
8475 unsigned int g;
8476 /* Checked by get_load_store_type. */
8477 unsigned int const_nunits = nunits.to_constant ();
8479 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8480 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
8482 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
8483 stride_base
8484 = fold_build_pointer_plus
8485 (DR_BASE_ADDRESS (first_dr_info->dr),
8486 size_binop (PLUS_EXPR,
8487 convert_to_ptrofftype (dr_offset),
8488 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8489 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8491 /* For a store with loop-invariant (but other than power-of-2)
8492 stride (i.e. not a grouped access) like so:
8494 for (i = 0; i < n; i += stride)
8495 array[i] = ...;
8497 we generate a new induction variable and new stores from
8498 the components of the (vectorized) rhs:
8500 for (j = 0; ; j += VF*stride)
8501 vectemp = ...;
8502 tmp1 = vectemp[0];
8503 array[j] = tmp1;
8504 tmp2 = vectemp[1];
8505 array[j + stride] = tmp2;
8509 unsigned nstores = const_nunits;
8510 unsigned lnel = 1;
8511 tree ltype = elem_type;
8512 tree lvectype = vectype;
8513 if (slp)
8515 if (group_size < const_nunits
8516 && const_nunits % group_size == 0)
8518 nstores = const_nunits / group_size;
8519 lnel = group_size;
8520 ltype = build_vector_type (elem_type, group_size);
8521 lvectype = vectype;
8523 /* First check whether the vec_extract optab can extract the vector
8524 elts directly; if not, try extracting via a same-size integer type below. */
8525 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
8526 machine_mode vmode;
8527 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8528 || !related_vector_mode (TYPE_MODE (vectype), elmode,
8529 group_size).exists (&vmode)
8530 || (convert_optab_handler (vec_extract_optab,
8531 TYPE_MODE (vectype), vmode)
8532 == CODE_FOR_nothing))
8534 /* Try to avoid emitting an extract of vector elements
8535 by performing the extracts using an integer type of the
8536 same size, extracting from a vector of those and then
8537 re-interpreting it as the original vector type if
8538 supported. */
8539 unsigned lsize
8540 = group_size * GET_MODE_BITSIZE (elmode);
8541 unsigned int lnunits = const_nunits / group_size;
8542 /* If we can't construct such a vector fall back to
8543 element extracts from the original vector type and
8544 element size stores. */
8545 if (int_mode_for_size (lsize, 0).exists (&elmode)
8546 && VECTOR_MODE_P (TYPE_MODE (vectype))
8547 && related_vector_mode (TYPE_MODE (vectype), elmode,
8548 lnunits).exists (&vmode)
8549 && (convert_optab_handler (vec_extract_optab,
8550 vmode, elmode)
8551 != CODE_FOR_nothing))
8553 nstores = lnunits;
8554 lnel = group_size;
8555 ltype = build_nonstandard_integer_type (lsize, 1);
8556 lvectype = build_vector_type (ltype, nstores);
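		      /* A worked example (hypothetical, assuming a V8SF store
			 of groups of two floats where V2SF extracts are not
			 supported): lsize == 64, so ltype becomes a 64-bit
			 integer, lvectype a 4-element vector of those, and
			 each integer extract stores two floats at once.  */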
8558 /* Else fall back to vector extraction anyway.
8559 Fewer stores are more important than avoiding spilling
8560 of the vector we extract from. Compared to the
8561 construction case in vectorizable_load no store-forwarding
8562 issue exists here for reasonable archs. */
8565 else if (group_size >= const_nunits
8566 && group_size % const_nunits == 0)
8568 nstores = 1;
8569 lnel = const_nunits;
8570 ltype = vectype;
8571 lvectype = vectype;
8573 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
8574 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8577 ivstep = stride_step;
8578 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
8579 build_int_cst (TREE_TYPE (ivstep), vf));
8581 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8583 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8584 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8585 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
8586 loop, &incr_gsi, insert_after,
8587 &offvar, NULL);
8588 incr = gsi_stmt (incr_gsi);
8590 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8592 alias_off = build_int_cst (ref_type, 0);
8593 stmt_vec_info next_stmt_info = first_stmt_info;
8594 for (g = 0; g < group_size; g++)
8596 running_off = offvar;
8597 if (g)
8599 tree size = TYPE_SIZE_UNIT (ltype);
8600 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
8601 size);
8602 tree newoff = copy_ssa_name (running_off, NULL);
8603 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8604 running_off, pos);
8605 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8606 running_off = newoff;
8608 if (!slp)
8609 op = vect_get_store_rhs (next_stmt_info);
8610 vect_get_vec_defs (vinfo, next_stmt_info, slp_node, ncopies,
8611 op, &vec_oprnds);
8612 unsigned int group_el = 0;
8613 unsigned HOST_WIDE_INT
8614 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8615 for (j = 0; j < ncopies; j++)
8617 vec_oprnd = vec_oprnds[j];
8618 /* Pun the vector to extract from if necessary. */
8619 if (lvectype != vectype)
8621 tree tem = make_ssa_name (lvectype);
8622 gimple *pun
8623 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
8624 lvectype, vec_oprnd));
8625 vect_finish_stmt_generation (vinfo, stmt_info, pun, gsi);
8626 vec_oprnd = tem;
8628 for (i = 0; i < nstores; i++)
8630 tree newref, newoff;
8631 gimple *incr, *assign;
8632 tree size = TYPE_SIZE (ltype);
8633 /* Extract the i'th component. */
8634 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
8635 bitsize_int (i), size);
8636 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
8637 size, pos);
8639 elem = force_gimple_operand_gsi (gsi, elem, true,
8640 NULL_TREE, true,
8641 GSI_SAME_STMT);
8643 tree this_off = build_int_cst (TREE_TYPE (alias_off),
8644 group_el * elsz);
8645 newref = build2 (MEM_REF, ltype,
8646 running_off, this_off);
8647 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
8649 /* And store it to *running_off. */
8650 assign = gimple_build_assign (newref, elem);
8651 vect_finish_stmt_generation (vinfo, stmt_info, assign, gsi);
8653 group_el += lnel;
8654 if (! slp
8655 || group_el == group_size)
8657 newoff = copy_ssa_name (running_off, NULL);
8658 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
8659 running_off, stride_step);
8660 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
8662 running_off = newoff;
8663 group_el = 0;
8665 if (g == group_size - 1
8666 && !slp)
8668 if (j == 0 && i == 0)
8669 *vec_stmt = assign;
8670 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (assign);
8674 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8675 vec_oprnds.release ();
8676 if (slp)
8677 break;
8680 return true;
8683 auto_vec<tree> dr_chain (group_size);
8684 oprnds.create (group_size);
8686 gcc_assert (alignment_support_scheme);
8687 vec_loop_masks *loop_masks
8688 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8689 ? &LOOP_VINFO_MASKS (loop_vinfo)
8690 : NULL);
8691 vec_loop_lens *loop_lens
8692 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
8693 ? &LOOP_VINFO_LENS (loop_vinfo)
8694 : NULL);
8696 /* Shouldn't go with length-based approach if fully masked. */
8697 gcc_assert (!loop_lens || !loop_masks);
8699 /* Targets with store-lane instructions must not require explicit
8700 realignment. vect_supportable_dr_alignment always returns either
8701 dr_aligned or dr_unaligned_supported for masked operations. */
8702 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8703 && !mask
8704 && !loop_masks)
8705 || alignment_support_scheme == dr_aligned
8706 || alignment_support_scheme == dr_unaligned_supported);
8708 tree offset = NULL_TREE;
8709 if (!known_eq (poffset, 0))
8710 offset = size_int (poffset);
8712 tree bump;
8713 tree vec_offset = NULL_TREE;
8714 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8716 aggr_type = NULL_TREE;
8717 bump = NULL_TREE;
8719 else if (memory_access_type == VMAT_GATHER_SCATTER)
8721 aggr_type = elem_type;
8722 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
8723 &bump, &vec_offset, loop_lens);
8725 else
8727 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8728 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8729 else
8730 aggr_type = vectype;
8731 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
8732 memory_access_type, loop_lens);
8735 if (mask)
8736 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8738 /* In case the vectorization factor (VF) is bigger than the number
8739 of elements that we can fit in a vectype (nunits), we have to generate
8740 more than one vector stmt, i.e. we need to "unroll" the
8741 vector stmt by a factor of VF/nunits. */
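  /* For instance (an illustrative case, not tied to a particular target):
     with VF == 8 and a 4-element vectype, ncopies == 2 and two vector
     stores are emitted per scalar store.  */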
8743 /* In case of interleaving (non-unit grouped access):
8745 S1: &base + 2 = x2
8746 S2: &base = x0
8747 S3: &base + 1 = x1
8748 S4: &base + 3 = x3
8750 We create vectorized stores starting from base address (the access of the
8751 first stmt in the chain (S2 in the above example), when the last store stmt
8752 of the chain (S4) is reached:
8754 VS1: &base = vx2
8755 VS2: &base + vec_size*1 = vx0
8756 VS3: &base + vec_size*2 = vx1
8757 VS4: &base + vec_size*3 = vx3
8759 Then permutation statements are generated:
8761 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8762 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8765 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8766 (the order of the data-refs in the output of vect_permute_store_chain
8767 corresponds to the order of scalar stmts in the interleaving chain - see
8768 the documentation of vect_permute_store_chain()).
8770 In case of both multiple types and interleaving, above vector stores and
8771 permutation stmts are created for every copy. The result vector stmts are
8772 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8773 STMT_VINFO_RELATED_STMT for the next copies.
8776 auto_vec<tree> vec_masks;
8777 tree vec_mask = NULL;
8778 auto_vec<tree> vec_offsets;
8779 auto_vec<vec<tree> > gvec_oprnds;
8780 gvec_oprnds.safe_grow_cleared (group_size, true);
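/* gvec_oprnds[i] will hold the NCOPIES vectorized defs for the i-th store
   of the group; copy J of member I is later picked up as gvec_oprnds[i][j].  */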
8781 for (j = 0; j < ncopies; j++)
8783 gimple *new_stmt;
8784 if (j == 0)
8786 if (slp)
8788 /* Get vectorized arguments for SLP_NODE. */
8789 vect_get_vec_defs (vinfo, stmt_info, slp_node, 1,
8790 op, &vec_oprnds);
8791 vec_oprnd = vec_oprnds[0];
8793 else
8795 /* For interleaved stores we collect vectorized defs for all the
8796 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8797 used as an input to vect_permute_store_chain().
8799 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
8800 and OPRNDS are of size 1. */
8801 stmt_vec_info next_stmt_info = first_stmt_info;
8802 for (i = 0; i < group_size; i++)
8804 /* Since gaps are not supported for interleaved stores,
8805 DR_GROUP_SIZE is the exact number of stmts in the chain.
8806 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
8807 there is no interleaving, DR_GROUP_SIZE is 1,
8808 and only one iteration of the loop will be executed. */
8809 op = vect_get_store_rhs (next_stmt_info);
8810 vect_get_vec_defs_for_operand (vinfo, next_stmt_info,
8811 ncopies, op, &gvec_oprnds[i]);
8812 vec_oprnd = gvec_oprnds[i][0];
8813 dr_chain.quick_push (gvec_oprnds[i][0]);
8814 oprnds.quick_push (gvec_oprnds[i][0]);
8815 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8817 if (mask)
8819 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
8820 mask, &vec_masks, mask_vectype);
8821 vec_mask = vec_masks[0];
8825 /* We should have caught mismatched types earlier. */
8826 gcc_assert (useless_type_conversion_p (vectype,
8827 TREE_TYPE (vec_oprnd)));
8828 bool simd_lane_access_p
8829 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8830 if (simd_lane_access_p
8831 && !loop_masks
8832 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8833 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8834 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
8835 && integer_zerop (DR_INIT (first_dr_info->dr))
8836 && alias_sets_conflict_p (get_alias_set (aggr_type),
8837 get_alias_set (TREE_TYPE (ref_type))))
8839 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8840 dataref_offset = build_int_cst (ref_type, 0);
8842 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8843 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
8844 slp_node, &gs_info, &dataref_ptr,
8845 &vec_offsets);
8846 else
8847 dataref_ptr
8848 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
8849 simd_lane_access_p ? loop : NULL,
8850 offset, &dummy, gsi, &ptr_incr,
8851 simd_lane_access_p, bump);
8853 else
8855 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
8856 /* For interleaved stores we created vectorized defs for all the
8857 defs stored in OPRNDS in the previous iteration (previous copy).
8858 DR_CHAIN is then used as an input to vect_permute_store_chain().
8859 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8860 OPRNDS are of size 1. */
8861 for (i = 0; i < group_size; i++)
8863 vec_oprnd = gvec_oprnds[i][j];
8864 dr_chain[i] = gvec_oprnds[i][j];
8865 oprnds[i] = gvec_oprnds[i][j];
8867 if (mask)
8868 vec_mask = vec_masks[j];
8869 if (dataref_offset)
8870 dataref_offset
8871 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8872 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8873 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
8874 stmt_info, bump);
8877 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8879 tree vec_array;
8881 /* Get an array into which we can store the individual vectors. */
8882 vec_array = create_vector_array (vectype, vec_num);
8884 /* Invalidate the current contents of VEC_ARRAY. This should
8885 become an RTL clobber too, which prevents the vector registers
8886 from being upward-exposed. */
8887 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8889 /* Store the individual vectors into the array. */
8890 for (i = 0; i < vec_num; i++)
8892 vec_oprnd = dr_chain[i];
8893 write_vector_array (vinfo, stmt_info,
8894 gsi, vec_oprnd, vec_array, i);
8897 tree final_mask = NULL;
8898 tree final_len = NULL;
8899 tree bias = NULL;
8900 if (loop_masks)
8901 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8902 ncopies, vectype, j);
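/* If both a loop control mask and a conditional-operation mask exist,
   prepare_vec_mask combines them into the single mask the store uses
   (effectively an AND of the two).  */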
8903 if (vec_mask)
8904 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8905 final_mask, vec_mask, gsi);
8907 if (lanes_ifn == IFN_MASK_LEN_STORE_LANES)
8909 if (loop_lens)
8910 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
8911 ncopies, vectype, j, 1);
8912 else
8913 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
8914 signed char biasval
8915 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
8916 bias = build_int_cst (intQI_type_node, biasval);
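/* Pass an all-ones mask as the 'mask' argument of MASK_LEN_STORE_LANES
   if no mask is otherwise required.  */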
8917 if (!final_mask)
8919 mask_vectype = truth_type_for (vectype);
8920 final_mask = build_minus_one_cst (mask_vectype);
8924 gcall *call;
8925 if (final_len && final_mask)
8927 /* Emit:
8928 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8929 LEN, BIAS, VEC_ARRAY). */
8930 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8931 tree alias_ptr = build_int_cst (ref_type, align);
8932 call = gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES, 6,
8933 dataref_ptr, alias_ptr,
8934 final_mask, final_len, bias,
8935 vec_array);
8937 else if (final_mask)
8939 /* Emit:
8940 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8941 VEC_ARRAY). */
8942 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
8943 tree alias_ptr = build_int_cst (ref_type, align);
8944 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8945 dataref_ptr, alias_ptr,
8946 final_mask, vec_array);
8948 else
8950 /* Emit:
8951 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8952 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8953 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8954 vec_array);
8955 gimple_call_set_lhs (call, data_ref);
8957 gimple_call_set_nothrow (call, true);
8958 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
8959 new_stmt = call;
8961 /* Record that VEC_ARRAY is now dead. */
8962 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
8964 else
8966 new_stmt = NULL;
8967 if (grouped_store)
8969 if (j == 0)
8970 result_chain.create (group_size);
8971 /* Permute. */
8972 vect_permute_store_chain (vinfo, dr_chain, group_size, stmt_info,
8973 gsi, &result_chain);
8976 stmt_vec_info next_stmt_info = first_stmt_info;
8977 for (i = 0; i < vec_num; i++)
8979 unsigned misalign;
8980 unsigned HOST_WIDE_INT align;
8982 tree final_mask = NULL_TREE;
8983 tree final_len = NULL_TREE;
8984 tree bias = NULL_TREE;
8985 if (loop_masks)
8986 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
8987 vec_num * ncopies,
8988 vectype, vec_num * j + i);
8989 if (vec_mask)
8990 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
8991 final_mask, vec_mask, gsi);
8993 if (memory_access_type == VMAT_GATHER_SCATTER
8994 && gs_info.ifn != IFN_LAST)
8996 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8997 vec_offset = vec_offsets[vec_num * j + i];
8998 tree scale = size_int (gs_info.scale);
9000 if (gs_info.ifn == IFN_MASK_LEN_SCATTER_STORE)
9002 if (loop_lens)
9003 final_len
9004 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9005 vec_num * ncopies, vectype,
9006 vec_num * j + i, 1);
9007 else
9008 final_len
9009 = build_int_cst (sizetype,
9010 TYPE_VECTOR_SUBPARTS (vectype));
9011 signed char biasval
9012 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9013 bias = build_int_cst (intQI_type_node, biasval);
9014 if (!final_mask)
9016 mask_vectype = truth_type_for (vectype);
9017 final_mask = build_minus_one_cst (mask_vectype);
9021 gcall *call;
9022 if (final_len && final_mask)
9023 call
9024 = gimple_build_call_internal (IFN_MASK_LEN_SCATTER_STORE,
9025 7, dataref_ptr, vec_offset,
9026 scale, vec_oprnd, final_mask,
9027 final_len, bias);
9028 else if (final_mask)
9029 call = gimple_build_call_internal
9030 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
9031 scale, vec_oprnd, final_mask);
9032 else
9033 call = gimple_build_call_internal
9034 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
9035 scale, vec_oprnd);
9036 gimple_call_set_nothrow (call, true);
9037 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9038 new_stmt = call;
9039 break;
9041 else if (memory_access_type == VMAT_GATHER_SCATTER)
9043 /* Emulated scatter. */
9044 gcc_assert (!final_mask);
9045 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
9046 unsigned HOST_WIDE_INT const_offset_nunits
9047 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
9048 .to_constant ();
9049 vec<constructor_elt, va_gc> *ctor_elts;
9050 vec_alloc (ctor_elts, const_nunits);
9051 gimple_seq stmts = NULL;
9052 tree elt_type = TREE_TYPE (vectype);
9053 unsigned HOST_WIDE_INT elt_size
9054 = tree_to_uhwi (TYPE_SIZE (elt_type));
9055 /* We support offset vectors with more elements
9056 than the data vector for now. */
9057 unsigned HOST_WIDE_INT factor
9058 = const_offset_nunits / const_nunits;
9059 vec_offset = vec_offsets[j / factor];
9060 unsigned elt_offset = (j % factor) * const_nunits;
9061 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
9062 tree scale = size_int (gs_info.scale);
9063 align = get_object_alignment (DR_REF (first_dr_info->dr));
9064 tree ltype = build_aligned_type (TREE_TYPE (vectype), align);
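/* Open-code the scatter: for each element, extract its offset from
   VEC_OFFSET, form the address DATAREF_PTR + offset * SCALE, extract the
   corresponding element of VEC_OPRND and store it through a scalar MEM_REF.  */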
9065 for (unsigned k = 0; k < const_nunits; ++k)
9067 /* Compute the offsetted pointer. */
9068 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
9069 bitsize_int (k + elt_offset));
9070 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
9071 idx_type, vec_offset,
9072 TYPE_SIZE (idx_type), boff);
9073 idx = gimple_convert (&stmts, sizetype, idx);
9074 idx = gimple_build (&stmts, MULT_EXPR,
9075 sizetype, idx, scale);
9076 tree ptr = gimple_build (&stmts, PLUS_EXPR,
9077 TREE_TYPE (dataref_ptr),
9078 dataref_ptr, idx);
9079 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
9080 /* Extract the element to be stored. */
9081 tree elt = gimple_build (&stmts, BIT_FIELD_REF,
9082 TREE_TYPE (vectype), vec_oprnd,
9083 TYPE_SIZE (elt_type),
9084 bitsize_int (k * elt_size));
9085 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
9086 stmts = NULL;
9087 tree ref = build2 (MEM_REF, ltype, ptr,
9088 build_int_cst (ref_type, 0));
9089 new_stmt = gimple_build_assign (ref, elt);
9090 vect_finish_stmt_generation (vinfo, stmt_info,
9091 new_stmt, gsi);
9093 break;
9096 if (i > 0)
9097 /* Bump the vector pointer. */
9098 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
9099 gsi, stmt_info, bump);
9101 if (slp)
9102 vec_oprnd = vec_oprnds[i];
9103 else if (grouped_store)
9104 /* For grouped stores vectorized defs are interleaved in
9105 vect_permute_store_chain(). */
9106 vec_oprnd = result_chain[i];
9108 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9109 if (alignment_support_scheme == dr_aligned)
9110 misalign = 0;
9111 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
9113 align = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
9114 misalign = 0;
9116 else
9117 misalign = misalignment;
9118 if (dataref_offset == NULL_TREE
9119 && TREE_CODE (dataref_ptr) == SSA_NAME)
9120 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
9121 misalign);
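/* The alignment that may be claimed for the access is the largest power
   of two dividing both ALIGN and MISALIGN, i.e. the lowest set bit of
   MISALIGN | ALIGN.  */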
9122 align = least_bit_hwi (misalign | align);
9124 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9126 tree perm_mask = perm_mask_for_reverse (vectype);
9127 tree perm_dest = vect_create_destination_var
9128 (vect_get_store_rhs (stmt_info), vectype);
9129 tree new_temp = make_ssa_name (perm_dest);
9131 /* Generate the permute statement. */
9132 gimple *perm_stmt
9133 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
9134 vec_oprnd, perm_mask);
9135 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9137 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
9138 vec_oprnd = new_temp;
9141 /* Compute IFN when LOOP_LENS or final_mask valid. */
9142 machine_mode vmode = TYPE_MODE (vectype);
9143 machine_mode new_vmode = vmode;
9144 internal_fn partial_ifn = IFN_LAST;
9145 if (loop_lens)
9147 opt_machine_mode new_ovmode
9148 = get_len_load_store_mode (vmode, false, &partial_ifn);
9149 new_vmode = new_ovmode.require ();
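/* If the target only handles the partial store on a VnQI (byte element)
   view of the vector, the length is counted in bytes rather than in
   elements, hence the scaling by the original element size.  */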
9150 unsigned factor
9151 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
9152 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
9153 vec_num * ncopies, vectype,
9154 vec_num * j + i, factor);
9156 else if (final_mask)
9158 if (!can_vec_mask_load_store_p (vmode,
9159 TYPE_MODE (TREE_TYPE (final_mask)),
9160 false, &partial_ifn))
9161 gcc_unreachable ();
9164 if (partial_ifn == IFN_MASK_LEN_STORE)
9166 if (!final_len)
9168 /* Pass VF value to 'len' argument of
9169 MASK_LEN_STORE if LOOP_LENS is invalid. */
9170 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9172 if (!final_mask)
9174 /* Pass all ones value to 'mask' argument of
9175 MASK_LEN_STORE if final_mask is invalid. */
9176 mask_vectype = truth_type_for (vectype);
9177 final_mask = build_minus_one_cst (mask_vectype);
9180 if (final_len)
9182 signed char biasval
9183 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
9185 bias = build_int_cst (intQI_type_node, biasval);
9188 /* Arguments are ready. Create the new vector stmt. */
9189 if (final_len)
9191 gcall *call;
9192 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9193 /* Need conversion if it's wrapped with VnQI. */
9194 if (vmode != new_vmode)
9196 tree new_vtype
9197 = build_vector_type_for_mode (unsigned_intQI_type_node,
9198 new_vmode);
9199 tree var
9200 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
9201 vec_oprnd
9202 = build1 (VIEW_CONVERT_EXPR, new_vtype, vec_oprnd);
9203 gassign *new_stmt
9204 = gimple_build_assign (var, VIEW_CONVERT_EXPR,
9205 vec_oprnd);
9206 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
9207 gsi);
9208 vec_oprnd = var;
9211 if (partial_ifn == IFN_MASK_LEN_STORE)
9212 call = gimple_build_call_internal (IFN_MASK_LEN_STORE, 6,
9213 dataref_ptr, ptr,
9214 final_mask, final_len,
9215 bias, vec_oprnd);
9216 else
9217 call
9218 = gimple_build_call_internal (IFN_LEN_STORE, 5,
9219 dataref_ptr, ptr,
9220 final_len, bias,
9221 vec_oprnd);
9222 gimple_call_set_nothrow (call, true);
9223 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9224 new_stmt = call;
9226 else if (final_mask)
9228 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
9229 gcall *call
9230 = gimple_build_call_internal (IFN_MASK_STORE, 4,
9231 dataref_ptr, ptr,
9232 final_mask, vec_oprnd);
9233 gimple_call_set_nothrow (call, true);
9234 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
9235 new_stmt = call;
9237 else
9239 data_ref = fold_build2 (MEM_REF, vectype,
9240 dataref_ptr,
9241 dataref_offset
9242 ? dataref_offset
9243 : build_int_cst (ref_type, 0));
9244 if (alignment_support_scheme == dr_aligned)
9246 else
9247 TREE_TYPE (data_ref)
9248 = build_aligned_type (TREE_TYPE (data_ref),
9249 align * BITS_PER_UNIT);
9250 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9251 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
9252 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9255 if (slp)
9256 continue;
9258 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
9259 if (!next_stmt_info)
9260 break;
9263 if (!slp)
9265 if (j == 0)
9266 *vec_stmt = new_stmt;
9267 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9271 for (i = 0; i < group_size; ++i)
9273 vec<tree> oprndsi = gvec_oprnds[i];
9274 oprndsi.release ();
9276 oprnds.release ();
9277 result_chain.release ();
9278 vec_oprnds.release ();
9280 return true;
9283 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
9284 VECTOR_CST mask. No checks are made that the target platform supports the
9285 mask, so callers may wish to test can_vec_perm_const_p separately, or use
9286 vect_gen_perm_mask_checked. */
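/* For example, for a 4-element VECTYPE, SEL = { 0, 4, 1, 5 } becomes the
   ssizetype VECTOR_CST { 0, 4, 1, 5 }; used as a VEC_PERM_EXPR mask it
   indexes into the concatenation of the two input vectors and thus
   interleaves their low halves.  */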
9288 tree
9289 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
9291 tree mask_type;
9293 poly_uint64 nunits = sel.length ();
9294 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
9296 mask_type = build_vector_type (ssizetype, nunits);
9297 return vec_perm_indices_to_tree (mask_type, sel);
9300 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
9301 i.e. that the target supports the pattern _for arbitrary input vectors_. */
9303 tree
9304 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
9306 machine_mode vmode = TYPE_MODE (vectype);
9307 gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
9308 return vect_gen_perm_mask_any (vectype, sel);
9311 /* Given vector variables X and Y that were generated for the scalar
9312 STMT_INFO, generate instructions to permute the vector elements of X and Y
9313 using permutation mask MASK_VEC, insert them at *GSI and return the
9314 permuted vector variable. */
9316 static tree
9317 permute_vec_elements (vec_info *vinfo,
9318 tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
9319 gimple_stmt_iterator *gsi)
9321 tree vectype = TREE_TYPE (x);
9322 tree perm_dest, data_ref;
9323 gimple *perm_stmt;
9325 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
9326 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
9327 perm_dest = vect_create_destination_var (scalar_dest, vectype);
9328 else
9329 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
9330 data_ref = make_ssa_name (perm_dest);
9332 /* Generate the permute statement. */
9333 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
9334 vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
9336 return data_ref;
9339 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
9340 inserting them on the loop's preheader edge. Returns true if we
9341 were successful in doing so (and thus STMT_INFO can be moved),
9342 otherwise returns false. HOIST_P indicates whether we actually want to
9343 hoist the definitions of all SSA uses; it is false when we are only costing. */
9345 static bool
9346 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
9348 ssa_op_iter i;
9349 tree op;
9350 bool any = false;
9352 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9354 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9355 if (!gimple_nop_p (def_stmt)
9356 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9358 /* Make sure we don't need to recurse. While we could do
9359 so in simple cases, for more complex use webs
9360 we don't have an easy way to preserve stmt order to fulfil
9361 dependencies within them. */
9362 tree op2;
9363 ssa_op_iter i2;
9364 if (gimple_code (def_stmt) == GIMPLE_PHI)
9365 return false;
9366 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
9368 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
9369 if (!gimple_nop_p (def_stmt2)
9370 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
9371 return false;
9373 any = true;
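/* If nothing needs hoisting, STMT_INFO can trivially be moved; when only
   costing (HOIST_P is false) we stop after this analysis.  */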
9377 if (!any)
9378 return true;
9380 if (!hoist_p)
9381 return true;
9383 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
9385 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
9386 if (!gimple_nop_p (def_stmt)
9387 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
9389 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
9390 gsi_remove (&gsi, false);
9391 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
9395 return true;
9398 /* vectorizable_load.
9400 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
9401 that can be vectorized.
9402 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9403 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
9404 Return true if STMT_INFO is vectorizable in this way. */
9406 static bool
9407 vectorizable_load (vec_info *vinfo,
9408 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9409 gimple **vec_stmt, slp_tree slp_node,
9410 stmt_vector_for_cost *cost_vec)
9412 tree scalar_dest;
9413 tree vec_dest = NULL;
9414 tree data_ref = NULL;
9415 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
9416 class loop *loop = NULL;
9417 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
9418 bool nested_in_vect_loop = false;
9419 tree elem_type;
9420 /* Avoid false positive uninitialized warning, see PR110652. */
9421 tree new_temp = NULL_TREE;
9422 machine_mode mode;
9423 tree dummy;
9424 tree dataref_ptr = NULL_TREE;
9425 tree dataref_offset = NULL_TREE;
9426 gimple *ptr_incr = NULL;
9427 int ncopies;
9428 int i, j;
9429 unsigned int group_size;
9430 poly_uint64 group_gap_adj;
9431 tree msq = NULL_TREE, lsq;
9432 tree realignment_token = NULL_TREE;
9433 gphi *phi = NULL;
9434 vec<tree> dr_chain = vNULL;
9435 bool grouped_load = false;
9436 stmt_vec_info first_stmt_info;
9437 stmt_vec_info first_stmt_info_for_drptr = NULL;
9438 bool compute_in_loop = false;
9439 class loop *at_loop;
9440 int vec_num;
9441 bool slp = (slp_node != NULL);
9442 bool slp_perm = false;
9443 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
9444 poly_uint64 vf;
9445 tree aggr_type;
9446 gather_scatter_info gs_info;
9447 tree ref_type;
9448 enum vect_def_type mask_dt = vect_unknown_def_type;
9450 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9451 return false;
9453 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
9454 && ! vec_stmt)
9455 return false;
9457 if (!STMT_VINFO_DATA_REF (stmt_info))
9458 return false;
9460 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
9461 int mask_index = -1;
9462 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
9464 scalar_dest = gimple_assign_lhs (assign);
9465 if (TREE_CODE (scalar_dest) != SSA_NAME)
9466 return false;
9468 tree_code code = gimple_assign_rhs_code (assign);
9469 if (code != ARRAY_REF
9470 && code != BIT_FIELD_REF
9471 && code != INDIRECT_REF
9472 && code != COMPONENT_REF
9473 && code != IMAGPART_EXPR
9474 && code != REALPART_EXPR
9475 && code != MEM_REF
9476 && TREE_CODE_CLASS (code) != tcc_declaration)
9477 return false;
9479 else
9481 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
9482 if (!call || !gimple_call_internal_p (call))
9483 return false;
9485 internal_fn ifn = gimple_call_internal_fn (call);
9486 if (!internal_load_fn_p (ifn))
9487 return false;
9489 scalar_dest = gimple_call_lhs (call);
9490 if (!scalar_dest)
9491 return false;
9493 mask_index = internal_fn_mask_index (ifn);
9494 /* ??? For SLP the mask operand is always last. */
9495 if (mask_index >= 0 && slp_node)
9496 mask_index = SLP_TREE_CHILDREN (slp_node).length () - 1;
9497 if (mask_index >= 0
9498 && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
9499 &mask, NULL, &mask_dt, &mask_vectype))
9500 return false;
9503 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9504 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
9506 if (loop_vinfo)
9508 loop = LOOP_VINFO_LOOP (loop_vinfo);
9509 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
9510 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9512 else
9513 vf = 1;
9515 /* Multiple types in SLP are handled by creating the appropriate number of
9516 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
9517 case of SLP. */
9518 if (slp)
9519 ncopies = 1;
9520 else
9521 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9523 gcc_assert (ncopies >= 1);
9525 /* FORNOW. This restriction should be relaxed. */
9526 if (nested_in_vect_loop && ncopies > 1)
9528 if (dump_enabled_p ())
9529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9530 "multiple types in nested loop.\n");
9531 return false;
9534 /* Invalidate assumptions made by dependence analysis when vectorization
9535 on the unrolled body effectively re-orders stmts. */
9536 if (ncopies > 1
9537 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9538 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9539 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9541 if (dump_enabled_p ())
9542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9543 "cannot perform implicit CSE when unrolling "
9544 "with negative dependence distance\n");
9545 return false;
9548 elem_type = TREE_TYPE (vectype);
9549 mode = TYPE_MODE (vectype);
9551 /* FORNOW. In some cases we can vectorize even if the data-type is not
9552 supported (e.g. data copies). */
9553 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
9555 if (dump_enabled_p ())
9556 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9557 "Aligned load, but unsupported type.\n");
9558 return false;
9561 /* Check if the load is a part of an interleaving chain. */
9562 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
9564 grouped_load = true;
9565 /* FORNOW */
9566 gcc_assert (!nested_in_vect_loop);
9567 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
9569 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9570 group_size = DR_GROUP_SIZE (first_stmt_info);
9572 /* Refuse non-SLP vectorization of SLP-only groups. */
9573 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
9575 if (dump_enabled_p ())
9576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9577 "cannot vectorize load in non-SLP mode.\n");
9578 return false;
9581 /* Invalidate assumptions made by dependence analysis when vectorization
9582 on the unrolled body effectively re-orders stmts. */
9583 if (!PURE_SLP_STMT (stmt_info)
9584 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
9585 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
9586 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
9588 if (dump_enabled_p ())
9589 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9590 "cannot perform implicit CSE when performing "
9591 "group loads with negative dependence distance\n");
9592 return false;
9595 else
9596 group_size = 1;
9598 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9600 slp_perm = true;
9602 if (!loop_vinfo)
9604 /* In BB vectorization we may not actually use a loaded vector
9605 accessing elements in excess of DR_GROUP_SIZE. */
9606 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9607 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
9608 unsigned HOST_WIDE_INT nunits;
9609 unsigned j, k, maxk = 0;
9610 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
9611 if (k > maxk)
9612 maxk = k;
9613 tree vectype = SLP_TREE_VECTYPE (slp_node);
9614 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
9615 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
9617 if (dump_enabled_p ())
9618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9619 "BB vectorization with gaps at the end of "
9620 "a load is not supported\n");
9621 return false;
9625 auto_vec<tree> tem;
9626 unsigned n_perms;
9627 if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
9628 true, &n_perms))
9630 if (dump_enabled_p ())
9631 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
9632 vect_location,
9633 "unsupported load permutation\n");
9634 return false;
9638 vect_memory_access_type memory_access_type;
9639 enum dr_alignment_support alignment_support_scheme;
9640 int misalignment;
9641 poly_int64 poffset;
9642 internal_fn lanes_ifn;
9643 if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
9644 ncopies, &memory_access_type, &poffset,
9645 &alignment_support_scheme, &misalignment, &gs_info,
9646 &lanes_ifn))
9647 return false;
9649 if (mask)
9651 if (memory_access_type == VMAT_CONTIGUOUS)
9653 machine_mode vec_mode = TYPE_MODE (vectype);
9654 if (!VECTOR_MODE_P (vec_mode)
9655 || !can_vec_mask_load_store_p (vec_mode,
9656 TYPE_MODE (mask_vectype), true))
9657 return false;
9659 else if (memory_access_type != VMAT_LOAD_STORE_LANES
9660 && memory_access_type != VMAT_GATHER_SCATTER)
9662 if (dump_enabled_p ())
9663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9664 "unsupported access type for masked load.\n");
9665 return false;
9667 else if (memory_access_type == VMAT_GATHER_SCATTER
9668 && gs_info.ifn == IFN_LAST
9669 && !gs_info.decl)
9671 if (dump_enabled_p ())
9672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9673 "unsupported masked emulated gather.\n");
9674 return false;
9678 bool costing_p = !vec_stmt;
9680 if (costing_p) /* transformation not required. */
9682 if (slp_node
9683 && mask
9684 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node)[0],
9685 mask_vectype))
9687 if (dump_enabled_p ())
9688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9689 "incompatible vector types for invariants\n");
9690 return false;
9693 if (!slp)
9694 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
9696 if (loop_vinfo
9697 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
9698 check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
9699 VLS_LOAD, group_size,
9700 memory_access_type, &gs_info,
9701 mask);
9703 if (dump_enabled_p ()
9704 && memory_access_type != VMAT_ELEMENTWISE
9705 && memory_access_type != VMAT_GATHER_SCATTER
9706 && alignment_support_scheme != dr_aligned)
9707 dump_printf_loc (MSG_NOTE, vect_location,
9708 "Vectorizing an unaligned access.\n");
9710 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9711 vinfo->any_known_not_updated_vssa = true;
9713 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
9716 if (!slp)
9717 gcc_assert (memory_access_type
9718 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
9720 if (dump_enabled_p () && !costing_p)
9721 dump_printf_loc (MSG_NOTE, vect_location,
9722 "transform load. ncopies = %d\n", ncopies);
9724 /* Transform. */
9726 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
9727 ensure_base_align (dr_info);
9729 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
9731 vect_build_gather_load_calls (vinfo, stmt_info, gsi, vec_stmt, &gs_info,
9732 mask, cost_vec);
9733 return true;
9736 if (memory_access_type == VMAT_INVARIANT)
9738 gcc_assert (!grouped_load && !mask && !bb_vinfo);
9739 /* If we have versioned for aliasing or the loop doesn't
9740 have any data dependencies that would preclude this,
9741 then we are sure this is a loop invariant load and
9742 thus we can insert it on the preheader edge.
9743 TODO: hoist_defs_of_uses should ideally be computed
9744 once at analysis time, remembered and used at
9745 transform time. */
9746 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
9747 && !nested_in_vect_loop
9748 && hoist_defs_of_uses (stmt_info, loop, !costing_p));
9749 if (costing_p)
9751 enum vect_cost_model_location cost_loc
9752 = hoist_p ? vect_prologue : vect_body;
9753 unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
9754 stmt_info, 0, cost_loc);
9755 cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
9756 cost_loc);
9757 unsigned int prologue_cost = hoist_p ? cost : 0;
9758 unsigned int inside_cost = hoist_p ? 0 : cost;
9759 if (dump_enabled_p ())
9760 dump_printf_loc (MSG_NOTE, vect_location,
9761 "vect_model_load_cost: inside_cost = %d, "
9762 "prologue_cost = %d .\n",
9763 inside_cost, prologue_cost);
9764 return true;
9766 if (hoist_p)
9768 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
9769 if (dump_enabled_p ())
9770 dump_printf_loc (MSG_NOTE, vect_location,
9771 "hoisting out of the vectorized loop: %G",
9772 (gimple *) stmt);
9773 scalar_dest = copy_ssa_name (scalar_dest);
9774 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
9775 edge pe = loop_preheader_edge (loop);
9776 gphi *vphi = get_virtual_phi (loop->header);
9777 tree vuse;
9778 if (vphi)
9779 vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
9780 else
9781 vuse = gimple_vuse (gsi_stmt (*gsi));
9782 gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
9783 gimple_set_vuse (new_stmt, vuse);
9784 gsi_insert_on_edge_immediate (pe, new_stmt);
9786 /* These copies are all equivalent. */
9787 if (hoist_p)
9788 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9789 vectype, NULL);
9790 else
9792 gimple_stmt_iterator gsi2 = *gsi;
9793 gsi_next (&gsi2);
9794 new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
9795 vectype, &gsi2);
9797 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
9798 if (slp)
9799 for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
9800 slp_node->push_vec_def (new_stmt);
9801 else
9803 for (j = 0; j < ncopies; ++j)
9804 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
9805 *vec_stmt = new_stmt;
9807 return true;
9810 if (memory_access_type == VMAT_ELEMENTWISE
9811 || memory_access_type == VMAT_STRIDED_SLP)
9813 gimple_stmt_iterator incr_gsi;
9814 bool insert_after;
9815 tree offvar;
9816 tree ivstep;
9817 tree running_off;
9818 vec<constructor_elt, va_gc> *v = NULL;
9819 tree stride_base, stride_step, alias_off;
9820 /* Checked by get_load_store_type. */
9821 unsigned int const_nunits = nunits.to_constant ();
9822 unsigned HOST_WIDE_INT cst_offset = 0;
9823 tree dr_offset;
9824 unsigned int inside_cost = 0;
9826 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo));
9827 gcc_assert (!nested_in_vect_loop);
9829 if (grouped_load)
9831 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9832 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9834 else
9836 first_stmt_info = stmt_info;
9837 first_dr_info = dr_info;
9840 if (slp && grouped_load)
9842 group_size = DR_GROUP_SIZE (first_stmt_info);
9843 ref_type = get_group_alias_ptr_type (first_stmt_info);
9845 else
9847 if (grouped_load)
9848 cst_offset
9849 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
9850 * vect_get_place_in_interleaving_chain (stmt_info,
9851 first_stmt_info));
9852 group_size = 1;
9853 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
9856 if (!costing_p)
9858 dr_offset = get_dr_vinfo_offset (vinfo, first_dr_info);
9859 stride_base = fold_build_pointer_plus (
9860 DR_BASE_ADDRESS (first_dr_info->dr),
9861 size_binop (PLUS_EXPR, convert_to_ptrofftype (dr_offset),
9862 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
9863 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
9865 /* For a load with loop-invariant (but other than power-of-2)
9866 stride (i.e. not a grouped access) like so:
9868 for (i = 0; i < n; i += stride)
9869 ... = array[i];
9871 we generate a new induction variable and new accesses to
9872 form a new vector (or vectors, depending on ncopies):
9874 for (j = 0; ; j += VF*stride)
9875 tmp1 = array[j];
9876 tmp2 = array[j + stride];
9878 vectemp = {tmp1, tmp2, ...}
9881 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
9882 build_int_cst (TREE_TYPE (stride_step), vf));
9884 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
9886 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
9887 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
9888 create_iv (stride_base, PLUS_EXPR, ivstep, NULL,
9889 loop, &incr_gsi, insert_after,
9890 &offvar, NULL);
9892 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
9895 running_off = offvar;
9896 alias_off = build_int_cst (ref_type, 0);
9897 int nloads = const_nunits;
9898 int lnel = 1;
9899 tree ltype = TREE_TYPE (vectype);
9900 tree lvectype = vectype;
9901 auto_vec<tree> dr_chain;
9902 if (memory_access_type == VMAT_STRIDED_SLP)
9904 if (group_size < const_nunits)
9906 /* First check if vec_init optab supports construction from vector
9907 elts directly. Otherwise avoid emitting a constructor of
9908 vector elements by performing the loads using an integer type
9909 of the same size, constructing a vector of those and then
9910 re-interpreting it as the original vector type. This avoids a
9911 huge runtime penalty due to the general inability to perform
9912 store forwarding from smaller stores to a larger load. */
9913 tree ptype;
9914 tree vtype
9915 = vector_vector_composition_type (vectype,
9916 const_nunits / group_size,
9917 &ptype);
9918 if (vtype != NULL_TREE)
9920 nloads = const_nunits / group_size;
9921 lnel = group_size;
9922 lvectype = vtype;
9923 ltype = ptype;
9926 else
9928 nloads = 1;
9929 lnel = const_nunits;
9930 ltype = vectype;
9932 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
9934 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
9935 else if (nloads == 1)
9936 ltype = vectype;
9938 if (slp)
9940 /* For SLP permutation support we need to load the whole group,
9941 not only the number of vector stmts the permutation result
9942 fits in. */
9943 if (slp_perm)
9945 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
9946 variable VF. */
9947 unsigned int const_vf = vf.to_constant ();
9948 ncopies = CEIL (group_size * const_vf, const_nunits);
9949 dr_chain.create (ncopies);
9951 else
9952 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9954 unsigned int group_el = 0;
9955 unsigned HOST_WIDE_INT
9956 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
9957 unsigned int n_groups = 0;
9958 for (j = 0; j < ncopies; j++)
9960 if (nloads > 1 && !costing_p)
9961 vec_alloc (v, nloads);
9962 gimple *new_stmt = NULL;
9963 for (i = 0; i < nloads; i++)
9965 if (costing_p)
9967 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
9968 avoid ICE, see PR110776. */
9969 if (VECTOR_TYPE_P (ltype)
9970 && memory_access_type != VMAT_ELEMENTWISE)
9971 vect_get_load_cost (vinfo, stmt_info, 1,
9972 alignment_support_scheme, misalignment,
9973 false, &inside_cost, nullptr, cost_vec,
9974 cost_vec, true);
9975 else
9976 inside_cost += record_stmt_cost (cost_vec, 1, scalar_load,
9977 stmt_info, 0, vect_body);
9978 continue;
9980 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9981 group_el * elsz + cst_offset);
9982 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9983 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9984 new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
9985 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
9986 if (nloads > 1)
9987 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9988 gimple_assign_lhs (new_stmt));
9990 group_el += lnel;
9991 if (! slp
9992 || group_el == group_size)
9994 n_groups++;
9995 /* When doing SLP make sure not to load elements from
9996 the next vector iteration; those will not be accessed,
9997 so just use the last element again. See PR107451. */
9998 if (!slp || known_lt (n_groups, vf))
10000 tree newoff = copy_ssa_name (running_off);
10001 gimple *incr
10002 = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
10003 running_off, stride_step);
10004 vect_finish_stmt_generation (vinfo, stmt_info, incr, gsi);
10005 running_off = newoff;
10007 group_el = 0;
10011 if (nloads > 1)
10013 if (costing_p)
10014 inside_cost += record_stmt_cost (cost_vec, 1, vec_construct,
10015 stmt_info, 0, vect_body);
10016 else
10018 tree vec_inv = build_constructor (lvectype, v);
10019 new_temp = vect_init_vector (vinfo, stmt_info, vec_inv,
10020 lvectype, gsi);
10021 new_stmt = SSA_NAME_DEF_STMT (new_temp);
10022 if (lvectype != vectype)
10024 new_stmt
10025 = gimple_build_assign (make_ssa_name (vectype),
10026 VIEW_CONVERT_EXPR,
10027 build1 (VIEW_CONVERT_EXPR,
10028 vectype, new_temp));
10029 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10030 gsi);
10035 if (!costing_p)
10037 if (slp)
10039 if (slp_perm)
10040 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
10041 else
10042 slp_node->push_vec_def (new_stmt);
10044 else
10046 if (j == 0)
10047 *vec_stmt = new_stmt;
10048 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
10052 if (slp_perm)
10054 unsigned n_perms;
10055 if (costing_p)
10057 unsigned n_loads;
10058 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, NULL, vf,
10059 true, &n_perms, &n_loads);
10060 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
10061 first_stmt_info, 0, vect_body);
10063 else
10064 vect_transform_slp_perm_load (vinfo, slp_node, dr_chain, gsi, vf,
10065 false, &n_perms);
10068 if (costing_p && dump_enabled_p ())
10069 dump_printf_loc (MSG_NOTE, vect_location,
10070 "vect_model_load_cost: inside_cost = %u, "
10071 "prologue_cost = 0 .\n",
10072 inside_cost);
10074 return true;
10077 if (memory_access_type == VMAT_GATHER_SCATTER
10078 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
10079 grouped_load = false;
10081 if (grouped_load
10082 || (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()))
10084 if (grouped_load)
10086 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
10087 group_size = DR_GROUP_SIZE (first_stmt_info);
10089 else
10091 first_stmt_info = stmt_info;
10092 group_size = 1;
10094 /* For SLP vectorization we directly vectorize a subchain
10095 without permutation. */
10096 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
10097 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
10098 /* For BB vectorization always use the first stmt to base
10099 the data ref pointer on. */
10100 if (bb_vinfo)
10101 first_stmt_info_for_drptr
10102 = vect_find_first_scalar_stmt_in_slp (slp_node);
10104 /* Check if the chain of loads is already vectorized. */
10105 if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists ()
10106 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10107 ??? But we can only do so if there is exactly one,
10108 as we have no way to get at the rest. Leave the CSE
10109 opportunity alone.
10110 ??? With the group load eventually participating
10111 in multiple different permutations (having multiple
10112 slp nodes which refer to the same group) the CSE
10113 is even wrong code. See PR56270. */
10114 && !slp)
10116 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10117 return true;
10119 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
10120 group_gap_adj = 0;
10122 /* VEC_NUM is the number of vect stmts to be created for this group. */
10123 if (slp)
10125 grouped_load = false;
10126 /* If an SLP permutation is from N elements to N elements,
10127 and if one vector holds a whole number of N-element groups, we can load
10128 the inputs to the permutation in the same way as an
10129 unpermuted sequence. In other cases we need to load the
10130 whole group, not only the number of vector stmts the
10131 permutation result fits in. */
10132 unsigned scalar_lanes = SLP_TREE_LANES (slp_node);
10133 if (slp_perm
10134 && (group_size != scalar_lanes
10135 || !multiple_p (nunits, group_size)))
10137 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10138 variable VF; see vect_transform_slp_perm_load. */
10139 unsigned int const_vf = vf.to_constant ();
10140 unsigned int const_nunits = nunits.to_constant ();
10141 vec_num = CEIL (group_size * const_vf, const_nunits);
10142 group_gap_adj = vf * group_size - nunits * vec_num;
10144 else
10146 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10147 group_gap_adj
10148 = group_size - scalar_lanes;
10151 else
10152 vec_num = group_size;
10154 ref_type = get_group_alias_ptr_type (first_stmt_info);
10156 else
10158 first_stmt_info = stmt_info;
10159 first_dr_info = dr_info;
10160 group_size = vec_num = 1;
10161 group_gap_adj = 0;
10162 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
10163 if (slp)
10164 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
10167 gcc_assert (alignment_support_scheme);
10168 vec_loop_masks *loop_masks
10169 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
10170 ? &LOOP_VINFO_MASKS (loop_vinfo)
10171 : NULL);
10172 vec_loop_lens *loop_lens
10173 = (loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo)
10174 ? &LOOP_VINFO_LENS (loop_vinfo)
10175 : NULL);
10177 /* Shouldn't go with length-based approach if fully masked. */
10178 gcc_assert (!loop_lens || !loop_masks);
10180 /* Targets with store-lane instructions must not require explicit
10181 realignment. vect_supportable_dr_alignment always returns either
10182 dr_aligned or dr_unaligned_supported for masked operations. */
10183 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
10184 && !mask
10185 && !loop_masks)
10186 || alignment_support_scheme == dr_aligned
10187 || alignment_support_scheme == dr_unaligned_supported);
10189 /* In case the vectorization factor (VF) is bigger than the number
10190 of elements that we can fit in a vectype (nunits), we have to generate
10191 more than one vector stmt - i.e. - we need to "unroll" the
10192 vector stmt by a factor VF/nunits. In doing so, we record a pointer
10193 from one copy of the vector stmt to the next, in the field
10194 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
10195 stages to find the correct vector defs to be used when vectorizing
10196 stmts that use the defs of the current stmt. The example below
10197 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
10198 need to create 4 vectorized stmts):
10200 before vectorization:
10201 RELATED_STMT VEC_STMT
10202 S1: x = memref - -
10203 S2: z = x + 1 - -
10205 step 1: vectorize stmt S1:
10206 We first create the vector stmt VS1_0, and, as usual, record a
10207 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
10208 Next, we create the vector stmt VS1_1, and record a pointer to
10209 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
10210 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
10211 stmts and pointers:
10212 RELATED_STMT VEC_STMT
10213 VS1_0: vx0 = memref0 VS1_1 -
10214 VS1_1: vx1 = memref1 VS1_2 -
10215 VS1_2: vx2 = memref2 VS1_3 -
10216 VS1_3: vx3 = memref3 - -
10217 S1: x = load - VS1_0
10218 S2: z = x + 1 - -
10221 /* In case of interleaving (non-unit grouped access):
10223 S1: x2 = &base + 2
10224 S2: x0 = &base
10225 S3: x1 = &base + 1
10226 S4: x3 = &base + 3
10228 Vectorized loads are created in the order of memory accesses
10229 starting from the access of the first stmt of the chain:
10231 VS1: vx0 = &base
10232 VS2: vx1 = &base + vec_size*1
10233 VS3: vx3 = &base + vec_size*2
10234 VS4: vx4 = &base + vec_size*3
10236 Then permutation statements are generated:
10238 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
10239 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
10242 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
10243 (the order of the data-refs in the output of vect_permute_load_chain
10244 corresponds to the order of scalar stmts in the interleaving chain - see
10245 the documentation of vect_permute_load_chain()).
10246 The generation of permutation stmts and recording them in
10247 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
10249 In case of both multiple types and interleaving, the vector loads and
10250 permutation stmts above are created for every copy. The result vector
10251 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
10252 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
10254 /* If the data reference is aligned (dr_aligned) or potentially unaligned
10255 on a target that supports unaligned accesses (dr_unaligned_supported)
10256 we generate the following code:
10257 p = initial_addr;
10258 indx = 0;
10259 loop {
10260 p = p + indx * vectype_size;
10261 vec_dest = *(p);
10262 indx = indx + 1;
10265 Otherwise, the data reference is potentially unaligned on a target that
10266 does not support unaligned accesses (dr_explicit_realign_optimized) -
10267 then generate the following code, in which the data in each iteration is
10268 obtained by two vector loads, one from the previous iteration, and one
10269 from the current iteration:
10270 p1 = initial_addr;
10271 msq_init = *(floor(p1))
10272 p2 = initial_addr + VS - 1;
10273 realignment_token = call target_builtin;
10274 indx = 0;
10275 loop {
10276 p2 = p2 + indx * vectype_size
10277 lsq = *(floor(p2))
10278 vec_dest = realign_load (msq, lsq, realignment_token)
10279 indx = indx + 1;
10280 msq = lsq;
10281 } */
10283 /* If the misalignment remains the same throughout the execution of the
10284 loop, we can create the init_addr and permutation mask at the loop
10285 preheader. Otherwise, it needs to be created inside the loop.
10286 This can only occur when vectorizing memory accesses in the inner-loop
10287 nested within an outer-loop that is being vectorized. */
10289 if (nested_in_vect_loop
10290 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
10291 GET_MODE_SIZE (TYPE_MODE (vectype))))
10293 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
10294 compute_in_loop = true;
10297 bool diff_first_stmt_info
10298 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
10300 tree offset = NULL_TREE;
10301 if ((alignment_support_scheme == dr_explicit_realign_optimized
10302 || alignment_support_scheme == dr_explicit_realign)
10303 && !compute_in_loop)
10305 /* If we have different first_stmt_info, we can't set up realignment
10306 here, since we can't guarantee first_stmt_info DR has been
10307 initialized yet; use first_stmt_info_for_drptr DR by bumping the
10308 distance from first_stmt_info DR instead, as below. */
10309 if (!costing_p)
10311 if (!diff_first_stmt_info)
10312 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10313 &realignment_token,
10314 alignment_support_scheme, NULL_TREE,
10315 &at_loop);
10316 if (alignment_support_scheme == dr_explicit_realign_optimized)
10318 phi = as_a<gphi *> (SSA_NAME_DEF_STMT (msq));
10319 offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
10320 size_one_node);
10321 gcc_assert (!first_stmt_info_for_drptr);
10325 else
10326 at_loop = loop;
10328 if (!known_eq (poffset, 0))
10329 offset = (offset
10330 ? size_binop (PLUS_EXPR, offset, size_int (poffset))
10331 : size_int (poffset));
10333 tree bump;
10334 tree vec_offset = NULL_TREE;
10335 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10337 aggr_type = NULL_TREE;
10338 bump = NULL_TREE;
10340 else if (memory_access_type == VMAT_GATHER_SCATTER)
10342 aggr_type = elem_type;
10343 if (!costing_p)
10344 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, gsi, &gs_info,
10345 &bump, &vec_offset, loop_lens);
10347 else
10349 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10350 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
10351 else
10352 aggr_type = vectype;
10353 bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info, aggr_type,
10354 memory_access_type, loop_lens);
10357 auto_vec<tree> vec_offsets;
10358 auto_vec<tree> vec_masks;
10359 if (mask && !costing_p)
10361 if (slp_node)
10362 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[mask_index],
10363 &vec_masks);
10364 else
10365 vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies, mask,
10366 &vec_masks, mask_vectype);
10369 tree vec_mask = NULL_TREE;
10370 if (memory_access_type == VMAT_LOAD_STORE_LANES)
10372 gcc_assert (alignment_support_scheme == dr_aligned
10373 || alignment_support_scheme == dr_unaligned_supported);
10374 gcc_assert (grouped_load && !slp);
10376 unsigned int inside_cost = 0, prologue_cost = 0;
10377 for (j = 0; j < ncopies; j++)
10379 if (costing_p)
10381 /* An IFN_LOAD_LANES will load all its vector results,
10382 regardless of which ones we actually need. Account
10383 for the cost of unused results. */
10384 if (first_stmt_info == stmt_info)
10386 unsigned int gaps = DR_GROUP_SIZE (first_stmt_info);
10387 stmt_vec_info next_stmt_info = first_stmt_info;
10390 gaps -= 1;
10391 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
10393 while (next_stmt_info);
10394 if (gaps)
10396 if (dump_enabled_p ())
10397 dump_printf_loc (MSG_NOTE, vect_location,
10398 "vect_model_load_cost: %d "
10399 "unused vectors.\n",
10400 gaps);
10401 vect_get_load_cost (vinfo, stmt_info, gaps,
10402 alignment_support_scheme,
10403 misalignment, false, &inside_cost,
10404 &prologue_cost, cost_vec, cost_vec,
10405 true);
10408 vect_get_load_cost (vinfo, stmt_info, 1, alignment_support_scheme,
10409 misalignment, false, &inside_cost,
10410 &prologue_cost, cost_vec, cost_vec, true);
10411 continue;
10414 /* 1. Create the vector or array pointer update chain. */
10415 if (j == 0)
10416 dataref_ptr
10417 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10418 at_loop, offset, &dummy, gsi,
10419 &ptr_incr, false, bump);
10420 else
10422 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10423 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10424 stmt_info, bump);
10426 if (mask)
10427 vec_mask = vec_masks[j];
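/* VEC_ARRAY receives the VEC_NUM vectors produced by the load-lanes call
   emitted below; each is then extracted into its own SSA name.  */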
10429 tree vec_array = create_vector_array (vectype, vec_num);
10431 tree final_mask = NULL_TREE;
10432 tree final_len = NULL_TREE;
10433 tree bias = NULL_TREE;
10434 if (loop_masks)
10435 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10436 ncopies, vectype, j);
10437 if (vec_mask)
10438 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype, final_mask,
10439 vec_mask, gsi);
10441 if (lanes_ifn == IFN_MASK_LEN_LOAD_LANES)
10443 if (loop_lens)
10444 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10445 ncopies, vectype, j, 1);
10446 else
10447 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10448 signed char biasval
10449 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10450 bias = build_int_cst (intQI_type_node, biasval);
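/* Pass an all-ones mask as the 'mask' argument of MASK_LEN_LOAD_LANES
   if no mask is otherwise required.  */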
10451 if (!final_mask)
10453 mask_vectype = truth_type_for (vectype);
10454 final_mask = build_minus_one_cst (mask_vectype);
10458 gcall *call;
10459 if (final_len && final_mask)
10461 /* Emit:
10462 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10463 VEC_MASK, LEN, BIAS). */
10464 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10465 tree alias_ptr = build_int_cst (ref_type, align);
10466 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES, 5,
10467 dataref_ptr, alias_ptr,
10468 final_mask, final_len, bias);
10470 else if (final_mask)
10472 /* Emit:
10473 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10474 VEC_MASK). */
10475 unsigned int align = TYPE_ALIGN (TREE_TYPE (vectype));
10476 tree alias_ptr = build_int_cst (ref_type, align);
10477 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
10478 dataref_ptr, alias_ptr,
10479 final_mask);
10481 else
10483 /* Emit:
10484 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10485 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
10486 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
10488 gimple_call_set_lhs (call, vec_array);
10489 gimple_call_set_nothrow (call, true);
10490 vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
10492 dr_chain.create (vec_num);
10493 /* Extract each vector into an SSA_NAME. */
10494 for (i = 0; i < vec_num; i++)
10496 new_temp = read_vector_array (vinfo, stmt_info, gsi, scalar_dest,
10497 vec_array, i);
10498 dr_chain.quick_push (new_temp);
10501 /* Record the mapping between SSA_NAMEs and statements. */
10502 vect_record_grouped_load_vectors (vinfo, stmt_info, dr_chain);
10504 /* Record that VEC_ARRAY is now dead. */
10505 vect_clobber_variable (vinfo, stmt_info, gsi, vec_array);
10507 dr_chain.release ();
10509 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
10512 if (costing_p && dump_enabled_p ())
10513 dump_printf_loc (MSG_NOTE, vect_location,
10514 "vect_model_load_cost: inside_cost = %u, "
10515 "prologue_cost = %u .\n",
10516 inside_cost, prologue_cost);
10518 return true;
10521 poly_uint64 group_elt = 0;
10522 unsigned int inside_cost = 0, prologue_cost = 0;
10523 for (j = 0; j < ncopies; j++)
10525 /* 1. Create the vector or array pointer update chain. */
10526 if (j == 0 && !costing_p)
10528 bool simd_lane_access_p
10529 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
10530 if (simd_lane_access_p
10531 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
10532 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
10533 && integer_zerop (get_dr_vinfo_offset (vinfo, first_dr_info))
10534 && integer_zerop (DR_INIT (first_dr_info->dr))
10535 && alias_sets_conflict_p (get_alias_set (aggr_type),
10536 get_alias_set (TREE_TYPE (ref_type)))
10537 && (alignment_support_scheme == dr_aligned
10538 || alignment_support_scheme == dr_unaligned_supported))
10540 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
10541 dataref_offset = build_int_cst (ref_type, 0);
10543 else if (diff_first_stmt_info)
10545 dataref_ptr
10546 = vect_create_data_ref_ptr (vinfo, first_stmt_info_for_drptr,
10547 aggr_type, at_loop, offset, &dummy,
10548 gsi, &ptr_incr, simd_lane_access_p,
10549 bump);
10550 /* Adjust the pointer by the difference to first_stmt. */
10551 data_reference_p ptrdr
10552 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
10553 tree diff
10554 = fold_convert (sizetype,
10555 size_binop (MINUS_EXPR,
10556 DR_INIT (first_dr_info->dr),
10557 DR_INIT (ptrdr)));
10558 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10559 stmt_info, diff);
10560 if (alignment_support_scheme == dr_explicit_realign)
10562 msq = vect_setup_realignment (vinfo,
10563 first_stmt_info_for_drptr, gsi,
10564 &realignment_token,
10565 alignment_support_scheme,
10566 dataref_ptr, &at_loop);
10567 gcc_assert (!compute_in_loop);
10570 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10572 vect_get_gather_scatter_ops (loop_vinfo, loop, stmt_info,
10573 slp_node, &gs_info, &dataref_ptr,
10574 &vec_offsets);
10576 else
10577 dataref_ptr
10578 = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type,
10579 at_loop,
10580 offset, &dummy, gsi, &ptr_incr,
10581 simd_lane_access_p, bump);
10582 if (mask)
10583 vec_mask = vec_masks[0];
10585 else if (!costing_p)
10587 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo));
10588 if (dataref_offset)
10589 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
10590 bump);
10591 else if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10592 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
10593 stmt_info, bump);
10594 if (mask)
10595 vec_mask = vec_masks[j];
10598 if (grouped_load || slp_perm)
10599 dr_chain.create (vec_num);
10601 gimple *new_stmt = NULL;
10602 for (i = 0; i < vec_num; i++)
10604 tree final_mask = NULL_TREE;
10605 tree final_len = NULL_TREE;
10606 tree bias = NULL_TREE;
10607 if (!costing_p)
10609 if (loop_masks)
10610 final_mask = vect_get_loop_mask (loop_vinfo, gsi, loop_masks,
10611 vec_num * ncopies, vectype,
10612 vec_num * j + i);
10613 if (vec_mask)
10614 final_mask = prepare_vec_mask (loop_vinfo, mask_vectype,
10615 final_mask, vec_mask, gsi);
10617 if (i > 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10618 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr,
10619 gsi, stmt_info, bump);
10622 /* 2. Create the vector-load in the loop. */
10623 switch (alignment_support_scheme)
10625 case dr_aligned:
10626 case dr_unaligned_supported:
10628 unsigned int misalign;
10629 unsigned HOST_WIDE_INT align;
10631 if (memory_access_type == VMAT_GATHER_SCATTER
10632 && gs_info.ifn != IFN_LAST)
10634 if (costing_p)
10636 unsigned int cnunits = vect_nunits_for_cost (vectype);
10637 inside_cost
10638 = record_stmt_cost (cost_vec, cnunits, scalar_load,
10639 stmt_info, 0, vect_body);
10640 break;
10642 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
10643 vec_offset = vec_offsets[vec_num * j + i];
10644 tree zero = build_zero_cst (vectype);
10645 tree scale = size_int (gs_info.scale);
10647 if (gs_info.ifn == IFN_MASK_LEN_GATHER_LOAD)
10649 if (loop_lens)
10650 final_len
10651 = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10652 vec_num * ncopies, vectype,
10653 vec_num * j + i, 1);
10654 else
10655 final_len
10656 = build_int_cst (sizetype,
10657 TYPE_VECTOR_SUBPARTS (vectype));
10658 signed char biasval
10659 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10660 bias = build_int_cst (intQI_type_node, biasval);
10661 if (!final_mask)
10663 mask_vectype = truth_type_for (vectype);
10664 final_mask = build_minus_one_cst (mask_vectype);
10668 gcall *call;
10669 if (final_len && final_mask)
10670 call = gimple_build_call_internal (
10671 IFN_MASK_LEN_GATHER_LOAD, 7, dataref_ptr, vec_offset,
10672 scale, zero, final_mask, final_len, bias);
10673 else if (final_mask)
10674 call
10675 = gimple_build_call_internal (IFN_MASK_GATHER_LOAD, 5,
10676 dataref_ptr, vec_offset,
10677 scale, zero, final_mask);
10678 else
10679 call
10680 = gimple_build_call_internal (IFN_GATHER_LOAD, 4,
10681 dataref_ptr, vec_offset,
10682 scale, zero);
10683 gimple_call_set_nothrow (call, true);
10684 new_stmt = call;
10685 data_ref = NULL_TREE;
10686 break;
10688 else if (memory_access_type == VMAT_GATHER_SCATTER)
10690 /* Emulated gather-scatter. */
10691 gcc_assert (!final_mask);
10692 unsigned HOST_WIDE_INT const_nunits = nunits.to_constant ();
10693 if (costing_p)
10695 /* For emulated gathers, N offset vector element
10696 extracts (the offset add is consumed by the load). */
10697 inside_cost
10698 = record_stmt_cost (cost_vec, const_nunits,
10699 vec_to_scalar, stmt_info, 0,
10700 vect_body);
10701 /* N scalar loads plus gathering them into a
10702 vector. */
10703 inside_cost = record_stmt_cost (cost_vec, const_nunits,
10704 scalar_load, stmt_info,
10705 0, vect_body);
10706 inside_cost
10707 = record_stmt_cost (cost_vec, 1, vec_construct,
10708 stmt_info, 0, vect_body);
10709 break;
10711 unsigned HOST_WIDE_INT const_offset_nunits
10712 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype)
10713 .to_constant ();
10714 vec<constructor_elt, va_gc> *ctor_elts;
10715 vec_alloc (ctor_elts, const_nunits);
10716 gimple_seq stmts = NULL;
10717 /* We support offset vectors with more elements
10718 than the data vector for now. */
10719 unsigned HOST_WIDE_INT factor
10720 = const_offset_nunits / const_nunits;
10721 vec_offset = vec_offsets[j / factor];
10722 unsigned elt_offset = (j % factor) * const_nunits;
10723 tree idx_type = TREE_TYPE (TREE_TYPE (vec_offset));
10724 tree scale = size_int (gs_info.scale);
10725 align = get_object_alignment (DR_REF (first_dr_info->dr));
10726 tree ltype
10727 = build_aligned_type (TREE_TYPE (vectype), align);
10728 for (unsigned k = 0; k < const_nunits; ++k)
10730 tree boff = size_binop (MULT_EXPR, TYPE_SIZE (idx_type),
10731 bitsize_int (k + elt_offset));
10732 tree idx = gimple_build (&stmts, BIT_FIELD_REF,
10733 idx_type, vec_offset,
10734 TYPE_SIZE (idx_type), boff);
10735 idx = gimple_convert (&stmts, sizetype, idx);
10736 idx = gimple_build (&stmts, MULT_EXPR, sizetype, idx,
10737 scale);
10738 tree ptr = gimple_build (&stmts, PLUS_EXPR,
10739 TREE_TYPE (dataref_ptr),
10740 dataref_ptr, idx);
10741 ptr = gimple_convert (&stmts, ptr_type_node, ptr);
10742 tree elt = make_ssa_name (TREE_TYPE (vectype));
10743 tree ref = build2 (MEM_REF, ltype, ptr,
10744 build_int_cst (ref_type, 0));
10745 new_stmt = gimple_build_assign (elt, ref);
10746 gimple_set_vuse (new_stmt,
10747 gimple_vuse (gsi_stmt (*gsi)));
10748 gimple_seq_add_stmt (&stmts, new_stmt);
10749 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE, elt);
10751 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
10752 new_stmt = gimple_build_assign (
10753 NULL_TREE, build_constructor (vectype, ctor_elts));
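		  /* The loop above open-codes the gather; schematically,
		     for each lane K:
		       idx = BIT_FIELD_REF <vec_offset, idx_bits, K * idx_bits>;
		       elt = *(elt_type *) (dataref_ptr + (sizetype) idx * scale);
		     and the CONSTRUCTOR assembles the scalar loads back into
		     a vector (idx_bits and elt_type are illustrative names
		     only).  */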
10754 data_ref = NULL_TREE;
10755 break;
10758 if (costing_p)
10759 break;
10761 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
10762 if (alignment_support_scheme == dr_aligned)
10763 misalign = 0;
10764 else if (misalignment == DR_MISALIGNMENT_UNKNOWN)
10766 align
10767 = dr_alignment (vect_dr_behavior (vinfo, first_dr_info));
10768 misalign = 0;
10770 else
10771 misalign = misalignment;
10772 if (dataref_offset == NULL_TREE
10773 && TREE_CODE (dataref_ptr) == SSA_NAME)
10774 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
10775 misalign);
10776 align = least_bit_hwi (misalign | align);
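	      /* ALIGN now holds the alignment that is actually guaranteed,
		 i.e. the lowest set bit of the combined alignment and
		 misalignment.  */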
10778 /* Compute the IFN to use when LOOP_LENS or final_mask is valid. */
10779 machine_mode vmode = TYPE_MODE (vectype);
10780 machine_mode new_vmode = vmode;
10781 internal_fn partial_ifn = IFN_LAST;
10782 if (loop_lens)
10784 opt_machine_mode new_ovmode
10785 = get_len_load_store_mode (vmode, true, &partial_ifn);
10786 new_vmode = new_ovmode.require ();
10787 unsigned factor
10788 = (new_ovmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vmode);
10789 final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
10790 vec_num * ncopies, vectype,
10791 vec_num * j + i, factor);
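	      /* FACTOR rescales the loop length from element units into the
		 units the length-based load expects, e.g. into bytes when
		 the target only provides the operation on a VnQI
		 (byte-element) mode.  */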
10793 else if (final_mask)
10795 if (!can_vec_mask_load_store_p (
10796 vmode, TYPE_MODE (TREE_TYPE (final_mask)), true,
10797 &partial_ifn))
10798 gcc_unreachable ();
10801 if (partial_ifn == IFN_MASK_LEN_LOAD)
10803 if (!final_len)
10805 /* Pass VF value to 'len' argument of
10806 MASK_LEN_LOAD if LOOP_LENS is invalid. */
10807 final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10809 if (!final_mask)
10811 /* Pass all ones value to 'mask' argument of
10812 MASK_LEN_LOAD if final_mask is invalid. */
10813 mask_vectype = truth_type_for (vectype);
10814 final_mask = build_minus_one_cst (mask_vectype);
10817 if (final_len)
10819 signed char biasval
10820 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
10822 bias = build_int_cst (intQI_type_node, biasval);
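	      /* The bias is a target-specific adjustment to the length
		 operand: 0 for most targets, or -1 where the underlying
		 instruction expects the highest active index rather than
		 a count.  */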
10825 if (final_len)
10827 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
10828 gcall *call;
10829 if (partial_ifn == IFN_MASK_LEN_LOAD)
10830 call = gimple_build_call_internal (IFN_MASK_LEN_LOAD, 5,
10831 dataref_ptr, ptr,
10832 final_mask, final_len,
10833 bias);
10834 else
10835 call = gimple_build_call_internal (IFN_LEN_LOAD, 4,
10836 dataref_ptr, ptr,
10837 final_len, bias);
10838 gimple_call_set_nothrow (call, true);
10839 new_stmt = call;
10840 data_ref = NULL_TREE;
10842 /* A conversion is needed if the load was wrapped in a VnQI vector. */
10843 if (vmode != new_vmode)
10845 tree new_vtype = build_vector_type_for_mode (
10846 unsigned_intQI_type_node, new_vmode);
10847 tree var
10848 = vect_get_new_ssa_name (new_vtype, vect_simple_var);
10849 gimple_set_lhs (call, var);
10850 vect_finish_stmt_generation (vinfo, stmt_info, call,
10851 gsi);
10852 tree op = build1 (VIEW_CONVERT_EXPR, vectype, var);
10853 new_stmt = gimple_build_assign (vec_dest,
10854 VIEW_CONVERT_EXPR, op);
10857 else if (final_mask)
10859 tree ptr = build_int_cst (ref_type, align * BITS_PER_UNIT);
10860 gcall *call = gimple_build_call_internal (IFN_MASK_LOAD, 3,
10861 dataref_ptr, ptr,
10862 final_mask);
10863 gimple_call_set_nothrow (call, true);
10864 new_stmt = call;
10865 data_ref = NULL_TREE;
10867 else
10869 tree ltype = vectype;
10870 tree new_vtype = NULL_TREE;
10871 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
10872 unsigned int vect_align
10873 = vect_known_alignment_in_bytes (first_dr_info, vectype);
10874 unsigned int scalar_dr_size
10875 = vect_get_scalar_dr_size (first_dr_info);
10876 /* If there's no peeling for gaps but we have a gap
10877 with SLP loads then load only the lower half of the
10878 vector. See get_group_load_store_type for
10879 when we apply this optimization. */
10880 if (slp
10881 && loop_vinfo
10882 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && gap != 0
10883 && known_eq (nunits, (group_size - gap) * 2)
10884 && known_eq (nunits, group_size)
10885 && gap >= (vect_align / scalar_dr_size))
10887 tree half_vtype;
10888 new_vtype
10889 = vector_vector_composition_type (vectype, 2,
10890 &half_vtype);
10891 if (new_vtype != NULL_TREE)
10892 ltype = half_vtype;
10894 tree offset
10895 = (dataref_offset ? dataref_offset
10896 : build_int_cst (ref_type, 0));
10897 if (ltype != vectype
10898 && memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10900 unsigned HOST_WIDE_INT gap_offset
10901 = gap * tree_to_uhwi (TYPE_SIZE_UNIT (elem_type));
10902 tree gapcst = build_int_cst (ref_type, gap_offset);
10903 offset = size_binop (PLUS_EXPR, offset, gapcst);
10905 data_ref
10906 = fold_build2 (MEM_REF, ltype, dataref_ptr, offset);
10907 if (alignment_support_scheme == dr_aligned)
10909 else
10910 TREE_TYPE (data_ref)
10911 = build_aligned_type (TREE_TYPE (data_ref),
10912 align * BITS_PER_UNIT);
10913 if (ltype != vectype)
10915 vect_copy_ref_info (data_ref,
10916 DR_REF (first_dr_info->dr));
10917 tree tem = make_ssa_name (ltype);
10918 new_stmt = gimple_build_assign (tem, data_ref);
10919 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
10920 gsi);
10921 data_ref = NULL;
10922 vec<constructor_elt, va_gc> *v;
10923 vec_alloc (v, 2);
10924 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
10926 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10927 build_zero_cst (ltype));
10928 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
10930 else
10932 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
10933 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
10934 build_zero_cst (ltype));
10936 gcc_assert (new_vtype != NULL_TREE);
10937 if (new_vtype == vectype)
10938 new_stmt = gimple_build_assign (
10939 vec_dest, build_constructor (vectype, v));
10940 else
10942 tree new_vname = make_ssa_name (new_vtype);
10943 new_stmt = gimple_build_assign (
10944 new_vname, build_constructor (new_vtype, v));
10945 vect_finish_stmt_generation (vinfo, stmt_info,
10946 new_stmt, gsi);
10947 new_stmt = gimple_build_assign (
10948 vec_dest,
10949 build1 (VIEW_CONVERT_EXPR, vectype, new_vname));
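		      /* Schematically, the loaded half TEM is combined with
			 a zero half,
			   tmp = {tem, {0,...}}   (or {{0,...}, tem} for a
			   reversed access)
			   vec_dest = VIEW_CONVERT_EXPR <vectype> (tmp);
			 unless the composition type already equals
			 VECTYPE.  */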
10953 break;
10955 case dr_explicit_realign:
10957 if (costing_p)
10958 break;
10959 tree ptr, bump;
10961 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
10963 if (compute_in_loop)
10964 msq = vect_setup_realignment (vinfo, first_stmt_info, gsi,
10965 &realignment_token,
10966 dr_explicit_realign,
10967 dataref_ptr, NULL);
10969 if (TREE_CODE (dataref_ptr) == SSA_NAME)
10970 ptr = copy_ssa_name (dataref_ptr);
10971 else
10972 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
10973 // For explicit realign the target alignment should be
10974 // known at compile time.
10975 unsigned HOST_WIDE_INT align
10976 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
10977 new_stmt = gimple_build_assign (
10978 ptr, BIT_AND_EXPR, dataref_ptr,
10979 build_int_cst (TREE_TYPE (dataref_ptr),
10980 -(HOST_WIDE_INT) align));
10981 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10982 data_ref
10983 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
10984 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
10985 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10986 new_stmt = gimple_build_assign (vec_dest, data_ref);
10987 new_temp = make_ssa_name (vec_dest, new_stmt);
10988 gimple_assign_set_lhs (new_stmt, new_temp);
10989 gimple_move_vops (new_stmt, stmt_info->stmt);
10990 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
10991 msq = new_temp;
10993 bump = size_binop (MULT_EXPR, vs, TYPE_SIZE_UNIT (elem_type));
10994 bump = size_binop (MINUS_EXPR, bump, size_one_node);
10995 ptr = bump_vector_ptr (vinfo, dataref_ptr, NULL, gsi, stmt_info,
10996 bump);
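	      /* The second load (LSQ) is taken from the last byte of the
		 unaligned access, aligned downwards, so MSQ and LSQ together
		 span the whole unaligned vector; REALIGN_LOAD combines them
		 further below.  */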
10997 new_stmt = gimple_build_assign (
10998 NULL_TREE, BIT_AND_EXPR, ptr,
10999 build_int_cst (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
11000 if (TREE_CODE (ptr) == SSA_NAME)
11001 ptr = copy_ssa_name (ptr, new_stmt);
11002 else
11003 ptr = make_ssa_name (TREE_TYPE (ptr), new_stmt);
11004 gimple_assign_set_lhs (new_stmt, ptr);
11005 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11006 data_ref
11007 = build2 (MEM_REF, vectype, ptr, build_int_cst (ref_type, 0));
11008 break;
11010 case dr_explicit_realign_optimized:
11012 if (costing_p)
11013 break;
11014 if (TREE_CODE (dataref_ptr) == SSA_NAME)
11015 new_temp = copy_ssa_name (dataref_ptr);
11016 else
11017 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
11018 // We should only be doing this if we know the target
11019 // alignment at compile time.
11020 unsigned HOST_WIDE_INT align
11021 = DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
11022 new_stmt = gimple_build_assign (
11023 new_temp, BIT_AND_EXPR, dataref_ptr,
11024 build_int_cst (TREE_TYPE (dataref_ptr),
11025 -(HOST_WIDE_INT) align));
11026 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11027 data_ref = build2 (MEM_REF, vectype, new_temp,
11028 build_int_cst (ref_type, 0));
11029 break;
11031 default:
11032 gcc_unreachable ();
11035 /* One common place to cost the above vector load for the different
11036 alignment support schemes. */
11037 if (costing_p)
11039 /* For VMAT_CONTIGUOUS_PERMUTE, if it's a grouped load we
11040 only need to take care of the first stmt, whose
11041 stmt_info is first_stmt_info; iterating vec_num times
11042 on it covers the cost of the remaining ones, which is
11043 consistent with the transform. For the realign prologue
11044 cost, we only need to count it once for the whole group. */
11045 bool first_stmt_info_p = first_stmt_info == stmt_info;
11046 bool add_realign_cost = first_stmt_info_p && i == 0;
11047 if (memory_access_type == VMAT_CONTIGUOUS
11048 || memory_access_type == VMAT_CONTIGUOUS_REVERSE
11049 || (memory_access_type == VMAT_CONTIGUOUS_PERMUTE
11050 && (!grouped_load || first_stmt_info_p)))
11051 vect_get_load_cost (vinfo, stmt_info, 1,
11052 alignment_support_scheme, misalignment,
11053 add_realign_cost, &inside_cost,
11054 &prologue_cost, cost_vec, cost_vec, true);
11056 else
11058 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11059 /* DATA_REF is null if we've already built the statement. */
11060 if (data_ref)
11062 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
11063 new_stmt = gimple_build_assign (vec_dest, data_ref);
11065 new_temp = make_ssa_name (vec_dest, new_stmt);
11066 gimple_set_lhs (new_stmt, new_temp);
11067 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11070 /* 3. Handle explicit realignment if necessary/supported.
11071 Create in loop:
11072 vec_dest = realign_load (msq, lsq, realignment_token) */
11073 if (!costing_p
11074 && (alignment_support_scheme == dr_explicit_realign_optimized
11075 || alignment_support_scheme == dr_explicit_realign))
11077 lsq = gimple_assign_lhs (new_stmt);
11078 if (!realignment_token)
11079 realignment_token = dataref_ptr;
11080 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11081 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR, msq,
11082 lsq, realignment_token);
11083 new_temp = make_ssa_name (vec_dest, new_stmt);
11084 gimple_assign_set_lhs (new_stmt, new_temp);
11085 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11087 if (alignment_support_scheme == dr_explicit_realign_optimized)
11089 gcc_assert (phi);
11090 if (i == vec_num - 1 && j == ncopies - 1)
11091 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop),
11092 UNKNOWN_LOCATION);
11093 msq = lsq;
11097 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
11099 if (costing_p)
11100 inside_cost = record_stmt_cost (cost_vec, 1, vec_perm,
11101 stmt_info, 0, vect_body);
11102 else
11104 tree perm_mask = perm_mask_for_reverse (vectype);
11105 new_temp = permute_vec_elements (vinfo, new_temp, new_temp,
11106 perm_mask, stmt_info, gsi);
11107 new_stmt = SSA_NAME_DEF_STMT (new_temp);
11111 /* Collect vector loads and later create their permutation in
11112 vect_transform_grouped_load (). */
11113 if (!costing_p && (grouped_load || slp_perm))
11114 dr_chain.quick_push (new_temp);
11116 /* Store vector loads in the corresponding SLP_NODE. */
11117 if (!costing_p && slp && !slp_perm)
11118 slp_node->push_vec_def (new_stmt);
11120 /* With SLP permutation we load the gaps as well; without
11121 it we need to skip the gaps after we manage to fully load
11122 all elements. group_gap_adj is DR_GROUP_SIZE here. */
11123 group_elt += nunits;
11124 if (!costing_p
11125 && maybe_ne (group_gap_adj, 0U)
11126 && !slp_perm
11127 && known_eq (group_elt, group_size - group_gap_adj))
11129 poly_wide_int bump_val
11130 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11131 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
11132 == -1)
11133 bump_val = -bump_val;
11134 tree bump = wide_int_to_tree (sizetype, bump_val);
11135 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11136 stmt_info, bump);
11137 group_elt = 0;
11140 /* Bump the vector pointer to account for a gap or for excess
11141 elements loaded for a permuted SLP load. */
11142 if (!costing_p
11143 && maybe_ne (group_gap_adj, 0U)
11144 && slp_perm)
11146 poly_wide_int bump_val
11147 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
11148 if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
11149 bump_val = -bump_val;
11150 tree bump = wide_int_to_tree (sizetype, bump_val);
11151 dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, gsi,
11152 stmt_info, bump);
11155 if (slp && !slp_perm)
11156 continue;
11158 if (slp_perm)
11160 unsigned n_perms;
11161 /* For SLP we know we've seen all possible uses of dr_chain so
11162 direct vect_transform_slp_perm_load to DCE the unused parts.
11163 ??? This is a hack to prevent compile-time issues as seen
11164 in PR101120 and friends. */
11165 if (costing_p)
11167 vect_transform_slp_perm_load (vinfo, slp_node, vNULL, nullptr, vf,
11168 true, &n_perms, nullptr);
11169 inside_cost = record_stmt_cost (cost_vec, n_perms, vec_perm,
11170 stmt_info, 0, vect_body);
11172 else
11174 bool ok = vect_transform_slp_perm_load (vinfo, slp_node, dr_chain,
11175 gsi, vf, false, &n_perms,
11176 nullptr, true);
11177 gcc_assert (ok);
11180 else
11182 if (grouped_load)
11184 gcc_assert (memory_access_type == VMAT_CONTIGUOUS_PERMUTE);
11185 /* We assume that the cost of a single load-lanes instruction
11186 is equivalent to the cost of DR_GROUP_SIZE separate loads.
11187 If a grouped access is instead being provided by a
11188 load-and-permute operation, include the cost of the
11189 permutes. */
11190 if (costing_p && first_stmt_info == stmt_info)
11192 /* Uses even and odd extract operations or shuffle
11193 operations for each needed permute. */
11194 int group_size = DR_GROUP_SIZE (first_stmt_info);
11195 int nstmts = ceil_log2 (group_size) * group_size;
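		  /* E.g. a group of four loads is costed at
		     ceil_log2 (4) * 4 = 8 permute statements.  */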
11196 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
11197 stmt_info, 0, vect_body);
11199 if (dump_enabled_p ())
11200 dump_printf_loc (MSG_NOTE, vect_location,
11201 "vect_model_load_cost:"
11202 "strided group_size = %d .\n",
11203 group_size);
11205 else if (!costing_p)
11207 vect_transform_grouped_load (vinfo, stmt_info, dr_chain,
11208 group_size, gsi);
11209 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11212 else if (!costing_p)
11213 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11215 dr_chain.release ();
11217 if (!slp && !costing_p)
11218 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11220 if (costing_p)
11222 gcc_assert (memory_access_type != VMAT_INVARIANT
11223 && memory_access_type != VMAT_ELEMENTWISE
11224 && memory_access_type != VMAT_STRIDED_SLP
11225 && memory_access_type != VMAT_LOAD_STORE_LANES);
11226 if (dump_enabled_p ())
11227 dump_printf_loc (MSG_NOTE, vect_location,
11228 "vect_model_load_cost: inside_cost = %u, "
11229 "prologue_cost = %u .\n",
11230 inside_cost, prologue_cost);
11233 return true;
11236 /* Function vect_is_simple_cond.
11238 Input:
11239 LOOP - the loop that is being vectorized.
11240 COND - Condition that is checked for simple use.
11242 Output:
11243 *COMP_VECTYPE - the vector type for the comparison.
11244 *DTS - The def types for the arguments of the comparison.
11246 Returns whether a COND can be vectorized. Checks whether the
11247 condition operands are supportable using vect_is_simple_use. */
11249 static bool
11250 vect_is_simple_cond (tree cond, vec_info *vinfo, stmt_vec_info stmt_info,
11251 slp_tree slp_node, tree *comp_vectype,
11252 enum vect_def_type *dts, tree vectype)
11254 tree lhs, rhs;
11255 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11256 slp_tree slp_op;
11258 /* Mask case. */
11259 if (TREE_CODE (cond) == SSA_NAME
11260 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
11262 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &cond,
11263 &slp_op, &dts[0], comp_vectype)
11264 || !*comp_vectype
11265 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
11266 return false;
11267 return true;
11270 if (!COMPARISON_CLASS_P (cond))
11271 return false;
11273 lhs = TREE_OPERAND (cond, 0);
11274 rhs = TREE_OPERAND (cond, 1);
11276 if (TREE_CODE (lhs) == SSA_NAME)
11278 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
11279 &lhs, &slp_op, &dts[0], &vectype1))
11280 return false;
11282 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
11283 || TREE_CODE (lhs) == FIXED_CST)
11284 dts[0] = vect_constant_def;
11285 else
11286 return false;
11288 if (TREE_CODE (rhs) == SSA_NAME)
11290 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1,
11291 &rhs, &slp_op, &dts[1], &vectype2))
11292 return false;
11294 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
11295 || TREE_CODE (rhs) == FIXED_CST)
11296 dts[1] = vect_constant_def;
11297 else
11298 return false;
11300 if (vectype1 && vectype2
11301 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11302 TYPE_VECTOR_SUBPARTS (vectype2)))
11303 return false;
11305 *comp_vectype = vectype1 ? vectype1 : vectype2;
11306 /* Invariant comparison. */
11307 if (! *comp_vectype)
11309 tree scalar_type = TREE_TYPE (lhs);
11310 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11311 *comp_vectype = truth_type_for (vectype);
11312 else
11314 /* If we can widen the comparison to match vectype do so. */
11315 if (INTEGRAL_TYPE_P (scalar_type)
11316 && !slp_node
11317 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
11318 TYPE_SIZE (TREE_TYPE (vectype))))
11319 scalar_type = build_nonstandard_integer_type
11320 (vector_element_bits (vectype), TYPE_UNSIGNED (scalar_type));
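	  /* E.g. a comparison of two chars feeding an int-element COND_EXPR
	     is widened to a 32-bit comparison so that COMP_VECTYPE ends up
	     with the same number of lanes as VECTYPE.  */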
11321 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
11322 slp_node);
11326 return true;
11329 /* vectorizable_condition.
11331 Check if STMT_INFO is conditional modify expression that can be vectorized.
11332 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11333 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
11334 at GSI.
11336 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
11338 Return true if STMT_INFO is vectorizable in this way. */
11340 static bool
11341 vectorizable_condition (vec_info *vinfo,
11342 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11343 gimple **vec_stmt,
11344 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11346 tree scalar_dest = NULL_TREE;
11347 tree vec_dest = NULL_TREE;
11348 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
11349 tree then_clause, else_clause;
11350 tree comp_vectype = NULL_TREE;
11351 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
11352 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
11353 tree vec_compare;
11354 tree new_temp;
11355 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11356 enum vect_def_type dts[4]
11357 = {vect_unknown_def_type, vect_unknown_def_type,
11358 vect_unknown_def_type, vect_unknown_def_type};
11359 int ndts = 4;
11360 int ncopies;
11361 int vec_num;
11362 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11363 int i;
11364 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11365 vec<tree> vec_oprnds0 = vNULL;
11366 vec<tree> vec_oprnds1 = vNULL;
11367 vec<tree> vec_oprnds2 = vNULL;
11368 vec<tree> vec_oprnds3 = vNULL;
11369 tree vec_cmp_type;
11370 bool masked = false;
11372 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11373 return false;
11375 /* Is vectorizable conditional operation? */
11376 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11377 if (!stmt)
11378 return false;
11380 code = gimple_assign_rhs_code (stmt);
11381 if (code != COND_EXPR)
11382 return false;
11384 stmt_vec_info reduc_info = NULL;
11385 int reduc_index = -1;
11386 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
11387 bool for_reduction
11388 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
11389 if (for_reduction)
11391 if (slp_node)
11392 return false;
11393 reduc_info = info_for_reduction (vinfo, stmt_info);
11394 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
11395 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
11396 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
11397 || reduc_index != -1);
11399 else
11401 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11402 return false;
11405 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11406 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11408 if (slp_node)
11410 ncopies = 1;
11411 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
11413 else
11415 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11416 vec_num = 1;
11419 gcc_assert (ncopies >= 1);
11420 if (for_reduction && ncopies > 1)
11421 return false; /* FORNOW */
11423 cond_expr = gimple_assign_rhs1 (stmt);
11425 if (!vect_is_simple_cond (cond_expr, vinfo, stmt_info, slp_node,
11426 &comp_vectype, &dts[0], vectype)
11427 || !comp_vectype)
11428 return false;
11430 unsigned op_adjust = COMPARISON_CLASS_P (cond_expr) ? 1 : 0;
11431 slp_tree then_slp_node, else_slp_node;
11432 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1 + op_adjust,
11433 &then_clause, &then_slp_node, &dts[2], &vectype1))
11434 return false;
11435 if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 2 + op_adjust,
11436 &else_clause, &else_slp_node, &dts[3], &vectype2))
11437 return false;
11439 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
11440 return false;
11442 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
11443 return false;
11445 masked = !COMPARISON_CLASS_P (cond_expr);
11446 vec_cmp_type = truth_type_for (comp_vectype);
11448 if (vec_cmp_type == NULL_TREE)
11449 return false;
11451 cond_code = TREE_CODE (cond_expr);
11452 if (!masked)
11454 cond_expr0 = TREE_OPERAND (cond_expr, 0);
11455 cond_expr1 = TREE_OPERAND (cond_expr, 1);
11458 /* For conditional reductions, the "then" value needs to be the candidate
11459 value calculated by this iteration while the "else" value needs to be
11460 the result carried over from previous iterations. If the COND_EXPR
11461 is the other way around, we need to swap it. */
11462 bool must_invert_cmp_result = false;
11463 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
11465 if (masked)
11466 must_invert_cmp_result = true;
11467 else
11469 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
11470 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
11471 if (new_code == ERROR_MARK)
11472 must_invert_cmp_result = true;
11473 else
11475 cond_code = new_code;
11476 /* Make sure we don't accidentally use the old condition. */
11477 cond_expr = NULL_TREE;
11480 std::swap (then_clause, else_clause);
11483 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
11485 /* Boolean values may have another representation in vectors
11486 and therefore we prefer bit operations over comparison for
11487 them (which also works for scalar masks). We store opcodes
11488 to use in bitop1 and bitop2. Statement is vectorized as
11489 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
11490 depending on bitop1 and bitop2 arity. */
11491 switch (cond_code)
11493 case GT_EXPR:
11494 bitop1 = BIT_NOT_EXPR;
11495 bitop2 = BIT_AND_EXPR;
11496 break;
11497 case GE_EXPR:
11498 bitop1 = BIT_NOT_EXPR;
11499 bitop2 = BIT_IOR_EXPR;
11500 break;
11501 case LT_EXPR:
11502 bitop1 = BIT_NOT_EXPR;
11503 bitop2 = BIT_AND_EXPR;
11504 std::swap (cond_expr0, cond_expr1);
11505 break;
11506 case LE_EXPR:
11507 bitop1 = BIT_NOT_EXPR;
11508 bitop2 = BIT_IOR_EXPR;
11509 std::swap (cond_expr0, cond_expr1);
11510 break;
11511 case NE_EXPR:
11512 bitop1 = BIT_XOR_EXPR;
11513 break;
11514 case EQ_EXPR:
11515 bitop1 = BIT_XOR_EXPR;
11516 bitop2 = BIT_NOT_EXPR;
11517 break;
11518 default:
11519 return false;
11521 cond_code = SSA_NAME;
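  /* For example, with mask operands A and B this rewrites A > B as
     A & ~B and A == B as ~(A ^ B); the trailing BIT_NOT of the EQ case
     is typically realized later by swapping the THEN/ELSE values instead
     of emitting the negation.  */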
11524 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
11525 && reduction_type == EXTRACT_LAST_REDUCTION
11526 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
11528 if (dump_enabled_p ())
11529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11530 "reduction comparison operation not supported.\n");
11531 return false;
11534 if (!vec_stmt)
11536 if (bitop1 != NOP_EXPR)
11538 machine_mode mode = TYPE_MODE (comp_vectype);
11539 optab optab;
11541 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
11542 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11543 return false;
11545 if (bitop2 != NOP_EXPR)
11547 optab = optab_for_tree_code (bitop2, comp_vectype,
11548 optab_default);
11549 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11550 return false;
11554 vect_cost_for_stmt kind = vector_stmt;
11555 if (reduction_type == EXTRACT_LAST_REDUCTION)
11556 /* Count one reduction-like operation per vector. */
11557 kind = vec_to_scalar;
11558 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code)
11559 && (masked
11560 || (!expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type,
11561 cond_code)
11562 || !expand_vec_cond_expr_p (vectype, vec_cmp_type,
11563 ERROR_MARK))))
11564 return false;
11566 if (slp_node
11567 && (!vect_maybe_update_slp_op_vectype
11568 (SLP_TREE_CHILDREN (slp_node)[0], comp_vectype)
11569 || (op_adjust == 1
11570 && !vect_maybe_update_slp_op_vectype
11571 (SLP_TREE_CHILDREN (slp_node)[1], comp_vectype))
11572 || !vect_maybe_update_slp_op_vectype (then_slp_node, vectype)
11573 || !vect_maybe_update_slp_op_vectype (else_slp_node, vectype)))
11575 if (dump_enabled_p ())
11576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11577 "incompatible vector types for invariants\n");
11578 return false;
11581 if (loop_vinfo && for_reduction
11582 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
11584 if (reduction_type == EXTRACT_LAST_REDUCTION)
11585 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
11586 ncopies * vec_num, vectype, NULL);
11587 /* Extra inactive lanes should be safe for vect_nested_cycle. */
11588 else if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_nested_cycle)
11590 if (dump_enabled_p ())
11591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11592 "conditional reduction prevents the use"
11593 " of partial vectors.\n");
11594 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
11598 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
11599 vect_model_simple_cost (vinfo, stmt_info, ncopies, dts, ndts, slp_node,
11600 cost_vec, kind);
11601 return true;
11604 /* Transform. */
11606 /* Handle def. */
11607 scalar_dest = gimple_assign_lhs (stmt);
11608 if (reduction_type != EXTRACT_LAST_REDUCTION)
11609 vec_dest = vect_create_destination_var (scalar_dest, vectype);
11611 bool swap_cond_operands = false;
11613 /* See whether another part of the vectorized code applies a loop
11614 mask to the condition, or to its inverse. */
11616 vec_loop_masks *masks = NULL;
11617 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
11619 if (reduction_type == EXTRACT_LAST_REDUCTION)
11620 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11621 else
11623 scalar_cond_masked_key cond (cond_expr, ncopies);
11624 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
11625 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11626 else
11628 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
11629 tree_code orig_code = cond.code;
11630 cond.code = invert_tree_comparison (cond.code, honor_nans);
11631 if (!masked && loop_vinfo->scalar_cond_masked_set.contains (cond))
11633 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11634 cond_code = cond.code;
11635 swap_cond_operands = true;
11637 else
11639 /* Try the inverse of the current mask. We check if the
11640 inverse mask is live and if so we generate a negate of
11641 the current mask such that we still honor NaNs. */
11642 cond.inverted_p = true;
11643 cond.code = orig_code;
11644 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
11646 masks = &LOOP_VINFO_MASKS (loop_vinfo);
11647 cond_code = cond.code;
11648 swap_cond_operands = true;
11649 must_invert_cmp_result = true;
11656 /* Handle cond expr. */
11657 if (masked)
11658 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11659 cond_expr, &vec_oprnds0, comp_vectype,
11660 then_clause, &vec_oprnds2, vectype,
11661 reduction_type != EXTRACT_LAST_REDUCTION
11662 ? else_clause : NULL, &vec_oprnds3, vectype);
11663 else
11664 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
11665 cond_expr0, &vec_oprnds0, comp_vectype,
11666 cond_expr1, &vec_oprnds1, comp_vectype,
11667 then_clause, &vec_oprnds2, vectype,
11668 reduction_type != EXTRACT_LAST_REDUCTION
11669 ? else_clause : NULL, &vec_oprnds3, vectype);
11671 /* Arguments are ready. Create the new vector stmt. */
11672 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
11674 vec_then_clause = vec_oprnds2[i];
11675 if (reduction_type != EXTRACT_LAST_REDUCTION)
11676 vec_else_clause = vec_oprnds3[i];
11678 if (swap_cond_operands)
11679 std::swap (vec_then_clause, vec_else_clause);
11681 if (masked)
11682 vec_compare = vec_cond_lhs;
11683 else
11685 vec_cond_rhs = vec_oprnds1[i];
11686 if (bitop1 == NOP_EXPR)
11688 gimple_seq stmts = NULL;
11689 vec_compare = gimple_build (&stmts, cond_code, vec_cmp_type,
11690 vec_cond_lhs, vec_cond_rhs);
11691 gsi_insert_before (gsi, stmts, GSI_SAME_STMT);
11693 else
11695 new_temp = make_ssa_name (vec_cmp_type);
11696 gassign *new_stmt;
11697 if (bitop1 == BIT_NOT_EXPR)
11698 new_stmt = gimple_build_assign (new_temp, bitop1,
11699 vec_cond_rhs);
11700 else
11701 new_stmt
11702 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
11703 vec_cond_rhs);
11704 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11705 if (bitop2 == NOP_EXPR)
11706 vec_compare = new_temp;
11707 else if (bitop2 == BIT_NOT_EXPR
11708 && reduction_type != EXTRACT_LAST_REDUCTION)
11710 /* Instead of doing ~x ? y : z do x ? z : y. */
11711 vec_compare = new_temp;
11712 std::swap (vec_then_clause, vec_else_clause);
11714 else
11716 vec_compare = make_ssa_name (vec_cmp_type);
11717 if (bitop2 == BIT_NOT_EXPR)
11718 new_stmt
11719 = gimple_build_assign (vec_compare, bitop2, new_temp);
11720 else
11721 new_stmt
11722 = gimple_build_assign (vec_compare, bitop2,
11723 vec_cond_lhs, new_temp);
11724 vect_finish_stmt_generation (vinfo, stmt_info,
11725 new_stmt, gsi);
11730 /* If we decided to apply a loop mask to the result of the vector
11731 comparison, AND the comparison with the mask now. Later passes
11732 should then be able to reuse the AND results between multiple
11733 vector statements.
11735 For example:
11736 for (int i = 0; i < 100; ++i)
11737 x[i] = y[i] ? z[i] : 10;
11739 results in following optimized GIMPLE:
11741 mask__35.8_43 = vect__4.7_41 != { 0, ... };
11742 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
11743 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
11744 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
11745 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
11746 vect_iftmp.11_47, { 10, ... }>;
11748 instead of using masked and unmasked forms of
11749 vec != { 0, ... } (masked in the MASK_LOAD,
11750 unmasked in the VEC_COND_EXPR). */
11752 /* Force vec_compare to be an SSA_NAME rather than a comparison,
11753 in cases where that's necessary. */
11755 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
11757 if (!is_gimple_val (vec_compare))
11759 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11760 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11761 vec_compare);
11762 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11763 vec_compare = vec_compare_name;
11766 if (must_invert_cmp_result)
11768 tree vec_compare_name = make_ssa_name (vec_cmp_type);
11769 gassign *new_stmt = gimple_build_assign (vec_compare_name,
11770 BIT_NOT_EXPR,
11771 vec_compare);
11772 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11773 vec_compare = vec_compare_name;
11776 if (masks)
11778 tree loop_mask
11779 = vect_get_loop_mask (loop_vinfo, gsi, masks, vec_num * ncopies,
11780 vectype, i);
11781 tree tmp2 = make_ssa_name (vec_cmp_type);
11782 gassign *g
11783 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
11784 loop_mask);
11785 vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
11786 vec_compare = tmp2;
11790 gimple *new_stmt;
11791 if (reduction_type == EXTRACT_LAST_REDUCTION)
11793 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
11794 tree lhs = gimple_get_lhs (old_stmt);
11795 new_stmt = gimple_build_call_internal
11796 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
11797 vec_then_clause);
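	  /* .FOLD_EXTRACT_LAST (ELSE, MASK, THEN) yields the THEN element of
	     the last active lane of MASK, or ELSE when no lane is active.  */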
11798 gimple_call_set_lhs (new_stmt, lhs);
11799 SSA_NAME_DEF_STMT (lhs) = new_stmt;
11800 if (old_stmt == gsi_stmt (*gsi))
11801 vect_finish_replace_stmt (vinfo, stmt_info, new_stmt);
11802 else
11804 /* In this case we're moving the definition to later in the
11805 block. That doesn't matter because the only uses of the
11806 lhs are in phi statements. */
11807 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
11808 gsi_remove (&old_gsi, true);
11809 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11812 else
11814 new_temp = make_ssa_name (vec_dest);
11815 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
11816 vec_then_clause, vec_else_clause);
11817 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
11819 if (slp_node)
11820 slp_node->push_vec_def (new_stmt);
11821 else
11822 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
11825 if (!slp_node)
11826 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
11828 vec_oprnds0.release ();
11829 vec_oprnds1.release ();
11830 vec_oprnds2.release ();
11831 vec_oprnds3.release ();
11833 return true;
11836 /* vectorizable_comparison.
11838 Check if STMT_INFO is comparison expression that can be vectorized.
11839 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
11840 comparison, put it in VEC_STMT, and insert it at GSI.
11842 Return true if STMT_INFO is vectorizable in this way. */
11844 static bool
11845 vectorizable_comparison (vec_info *vinfo,
11846 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
11847 gimple **vec_stmt,
11848 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
11850 tree lhs, rhs1, rhs2;
11851 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
11852 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
11853 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
11854 tree new_temp;
11855 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
11856 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
11857 int ndts = 2;
11858 poly_uint64 nunits;
11859 int ncopies;
11860 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
11861 int i;
11862 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
11863 vec<tree> vec_oprnds0 = vNULL;
11864 vec<tree> vec_oprnds1 = vNULL;
11865 tree mask_type;
11866 tree mask;
11868 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
11869 return false;
11871 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
11872 return false;
11874 mask_type = vectype;
11875 nunits = TYPE_VECTOR_SUBPARTS (vectype);
11877 if (slp_node)
11878 ncopies = 1;
11879 else
11880 ncopies = vect_get_num_copies (loop_vinfo, vectype);
11882 gcc_assert (ncopies >= 1);
11883 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
11884 return false;
11886 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
11887 if (!stmt)
11888 return false;
11890 code = gimple_assign_rhs_code (stmt);
11892 if (TREE_CODE_CLASS (code) != tcc_comparison)
11893 return false;
11895 slp_tree slp_rhs1, slp_rhs2;
11896 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11897 0, &rhs1, &slp_rhs1, &dts[0], &vectype1))
11898 return false;
11900 if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
11901 1, &rhs2, &slp_rhs2, &dts[1], &vectype2))
11902 return false;
11904 if (vectype1 && vectype2
11905 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
11906 TYPE_VECTOR_SUBPARTS (vectype2)))
11907 return false;
11909 vectype = vectype1 ? vectype1 : vectype2;
11911 /* Invariant comparison. */
11912 if (!vectype)
11914 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
11915 vectype = mask_type;
11916 else
11917 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
11918 slp_node);
11919 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
11920 return false;
11922 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
11923 return false;
11925 /* Can't compare mask and non-mask types. */
11926 if (vectype1 && vectype2
11927 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
11928 return false;
11930 /* Boolean values may have another representation in vectors
11931 and therefore we prefer bit operations over comparison for
11932 them (which also works for scalar masks). We store opcodes
11933 to use in bitop1 and bitop2. Statement is vectorized as
11934 BITOP2 (rhs1 BITOP1 rhs2) or
11935 rhs1 BITOP2 (BITOP1 rhs2)
11936 depending on bitop1 and bitop2 arity. */
11937 bool swap_p = false;
11938 if (VECTOR_BOOLEAN_TYPE_P (vectype))
11940 if (code == GT_EXPR)
11942 bitop1 = BIT_NOT_EXPR;
11943 bitop2 = BIT_AND_EXPR;
11945 else if (code == GE_EXPR)
11947 bitop1 = BIT_NOT_EXPR;
11948 bitop2 = BIT_IOR_EXPR;
11950 else if (code == LT_EXPR)
11952 bitop1 = BIT_NOT_EXPR;
11953 bitop2 = BIT_AND_EXPR;
11954 swap_p = true;
11956 else if (code == LE_EXPR)
11958 bitop1 = BIT_NOT_EXPR;
11959 bitop2 = BIT_IOR_EXPR;
11960 swap_p = true;
11962 else
11964 bitop1 = BIT_XOR_EXPR;
11965 if (code == EQ_EXPR)
11966 bitop2 = BIT_NOT_EXPR;
11970 if (!vec_stmt)
11972 if (bitop1 == NOP_EXPR)
11974 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
11975 return false;
11977 else
11979 machine_mode mode = TYPE_MODE (vectype);
11980 optab optab;
11982 optab = optab_for_tree_code (bitop1, vectype, optab_default);
11983 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11984 return false;
11986 if (bitop2 != NOP_EXPR)
11988 optab = optab_for_tree_code (bitop2, vectype, optab_default);
11989 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
11990 return false;
11994 /* Put types on constant and invariant SLP children. */
11995 if (slp_node
11996 && (!vect_maybe_update_slp_op_vectype (slp_rhs1, vectype)
11997 || !vect_maybe_update_slp_op_vectype (slp_rhs2, vectype)))
11999 if (dump_enabled_p ())
12000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12001 "incompatible vector types for invariants\n");
12002 return false;
12005 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
12006 vect_model_simple_cost (vinfo, stmt_info,
12007 ncopies * (1 + (bitop2 != NOP_EXPR)),
12008 dts, ndts, slp_node, cost_vec);
12009 return true;
12012 /* Transform. */
12014 /* Handle def. */
12015 lhs = gimple_assign_lhs (stmt);
12016 mask = vect_create_destination_var (lhs, mask_type);
12018 vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
12019 rhs1, &vec_oprnds0, vectype,
12020 rhs2, &vec_oprnds1, vectype);
12021 if (swap_p)
12022 std::swap (vec_oprnds0, vec_oprnds1);
12024 /* Arguments are ready. Create the new vector stmt. */
12025 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
12027 gimple *new_stmt;
12028 vec_rhs2 = vec_oprnds1[i];
12030 new_temp = make_ssa_name (mask);
12031 if (bitop1 == NOP_EXPR)
12033 new_stmt = gimple_build_assign (new_temp, code,
12034 vec_rhs1, vec_rhs2);
12035 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12037 else
12039 if (bitop1 == BIT_NOT_EXPR)
12040 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
12041 else
12042 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
12043 vec_rhs2);
12044 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12045 if (bitop2 != NOP_EXPR)
12047 tree res = make_ssa_name (mask);
12048 if (bitop2 == BIT_NOT_EXPR)
12049 new_stmt = gimple_build_assign (res, bitop2, new_temp);
12050 else
12051 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
12052 new_temp);
12053 vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
12056 if (slp_node)
12057 slp_node->push_vec_def (new_stmt);
12058 else
12059 STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
12062 if (!slp_node)
12063 *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
12065 vec_oprnds0.release ();
12066 vec_oprnds1.release ();
12068 return true;
12071 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
12072 can handle all live statements in the node. Otherwise return true
12073 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
12074 VEC_STMT_P is as for vectorizable_live_operation. */
12076 static bool
12077 can_vectorize_live_stmts (vec_info *vinfo, stmt_vec_info stmt_info,
12078 slp_tree slp_node, slp_instance slp_node_instance,
12079 bool vec_stmt_p,
12080 stmt_vector_for_cost *cost_vec)
12082 if (slp_node)
12084 stmt_vec_info slp_stmt_info;
12085 unsigned int i;
12086 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
12088 if (STMT_VINFO_LIVE_P (slp_stmt_info)
12089 && !vectorizable_live_operation (vinfo, slp_stmt_info, slp_node,
12090 slp_node_instance, i,
12091 vec_stmt_p, cost_vec))
12092 return false;
12095 else if (STMT_VINFO_LIVE_P (stmt_info)
12096 && !vectorizable_live_operation (vinfo, stmt_info,
12097 slp_node, slp_node_instance, -1,
12098 vec_stmt_p, cost_vec))
12099 return false;
12101 return true;
12104 /* Make sure the statement is vectorizable. */
12106 opt_result
12107 vect_analyze_stmt (vec_info *vinfo,
12108 stmt_vec_info stmt_info, bool *need_to_vectorize,
12109 slp_tree node, slp_instance node_instance,
12110 stmt_vector_for_cost *cost_vec)
12112 bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
12113 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
12114 bool ok;
12115 gimple_seq pattern_def_seq;
12117 if (dump_enabled_p ())
12118 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
12119 stmt_info->stmt);
12121 if (gimple_has_volatile_ops (stmt_info->stmt))
12122 return opt_result::failure_at (stmt_info->stmt,
12123 "not vectorized:"
12124 " stmt has volatile operands: %G\n",
12125 stmt_info->stmt);
12127 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12128 && node == NULL
12129 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
12131 gimple_stmt_iterator si;
12133 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
12135 stmt_vec_info pattern_def_stmt_info
12136 = vinfo->lookup_stmt (gsi_stmt (si));
12137 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
12138 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
12140 /* Analyze def stmt of STMT if it's a pattern stmt. */
12141 if (dump_enabled_p ())
12142 dump_printf_loc (MSG_NOTE, vect_location,
12143 "==> examining pattern def statement: %G",
12144 pattern_def_stmt_info->stmt);
12146 opt_result res
12147 = vect_analyze_stmt (vinfo, pattern_def_stmt_info,
12148 need_to_vectorize, node, node_instance,
12149 cost_vec);
12150 if (!res)
12151 return res;
12156 /* Skip stmts that do not need to be vectorized. In loops this is expected
12157 to include:
12158 - the COND_EXPR which is the loop exit condition
12159 - any LABEL_EXPRs in the loop
12160 - computations that are used only for array indexing or loop control.
12161 In basic blocks we only analyze statements that are a part of some SLP
12162 instance, therefore, all the statements are relevant.
12164 The pattern statement needs to be analyzed instead of the original
12165 statement if the original statement is not relevant. Otherwise, we
12166 analyze both statements. In basic blocks we are called from some SLP
12167 instance traversal, so don't analyze pattern stmts instead; the
12168 pattern stmts will already be part of an SLP instance. */
12170 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
12171 if (!STMT_VINFO_RELEVANT_P (stmt_info)
12172 && !STMT_VINFO_LIVE_P (stmt_info))
12174 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12175 && pattern_stmt_info
12176 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12177 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12179 /* Analyze PATTERN_STMT instead of the original stmt. */
12180 stmt_info = pattern_stmt_info;
12181 if (dump_enabled_p ())
12182 dump_printf_loc (MSG_NOTE, vect_location,
12183 "==> examining pattern statement: %G",
12184 stmt_info->stmt);
12186 else
12188 if (dump_enabled_p ())
12189 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
12191 return opt_result::success ();
12194 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
12195 && node == NULL
12196 && pattern_stmt_info
12197 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
12198 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
12200 /* Analyze PATTERN_STMT too. */
12201 if (dump_enabled_p ())
12202 dump_printf_loc (MSG_NOTE, vect_location,
12203 "==> examining pattern statement: %G",
12204 pattern_stmt_info->stmt);
12206 opt_result res
12207 = vect_analyze_stmt (vinfo, pattern_stmt_info, need_to_vectorize, node,
12208 node_instance, cost_vec);
12209 if (!res)
12210 return res;
12213 switch (STMT_VINFO_DEF_TYPE (stmt_info))
12215 case vect_internal_def:
12216 break;
12218 case vect_reduction_def:
12219 case vect_nested_cycle:
12220 gcc_assert (!bb_vinfo
12221 && (relevance == vect_used_in_outer
12222 || relevance == vect_used_in_outer_by_reduction
12223 || relevance == vect_used_by_reduction
12224 || relevance == vect_unused_in_scope
12225 || relevance == vect_used_only_live));
12226 break;
12228 case vect_induction_def:
12229 case vect_first_order_recurrence:
12230 gcc_assert (!bb_vinfo);
12231 break;
12233 case vect_constant_def:
12234 case vect_external_def:
12235 case vect_unknown_def_type:
12236 default:
12237 gcc_unreachable ();
12240 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12241 if (node)
12242 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (node);
12244 if (STMT_VINFO_RELEVANT_P (stmt_info))
12246 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
12247 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
12248 || (call && gimple_call_lhs (call) == NULL_TREE));
12249 *need_to_vectorize = true;
12252 if (PURE_SLP_STMT (stmt_info) && !node)
12254 if (dump_enabled_p ())
12255 dump_printf_loc (MSG_NOTE, vect_location,
12256 "handled only by SLP analysis\n");
12257 return opt_result::success ();
12260 ok = true;
12261 if (!bb_vinfo
12262 && (STMT_VINFO_RELEVANT_P (stmt_info)
12263 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
12264 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
12265 -mveclibabi= takes preference over library functions with
12266 the simd attribute. */
12267 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12268 || vectorizable_simd_clone_call (vinfo, stmt_info, NULL, NULL, node,
12269 cost_vec)
12270 || vectorizable_conversion (vinfo, stmt_info,
12271 NULL, NULL, node, cost_vec)
12272 || vectorizable_operation (vinfo, stmt_info,
12273 NULL, NULL, node, cost_vec)
12274 || vectorizable_assignment (vinfo, stmt_info,
12275 NULL, NULL, node, cost_vec)
12276 || vectorizable_load (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12277 || vectorizable_store (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12278 || vectorizable_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12279 node, node_instance, cost_vec)
12280 || vectorizable_induction (as_a <loop_vec_info> (vinfo), stmt_info,
12281 NULL, node, cost_vec)
12282 || vectorizable_shift (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12283 || vectorizable_condition (vinfo, stmt_info,
12284 NULL, NULL, node, cost_vec)
12285 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12286 cost_vec)
12287 || vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12288 stmt_info, NULL, node)
12289 || vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12290 stmt_info, NULL, node, cost_vec));
12291 else
12293 if (bb_vinfo)
12294 ok = (vectorizable_call (vinfo, stmt_info, NULL, NULL, node, cost_vec)
12295 || vectorizable_simd_clone_call (vinfo, stmt_info,
12296 NULL, NULL, node, cost_vec)
12297 || vectorizable_conversion (vinfo, stmt_info, NULL, NULL, node,
12298 cost_vec)
12299 || vectorizable_shift (vinfo, stmt_info,
12300 NULL, NULL, node, cost_vec)
12301 || vectorizable_operation (vinfo, stmt_info,
12302 NULL, NULL, node, cost_vec)
12303 || vectorizable_assignment (vinfo, stmt_info, NULL, NULL, node,
12304 cost_vec)
12305 || vectorizable_load (vinfo, stmt_info,
12306 NULL, NULL, node, cost_vec)
12307 || vectorizable_store (vinfo, stmt_info,
12308 NULL, NULL, node, cost_vec)
12309 || vectorizable_condition (vinfo, stmt_info,
12310 NULL, NULL, node, cost_vec)
12311 || vectorizable_comparison (vinfo, stmt_info, NULL, NULL, node,
12312 cost_vec)
12313 || vectorizable_phi (vinfo, stmt_info, NULL, node, cost_vec));
12316 if (node)
12317 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12319 if (!ok)
12320 return opt_result::failure_at (stmt_info->stmt,
12321 "not vectorized:"
12322 " relevant stmt not supported: %G",
12323 stmt_info->stmt);
12325 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
12326 need extra handling, except for vectorizable reductions. */
12327 if (!bb_vinfo
12328 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
12329 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
12330 && !can_vectorize_live_stmts (as_a <loop_vec_info> (vinfo),
12331 stmt_info, node, node_instance,
12332 false, cost_vec))
12333 return opt_result::failure_at (stmt_info->stmt,
12334 "not vectorized:"
12335 " live stmt not supported: %G",
12336 stmt_info->stmt);
12338 return opt_result::success ();
12342 /* Function vect_transform_stmt.
12344 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
12346 bool
12347 vect_transform_stmt (vec_info *vinfo,
12348 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
12349 slp_tree slp_node, slp_instance slp_node_instance)
12351 bool is_store = false;
12352 gimple *vec_stmt = NULL;
12353 bool done;
12355 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
12357 tree saved_vectype = STMT_VINFO_VECTYPE (stmt_info);
12358 if (slp_node)
12359 STMT_VINFO_VECTYPE (stmt_info) = SLP_TREE_VECTYPE (slp_node);
12361 switch (STMT_VINFO_TYPE (stmt_info))
12363 case type_demotion_vec_info_type:
12364 case type_promotion_vec_info_type:
12365 case type_conversion_vec_info_type:
12366 done = vectorizable_conversion (vinfo, stmt_info,
12367 gsi, &vec_stmt, slp_node, NULL);
12368 gcc_assert (done);
12369 break;
12371 case induc_vec_info_type:
12372 done = vectorizable_induction (as_a <loop_vec_info> (vinfo),
12373 stmt_info, &vec_stmt, slp_node,
12374 NULL);
12375 gcc_assert (done);
12376 break;
12378 case shift_vec_info_type:
12379 done = vectorizable_shift (vinfo, stmt_info,
12380 gsi, &vec_stmt, slp_node, NULL);
12381 gcc_assert (done);
12382 break;
12384 case op_vec_info_type:
12385 done = vectorizable_operation (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12386 NULL);
12387 gcc_assert (done);
12388 break;
12390 case assignment_vec_info_type:
12391 done = vectorizable_assignment (vinfo, stmt_info,
12392 gsi, &vec_stmt, slp_node, NULL);
12393 gcc_assert (done);
12394 break;
12396 case load_vec_info_type:
12397 done = vectorizable_load (vinfo, stmt_info, gsi, &vec_stmt, slp_node,
12398 NULL);
12399 gcc_assert (done);
12400 break;
12402 case store_vec_info_type:
12403 done = vectorizable_store (vinfo, stmt_info,
12404 gsi, &vec_stmt, slp_node, NULL);
12405 gcc_assert (done);
12406 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
12408 /* In case of interleaving, the whole chain is vectorized when the
12409 last store in the chain is reached. Store stmts before the last
12410          one are skipped, and their vec_stmt_info shouldn't be freed
12411 meanwhile. */
12412 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
12413 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
12414 is_store = true;
12416 else
12417 is_store = true;
12418 break;
12420 case condition_vec_info_type:
12421 done = vectorizable_condition (vinfo, stmt_info,
12422 gsi, &vec_stmt, slp_node, NULL);
12423 gcc_assert (done);
12424 break;
12426 case comparison_vec_info_type:
12427 done = vectorizable_comparison (vinfo, stmt_info, gsi, &vec_stmt,
12428 slp_node, NULL);
12429 gcc_assert (done);
12430 break;
12432 case call_vec_info_type:
12433 done = vectorizable_call (vinfo, stmt_info,
12434 gsi, &vec_stmt, slp_node, NULL);
12435 break;
12437 case call_simd_clone_vec_info_type:
12438 done = vectorizable_simd_clone_call (vinfo, stmt_info, gsi, &vec_stmt,
12439 slp_node, NULL);
12440 break;
12442 case reduc_vec_info_type:
12443 done = vect_transform_reduction (as_a <loop_vec_info> (vinfo), stmt_info,
12444 gsi, &vec_stmt, slp_node);
12445 gcc_assert (done);
12446 break;
12448 case cycle_phi_info_type:
12449 done = vect_transform_cycle_phi (as_a <loop_vec_info> (vinfo), stmt_info,
12450 &vec_stmt, slp_node, slp_node_instance);
12451 gcc_assert (done);
12452 break;
12454 case lc_phi_info_type:
12455 done = vectorizable_lc_phi (as_a <loop_vec_info> (vinfo),
12456 stmt_info, &vec_stmt, slp_node);
12457 gcc_assert (done);
12458 break;
12460 case recurr_info_type:
12461 done = vectorizable_recurr (as_a <loop_vec_info> (vinfo),
12462 stmt_info, &vec_stmt, slp_node, NULL);
12463 gcc_assert (done);
12464 break;
12466 case phi_info_type:
12467 done = vectorizable_phi (vinfo, stmt_info, &vec_stmt, slp_node, NULL);
12468 gcc_assert (done);
12469 break;
12471 default:
12472 if (!STMT_VINFO_LIVE_P (stmt_info))
12474 if (dump_enabled_p ())
12475 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12476 "stmt not supported.\n");
12477 gcc_unreachable ();
12479 done = true;
12482 if (!slp_node && vec_stmt)
12483 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info).exists ());
12485 if (STMT_VINFO_TYPE (stmt_info) != store_vec_info_type)
12487 /* Handle stmts whose DEF is used outside the loop-nest that is
12488 being vectorized. */
12489 done = can_vectorize_live_stmts (vinfo, stmt_info, slp_node,
12490 slp_node_instance, true, NULL);
12491 gcc_assert (done);
12494 if (slp_node)
12495 STMT_VINFO_VECTYPE (stmt_info) = saved_vectype;
12497 return is_store;
12501 /* Remove a group of stores (for SLP or interleaving), free their
12502 stmt_vec_info. */
12504 void
12505 vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
12507 stmt_vec_info next_stmt_info = first_stmt_info;
12509 while (next_stmt_info)
12511 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
12512 next_stmt_info = vect_orig_stmt (next_stmt_info);
12513 /* Free the attached stmt_vec_info and remove the stmt. */
12514 vinfo->remove_stmt (next_stmt_info);
12515 next_stmt_info = tmp;
12519 /* If NUNITS is nonzero, return a vector type that contains NUNITS
12520 elements of type SCALAR_TYPE, or null if the target doesn't support
12521 such a type.
12523 If NUNITS is zero, return a vector type that contains elements of
12524 type SCALAR_TYPE, choosing whichever vector size the target prefers.
12526 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
12527 for this vectorization region and want to "autodetect" the best choice.
12528 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
12529 and we want the new type to be interoperable with it. PREVAILING_MODE
12530 in this case can be a scalar integer mode or a vector mode; when it
12531 is a vector mode, the function acts like a tree-level version of
12532 related_vector_mode. */
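/* Editorial illustration, not part of the original sources: a sketch of the
   three parameter combinations described above.  PREV stands for a
   hypothetical, previously chosen 16-byte vector mode and the result types
   named in the comments are target-dependent assumptions.

     // No prevailing mode yet: autodetect the preferred vector size.
     tree v1 = get_related_vectype_for_scalar_type (VOIDmode,
						    short_integer_type_node, 0);
     // e.g. "vector(8) short int" if the preferred SIMD mode is V8HImode.

     // Interoperate with PREV, letting the target pick the element count.
     tree v2 = get_related_vectype_for_scalar_type (PREV, integer_type_node, 0);
     // e.g. "vector(4) int".

     // Interoperate with PREV but request exactly two elements.
     tree v3 = get_related_vectype_for_scalar_type (PREV, integer_type_node, 2);
     // e.g. "vector(2) int", or NULL_TREE if no such type is available.  */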
12534 tree
12535 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
12536 tree scalar_type, poly_uint64 nunits)
12538 tree orig_scalar_type = scalar_type;
12539 scalar_mode inner_mode;
12540 machine_mode simd_mode;
12541 tree vectype;
12543 if ((!INTEGRAL_TYPE_P (scalar_type)
12544 && !POINTER_TYPE_P (scalar_type)
12545 && !SCALAR_FLOAT_TYPE_P (scalar_type))
12546 || (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
12547 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode)))
12548 return NULL_TREE;
12550 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
12552 /* Interoperability between modes requires one to be a constant multiple
12553 of the other, so that the number of vectors required for each operation
12554 is a compile-time constant. */
12555 if (prevailing_mode != VOIDmode
12556 && !constant_multiple_p (nunits * nbytes,
12557 GET_MODE_SIZE (prevailing_mode))
12558 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode),
12559 nunits * nbytes))
12560 return NULL_TREE;
12562 /* For vector types of elements whose mode precision doesn't
12563      match their type's precision we use an element type of mode
12564 precision. The vectorization routines will have to make sure
12565 they support the proper result truncation/extension.
12566 We also make sure to build vector types with INTEGER_TYPE
12567 component type only. */
12568 if (INTEGRAL_TYPE_P (scalar_type)
12569 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
12570 || TREE_CODE (scalar_type) != INTEGER_TYPE))
12571 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
12572 TYPE_UNSIGNED (scalar_type));
12574 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
12575 When the component mode passes the above test simply use a type
12576 corresponding to that mode. The theory is that any use that
12577 would cause problems with this will disable vectorization anyway. */
12578 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
12579 && !INTEGRAL_TYPE_P (scalar_type))
12580 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
12582 /* We can't build a vector type of elements with alignment bigger than
12583 their size. */
12584 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
12585 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
12586 TYPE_UNSIGNED (scalar_type));
12588   /* If we fell back to using the mode, fail if there was
12589 no scalar type for it. */
12590 if (scalar_type == NULL_TREE)
12591 return NULL_TREE;
12593 /* If no prevailing mode was supplied, use the mode the target prefers.
12594 Otherwise lookup a vector mode based on the prevailing mode. */
12595 if (prevailing_mode == VOIDmode)
12597 gcc_assert (known_eq (nunits, 0U));
12598 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
12599 if (SCALAR_INT_MODE_P (simd_mode))
12601 /* Traditional behavior is not to take the integer mode
12602 literally, but simply to use it as a way of determining
12603 the vector size. It is up to mode_for_vector to decide
12604 what the TYPE_MODE should be.
12606 Note that nunits == 1 is allowed in order to support single
12607 element vector types. */
12608 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
12609 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
12610 return NULL_TREE;
12613 else if (SCALAR_INT_MODE_P (prevailing_mode)
12614 || !related_vector_mode (prevailing_mode,
12615 inner_mode, nunits).exists (&simd_mode))
12617 /* Fall back to using mode_for_vector, mostly in the hope of being
12618 able to use an integer mode. */
12619 if (known_eq (nunits, 0U)
12620 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
12621 return NULL_TREE;
12623 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
12624 return NULL_TREE;
12627 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
12629 /* In cases where the mode was chosen by mode_for_vector, check that
12630 the target actually supports the chosen mode, or that it at least
12631 allows the vector mode to be replaced by a like-sized integer. */
12632 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
12633 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
12634 return NULL_TREE;
12636 /* Re-attach the address-space qualifier if we canonicalized the scalar
12637 type. */
12638 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
12639 return build_qualified_type
12640 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
12642 return vectype;
12645 /* Function get_vectype_for_scalar_type.
12647 Returns the vector type corresponding to SCALAR_TYPE as supported
12648 by the target. If GROUP_SIZE is nonzero and we're performing BB
12649 vectorization, make sure that the number of elements in the vector
12650 is no bigger than GROUP_SIZE. */
12652 tree
12653 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
12654 unsigned int group_size)
12656 /* For BB vectorization, we should always have a group size once we've
12657 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12658 are tentative requests during things like early data reference
12659 analysis and pattern recognition. */
12660 if (is_a <bb_vec_info> (vinfo))
12661 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12662 else
12663 group_size = 0;
12665 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
12666 scalar_type);
12667 if (vectype && vinfo->vector_mode == VOIDmode)
12668 vinfo->vector_mode = TYPE_MODE (vectype);
12670 /* Register the natural choice of vector type, before the group size
12671 has been applied. */
12672 if (vectype)
12673 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
12675 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
12676 try again with an explicit number of elements. */
12677 if (vectype
12678 && group_size
12679 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
12681 /* Start with the biggest number of units that fits within
12682 GROUP_SIZE and halve it until we find a valid vector type.
12683 Usually either the first attempt will succeed or all will
12684 fail (in the latter case because GROUP_SIZE is too small
12685 for the target), but it's possible that a target could have
12686 a hole between supported vector types.
12688 If GROUP_SIZE is not a power of 2, this has the effect of
12689 trying the largest power of 2 that fits within the group,
12690 even though the group is not a multiple of that vector size.
12691 The BB vectorizer will then try to carve up the group into
12692 smaller pieces. */
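      /* Editorial worked example (assuming 4-byte "int" elements): for
	 GROUP_SIZE == 6 the loop below first tries nunits == 4, because
	 1 << floor_log2 (6) == 4, i.e. a "vector(4) int"; if the target
	 lacks that type it retries with nunits == 2 before giving up.
	 The two leftover lanes of the group are left for the BB
	 vectorizer to split off separately.  */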
12693 unsigned int nunits = 1 << floor_log2 (group_size);
12696 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
12697 scalar_type, nunits);
12698 nunits /= 2;
12700 while (nunits > 1 && !vectype);
12703 return vectype;
12706 /* Return the vector type corresponding to SCALAR_TYPE as supported
12707 by the target. NODE, if nonnull, is the SLP tree node that will
12708 use the returned vector type. */
12710 tree
12711 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
12713 unsigned int group_size = 0;
12714 if (node)
12715 group_size = SLP_TREE_LANES (node);
12716 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12719 /* Function get_mask_type_for_scalar_type.
12721 Returns the mask type corresponding to a result of comparison
12722 of vectors of specified SCALAR_TYPE as supported by target.
12723 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12724 make sure that the number of elements in the vector is no bigger
12725 than GROUP_SIZE. */
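/* Editorial illustration (the resulting mask layout is a target-dependent
   assumption): for comparisons of 4-byte "int" elements this returns the
   truth type of the corresponding data vector, e.g. a
   "vector(4) <signed-boolean:32>" on targets without mask registers, or a
   narrow predicate vector type on targets that have them.

     tree mask_type
       = get_mask_type_for_scalar_type (vinfo, integer_type_node, group_size);  */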
12727 tree
12728 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
12729 unsigned int group_size)
12731 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12733 if (!vectype)
12734 return NULL;
12736 return truth_type_for (vectype);
12739 /* Function get_same_sized_vectype
12741 Returns a vector type corresponding to SCALAR_TYPE of size
12742 VECTOR_TYPE if supported by the target. */
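/* Editorial sketch, not part of the original sources: with 16-byte vectors,
   asking for an "int" vector of the same size as a hypothetical
   "vector(8) short int" type V8HI_TYPE yields "vector(4) int"
   (16 bytes / 4 bytes per element == 4 units); for a scalar boolean
   SCALAR_TYPE the truth type of VECTOR_TYPE is returned instead.

     tree wide = get_same_sized_vectype (integer_type_node, V8HI_TYPE);  */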
12744 tree
12745 get_same_sized_vectype (tree scalar_type, tree vector_type)
12747 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
12748 return truth_type_for (vector_type);
12750 poly_uint64 nunits;
12751 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
12752 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
12753 return NULL_TREE;
12755 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
12756 scalar_type, nunits);
12759 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
12760 would not change the chosen vector modes. */
12762 bool
12763 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
12765 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
12766 i != vinfo->used_vector_modes.end (); ++i)
12767 if (!VECTOR_MODE_P (*i)
12768 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
12769 return false;
12770 return true;
12773 /* Function vect_is_simple_use.
12775 Input:
12776 VINFO - the vect info of the loop or basic block that is being vectorized.
12777 OPERAND - operand in the loop or bb.
12778 Output:
12779 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
12780 case OPERAND is an SSA_NAME that is defined in the vectorizable region
12781 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
12782 the definition could be anywhere in the function
12783 DT - the type of definition
12785 Returns whether a stmt with OPERAND can be vectorized.
12786 For loops, supportable operands are constants, loop invariants, and operands
12787 that are defined by the current iteration of the loop. Unsupportable
12788 operands are those that are defined by a previous iteration of the loop (as
12789 is the case in reduction/induction computations).
12790 For basic blocks, supportable operands are constants and bb invariants.
12791 For now, operands defined outside the basic block are not supported. */
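/* Editorial usage sketch (hypothetical caller, not taken from the sources):

     enum vect_def_type dt;
     stmt_vec_info def_info;
     gimple *def_stmt;
     if (!vect_is_simple_use (op, vinfo, &dt, &def_info, &def_stmt))
       return false;   // OP cannot be handled by the vectorizer.
     bool invariant_p = (dt == vect_constant_def || dt == vect_external_def);  */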
12793 bool
12794 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12795 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
12797 if (def_stmt_info_out)
12798 *def_stmt_info_out = NULL;
12799 if (def_stmt_out)
12800 *def_stmt_out = NULL;
12801 *dt = vect_unknown_def_type;
12803 if (dump_enabled_p ())
12805 dump_printf_loc (MSG_NOTE, vect_location,
12806 "vect_is_simple_use: operand ");
12807 if (TREE_CODE (operand) == SSA_NAME
12808 && !SSA_NAME_IS_DEFAULT_DEF (operand))
12809 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
12810 else
12811 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
12814 if (CONSTANT_CLASS_P (operand))
12815 *dt = vect_constant_def;
12816 else if (is_gimple_min_invariant (operand))
12817 *dt = vect_external_def;
12818 else if (TREE_CODE (operand) != SSA_NAME)
12819 *dt = vect_unknown_def_type;
12820 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
12821 *dt = vect_external_def;
12822 else
12824 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
12825 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
12826 if (!stmt_vinfo)
12827 *dt = vect_external_def;
12828 else
12830 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
12831 def_stmt = stmt_vinfo->stmt;
12832 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
12833 if (def_stmt_info_out)
12834 *def_stmt_info_out = stmt_vinfo;
12836 if (def_stmt_out)
12837 *def_stmt_out = def_stmt;
12840 if (dump_enabled_p ())
12842 dump_printf (MSG_NOTE, ", type of def: ");
12843 switch (*dt)
12845 case vect_uninitialized_def:
12846 dump_printf (MSG_NOTE, "uninitialized\n");
12847 break;
12848 case vect_constant_def:
12849 dump_printf (MSG_NOTE, "constant\n");
12850 break;
12851 case vect_external_def:
12852 dump_printf (MSG_NOTE, "external\n");
12853 break;
12854 case vect_internal_def:
12855 dump_printf (MSG_NOTE, "internal\n");
12856 break;
12857 case vect_induction_def:
12858 dump_printf (MSG_NOTE, "induction\n");
12859 break;
12860 case vect_reduction_def:
12861 dump_printf (MSG_NOTE, "reduction\n");
12862 break;
12863 case vect_double_reduction_def:
12864 dump_printf (MSG_NOTE, "double reduction\n");
12865 break;
12866 case vect_nested_cycle:
12867 dump_printf (MSG_NOTE, "nested cycle\n");
12868 break;
12869 case vect_first_order_recurrence:
12870 dump_printf (MSG_NOTE, "first order recurrence\n");
12871 break;
12872 case vect_unknown_def_type:
12873 dump_printf (MSG_NOTE, "unknown\n");
12874 break;
12878 if (*dt == vect_unknown_def_type)
12880 if (dump_enabled_p ())
12881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
12882 "Unsupported pattern.\n");
12883 return false;
12886 return true;
12889 /* Function vect_is_simple_use.
12891 Same as vect_is_simple_use but also determines the vector operand
12892 type of OPERAND and stores it to *VECTYPE. If the definition of
12893 OPERAND is vect_uninitialized_def, vect_constant_def or
12894 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
12895 is responsible to compute the best suited vector type for the
12896 scalar operand. */
12898 bool
12899 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
12900 tree *vectype, stmt_vec_info *def_stmt_info_out,
12901 gimple **def_stmt_out)
12903 stmt_vec_info def_stmt_info;
12904 gimple *def_stmt;
12905 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
12906 return false;
12908 if (def_stmt_out)
12909 *def_stmt_out = def_stmt;
12910 if (def_stmt_info_out)
12911 *def_stmt_info_out = def_stmt_info;
12913 /* Now get a vector type if the def is internal, otherwise supply
12914 NULL_TREE and leave it up to the caller to figure out a proper
12915 type for the use stmt. */
12916 if (*dt == vect_internal_def
12917 || *dt == vect_induction_def
12918 || *dt == vect_reduction_def
12919 || *dt == vect_double_reduction_def
12920 || *dt == vect_nested_cycle
12921 || *dt == vect_first_order_recurrence)
12923 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
12924 gcc_assert (*vectype != NULL_TREE);
12925 if (dump_enabled_p ())
12926 dump_printf_loc (MSG_NOTE, vect_location,
12927 "vect_is_simple_use: vectype %T\n", *vectype);
12929 else if (*dt == vect_uninitialized_def
12930 || *dt == vect_constant_def
12931 || *dt == vect_external_def)
12932 *vectype = NULL_TREE;
12933 else
12934 gcc_unreachable ();
12936 return true;
12939 /* Function vect_is_simple_use.
12941 Same as vect_is_simple_use but determines the operand by operand
12942 position OPERAND from either STMT or SLP_NODE, filling in *OP
12943 and *SLP_DEF (when SLP_NODE is not NULL). */
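/* Editorial usage sketch of how a vectorizable_* routine might look up
   operand 0 of STMT_INFO (variable names are hypothetical):

     tree op0, vectype0;
     slp_tree slp_op0;
     enum vect_def_type dt0;
     if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0,
			      &op0, &slp_op0, &dt0, &vectype0))
       return false;  */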
12945 bool
12946 vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
12947 unsigned operand, tree *op, slp_tree *slp_def,
12948 enum vect_def_type *dt,
12949 tree *vectype, stmt_vec_info *def_stmt_info_out)
12951 if (slp_node)
12953 slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
12954 *slp_def = child;
12955 *vectype = SLP_TREE_VECTYPE (child);
12956 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
12958 *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
12959 return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
12961 else
12963 if (def_stmt_info_out)
12964 *def_stmt_info_out = NULL;
12965 *op = SLP_TREE_SCALAR_OPS (child)[0];
12966 *dt = SLP_TREE_DEF_TYPE (child);
12967 return true;
12970 else
12972 *slp_def = NULL;
12973 if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
12975 if (gimple_assign_rhs_code (ass) == COND_EXPR
12976 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
12978 if (operand < 2)
12979 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
12980 else
12981 *op = gimple_op (ass, operand);
12983 else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
12984 *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
12985 else
12986 *op = gimple_op (ass, operand + 1);
12988 else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
12989 *op = gimple_call_arg (call, operand);
12990 else
12991 gcc_unreachable ();
12992 return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
12996 /* If OP is not NULL and is external or constant, update its vector
12997 type with VECTYPE. Returns true if successful or false if not,
12998 for example when conflicting vector types are present. */
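/* Editorial usage sketch (hypothetical caller): after the SLP-aware
   vect_is_simple_use overload above has produced SLP_OP0 and the vector
   type chosen for the use, record that type on an external or constant
   operand, failing when two uses disagree:

     if (!vect_maybe_update_slp_op_vectype (slp_op0, vectype))
       {
	 if (dump_enabled_p ())
	   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			    "incompatible vector types for invariants\n");
	 return false;
       }  */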
13000 bool
13001 vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
13003 if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
13004 return true;
13005 if (SLP_TREE_VECTYPE (op))
13006 return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
13007 /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
13008      should be handled by patterns.  Allow vect_constant_def for now.  */
13009 if (VECTOR_BOOLEAN_TYPE_P (vectype)
13010 && SLP_TREE_DEF_TYPE (op) == vect_external_def)
13011 return false;
13012 SLP_TREE_VECTYPE (op) = vectype;
13013 return true;
13016 /* Function supportable_widening_operation
13018 Check whether an operation represented by the code CODE is a
13019 widening operation that is supported by the target platform in
13020 vector form (i.e., when operating on arguments of type VECTYPE_IN
13021 producing a result of type VECTYPE_OUT).
13023 Widening operations we currently support are NOP (CONVERT), FLOAT,
13024 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
13025 are supported by the target platform either directly (via vector
13026 tree-codes), or via target builtins.
13028 Output:
13029 - CODE1 and CODE2 are codes of vector operations to be used when
13030 vectorizing the operation, if available.
13031 - MULTI_STEP_CVT determines the number of required intermediate steps in
13032 case of multi-step conversion (like char->short->int - in that case
13033 MULTI_STEP_CVT will be 1).
13034 - INTERM_TYPES contains the intermediate type required to perform the
13035 widening operation (short in the above example). */
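/* Editorial worked example (vector sizes are target-dependent assumptions):
   widening a "vector(16) signed char" input to a "vector(4) int" result via
   NOP_EXPR needs two unpack steps, so on success *CODE1/*CODE2 would be
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR (swapped on big-endian targets),
   *MULTI_STEP_CVT would be 1 and *INTERM_TYPES would hold the single
   intermediate type "vector(8) short int".  */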
13037 bool
13038 supportable_widening_operation (vec_info *vinfo,
13039 code_helper code,
13040 stmt_vec_info stmt_info,
13041 tree vectype_out, tree vectype_in,
13042 code_helper *code1,
13043 code_helper *code2,
13044 int *multi_step_cvt,
13045 vec<tree> *interm_types)
13047 loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
13048 class loop *vect_loop = NULL;
13049 machine_mode vec_mode;
13050 enum insn_code icode1, icode2;
13051 optab optab1 = unknown_optab, optab2 = unknown_optab;
13052 tree vectype = vectype_in;
13053 tree wide_vectype = vectype_out;
13054 tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
13055 int i;
13056 tree prev_type, intermediate_type;
13057 machine_mode intermediate_mode, prev_mode;
13058 optab optab3, optab4;
13060 *multi_step_cvt = 0;
13061 if (loop_info)
13062 vect_loop = LOOP_VINFO_LOOP (loop_info);
13064 switch (code.safe_as_tree_code ())
13066 case MAX_TREE_CODES:
13067 /* Don't set c1 and c2 if code is not a tree_code. */
13068 break;
13070 case WIDEN_MULT_EXPR:
13071 /* The result of a vectorized widening operation usually requires
13072 two vectors (because the widened results do not fit into one vector).
13073 The generated vector results would normally be expected to be
13074 generated in the same order as in the original scalar computation,
13075 i.e. if 8 results are generated in each vector iteration, they are
13076 to be organized as follows:
13077 vect1: [res1,res2,res3,res4],
13078 vect2: [res5,res6,res7,res8].
13080 However, in the special case that the result of the widening
13081 operation is used in a reduction computation only, the order doesn't
13082 matter (because when vectorizing a reduction we change the order of
13083 the computation). Some targets can take advantage of this and
13084 generate more efficient code. For example, targets like Altivec,
13085 that support widen_mult using a sequence of {mult_even,mult_odd}
13086 generate the following vectors:
13087 vect1: [res1,res3,res5,res7],
13088 vect2: [res2,res4,res6,res8].
13090 When vectorizing outer-loops, we execute the inner-loop sequentially
13091 (each vectorized inner-loop iteration contributes to VF outer-loop
13092        iterations in parallel).  We therefore don't allow changing the
13093 order of the computation in the inner-loop during outer-loop
13094 vectorization. */
13095 /* TODO: Another case in which order doesn't *really* matter is when we
13096 widen and then contract again, e.g. (short)((int)x * y >> 8).
13097 Normally, pack_trunc performs an even/odd permute, whereas the
13098 repack from an even/odd expansion would be an interleave, which
13099 would be significantly simpler for e.g. AVX2. */
13100 /* In any case, in order to avoid duplicating the code below, recurse
13101 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
13102 are properly set up for the caller. If we fail, we'll continue with
13103 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
13104 if (vect_loop
13105 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
13106 && !nested_in_vect_loop_p (vect_loop, stmt_info)
13107 && supportable_widening_operation (vinfo, VEC_WIDEN_MULT_EVEN_EXPR,
13108 stmt_info, vectype_out,
13109 vectype_in, code1,
13110 code2, multi_step_cvt,
13111 interm_types))
13113           /* Elements in a vector with the vect_used_by_reduction property cannot
13114 be reordered if the use chain with this property does not have the
13115              same operation.  One such example is s += a * b, where elements
13116 in a and b cannot be reordered. Here we check if the vector defined
13117 by STMT is only directly used in the reduction statement. */
13118 tree lhs = gimple_assign_lhs (stmt_info->stmt);
13119 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
13120 if (use_stmt_info
13121 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
13122 return true;
13124 c1 = VEC_WIDEN_MULT_LO_EXPR;
13125 c2 = VEC_WIDEN_MULT_HI_EXPR;
13126 break;
13128 case DOT_PROD_EXPR:
13129 c1 = DOT_PROD_EXPR;
13130 c2 = DOT_PROD_EXPR;
13131 break;
13133 case SAD_EXPR:
13134 c1 = SAD_EXPR;
13135 c2 = SAD_EXPR;
13136 break;
13138 case VEC_WIDEN_MULT_EVEN_EXPR:
13139 /* Support the recursion induced just above. */
13140 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
13141 c2 = VEC_WIDEN_MULT_ODD_EXPR;
13142 break;
13144 case WIDEN_LSHIFT_EXPR:
13145 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
13146 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
13147 break;
13149 CASE_CONVERT:
13150 c1 = VEC_UNPACK_LO_EXPR;
13151 c2 = VEC_UNPACK_HI_EXPR;
13152 break;
13154 case FLOAT_EXPR:
13155 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
13156 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
13157 break;
13159 case FIX_TRUNC_EXPR:
13160 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
13161 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
13162 break;
13164 default:
13165 gcc_unreachable ();
13168 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
13169 std::swap (c1, c2);
13171 if (code == FIX_TRUNC_EXPR)
13173       /* The signedness is determined from the output operand.  */
13174 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13175 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13177 else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
13178 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13179 && VECTOR_BOOLEAN_TYPE_P (vectype)
13180 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13181 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13183 /* If the input and result modes are the same, a different optab
13184 is needed where we pass in the number of units in vectype. */
13185 optab1 = vec_unpacks_sbool_lo_optab;
13186 optab2 = vec_unpacks_sbool_hi_optab;
13189 vec_mode = TYPE_MODE (vectype);
13190 if (widening_fn_p (code))
13192 /* If this is an internal fn then we must check whether the target
13193 supports either a low-high split or an even-odd split. */
13194 internal_fn ifn = as_internal_fn ((combined_fn) code);
13196 internal_fn lo, hi, even, odd;
13197 lookup_hilo_internal_fn (ifn, &lo, &hi);
13198 *code1 = as_combined_fn (lo);
13199 *code2 = as_combined_fn (hi);
13200 optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
13201 optab2 = direct_internal_fn_optab (hi, {vectype, vectype});
13203 /* If we don't support low-high, then check for even-odd. */
13204 if (!optab1
13205 || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13206 || !optab2
13207 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13209 lookup_evenodd_internal_fn (ifn, &even, &odd);
13210 *code1 = as_combined_fn (even);
13211 *code2 = as_combined_fn (odd);
13212 optab1 = direct_internal_fn_optab (even, {vectype, vectype});
13213 optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
13216 else if (code.is_tree_code ())
13218 if (code == FIX_TRUNC_EXPR)
13220           /* The signedness is determined from the output operand.  */
13221 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13222 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
13224 else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
13225 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
13226 && VECTOR_BOOLEAN_TYPE_P (vectype)
13227 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
13228 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
13230 /* If the input and result modes are the same, a different optab
13231 is needed where we pass in the number of units in vectype. */
13232 optab1 = vec_unpacks_sbool_lo_optab;
13233 optab2 = vec_unpacks_sbool_hi_optab;
13235 else
13237 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13238 optab2 = optab_for_tree_code (c2, vectype, optab_default);
13240 *code1 = c1;
13241 *code2 = c2;
13244 if (!optab1 || !optab2)
13245 return false;
13247 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
13248 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
13249 return false;
13252 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13253 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13255 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13256 return true;
13257 /* For scalar masks we may have different boolean
13258 vector types having the same QImode. Thus we
13259          add an additional check on the number of elements.  */
13260 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
13261 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13262 return true;
13265 /* Check if it's a multi-step conversion that can be done using intermediate
13266 types. */
13268 prev_type = vectype;
13269 prev_mode = vec_mode;
13271 if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
13272 return false;
13274 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13275      intermediate steps in the promotion sequence.  We try
13276      MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
13277 not. */
13278 interm_types->create (MAX_INTERM_CVT_STEPS);
13279 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13281 intermediate_mode = insn_data[icode1].operand[0].mode;
13282 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13283 intermediate_type
13284 = vect_halve_mask_nunits (prev_type, intermediate_mode);
13285 else if (VECTOR_MODE_P (intermediate_mode))
13287 tree intermediate_element_type
13288 = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
13289 TYPE_UNSIGNED (prev_type));
13290 intermediate_type
13291 = build_vector_type_for_mode (intermediate_element_type,
13292 intermediate_mode);
13294 else
13295 intermediate_type
13296 = lang_hooks.types.type_for_mode (intermediate_mode,
13297 TYPE_UNSIGNED (prev_type));
13299 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13300 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13301 && intermediate_mode == prev_mode
13302 && SCALAR_INT_MODE_P (prev_mode))
13304 /* If the input and result modes are the same, a different optab
13305 is needed where we pass in the number of units in vectype. */
13306 optab3 = vec_unpacks_sbool_lo_optab;
13307 optab4 = vec_unpacks_sbool_hi_optab;
13309 else
13311 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
13312 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
13315 if (!optab3 || !optab4
13316 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
13317 || insn_data[icode1].operand[0].mode != intermediate_mode
13318 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
13319 || insn_data[icode2].operand[0].mode != intermediate_mode
13320 || ((icode1 = optab_handler (optab3, intermediate_mode))
13321 == CODE_FOR_nothing)
13322 || ((icode2 = optab_handler (optab4, intermediate_mode))
13323 == CODE_FOR_nothing))
13324 break;
13326 interm_types->quick_push (intermediate_type);
13327 (*multi_step_cvt)++;
13329 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
13330 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
13332 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13333 return true;
13334 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
13335 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
13336 return true;
13339 prev_type = intermediate_type;
13340 prev_mode = intermediate_mode;
13343 interm_types->release ();
13344 return false;
13348 /* Function supportable_narrowing_operation
13350 Check whether an operation represented by the code CODE is a
13351 narrowing operation that is supported by the target platform in
13352 vector form (i.e., when operating on arguments of type VECTYPE_IN
13353 and producing a result of type VECTYPE_OUT).
13355 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
13356 and FLOAT. This function checks if these operations are supported by
13357 the target platform directly via vector tree-codes.
13359 Output:
13360 - CODE1 is the code of a vector operation to be used when
13361 vectorizing the operation, if available.
13362 - MULTI_STEP_CVT determines the number of required intermediate steps in
13363 case of multi-step conversion (like int->short->char - in that case
13364 MULTI_STEP_CVT will be 1).
13365 - INTERM_TYPES contains the intermediate type required to perform the
13366 narrowing operation (short in the above example). */
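/* Editorial worked example (vector sizes are target-dependent assumptions):
   narrowing a "vector(4) int" input to a "vector(16) signed char" result via
   NOP_EXPR would set *CODE1 to VEC_PACK_TRUNC_EXPR, *MULTI_STEP_CVT to 1 and
   *INTERM_TYPES to the single intermediate type "vector(8) short int",
   matching the int->short->char chain mentioned above.  */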
13368 bool
13369 supportable_narrowing_operation (code_helper code,
13370 tree vectype_out, tree vectype_in,
13371 code_helper *code1, int *multi_step_cvt,
13372 vec<tree> *interm_types)
13374 machine_mode vec_mode;
13375 enum insn_code icode1;
13376 optab optab1, interm_optab;
13377 tree vectype = vectype_in;
13378 tree narrow_vectype = vectype_out;
13379 enum tree_code c1;
13380 tree intermediate_type, prev_type;
13381 machine_mode intermediate_mode, prev_mode;
13382 int i;
13383 unsigned HOST_WIDE_INT n_elts;
13384 bool uns;
13386 if (!code.is_tree_code ())
13387 return false;
13389 *multi_step_cvt = 0;
13390 switch ((tree_code) code)
13392 CASE_CONVERT:
13393 c1 = VEC_PACK_TRUNC_EXPR;
13394 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
13395 && VECTOR_BOOLEAN_TYPE_P (vectype)
13396 && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
13397 && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
13398 && n_elts < BITS_PER_UNIT)
13399 optab1 = vec_pack_sbool_trunc_optab;
13400 else
13401 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13402 break;
13404 case FIX_TRUNC_EXPR:
13405 c1 = VEC_PACK_FIX_TRUNC_EXPR;
13406       /* The signedness is determined from the output operand.  */
13407 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
13408 break;
13410 case FLOAT_EXPR:
13411 c1 = VEC_PACK_FLOAT_EXPR;
13412 optab1 = optab_for_tree_code (c1, vectype, optab_default);
13413 break;
13415 default:
13416 gcc_unreachable ();
13419 if (!optab1)
13420 return false;
13422 vec_mode = TYPE_MODE (vectype);
13423 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
13424 return false;
13426 *code1 = c1;
13428 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
13430 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13431 return true;
13432 /* For scalar masks we may have different boolean
13433 vector types having the same QImode. Thus we
13434          add an additional check on the number of elements.  */
13435 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
13436 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
13437 return true;
13440 if (code == FLOAT_EXPR)
13441 return false;
13443 /* Check if it's a multi-step conversion that can be done using intermediate
13444 types. */
13445 prev_mode = vec_mode;
13446 prev_type = vectype;
13447 if (code == FIX_TRUNC_EXPR)
13448 uns = TYPE_UNSIGNED (vectype_out);
13449 else
13450 uns = TYPE_UNSIGNED (vectype);
13452 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
13453 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
13454 costly than signed. */
13455 if (code == FIX_TRUNC_EXPR && uns)
13457 enum insn_code icode2;
13459 intermediate_type
13460 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
13461 interm_optab
13462 = optab_for_tree_code (c1, intermediate_type, optab_default);
13463 if (interm_optab != unknown_optab
13464 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
13465 && insn_data[icode1].operand[0].mode
13466 == insn_data[icode2].operand[0].mode)
13468 uns = false;
13469 optab1 = interm_optab;
13470 icode1 = icode2;
13474 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
13475      intermediate steps in the narrowing sequence.  We try
13476 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
13477 interm_types->create (MAX_INTERM_CVT_STEPS);
13478 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
13480 intermediate_mode = insn_data[icode1].operand[0].mode;
13481 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
13482 intermediate_type
13483 = vect_double_mask_nunits (prev_type, intermediate_mode);
13484 else
13485 intermediate_type
13486 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
13487 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
13488 && VECTOR_BOOLEAN_TYPE_P (prev_type)
13489 && SCALAR_INT_MODE_P (prev_mode)
13490 && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
13491 && n_elts < BITS_PER_UNIT)
13492 interm_optab = vec_pack_sbool_trunc_optab;
13493 else
13494 interm_optab
13495 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
13496 optab_default);
13497 if (!interm_optab
13498 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
13499 || insn_data[icode1].operand[0].mode != intermediate_mode
13500 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
13501 == CODE_FOR_nothing))
13502 break;
13504 interm_types->quick_push (intermediate_type);
13505 (*multi_step_cvt)++;
13507 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
13509 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13510 return true;
13511 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
13512 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
13513 return true;
13516 prev_mode = intermediate_mode;
13517 prev_type = intermediate_type;
13518 optab1 = interm_optab;
13521 interm_types->release ();
13522 return false;
13525 /* Generate and return a vector mask of MASK_TYPE such that
13526 mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
13527 Add the statements to SEQ. */
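/* Editorial illustration: for an 8-lane MASK_TYPE, START_INDEX 3 and
   END_INDEX 7 the generated statement is roughly

     mask_1 = .WHILE_ULT (3, 7, { 0, ... });

   and the resulting mask is { 1, 1, 1, 1, 0, 0, 0, 0 }: lane I is set
   iff I + 3 < 7.  */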
13529 tree
13530 vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
13531 tree end_index, const char *name)
13533 tree cmp_type = TREE_TYPE (start_index);
13534 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
13535 cmp_type, mask_type,
13536 OPTIMIZE_FOR_SPEED));
13537 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
13538 start_index, end_index,
13539 build_zero_cst (mask_type));
13540 tree tmp;
13541 if (name)
13542 tmp = make_temp_ssa_name (mask_type, NULL, name);
13543 else
13544 tmp = make_ssa_name (mask_type);
13545 gimple_call_set_lhs (call, tmp);
13546 gimple_seq_add_stmt (seq, call);
13547 return tmp;
13550 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
13551 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
13553 tree
13554 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
13555 tree end_index)
13557 tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
13558 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
13561 /* Try to compute the vector types required to vectorize STMT_INFO,
13562 returning true on success and false if vectorization isn't possible.
13563 If GROUP_SIZE is nonzero and we're performing BB vectorization,
13564    make sure that the number of elements in the vectors is no bigger
13565 than GROUP_SIZE.
13567 On success:
13569 - Set *STMT_VECTYPE_OUT to:
13570 - NULL_TREE if the statement doesn't need to be vectorized;
13571 - the equivalent of STMT_VINFO_VECTYPE otherwise.
13573 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
13574 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
13575 statement does not help to determine the overall number of units. */
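/* Editorial example (vector sizes are assumptions): for a widening statement
   such as "int_dst = (int) short_src" with 16-byte vectors,
   *STMT_VECTYPE_OUT is derived from the "int" result, giving
   "vector(4) int", while *NUNITS_VECTYPE_OUT is derived from the smallest
   scalar type involved, here "short", giving "vector(8) short int" and
   therefore 8 units.  */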
13577 opt_result
13578 vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
13579 tree *stmt_vectype_out,
13580 tree *nunits_vectype_out,
13581 unsigned int group_size)
13583 gimple *stmt = stmt_info->stmt;
13585 /* For BB vectorization, we should always have a group size once we've
13586 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
13587 are tentative requests during things like early data reference
13588 analysis and pattern recognition. */
13589 if (is_a <bb_vec_info> (vinfo))
13590 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
13591 else
13592 group_size = 0;
13594 *stmt_vectype_out = NULL_TREE;
13595 *nunits_vectype_out = NULL_TREE;
13597 if (gimple_get_lhs (stmt) == NULL_TREE
13598 /* MASK_STORE has no lhs, but is ok. */
13599 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
13601 if (is_a <gcall *> (stmt))
13603 /* Ignore calls with no lhs. These must be calls to
13604 #pragma omp simd functions, and what vectorization factor
13605          they really need can't be determined until
13606 vectorizable_simd_clone_call. */
13607 if (dump_enabled_p ())
13608 dump_printf_loc (MSG_NOTE, vect_location,
13609 "defer to SIMD clone analysis.\n");
13610 return opt_result::success ();
13613 return opt_result::failure_at (stmt,
13614 "not vectorized: irregular stmt.%G", stmt);
13617 tree vectype;
13618 tree scalar_type = NULL_TREE;
13619 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
13621 vectype = STMT_VINFO_VECTYPE (stmt_info);
13622 if (dump_enabled_p ())
13623 dump_printf_loc (MSG_NOTE, vect_location,
13624 "precomputed vectype: %T\n", vectype);
13626 else if (vect_use_mask_type_p (stmt_info))
13628 unsigned int precision = stmt_info->mask_precision;
13629 scalar_type = build_nonstandard_integer_type (precision, 1);
13630 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
13631 if (!vectype)
13632 return opt_result::failure_at (stmt, "not vectorized: unsupported"
13633 " data-type %T\n", scalar_type);
13634 if (dump_enabled_p ())
13635 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
13637 else
13639 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
13640 scalar_type = TREE_TYPE (DR_REF (dr));
13641 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
13642 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
13643 else
13644 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
13646 if (dump_enabled_p ())
13648 if (group_size)
13649 dump_printf_loc (MSG_NOTE, vect_location,
13650 "get vectype for scalar type (group size %d):"
13651 " %T\n", group_size, scalar_type);
13652 else
13653 dump_printf_loc (MSG_NOTE, vect_location,
13654 "get vectype for scalar type: %T\n", scalar_type);
13656 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
13657 if (!vectype)
13658 return opt_result::failure_at (stmt,
13659 "not vectorized:"
13660 " unsupported data-type %T\n",
13661 scalar_type);
13663 if (dump_enabled_p ())
13664 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
13667 if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
13668 return opt_result::failure_at (stmt,
13669 "not vectorized: vector stmt in loop:%G",
13670 stmt);
13672 *stmt_vectype_out = vectype;
13674 /* Don't try to compute scalar types if the stmt produces a boolean
13675 vector; use the existing vector type instead. */
13676 tree nunits_vectype = vectype;
13677 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
13679 /* The number of units is set according to the smallest scalar
13680 type (or the largest vector size, but we only support one
13681 vector size per vectorization). */
13682 scalar_type = vect_get_smallest_scalar_type (stmt_info,
13683 TREE_TYPE (vectype));
13684 if (scalar_type != TREE_TYPE (vectype))
13686 if (dump_enabled_p ())
13687 dump_printf_loc (MSG_NOTE, vect_location,
13688 "get vectype for smallest scalar type: %T\n",
13689 scalar_type);
13690 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
13691 group_size);
13692 if (!nunits_vectype)
13693 return opt_result::failure_at
13694 (stmt, "not vectorized: unsupported data-type %T\n",
13695 scalar_type);
13696 if (dump_enabled_p ())
13697 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
13698 nunits_vectype);
13702 if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
13703 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
13704 return opt_result::failure_at (stmt,
13705 "Not vectorized: Incompatible number "
13706 "of vector subparts between %T and %T\n",
13707 nunits_vectype, *stmt_vectype_out);
13709 if (dump_enabled_p ())
13711 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
13712 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
13713 dump_printf (MSG_NOTE, "\n");
13716 *nunits_vectype_out = nunits_vectype;
13717 return opt_result::success ();
13720 /* Generate and return statement sequence that sets vector length LEN that is:
13722 min_of_start_and_end = min (START_INDEX, END_INDEX);
13723 left_len = END_INDEX - min_of_start_and_end;
13724 rhs = min (left_len, LEN_LIMIT);
13725 LEN = rhs;
13727 Note: the cost of the code generated by this function is modeled
13728 by vect_estimate_min_profitable_iters, so changes here may need
13729 corresponding changes there. */
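/* Editorial illustration: for START_INDEX 3, END_INDEX 10 and LEN_LIMIT 4
   the emitted sequence computes

     _1 = MIN_EXPR <3, 10>;	// 3
     _2 = 10 - _1;		// 7 scalar iterations left
     _3 = MIN_EXPR <_2, 4>;	// clamp to the vector length limit
     LEN = _3;			// 4

   i.e. LEN = MIN (END_INDEX - MIN (START_INDEX, END_INDEX), LEN_LIMIT).  */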
13731 gimple_seq
13732 vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
13734 gimple_seq stmts = NULL;
13735 tree len_type = TREE_TYPE (len);
13736 gcc_assert (TREE_TYPE (start_index) == len_type);
13738 tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
13739 tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
13740 tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
13741 gimple* stmt = gimple_build_assign (len, rhs);
13742 gimple_seq_add_stmt (&stmts, stmt);
13744 return stmts;