[official-gcc.git] / gcc / tree-vect-stmts.c
blob 2ca8e494680d45dea486d91c97627acda941c879
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "explow.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
50 #include "builtins.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
56 #include "regs.h"
57 #include "attribs.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
64 tree
65 stmt_vectype (class _stmt_vec_info *stmt_info)
67 return STMT_VINFO_VECTYPE (stmt_info);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
72 bool
73 stmt_in_inner_loop_p (class _stmt_vec_info *stmt_info)
75 gimple *stmt = STMT_VINFO_STMT (stmt_info);
76 basic_block bb = gimple_bb (stmt);
77 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
78 class loop* loop;
80 if (!loop_vinfo)
81 return false;
83 loop = LOOP_VINFO_LOOP (loop_vinfo);
85 return (bb->loop_father == loop->inner);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
92 unsigned
93 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
94 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
95 int misalign, enum vect_cost_model_location where)
97 if ((kind == vector_load || kind == unaligned_load)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_gather_load;
100 if ((kind == vector_store || kind == unaligned_store)
101 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
102 kind = vector_scatter_store;
104 stmt_info_for_cost si = { count, kind, where, stmt_info, misalign };
105 body_cost_vec->safe_push (si);
107 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
108 return (unsigned)
109 (builtin_vectorization_cost (kind, vectype, misalign) * count);
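/* For example (an illustrative sketch only, not a real caller; real
   callers pass their own cost vectors and misalignment values), costing
   one unaligned vector load in the loop body and one scalar broadcast
   in the prologue looks like:

     inside_cost += record_stmt_cost (cost_vec, 1, unaligned_load,
                                      stmt_info, DR_MISALIGNMENT (dr_info),
                                      vect_body);
     prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
                                        stmt_info, 0, vect_prologue);  */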
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
114 static tree
115 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
117 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
118 "vect_array");
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
126 static tree
127 read_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
128 tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
130 tree vect_type, vect, vect_name, array_ref;
131 gimple *new_stmt;
133 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
134 vect_type = TREE_TYPE (TREE_TYPE (array));
135 vect = vect_create_destination_var (scalar_dest, vect_type);
136 array_ref = build4 (ARRAY_REF, vect_type, array,
137 build_int_cst (size_type_node, n),
138 NULL_TREE, NULL_TREE);
140 new_stmt = gimple_build_assign (vect, array_ref);
141 vect_name = make_ssa_name (vect, new_stmt);
142 gimple_assign_set_lhs (new_stmt, vect_name);
143 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
145 return vect_name;
148 /* ARRAY is an array of vectors created by create_vector_array.
149 Emit code to store SSA_NAME VECT in index N of the array.
150 The store is part of the vectorization of STMT_INFO. */
152 static void
153 write_vector_array (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
154 tree vect, tree array, unsigned HOST_WIDE_INT n)
156 tree array_ref;
157 gimple *new_stmt;
159 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
160 build_int_cst (size_type_node, n),
161 NULL_TREE, NULL_TREE);
163 new_stmt = gimple_build_assign (array_ref, vect);
164 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
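/* As an illustration (the SSA names here are made up), for an array of
   three V4SI vectors these two helpers emit statements of the form:

     vect__1 = vect_array[2];       read_vector_array, N == 2
     vect_array[0] = vect__2;       write_vector_array, N == 0  */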
167 /* PTR is a pointer to an array of type TYPE. Return a representation
168 of *PTR. The memory reference replaces those in FIRST_DR
169 (and its group). */
171 static tree
172 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
174 tree mem_ref;
176 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
177 /* Arrays have the same alignment as their type. */
178 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
179 return mem_ref;
182 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
183 Emit the clobber before *GSI. */
185 static void
186 vect_clobber_variable (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
187 tree var)
189 tree clobber = build_clobber (TREE_TYPE (var));
190 gimple *new_stmt = gimple_build_assign (var, clobber);
191 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
194 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
196 /* Function vect_mark_relevant.
198 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
200 static void
201 vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
202 enum vect_relevant relevant, bool live_p)
204 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
205 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
207 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d: %G", relevant, live_p,
210 stmt_info->stmt);
212 /* If this stmt is an original stmt in a pattern, we might need to mark its
213 related pattern stmt instead of the original stmt. However, such stmts
214 may have their own uses that are not in any pattern, in such cases the
215 stmt itself should be marked. */
216 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
218 /* This is the last stmt in a sequence that was detected as a
219 pattern that can potentially be vectorized. Don't mark the stmt
220 as relevant/live because it's not going to be vectorized.
221 Instead mark the pattern-stmt that replaces it. */
223 if (dump_enabled_p ())
224 dump_printf_loc (MSG_NOTE, vect_location,
225 "last stmt in pattern. don't mark"
226 " relevant/live.\n");
227 stmt_vec_info old_stmt_info = stmt_info;
228 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
229 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
230 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
231 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
234 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
235 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
236 STMT_VINFO_RELEVANT (stmt_info) = relevant;
238 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
239 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
241 if (dump_enabled_p ())
242 dump_printf_loc (MSG_NOTE, vect_location,
243 "already marked relevant/live.\n");
244 return;
247 worklist->safe_push (stmt_info);
251 /* Function is_simple_and_all_uses_invariant
253 Return true if STMT_INFO is simple and all uses of it are invariant. */
255 bool
256 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
257 loop_vec_info loop_vinfo)
259 tree op;
260 ssa_op_iter iter;
262 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
263 if (!stmt)
264 return false;
266 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
268 enum vect_def_type dt = vect_uninitialized_def;
270 if (!vect_is_simple_use (op, loop_vinfo, &dt))
272 if (dump_enabled_p ())
273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
274 "use not simple.\n");
275 return false;
278 if (dt != vect_external_def && dt != vect_constant_def)
279 return false;
281 return true;
284 /* Function vect_stmt_relevant_p.
286 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
287 is "relevant for vectorization".
289 A stmt is considered "relevant for vectorization" if:
290 - it has uses outside the loop.
291 - it has vdefs (it alters memory).
292 - it is a control stmt in the loop (except for the exit condition).
294 CHECKME: what other side effects would the vectorizer allow? */
296 static bool
297 vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
298 enum vect_relevant *relevant, bool *live_p)
300 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
301 ssa_op_iter op_iter;
302 imm_use_iterator imm_iter;
303 use_operand_p use_p;
304 def_operand_p def_p;
306 *relevant = vect_unused_in_scope;
307 *live_p = false;
309 /* cond stmt other than loop exit cond. */
310 if (is_ctrl_stmt (stmt_info->stmt)
311 && STMT_VINFO_TYPE (stmt_info) != loop_exit_ctrl_vec_info_type)
312 *relevant = vect_used_in_scope;
314 /* changing memory. */
315 if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
316 if (gimple_vdef (stmt_info->stmt)
317 && !gimple_clobber_p (stmt_info->stmt))
319 if (dump_enabled_p ())
320 dump_printf_loc (MSG_NOTE, vect_location,
321 "vec_stmt_relevant_p: stmt has vdefs.\n");
322 *relevant = vect_used_in_scope;
325 /* uses outside the loop. */
326 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
328 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
330 basic_block bb = gimple_bb (USE_STMT (use_p));
331 if (!flow_bb_inside_loop_p (loop, bb))
333 if (is_gimple_debug (USE_STMT (use_p)))
334 continue;
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE, vect_location,
338 "vec_stmt_relevant_p: used out of loop.\n");
340 /* We expect all such uses to be in the loop exit phis
341 (because of loop closed form) */
342 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
343 gcc_assert (bb == single_exit (loop)->dest);
345 *live_p = true;
350 if (*live_p && *relevant == vect_unused_in_scope
351 && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
353 if (dump_enabled_p ())
354 dump_printf_loc (MSG_NOTE, vect_location,
355 "vec_stmt_relevant_p: stmt live but not relevant.\n");
356 *relevant = vect_used_only_live;
359 return (*live_p || *relevant);
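/* A minimal illustration of the criteria above (not taken from any
   particular testcase):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;     has a vdef: relevant
         last = b[i];         def used after the loop: live
       }
     ... use of 'last' after the loop ...

   The out-of-loop use of 'last' is expected to appear in a loop-closed
   exit phi, as asserted above.  */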
363 /* Function exist_non_indexing_operands_for_use_p
365 USE is one of the uses attached to STMT_INFO. Check if USE is
366 used in STMT_INFO for anything other than indexing an array. */
368 static bool
369 exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
371 tree operand;
373 /* USE corresponds to some operand in STMT. If there is no data
374 reference in STMT, then any operand that corresponds to USE
375 is not indexing an array. */
376 if (!STMT_VINFO_DATA_REF (stmt_info))
377 return true;
379 /* STMT has a data_ref. FORNOW this means that it's of one of
380 the following forms:
381 -1- ARRAY_REF = var
382 -2- var = ARRAY_REF
383 (This should have been verified in analyze_data_refs).
385 'var' in the second case corresponds to a def, not a use,
386 so USE cannot correspond to any operands that are not used
387 for array indexing.
389 Therefore, all we need to check is if STMT falls into the
390 first case, and whether var corresponds to USE. */
392 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
393 if (!assign || !gimple_assign_copy_p (assign))
395 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
396 if (call && gimple_call_internal_p (call))
398 internal_fn ifn = gimple_call_internal_fn (call);
399 int mask_index = internal_fn_mask_index (ifn);
400 if (mask_index >= 0
401 && use == gimple_call_arg (call, mask_index))
402 return true;
403 int stored_value_index = internal_fn_stored_value_index (ifn);
404 if (stored_value_index >= 0
405 && use == gimple_call_arg (call, stored_value_index))
406 return true;
407 if (internal_gather_scatter_fn_p (ifn)
408 && use == gimple_call_arg (call, 1))
409 return true;
411 return false;
414 if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
415 return false;
416 operand = gimple_assign_rhs1 (assign);
417 if (TREE_CODE (operand) != SSA_NAME)
418 return false;
420 if (operand == use)
421 return true;
423 return false;
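/* For example (illustrative only), in the store 'a[i] = x_1' the use of
   'i' only feeds the address computation, so this function returns
   false for it, whereas the use of 'x_1' is a real (non-indexing)
   operand and the function returns true.  */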
428 Function process_use.
430 Inputs:
431 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
432 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
433 that defined USE. This is done by calling mark_relevant and passing it
434 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
435 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
436 be performed.
438 Outputs:
439 Generally, LIVE_P and RELEVANT are used to define the liveness and
440 relevance info of the DEF_STMT of this USE:
441 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
442 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
443 Exceptions:
444 - case 1: If USE is used only for address computations (e.g. array indexing),
445 which does not need to be directly vectorized, then the liveness/relevance
446 of the respective DEF_STMT is left unchanged.
447 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
448 we skip DEF_STMT because it has already been processed.
449 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
450 "relevant" will be modified accordingly.
452 Return true if everything is as expected. Return false otherwise. */
454 static opt_result
455 process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
456 enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
457 bool force)
459 stmt_vec_info dstmt_vinfo;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
465 return opt_result::success ();
467 if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
468 return opt_result::failure_at (stmt_vinfo->stmt,
469 "not vectorized:"
470 " unsupported use in stmt.\n");
472 if (!dstmt_vinfo)
473 return opt_result::success ();
475 basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
476 basic_block bb = gimple_bb (stmt_vinfo->stmt);
478 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
479 We have to force the stmt live since the epilogue loop needs it to
480 continue computing the reduction. */
481 if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
483 && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
485 && bb->loop_father == def_bb->loop_father)
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
490 vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
491 return opt_result::success ();
494 /* case 3a: outer-loop stmt defining an inner-loop stmt:
495 outer-loop-header-bb:
496 d = dstmt_vinfo
497 inner-loop:
498 stmt # use (d)
499 outer-loop-tail-bb:
500 ... */
501 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
503 if (dump_enabled_p ())
504 dump_printf_loc (MSG_NOTE, vect_location,
505 "outer-loop def-stmt defining inner-loop stmt.\n");
507 switch (relevant)
509 case vect_unused_in_scope:
510 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
511 vect_used_in_scope : vect_unused_in_scope;
512 break;
514 case vect_used_in_outer_by_reduction:
515 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
516 relevant = vect_used_by_reduction;
517 break;
519 case vect_used_in_outer:
520 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
521 relevant = vect_used_in_scope;
522 break;
524 case vect_used_in_scope:
525 break;
527 default:
528 gcc_unreachable ();
532 /* case 3b: inner-loop stmt defining an outer-loop stmt:
533 outer-loop-header-bb:
535 inner-loop:
536 d = dstmt_vinfo
537 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
538 stmt # use (d) */
539 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
541 if (dump_enabled_p ())
542 dump_printf_loc (MSG_NOTE, vect_location,
543 "inner-loop def-stmt defining outer-loop stmt.\n");
545 switch (relevant)
547 case vect_unused_in_scope:
548 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
549 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
550 vect_used_in_outer_by_reduction : vect_unused_in_scope;
551 break;
553 case vect_used_by_reduction:
554 case vect_used_only_live:
555 relevant = vect_used_in_outer_by_reduction;
556 break;
558 case vect_used_in_scope:
559 relevant = vect_used_in_outer;
560 break;
562 default:
563 gcc_unreachable ();
566 /* We are also not interested in uses on loop PHI backedges that are
567 inductions. Otherwise we'll needlessly vectorize the IV increment
568 and cause hybrid SLP for SLP inductions. Unless the PHI is live
569 of course. */
570 else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
571 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
572 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
573 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
574 loop_latch_edge (bb->loop_father))
575 == use))
577 if (dump_enabled_p ())
578 dump_printf_loc (MSG_NOTE, vect_location,
579 "induction value on backedge.\n");
580 return opt_result::success ();
584 vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
585 return opt_result::success ();
589 /* Function vect_mark_stmts_to_be_vectorized.
591 Not all stmts in the loop need to be vectorized. For example:
593 for i...
594 for j...
595 1. T0 = i + j
596 2. T1 = a[T0]
598 3. j = j + 1
600 Stmts 1 and 3 do not need to be vectorized, because loop control and
601 addressing of vectorized data-refs are handled differently.
603 This pass detects such stmts. */
605 opt_result
606 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
608 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
609 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
610 unsigned int nbbs = loop->num_nodes;
611 gimple_stmt_iterator si;
612 unsigned int i;
613 basic_block bb;
614 bool live_p;
615 enum vect_relevant relevant;
617 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
619 auto_vec<stmt_vec_info, 64> worklist;
621 /* 1. Init worklist. */
622 for (i = 0; i < nbbs; i++)
624 bb = bbs[i];
625 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
627 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
628 if (dump_enabled_p ())
629 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
630 phi_info->stmt);
632 if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
633 vect_mark_relevant (&worklist, phi_info, relevant, live_p);
635 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
637 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
638 if (dump_enabled_p ())
639 dump_printf_loc (MSG_NOTE, vect_location,
640 "init: stmt relevant? %G", stmt_info->stmt);
642 if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
643 vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
647 /* 2. Process_worklist */
648 while (worklist.length () > 0)
650 use_operand_p use_p;
651 ssa_op_iter iter;
653 stmt_vec_info stmt_vinfo = worklist.pop ();
654 if (dump_enabled_p ())
655 dump_printf_loc (MSG_NOTE, vect_location,
656 "worklist: examine stmt: %G", stmt_vinfo->stmt);
658 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
659 (DEF_STMT) as relevant/irrelevant according to the relevance property
660 of STMT. */
661 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
663 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
664 propagated as is to the DEF_STMTs of its USEs.
666 One exception is when STMT has been identified as defining a reduction
667 variable; in this case we set the relevance to vect_used_by_reduction.
668 This is because we distinguish between two kinds of relevant stmts -
669 those that are used by a reduction computation, and those that are
670 (also) used by a regular computation. This allows us later on to
671 identify stmts that are used solely by a reduction, and therefore the
672 order of the results that they produce does not have to be kept. */
674 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
676 case vect_reduction_def:
677 gcc_assert (relevant != vect_unused_in_scope);
678 if (relevant != vect_unused_in_scope
679 && relevant != vect_used_in_scope
680 && relevant != vect_used_by_reduction
681 && relevant != vect_used_only_live)
682 return opt_result::failure_at
683 (stmt_vinfo->stmt, "unsupported use of reduction.\n");
684 break;
686 case vect_nested_cycle:
687 if (relevant != vect_unused_in_scope
688 && relevant != vect_used_in_outer_by_reduction
689 && relevant != vect_used_in_outer)
690 return opt_result::failure_at
691 (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
692 break;
694 case vect_double_reduction_def:
695 if (relevant != vect_unused_in_scope
696 && relevant != vect_used_by_reduction
697 && relevant != vect_used_only_live)
698 return opt_result::failure_at
699 (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
700 break;
702 default:
703 break;
706 if (is_pattern_stmt_p (stmt_vinfo))
708 /* Pattern statements are not inserted into the code, so
709 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
710 have to scan the RHS or function arguments instead. */
711 if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
713 enum tree_code rhs_code = gimple_assign_rhs_code (assign);
714 tree op = gimple_assign_rhs1 (assign);
716 i = 1;
717 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
719 opt_result res
720 = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
721 loop_vinfo, relevant, &worklist, false);
722 if (!res)
723 return res;
724 res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
725 loop_vinfo, relevant, &worklist, false);
726 if (!res)
727 return res;
728 i = 2;
730 for (; i < gimple_num_ops (assign); i++)
732 op = gimple_op (assign, i);
733 if (TREE_CODE (op) == SSA_NAME)
735 opt_result res
736 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
737 &worklist, false);
738 if (!res)
739 return res;
743 else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
745 for (i = 0; i < gimple_call_num_args (call); i++)
747 tree arg = gimple_call_arg (call, i);
748 opt_result res
749 = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
750 &worklist, false);
751 if (!res)
752 return res;
756 else
757 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
759 tree op = USE_FROM_PTR (use_p);
760 opt_result res
761 = process_use (stmt_vinfo, op, loop_vinfo, relevant,
762 &worklist, false);
763 if (!res)
764 return res;
767 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
769 gather_scatter_info gs_info;
770 if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
771 gcc_unreachable ();
772 opt_result res
773 = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
774 &worklist, true);
775 if (!res)
777 if (fatal)
778 *fatal = false;
779 return res;
782 } /* while worklist */
784 return opt_result::success ();
787 /* Compute the prologue cost for invariant or constant operands. */
789 static unsigned
790 vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info,
791 unsigned opno, enum vect_def_type dt,
792 stmt_vector_for_cost *cost_vec)
794 vec_info *vinfo = stmt_info->vinfo;
795 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
796 tree op = gimple_op (stmt, opno);
797 unsigned prologue_cost = 0;
799 /* Without looking at the actual initializer a vector of
800 constants can be implemented as a load from the constant pool.
801 When all elements are the same we can use a splat. */
802 tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), node);
803 unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length ();
804 unsigned num_vects_to_check;
805 unsigned HOST_WIDE_INT const_nunits;
806 unsigned nelt_limit;
807 if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
808 && ! multiple_p (const_nunits, group_size))
810 num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
811 nelt_limit = const_nunits;
813 else
815 /* If either the vector has variable length or the vectors
816 are composed of repeated whole groups we only need to
817 cost construction once. All vectors will be the same. */
818 num_vects_to_check = 1;
819 nelt_limit = group_size;
821 tree elt = NULL_TREE;
822 unsigned nelt = 0;
823 for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
825 unsigned si = j % group_size;
826 if (nelt == 0)
827 elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt, opno);
828 /* ??? We're just tracking whether all operands of a single
829 vector initializer are the same, ideally we'd check if
830 we emitted the same one already. */
831 else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si]->stmt,
832 opno))
833 elt = NULL_TREE;
834 nelt++;
835 if (nelt == nelt_limit)
837 /* ??? We need to pass down stmt_info for a vector type
838 even if it points to the wrong stmt. */
839 prologue_cost += record_stmt_cost
840 (cost_vec, 1,
841 dt == vect_external_def
842 ? (elt ? scalar_to_vec : vec_construct)
843 : vector_load,
844 stmt_info, 0, vect_prologue);
845 nelt = 0;
849 return prologue_cost;
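/* Worked example (illustrative): for a group of four scalar stmts whose
   costed operand has a four-element vector type, const_nunits (4) is a
   multiple of group_size (4), so a single vector is costed with
   nelt_limit == group_size.  A constant operand is then costed as a
   vector_load from the constant pool, an external operand that is the
   same in every lane as a scalar_to_vec splat, and differing external
   lanes as a vec_construct.  */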
852 /* Function vect_model_simple_cost.
854 Models cost for simple operations, i.e. those that only emit ncopies of a
855 single op. Right now, this does not account for multiple insns that could
856 be generated for the single vector op. We will handle that shortly. */
858 static void
859 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
860 enum vect_def_type *dt,
861 int ndts,
862 slp_tree node,
863 stmt_vector_for_cost *cost_vec,
864 vect_cost_for_stmt kind = vector_stmt)
866 int inside_cost = 0, prologue_cost = 0;
868 gcc_assert (cost_vec != NULL);
870 /* ??? Somehow we need to fix this at the callers. */
871 if (node)
872 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
874 if (node)
876 /* Scan operands and account for prologue cost of constants/externals.
877 ??? This over-estimates cost for multiple uses and should be
878 re-engineered. */
879 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt;
880 tree lhs = gimple_get_lhs (stmt);
881 for (unsigned i = 0; i < gimple_num_ops (stmt); ++i)
883 tree op = gimple_op (stmt, i);
884 enum vect_def_type dt;
885 if (!op || op == lhs)
886 continue;
887 if (vect_is_simple_use (op, stmt_info->vinfo, &dt)
888 && (dt == vect_constant_def || dt == vect_external_def))
889 prologue_cost += vect_prologue_cost_for_slp_op (node, stmt_info,
890 i, dt, cost_vec);
893 else
894 /* Cost the "broadcast" of a scalar operand into a vector operand.
895 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
896 cost model. */
897 for (int i = 0; i < ndts; i++)
898 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
899 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
900 stmt_info, 0, vect_prologue);
902 /* Adjust for two-operator SLP nodes. */
903 if (node && SLP_TREE_TWO_OPERATORS (node))
905 ncopies *= 2;
906 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_perm,
907 stmt_info, 0, vect_body);
910 /* Pass the inside-of-loop statements to the target-specific cost model. */
911 inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
912 stmt_info, 0, vect_body);
914 if (dump_enabled_p ())
915 dump_printf_loc (MSG_NOTE, vect_location,
916 "vect_model_simple_cost: inside_cost = %d, "
917 "prologue_cost = %d .\n", inside_cost, prologue_cost);
921 /* Model cost for type demotion and promotion operations. PWR is
922 normally zero for single-step promotions and demotions. It will be
923 one if two-step promotion/demotion is required, and so on. NCOPIES
924 is the number of vector results (and thus number of instructions)
925 for the narrowest end of the operation chain. Each additional
926 step doubles the number of instructions required. */
928 static void
929 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
930 enum vect_def_type *dt,
931 unsigned int ncopies, int pwr,
932 stmt_vector_for_cost *cost_vec)
934 int i;
935 int inside_cost = 0, prologue_cost = 0;
937 for (i = 0; i < pwr + 1; i++)
939 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_promote_demote,
940 stmt_info, 0, vect_body);
941 ncopies *= 2;
944 /* FORNOW: Assuming maximum 2 args per stmt. */
945 for (i = 0; i < 2; i++)
946 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
947 prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
948 stmt_info, 0, vect_prologue);
950 if (dump_enabled_p ())
951 dump_printf_loc (MSG_NOTE, vect_location,
952 "vect_model_promotion_demotion_cost: inside_cost = %d, "
953 "prologue_cost = %d .\n", inside_cost, prologue_cost);
956 /* Returns true if the current function returns DECL. */
958 static bool
959 cfun_returns (tree decl)
961 edge_iterator ei;
962 edge e;
963 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
965 greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src));
966 if (!ret)
967 continue;
968 if (gimple_return_retval (ret) == decl)
969 return true;
970 /* We often end up with an aggregate copy to the result decl,
971 handle that case as well. First skip intermediate clobbers
972 though. */
973 gimple *def = ret;
976 def = SSA_NAME_DEF_STMT (gimple_vuse (def));
978 while (gimple_clobber_p (def));
979 if (is_a <gassign *> (def)
980 && gimple_assign_lhs (def) == gimple_return_retval (ret)
981 && gimple_assign_rhs1 (def) == decl)
982 return true;
984 return false;
987 /* Function vect_model_store_cost
989 Models cost for stores. In the case of grouped accesses, one access
990 has the overhead of the grouped access attributed to it. */
992 static void
993 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
994 enum vect_def_type dt,
995 vect_memory_access_type memory_access_type,
996 vec_load_store_type vls_type, slp_tree slp_node,
997 stmt_vector_for_cost *cost_vec)
999 unsigned int inside_cost = 0, prologue_cost = 0;
1000 stmt_vec_info first_stmt_info = stmt_info;
1001 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1003 /* ??? Somehow we need to fix this at the callers. */
1004 if (slp_node)
1005 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1007 if (vls_type == VLS_STORE_INVARIANT)
1009 if (slp_node)
1010 prologue_cost += vect_prologue_cost_for_slp_op (slp_node, stmt_info,
1011 1, dt, cost_vec);
1012 else
1013 prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
1014 stmt_info, 0, vect_prologue);
1017 /* Grouped stores update all elements in the group at once,
1018 so we want the DR for the first statement. */
1019 if (!slp_node && grouped_access_p)
1020 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1022 /* True if we should include any once-per-group costs as well as
1023 the cost of the statement itself. For SLP we only get called
1024 once per group anyhow. */
1025 bool first_stmt_p = (first_stmt_info == stmt_info);
1027 /* We assume that the cost of a single store-lanes instruction is
1028 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
1029 access is instead being provided by a permute-and-store operation,
1030 include the cost of the permutes. */
1031 if (first_stmt_p
1032 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1034 /* Uses high and low interleave or shuffle operations for each
1035 needed permute. */
1036 int group_size = DR_GROUP_SIZE (first_stmt_info);
1037 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1038 inside_cost = record_stmt_cost (cost_vec, nstmts, vec_perm,
1039 stmt_info, 0, vect_body);
1041 if (dump_enabled_p ())
1042 dump_printf_loc (MSG_NOTE, vect_location,
1043 "vect_model_store_cost: strided group_size = %d .\n",
1044 group_size);
1047 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1048 /* Costs of the stores. */
1049 if (memory_access_type == VMAT_ELEMENTWISE
1050 || memory_access_type == VMAT_GATHER_SCATTER)
1052 /* N scalar stores plus extracting the elements. */
1053 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1054 inside_cost += record_stmt_cost (cost_vec,
1055 ncopies * assumed_nunits,
1056 scalar_store, stmt_info, 0, vect_body);
1058 else
1059 vect_get_store_cost (stmt_info, ncopies, &inside_cost, cost_vec);
1061 if (memory_access_type == VMAT_ELEMENTWISE
1062 || memory_access_type == VMAT_STRIDED_SLP)
1064 /* N scalar stores plus extracting the elements. */
1065 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1066 inside_cost += record_stmt_cost (cost_vec,
1067 ncopies * assumed_nunits,
1068 vec_to_scalar, stmt_info, 0, vect_body);
1071 /* When vectorizing a store into the function result assign
1072 a penalty if the function returns in a multi-register location.
1073 In this case we assume we'll end up with having to spill the
1074 vector result and do piecewise loads as a conservative estimate. */
1075 tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
1076 if (base
1077 && (TREE_CODE (base) == RESULT_DECL
1078 || (DECL_P (base) && cfun_returns (base)))
1079 && !aggregate_value_p (base, cfun->decl))
1081 rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
1082 /* ??? Handle PARALLEL in some way. */
1083 if (REG_P (reg))
1085 int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
1086 /* Assume that a single reg-reg move is possible and cheap,
1087 do not account for vector to gp register move cost. */
1088 if (nregs > 1)
1090 /* Spill. */
1091 prologue_cost += record_stmt_cost (cost_vec, ncopies,
1092 vector_store,
1093 stmt_info, 0, vect_epilogue);
1094 /* Loads. */
1095 prologue_cost += record_stmt_cost (cost_vec, ncopies * nregs,
1096 scalar_load,
1097 stmt_info, 0, vect_epilogue);
1102 if (dump_enabled_p ())
1103 dump_printf_loc (MSG_NOTE, vect_location,
1104 "vect_model_store_cost: inside_cost = %d, "
1105 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1109 /* Calculate cost of DR's memory access. */
1110 void
1111 vect_get_store_cost (stmt_vec_info stmt_info, int ncopies,
1112 unsigned int *inside_cost,
1113 stmt_vector_for_cost *body_cost_vec)
1115 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1116 int alignment_support_scheme
1117 = vect_supportable_dr_alignment (dr_info, false);
1119 switch (alignment_support_scheme)
1121 case dr_aligned:
1123 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1124 vector_store, stmt_info, 0,
1125 vect_body);
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_NOTE, vect_location,
1129 "vect_model_store_cost: aligned.\n");
1130 break;
1133 case dr_unaligned_supported:
1135 /* Here, we assign an additional cost for the unaligned store. */
1136 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1137 unaligned_store, stmt_info,
1138 DR_MISALIGNMENT (dr_info),
1139 vect_body);
1140 if (dump_enabled_p ())
1141 dump_printf_loc (MSG_NOTE, vect_location,
1142 "vect_model_store_cost: unaligned supported by "
1143 "hardware.\n");
1144 break;
1147 case dr_unaligned_unsupported:
1149 *inside_cost = VECT_MAX_COST;
1151 if (dump_enabled_p ())
1152 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1153 "vect_model_store_cost: unsupported access.\n");
1154 break;
1157 default:
1158 gcc_unreachable ();
1163 /* Function vect_model_load_cost
1165 Models cost for loads. In the case of grouped accesses, one access has
1166 the overhead of the grouped access attributed to it. Since unaligned
1167 accesses are supported for loads, we also account for the costs of the
1168 access scheme chosen. */
1170 static void
1171 vect_model_load_cost (stmt_vec_info stmt_info, unsigned ncopies,
1172 vect_memory_access_type memory_access_type,
1173 slp_instance instance,
1174 slp_tree slp_node,
1175 stmt_vector_for_cost *cost_vec)
1177 unsigned int inside_cost = 0, prologue_cost = 0;
1178 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1180 gcc_assert (cost_vec);
1182 /* ??? Somehow we need to fix this at the callers. */
1183 if (slp_node)
1184 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1186 if (slp_node && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
1188 /* If the load is permuted then the alignment is determined by
1189 the first group element not by the first scalar stmt DR. */
1190 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1191 /* Record the cost for the permutation. */
1192 unsigned n_perms;
1193 unsigned assumed_nunits
1194 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info));
1195 unsigned slp_vf = (ncopies * assumed_nunits) / instance->group_size;
1196 vect_transform_slp_perm_load (slp_node, vNULL, NULL,
1197 slp_vf, instance, true,
1198 &n_perms);
1199 inside_cost += record_stmt_cost (cost_vec, n_perms, vec_perm,
1200 first_stmt_info, 0, vect_body);
1201 /* And adjust the number of loads performed. This handles
1202 redundancies as well as loads that are later dead. */
1203 auto_sbitmap perm (DR_GROUP_SIZE (first_stmt_info));
1204 bitmap_clear (perm);
1205 for (unsigned i = 0;
1206 i < SLP_TREE_LOAD_PERMUTATION (slp_node).length (); ++i)
1207 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (slp_node)[i]);
1208 ncopies = 0;
1209 bool load_seen = false;
1210 for (unsigned i = 0; i < DR_GROUP_SIZE (first_stmt_info); ++i)
1212 if (i % assumed_nunits == 0)
1214 if (load_seen)
1215 ncopies++;
1216 load_seen = false;
1218 if (bitmap_bit_p (perm, i))
1219 load_seen = true;
1221 if (load_seen)
1222 ncopies++;
1223 gcc_assert (ncopies
1224 <= (DR_GROUP_SIZE (first_stmt_info)
1225 - DR_GROUP_GAP (first_stmt_info)
1226 + assumed_nunits - 1) / assumed_nunits);
1229 /* Grouped loads read all elements in the group at once,
1230 so we want the DR for the first statement. */
1231 stmt_vec_info first_stmt_info = stmt_info;
1232 if (!slp_node && grouped_access_p)
1233 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
1235 /* True if we should include any once-per-group costs as well as
1236 the cost of the statement itself. For SLP we only get called
1237 once per group anyhow. */
1238 bool first_stmt_p = (first_stmt_info == stmt_info);
1240 /* We assume that the cost of a single load-lanes instruction is
1241 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1242 access is instead being provided by a load-and-permute operation,
1243 include the cost of the permutes. */
1244 if (first_stmt_p
1245 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1247 /* Uses even and odd extract operations or shuffle operations
1248 for each needed permute. */
1249 int group_size = DR_GROUP_SIZE (first_stmt_info);
1250 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1251 inside_cost += record_stmt_cost (cost_vec, nstmts, vec_perm,
1252 stmt_info, 0, vect_body);
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE, vect_location,
1256 "vect_model_load_cost: strided group_size = %d .\n",
1257 group_size);
1260 /* The loads themselves. */
1261 if (memory_access_type == VMAT_ELEMENTWISE
1262 || memory_access_type == VMAT_GATHER_SCATTER)
1264 /* N scalar loads plus gathering them into a vector. */
1265 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1266 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1267 inside_cost += record_stmt_cost (cost_vec,
1268 ncopies * assumed_nunits,
1269 scalar_load, stmt_info, 0, vect_body);
1271 else
1272 vect_get_load_cost (stmt_info, ncopies, first_stmt_p,
1273 &inside_cost, &prologue_cost,
1274 cost_vec, cost_vec, true);
1275 if (memory_access_type == VMAT_ELEMENTWISE
1276 || memory_access_type == VMAT_STRIDED_SLP)
1277 inside_cost += record_stmt_cost (cost_vec, ncopies, vec_construct,
1278 stmt_info, 0, vect_body);
1280 if (dump_enabled_p ())
1281 dump_printf_loc (MSG_NOTE, vect_location,
1282 "vect_model_load_cost: inside_cost = %d, "
1283 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1287 /* Calculate cost of DR's memory access. */
1288 void
1289 vect_get_load_cost (stmt_vec_info stmt_info, int ncopies,
1290 bool add_realign_cost, unsigned int *inside_cost,
1291 unsigned int *prologue_cost,
1292 stmt_vector_for_cost *prologue_cost_vec,
1293 stmt_vector_for_cost *body_cost_vec,
1294 bool record_prologue_costs)
1296 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
1297 int alignment_support_scheme
1298 = vect_supportable_dr_alignment (dr_info, false);
1300 switch (alignment_support_scheme)
1302 case dr_aligned:
1304 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1305 stmt_info, 0, vect_body);
1307 if (dump_enabled_p ())
1308 dump_printf_loc (MSG_NOTE, vect_location,
1309 "vect_model_load_cost: aligned.\n");
1311 break;
1313 case dr_unaligned_supported:
1315 /* Here, we assign an additional cost for the unaligned load. */
1316 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1317 unaligned_load, stmt_info,
1318 DR_MISALIGNMENT (dr_info),
1319 vect_body);
1321 if (dump_enabled_p ())
1322 dump_printf_loc (MSG_NOTE, vect_location,
1323 "vect_model_load_cost: unaligned supported by "
1324 "hardware.\n");
1326 break;
1328 case dr_explicit_realign:
1330 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1331 vector_load, stmt_info, 0, vect_body);
1332 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1333 vec_perm, stmt_info, 0, vect_body);
1335 /* FIXME: If the misalignment remains fixed across the iterations of
1336 the containing loop, the following cost should be added to the
1337 prologue costs. */
1338 if (targetm.vectorize.builtin_mask_for_load)
1339 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1340 stmt_info, 0, vect_body);
1342 if (dump_enabled_p ())
1343 dump_printf_loc (MSG_NOTE, vect_location,
1344 "vect_model_load_cost: explicit realign\n");
1346 break;
1348 case dr_explicit_realign_optimized:
1350 if (dump_enabled_p ())
1351 dump_printf_loc (MSG_NOTE, vect_location,
1352 "vect_model_load_cost: unaligned software "
1353 "pipelined.\n");
1355 /* Unaligned software pipeline has a load of an address, an initial
1356 load, and possibly a mask operation to "prime" the loop. However,
1357 if this is an access in a group of loads, which provide grouped
1358 access, then the above cost should only be considered for one
1359 access in the group. Inside the loop, there is a load op
1360 and a realignment op. */
1362 if (add_realign_cost && record_prologue_costs)
1364 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1365 vector_stmt, stmt_info,
1366 0, vect_prologue);
1367 if (targetm.vectorize.builtin_mask_for_load)
1368 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1369 vector_stmt, stmt_info,
1370 0, vect_prologue);
1373 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1374 stmt_info, 0, vect_body);
1375 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1376 stmt_info, 0, vect_body);
1378 if (dump_enabled_p ())
1379 dump_printf_loc (MSG_NOTE, vect_location,
1380 "vect_model_load_cost: explicit realign optimized"
1381 "\n");
1383 break;
1386 case dr_unaligned_unsupported:
1388 *inside_cost = VECT_MAX_COST;
1390 if (dump_enabled_p ())
1391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1392 "vect_model_load_cost: unsupported access.\n");
1393 break;
1396 default:
1397 gcc_unreachable ();
1401 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1402 the loop preheader for the vectorized stmt STMT_VINFO. */
1404 static void
1405 vect_init_vector_1 (stmt_vec_info stmt_vinfo, gimple *new_stmt,
1406 gimple_stmt_iterator *gsi)
1408 if (gsi)
1409 vect_finish_stmt_generation (stmt_vinfo, new_stmt, gsi);
1410 else
1412 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1414 if (loop_vinfo)
1416 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1417 basic_block new_bb;
1418 edge pe;
1420 if (nested_in_vect_loop_p (loop, stmt_vinfo))
1421 loop = loop->inner;
1423 pe = loop_preheader_edge (loop);
1424 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1425 gcc_assert (!new_bb);
1427 else
1429 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1430 basic_block bb;
1431 gimple_stmt_iterator gsi_bb_start;
1433 gcc_assert (bb_vinfo);
1434 bb = BB_VINFO_BB (bb_vinfo);
1435 gsi_bb_start = gsi_after_labels (bb);
1436 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1440 if (dump_enabled_p ())
1441 dump_printf_loc (MSG_NOTE, vect_location,
1442 "created new init_stmt: %G", new_stmt);
1445 /* Function vect_init_vector.
1447 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1448 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1449 a vector type, a vector with all elements equal to VAL is created first.
1450 Place the initialization at GSI if it is not NULL. Otherwise, place the
1451 initialization at the loop preheader.
1452 Return the DEF of INIT_STMT.
1453 It will be used in the vectorization of STMT_INFO. */
1455 tree
1456 vect_init_vector (stmt_vec_info stmt_info, tree val, tree type,
1457 gimple_stmt_iterator *gsi)
1459 gimple *init_stmt;
1460 tree new_temp;
1462 /* We abuse this function to push something to an SSA name with initial 'val'. */
1463 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1465 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1466 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1468 /* Scalar boolean value should be transformed into
1469 all zeros or all ones value before building a vector. */
1470 if (VECTOR_BOOLEAN_TYPE_P (type))
1472 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1473 tree false_val = build_zero_cst (TREE_TYPE (type));
1475 if (CONSTANT_CLASS_P (val))
1476 val = integer_zerop (val) ? false_val : true_val;
1477 else
1479 new_temp = make_ssa_name (TREE_TYPE (type));
1480 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1481 val, true_val, false_val);
1482 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1483 val = new_temp;
1486 else
1488 gimple_seq stmts = NULL;
1489 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1490 val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
1491 TREE_TYPE (type), val);
1492 else
1493 /* ??? Condition vectorization expects us to do
1494 promotion of invariant/external defs. */
1495 val = gimple_convert (&stmts, TREE_TYPE (type), val);
1496 for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
1497 !gsi_end_p (gsi2); )
1499 init_stmt = gsi_stmt (gsi2);
1500 gsi_remove (&gsi2, false);
1501 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1505 val = build_vector_from_val (type, val);
1508 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1509 init_stmt = gimple_build_assign (new_temp, val);
1510 vect_init_vector_1 (stmt_info, init_stmt, gsi);
1511 return new_temp;
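/* As an illustration (the SSA name is made up), initializing a V4SI
   vector from the invariant scalar 5 in the loop preheader yields
   something like:

     cst__1 = { 5, 5, 5, 5 };

   while a scalar boolean VAL destined for a vector boolean TYPE is
   first turned into an all-ones/all-zeros value by the COND_EXPR
   handling above.  */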
1514 /* Function vect_get_vec_def_for_operand_1.
1516 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1517 with type DT that will be used in the vectorized stmt. */
1519 tree
1520 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info,
1521 enum vect_def_type dt)
1523 tree vec_oprnd;
1524 stmt_vec_info vec_stmt_info;
1526 switch (dt)
1528 /* operand is a constant or a loop invariant. */
1529 case vect_constant_def:
1530 case vect_external_def:
1531 /* Code should use vect_get_vec_def_for_operand. */
1532 gcc_unreachable ();
1534 /* Operand is defined by a loop header phi. In case of nested
1535 cycles we also may have uses of the backedge def. */
1536 case vect_reduction_def:
1537 case vect_double_reduction_def:
1538 case vect_nested_cycle:
1539 case vect_induction_def:
1540 gcc_assert (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
1541 || dt == vect_nested_cycle);
1542 /* Fallthru. */
1544 /* operand is defined inside the loop. */
1545 case vect_internal_def:
1547 /* Get the def from the vectorized stmt. */
1548 vec_stmt_info = STMT_VINFO_VEC_STMT (def_stmt_info);
1549 /* Get vectorized pattern statement. */
1550 if (!vec_stmt_info
1551 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1552 && !STMT_VINFO_RELEVANT (def_stmt_info))
1553 vec_stmt_info = (STMT_VINFO_VEC_STMT
1554 (STMT_VINFO_RELATED_STMT (def_stmt_info)));
1555 gcc_assert (vec_stmt_info);
1556 if (gphi *phi = dyn_cast <gphi *> (vec_stmt_info->stmt))
1557 vec_oprnd = PHI_RESULT (phi);
1558 else
1559 vec_oprnd = gimple_get_lhs (vec_stmt_info->stmt);
1560 return vec_oprnd;
1563 default:
1564 gcc_unreachable ();
1569 /* Function vect_get_vec_def_for_operand.
1571 OP is an operand in STMT_VINFO. This function returns a (vector) def
1572 that will be used in the vectorized stmt for STMT_VINFO.
1574 In the case that OP is an SSA_NAME which is defined in the loop, then
1575 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1577 In case OP is an invariant or constant, a new stmt that creates a vector def
1578 needs to be introduced. VECTYPE may be used to specify a required type for
1579 vector invariant. */
1581 tree
1582 vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype)
1584 gimple *def_stmt;
1585 enum vect_def_type dt;
1586 bool is_simple_use;
1587 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1589 if (dump_enabled_p ())
1590 dump_printf_loc (MSG_NOTE, vect_location,
1591 "vect_get_vec_def_for_operand: %T\n", op);
1593 stmt_vec_info def_stmt_info;
1594 is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
1595 &def_stmt_info, &def_stmt);
1596 gcc_assert (is_simple_use);
1597 if (def_stmt && dump_enabled_p ())
1598 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = %G", def_stmt);
1600 if (dt == vect_constant_def || dt == vect_external_def)
1602 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1603 tree vector_type;
1605 if (vectype)
1606 vector_type = vectype;
1607 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1608 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1609 vector_type = truth_type_for (stmt_vectype);
1610 else
1611 vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));
1613 gcc_assert (vector_type);
1614 return vect_init_vector (stmt_vinfo, op, vector_type, NULL);
1616 else
1617 return vect_get_vec_def_for_operand_1 (def_stmt_info, dt);
1621 /* Function vect_get_vec_def_for_stmt_copy
1623 Return a vector-def for an operand. This function is used when the
1624 vectorized stmt to be created (by the caller to this function) is a "copy"
1625 created in case the vectorized result cannot fit in one vector, and several
1626 copies of the vector-stmt are required. In this case the vector-def is
1627 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1628 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1630 Context:
1631 In case the vectorization factor (VF) is bigger than the number
1632 of elements that can fit in a vectype (nunits), we have to generate
1633 more than one vector stmt to vectorize the scalar stmt. This situation
1634 arises when there are multiple data-types operated upon in the loop; the
1635 smallest data-type determines the VF, and as a result, when vectorizing
1636 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1637 vector stmt (each computing a vector of 'nunits' results, and together
1638 computing 'VF' results in each iteration). This function is called when
1639 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1640 which VF=16 and nunits=4, so the number of copies required is 4):
1642 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1644 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1645 VS1.1: vx.1 = memref1 VS1.2
1646 VS1.2: vx.2 = memref2 VS1.3
1647 VS1.3: vx.3 = memref3
1649 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1650 VSnew.1: vz1 = vx.1 + ... VSnew.2
1651 VSnew.2: vz2 = vx.2 + ... VSnew.3
1652 VSnew.3: vz3 = vx.3 + ...
1654 The vectorization of S1 is explained in vectorizable_load.
1655 The vectorization of S2:
1656 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1657 the function 'vect_get_vec_def_for_operand' is called to
1658 get the relevant vector-def for each operand of S2. For operand x it
1659 returns the vector-def 'vx.0'.
1661 To create the remaining copies of the vector-stmt (VSnew.j), this
1662 function is called to get the relevant vector-def for each operand. It is
1663 obtained from the respective VS1.j stmt, which is recorded in the
1664 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1666 For example, to obtain the vector-def 'vx.1' in order to create the
1667 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1668 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1669 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1670 and return its def ('vx.1').
1671 Overall, to create the above sequence this function will be called 3 times:
1672 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1673 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1674 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1676 tree
1677 vect_get_vec_def_for_stmt_copy (vec_info *vinfo, tree vec_oprnd)
1679 stmt_vec_info def_stmt_info = vinfo->lookup_def (vec_oprnd);
1680 if (!def_stmt_info)
1681 /* Do nothing; can reuse same def. */
1682 return vec_oprnd;
1684 def_stmt_info = STMT_VINFO_RELATED_STMT (def_stmt_info);
1685 gcc_assert (def_stmt_info);
1686 if (gphi *phi = dyn_cast <gphi *> (def_stmt_info->stmt))
1687 vec_oprnd = PHI_RESULT (phi);
1688 else
1689 vec_oprnd = gimple_get_lhs (def_stmt_info->stmt);
1690 return vec_oprnd;
1694 /* Get vectorized definitions for the operands to create a copy of an original
1695 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1697 void
1698 vect_get_vec_defs_for_stmt_copy (vec_info *vinfo,
1699 vec<tree> *vec_oprnds0,
1700 vec<tree> *vec_oprnds1)
1702 tree vec_oprnd = vec_oprnds0->pop ();
1704 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1705 vec_oprnds0->quick_push (vec_oprnd);
1707 if (vec_oprnds1 && vec_oprnds1->length ())
1709 vec_oprnd = vec_oprnds1->pop ();
1710 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
1711 vec_oprnds1->quick_push (vec_oprnd);
1716 /* Get vectorized definitions for OP0 and OP1. */
1718 void
1719 vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info,
1720 vec<tree> *vec_oprnds0,
1721 vec<tree> *vec_oprnds1,
1722 slp_tree slp_node)
1724 if (slp_node)
1726 auto_vec<vec<tree> > vec_defs (SLP_TREE_CHILDREN (slp_node).length ());
1727 vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1);
1728 *vec_oprnds0 = vec_defs[0];
1729 if (op1)
1730 *vec_oprnds1 = vec_defs[1];
1732 else
1734 tree vec_oprnd;
1736 vec_oprnds0->create (1);
1737 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt_info);
1738 vec_oprnds0->quick_push (vec_oprnd);
1740 if (op1)
1742 vec_oprnds1->create (1);
1743 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt_info);
1744 vec_oprnds1->quick_push (vec_oprnd);
1749 /* Helper function called by vect_finish_replace_stmt and
1750 vect_finish_stmt_generation. Set the location of the new
1751 statement and create and return a stmt_vec_info for it. */
1753 static stmt_vec_info
1754 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info, gimple *vec_stmt)
1756 vec_info *vinfo = stmt_info->vinfo;
1758 stmt_vec_info vec_stmt_info = vinfo->add_stmt (vec_stmt);
1760 if (dump_enabled_p ())
1761 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);
1763 gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));
1765 /* While EH edges will generally prevent vectorization, stmt might
1766 e.g. be in a must-not-throw region. Ensure newly created stmts
1767 that could throw are part of the same region. */
1768 int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
1769 if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
1770 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1772 return vec_stmt_info;
1775 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1776 which sets the same scalar result as STMT_INFO did. Create and return a
1777 stmt_vec_info for VEC_STMT. */
1779 stmt_vec_info
1780 vect_finish_replace_stmt (stmt_vec_info stmt_info, gimple *vec_stmt)
1782 gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
1783 gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));
1785 gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
1786 gsi_replace (&gsi, vec_stmt, true);
1788 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
1791 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1792 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1794 stmt_vec_info
1795 vect_finish_stmt_generation (stmt_vec_info stmt_info, gimple *vec_stmt,
1796 gimple_stmt_iterator *gsi)
1798 gcc_assert (gimple_code (stmt_info->stmt) != GIMPLE_LABEL);
1800 if (!gsi_end_p (*gsi)
1801 && gimple_has_mem_ops (vec_stmt))
1803 gimple *at_stmt = gsi_stmt (*gsi);
1804 tree vuse = gimple_vuse (at_stmt);
1805 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1807 tree vdef = gimple_vdef (at_stmt);
1808 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1809 /* If we have an SSA vuse and insert a store, update virtual
1810 SSA form to avoid triggering the renamer. Do so only
1811 if we can easily see all uses - which is what almost always
1812 happens with the way vectorized stmts are inserted. */
1813 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1814 && ((is_gimple_assign (vec_stmt)
1815 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1816 || (is_gimple_call (vec_stmt)
1817 && !(gimple_call_flags (vec_stmt)
1818 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1820 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1821 gimple_set_vdef (vec_stmt, new_vdef);
1822 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1826 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1827 return vect_finish_stmt_generation_1 (stmt_info, vec_stmt);
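/* A rough illustration of the virtual-operand bookkeeping above, with
   hypothetical SSA names: suppose the statement at *GSI is a scalar store

       # .MEM_4 = VDEF <.MEM_3>
       a[i_2] = x_5;

   and VEC_STMT is a vector store inserted before it.  VEC_STMT is given
   .MEM_3 as its VUSE and a fresh name, say .MEM_7, as its VDEF, and the
   scalar store's VUSE is rewritten from .MEM_3 to .MEM_7.  Virtual SSA
   form therefore stays valid without a renamer run.  */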
1830 /* We want to vectorize a call to combined function CFN with function
1831 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1832 as the types of all inputs. Check whether this is possible using
1833 an internal function, returning its code if so or IFN_LAST if not. */
1835 static internal_fn
1836 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1837 tree vectype_out, tree vectype_in)
1839 internal_fn ifn;
1840 if (internal_fn_p (cfn))
1841 ifn = as_internal_fn (cfn);
1842 else
1843 ifn = associated_internal_fn (fndecl);
1844 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1846 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1847 if (info.vectorizable)
1849 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1850 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1851 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1852 OPTIMIZE_FOR_SPEED))
1853 return ifn;
1856 return IFN_LAST;
1860 static tree permute_vec_elements (tree, tree, tree, stmt_vec_info,
1861 gimple_stmt_iterator *);
1863 /* Check whether a load or store statement in the loop described by
1864 LOOP_VINFO is possible in a fully-masked loop. This is testing
1865 whether the vectorizer pass has the appropriate support, as well as
1866 whether the target does.
1868 VLS_TYPE says whether the statement is a load or store and VECTYPE
1869 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1870 says how the load or store is going to be implemented and GROUP_SIZE
1871 is the number of load or store statements in the containing group.
1872 If the access is a gather load or scatter store, GS_INFO describes
1873 its arguments. If the load or store is conditional, SCALAR_MASK is the
1874 condition under which it occurs.
1876 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1877 supported, otherwise record the required mask types. */
1879 static void
1880 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1881 vec_load_store_type vls_type, int group_size,
1882 vect_memory_access_type memory_access_type,
1883 gather_scatter_info *gs_info, tree scalar_mask)
1885 /* Invariant loads need no special support. */
1886 if (memory_access_type == VMAT_INVARIANT)
1887 return;
1889 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1890 machine_mode vecmode = TYPE_MODE (vectype);
1891 bool is_load = (vls_type == VLS_LOAD);
1892 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1894 if (is_load
1895 ? !vect_load_lanes_supported (vectype, group_size, true)
1896 : !vect_store_lanes_supported (vectype, group_size, true))
1898 if (dump_enabled_p ())
1899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1900 "can't use a fully-masked loop because the"
1901 " target doesn't have an appropriate masked"
1902 " load/store-lanes instruction.\n");
1903 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1904 return;
1906 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1907 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1908 return;
1911 if (memory_access_type == VMAT_GATHER_SCATTER)
1913 internal_fn ifn = (is_load
1914 ? IFN_MASK_GATHER_LOAD
1915 : IFN_MASK_SCATTER_STORE);
1916 if (!internal_gather_scatter_fn_supported_p (ifn, vectype,
1917 gs_info->memory_type,
1918 gs_info->offset_vectype,
1919 gs_info->scale))
1921 if (dump_enabled_p ())
1922 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1923 "can't use a fully-masked loop because the"
1924 " target doesn't have an appropriate masked"
1925 " gather load or scatter store instruction.\n");
1926 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1927 return;
1929 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1930 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask);
1931 return;
1934 if (memory_access_type != VMAT_CONTIGUOUS
1935 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1937 /* Element X of the data must come from iteration i * VF + X of the
1938 scalar loop. We need more work to support other mappings. */
1939 if (dump_enabled_p ())
1940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1941 "can't use a fully-masked loop because an access"
1942 " isn't contiguous.\n");
1943 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1944 return;
1947 machine_mode mask_mode;
1948 if (!VECTOR_MODE_P (vecmode)
1949 || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
1950 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1952 if (dump_enabled_p ())
1953 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1954 "can't use a fully-masked loop because the target"
1955 " doesn't have the appropriate masked load or"
1956 " store.\n");
1957 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1958 return;
1960 /* We might load more scalars than we need for permuting SLP loads.
1961 We checked in get_group_load_store_type that the extra elements
1962 don't leak into a new vector. */
1963 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1964 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1965 unsigned int nvectors;
1966 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1967 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask);
1968 else
1969 gcc_unreachable ();
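/* Worked example for the final, contiguous case above (all numbers
   assumed): with GROUP_SIZE == 2, a vectorization factor of 8 and V4SI
   vectors (NUNITS == 4), the rounding division gives
   NVECTORS == (2 * 8) / 4 == 4, so four loop masks are recorded for
   VECTYPE per iteration of the vector loop.  */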
1972 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1973 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1974 that needs to be applied to all loads and stores in a vectorized loop.
1975 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1977 MASK_TYPE is the type of both masks. If new statements are needed,
1978 insert them before GSI. */
1980 static tree
1981 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1982 gimple_stmt_iterator *gsi)
1984 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1985 if (!loop_mask)
1986 return vec_mask;
1988 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1989 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1990 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1991 vec_mask, loop_mask);
1992 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1993 return and_res;
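/* For example (SSA version numbers hypothetical), with a nonnull
   LOOP_MASK the only statement inserted before GSI has the form

       vec_mask_and_23 = vec_mask_10 & loop_mask_8;

   and the returned name is then used as the mask operand of the masked
   load or store.  */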
1996 /* Determine whether we can use a gather load or scatter store to vectorize
1997 strided load or store STMT_INFO by truncating the current offset to a
1998 smaller width. We need to be able to construct an offset vector:
2000 { 0, X, X*2, X*3, ... }
2002 without loss of precision, where X is STMT_INFO's DR_STEP.
2004 Return true if this is possible, describing the gather load or scatter
2005 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
2007 static bool
2008 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
2009 loop_vec_info loop_vinfo, bool masked_p,
2010 gather_scatter_info *gs_info)
2012 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2013 data_reference *dr = dr_info->dr;
2014 tree step = DR_STEP (dr);
2015 if (TREE_CODE (step) != INTEGER_CST)
2017 /* ??? Perhaps we could use range information here? */
2018 if (dump_enabled_p ())
2019 dump_printf_loc (MSG_NOTE, vect_location,
2020 "cannot truncate variable step.\n");
2021 return false;
2024 /* Get the number of bits in an element. */
2025 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2026 scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2027 unsigned int element_bits = GET_MODE_BITSIZE (element_mode);
2029 /* Set COUNT to the upper limit on the number of elements - 1.
2030 Start with the maximum vectorization factor. */
2031 unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;
2033 /* Try lowering COUNT to the number of scalar latch iterations. */
2034 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2035 widest_int max_iters;
2036 if (max_loop_iterations (loop, &max_iters)
2037 && max_iters < count)
2038 count = max_iters.to_shwi ();
2040 /* Try scales of 1 and the element size. */
2041 int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
2042 wi::overflow_type overflow = wi::OVF_NONE;
2043 for (int i = 0; i < 2; ++i)
2045 int scale = scales[i];
2046 widest_int factor;
2047 if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
2048 continue;
2050       /* Determine the minimum precision of COUNT * STEP / SCALE.  */
2051 widest_int range = wi::mul (count, factor, SIGNED, &overflow);
2052 if (overflow)
2053 continue;
2054 signop sign = range >= 0 ? UNSIGNED : SIGNED;
2055 unsigned int min_offset_bits = wi::min_precision (range, sign);
2057 /* Find the narrowest viable offset type. */
2058 unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
2059 tree offset_type = build_nonstandard_integer_type (offset_bits,
2060 sign == UNSIGNED);
2062 /* See whether the target supports the operation with an offset
2063 no narrower than OFFSET_TYPE. */
2064 tree memory_type = TREE_TYPE (DR_REF (dr));
2065 if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
2066 vectype, memory_type, offset_type, scale,
2067 &gs_info->ifn, &gs_info->offset_vectype))
2068 continue;
2070 gs_info->decl = NULL_TREE;
2071 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2072 but we don't need to store that here. */
2073 gs_info->base = NULL_TREE;
2074 gs_info->element_type = TREE_TYPE (vectype);
2075 gs_info->offset = fold_convert (offset_type, step);
2076 gs_info->offset_dt = vect_constant_def;
2077 gs_info->scale = scale;
2078 gs_info->memory_type = memory_type;
2079 return true;
2082 if (overflow && dump_enabled_p ())
2083 dump_printf_loc (MSG_NOTE, vect_location,
2084 "truncating gather/scatter offset to %d bits"
2085 " might change its value.\n", element_bits);
2087 return false;
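/* Worked sketch with assumed values: for 32-bit elements, DR_STEP == 4
   and a loop with at most 1000 latch iterations, COUNT is lowered to
   1000.  Trying SCALE == 4 gives FACTOR == 1, so RANGE == 1000, which
   needs MIN_OFFSET_BITS == 10 as an unsigned value; rounding up to a
   power of two selects a 16-bit unsigned offset type, which is used if
   the target supports a gather/scatter with at least that offset
   width.  */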
2090 /* Return true if we can use gather/scatter internal functions to
2091 vectorize STMT_INFO, which is a grouped or strided load or store.
2092 MASKED_P is true if load or store is conditional. When returning
2093 true, fill in GS_INFO with the information required to perform the
2094 operation. */
2096 static bool
2097 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
2098 loop_vec_info loop_vinfo, bool masked_p,
2099 gather_scatter_info *gs_info)
2101 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
2102 || gs_info->decl)
2103 return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
2104 masked_p, gs_info);
2106 tree old_offset_type = TREE_TYPE (gs_info->offset);
2107 tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);
2109 gcc_assert (TYPE_PRECISION (new_offset_type)
2110 >= TYPE_PRECISION (old_offset_type));
2111 gs_info->offset = fold_convert (new_offset_type, gs_info->offset);
2113 if (dump_enabled_p ())
2114 dump_printf_loc (MSG_NOTE, vect_location,
2115 "using gather/scatter for strided/grouped access,"
2116 " scale = %d\n", gs_info->scale);
2118 return true;
2121 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2122 elements with a known constant step. Return -1 if that step
2123 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2125 static int
2126 compare_step_with_zero (stmt_vec_info stmt_info)
2128 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2129 return tree_int_cst_compare (vect_dr_behavior (dr_info)->step,
2130 size_zero_node);
2133 /* If the target supports a permute mask that reverses the elements in
2134 a vector of type VECTYPE, return that mask, otherwise return null. */
2136 static tree
2137 perm_mask_for_reverse (tree vectype)
2139 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2141 /* The encoding has a single stepped pattern. */
2142 vec_perm_builder sel (nunits, 1, 3);
2143 for (int i = 0; i < 3; ++i)
2144 sel.quick_push (nunits - 1 - i);
2146 vec_perm_indices indices (sel, 1, nunits);
2147 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
2148 return NULL_TREE;
2149 return vect_gen_perm_mask_checked (vectype, indices);
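/* As an illustration (assuming a V4SI VECTYPE): the three stepped
   elements pushed above are 3, 2 and 1, and the encoding extends them
   to the full reversal selector { 3, 2, 1, 0 }.  */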
2152 /* A subroutine of get_load_store_type, with a subset of the same
2153 arguments. Handle the case where STMT_INFO is a load or store that
2154 accesses consecutive elements with a negative step. */
2156 static vect_memory_access_type
2157 get_negative_load_store_type (stmt_vec_info stmt_info, tree vectype,
2158 vec_load_store_type vls_type,
2159 unsigned int ncopies)
2161 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2162 dr_alignment_support alignment_support_scheme;
2164 if (ncopies > 1)
2166 if (dump_enabled_p ())
2167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2168 "multiple types with negative step.\n");
2169 return VMAT_ELEMENTWISE;
2172 alignment_support_scheme = vect_supportable_dr_alignment (dr_info, false);
2173 if (alignment_support_scheme != dr_aligned
2174 && alignment_support_scheme != dr_unaligned_supported)
2176 if (dump_enabled_p ())
2177 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2178 "negative step but alignment required.\n");
2179 return VMAT_ELEMENTWISE;
2182 if (vls_type == VLS_STORE_INVARIANT)
2184 if (dump_enabled_p ())
2185 dump_printf_loc (MSG_NOTE, vect_location,
2186 "negative step with invariant source;"
2187 " no permute needed.\n");
2188 return VMAT_CONTIGUOUS_DOWN;
2191 if (!perm_mask_for_reverse (vectype))
2193 if (dump_enabled_p ())
2194 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2195 "negative step and reversing not supported.\n");
2196 return VMAT_ELEMENTWISE;
2199 return VMAT_CONTIGUOUS_REVERSE;
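/* Hedged example: in a loop like

       for (int i = n - 1; i >= 0; --i)
         a[i] = b[i];

   both data references have DR_STEP == -4 for 32-bit elements.  With a
   single copy, a supportable alignment scheme and a target that can
   reverse a V4SI vector, both accesses are classified as
   VMAT_CONTIGUOUS_REVERSE; failing any of those checks they fall back
   to VMAT_ELEMENTWISE.  */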
2202 /* STMT_INFO is either a masked or unconditional store. Return the value
2203 being stored. */
2205 tree
2206 vect_get_store_rhs (stmt_vec_info stmt_info)
2208 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
2210 gcc_assert (gimple_assign_single_p (assign));
2211 return gimple_assign_rhs1 (assign);
2213 if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
2215 internal_fn ifn = gimple_call_internal_fn (call);
2216 int index = internal_fn_stored_value_index (ifn);
2217 gcc_assert (index >= 0);
2218 return gimple_call_arg (call, index);
2220 gcc_unreachable ();
2223 /* A subroutine of get_load_store_type, with a subset of the same
2224 arguments. Handle the case where STMT_INFO is part of a grouped load
2225 or store.
2227 For stores, the statements in the group are all consecutive
2228 and there is no gap at the end. For loads, the statements in the
2229 group might not be consecutive; there can be gaps between statements
2230 as well as at the end. */
2232 static bool
2233 get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2234 bool masked_p, vec_load_store_type vls_type,
2235 vect_memory_access_type *memory_access_type,
2236 gather_scatter_info *gs_info)
2238 vec_info *vinfo = stmt_info->vinfo;
2239 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2240 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2241 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2242 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2243 unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
2244 bool single_element_p = (stmt_info == first_stmt_info
2245 && !DR_GROUP_NEXT_ELEMENT (stmt_info));
2246 unsigned HOST_WIDE_INT gap = DR_GROUP_GAP (first_stmt_info);
2247 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2249 /* True if the vectorized statements would access beyond the last
2250 statement in the group. */
2251 bool overrun_p = false;
2253 /* True if we can cope with such overrun by peeling for gaps, so that
2254 there is at least one final scalar iteration after the vector loop. */
2255 bool can_overrun_p = (!masked_p
2256 && vls_type == VLS_LOAD
2257 && loop_vinfo
2258 && !loop->inner);
2260 /* There can only be a gap at the end of the group if the stride is
2261 known at compile time. */
2262 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info) || gap == 0);
2264 /* Stores can't yet have gaps. */
2265 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
2267 if (slp)
2269 if (STMT_VINFO_STRIDED_P (first_stmt_info))
2271 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2272 separated by the stride, until we have a complete vector.
2273 Fall back to scalar accesses if that isn't possible. */
2274 if (multiple_p (nunits, group_size))
2275 *memory_access_type = VMAT_STRIDED_SLP;
2276 else
2277 *memory_access_type = VMAT_ELEMENTWISE;
2279 else
2281 overrun_p = loop_vinfo && gap != 0;
2282 if (overrun_p && vls_type != VLS_LOAD)
2284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2285 "Grouped store with gaps requires"
2286 " non-consecutive accesses\n");
2287 return false;
2289 /* An overrun is fine if the trailing elements are smaller
2290 than the alignment boundary B. Every vector access will
2291 be a multiple of B and so we are guaranteed to access a
2292 non-gap element in the same B-sized block. */
2293 if (overrun_p
2294 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2295 / vect_get_scalar_dr_size (first_dr_info)))
2296 overrun_p = false;
2298 /* If the gap splits the vector in half and the target
2299          can do half-vector operations, avoid the epilogue peeling
2300          by simply loading only half of the vector.  Usually
2301 the construction with an upper zero half will be elided. */
2302 dr_alignment_support alignment_support_scheme;
2303 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
2304 machine_mode vmode;
2305 if (overrun_p
2306 && !masked_p
2307 && (((alignment_support_scheme
2308 = vect_supportable_dr_alignment (first_dr_info, false)))
2309 == dr_aligned
2310 || alignment_support_scheme == dr_unaligned_supported)
2311 && known_eq (nunits, (group_size - gap) * 2)
2312 && known_eq (nunits, group_size)
2313 && VECTOR_MODE_P (TYPE_MODE (vectype))
2314 && related_vector_mode (TYPE_MODE (vectype), elmode,
2315 group_size - gap).exists (&vmode)
2316 && (convert_optab_handler (vec_init_optab,
2317 TYPE_MODE (vectype), vmode)
2318 != CODE_FOR_nothing))
2319 overrun_p = false;
2321 if (overrun_p && !can_overrun_p)
2323 if (dump_enabled_p ())
2324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2325 "Peeling for outer loop is not supported\n");
2326 return false;
2328 int cmp = compare_step_with_zero (stmt_info);
2329 if (cmp < 0)
2330 *memory_access_type = get_negative_load_store_type
2331 (stmt_info, vectype, vls_type, 1);
2332 else
2334 gcc_assert (!loop_vinfo || cmp > 0);
2335 *memory_access_type = VMAT_CONTIGUOUS;
2339 else
2341 /* We can always handle this case using elementwise accesses,
2342 but see if something more efficient is available. */
2343 *memory_access_type = VMAT_ELEMENTWISE;
2345 /* If there is a gap at the end of the group then these optimizations
2346 would access excess elements in the last iteration. */
2347 bool would_overrun_p = (gap != 0);
2348 /* An overrun is fine if the trailing elements are smaller than the
2349 alignment boundary B. Every vector access will be a multiple of B
2350 and so we are guaranteed to access a non-gap element in the
2351 same B-sized block. */
2352 if (would_overrun_p
2353 && !masked_p
2354 && gap < (vect_known_alignment_in_bytes (first_dr_info)
2355 / vect_get_scalar_dr_size (first_dr_info)))
2356 would_overrun_p = false;
2358 if (!STMT_VINFO_STRIDED_P (first_stmt_info)
2359 && (can_overrun_p || !would_overrun_p)
2360 && compare_step_with_zero (stmt_info) > 0)
2362 /* First cope with the degenerate case of a single-element
2363 vector. */
2364 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
2365 *memory_access_type = VMAT_CONTIGUOUS;
2367 /* Otherwise try using LOAD/STORE_LANES. */
2368 if (*memory_access_type == VMAT_ELEMENTWISE
2369 && (vls_type == VLS_LOAD
2370 ? vect_load_lanes_supported (vectype, group_size, masked_p)
2371 : vect_store_lanes_supported (vectype, group_size,
2372 masked_p)))
2374 *memory_access_type = VMAT_LOAD_STORE_LANES;
2375 overrun_p = would_overrun_p;
2378 /* If that fails, try using permuting loads. */
2379 if (*memory_access_type == VMAT_ELEMENTWISE
2380 && (vls_type == VLS_LOAD
2381 ? vect_grouped_load_supported (vectype, single_element_p,
2382 group_size)
2383 : vect_grouped_store_supported (vectype, group_size)))
2385 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2386 overrun_p = would_overrun_p;
2390       /* As a last resort, try using a gather load or scatter store.
2392 ??? Although the code can handle all group sizes correctly,
2393 it probably isn't a win to use separate strided accesses based
2394 on nearby locations. Or, even if it's a win over scalar code,
2395 it might not be a win over vectorizing at a lower VF, if that
2396 allows us to use contiguous accesses. */
2397 if (*memory_access_type == VMAT_ELEMENTWISE
2398 && single_element_p
2399 && loop_vinfo
2400 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2401 masked_p, gs_info))
2402 *memory_access_type = VMAT_GATHER_SCATTER;
2405 if (vls_type != VLS_LOAD && first_stmt_info == stmt_info)
2407       /* STMT_INFO is the leader of the group.  Check the operands of all the
2408 stmts of the group. */
2409 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2410 while (next_stmt_info)
2412 tree op = vect_get_store_rhs (next_stmt_info);
2413 enum vect_def_type dt;
2414 if (!vect_is_simple_use (op, vinfo, &dt))
2416 if (dump_enabled_p ())
2417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2418 "use not simple.\n");
2419 return false;
2421 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
2425 if (overrun_p)
2427 gcc_assert (can_overrun_p);
2428 if (dump_enabled_p ())
2429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2430 "Data access with gaps requires scalar "
2431 "epilogue loop\n");
2432 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2435 return true;
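/* Example of the gap/overrun rule used above (numbers assumed): a load
   group with DR_GROUP_SIZE == 4 but only one active element
   (DR_GROUP_GAP == 3) accessed with V4SI vectors reads up to three
   elements beyond what the scalar loop reads.  If the known alignment
   of the first access is 16 bytes, those 3 * 4 trailing bytes stay
   inside the same 16-byte block (GAP == 3 < 16 / 4), so the overrun is
   harmless; otherwise LOOP_VINFO_PEELING_FOR_GAPS forces a scalar
   epilogue iteration.  */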
2438 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2439 if there is a memory access type that the vectorized form can use,
2440 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2441 or scatters, fill in GS_INFO accordingly.
2443 SLP says whether we're performing SLP rather than loop vectorization.
2444 MASKED_P is true if the statement is conditional on a vectorized mask.
2445 VECTYPE is the vector type that the vectorized statements will use.
2446 NCOPIES is the number of vector statements that will be needed. */
2448 static bool
2449 get_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp,
2450 bool masked_p, vec_load_store_type vls_type,
2451 unsigned int ncopies,
2452 vect_memory_access_type *memory_access_type,
2453 gather_scatter_info *gs_info)
2455 vec_info *vinfo = stmt_info->vinfo;
2456 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2457 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2458 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2460 *memory_access_type = VMAT_GATHER_SCATTER;
2461 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info))
2462 gcc_unreachable ();
2463 else if (!vect_is_simple_use (gs_info->offset, vinfo,
2464 &gs_info->offset_dt,
2465 &gs_info->offset_vectype))
2467 if (dump_enabled_p ())
2468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2469 "%s index use not simple.\n",
2470 vls_type == VLS_LOAD ? "gather" : "scatter");
2471 return false;
2474 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2476 if (!get_group_load_store_type (stmt_info, vectype, slp, masked_p,
2477 vls_type, memory_access_type, gs_info))
2478 return false;
2480 else if (STMT_VINFO_STRIDED_P (stmt_info))
2482 gcc_assert (!slp);
2483 if (loop_vinfo
2484 && vect_use_strided_gather_scatters_p (stmt_info, loop_vinfo,
2485 masked_p, gs_info))
2486 *memory_access_type = VMAT_GATHER_SCATTER;
2487 else
2488 *memory_access_type = VMAT_ELEMENTWISE;
2490 else
2492 int cmp = compare_step_with_zero (stmt_info);
2493 if (cmp < 0)
2494 *memory_access_type = get_negative_load_store_type
2495 (stmt_info, vectype, vls_type, ncopies);
2496 else if (cmp == 0)
2498 gcc_assert (vls_type == VLS_LOAD);
2499 *memory_access_type = VMAT_INVARIANT;
2501 else
2502 *memory_access_type = VMAT_CONTIGUOUS;
2505 if ((*memory_access_type == VMAT_ELEMENTWISE
2506 || *memory_access_type == VMAT_STRIDED_SLP)
2507 && !nunits.is_constant ())
2509 if (dump_enabled_p ())
2510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2511 "Not using elementwise accesses due to variable "
2512 "vectorization factor.\n");
2513 return false;
2516 /* FIXME: At the moment the cost model seems to underestimate the
2517 cost of using elementwise accesses. This check preserves the
2518 traditional behavior until that can be fixed. */
2519 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
2520 if (!first_stmt_info)
2521 first_stmt_info = stmt_info;
2522 if (*memory_access_type == VMAT_ELEMENTWISE
2523 && !STMT_VINFO_STRIDED_P (first_stmt_info)
2524 && !(stmt_info == DR_GROUP_FIRST_ELEMENT (stmt_info)
2525 && !DR_GROUP_NEXT_ELEMENT (stmt_info)
2526 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info))))
2528 if (dump_enabled_p ())
2529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2530 "not falling back to elementwise accesses\n");
2531 return false;
2533 return true;
2536 /* Return true if boolean argument MASK is suitable for vectorizing
2537 conditional operation STMT_INFO. When returning true, store the type
2538 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2539 in *MASK_VECTYPE_OUT. */
2541 static bool
2542 vect_check_scalar_mask (stmt_vec_info stmt_info, tree mask,
2543 vect_def_type *mask_dt_out,
2544 tree *mask_vectype_out)
2546 vec_info *vinfo = stmt_info->vinfo;
2547 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2549 if (dump_enabled_p ())
2550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551 "mask argument is not a boolean.\n");
2552 return false;
2555 if (TREE_CODE (mask) != SSA_NAME)
2557 if (dump_enabled_p ())
2558 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2559 "mask argument is not an SSA name.\n");
2560 return false;
2563 enum vect_def_type mask_dt;
2564 tree mask_vectype;
2565 if (!vect_is_simple_use (mask, stmt_info->vinfo, &mask_dt, &mask_vectype))
2567 if (dump_enabled_p ())
2568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2569 "mask use not simple.\n");
2570 return false;
2573 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2574 if (!mask_vectype)
2575 mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype));
2577 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2579 if (dump_enabled_p ())
2580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2581 "could not find an appropriate vector mask type.\n");
2582 return false;
2585 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2586 TYPE_VECTOR_SUBPARTS (vectype)))
2588 if (dump_enabled_p ())
2589 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2590 "vector mask type %T"
2591 " does not match vector data type %T.\n",
2592 mask_vectype, vectype);
2594 return false;
2597 *mask_dt_out = mask_dt;
2598 *mask_vectype_out = mask_vectype;
2599 return true;
2602 /* Return true if stored value RHS is suitable for vectorizing store
2603 statement STMT_INFO. When returning true, store the type of the
2604 definition in *RHS_DT_OUT, the type of the vectorized store value in
2605 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2607 static bool
2608 vect_check_store_rhs (stmt_vec_info stmt_info, tree rhs,
2609 vect_def_type *rhs_dt_out, tree *rhs_vectype_out,
2610 vec_load_store_type *vls_type_out)
2612   /* If this is a store from a constant, make sure
2613 native_encode_expr can handle it. */
2614 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2616 if (dump_enabled_p ())
2617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2618 "cannot encode constant as a byte sequence.\n");
2619 return false;
2622 enum vect_def_type rhs_dt;
2623 tree rhs_vectype;
2624 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &rhs_dt, &rhs_vectype))
2626 if (dump_enabled_p ())
2627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2628 "use not simple.\n");
2629 return false;
2632 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2633 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2635 if (dump_enabled_p ())
2636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2637 "incompatible vector types.\n");
2638 return false;
2641 *rhs_dt_out = rhs_dt;
2642 *rhs_vectype_out = rhs_vectype;
2643 if (rhs_dt == vect_constant_def || rhs_dt == vect_external_def)
2644 *vls_type_out = VLS_STORE_INVARIANT;
2645 else
2646 *vls_type_out = VLS_STORE;
2647 return true;
2650 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2651 Note that we support masks with floating-point type, in which case the
2652 floats are interpreted as a bitmask. */
2654 static tree
2655 vect_build_all_ones_mask (stmt_vec_info stmt_info, tree masktype)
2657 if (TREE_CODE (masktype) == INTEGER_TYPE)
2658 return build_int_cst (masktype, -1);
2659 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2661 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2662 mask = build_vector_from_val (masktype, mask);
2663 return vect_init_vector (stmt_info, mask, masktype, NULL);
2665 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2667 REAL_VALUE_TYPE r;
2668 long tmp[6];
2669 for (int j = 0; j < 6; ++j)
2670 tmp[j] = -1;
2671 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2672 tree mask = build_real (TREE_TYPE (masktype), r);
2673 mask = build_vector_from_val (masktype, mask);
2674 return vect_init_vector (stmt_info, mask, masktype, NULL);
2676 gcc_unreachable ();
2679 /* Build an all-zero merge value of type VECTYPE while vectorizing
2680 STMT_INFO as a gather load. */
2682 static tree
2683 vect_build_zero_merge_argument (stmt_vec_info stmt_info, tree vectype)
2685 tree merge;
2686 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2687 merge = build_int_cst (TREE_TYPE (vectype), 0);
2688 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2690 REAL_VALUE_TYPE r;
2691 long tmp[6];
2692 for (int j = 0; j < 6; ++j)
2693 tmp[j] = 0;
2694 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2695 merge = build_real (TREE_TYPE (vectype), r);
2697 else
2698 gcc_unreachable ();
2699 merge = build_vector_from_val (vectype, merge);
2700 return vect_init_vector (stmt_info, merge, vectype, NULL);
2703 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2704 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2705 the gather load operation. If the load is conditional, MASK is the
2706 unvectorized condition and MASK_DT is its definition type, otherwise
2707 MASK is null. */
2709 static void
2710 vect_build_gather_load_calls (stmt_vec_info stmt_info,
2711 gimple_stmt_iterator *gsi,
2712 stmt_vec_info *vec_stmt,
2713 gather_scatter_info *gs_info,
2714 tree mask)
2716 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2717 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2718 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2719 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2720 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2721 edge pe = loop_preheader_edge (loop);
2722 enum { NARROW, NONE, WIDEN } modifier;
2723 poly_uint64 gather_off_nunits
2724 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2726 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2727 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2728 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2729 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2730 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2731 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2732 tree scaletype = TREE_VALUE (arglist);
2733 tree real_masktype = masktype;
2734 gcc_checking_assert (types_compatible_p (srctype, rettype)
2735 && (!mask
2736 || TREE_CODE (masktype) == INTEGER_TYPE
2737 || types_compatible_p (srctype, masktype)));
2738 if (mask && TREE_CODE (masktype) == INTEGER_TYPE)
2739 masktype = truth_type_for (srctype);
2741 tree mask_halftype = masktype;
2742 tree perm_mask = NULL_TREE;
2743 tree mask_perm_mask = NULL_TREE;
2744 if (known_eq (nunits, gather_off_nunits))
2745 modifier = NONE;
2746 else if (known_eq (nunits * 2, gather_off_nunits))
2748 modifier = WIDEN;
2750 /* Currently widening gathers and scatters are only supported for
2751 fixed-length vectors. */
2752 int count = gather_off_nunits.to_constant ();
2753 vec_perm_builder sel (count, count, 1);
2754 for (int i = 0; i < count; ++i)
2755 sel.quick_push (i | (count / 2));
2757 vec_perm_indices indices (sel, 1, count);
2758 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2759 indices);
2761 else if (known_eq (nunits, gather_off_nunits * 2))
2763 modifier = NARROW;
2765 /* Currently narrowing gathers and scatters are only supported for
2766 fixed-length vectors. */
2767 int count = nunits.to_constant ();
2768 vec_perm_builder sel (count, count, 1);
2769 sel.quick_grow (count);
2770 for (int i = 0; i < count; ++i)
2771 sel[i] = i < count / 2 ? i : i + count / 2;
2772 vec_perm_indices indices (sel, 2, count);
2773 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2775 ncopies *= 2;
2777 if (mask && masktype == real_masktype)
2779 for (int i = 0; i < count; ++i)
2780 sel[i] = i | (count / 2);
2781 indices.new_vector (sel, 2, count);
2782 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2784 else if (mask)
2785 mask_halftype = truth_type_for (gs_info->offset_vectype);
2787 else
2788 gcc_unreachable ();
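  /* Illustration with assumed modes: in the WIDEN case with 4-element
     data vectors and 8-element offset vectors (say V4DF data gathered
     with V8SI offsets), the selector built above is
     { 4, 5, 6, 7, 4, 5, 6, 7 }, so every odd-numbered copy feeds the
     upper half of the offset vector to the gather call.  */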
2790 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
2791 tree vec_dest = vect_create_destination_var (scalar_dest, vectype);
2793 tree ptr = fold_convert (ptrtype, gs_info->base);
2794 if (!is_gimple_min_invariant (ptr))
2796 gimple_seq seq;
2797 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2798 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2799 gcc_assert (!new_bb);
2802 tree scale = build_int_cst (scaletype, gs_info->scale);
2804 tree vec_oprnd0 = NULL_TREE;
2805 tree vec_mask = NULL_TREE;
2806 tree src_op = NULL_TREE;
2807 tree mask_op = NULL_TREE;
2808 tree prev_res = NULL_TREE;
2809 stmt_vec_info prev_stmt_info = NULL;
2811 if (!mask)
2813 src_op = vect_build_zero_merge_argument (stmt_info, rettype);
2814 mask_op = vect_build_all_ones_mask (stmt_info, masktype);
2817 for (int j = 0; j < ncopies; ++j)
2819 tree op, var;
2820 if (modifier == WIDEN && (j & 1))
2821 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2822 perm_mask, stmt_info, gsi);
2823 else if (j == 0)
2824 op = vec_oprnd0
2825 = vect_get_vec_def_for_operand (gs_info->offset, stmt_info);
2826 else
2827 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2828 vec_oprnd0);
2830 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2832 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2833 TYPE_VECTOR_SUBPARTS (idxtype)));
2834 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2835 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2836 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2837 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2838 op = var;
2841 if (mask)
2843 if (mask_perm_mask && (j & 1))
2844 mask_op = permute_vec_elements (mask_op, mask_op,
2845 mask_perm_mask, stmt_info, gsi);
2846 else
2848 if (j == 0)
2849 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info);
2850 else if (modifier != NARROW || (j & 1) == 0)
2851 vec_mask = vect_get_vec_def_for_stmt_copy (loop_vinfo,
2852 vec_mask);
2854 mask_op = vec_mask;
2855 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2857 poly_uint64 sub1 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op));
2858 poly_uint64 sub2 = TYPE_VECTOR_SUBPARTS (masktype);
2859 gcc_assert (known_eq (sub1, sub2));
2860 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2861 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2862 gassign *new_stmt
2863 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2864 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2865 mask_op = var;
2868 if (modifier == NARROW && masktype != real_masktype)
2870 var = vect_get_new_ssa_name (mask_halftype, vect_simple_var);
2871 gassign *new_stmt
2872 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
2873 : VEC_UNPACK_LO_EXPR,
2874 mask_op);
2875 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2876 mask_op = var;
2878 src_op = mask_op;
2881 tree mask_arg = mask_op;
2882 if (masktype != real_masktype)
2884 tree utype, optype = TREE_TYPE (mask_op);
2885 if (TYPE_MODE (real_masktype) == TYPE_MODE (optype))
2886 utype = real_masktype;
2887 else
2888 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
2889 var = vect_get_new_ssa_name (utype, vect_scalar_var);
2890 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_op);
2891 gassign *new_stmt
2892 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
2893 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2894 mask_arg = var;
2895 if (!useless_type_conversion_p (real_masktype, utype))
2897 gcc_assert (TYPE_PRECISION (utype)
2898 <= TYPE_PRECISION (real_masktype));
2899 var = vect_get_new_ssa_name (real_masktype, vect_scalar_var);
2900 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
2901 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2902 mask_arg = var;
2904 src_op = build_zero_cst (srctype);
2906 gcall *new_call = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2907 mask_arg, scale);
2909 stmt_vec_info new_stmt_info;
2910 if (!useless_type_conversion_p (vectype, rettype))
2912 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2913 TYPE_VECTOR_SUBPARTS (rettype)));
2914 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2915 gimple_call_set_lhs (new_call, op);
2916 vect_finish_stmt_generation (stmt_info, new_call, gsi);
2917 var = make_ssa_name (vec_dest);
2918 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2919 gassign *new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2920 new_stmt_info
2921 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
2923 else
2925 var = make_ssa_name (vec_dest, new_call);
2926 gimple_call_set_lhs (new_call, var);
2927 new_stmt_info
2928 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
2931 if (modifier == NARROW)
2933 if ((j & 1) == 0)
2935 prev_res = var;
2936 continue;
2938 var = permute_vec_elements (prev_res, var, perm_mask,
2939 stmt_info, gsi);
2940 new_stmt_info = loop_vinfo->lookup_def (var);
2943 if (prev_stmt_info == NULL)
2944 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
2945 else
2946 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
2947 prev_stmt_info = new_stmt_info;
2951 /* Prepare the base and offset in GS_INFO for vectorization.
2952 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2953 to the vectorized offset argument for the first copy of STMT_INFO.
2954 STMT_INFO is the statement described by GS_INFO and LOOP is the
2955 containing loop. */
2957 static void
2958 vect_get_gather_scatter_ops (class loop *loop, stmt_vec_info stmt_info,
2959 gather_scatter_info *gs_info,
2960 tree *dataref_ptr, tree *vec_offset)
2962 gimple_seq stmts = NULL;
2963 *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
2964 if (stmts != NULL)
2966 basic_block new_bb;
2967 edge pe = loop_preheader_edge (loop);
2968 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2969 gcc_assert (!new_bb);
2971 *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info,
2972 gs_info->offset_vectype);
2975 /* Prepare to implement a grouped or strided load or store using
2976 the gather load or scatter store operation described by GS_INFO.
2977 STMT_INFO is the load or store statement.
2979 Set *DATAREF_BUMP to the amount that should be added to the base
2980 address after each copy of the vectorized statement. Set *VEC_OFFSET
2981 to an invariant offset vector in which element I has the value
2982 I * DR_STEP / SCALE. */
2984 static void
2985 vect_get_strided_load_store_ops (stmt_vec_info stmt_info,
2986 loop_vec_info loop_vinfo,
2987 gather_scatter_info *gs_info,
2988 tree *dataref_bump, tree *vec_offset)
2990 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2991 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2992 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2993 gimple_seq stmts;
2995 tree bump = size_binop (MULT_EXPR,
2996 fold_convert (sizetype, unshare_expr (DR_STEP (dr))),
2997 size_int (TYPE_VECTOR_SUBPARTS (vectype)));
2998 *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE);
2999 if (stmts)
3000 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3002 /* The offset given in GS_INFO can have pointer type, so use the element
3003 type of the vector instead. */
3004 tree offset_type = TREE_TYPE (gs_info->offset);
3005 offset_type = TREE_TYPE (gs_info->offset_vectype);
3007 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
3008 tree step = size_binop (EXACT_DIV_EXPR, unshare_expr (DR_STEP (dr)),
3009 ssize_int (gs_info->scale));
3010 step = fold_convert (offset_type, step);
3011 step = force_gimple_operand (step, &stmts, true, NULL_TREE);
3013 /* Create {0, X, X*2, X*3, ...}. */
3014 *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, gs_info->offset_vectype,
3015 build_zero_cst (offset_type), step);
3016 if (stmts)
3017 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
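/* Worked example (assumed values): for a strided access with
   DR_STEP == 32 bytes, SCALE == 8 and 4-element vectors, DATAREF_BUMP
   is 32 * 4 == 128 bytes per copy, X == 32 / 8 == 4 and VEC_OFFSET is
   the invariant series { 0, 4, 8, 12 }.  */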
3020 /* Return the amount that should be added to a vector pointer to move
3021 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
3022 being vectorized and MEMORY_ACCESS_TYPE describes the type of
3023 vectorization. */
3025 static tree
3026 vect_get_data_ptr_increment (dr_vec_info *dr_info, tree aggr_type,
3027 vect_memory_access_type memory_access_type)
3029 if (memory_access_type == VMAT_INVARIANT)
3030 return size_zero_node;
3032 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
3033 tree step = vect_dr_behavior (dr_info)->step;
3034 if (tree_int_cst_sgn (step) == -1)
3035 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
3036 return iv_step;
3039 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
3041 static bool
3042 vectorizable_bswap (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3043 stmt_vec_info *vec_stmt, slp_tree slp_node,
3044 tree vectype_in, stmt_vector_for_cost *cost_vec)
3046 tree op, vectype;
3047 gcall *stmt = as_a <gcall *> (stmt_info->stmt);
3048 vec_info *vinfo = stmt_info->vinfo;
3049 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3050 unsigned ncopies;
3052 op = gimple_call_arg (stmt, 0);
3053 vectype = STMT_VINFO_VECTYPE (stmt_info);
3054 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3056 /* Multiple types in SLP are handled by creating the appropriate number of
3057 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3058 case of SLP. */
3059 if (slp_node)
3060 ncopies = 1;
3061 else
3062 ncopies = vect_get_num_copies (loop_vinfo, vectype);
3064 gcc_assert (ncopies >= 1);
3066 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
3067 if (! char_vectype)
3068 return false;
3070 poly_uint64 num_bytes = TYPE_VECTOR_SUBPARTS (char_vectype);
3071 unsigned word_bytes;
3072 if (!constant_multiple_p (num_bytes, nunits, &word_bytes))
3073 return false;
3075 /* The encoding uses one stepped pattern for each byte in the word. */
3076 vec_perm_builder elts (num_bytes, word_bytes, 3);
3077 for (unsigned i = 0; i < 3; ++i)
3078 for (unsigned j = 0; j < word_bytes; ++j)
3079 elts.quick_push ((i + 1) * word_bytes - j - 1);
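  /* For instance (assuming __builtin_bswap32 on V4SI, so CHAR_VECTYPE is
     V16QI and WORD_BYTES == 4), the twelve encoded elements pushed above
     are { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8 } - four patterns of three
     stepped values each - and the encoding extends them to the
     byte-reversal selector
     { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }.  */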
3081 vec_perm_indices indices (elts, 1, num_bytes);
3082 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
3083 return false;
3085 if (! vec_stmt)
3087 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3088 DUMP_VECT_SCOPE ("vectorizable_bswap");
3089 if (! slp_node)
3091 record_stmt_cost (cost_vec,
3092 1, vector_stmt, stmt_info, 0, vect_prologue);
3093 record_stmt_cost (cost_vec,
3094 ncopies, vec_perm, stmt_info, 0, vect_body);
3096 return true;
3099 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
3101 /* Transform. */
3102 vec<tree> vec_oprnds = vNULL;
3103 stmt_vec_info new_stmt_info = NULL;
3104 stmt_vec_info prev_stmt_info = NULL;
3105 for (unsigned j = 0; j < ncopies; j++)
3107 /* Handle uses. */
3108 if (j == 0)
3109 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
3110 else
3111 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
3113       /* Arguments are ready.  Create the new vector stmt.  */
3114 unsigned i;
3115 tree vop;
3116 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
3118 gimple *new_stmt;
3119 tree tem = make_ssa_name (char_vectype);
3120 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3121 char_vectype, vop));
3122 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3123 tree tem2 = make_ssa_name (char_vectype);
3124 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
3125 tem, tem, bswap_vconst);
3126 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3127 tem = make_ssa_name (vectype);
3128 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
3129 vectype, tem2));
3130 new_stmt_info
3131 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3132 if (slp_node)
3133 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3136 if (slp_node)
3137 continue;
3139 if (j == 0)
3140 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3141 else
3142 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3144 prev_stmt_info = new_stmt_info;
3147 vec_oprnds.release ();
3148 return true;
3151 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3152 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3153 in a single step. On success, store the binary pack code in
3154 *CONVERT_CODE. */
3156 static bool
3157 simple_integer_narrowing (tree vectype_out, tree vectype_in,
3158 tree_code *convert_code)
3160 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
3161 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
3162 return false;
3164 tree_code code;
3165 int multi_step_cvt = 0;
3166 auto_vec <tree, 8> interm_types;
3167 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
3168 &code, &multi_step_cvt, &interm_types)
3169 || multi_step_cvt)
3170 return false;
3172 *convert_code = code;
3173 return true;
3176 /* Function vectorizable_call.
3178 Check if STMT_INFO performs a function call that can be vectorized.
3179 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3180 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3181 Return true if STMT_INFO is vectorizable in this way. */
3183 static bool
3184 vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
3185 stmt_vec_info *vec_stmt, slp_tree slp_node,
3186 stmt_vector_for_cost *cost_vec)
3188 gcall *stmt;
3189 tree vec_dest;
3190 tree scalar_dest;
3191 tree op;
3192 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3193 stmt_vec_info prev_stmt_info;
3194 tree vectype_out, vectype_in;
3195 poly_uint64 nunits_in;
3196 poly_uint64 nunits_out;
3197 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3198 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3199 vec_info *vinfo = stmt_info->vinfo;
3200 tree fndecl, new_temp, rhs_type;
3201 enum vect_def_type dt[4]
3202 = { vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type,
3203 vect_unknown_def_type };
3204 tree vectypes[ARRAY_SIZE (dt)] = {};
3205 int ndts = ARRAY_SIZE (dt);
3206 int ncopies, j;
3207 auto_vec<tree, 8> vargs;
3208 auto_vec<tree, 8> orig_vargs;
3209 enum { NARROW, NONE, WIDEN } modifier;
3210 size_t i, nargs;
3211 tree lhs;
3213 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3214 return false;
3216 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3217 && ! vec_stmt)
3218 return false;
3220 /* Is STMT_INFO a vectorizable call? */
3221 stmt = dyn_cast <gcall *> (stmt_info->stmt);
3222 if (!stmt)
3223 return false;
3225 if (gimple_call_internal_p (stmt)
3226 && (internal_load_fn_p (gimple_call_internal_fn (stmt))
3227 || internal_store_fn_p (gimple_call_internal_fn (stmt))))
3228 /* Handled by vectorizable_load and vectorizable_store. */
3229 return false;
3231 if (gimple_call_lhs (stmt) == NULL_TREE
3232 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3233 return false;
3235 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3237 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3239 /* Process function arguments. */
3240 rhs_type = NULL_TREE;
3241 vectype_in = NULL_TREE;
3242 nargs = gimple_call_num_args (stmt);
3244   /* Bail out if the function has more than four arguments; we do not have
3245 interesting builtin functions to vectorize with more than two arguments
3246 except for fma. No arguments is also not good. */
3247 if (nargs == 0 || nargs > 4)
3248 return false;
3250 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3251 combined_fn cfn = gimple_call_combined_fn (stmt);
3252 if (cfn == CFN_GOMP_SIMD_LANE)
3254 nargs = 0;
3255 rhs_type = unsigned_type_node;
3258 int mask_opno = -1;
3259 if (internal_fn_p (cfn))
3260 mask_opno = internal_fn_mask_index (as_internal_fn (cfn));
3262 for (i = 0; i < nargs; i++)
3264 op = gimple_call_arg (stmt, i);
3266 if ((int) i == mask_opno)
3268 if (!vect_check_scalar_mask (stmt_info, op, &dt[i], &vectypes[i]))
3269 return false;
3270 continue;
3273 if (!vect_is_simple_use (op, vinfo, &dt[i], &vectypes[i]))
3275 if (dump_enabled_p ())
3276 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3277 "use not simple.\n");
3278 return false;
3281 /* We can only handle calls with arguments of the same type. */
3282 if (rhs_type
3283 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
3285 if (dump_enabled_p ())
3286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3287 "argument types differ.\n");
3288 return false;
3290 if (!rhs_type)
3291 rhs_type = TREE_TYPE (op);
3293 if (!vectype_in)
3294 vectype_in = vectypes[i];
3295 else if (vectypes[i]
3296 && !types_compatible_p (vectypes[i], vectype_in))
3298 if (dump_enabled_p ())
3299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3300 "argument vector types differ.\n");
3301 return false;
3304 /* If all arguments are external or constant defs, infer the vector type
3305 from the scalar type. */
3306 if (!vectype_in)
3307 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
3308 if (vec_stmt)
3309 gcc_assert (vectype_in);
3310 if (!vectype_in)
3312 if (dump_enabled_p ())
3313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3314 "no vectype for scalar type %T\n", rhs_type);
3316 return false;
3318 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3319 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3320 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3321 by a pack of the two vectors into an SI vector. We would need
3322 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3323 if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out))
3325 if (dump_enabled_p ())
3326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3327 "mismatched vector sizes %T and %T\n",
3328 vectype_in, vectype_out);
3329 return false;
3332 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
3333 != VECTOR_BOOLEAN_TYPE_P (vectype_in))
3335 if (dump_enabled_p ())
3336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3337 "mixed mask and nonmask vector types\n");
3338 return false;
3341 /* FORNOW */
3342 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3343 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3344 if (known_eq (nunits_in * 2, nunits_out))
3345 modifier = NARROW;
3346 else if (known_eq (nunits_out, nunits_in))
3347 modifier = NONE;
3348 else if (known_eq (nunits_out * 2, nunits_in))
3349 modifier = WIDEN;
3350 else
3351 return false;
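  /* For instance (types assumed): the DI->SI __builtin_ctz* case
     mentioned above, with VECTYPE_IN == V4DI and VECTYPE_OUT == V8SI,
     satisfies nunits_in * 2 == nunits_out and is classified as NARROW;
     equal element counts give NONE, and nunits_out * 2 == nunits_in
     gives WIDEN.  */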
3353 /* We only handle functions that do not read or clobber memory. */
3354 if (gimple_vuse (stmt))
3356 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3358 "function reads from or writes to memory.\n");
3359 return false;
3362   /* For now, we only vectorize functions if a target-specific builtin
3363 is available. TODO -- in some cases, it might be profitable to
3364 insert the calls for pieces of the vector, in order to be able
3365 to vectorize other operations in the loop. */
3366 fndecl = NULL_TREE;
3367 internal_fn ifn = IFN_LAST;
3368 tree callee = gimple_call_fndecl (stmt);
3370 /* First try using an internal function. */
3371 tree_code convert_code = ERROR_MARK;
3372 if (cfn != CFN_LAST
3373 && (modifier == NONE
3374 || (modifier == NARROW
3375 && simple_integer_narrowing (vectype_out, vectype_in,
3376 &convert_code))))
3377 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
3378 vectype_in);
3380 /* If that fails, try asking for a target-specific built-in function. */
3381 if (ifn == IFN_LAST)
3383 if (cfn != CFN_LAST)
3384 fndecl = targetm.vectorize.builtin_vectorized_function
3385 (cfn, vectype_out, vectype_in);
3386 else if (callee && fndecl_built_in_p (callee, BUILT_IN_MD))
3387 fndecl = targetm.vectorize.builtin_md_vectorized_function
3388 (callee, vectype_out, vectype_in);
3391 if (ifn == IFN_LAST && !fndecl)
3393 if (cfn == CFN_GOMP_SIMD_LANE
3394 && !slp_node
3395 && loop_vinfo
3396 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3397 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
3398 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
3399 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
3401 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3402 { 0, 1, 2, ... vf - 1 } vector. */
3403 gcc_assert (nargs == 0);
3405 else if (modifier == NONE
3406 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
3407 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
3408 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
3409 return vectorizable_bswap (stmt_info, gsi, vec_stmt, slp_node,
3410 vectype_in, cost_vec);
3411 else
3413 if (dump_enabled_p ())
3414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3415 "function is not vectorizable.\n");
3416 return false;
3420 if (slp_node)
3421 ncopies = 1;
3422 else if (modifier == NARROW && ifn == IFN_LAST)
3423 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
3424 else
3425 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
3427 /* Sanity check: make sure that at least one copy of the vectorized stmt
3428 needs to be generated. */
3429 gcc_assert (ncopies >= 1);
3431 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
3432 if (!vec_stmt) /* transformation not required. */
3434 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3435 DUMP_VECT_SCOPE ("vectorizable_call");
3436 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
3437 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
3438 record_stmt_cost (cost_vec, ncopies / 2,
3439 vec_promote_demote, stmt_info, 0, vect_body);
3441 if (loop_vinfo && mask_opno >= 0)
3443 unsigned int nvectors = (slp_node
3444 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3445 : ncopies);
3446 tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno);
3447 vect_record_loop_mask (loop_vinfo, masks, nvectors,
3448 vectype_out, scalar_mask);
3450 return true;
3453 /* Transform. */
3455 if (dump_enabled_p ())
3456 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3458 /* Handle def. */
3459 scalar_dest = gimple_call_lhs (stmt);
3460 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3462 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
3464 stmt_vec_info new_stmt_info = NULL;
3465 prev_stmt_info = NULL;
3466 if (modifier == NONE || ifn != IFN_LAST)
3468 tree prev_res = NULL_TREE;
3469 vargs.safe_grow (nargs);
3470 orig_vargs.safe_grow (nargs);
3471 for (j = 0; j < ncopies; ++j)
3473 /* Build argument list for the vectorized call. */
3474 if (slp_node)
3476 auto_vec<vec<tree> > vec_defs (nargs);
3477 vec<tree> vec_oprnds0;
3479 vect_get_slp_defs (slp_node, &vec_defs);
3480 vec_oprnds0 = vec_defs[0];
3482 /* Arguments are ready. Create the new vector stmt. */
3483 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
3485 size_t k;
3486 for (k = 0; k < nargs; k++)
3488 vec<tree> vec_oprndsk = vec_defs[k];
3489 vargs[k] = vec_oprndsk[i];
3491 if (modifier == NARROW)
3493 /* We don't define any narrowing conditional functions
3494 at present. */
3495 gcc_assert (mask_opno < 0);
3496 tree half_res = make_ssa_name (vectype_in);
3497 gcall *call
3498 = gimple_build_call_internal_vec (ifn, vargs);
3499 gimple_call_set_lhs (call, half_res);
3500 gimple_call_set_nothrow (call, true);
3501 vect_finish_stmt_generation (stmt_info, call, gsi);
3502 if ((i & 1) == 0)
3504 prev_res = half_res;
3505 continue;
3507 new_temp = make_ssa_name (vec_dest);
3508 gimple *new_stmt
3509 = gimple_build_assign (new_temp, convert_code,
3510 prev_res, half_res);
3511 new_stmt_info
3512 = vect_finish_stmt_generation (stmt_info, new_stmt,
3513 gsi);
3515 else
3517 if (mask_opno >= 0 && masked_loop_p)
3519 unsigned int vec_num = vec_oprnds0.length ();
3520 /* Always true for SLP. */
3521 gcc_assert (ncopies == 1);
3522 tree mask = vect_get_loop_mask (gsi, masks, vec_num,
3523 vectype_out, i);
3524 vargs[mask_opno] = prepare_load_store_mask
3525 (TREE_TYPE (mask), mask, vargs[mask_opno], gsi);
3528 gcall *call;
3529 if (ifn != IFN_LAST)
3530 call = gimple_build_call_internal_vec (ifn, vargs);
3531 else
3532 call = gimple_build_call_vec (fndecl, vargs);
3533 new_temp = make_ssa_name (vec_dest, call);
3534 gimple_call_set_lhs (call, new_temp);
3535 gimple_call_set_nothrow (call, true);
3536 new_stmt_info
3537 = vect_finish_stmt_generation (stmt_info, call, gsi);
3539 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3542 for (i = 0; i < nargs; i++)
3544 vec<tree> vec_oprndsi = vec_defs[i];
3545 vec_oprndsi.release ();
3547 continue;
3550 for (i = 0; i < nargs; i++)
3552 op = gimple_call_arg (stmt, i);
3553 if (j == 0)
3554 vec_oprnd0
3555 = vect_get_vec_def_for_operand (op, stmt_info, vectypes[i]);
3556 else
3557 vec_oprnd0
3558 = vect_get_vec_def_for_stmt_copy (vinfo, orig_vargs[i]);
3560 orig_vargs[i] = vargs[i] = vec_oprnd0;
3563 if (mask_opno >= 0 && masked_loop_p)
3565 tree mask = vect_get_loop_mask (gsi, masks, ncopies,
3566 vectype_out, j);
3567 vargs[mask_opno]
3568 = prepare_load_store_mask (TREE_TYPE (mask), mask,
3569 vargs[mask_opno], gsi);
3572 if (cfn == CFN_GOMP_SIMD_LANE)
3574 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3575 tree new_var
3576 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3577 gimple *init_stmt = gimple_build_assign (new_var, cst);
3578 vect_init_vector_1 (stmt_info, init_stmt, NULL);
3579 new_temp = make_ssa_name (vec_dest);
3580 gimple *new_stmt = gimple_build_assign (new_temp, new_var);
3581 new_stmt_info
3582 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3584 else if (modifier == NARROW)
3586 /* We don't define any narrowing conditional functions at
3587 present. */
3588 gcc_assert (mask_opno < 0);
3589 tree half_res = make_ssa_name (vectype_in);
3590 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3591 gimple_call_set_lhs (call, half_res);
3592 gimple_call_set_nothrow (call, true);
3593 vect_finish_stmt_generation (stmt_info, call, gsi);
3594 if ((j & 1) == 0)
3596 prev_res = half_res;
3597 continue;
3599 new_temp = make_ssa_name (vec_dest);
3600 gassign *new_stmt = gimple_build_assign (new_temp, convert_code,
3601 prev_res, half_res);
3602 new_stmt_info
3603 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3605 else
3607 gcall *call;
3608 if (ifn != IFN_LAST)
3609 call = gimple_build_call_internal_vec (ifn, vargs);
3610 else
3611 call = gimple_build_call_vec (fndecl, vargs);
3612 new_temp = make_ssa_name (vec_dest, call);
3613 gimple_call_set_lhs (call, new_temp);
3614 gimple_call_set_nothrow (call, true);
3615 new_stmt_info
3616 = vect_finish_stmt_generation (stmt_info, call, gsi);
3619 if (j == (modifier == NARROW ? 1 : 0))
3620 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
3621 else
3622 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3624 prev_stmt_info = new_stmt_info;
3627 else if (modifier == NARROW)
3629 /* We don't define any narrowing conditional functions at present. */
3630 gcc_assert (mask_opno < 0);
3631 for (j = 0; j < ncopies; ++j)
3633 /* Build argument list for the vectorized call. */
3634 if (j == 0)
3635 vargs.create (nargs * 2);
3636 else
3637 vargs.truncate (0);
3639 if (slp_node)
3641 auto_vec<vec<tree> > vec_defs (nargs);
3642 vec<tree> vec_oprnds0;
3644 vect_get_slp_defs (slp_node, &vec_defs);
3645 vec_oprnds0 = vec_defs[0];
3647 /* Arguments are ready. Create the new vector stmt. */
3648 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3650 size_t k;
3651 vargs.truncate (0);
3652 for (k = 0; k < nargs; k++)
3654 vec<tree> vec_oprndsk = vec_defs[k];
3655 vargs.quick_push (vec_oprndsk[i]);
3656 vargs.quick_push (vec_oprndsk[i + 1]);
3658 gcall *call;
3659 if (ifn != IFN_LAST)
3660 call = gimple_build_call_internal_vec (ifn, vargs);
3661 else
3662 call = gimple_build_call_vec (fndecl, vargs);
3663 new_temp = make_ssa_name (vec_dest, call);
3664 gimple_call_set_lhs (call, new_temp);
3665 gimple_call_set_nothrow (call, true);
3666 new_stmt_info
3667 = vect_finish_stmt_generation (stmt_info, call, gsi);
3668 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
3671 for (i = 0; i < nargs; i++)
3673 vec<tree> vec_oprndsi = vec_defs[i];
3674 vec_oprndsi.release ();
3676 continue;
3679 for (i = 0; i < nargs; i++)
3681 op = gimple_call_arg (stmt, i);
3682 if (j == 0)
3684 vec_oprnd0
3685 = vect_get_vec_def_for_operand (op, stmt_info,
3686 vectypes[i]);
3687 vec_oprnd1
3688 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3690 else
3692 vec_oprnd1 = gimple_call_arg (new_stmt_info->stmt,
3693 2 * i + 1);
3694 vec_oprnd0
3695 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
3696 vec_oprnd1
3697 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
3700 vargs.quick_push (vec_oprnd0);
3701 vargs.quick_push (vec_oprnd1);
3704 gcall *new_stmt = gimple_build_call_vec (fndecl, vargs);
3705 new_temp = make_ssa_name (vec_dest, new_stmt);
3706 gimple_call_set_lhs (new_stmt, new_temp);
3707 new_stmt_info
3708 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
3710 if (j == 0)
3711 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
3712 else
3713 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
3715 prev_stmt_info = new_stmt_info;
3718 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3720 else
3721 /* No current target implements this case. */
3722 return false;
3724 vargs.release ();
3726 /* The call in STMT might prevent it from being removed in DCE.
3727 We cannot, however, remove it here, due to the way the SSA name
3728 it defines is mapped to the new definition. So just replace the
3729 rhs of the statement with something harmless. */
3731 if (slp_node)
3732 return true;
3734 stmt_info = vect_orig_stmt (stmt_info);
3735 lhs = gimple_get_lhs (stmt_info->stmt);
3737 gassign *new_stmt
3738 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3739 vinfo->replace_stmt (gsi, stmt_info, new_stmt);
3741 return true;
3745 struct simd_call_arg_info
3747 tree vectype;
3748 tree op;
3749 HOST_WIDE_INT linear_step;
3750 enum vect_def_type dt;
3751 unsigned int align;
3752 bool simd_lane_linear;
3755 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3756 is linear within the simd lane (but not within the whole loop), note it in
3757 *ARGINFO. */
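/* For example (an illustrative sketch, not from the original sources),
   the pattern recognized below is roughly

       _1 = GOMP_SIMD_LANE (simduid.0);
       _2 = (sizetype) _1;
       _3 = _2 * 8;                  <- optional MULT_EXPR by a constant
       op = base_ptr + _3;           <- POINTER_PLUS_EXPR

   i.e. OP advances from a loop-invariant base by a constant step per
   simd lane, as happens for addresses into the per-simd-lane arrays
   that OpenMP simd privatization creates.  */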
3759 static void
3760 vect_simd_lane_linear (tree op, class loop *loop,
3761 struct simd_call_arg_info *arginfo)
3763 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3765 if (!is_gimple_assign (def_stmt)
3766 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3767 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3768 return;
3770 tree base = gimple_assign_rhs1 (def_stmt);
3771 HOST_WIDE_INT linear_step = 0;
3772 tree v = gimple_assign_rhs2 (def_stmt);
3773 while (TREE_CODE (v) == SSA_NAME)
3775 tree t;
3776 def_stmt = SSA_NAME_DEF_STMT (v);
3777 if (is_gimple_assign (def_stmt))
3778 switch (gimple_assign_rhs_code (def_stmt))
3780 case PLUS_EXPR:
3781 t = gimple_assign_rhs2 (def_stmt);
3782 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3783 return;
3784 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3785 v = gimple_assign_rhs1 (def_stmt);
3786 continue;
3787 case MULT_EXPR:
3788 t = gimple_assign_rhs2 (def_stmt);
3789 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3790 return;
3791 linear_step = tree_to_shwi (t);
3792 v = gimple_assign_rhs1 (def_stmt);
3793 continue;
3794 CASE_CONVERT:
3795 t = gimple_assign_rhs1 (def_stmt);
3796 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3797 || (TYPE_PRECISION (TREE_TYPE (v))
3798 < TYPE_PRECISION (TREE_TYPE (t))))
3799 return;
3800 if (!linear_step)
3801 linear_step = 1;
3802 v = t;
3803 continue;
3804 default:
3805 return;
3807 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3808 && loop->simduid
3809 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3810 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3811 == loop->simduid))
3813 if (!linear_step)
3814 linear_step = 1;
3815 arginfo->linear_step = linear_step;
3816 arginfo->op = base;
3817 arginfo->simd_lane_linear = true;
3818 return;
3823 /* Return the number of elements in vector type VECTYPE, which is associated
3824 with a SIMD clone. At present these vectors always have a constant
3825 length. */
3827 static unsigned HOST_WIDE_INT
3828 simd_clone_subparts (tree vectype)
3830 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3833 /* Function vectorizable_simd_clone_call.
3835 Check if STMT_INFO performs a function call that can be vectorized
3836 by calling a simd clone of the function.
3837 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3838 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3839 Return true if STMT_INFO is vectorizable in this way. */
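/* Illustrative sketch, not part of the original sources: the scalar
   code handled here is a call to a function declared with
   "#pragma omp declare simd" (or the "simd" attribute), e.g.

       #pragma omp declare simd notinbranch
       extern double foo (double);

       void
       bar (double *restrict a, const double *restrict b, int n)
       {
         #pragma omp simd
         for (int i = 0; i < n; i++)
           a[i] = foo (b[i]);
       }

   where the scalar call to foo is replaced by a call to one of foo's
   simd clones (e.g. a _ZGVbN2v_foo-style clone on x86_64), with the
   vector arguments built up as described below.  */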
3841 static bool
3842 vectorizable_simd_clone_call (stmt_vec_info stmt_info,
3843 gimple_stmt_iterator *gsi,
3844 stmt_vec_info *vec_stmt, slp_tree slp_node,
3845 stmt_vector_for_cost *)
3847 tree vec_dest;
3848 tree scalar_dest;
3849 tree op, type;
3850 tree vec_oprnd0 = NULL_TREE;
3851 stmt_vec_info prev_stmt_info;
3852 tree vectype;
3853 unsigned int nunits;
3854 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3855 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3856 vec_info *vinfo = stmt_info->vinfo;
3857 class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3858 tree fndecl, new_temp;
3859 int ncopies, j;
3860 auto_vec<simd_call_arg_info> arginfo;
3861 vec<tree> vargs = vNULL;
3862 size_t i, nargs;
3863 tree lhs, rtype, ratype;
3864 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3866 /* Is STMT a vectorizable call? */
3867 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3868 if (!stmt)
3869 return false;
3871 fndecl = gimple_call_fndecl (stmt);
3872 if (fndecl == NULL_TREE)
3873 return false;
3875 struct cgraph_node *node = cgraph_node::get (fndecl);
3876 if (node == NULL || node->simd_clones == NULL)
3877 return false;
3879 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3880 return false;
3882 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3883 && ! vec_stmt)
3884 return false;
3886 if (gimple_call_lhs (stmt)
3887 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3888 return false;
3890 gcc_checking_assert (!stmt_can_throw_internal (cfun, stmt));
3892 vectype = STMT_VINFO_VECTYPE (stmt_info);
3894 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt_info))
3895 return false;
3897 /* FORNOW */
3898 if (slp_node)
3899 return false;
3901 /* Process function arguments. */
3902 nargs = gimple_call_num_args (stmt);
3904 /* Bail out if the function has zero arguments. */
3905 if (nargs == 0)
3906 return false;
3908 arginfo.reserve (nargs, true);
3910 for (i = 0; i < nargs; i++)
3912 simd_call_arg_info thisarginfo;
3913 affine_iv iv;
3915 thisarginfo.linear_step = 0;
3916 thisarginfo.align = 0;
3917 thisarginfo.op = NULL_TREE;
3918 thisarginfo.simd_lane_linear = false;
3920 op = gimple_call_arg (stmt, i);
3921 if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt,
3922 &thisarginfo.vectype)
3923 || thisarginfo.dt == vect_uninitialized_def)
3925 if (dump_enabled_p ())
3926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3927 "use not simple.\n");
3928 return false;
3931 if (thisarginfo.dt == vect_constant_def
3932 || thisarginfo.dt == vect_external_def)
3933 gcc_assert (thisarginfo.vectype == NULL_TREE);
3934 else
3936 gcc_assert (thisarginfo.vectype != NULL_TREE);
3937 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype))
3939 if (dump_enabled_p ())
3940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3941 "vector mask arguments are not supported\n");
3942 return false;
3946 /* For linear arguments, the analyze phase should have saved
3947 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3948 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3949 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3951 gcc_assert (vec_stmt);
3952 thisarginfo.linear_step
3953 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3954 thisarginfo.op
3955 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3956 thisarginfo.simd_lane_linear
3957 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3958 == boolean_true_node);
3959 /* If loop has been peeled for alignment, we need to adjust it. */
3960 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3961 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3962 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3964 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3965 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3966 tree opt = TREE_TYPE (thisarginfo.op);
3967 bias = fold_convert (TREE_TYPE (step), bias);
3968 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3969 thisarginfo.op
3970 = fold_build2 (POINTER_TYPE_P (opt)
3971 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3972 thisarginfo.op, bias);
3975 else if (!vec_stmt
3976 && thisarginfo.dt != vect_constant_def
3977 && thisarginfo.dt != vect_external_def
3978 && loop_vinfo
3979 && TREE_CODE (op) == SSA_NAME
3980 && simple_iv (loop, loop_containing_stmt (stmt), op,
3981 &iv, false)
3982 && tree_fits_shwi_p (iv.step))
3984 thisarginfo.linear_step = tree_to_shwi (iv.step);
3985 thisarginfo.op = iv.base;
3987 else if ((thisarginfo.dt == vect_constant_def
3988 || thisarginfo.dt == vect_external_def)
3989 && POINTER_TYPE_P (TREE_TYPE (op)))
3990 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3991 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3992 linear too. */
3993 if (POINTER_TYPE_P (TREE_TYPE (op))
3994 && !thisarginfo.linear_step
3995 && !vec_stmt
3996 && thisarginfo.dt != vect_constant_def
3997 && thisarginfo.dt != vect_external_def
3998 && loop_vinfo
3999 && !slp_node
4000 && TREE_CODE (op) == SSA_NAME)
4001 vect_simd_lane_linear (op, loop, &thisarginfo);
4003 arginfo.quick_push (thisarginfo);
4006 unsigned HOST_WIDE_INT vf;
4007 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
4009 if (dump_enabled_p ())
4010 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4011 "not considering SIMD clones; not yet supported"
4012 " for variable-width vectors.\n");
4013 return false;
4016 unsigned int badness = 0;
4017 struct cgraph_node *bestn = NULL;
4018 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
4019 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
4020 else
4021 for (struct cgraph_node *n = node->simd_clones; n != NULL;
4022 n = n->simdclone->next_clone)
4024 unsigned int this_badness = 0;
4025 if (n->simdclone->simdlen > vf
4026 || n->simdclone->nargs != nargs)
4027 continue;
4028 if (n->simdclone->simdlen < vf)
4029 this_badness += (exact_log2 (vf)
4030 - exact_log2 (n->simdclone->simdlen)) * 1024;
4031 if (n->simdclone->inbranch)
4032 this_badness += 2048;
4033 int target_badness = targetm.simd_clone.usable (n);
4034 if (target_badness < 0)
4035 continue;
4036 this_badness += target_badness * 512;
4037 /* FORNOW: Have to add code to add the mask argument. */
4038 if (n->simdclone->inbranch)
4039 continue;
4040 for (i = 0; i < nargs; i++)
4042 switch (n->simdclone->args[i].arg_type)
4044 case SIMD_CLONE_ARG_TYPE_VECTOR:
4045 if (!useless_type_conversion_p
4046 (n->simdclone->args[i].orig_type,
4047 TREE_TYPE (gimple_call_arg (stmt, i))))
4048 i = -1;
4049 else if (arginfo[i].dt == vect_constant_def
4050 || arginfo[i].dt == vect_external_def
4051 || arginfo[i].linear_step)
4052 this_badness += 64;
4053 break;
4054 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4055 if (arginfo[i].dt != vect_constant_def
4056 && arginfo[i].dt != vect_external_def)
4057 i = -1;
4058 break;
4059 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4060 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4061 if (arginfo[i].dt == vect_constant_def
4062 || arginfo[i].dt == vect_external_def
4063 || (arginfo[i].linear_step
4064 != n->simdclone->args[i].linear_step))
4065 i = -1;
4066 break;
4067 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4068 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4069 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4070 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4071 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4072 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4073 /* FORNOW */
4074 i = -1;
4075 break;
4076 case SIMD_CLONE_ARG_TYPE_MASK:
4077 gcc_unreachable ();
4079 if (i == (size_t) -1)
4080 break;
4081 if (n->simdclone->args[i].alignment > arginfo[i].align)
4083 i = -1;
4084 break;
4086 if (arginfo[i].align)
4087 this_badness += (exact_log2 (arginfo[i].align)
4088 - exact_log2 (n->simdclone->args[i].alignment));
4090 if (i == (size_t) -1)
4091 continue;
4092 if (bestn == NULL || this_badness < badness)
4094 bestn = n;
4095 badness = this_badness;
4099 if (bestn == NULL)
4100 return false;
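/* Illustrative sketch of the badness computation above, not part of
   the original sources: with vf == 8 and two usable not-inbranch
   clones of simdlen 8 and 4, the simdlen-8 clone scores 0 while the
   simdlen-4 clone scores (exact_log2 (8) - exact_log2 (4)) * 1024
   == 1024, plus 64 for each vector argument that is invariant or
   linear, so the clone whose simdlen matches the vectorization factor
   is preferred.  (Inbranch clones are skipped entirely for now.)  */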
4102 for (i = 0; i < nargs; i++)
4103 if ((arginfo[i].dt == vect_constant_def
4104 || arginfo[i].dt == vect_external_def)
4105 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
4107 tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i));
4108 arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type,
4109 slp_node);
4110 if (arginfo[i].vectype == NULL
4111 || (simd_clone_subparts (arginfo[i].vectype)
4112 > bestn->simdclone->simdlen))
4113 return false;
4116 fndecl = bestn->decl;
4117 nunits = bestn->simdclone->simdlen;
4118 ncopies = vf / nunits;
4120 /* If the function isn't const, only allow it in simd loops where the
4121 user has asserted that at least nunits consecutive iterations can be
4122 performed using SIMD instructions. */
4123 if ((loop == NULL || (unsigned) loop->safelen < nunits)
4124 && gimple_vuse (stmt))
4125 return false;
4127 /* Sanity check: make sure that at least one copy of the vectorized stmt
4128 needs to be generated. */
4129 gcc_assert (ncopies >= 1);
4131 if (!vec_stmt) /* transformation not required. */
4133 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
4134 for (i = 0; i < nargs; i++)
4135 if ((bestn->simdclone->args[i].arg_type
4136 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
4137 || (bestn->simdclone->args[i].arg_type
4138 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
4140 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
4141 + 1);
4142 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
4143 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
4144 ? size_type_node : TREE_TYPE (arginfo[i].op);
4145 tree ls = build_int_cst (lst, arginfo[i].linear_step);
4146 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
4147 tree sll = arginfo[i].simd_lane_linear
4148 ? boolean_true_node : boolean_false_node;
4149 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
4151 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
4152 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4153 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4154 return true;
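/* Sketch of the STMT_VINFO_SIMD_CLONE_INFO layout recorded above (a
   clarifying note, not part of the original sources): element 0 holds
   the selected clone's decl, and for each linear argument I elements
   3*I+1, 3*I+2 and 3*I+3 hold the base value, the linear step and a
   boolean marking simd-lane linearity; the analysis code earlier in
   this function reads the same slots back on re-analysis.  */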
4157 /* Transform. */
4159 if (dump_enabled_p ())
4160 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
4162 /* Handle def. */
4163 scalar_dest = gimple_call_lhs (stmt);
4164 vec_dest = NULL_TREE;
4165 rtype = NULL_TREE;
4166 ratype = NULL_TREE;
4167 if (scalar_dest)
4169 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4170 rtype = TREE_TYPE (TREE_TYPE (fndecl));
4171 if (TREE_CODE (rtype) == ARRAY_TYPE)
4173 ratype = rtype;
4174 rtype = TREE_TYPE (ratype);
4178 prev_stmt_info = NULL;
4179 for (j = 0; j < ncopies; ++j)
4181 /* Build argument list for the vectorized call. */
4182 if (j == 0)
4183 vargs.create (nargs);
4184 else
4185 vargs.truncate (0);
4187 for (i = 0; i < nargs; i++)
4189 unsigned int k, l, m, o;
4190 tree atype;
4191 op = gimple_call_arg (stmt, i);
4192 switch (bestn->simdclone->args[i].arg_type)
4194 case SIMD_CLONE_ARG_TYPE_VECTOR:
4195 atype = bestn->simdclone->args[i].vector_type;
4196 o = nunits / simd_clone_subparts (atype);
4197 for (m = j * o; m < (j + 1) * o; m++)
4199 if (simd_clone_subparts (atype)
4200 < simd_clone_subparts (arginfo[i].vectype))
4202 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
4203 k = (simd_clone_subparts (arginfo[i].vectype)
4204 / simd_clone_subparts (atype));
4205 gcc_assert ((k & (k - 1)) == 0);
4206 if (m == 0)
4207 vec_oprnd0
4208 = vect_get_vec_def_for_operand (op, stmt_info);
4209 else
4211 vec_oprnd0 = arginfo[i].op;
4212 if ((m & (k - 1)) == 0)
4213 vec_oprnd0
4214 = vect_get_vec_def_for_stmt_copy (vinfo,
4215 vec_oprnd0);
4217 arginfo[i].op = vec_oprnd0;
4218 vec_oprnd0
4219 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
4220 bitsize_int (prec),
4221 bitsize_int ((m & (k - 1)) * prec));
4222 gassign *new_stmt
4223 = gimple_build_assign (make_ssa_name (atype),
4224 vec_oprnd0);
4225 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4226 vargs.safe_push (gimple_assign_lhs (new_stmt));
4228 else
4230 k = (simd_clone_subparts (atype)
4231 / simd_clone_subparts (arginfo[i].vectype));
4232 gcc_assert ((k & (k - 1)) == 0);
4233 vec<constructor_elt, va_gc> *ctor_elts;
4234 if (k != 1)
4235 vec_alloc (ctor_elts, k);
4236 else
4237 ctor_elts = NULL;
4238 for (l = 0; l < k; l++)
4240 if (m == 0 && l == 0)
4241 vec_oprnd0
4242 = vect_get_vec_def_for_operand (op, stmt_info);
4243 else
4244 vec_oprnd0
4245 = vect_get_vec_def_for_stmt_copy (vinfo,
4246 arginfo[i].op);
4247 arginfo[i].op = vec_oprnd0;
4248 if (k == 1)
4249 break;
4250 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
4251 vec_oprnd0);
4253 if (k == 1)
4254 vargs.safe_push (vec_oprnd0);
4255 else
4257 vec_oprnd0 = build_constructor (atype, ctor_elts);
4258 gassign *new_stmt
4259 = gimple_build_assign (make_ssa_name (atype),
4260 vec_oprnd0);
4261 vect_finish_stmt_generation (stmt_info, new_stmt,
4262 gsi);
4263 vargs.safe_push (gimple_assign_lhs (new_stmt));
4267 break;
4268 case SIMD_CLONE_ARG_TYPE_UNIFORM:
4269 vargs.safe_push (op);
4270 break;
4271 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
4272 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
4273 if (j == 0)
4275 gimple_seq stmts;
4276 arginfo[i].op
4277 = force_gimple_operand (unshare_expr (arginfo[i].op),
4278 &stmts, true, NULL_TREE);
4279 if (stmts != NULL)
4281 basic_block new_bb;
4282 edge pe = loop_preheader_edge (loop);
4283 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4284 gcc_assert (!new_bb);
4286 if (arginfo[i].simd_lane_linear)
4288 vargs.safe_push (arginfo[i].op);
4289 break;
4291 tree phi_res = copy_ssa_name (op);
4292 gphi *new_phi = create_phi_node (phi_res, loop->header);
4293 loop_vinfo->add_stmt (new_phi);
4294 add_phi_arg (new_phi, arginfo[i].op,
4295 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4296 enum tree_code code
4297 = POINTER_TYPE_P (TREE_TYPE (op))
4298 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4299 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4300 ? sizetype : TREE_TYPE (op);
4301 widest_int cst
4302 = wi::mul (bestn->simdclone->args[i].linear_step,
4303 ncopies * nunits);
4304 tree tcst = wide_int_to_tree (type, cst);
4305 tree phi_arg = copy_ssa_name (op);
4306 gassign *new_stmt
4307 = gimple_build_assign (phi_arg, code, phi_res, tcst);
4308 gimple_stmt_iterator si = gsi_after_labels (loop->header);
4309 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
4310 loop_vinfo->add_stmt (new_stmt);
4311 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
4312 UNKNOWN_LOCATION);
4313 arginfo[i].op = phi_res;
4314 vargs.safe_push (phi_res);
4316 else
4318 enum tree_code code
4319 = POINTER_TYPE_P (TREE_TYPE (op))
4320 ? POINTER_PLUS_EXPR : PLUS_EXPR;
4321 tree type = POINTER_TYPE_P (TREE_TYPE (op))
4322 ? sizetype : TREE_TYPE (op);
4323 widest_int cst
4324 = wi::mul (bestn->simdclone->args[i].linear_step,
4325 j * nunits);
4326 tree tcst = wide_int_to_tree (type, cst);
4327 new_temp = make_ssa_name (TREE_TYPE (op));
4328 gassign *new_stmt
4329 = gimple_build_assign (new_temp, code,
4330 arginfo[i].op, tcst);
4331 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4332 vargs.safe_push (new_temp);
4334 break;
4335 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
4336 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
4337 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
4338 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
4339 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
4340 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
4341 default:
4342 gcc_unreachable ();
4346 gcall *new_call = gimple_build_call_vec (fndecl, vargs);
4347 if (vec_dest)
4349 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
4350 if (ratype)
4351 new_temp = create_tmp_var (ratype);
4352 else if (simd_clone_subparts (vectype)
4353 == simd_clone_subparts (rtype))
4354 new_temp = make_ssa_name (vec_dest, new_call);
4355 else
4356 new_temp = make_ssa_name (rtype, new_call);
4357 gimple_call_set_lhs (new_call, new_temp);
4359 stmt_vec_info new_stmt_info
4360 = vect_finish_stmt_generation (stmt_info, new_call, gsi);
4362 if (vec_dest)
4364 if (simd_clone_subparts (vectype) < nunits)
4366 unsigned int k, l;
4367 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
4368 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
4369 k = nunits / simd_clone_subparts (vectype);
4370 gcc_assert ((k & (k - 1)) == 0);
4371 for (l = 0; l < k; l++)
4373 tree t;
4374 if (ratype)
4376 t = build_fold_addr_expr (new_temp);
4377 t = build2 (MEM_REF, vectype, t,
4378 build_int_cst (TREE_TYPE (t), l * bytes));
4380 else
4381 t = build3 (BIT_FIELD_REF, vectype, new_temp,
4382 bitsize_int (prec), bitsize_int (l * prec));
4383 gimple *new_stmt
4384 = gimple_build_assign (make_ssa_name (vectype), t);
4385 new_stmt_info
4386 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4388 if (j == 0 && l == 0)
4389 STMT_VINFO_VEC_STMT (stmt_info)
4390 = *vec_stmt = new_stmt_info;
4391 else
4392 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4394 prev_stmt_info = new_stmt_info;
4397 if (ratype)
4398 vect_clobber_variable (stmt_info, gsi, new_temp);
4399 continue;
4401 else if (simd_clone_subparts (vectype) > nunits)
4403 unsigned int k = (simd_clone_subparts (vectype)
4404 / simd_clone_subparts (rtype));
4405 gcc_assert ((k & (k - 1)) == 0);
4406 if ((j & (k - 1)) == 0)
4407 vec_alloc (ret_ctor_elts, k);
4408 if (ratype)
4410 unsigned int m, o = nunits / simd_clone_subparts (rtype);
4411 for (m = 0; m < o; m++)
4413 tree tem = build4 (ARRAY_REF, rtype, new_temp,
4414 size_int (m), NULL_TREE, NULL_TREE);
4415 gimple *new_stmt
4416 = gimple_build_assign (make_ssa_name (rtype), tem);
4417 new_stmt_info
4418 = vect_finish_stmt_generation (stmt_info, new_stmt,
4419 gsi);
4420 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
4421 gimple_assign_lhs (new_stmt));
4423 vect_clobber_variable (stmt_info, gsi, new_temp);
4425 else
4426 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
4427 if ((j & (k - 1)) != k - 1)
4428 continue;
4429 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
4430 gimple *new_stmt
4431 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
4432 new_stmt_info
4433 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4435 if ((unsigned) j == k - 1)
4436 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4437 else
4438 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4440 prev_stmt_info = new_stmt_info;
4441 continue;
4443 else if (ratype)
4445 tree t = build_fold_addr_expr (new_temp);
4446 t = build2 (MEM_REF, vectype, t,
4447 build_int_cst (TREE_TYPE (t), 0));
4448 gimple *new_stmt
4449 = gimple_build_assign (make_ssa_name (vec_dest), t);
4450 new_stmt_info
4451 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4452 vect_clobber_variable (stmt_info, gsi, new_temp);
4456 if (j == 0)
4457 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
4458 else
4459 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
4461 prev_stmt_info = new_stmt_info;
4464 vargs.release ();
4466 /* The call in STMT might prevent it from being removed in DCE.
4467 We cannot, however, remove it here, due to the way the SSA name
4468 it defines is mapped to the new definition. So just replace the
4469 rhs of the statement with something harmless. */
4471 if (slp_node)
4472 return true;
4474 gimple *new_stmt;
4475 if (scalar_dest)
4477 type = TREE_TYPE (scalar_dest);
4478 lhs = gimple_call_lhs (vect_orig_stmt (stmt_info)->stmt);
4479 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
4481 else
4482 new_stmt = gimple_build_nop ();
4483 vinfo->replace_stmt (gsi, vect_orig_stmt (stmt_info), new_stmt);
4484 unlink_stmt_vdef (stmt);
4486 return true;
4490 /* Function vect_gen_widened_results_half
4492 Create a vector stmt whose code, number of arguments, and result
4493 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4494 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4497 STMT_INFO is the original scalar stmt that we are vectorizing. */
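/* For instance (an illustrative sketch, not from the original
   sources), widening a V8HI operand to V4SI uses two such halves,

       low  = VEC_UNPACK_LO_EXPR <vec_oprnd0>;
       high = VEC_UNPACK_HI_EXPR <vec_oprnd0>;

   where each call to this helper builds one of the two assignments
   with a CODE chosen by supportable_widening_operation.  */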
4499 static gimple *
4500 vect_gen_widened_results_half (enum tree_code code,
4501 tree vec_oprnd0, tree vec_oprnd1, int op_type,
4502 tree vec_dest, gimple_stmt_iterator *gsi,
4503 stmt_vec_info stmt_info)
4505 gimple *new_stmt;
4506 tree new_temp;
4508 /* Generate half of the widened result: */
4509 gcc_assert (op_type == TREE_CODE_LENGTH (code));
4510 if (op_type != binary_op)
4511 vec_oprnd1 = NULL;
4512 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
4513 new_temp = make_ssa_name (vec_dest, new_stmt);
4514 gimple_assign_set_lhs (new_stmt, new_temp);
4515 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4517 return new_stmt;
4521 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4522 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4523 containing the scalar operand), and for the rest we get a copy with
4524 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4525 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4526 The vectors are collected into VEC_OPRNDS. */
4528 static void
4529 vect_get_loop_based_defs (tree *oprnd, stmt_vec_info stmt_info,
4530 vec<tree> *vec_oprnds, int multi_step_cvt)
4532 vec_info *vinfo = stmt_info->vinfo;
4533 tree vec_oprnd;
4535 /* Get first vector operand. */
4536 /* All the vector operands except the very first one (that is the scalar oprnd)
4537 are stmt copies. */
4538 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4539 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt_info);
4540 else
4541 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, *oprnd);
4543 vec_oprnds->quick_push (vec_oprnd);
4545 /* Get second vector operand. */
4546 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd);
4547 vec_oprnds->quick_push (vec_oprnd);
4549 *oprnd = vec_oprnd;
4551 /* For conversion in multiple steps, continue to get operands
4552 recursively. */
4553 if (multi_step_cvt)
4554 vect_get_loop_based_defs (oprnd, stmt_info, vec_oprnds,
4555 multi_step_cvt - 1);
4559 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4560 For multi-step conversions store the resulting vectors and call the function
4561 recursively. */
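/* For instance (an illustrative sketch, not from the original
   sources), demoting two V4SI operands to one V8HI result uses

       vect_r = VEC_PACK_TRUNC_EXPR <vop0, vop1>;

   and a two-step demotion such as V4SI -> V16QI first packs pairs of
   V4SI vectors into V8HI and then recurses to pack pairs of V8HI into
   V16QI.  */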
4563 static void
4564 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4565 int multi_step_cvt,
4566 stmt_vec_info stmt_info,
4567 vec<tree> vec_dsts,
4568 gimple_stmt_iterator *gsi,
4569 slp_tree slp_node, enum tree_code code,
4570 stmt_vec_info *prev_stmt_info)
4572 unsigned int i;
4573 tree vop0, vop1, new_tmp, vec_dest;
4575 vec_dest = vec_dsts.pop ();
4577 for (i = 0; i < vec_oprnds->length (); i += 2)
4579 /* Create demotion operation. */
4580 vop0 = (*vec_oprnds)[i];
4581 vop1 = (*vec_oprnds)[i + 1];
4582 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4583 new_tmp = make_ssa_name (vec_dest, new_stmt);
4584 gimple_assign_set_lhs (new_stmt, new_tmp);
4585 stmt_vec_info new_stmt_info
4586 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
4588 if (multi_step_cvt)
4589 /* Store the resulting vector for next recursive call. */
4590 (*vec_oprnds)[i/2] = new_tmp;
4591 else
4593 /* This is the last step of the conversion sequence. Store the
4594 vectors in SLP_NODE or in vector info of the scalar statement
4595 (or in STMT_VINFO_RELATED_STMT chain). */
4596 if (slp_node)
4597 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
4598 else
4600 if (!*prev_stmt_info)
4601 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
4602 else
4603 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt_info;
4605 *prev_stmt_info = new_stmt_info;
4610 /* For multi-step demotion operations we first generate demotion operations
4611 from the source type to the intermediate types, and then combine the
4612 results (stored in VEC_OPRNDS) in demotion operation to the destination
4613 type. */
4614 if (multi_step_cvt)
4616 /* At each level of recursion we have half of the operands we had at the
4617 previous level. */
4618 vec_oprnds->truncate ((i+1)/2);
4619 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4620 stmt_info, vec_dsts, gsi,
4621 slp_node, VEC_PACK_TRUNC_EXPR,
4622 prev_stmt_info);
4625 vec_dsts.quick_push (vec_dest);
4629 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4630 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4631 STMT_INFO. For multi-step conversions store the resulting vectors and
4632 call the function recursively. */
4634 static void
4635 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4636 vec<tree> *vec_oprnds1,
4637 stmt_vec_info stmt_info, tree vec_dest,
4638 gimple_stmt_iterator *gsi,
4639 enum tree_code code1,
4640 enum tree_code code2, int op_type)
4642 int i;
4643 tree vop0, vop1, new_tmp1, new_tmp2;
4644 gimple *new_stmt1, *new_stmt2;
4645 vec<tree> vec_tmp = vNULL;
4647 vec_tmp.create (vec_oprnds0->length () * 2);
4648 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4650 if (op_type == binary_op)
4651 vop1 = (*vec_oprnds1)[i];
4652 else
4653 vop1 = NULL_TREE;
4655 /* Generate the two halves of promotion operation. */
4656 new_stmt1 = vect_gen_widened_results_half (code1, vop0, vop1,
4657 op_type, vec_dest, gsi,
4658 stmt_info);
4659 new_stmt2 = vect_gen_widened_results_half (code2, vop0, vop1,
4660 op_type, vec_dest, gsi,
4661 stmt_info);
4662 if (is_gimple_call (new_stmt1))
4664 new_tmp1 = gimple_call_lhs (new_stmt1);
4665 new_tmp2 = gimple_call_lhs (new_stmt2);
4667 else
4669 new_tmp1 = gimple_assign_lhs (new_stmt1);
4670 new_tmp2 = gimple_assign_lhs (new_stmt2);
4673 /* Store the results for the next step. */
4674 vec_tmp.quick_push (new_tmp1);
4675 vec_tmp.quick_push (new_tmp2);
4678 vec_oprnds0->release ();
4679 *vec_oprnds0 = vec_tmp;
4683 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4684 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4685 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4686 Return true if STMT_INFO is vectorizable in this way. */
4688 static bool
4689 vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
4690 stmt_vec_info *vec_stmt, slp_tree slp_node,
4691 stmt_vector_for_cost *cost_vec)
4693 tree vec_dest;
4694 tree scalar_dest;
4695 tree op0, op1 = NULL_TREE;
4696 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4698 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4699 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4700 tree new_temp;
4701 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4702 int ndts = 2;
4703 stmt_vec_info prev_stmt_info;
4704 poly_uint64 nunits_in;
4705 poly_uint64 nunits_out;
4706 tree vectype_out, vectype_in;
4707 int ncopies, i, j;
4708 tree lhs_type, rhs_type;
4709 enum { NARROW, NONE, WIDEN } modifier;
4710 vec<tree> vec_oprnds0 = vNULL;
4711 vec<tree> vec_oprnds1 = vNULL;
4712 tree vop0;
4713 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4714 vec_info *vinfo = stmt_info->vinfo;
4715 int multi_step_cvt = 0;
4716 vec<tree> interm_types = vNULL;
4717 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4718 int op_type;
4719 unsigned short fltsz;
4721 /* Is STMT a vectorizable conversion? */
4723 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4724 return false;
4726 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4727 && ! vec_stmt)
4728 return false;
4730 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
4731 if (!stmt)
4732 return false;
4734 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4735 return false;
4737 code = gimple_assign_rhs_code (stmt);
4738 if (!CONVERT_EXPR_CODE_P (code)
4739 && code != FIX_TRUNC_EXPR
4740 && code != FLOAT_EXPR
4741 && code != WIDEN_MULT_EXPR
4742 && code != WIDEN_LSHIFT_EXPR)
4743 return false;
4745 op_type = TREE_CODE_LENGTH (code);
4747 /* Check types of lhs and rhs. */
4748 scalar_dest = gimple_assign_lhs (stmt);
4749 lhs_type = TREE_TYPE (scalar_dest);
4750 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4752 op0 = gimple_assign_rhs1 (stmt);
4753 rhs_type = TREE_TYPE (op0);
4755 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4756 && !((INTEGRAL_TYPE_P (lhs_type)
4757 && INTEGRAL_TYPE_P (rhs_type))
4758 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4759 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4760 return false;
4762 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4763 && ((INTEGRAL_TYPE_P (lhs_type)
4764 && !type_has_mode_precision_p (lhs_type))
4765 || (INTEGRAL_TYPE_P (rhs_type)
4766 && !type_has_mode_precision_p (rhs_type))))
4768 if (dump_enabled_p ())
4769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4770 "type conversion to/from bit-precision unsupported."
4771 "\n");
4772 return false;
4775 /* Check the operands of the operation. */
4776 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype_in))
4778 if (dump_enabled_p ())
4779 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4780 "use not simple.\n");
4781 return false;
4783 if (op_type == binary_op)
4785 bool ok;
4787 op1 = gimple_assign_rhs2 (stmt);
4788 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4789 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4790 OP1. */
4791 if (CONSTANT_CLASS_P (op0))
4792 ok = vect_is_simple_use (op1, vinfo, &dt[1], &vectype_in);
4793 else
4794 ok = vect_is_simple_use (op1, vinfo, &dt[1]);
4796 if (!ok)
4798 if (dump_enabled_p ())
4799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4800 "use not simple.\n");
4801 return false;
4805 /* If op0 is an external or constant def, infer the vector type
4806 from the scalar type. */
4807 if (!vectype_in)
4808 vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type, slp_node);
4809 if (vec_stmt)
4810 gcc_assert (vectype_in);
4811 if (!vectype_in)
4813 if (dump_enabled_p ())
4814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4815 "no vectype for scalar type %T\n", rhs_type);
4817 return false;
4820 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4821 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4823 if (dump_enabled_p ())
4824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4825 "can't convert between boolean and non "
4826 "boolean vectors %T\n", rhs_type);
4828 return false;
4831 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4832 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4833 if (known_eq (nunits_out, nunits_in))
4834 modifier = NONE;
4835 else if (multiple_p (nunits_out, nunits_in))
4836 modifier = NARROW;
4837 else
4839 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4840 modifier = WIDEN;
4843 /* Multiple types in SLP are handled by creating the appropriate number of
4844 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4845 case of SLP. */
4846 if (slp_node)
4847 ncopies = 1;
4848 else if (modifier == NARROW)
4849 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4850 else
4851 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4853 /* Sanity check: make sure that at least one copy of the vectorized stmt
4854 needs to be generated. */
4855 gcc_assert (ncopies >= 1);
4857 bool found_mode = false;
4858 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4859 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4860 opt_scalar_mode rhs_mode_iter;
4862 /* Supportable by target? */
4863 switch (modifier)
4865 case NONE:
4866 if (code != FIX_TRUNC_EXPR
4867 && code != FLOAT_EXPR
4868 && !CONVERT_EXPR_CODE_P (code))
4869 return false;
4870 if (supportable_convert_operation (code, vectype_out, vectype_in, &code1))
4871 break;
4872 /* FALLTHRU */
4873 unsupported:
4874 if (dump_enabled_p ())
4875 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4876 "conversion not supported by target.\n");
4877 return false;
4879 case WIDEN:
4880 if (supportable_widening_operation (code, stmt_info, vectype_out,
4881 vectype_in, &code1, &code2,
4882 &multi_step_cvt, &interm_types))
4884 /* Binary widening operation can only be supported directly by the
4885 architecture. */
4886 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4887 break;
4890 if (code != FLOAT_EXPR
4891 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4892 goto unsupported;
4894 fltsz = GET_MODE_SIZE (lhs_mode);
4895 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4897 rhs_mode = rhs_mode_iter.require ();
4898 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4899 break;
4901 cvt_type
4902 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4903 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4904 if (cvt_type == NULL_TREE)
4905 goto unsupported;
4907 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4909 if (!supportable_convert_operation (code, vectype_out,
4910 cvt_type, &codecvt1))
4911 goto unsupported;
4913 else if (!supportable_widening_operation (code, stmt_info,
4914 vectype_out, cvt_type,
4915 &codecvt1, &codecvt2,
4916 &multi_step_cvt,
4917 &interm_types))
4918 continue;
4919 else
4920 gcc_assert (multi_step_cvt == 0);
4922 if (supportable_widening_operation (NOP_EXPR, stmt_info, cvt_type,
4923 vectype_in, &code1, &code2,
4924 &multi_step_cvt, &interm_types))
4926 found_mode = true;
4927 break;
4931 if (!found_mode)
4932 goto unsupported;
4934 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4935 codecvt2 = ERROR_MARK;
4936 else
4938 multi_step_cvt++;
4939 interm_types.safe_push (cvt_type);
4940 cvt_type = NULL_TREE;
4942 break;
4944 case NARROW:
4945 gcc_assert (op_type == unary_op);
4946 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4947 &code1, &multi_step_cvt,
4948 &interm_types))
4949 break;
4951 if (code != FIX_TRUNC_EXPR
4952 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4953 goto unsupported;
4955 cvt_type
4956 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4957 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4958 if (cvt_type == NULL_TREE)
4959 goto unsupported;
4960 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4961 &codecvt1))
4962 goto unsupported;
4963 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4964 &code1, &multi_step_cvt,
4965 &interm_types))
4966 break;
4967 goto unsupported;
4969 default:
4970 gcc_unreachable ();
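/* Illustrative sketch, not part of the original sources: a widening
   int -> double conversion on a target with no direct widening
   support goes through CVT_TYPE == V2DI here: the V4SI input is first
   unpacked with VEC_UNPACK_{LO,HI}_EXPR into two V2DI vectors and
   each of those is then converted to V2DF with FLOAT_EXPR.
   Conversely, a narrowing double -> short conversion uses
   FIX_TRUNC_EXPR to an intermediate integer vector followed by one or
   more VEC_PACK_TRUNC_EXPR steps.  */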
4973 if (!vec_stmt) /* transformation not required. */
4975 DUMP_VECT_SCOPE ("vectorizable_conversion");
4976 if (modifier == NONE)
4978 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4979 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
4980 cost_vec);
4982 else if (modifier == NARROW)
4984 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4985 /* The final packing step produces one vector result per copy. */
4986 unsigned int nvectors
4987 = (slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies);
4988 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
4989 multi_step_cvt, cost_vec);
4991 else
4993 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4994 /* The initial unpacking step produces two vector results
4995 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4996 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4997 unsigned int nvectors
4998 = (slp_node
4999 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) >> multi_step_cvt
5000 : ncopies * 2);
5001 vect_model_promotion_demotion_cost (stmt_info, dt, nvectors,
5002 multi_step_cvt, cost_vec);
5004 interm_types.release ();
5005 return true;
5008 /* Transform. */
5009 if (dump_enabled_p ())
5010 dump_printf_loc (MSG_NOTE, vect_location,
5011 "transform conversion. ncopies = %d.\n", ncopies);
5013 if (op_type == binary_op)
5015 if (CONSTANT_CLASS_P (op0))
5016 op0 = fold_convert (TREE_TYPE (op1), op0);
5017 else if (CONSTANT_CLASS_P (op1))
5018 op1 = fold_convert (TREE_TYPE (op0), op1);
5021 /* In case of multi-step conversion, we first generate conversion operations
5022 to the intermediate types, and then from those types to the final one.
5023 We create vector destinations for the intermediate types (TYPES) received
5024 from supportable_*_operation, and store them in the correct order
5025 for future use in vect_create_vectorized_*_stmts (). */
5026 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
5027 vec_dest = vect_create_destination_var (scalar_dest,
5028 (cvt_type && modifier == WIDEN)
5029 ? cvt_type : vectype_out);
5030 vec_dsts.quick_push (vec_dest);
5032 if (multi_step_cvt)
5034 for (i = interm_types.length () - 1;
5035 interm_types.iterate (i, &intermediate_type); i--)
5037 vec_dest = vect_create_destination_var (scalar_dest,
5038 intermediate_type);
5039 vec_dsts.quick_push (vec_dest);
5043 if (cvt_type)
5044 vec_dest = vect_create_destination_var (scalar_dest,
5045 modifier == WIDEN
5046 ? vectype_out : cvt_type);
5048 if (!slp_node)
5050 if (modifier == WIDEN)
5052 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
5053 if (op_type == binary_op)
5054 vec_oprnds1.create (1);
5056 else if (modifier == NARROW)
5057 vec_oprnds0.create (
5058 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
5060 else if (code == WIDEN_LSHIFT_EXPR)
5061 vec_oprnds1.create (slp_node->vec_stmts_size);
5063 last_oprnd = op0;
5064 prev_stmt_info = NULL;
5065 switch (modifier)
5067 case NONE:
5068 for (j = 0; j < ncopies; j++)
5070 if (j == 0)
5071 vect_get_vec_defs (op0, NULL, stmt_info, &vec_oprnds0,
5072 NULL, slp_node);
5073 else
5074 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, NULL);
5076 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5078 stmt_vec_info new_stmt_info;
5079 /* Arguments are ready. Create the new vector stmt. */
5080 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
5081 gassign *new_stmt = gimple_build_assign (vec_dest, code1, vop0);
5082 new_temp = make_ssa_name (vec_dest, new_stmt);
5083 gimple_assign_set_lhs (new_stmt, new_temp);
5084 new_stmt_info
5085 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5087 if (slp_node)
5088 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5089 else
5091 if (!prev_stmt_info)
5092 STMT_VINFO_VEC_STMT (stmt_info)
5093 = *vec_stmt = new_stmt_info;
5094 else
5095 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5096 prev_stmt_info = new_stmt_info;
5100 break;
5102 case WIDEN:
5103 /* In case the vectorization factor (VF) is bigger than the number
5104 of elements that we can fit in a vectype (nunits), we have to
5105 generate more than one vector stmt - i.e. - we need to "unroll"
5106 the vector stmt by a factor VF/nunits. */
5107 for (j = 0; j < ncopies; j++)
5109 /* Handle uses. */
5110 if (j == 0)
5112 if (slp_node)
5114 if (code == WIDEN_LSHIFT_EXPR)
5116 unsigned int k;
5118 vec_oprnd1 = op1;
5119 /* Store vec_oprnd1 for every vector stmt to be created
5120 for SLP_NODE. We check during the analysis that all
5121 the shift arguments are the same. */
5122 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5123 vec_oprnds1.quick_push (vec_oprnd1);
5125 vect_get_vec_defs (op0, NULL_TREE, stmt_info,
5126 &vec_oprnds0, NULL, slp_node);
5128 else
5129 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
5130 &vec_oprnds1, slp_node);
5132 else
5134 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt_info);
5135 vec_oprnds0.quick_push (vec_oprnd0);
5136 if (op_type == binary_op)
5138 if (code == WIDEN_LSHIFT_EXPR)
5139 vec_oprnd1 = op1;
5140 else
5141 vec_oprnd1
5142 = vect_get_vec_def_for_operand (op1, stmt_info);
5143 vec_oprnds1.quick_push (vec_oprnd1);
5147 else
5149 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd0);
5150 vec_oprnds0.truncate (0);
5151 vec_oprnds0.quick_push (vec_oprnd0);
5152 if (op_type == binary_op)
5154 if (code == WIDEN_LSHIFT_EXPR)
5155 vec_oprnd1 = op1;
5156 else
5157 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
5158 vec_oprnd1);
5159 vec_oprnds1.truncate (0);
5160 vec_oprnds1.quick_push (vec_oprnd1);
5164 /* Arguments are ready. Create the new vector stmts. */
5165 for (i = multi_step_cvt; i >= 0; i--)
5167 tree this_dest = vec_dsts[i];
5168 enum tree_code c1 = code1, c2 = code2;
5169 if (i == 0 && codecvt2 != ERROR_MARK)
5171 c1 = codecvt1;
5172 c2 = codecvt2;
5174 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
5175 &vec_oprnds1, stmt_info,
5176 this_dest, gsi,
5177 c1, c2, op_type);
5180 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5182 stmt_vec_info new_stmt_info;
5183 if (cvt_type)
5185 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5186 new_temp = make_ssa_name (vec_dest);
5187 gassign *new_stmt
5188 = gimple_build_assign (new_temp, codecvt1, vop0);
5189 new_stmt_info
5190 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5192 else
5193 new_stmt_info = vinfo->lookup_def (vop0);
5195 if (slp_node)
5196 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5197 else
5199 if (!prev_stmt_info)
5200 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt_info;
5201 else
5202 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5203 prev_stmt_info = new_stmt_info;
5208 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5209 break;
5211 case NARROW:
5212 /* In case the vectorization factor (VF) is bigger than the number
5213 of elements that we can fit in a vectype (nunits), we have to
5214 generate more than one vector stmt - i.e. - we need to "unroll"
5215 the vector stmt by a factor VF/nunits. */
5216 for (j = 0; j < ncopies; j++)
5218 /* Handle uses. */
5219 if (slp_node)
5220 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5221 slp_node);
5222 else
5224 vec_oprnds0.truncate (0);
5225 vect_get_loop_based_defs (&last_oprnd, stmt_info, &vec_oprnds0,
5226 vect_pow2 (multi_step_cvt) - 1);
5229 /* Arguments are ready. Create the new vector stmts. */
5230 if (cvt_type)
5231 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5233 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
5234 new_temp = make_ssa_name (vec_dest);
5235 gassign *new_stmt
5236 = gimple_build_assign (new_temp, codecvt1, vop0);
5237 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5238 vec_oprnds0[i] = new_temp;
5241 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
5242 stmt_info, vec_dsts, gsi,
5243 slp_node, code1,
5244 &prev_stmt_info);
5247 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5248 break;
5251 vec_oprnds0.release ();
5252 vec_oprnds1.release ();
5253 interm_types.release ();
5255 return true;
5258 /* Return true if we can assume from the scalar form of STMT_INFO that
5259 neither the scalar nor the vector forms will generate code. STMT_INFO
5260 is known not to involve a data reference. */
5262 bool
5263 vect_nop_conversion_p (stmt_vec_info stmt_info)
5265 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5266 if (!stmt)
5267 return false;
5269 tree lhs = gimple_assign_lhs (stmt);
5270 tree_code code = gimple_assign_rhs_code (stmt);
5271 tree rhs = gimple_assign_rhs1 (stmt);
5273 if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
5274 return true;
5276 if (CONVERT_EXPR_CODE_P (code))
5277 return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));
5279 return false;
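/* Illustrative sketch, not part of the original sources: under the
   test above, "u_2 = (unsigned int) i_1;" with a 32-bit int i_1 is a
   nop conversion (same precision, only the sign differs), and so is a
   VIEW_CONVERT_EXPR; neither generates code in scalar or vector
   form.  */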
5282 /* Function vectorizable_assignment.
5284 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5285 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5286 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5287 Return true if STMT_INFO is vectorizable in this way. */
5289 static bool
5290 vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5291 stmt_vec_info *vec_stmt, slp_tree slp_node,
5292 stmt_vector_for_cost *cost_vec)
5294 tree vec_dest;
5295 tree scalar_dest;
5296 tree op;
5297 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5298 tree new_temp;
5299 enum vect_def_type dt[1] = {vect_unknown_def_type};
5300 int ndts = 1;
5301 int ncopies;
5302 int i, j;
5303 vec<tree> vec_oprnds = vNULL;
5304 tree vop;
5305 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5306 vec_info *vinfo = stmt_info->vinfo;
5307 stmt_vec_info prev_stmt_info = NULL;
5308 enum tree_code code;
5309 tree vectype_in;
5311 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5312 return false;
5314 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5315 && ! vec_stmt)
5316 return false;
5318 /* Is vectorizable assignment? */
5319 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5320 if (!stmt)
5321 return false;
5323 scalar_dest = gimple_assign_lhs (stmt);
5324 if (TREE_CODE (scalar_dest) != SSA_NAME)
5325 return false;
5327 code = gimple_assign_rhs_code (stmt);
5328 if (gimple_assign_single_p (stmt)
5329 || code == PAREN_EXPR
5330 || CONVERT_EXPR_CODE_P (code))
5331 op = gimple_assign_rhs1 (stmt);
5332 else
5333 return false;
5335 if (code == VIEW_CONVERT_EXPR)
5336 op = TREE_OPERAND (op, 0);
5338 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5339 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5341 /* Multiple types in SLP are handled by creating the appropriate number of
5342 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5343 case of SLP. */
5344 if (slp_node)
5345 ncopies = 1;
5346 else
5347 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5349 gcc_assert (ncopies >= 1);
5351 if (!vect_is_simple_use (op, vinfo, &dt[0], &vectype_in))
5353 if (dump_enabled_p ())
5354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5355 "use not simple.\n");
5356 return false;
5359 /* We can handle NOP_EXPR conversions that do not change the number
5360 of elements or the vector size. */
5361 if ((CONVERT_EXPR_CODE_P (code)
5362 || code == VIEW_CONVERT_EXPR)
5363 && (!vectype_in
5364 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
5365 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
5366 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
5367 return false;
5369 /* We do not handle bit-precision changes. */
5370 if ((CONVERT_EXPR_CODE_P (code)
5371 || code == VIEW_CONVERT_EXPR)
5372 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
5373 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5374 || !type_has_mode_precision_p (TREE_TYPE (op)))
5375 /* But a conversion that does not change the bit-pattern is ok. */
5376 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
5377 > TYPE_PRECISION (TREE_TYPE (op)))
5378 && TYPE_UNSIGNED (TREE_TYPE (op)))
5379 /* Conversion between boolean types of different sizes is
5380 a simple assignment in case their vectypes are the same
5381 boolean vector type. */
5382 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
5383 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
5385 if (dump_enabled_p ())
5386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5387 "type conversion to/from bit-precision "
5388 "unsupported.\n");
5389 return false;
5392 if (!vec_stmt) /* transformation not required. */
5394 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
5395 DUMP_VECT_SCOPE ("vectorizable_assignment");
5396 if (!vect_nop_conversion_p (stmt_info))
5397 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node,
5398 cost_vec);
5399 return true;
5402 /* Transform. */
5403 if (dump_enabled_p ())
5404 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
5406 /* Handle def. */
5407 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5409 /* Handle use. */
5410 for (j = 0; j < ncopies; j++)
5412 /* Handle uses. */
5413 if (j == 0)
5414 vect_get_vec_defs (op, NULL, stmt_info, &vec_oprnds, NULL, slp_node);
5415 else
5416 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds, NULL);
5418 /* Arguments are ready. Create the new vector stmt. */
5419 stmt_vec_info new_stmt_info = NULL;
5420 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
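/* Only conversions verified above to keep the number of lanes, the
   vector size and (in effect) the bit representation reach this point,
   so they can be emitted as a VIEW_CONVERT_EXPR of the vector operand;
   e.g. a signed<->unsigned int copy becomes (illustrative GIMPLE only,
   lane count depends on the target)
     vect_x = VIEW_CONVERT_EXPR<vector(4) int>(vect_y);  */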
5422 if (CONVERT_EXPR_CODE_P (code)
5423 || code == VIEW_CONVERT_EXPR)
5424 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
5425 gassign *new_stmt = gimple_build_assign (vec_dest, vop);
5426 new_temp = make_ssa_name (vec_dest, new_stmt);
5427 gimple_assign_set_lhs (new_stmt, new_temp);
5428 new_stmt_info
5429 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5430 if (slp_node)
5431 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5434 if (slp_node)
5435 continue;
5437 if (j == 0)
5438 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5439 else
5440 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5442 prev_stmt_info = new_stmt_info;
5445 vec_oprnds.release ();
5446 return true;
5450 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5451 either as shift by a scalar or by a vector. */
5453 bool
5454 vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
5457 machine_mode vec_mode;
5458 optab optab;
5459 int icode;
5460 tree vectype;
5462 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
5463 if (!vectype)
5464 return false;
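/* Try the vector-shifted-by-scalar optab first; if the target does not
   provide it, fall back to the vector-shifted-by-vector optab.  */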
5466 optab = optab_for_tree_code (code, vectype, optab_scalar);
5467 if (!optab
5468 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
5470 optab = optab_for_tree_code (code, vectype, optab_vector);
5471 if (!optab
5472 || (optab_handler (optab, TYPE_MODE (vectype))
5473 == CODE_FOR_nothing))
5474 return false;
5477 vec_mode = TYPE_MODE (vectype);
5478 icode = (int) optab_handler (optab, vec_mode);
5479 if (icode == CODE_FOR_nothing)
5480 return false;
5482 return true;
5486 /* Function vectorizable_shift.
5488 Check if STMT_INFO performs a shift operation that can be vectorized.
5489 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5490 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5491 Return true if STMT_INFO is vectorizable in this way. */
5493 static bool
5494 vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5495 stmt_vec_info *vec_stmt, slp_tree slp_node,
5496 stmt_vector_for_cost *cost_vec)
5498 tree vec_dest;
5499 tree scalar_dest;
5500 tree op0, op1 = NULL;
5501 tree vec_oprnd1 = NULL_TREE;
5502 tree vectype;
5503 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5504 enum tree_code code;
5505 machine_mode vec_mode;
5506 tree new_temp;
5507 optab optab;
5508 int icode;
5509 machine_mode optab_op2_mode;
5510 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
5511 int ndts = 2;
5512 stmt_vec_info prev_stmt_info;
5513 poly_uint64 nunits_in;
5514 poly_uint64 nunits_out;
5515 tree vectype_out;
5516 tree op1_vectype;
5517 int ncopies;
5518 int j, i;
5519 vec<tree> vec_oprnds0 = vNULL;
5520 vec<tree> vec_oprnds1 = vNULL;
5521 tree vop0, vop1;
5522 unsigned int k;
5523 bool scalar_shift_arg = true;
5524 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5525 vec_info *vinfo = stmt_info->vinfo;
5526 bool incompatible_op1_vectype_p = false;
5528 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5529 return false;
5531 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5532 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
5533 && ! vec_stmt)
5534 return false;
5536 /* Is STMT a vectorizable shift/rotate operation? */
5537 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5538 if (!stmt)
5539 return false;
5541 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5542 return false;
5544 code = gimple_assign_rhs_code (stmt);
5546 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5547 || code == RROTATE_EXPR))
5548 return false;
5550 scalar_dest = gimple_assign_lhs (stmt);
5551 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5552 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5554 if (dump_enabled_p ())
5555 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5556 "bit-precision shifts not supported.\n");
5557 return false;
5560 op0 = gimple_assign_rhs1 (stmt);
5561 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
5563 if (dump_enabled_p ())
5564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5565 "use not simple.\n");
5566 return false;
5568 /* If op0 is an external or constant def, infer the vector type
5569 from the scalar type. */
5570 if (!vectype)
5571 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
5572 if (vec_stmt)
5573 gcc_assert (vectype);
5574 if (!vectype)
5576 if (dump_enabled_p ())
5577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5578 "no vectype for scalar type\n");
5579 return false;
5582 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5583 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5584 if (maybe_ne (nunits_out, nunits_in))
5585 return false;
5587 op1 = gimple_assign_rhs2 (stmt);
5588 stmt_vec_info op1_def_stmt_info;
5589 if (!vect_is_simple_use (op1, vinfo, &dt[1], &op1_vectype,
5590 &op1_def_stmt_info))
5592 if (dump_enabled_p ())
5593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5594 "use not simple.\n");
5595 return false;
5598 /* Multiple types in SLP are handled by creating the appropriate number of
5599 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5600 case of SLP. */
5601 if (slp_node)
5602 ncopies = 1;
5603 else
5604 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5606 gcc_assert (ncopies >= 1);
5608 /* Determine whether the shift amount is a vector or a scalar. If the
5609 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5611 if ((dt[1] == vect_internal_def
5612 || dt[1] == vect_induction_def
5613 || dt[1] == vect_nested_cycle)
5614 && !slp_node)
5615 scalar_shift_arg = false;
5616 else if (dt[1] == vect_constant_def
5617 || dt[1] == vect_external_def
5618 || dt[1] == vect_internal_def)
5620 /* In SLP, we need to check whether the shift count is the same
5621 in all the scalar stmts; in loops, if it is a constant or
5622 invariant, it is always a scalar shift.
5623 if (slp_node)
5625 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5626 stmt_vec_info slpstmt_info;
5628 FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
5630 gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
5631 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5632 scalar_shift_arg = false;
5635 /* For internal SLP defs we have to make sure we see scalar stmts
5636 for all vector elements.
5637 ??? For different vectors we could resort to a different
5638 scalar shift operand but code-generation below simply always
5639 takes the first. */
5640 if (dt[1] == vect_internal_def
5641 && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
5642 stmts.length ()))
5643 scalar_shift_arg = false;
5646 /* If the shift amount is computed by a pattern stmt we cannot
5647 use the scalar amount directly, so give up and use a vector
5648 shift. */
5649 if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
5650 scalar_shift_arg = false;
5652 else
5654 if (dump_enabled_p ())
5655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5656 "operand mode requires invariant argument.\n");
5657 return false;
5660 /* Vector shifted by vector. */
5661 bool was_scalar_shift_arg = scalar_shift_arg;
5662 if (!scalar_shift_arg)
5664 optab = optab_for_tree_code (code, vectype, optab_vector);
5665 if (dump_enabled_p ())
5666 dump_printf_loc (MSG_NOTE, vect_location,
5667 "vector/vector shift/rotate found.\n");
5669 if (!op1_vectype)
5670 op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
5671 slp_node);
5672 incompatible_op1_vectype_p
5673 = (op1_vectype == NULL_TREE
5674 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
5675 TYPE_VECTOR_SUBPARTS (vectype))
5676 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
5677 if (incompatible_op1_vectype_p
5678 && (!slp_node
5679 || SLP_TREE_DEF_TYPE
5680 (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def))
5682 if (dump_enabled_p ())
5683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5684 "unusable type for last operand in"
5685 " vector/vector shift/rotate.\n");
5686 return false;
5689 /* See if the machine has a vector shifted by scalar insn and if not
5690 then see if it has a vector shifted by vector insn. */
5691 else
5693 optab = optab_for_tree_code (code, vectype, optab_scalar);
5694 if (optab
5695 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5697 if (dump_enabled_p ())
5698 dump_printf_loc (MSG_NOTE, vect_location,
5699 "vector/scalar shift/rotate found.\n");
5701 else
5703 optab = optab_for_tree_code (code, vectype, optab_vector);
5704 if (optab
5705 && (optab_handler (optab, TYPE_MODE (vectype))
5706 != CODE_FOR_nothing))
5708 scalar_shift_arg = false;
5710 if (dump_enabled_p ())
5711 dump_printf_loc (MSG_NOTE, vect_location,
5712 "vector/vector shift/rotate found.\n");
5714 /* Unlike the other binary operators, shifts/rotates have
5715 an rhs of type int rather than the same type as the lhs,
5716 so make sure the scalar has the right type if we are
5717 dealing with vectors of long long/long/short/char. */
5718 incompatible_op1_vectype_p
5719 = !tree_nop_conversion_p (TREE_TYPE (vectype),
5720 TREE_TYPE (op1));
5725 /* Supportable by target? */
5726 if (!optab)
5728 if (dump_enabled_p ())
5729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5730 "no optab.\n");
5731 return false;
5733 vec_mode = TYPE_MODE (vectype);
5734 icode = (int) optab_handler (optab, vec_mode);
5735 if (icode == CODE_FOR_nothing)
5737 if (dump_enabled_p ())
5738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5739 "op not supported by target.\n");
5740 /* Check only during analysis. */
5741 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5742 || (!vec_stmt
5743 && !vect_worthwhile_without_simd_p (vinfo, code)))
5744 return false;
5745 if (dump_enabled_p ())
5746 dump_printf_loc (MSG_NOTE, vect_location,
5747 "proceeding using word mode.\n");
5750 /* Worthwhile without SIMD support? Check only during analysis. */
5751 if (!vec_stmt
5752 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5753 && !vect_worthwhile_without_simd_p (vinfo, code))
5755 if (dump_enabled_p ())
5756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5757 "not worthwhile without SIMD support.\n");
5758 return false;
5761 if (!vec_stmt) /* transformation not required. */
5763 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5764 DUMP_VECT_SCOPE ("vectorizable_shift");
5765 vect_model_simple_cost (stmt_info, ncopies, dt,
5766 scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
5767 return true;
5770 /* Transform. */
5772 if (dump_enabled_p ())
5773 dump_printf_loc (MSG_NOTE, vect_location,
5774 "transform binary/unary operation.\n");
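/* If the shift amount has a type incompatible with the vector element
   type, convert it to the element type up front; a non-constant amount
   is additionally given a separate definition via vect_init_vector.  */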
5776 if (incompatible_op1_vectype_p && !slp_node)
5778 op1 = fold_convert (TREE_TYPE (vectype), op1);
5779 if (dt[1] != vect_constant_def)
5780 op1 = vect_init_vector (stmt_info, op1,
5781 TREE_TYPE (vectype), NULL);
5784 /* Handle def. */
5785 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5787 prev_stmt_info = NULL;
5788 for (j = 0; j < ncopies; j++)
5790 /* Handle uses. */
5791 if (j == 0)
5793 if (scalar_shift_arg)
5795 /* Vector shl and shr insn patterns can be defined with scalar
5796 operand 2 (shift operand). In this case, use constant or loop
5797 invariant op1 directly, without extending it to vector mode
5798 first. */
5799 optab_op2_mode = insn_data[icode].operand[2].mode;
5800 if (!VECTOR_MODE_P (optab_op2_mode))
5802 if (dump_enabled_p ())
5803 dump_printf_loc (MSG_NOTE, vect_location,
5804 "operand 1 using scalar mode.\n");
5805 vec_oprnd1 = op1;
5806 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5807 vec_oprnds1.quick_push (vec_oprnd1);
5808 if (slp_node)
5810 /* Store vec_oprnd1 for every vector stmt to be created
5811 for SLP_NODE. We check during the analysis that all
5812 the shift arguments are the same.
5813 TODO: Allow different constants for different vector
5814 stmts generated for an SLP instance. */
5815 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5816 vec_oprnds1.quick_push (vec_oprnd1);
5820 else if (slp_node && incompatible_op1_vectype_p)
5822 if (was_scalar_shift_arg)
5824 /* If the argument was the same in all lanes create
5825 the correctly typed vector shift amount directly. */
5826 op1 = fold_convert (TREE_TYPE (vectype), op1);
5827 op1 = vect_init_vector (stmt_info, op1, TREE_TYPE (vectype),
5828 !loop_vinfo ? gsi : NULL);
5829 vec_oprnd1 = vect_init_vector (stmt_info, op1, vectype,
5830 !loop_vinfo ? gsi : NULL);
5831 vec_oprnds1.create (slp_node->vec_stmts_size);
5832 for (k = 0; k < slp_node->vec_stmts_size; k++)
5833 vec_oprnds1.quick_push (vec_oprnd1);
5835 else if (dt[1] == vect_constant_def)
5837 /* Convert the scalar constant shift amounts in-place. */
5838 slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1];
5839 gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def);
5840 for (unsigned i = 0;
5841 i < SLP_TREE_SCALAR_OPS (shift).length (); ++i)
5843 SLP_TREE_SCALAR_OPS (shift)[i]
5844 = fold_convert (TREE_TYPE (vectype),
5845 SLP_TREE_SCALAR_OPS (shift)[i]);
5846 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i])
5847 == INTEGER_CST));
5850 else
5851 gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
5854 /* vec_oprnd1 is available if operand 1 should be of a scalar type
5855 (a special case for certain kinds of vector shifts); otherwise,
5856 operand 1 should be of a vector type (the usual case). */
5857 if (vec_oprnd1)
5858 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5859 slp_node);
5860 else
5861 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
5862 slp_node);
5864 else
5865 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
5867 /* Arguments are ready. Create the new vector stmt. */
5868 stmt_vec_info new_stmt_info = NULL;
5869 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5871 vop1 = vec_oprnds1[i];
5872 gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5873 new_temp = make_ssa_name (vec_dest, new_stmt);
5874 gimple_assign_set_lhs (new_stmt, new_temp);
5875 new_stmt_info
5876 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
5877 if (slp_node)
5878 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5881 if (slp_node)
5882 continue;
5884 if (j == 0)
5885 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5886 else
5887 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
5888 prev_stmt_info = new_stmt_info;
5891 vec_oprnds0.release ();
5892 vec_oprnds1.release ();
5894 return true;
5898 /* Function vectorizable_operation.
5900 Check if STMT_INFO performs a binary, unary or ternary operation that can
5901 be vectorized.
5902 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5903 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5904 Return true if STMT_INFO is vectorizable in this way. */
5906 static bool
5907 vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5908 stmt_vec_info *vec_stmt, slp_tree slp_node,
5909 stmt_vector_for_cost *cost_vec)
5911 tree vec_dest;
5912 tree scalar_dest;
5913 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5914 tree vectype;
5915 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5916 enum tree_code code, orig_code;
5917 machine_mode vec_mode;
5918 tree new_temp;
5919 int op_type;
5920 optab optab;
5921 bool target_support_p;
5922 enum vect_def_type dt[3]
5923 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5924 int ndts = 3;
5925 stmt_vec_info prev_stmt_info;
5926 poly_uint64 nunits_in;
5927 poly_uint64 nunits_out;
5928 tree vectype_out;
5929 int ncopies, vec_num;
5930 int j, i;
5931 vec<tree> vec_oprnds0 = vNULL;
5932 vec<tree> vec_oprnds1 = vNULL;
5933 vec<tree> vec_oprnds2 = vNULL;
5934 tree vop0, vop1, vop2;
5935 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5936 vec_info *vinfo = stmt_info->vinfo;
5938 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5939 return false;
5941 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5942 && ! vec_stmt)
5943 return false;
5945 /* Is STMT a vectorizable binary/unary operation? */
5946 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
5947 if (!stmt)
5948 return false;
5950 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5951 return false;
5953 orig_code = code = gimple_assign_rhs_code (stmt);
5955 /* Shifts are handled in vectorizable_shift. */
5956 if (code == LSHIFT_EXPR
5957 || code == RSHIFT_EXPR
5958 || code == LROTATE_EXPR
5959 || code == RROTATE_EXPR)
5960 return false;
5962 /* Comparisons are handled in vectorizable_comparison. */
5963 if (TREE_CODE_CLASS (code) == tcc_comparison)
5964 return false;
5966 /* Conditions are handled in vectorizable_condition. */
5967 if (code == COND_EXPR)
5968 return false;
5970 /* For pointer addition and subtraction, we should use the normal
5971 plus and minus for the vector operation. */
5972 if (code == POINTER_PLUS_EXPR)
5973 code = PLUS_EXPR;
5974 if (code == POINTER_DIFF_EXPR)
5975 code = MINUS_EXPR;
5977 /* Support only unary, binary and ternary operations. */
5978 op_type = TREE_CODE_LENGTH (code);
5979 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5981 if (dump_enabled_p ())
5982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5983 "num. args = %d (not unary/binary/ternary op).\n",
5984 op_type);
5985 return false;
5988 scalar_dest = gimple_assign_lhs (stmt);
5989 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5991 /* Most operations cannot handle bit-precision types without extra
5992 truncations. */
5993 bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
5994 if (!mask_op_p
5995 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5996 /* Exceptions are the bitwise binary operations. */
5997 && code != BIT_IOR_EXPR
5998 && code != BIT_XOR_EXPR
5999 && code != BIT_AND_EXPR)
6001 if (dump_enabled_p ())
6002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6003 "bit-precision arithmetic not supported.\n");
6004 return false;
6007 op0 = gimple_assign_rhs1 (stmt);
6008 if (!vect_is_simple_use (op0, vinfo, &dt[0], &vectype))
6010 if (dump_enabled_p ())
6011 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6012 "use not simple.\n");
6013 return false;
6015 /* If op0 is an external or constant def, infer the vector type
6016 from the scalar type. */
6017 if (!vectype)
6019 /* For a boolean type we cannot determine the vectype from
6020 an invariant value (we don't know whether it is a vector
6021 of booleans or a vector of integers). Use the output
6022 vectype because operations on booleans don't change the
6023 type. */
6024 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
6026 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
6028 if (dump_enabled_p ())
6029 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6030 "not supported operation on bool value.\n");
6031 return false;
6033 vectype = vectype_out;
6035 else
6036 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
6037 slp_node);
6039 if (vec_stmt)
6040 gcc_assert (vectype);
6041 if (!vectype)
6043 if (dump_enabled_p ())
6044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6045 "no vectype for scalar type %T\n",
6046 TREE_TYPE (op0));
6048 return false;
6051 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
6052 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
6053 if (maybe_ne (nunits_out, nunits_in))
6054 return false;
6056 tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
6057 if (op_type == binary_op || op_type == ternary_op)
6059 op1 = gimple_assign_rhs2 (stmt);
6060 if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2))
6062 if (dump_enabled_p ())
6063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6064 "use not simple.\n");
6065 return false;
6068 if (op_type == ternary_op)
6070 op2 = gimple_assign_rhs3 (stmt);
6071 if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3))
6073 if (dump_enabled_p ())
6074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6075 "use not simple.\n");
6076 return false;
6080 /* Multiple types in SLP are handled by creating the appropriate number of
6081 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6082 case of SLP. */
6083 if (slp_node)
6085 ncopies = 1;
6086 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6088 else
6090 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6091 vec_num = 1;
6094 gcc_assert (ncopies >= 1);
6096 /* Reject attempts to combine mask types with nonmask types, e.g. if
6097 we have an AND between a (nonmask) boolean loaded from memory and
6098 a (mask) boolean result of a comparison.
6100 TODO: We could easily fix these cases up using pattern statements. */
6101 if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
6102 || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
6103 || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
6105 if (dump_enabled_p ())
6106 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6107 "mixed mask and nonmask vector types\n");
6108 return false;
6111 /* Supportable by target? */
6113 vec_mode = TYPE_MODE (vectype);
6114 if (code == MULT_HIGHPART_EXPR)
6115 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
6116 else
6118 optab = optab_for_tree_code (code, vectype, optab_default);
6119 if (!optab)
6121 if (dump_enabled_p ())
6122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6123 "no optab.\n");
6124 return false;
6126 target_support_p = (optab_handler (optab, vec_mode)
6127 != CODE_FOR_nothing);
6130 if (!target_support_p)
6132 if (dump_enabled_p ())
6133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6134 "op not supported by target.\n");
6135 /* Check only during analysis. */
6136 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6137 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
6138 return false;
6139 if (dump_enabled_p ())
6140 dump_printf_loc (MSG_NOTE, vect_location,
6141 "proceeding using word mode.\n");
6144 /* Worthwhile without SIMD support? Check only during analysis. */
6145 if (!VECTOR_MODE_P (vec_mode)
6146 && !vec_stmt
6147 && !vect_worthwhile_without_simd_p (vinfo, code))
6149 if (dump_enabled_p ())
6150 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6151 "not worthwhile without SIMD support.\n");
6152 return false;
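/* Collect what the masked-loop handling below needs: the index of the
   operand that carries the reduction chain (or -1 if none), the loop
   masks, and the conditional internal function corresponding to CODE.  */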
6155 int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
6156 vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
6157 internal_fn cond_fn = get_conditional_internal_fn (code);
6159 if (!vec_stmt) /* transformation not required. */
6161 /* If this operation is part of a reduction, a fully-masked loop
6162 should only change the active lanes of the reduction chain,
6163 keeping the inactive lanes as-is. */
6164 if (loop_vinfo
6165 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
6166 && reduc_idx >= 0)
6168 if (cond_fn == IFN_LAST
6169 || !direct_internal_fn_supported_p (cond_fn, vectype,
6170 OPTIMIZE_FOR_SPEED))
6172 if (dump_enabled_p ())
6173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6174 "can't use a fully-masked loop because no"
6175 " conditional operation is available.\n");
6176 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6178 else
6179 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6180 vectype, NULL);
6183 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
6184 DUMP_VECT_SCOPE ("vectorizable_operation");
6185 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec);
6186 return true;
6189 /* Transform. */
6191 if (dump_enabled_p ())
6192 dump_printf_loc (MSG_NOTE, vect_location,
6193 "transform binary/unary operation.\n");
6195 bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6197 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
6198 vectors with unsigned elements, but the result is signed. So, we
6199 need to compute the MINUS_EXPR into a vectype temporary and
6200 VIEW_CONVERT_EXPR it into the final vectype_out result. */
6201 tree vec_cvt_dest = NULL_TREE;
6202 if (orig_code == POINTER_DIFF_EXPR)
6204 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6205 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
6207 /* Handle def. */
6208 else
6209 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6211 /* In case the vectorization factor (VF) is bigger than the number
6212 of elements that we can fit in a vectype (nunits), we have to generate
6213 more than one vector stmt - i.e., we need to "unroll" the
6214 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6215 from one copy of the vector stmt to the next, in the field
6216 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6217 stages to find the correct vector defs to be used when vectorizing
6218 stmts that use the defs of the current stmt. The example below
6219 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
6220 we need to create 4 vectorized stmts):
6222 before vectorization:
6223 RELATED_STMT VEC_STMT
6224 S1: x = memref - -
6225 S2: z = x + 1 - -
6227 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
6228 there):
6229 RELATED_STMT VEC_STMT
6230 VS1_0: vx0 = memref0 VS1_1 -
6231 VS1_1: vx1 = memref1 VS1_2 -
6232 VS1_2: vx2 = memref2 VS1_3 -
6233 VS1_3: vx3 = memref3 - -
6234 S1: x = load - VS1_0
6235 S2: z = x + 1 - -
6237 step2: vectorize stmt S2 (done here):
6238 To vectorize stmt S2 we first need to find the relevant vector
6239 def for the first operand 'x'. This is, as usual, obtained from
6240 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6241 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6242 relevant vector def 'vx0'. Having found 'vx0' we can generate
6243 the vector stmt VS2_0, and as usual, record it in the
6244 STMT_VINFO_VEC_STMT of stmt S2.
6245 When creating the second copy (VS2_1), we obtain the relevant vector
6246 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6247 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6248 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6249 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6250 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6251 chain of stmts and pointers:
6252 RELATED_STMT VEC_STMT
6253 VS1_0: vx0 = memref0 VS1_1 -
6254 VS1_1: vx1 = memref1 VS1_2 -
6255 VS1_2: vx2 = memref2 VS1_3 -
6256 VS1_3: vx3 = memref3 - -
6257 S1: x = load - VS1_0
6258 VS2_0: vz0 = vx0 + v1 VS2_1 -
6259 VS2_1: vz1 = vx1 + v1 VS2_2 -
6260 VS2_2: vz2 = vx2 + v1 VS2_3 -
6261 VS2_3: vz3 = vx3 + v1 - -
6262 S2: z = x + 1 - VS2_0 */
6264 prev_stmt_info = NULL;
6265 for (j = 0; j < ncopies; j++)
6267 /* Handle uses. */
6268 if (j == 0)
6270 if (op_type == binary_op)
6271 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0, &vec_oprnds1,
6272 slp_node);
6273 else if (op_type == ternary_op)
6275 if (slp_node)
6277 auto_vec<vec<tree> > vec_defs(3);
6278 vect_get_slp_defs (slp_node, &vec_defs);
6279 vec_oprnds0 = vec_defs[0];
6280 vec_oprnds1 = vec_defs[1];
6281 vec_oprnds2 = vec_defs[2];
6283 else
6285 vect_get_vec_defs (op0, op1, stmt_info, &vec_oprnds0,
6286 &vec_oprnds1, NULL);
6287 vect_get_vec_defs (op2, NULL_TREE, stmt_info, &vec_oprnds2,
6288 NULL, NULL);
6291 else
6292 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
6293 slp_node);
6295 else
6297 vect_get_vec_defs_for_stmt_copy (vinfo, &vec_oprnds0, &vec_oprnds1);
6298 if (op_type == ternary_op)
6300 tree vec_oprnd = vec_oprnds2.pop ();
6301 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (vinfo,
6302 vec_oprnd));
6306 /* Arguments are ready. Create the new vector stmt. */
6307 stmt_vec_info new_stmt_info = NULL;
6308 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
6310 vop1 = ((op_type == binary_op || op_type == ternary_op)
6311 ? vec_oprnds1[i] : NULL_TREE);
6312 vop2 = ((op_type == ternary_op)
6313 ? vec_oprnds2[i] : NULL_TREE);
6314 if (masked_loop_p && reduc_idx >= 0)
6316 /* Perform the operation on active elements only and take
6317 inactive elements from the reduction chain input. */
6318 gcc_assert (!vop2);
6319 vop2 = reduc_idx == 1 ? vop1 : vop0;
6320 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6321 vectype, i * ncopies + j);
6322 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
6323 vop0, vop1, vop2);
6324 new_temp = make_ssa_name (vec_dest, call);
6325 gimple_call_set_lhs (call, new_temp);
6326 gimple_call_set_nothrow (call, true);
6327 new_stmt_info
6328 = vect_finish_stmt_generation (stmt_info, call, gsi);
6330 else
6332 gassign *new_stmt = gimple_build_assign (vec_dest, code,
6333 vop0, vop1, vop2);
6334 new_temp = make_ssa_name (vec_dest, new_stmt);
6335 gimple_assign_set_lhs (new_stmt, new_temp);
6336 new_stmt_info
6337 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6338 if (vec_cvt_dest)
6340 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
6341 gassign *new_stmt
6342 = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
6343 new_temp);
6344 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
6345 gimple_assign_set_lhs (new_stmt, new_temp);
6346 new_stmt_info
6347 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
6350 if (slp_node)
6351 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6354 if (slp_node)
6355 continue;
6357 if (j == 0)
6358 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6359 else
6360 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6361 prev_stmt_info = new_stmt_info;
6364 vec_oprnds0.release ();
6365 vec_oprnds1.release ();
6366 vec_oprnds2.release ();
6368 return true;
6371 /* A helper function to ensure data reference DR_INFO's base alignment. */
6373 static void
6374 ensure_base_align (dr_vec_info *dr_info)
6376 if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
6377 return;
6379 if (dr_info->base_misaligned)
6381 tree base_decl = dr_info->base_decl;
6383 // We should only be able to increase the alignment of a base object if
6384 // we know what its new alignment should be at compile time.
6385 unsigned HOST_WIDE_INT align_base_to =
6386 DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;
6388 if (decl_in_symtab_p (base_decl))
6389 symtab_node::get (base_decl)->increase_alignment (align_base_to);
6390 else if (DECL_ALIGN (base_decl) < align_base_to)
6392 SET_DECL_ALIGN (base_decl, align_base_to);
6393 DECL_USER_ALIGN (base_decl) = 1;
6395 dr_info->base_misaligned = false;
6400 /* Function get_group_alias_ptr_type.
6402 Return the alias type for the group starting at FIRST_STMT_INFO. */
6404 static tree
6405 get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
6407 struct data_reference *first_dr, *next_dr;
6409 first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
6410 stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
6411 while (next_stmt_info)
6413 next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
6414 if (get_alias_set (DR_REF (first_dr))
6415 != get_alias_set (DR_REF (next_dr)))
6417 if (dump_enabled_p ())
6418 dump_printf_loc (MSG_NOTE, vect_location,
6419 "conflicting alias set types.\n");
6420 return ptr_type_node;
6422 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
6424 return reference_alias_ptr_type (DR_REF (first_dr));
6428 /* Function scan_operand_equal_p.
6430 Helper function for check_scan_store. Compare two references
6431 with .GOMP_SIMD_LANE bases. */
6433 static bool
6434 scan_operand_equal_p (tree ref1, tree ref2)
6436 tree ref[2] = { ref1, ref2 };
6437 poly_int64 bitsize[2], bitpos[2];
6438 tree offset[2], base[2];
6439 for (int i = 0; i < 2; ++i)
6441 machine_mode mode;
6442 int unsignedp, reversep, volatilep = 0;
6443 base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
6444 &offset[i], &mode, &unsignedp,
6445 &reversep, &volatilep);
6446 if (reversep || volatilep || maybe_ne (bitpos[i], 0))
6447 return false;
6448 if (TREE_CODE (base[i]) == MEM_REF
6449 && offset[i] == NULL_TREE
6450 && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
6452 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
6453 if (is_gimple_assign (def_stmt)
6454 && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
6455 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
6456 && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
6458 if (maybe_ne (mem_ref_offset (base[i]), 0))
6459 return false;
6460 base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
6461 offset[i] = gimple_assign_rhs2 (def_stmt);
6466 if (!operand_equal_p (base[0], base[1], 0))
6467 return false;
6468 if (maybe_ne (bitsize[0], bitsize[1]))
6469 return false;
6470 if (offset[0] != offset[1])
6472 if (!offset[0] || !offset[1])
6473 return false;
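/* If the offsets do not compare equal as-is, strip a multiplication by
   a constant step and any widening conversion from each of them and
   compare the stripped offsets and steps instead, so that e.g. an
   offset written as _21 * 4 matches one defined by x_5 = _21 * 4
   (hypothetical SSA names for illustration).  */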
6474 if (!operand_equal_p (offset[0], offset[1], 0))
6476 tree step[2];
6477 for (int i = 0; i < 2; ++i)
6479 step[i] = integer_one_node;
6480 if (TREE_CODE (offset[i]) == SSA_NAME)
6482 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6483 if (is_gimple_assign (def_stmt)
6484 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
6485 && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
6486 == INTEGER_CST))
6488 step[i] = gimple_assign_rhs2 (def_stmt);
6489 offset[i] = gimple_assign_rhs1 (def_stmt);
6492 else if (TREE_CODE (offset[i]) == MULT_EXPR)
6494 step[i] = TREE_OPERAND (offset[i], 1);
6495 offset[i] = TREE_OPERAND (offset[i], 0);
6497 tree rhs1 = NULL_TREE;
6498 if (TREE_CODE (offset[i]) == SSA_NAME)
6500 gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
6501 if (gimple_assign_cast_p (def_stmt))
6502 rhs1 = gimple_assign_rhs1 (def_stmt);
6504 else if (CONVERT_EXPR_P (offset[i]))
6505 rhs1 = TREE_OPERAND (offset[i], 0);
6506 if (rhs1
6507 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
6508 && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
6509 && (TYPE_PRECISION (TREE_TYPE (offset[i]))
6510 >= TYPE_PRECISION (TREE_TYPE (rhs1))))
6511 offset[i] = rhs1;
6513 if (!operand_equal_p (offset[0], offset[1], 0)
6514 || !operand_equal_p (step[0], step[1], 0))
6515 return false;
6518 return true;
6522 enum scan_store_kind {
6523 /* Normal permutation. */
6524 scan_store_kind_perm,
6526 /* Whole vector left shift permutation with zero init. */
6527 scan_store_kind_lshift_zero,
6529 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6530 scan_store_kind_lshift_cond
6533 /* Function scan_store_can_perm_p.
6535 Verify if we can perform the needed permutations or whole vector shifts.
6536 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6537 USE_WHOLE_VECTOR is a vector of enum scan_store_kind recording which
6538 operation to perform at each step. */
6540 static int
6541 scan_store_can_perm_p (tree vectype, tree init,
6542 vec<enum scan_store_kind> *use_whole_vector = NULL)
6544 enum machine_mode vec_mode = TYPE_MODE (vectype);
6545 unsigned HOST_WIDE_INT nunits;
6546 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
6547 return -1;
6548 int units_log2 = exact_log2 (nunits);
6549 if (units_log2 <= 0)
6550 return -1;
6552 int i;
6553 enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
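/* For every step i < units_log2 the permutation keeps the first 2**i
   lanes from the first input (the reduction initializer) and fills the
   remaining lanes with the leading lanes of the second input, e.g. for
   nunits == 8 and i == 1 the selector is { 0, 1, 8, 9, 10, 11, 12, 13 }.
   The final step (i == units_log2) broadcasts the last lane and must be
   directly supported; each intermediate step must either be supported
   as a constant permutation or be replaceable by a whole-vector shift,
   possibly followed by a VEC_COND_EXPR when the initializer is not an
   all-zero constant.  */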
6554 for (i = 0; i <= units_log2; ++i)
6556 unsigned HOST_WIDE_INT j, k;
6557 enum scan_store_kind kind = scan_store_kind_perm;
6558 vec_perm_builder sel (nunits, nunits, 1);
6559 sel.quick_grow (nunits);
6560 if (i == units_log2)
6562 for (j = 0; j < nunits; ++j)
6563 sel[j] = nunits - 1;
6565 else
6567 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
6568 sel[j] = j;
6569 for (k = 0; j < nunits; ++j, ++k)
6570 sel[j] = nunits + k;
6572 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
6573 if (!can_vec_perm_const_p (vec_mode, indices))
6575 if (i == units_log2)
6576 return -1;
6578 if (whole_vector_shift_kind == scan_store_kind_perm)
6580 if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
6581 return -1;
6582 whole_vector_shift_kind = scan_store_kind_lshift_zero;
6583 /* Whole vector shifts shift in zeros, so if init is an all-zero
6584 constant, there is no need to do anything further. */
6585 if ((TREE_CODE (init) != INTEGER_CST
6586 && TREE_CODE (init) != REAL_CST)
6587 || !initializer_zerop (init))
6589 tree masktype = truth_type_for (vectype);
6590 if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
6591 return -1;
6592 whole_vector_shift_kind = scan_store_kind_lshift_cond;
6595 kind = whole_vector_shift_kind;
6597 if (use_whole_vector)
6599 if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
6600 use_whole_vector->safe_grow_cleared (i);
6601 if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
6602 use_whole_vector->safe_push (kind);
6606 return units_log2;
6610 /* Function check_scan_store.
6612 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6614 static bool
6615 check_scan_store (stmt_vec_info stmt_info, tree vectype,
6616 enum vect_def_type rhs_dt, bool slp, tree mask,
6617 vect_memory_access_type memory_access_type)
6619 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6620 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
6621 tree ref_type;
6623 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1);
6624 if (slp
6625 || mask
6626 || memory_access_type != VMAT_CONTIGUOUS
6627 || TREE_CODE (DR_BASE_ADDRESS (dr_info->dr)) != ADDR_EXPR
6628 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0))
6629 || loop_vinfo == NULL
6630 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
6631 || STMT_VINFO_GROUPED_ACCESS (stmt_info)
6632 || !integer_zerop (get_dr_vinfo_offset (dr_info))
6633 || !integer_zerop (DR_INIT (dr_info->dr))
6634 || !(ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr)))
6635 || !alias_sets_conflict_p (get_alias_set (vectype),
6636 get_alias_set (TREE_TYPE (ref_type))))
6638 if (dump_enabled_p ())
6639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6640 "unsupported OpenMP scan store.\n");
6641 return false;
6644 /* We need to pattern match code built by OpenMP lowering and simplified
6645 by subsequent optimizations into something we can handle.
6646 #pragma omp simd reduction(inscan,+:r)
6647 for (...)
6649 r += something ();
6650 #pragma omp scan inclusive (r)
6651 use (r);
6653 shall have body with:
6654 // Initialization for input phase, store the reduction initializer:
6655 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6656 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6657 D.2042[_21] = 0;
6658 // Actual input phase:
6660 r.0_5 = D.2042[_20];
6661 _6 = _4 + r.0_5;
6662 D.2042[_20] = _6;
6663 // Initialization for scan phase:
6664 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6665 _26 = D.2043[_25];
6666 _27 = D.2042[_25];
6667 _28 = _26 + _27;
6668 D.2043[_25] = _28;
6669 D.2042[_25] = _28;
6670 // Actual scan phase:
6672 r.1_8 = D.2042[_20];
6674 The "omp simd array" variable D.2042 holds the privatized copy used
6675 inside of the loop and D.2043 is another one that holds copies of
6676 the current original list item. The separate GOMP_SIMD_LANE ifn
6677 kinds are there in order to allow optimizing the initializer store
6678 and combiner sequence, e.g. if it is originally some C++ish user
6679 defined reduction, but allow the vectorizer to pattern recognize it
6680 and turn it into the appropriate vectorized scan.
6682 For exclusive scan, this is slightly different:
6683 #pragma omp simd reduction(inscan,+:r)
6684 for (...)
6686 use (r);
6687 #pragma omp scan exclusive (r)
6688 r += something ();
6690 shall have body with:
6691 // Initialization for input phase, store the reduction initializer:
6692 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6693 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6694 D.2042[_21] = 0;
6695 // Actual input phase:
6697 r.0_5 = D.2042[_20];
6698 _6 = _4 + r.0_5;
6699 D.2042[_20] = _6;
6700 // Initialization for scan phase:
6701 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6702 _26 = D.2043[_25];
6703 D.2044[_25] = _26;
6704 _27 = D.2042[_25];
6705 _28 = _26 + _27;
6706 D.2043[_25] = _28;
6707 // Actual scan phase:
6709 r.1_8 = D.2044[_20];
6710 ... */
6712 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 2)
6714 /* Match the D.2042[_21] = 0; store above. Just require that
6715 it is a constant or external definition store. */
6716 if (rhs_dt != vect_constant_def && rhs_dt != vect_external_def)
6718 fail_init:
6719 if (dump_enabled_p ())
6720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6721 "unsupported OpenMP scan initializer store.\n");
6722 return false;
6725 if (! loop_vinfo->scan_map)
6726 loop_vinfo->scan_map = new hash_map<tree, tree>;
6727 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6728 tree &cached = loop_vinfo->scan_map->get_or_insert (var);
6729 if (cached)
6730 goto fail_init;
6731 cached = gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info));
6733 /* These stores can be vectorized normally. */
6734 return true;
6737 if (rhs_dt != vect_internal_def)
6739 fail:
6740 if (dump_enabled_p ())
6741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6742 "unsupported OpenMP scan combiner pattern.\n");
6743 return false;
6746 gimple *stmt = STMT_VINFO_STMT (stmt_info);
6747 tree rhs = gimple_assign_rhs1 (stmt);
6748 if (TREE_CODE (rhs) != SSA_NAME)
6749 goto fail;
6751 gimple *other_store_stmt = NULL;
6752 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6753 bool inscan_var_store
6754 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
6756 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6758 if (!inscan_var_store)
6760 use_operand_p use_p;
6761 imm_use_iterator iter;
6762 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6764 gimple *use_stmt = USE_STMT (use_p);
6765 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6766 continue;
6767 if (gimple_bb (use_stmt) != gimple_bb (stmt)
6768 || !is_gimple_assign (use_stmt)
6769 || gimple_assign_rhs_class (use_stmt) != GIMPLE_BINARY_RHS
6770 || other_store_stmt
6771 || TREE_CODE (gimple_assign_lhs (use_stmt)) != SSA_NAME)
6772 goto fail;
6773 other_store_stmt = use_stmt;
6775 if (other_store_stmt == NULL)
6776 goto fail;
6777 rhs = gimple_assign_lhs (other_store_stmt);
6778 if (!single_imm_use (rhs, &use_p, &other_store_stmt))
6779 goto fail;
6782 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
6784 use_operand_p use_p;
6785 imm_use_iterator iter;
6786 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
6788 gimple *use_stmt = USE_STMT (use_p);
6789 if (use_stmt == stmt || is_gimple_debug (use_stmt))
6790 continue;
6791 if (other_store_stmt)
6792 goto fail;
6793 other_store_stmt = use_stmt;
6796 else
6797 goto fail;
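/* The stored value must be produced by a commutative binary operation
   in the same block, and both of its operands must be loads from
   .GOMP_SIMD_LANE accesses of the same kind as this store.  */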
6799 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
6800 if (gimple_bb (def_stmt) != gimple_bb (stmt)
6801 || !is_gimple_assign (def_stmt)
6802 || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
6803 goto fail;
6805 enum tree_code code = gimple_assign_rhs_code (def_stmt);
6806 /* For pointer addition, we should use the normal plus for the vector
6807 operation. */
6808 switch (code)
6810 case POINTER_PLUS_EXPR:
6811 code = PLUS_EXPR;
6812 break;
6813 case MULT_HIGHPART_EXPR:
6814 goto fail;
6815 default:
6816 break;
6818 if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
6819 goto fail;
6821 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6822 tree rhs2 = gimple_assign_rhs2 (def_stmt);
6823 if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
6824 goto fail;
6826 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
6827 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
6828 if (gimple_bb (load1_stmt) != gimple_bb (stmt)
6829 || !gimple_assign_load_p (load1_stmt)
6830 || gimple_bb (load2_stmt) != gimple_bb (stmt)
6831 || !gimple_assign_load_p (load2_stmt))
6832 goto fail;
6834 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
6835 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
6836 if (load1_stmt_info == NULL
6837 || load2_stmt_info == NULL
6838 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
6839 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
6840 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
6841 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6842 goto fail;
6844 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
6846 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
6847 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
6848 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
6849 goto fail;
6850 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
6851 tree lrhs;
6852 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6853 lrhs = rhs1;
6854 else
6855 lrhs = rhs2;
6856 use_operand_p use_p;
6857 imm_use_iterator iter;
6858 FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
6860 gimple *use_stmt = USE_STMT (use_p);
6861 if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
6862 continue;
6863 if (other_store_stmt)
6864 goto fail;
6865 other_store_stmt = use_stmt;
6869 if (other_store_stmt == NULL)
6870 goto fail;
6871 if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
6872 || !gimple_store_p (other_store_stmt))
6873 goto fail;
6875 stmt_vec_info other_store_stmt_info
6876 = loop_vinfo->lookup_stmt (other_store_stmt);
6877 if (other_store_stmt_info == NULL
6878 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
6879 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
6880 goto fail;
6882 gimple *stmt1 = stmt;
6883 gimple *stmt2 = other_store_stmt;
6884 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
6885 std::swap (stmt1, stmt2);
6886 if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
6887 gimple_assign_rhs1 (load2_stmt)))
6889 std::swap (rhs1, rhs2);
6890 std::swap (load1_stmt, load2_stmt);
6891 std::swap (load1_stmt_info, load2_stmt_info);
6893 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
6894 gimple_assign_rhs1 (load1_stmt)))
6895 goto fail;
6897 tree var3 = NULL_TREE;
6898 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
6899 && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
6900 gimple_assign_rhs1 (load2_stmt)))
6901 goto fail;
6902 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6904 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
6905 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
6906 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
6907 goto fail;
6908 var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
6909 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
6910 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
6911 || lookup_attribute ("omp simd inscan exclusive",
6912 DECL_ATTRIBUTES (var3)))
6913 goto fail;
6916 dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
6917 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
6918 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
6919 goto fail;
6921 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
6922 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
6923 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
6924 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
6925 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6926 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
6927 goto fail;
6929 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
6930 std::swap (var1, var2);
6932 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
6934 if (!lookup_attribute ("omp simd inscan exclusive",
6935 DECL_ATTRIBUTES (var1)))
6936 goto fail;
6937 var1 = var3;
6940 if (loop_vinfo->scan_map == NULL)
6941 goto fail;
6942 tree *init = loop_vinfo->scan_map->get (var1);
6943 if (init == NULL)
6944 goto fail;
6946 /* The IL is as expected, now check if we can actually vectorize it.
6947 Inclusive scan:
6948 _26 = D.2043[_25];
6949 _27 = D.2042[_25];
6950 _28 = _26 + _27;
6951 D.2043[_25] = _28;
6952 D.2042[_25] = _28;
6953 should be vectorized as (where _40 is the vectorized rhs
6954 from the D.2042[_21] = 0; store):
6955 _30 = MEM <vector(8) int> [(int *)&D.2043];
6956 _31 = MEM <vector(8) int> [(int *)&D.2042];
6957 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6958 _33 = _31 + _32;
6959 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6960 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6961 _35 = _33 + _34;
6962 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6963 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6964 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6965 _37 = _35 + _36;
6966 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6967 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6968 _38 = _30 + _37;
6969 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6970 MEM <vector(8) int> [(int *)&D.2043] = _39;
6971 MEM <vector(8) int> [(int *)&D.2042] = _38;
6972 Exclusive scan:
6973 _26 = D.2043[_25];
6974 D.2044[_25] = _26;
6975 _27 = D.2042[_25];
6976 _28 = _26 + _27;
6977 D.2043[_25] = _28;
6978 should be vectorized as (where _40 is the vectorized rhs
6979 from the D.2042[_21] = 0; store):
6980 _30 = MEM <vector(8) int> [(int *)&D.2043];
6981 _31 = MEM <vector(8) int> [(int *)&D.2042];
6982 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6983 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6984 _34 = _32 + _33;
6985 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6986 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6987 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6988 _36 = _34 + _35;
6989 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6990 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6991 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6992 _38 = _36 + _37;
6993 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6994 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6995 _39 = _30 + _38;
6996 _50 = _31 + _39;
6997 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6998 MEM <vector(8) int> [(int *)&D.2044] = _39;
6999 MEM <vector(8) int> [(int *)&D.2042] = _51; */
7000 enum machine_mode vec_mode = TYPE_MODE (vectype);
7001 optab optab = optab_for_tree_code (code, vectype, optab_default);
7002 if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
7003 goto fail;
7005 int units_log2 = scan_store_can_perm_p (vectype, *init);
7006 if (units_log2 == -1)
7007 goto fail;
7009 return true;
7013 /* Function vectorizable_scan_store.
7015 Helper of vectorizable_store; arguments are as for vectorizable_store.
7016 Handle only the transformation; checking is done in check_scan_store. */
7018 static bool
7019 vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7020 stmt_vec_info *vec_stmt, int ncopies)
7022 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7023 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
7024 tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
7025 vec_info *vinfo = stmt_info->vinfo;
7026 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7028 if (dump_enabled_p ())
7029 dump_printf_loc (MSG_NOTE, vect_location,
7030 "transform scan store. ncopies = %d\n", ncopies);
7032 gimple *stmt = STMT_VINFO_STMT (stmt_info);
7033 tree rhs = gimple_assign_rhs1 (stmt);
7034 gcc_assert (TREE_CODE (rhs) == SSA_NAME);
7036 tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
7037 bool inscan_var_store
7038 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
7040 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7042 use_operand_p use_p;
7043 imm_use_iterator iter;
7044 FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
7046 gimple *use_stmt = USE_STMT (use_p);
7047 if (use_stmt == stmt || is_gimple_debug (use_stmt))
7048 continue;
7049 rhs = gimple_assign_lhs (use_stmt);
7050 break;
7054 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
7055 enum tree_code code = gimple_assign_rhs_code (def_stmt);
7056 if (code == POINTER_PLUS_EXPR)
7057 code = PLUS_EXPR;
7058 gcc_assert (TREE_CODE_LENGTH (code) == binary_op
7059 && commutative_tree_code (code));
7060 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7061 tree rhs2 = gimple_assign_rhs2 (def_stmt);
7062 gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
7063 gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
7064 gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
7065 stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
7066 stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
7067 dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
7068 dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
7069 tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
7070 tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
7072 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
7074 std::swap (rhs1, rhs2);
7075 std::swap (var1, var2);
7076 std::swap (load1_dr_info, load2_dr_info);
7079 tree *init = loop_vinfo->scan_map->get (var1);
7080 gcc_assert (init);
7082 unsigned HOST_WIDE_INT nunits;
7083 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
7084 gcc_unreachable ();
7085 auto_vec<enum scan_store_kind, 16> use_whole_vector;
7086 int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
7087 gcc_assert (units_log2 > 0);
7088 auto_vec<tree, 16> perms;
7089 perms.quick_grow (units_log2 + 1);
7090 tree zero_vec = NULL_TREE, masktype = NULL_TREE;
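/* Precompute one permutation mask per step, mirroring the decisions
   made by scan_store_can_perm_p: steps implemented as whole-vector
   shifts get an "any" mask (plus a zero vector and, for the VEC_COND
   variant, a mask type), the remaining steps get checked masks.  */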
7091 for (int i = 0; i <= units_log2; ++i)
7093 unsigned HOST_WIDE_INT j, k;
7094 vec_perm_builder sel (nunits, nunits, 1);
7095 sel.quick_grow (nunits);
7096 if (i == units_log2)
7097 for (j = 0; j < nunits; ++j)
7098 sel[j] = nunits - 1;
7099 else
7101 for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
7102 sel[j] = j;
7103 for (k = 0; j < nunits; ++j, ++k)
7104 sel[j] = nunits + k;
7106 vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
7107 if (!use_whole_vector.is_empty ()
7108 && use_whole_vector[i] != scan_store_kind_perm)
7110 if (zero_vec == NULL_TREE)
7111 zero_vec = build_zero_cst (vectype);
7112 if (masktype == NULL_TREE
7113 && use_whole_vector[i] == scan_store_kind_lshift_cond)
7114 masktype = truth_type_for (vectype);
7115 perms[i] = vect_gen_perm_mask_any (vectype, indices);
7117 else
7118 perms[i] = vect_gen_perm_mask_checked (vectype, indices);
7121 stmt_vec_info prev_stmt_info = NULL;
7122 tree vec_oprnd1 = NULL_TREE;
7123 tree vec_oprnd2 = NULL_TREE;
7124 tree vec_oprnd3 = NULL_TREE;
7125 tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
7126 tree dataref_offset = build_int_cst (ref_type, 0);
7127 tree bump = vect_get_data_ptr_increment (dr_info, vectype, VMAT_CONTIGUOUS);
7128 tree ldataref_ptr = NULL_TREE;
7129 tree orig = NULL_TREE;
7130 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
7131 ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
7132 for (int j = 0; j < ncopies; j++)
7134 stmt_vec_info new_stmt_info;
7135 if (j == 0)
7137 vec_oprnd1 = vect_get_vec_def_for_operand (*init, stmt_info);
7138 if (ldataref_ptr == NULL)
7139 vec_oprnd2 = vect_get_vec_def_for_operand (rhs1, stmt_info);
7140 vec_oprnd3 = vect_get_vec_def_for_operand (rhs2, stmt_info);
7141 orig = vec_oprnd3;
7143 else
7145 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd1);
7146 if (ldataref_ptr == NULL)
7147 vec_oprnd2 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd2);
7148 vec_oprnd3 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnd3);
7149 if (!inscan_var_store)
7150 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7153 if (ldataref_ptr)
7155 vec_oprnd2 = make_ssa_name (vectype);
7156 tree data_ref = fold_build2 (MEM_REF, vectype,
7157 unshare_expr (ldataref_ptr),
7158 dataref_offset);
7159 vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
7160 gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
7161 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7162 if (prev_stmt_info == NULL)
7163 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7164 else
7165 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7166 prev_stmt_info = new_stmt_info;
7169 tree v = vec_oprnd2;
7170 for (int i = 0; i < units_log2; ++i)
7172 tree new_temp = make_ssa_name (vectype);
7173 gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
7174 (zero_vec
7175 && (use_whole_vector[i]
7176 != scan_store_kind_perm))
7177 ? zero_vec : vec_oprnd1, v,
7178 perms[i]);
7179 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7180 if (prev_stmt_info == NULL)
7181 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7182 else
7183 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7184 prev_stmt_info = new_stmt_info;
7186 if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
7189 /* The whole-vector shift shifted in zero bits, but if *init
7189 is not initializer_zerop, we need to replace those elements
7190 with elements from vec_oprnd1. */
7191 tree_vector_builder vb (masktype, nunits, 1);
7192 for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
7193 vb.quick_push (k < (HOST_WIDE_INT_1U << i)
7194 ? boolean_false_node : boolean_true_node);
7196 tree new_temp2 = make_ssa_name (vectype);
7197 g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
7198 new_temp, vec_oprnd1);
7199 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7200 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7201 prev_stmt_info = new_stmt_info;
7202 new_temp = new_temp2;
7205 /* For exclusive scan, perform the perms[i] permutation once
7206 more. */
7207 if (i == 0
7208 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
7209 && v == vec_oprnd2)
7211 v = new_temp;
7212 --i;
7213 continue;
7216 tree new_temp2 = make_ssa_name (vectype);
7217 g = gimple_build_assign (new_temp2, code, v, new_temp);
7218 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7219 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7220 prev_stmt_info = new_stmt_info;
7222 v = new_temp2;
7225 tree new_temp = make_ssa_name (vectype);
7226 gimple *g = gimple_build_assign (new_temp, code, orig, v);
7227 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7228 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7229 prev_stmt_info = new_stmt_info;
7231 tree last_perm_arg = new_temp;
7232 /* For exclusive scan, new_temp computed above is the exclusive scan
7233 prefix sum. Turn it into an inclusive prefix sum for the broadcast
7234 of the last element into orig. */
7235 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
7237 last_perm_arg = make_ssa_name (vectype);
7238 g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
7239 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7240 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7241 prev_stmt_info = new_stmt_info;
7244 orig = make_ssa_name (vectype);
7245 g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
7246 last_perm_arg, perms[units_log2]);
7247 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7248 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7249 prev_stmt_info = new_stmt_info;
7251 if (!inscan_var_store)
7253 tree data_ref = fold_build2 (MEM_REF, vectype,
7254 unshare_expr (dataref_ptr),
7255 dataref_offset);
7256 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7257 g = gimple_build_assign (data_ref, new_temp);
7258 new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
7259 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7260 prev_stmt_info = new_stmt_info;
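   /* Roughly (an informal summary rather than a guarantee): each copy runs
      the UNITS_LOG2 shift-and-accumulate steps above to turn V into an
      inclusive prefix scan of its lanes, combines that with the value
      carried over in ORIG, stores the per-lane result for the array store,
      and broadcasts the last lane back into ORIG for the next copy.  Stores
      to the scan variable itself are emitted by the loop below instead.  */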
7264 if (inscan_var_store)
7265 for (int j = 0; j < ncopies; j++)
7267 if (j != 0)
7268 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);
7270 tree data_ref = fold_build2 (MEM_REF, vectype,
7271 unshare_expr (dataref_ptr),
7272 dataref_offset);
7273 vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
7274 gimple *g = gimple_build_assign (data_ref, orig);
7275 stmt_vec_info new_stmt_info
7276 = vect_finish_stmt_generation (stmt_info, g, gsi);
7277 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7278 prev_stmt_info = new_stmt_info;
7280 return true;
7284 /* Function vectorizable_store.
7286 Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
7287 that can be vectorized.
7288 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7289 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7290 Return true if STMT_INFO is vectorizable in this way. */
7292 static bool
7293 vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
7294 stmt_vec_info *vec_stmt, slp_tree slp_node,
7295 stmt_vector_for_cost *cost_vec)
7297 tree data_ref;
7298 tree op;
7299 tree vec_oprnd = NULL_TREE;
7300 tree elem_type;
7301 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7302 class loop *loop = NULL;
7303 machine_mode vec_mode;
7304 tree dummy;
7305 enum dr_alignment_support alignment_support_scheme;
7306 enum vect_def_type rhs_dt = vect_unknown_def_type;
7307 enum vect_def_type mask_dt = vect_unknown_def_type;
7308 stmt_vec_info prev_stmt_info = NULL;
7309 tree dataref_ptr = NULL_TREE;
7310 tree dataref_offset = NULL_TREE;
7311 gimple *ptr_incr = NULL;
7312 int ncopies;
7313 int j;
7314 stmt_vec_info first_stmt_info;
7315 bool grouped_store;
7316 unsigned int group_size, i;
7317 vec<tree> oprnds = vNULL;
7318 vec<tree> result_chain = vNULL;
7319 tree offset = NULL_TREE;
7320 vec<tree> vec_oprnds = vNULL;
7321 bool slp = (slp_node != NULL);
7322 unsigned int vec_num;
7323 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7324 vec_info *vinfo = stmt_info->vinfo;
7325 tree aggr_type;
7326 gather_scatter_info gs_info;
7327 poly_uint64 vf;
7328 vec_load_store_type vls_type;
7329 tree ref_type;
7331 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7332 return false;
7334 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7335 && ! vec_stmt)
7336 return false;
7338 /* Is vectorizable store? */
7340 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
7341 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
7343 tree scalar_dest = gimple_assign_lhs (assign);
7344 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
7345 && is_pattern_stmt_p (stmt_info))
7346 scalar_dest = TREE_OPERAND (scalar_dest, 0);
7347 if (TREE_CODE (scalar_dest) != ARRAY_REF
7348 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
7349 && TREE_CODE (scalar_dest) != INDIRECT_REF
7350 && TREE_CODE (scalar_dest) != COMPONENT_REF
7351 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
7352 && TREE_CODE (scalar_dest) != REALPART_EXPR
7353 && TREE_CODE (scalar_dest) != MEM_REF)
7354 return false;
7356 else
7358 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
7359 if (!call || !gimple_call_internal_p (call))
7360 return false;
7362 internal_fn ifn = gimple_call_internal_fn (call);
7363 if (!internal_store_fn_p (ifn))
7364 return false;
7366 if (slp_node != NULL)
7368 if (dump_enabled_p ())
7369 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7370 "SLP of masked stores not supported.\n");
7371 return false;
7374 int mask_index = internal_fn_mask_index (ifn);
7375 if (mask_index >= 0)
7377 mask = gimple_call_arg (call, mask_index);
7378 if (!vect_check_scalar_mask (stmt_info, mask, &mask_dt,
7379 &mask_vectype))
7380 return false;
7384 op = vect_get_store_rhs (stmt_info);
7386 /* Cannot have hybrid store SLP -- that would mean storing to the
7387 same location twice. */
7388 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
7390 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
7391 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7393 if (loop_vinfo)
7395 loop = LOOP_VINFO_LOOP (loop_vinfo);
7396 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7398 else
7399 vf = 1;
7401 /* Multiple types in SLP are handled by creating the appropriate number of
7402 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7403 case of SLP. */
7404 if (slp)
7405 ncopies = 1;
7406 else
7407 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7409 gcc_assert (ncopies >= 1);
7411 /* FORNOW. This restriction should be relaxed. */
7412 if (loop && nested_in_vect_loop_p (loop, stmt_info) && ncopies > 1)
7414 if (dump_enabled_p ())
7415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7416 "multiple types in nested loop.\n");
7417 return false;
7420 if (!vect_check_store_rhs (stmt_info, op, &rhs_dt, &rhs_vectype, &vls_type))
7421 return false;
7423 elem_type = TREE_TYPE (vectype);
7424 vec_mode = TYPE_MODE (vectype);
7426 if (!STMT_VINFO_DATA_REF (stmt_info))
7427 return false;
7429 vect_memory_access_type memory_access_type;
7430 if (!get_load_store_type (stmt_info, vectype, slp, mask, vls_type, ncopies,
7431 &memory_access_type, &gs_info))
7432 return false;
7434 if (mask)
7436 if (memory_access_type == VMAT_CONTIGUOUS)
7438 if (!VECTOR_MODE_P (vec_mode)
7439 || !can_vec_mask_load_store_p (vec_mode,
7440 TYPE_MODE (mask_vectype), false))
7441 return false;
7443 else if (memory_access_type != VMAT_LOAD_STORE_LANES
7444 && (memory_access_type != VMAT_GATHER_SCATTER
7445 || (gs_info.decl && !VECTOR_BOOLEAN_TYPE_P (mask_vectype))))
7447 if (dump_enabled_p ())
7448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7449 "unsupported access type for masked store.\n");
7450 return false;
7453 else
7455 /* FORNOW. In some cases can vectorize even if data-type not supported
7456 (e.g. - array initialization with 0). */
7457 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
7458 return false;
7461 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
7462 grouped_store = (STMT_VINFO_GROUPED_ACCESS (stmt_info)
7463 && memory_access_type != VMAT_GATHER_SCATTER
7464 && (slp || memory_access_type != VMAT_CONTIGUOUS));
7465 if (grouped_store)
7467 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
7468 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7469 group_size = DR_GROUP_SIZE (first_stmt_info);
7471 else
7473 first_stmt_info = stmt_info;
7474 first_dr_info = dr_info;
7475 group_size = vec_num = 1;
7478 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) > 1 && !vec_stmt)
7480 if (!check_scan_store (stmt_info, vectype, rhs_dt, slp, mask,
7481 memory_access_type))
7482 return false;
7485 if (!vec_stmt) /* transformation not required. */
7487 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7489 if (loop_vinfo
7490 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7491 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
7492 memory_access_type, &gs_info, mask);
7494 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
7495 vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type,
7496 vls_type, slp_node, cost_vec);
7497 return true;
7499 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7501 /* Transform. */
7503 ensure_base_align (dr_info);
7505 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
7507 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
7508 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7509 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
7510 tree ptr, var, scale, vec_mask;
7511 tree mask_arg = NULL_TREE, mask_op = NULL_TREE, perm_mask = NULL_TREE;
7512 tree mask_halfvectype = mask_vectype;
7513 edge pe = loop_preheader_edge (loop);
7514 gimple_seq seq;
7515 basic_block new_bb;
7516 enum { NARROW, NONE, WIDEN } modifier;
7517 poly_uint64 scatter_off_nunits
7518 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
7520 if (known_eq (nunits, scatter_off_nunits))
7521 modifier = NONE;
7522 else if (known_eq (nunits * 2, scatter_off_nunits))
7524 modifier = WIDEN;
7526 /* Currently gathers and scatters are only supported for
7527 fixed-length vectors. */
7528 unsigned int count = scatter_off_nunits.to_constant ();
7529 vec_perm_builder sel (count, count, 1);
7530 for (i = 0; i < (unsigned int) count; ++i)
7531 sel.quick_push (i | (count / 2));
7533 vec_perm_indices indices (sel, 1, count);
7534 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
7535 indices);
7536 gcc_assert (perm_mask != NULL_TREE);
7538 else if (known_eq (nunits, scatter_off_nunits * 2))
7540 modifier = NARROW;
7542 /* Currently gathers and scatters are only supported for
7543 fixed-length vectors. */
7544 unsigned int count = nunits.to_constant ();
7545 vec_perm_builder sel (count, count, 1);
7546 for (i = 0; i < (unsigned int) count; ++i)
7547 sel.quick_push (i | (count / 2));
7549 vec_perm_indices indices (sel, 2, count);
7550 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
7551 gcc_assert (perm_mask != NULL_TREE);
7552 ncopies *= 2;
7554 if (mask)
7555 mask_halfvectype = truth_type_for (gs_info.offset_vectype);
7557 else
7558 gcc_unreachable ();
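   /* E.g. for COUNT == 4 the selector built above is { 2, 3, 2, 3 }, i.e.
      the upper half of the vector duplicated; the odd-numbered copies below
      use it to access the upper half of the offset vector (WIDEN) or of the
      rhs vector (NARROW).  */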
7560 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
7561 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7562 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7563 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7564 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
7565 scaletype = TREE_VALUE (arglist);
7567 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
7568 && TREE_CODE (rettype) == VOID_TYPE);
7570 ptr = fold_convert (ptrtype, gs_info.base);
7571 if (!is_gimple_min_invariant (ptr))
7573 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
7574 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
7575 gcc_assert (!new_bb);
7578 if (mask == NULL_TREE)
7580 mask_arg = build_int_cst (masktype, -1);
7581 mask_arg = vect_init_vector (stmt_info, mask_arg, masktype, NULL);
7584 scale = build_int_cst (scaletype, gs_info.scale);
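   /* The call emitted below always has the shape
	GS_INFO.DECL (ptr, mask, offsets, source, scale)
      where GS_INFO.DECL is the target's scatter builtin (on x86 presumably
      one of the __builtin_ia32_scatter* builtins) and MASK is either the
      all-ones constant created above or the converted vector mask.  */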
7586 prev_stmt_info = NULL;
7587 for (j = 0; j < ncopies; ++j)
7589 if (j == 0)
7591 src = vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt_info);
7592 op = vec_oprnd0 = vect_get_vec_def_for_operand (gs_info.offset,
7593 stmt_info);
7594 if (mask)
7595 mask_op = vec_mask = vect_get_vec_def_for_operand (mask,
7596 stmt_info);
7598 else if (modifier != NONE && (j & 1))
7600 if (modifier == WIDEN)
7602 src
7603 = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7604 vec_oprnd1);
7605 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
7606 stmt_info, gsi);
7607 if (mask)
7608 mask_op
7609 = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7610 vec_mask);
7612 else if (modifier == NARROW)
7614 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
7615 stmt_info, gsi);
7616 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7617 vec_oprnd0);
7619 else
7620 gcc_unreachable ();
7622 else
7624 src = vec_oprnd1 = vect_get_vec_def_for_stmt_copy (vinfo,
7625 vec_oprnd1);
7626 op = vec_oprnd0 = vect_get_vec_def_for_stmt_copy (vinfo,
7627 vec_oprnd0);
7628 if (mask)
7629 mask_op = vec_mask = vect_get_vec_def_for_stmt_copy (vinfo,
7630 vec_mask);
7633 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
7635 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
7636 TYPE_VECTOR_SUBPARTS (srctype)));
7637 var = vect_get_new_ssa_name (srctype, vect_simple_var);
7638 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
7639 gassign *new_stmt
7640 = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
7641 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7642 src = var;
7645 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
7647 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
7648 TYPE_VECTOR_SUBPARTS (idxtype)));
7649 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
7650 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
7651 gassign *new_stmt
7652 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
7653 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7654 op = var;
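   /* If a mask is present it still has to be turned into the scalar integer
      MASKTYPE the builtin expects: optionally a VEC_UNPACK_{LO,HI}_EXPR for
      the NARROW case, then a VIEW_CONVERT_EXPR of the boolean vector to an
      integer of the same mode, and finally a widening NOP_EXPR if MASKTYPE
      is wider.  The precise types involved are target-specific.  */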
7657 if (mask)
7659 tree utype;
7660 mask_arg = mask_op;
7661 if (modifier == NARROW)
7663 var = vect_get_new_ssa_name (mask_halfvectype,
7664 vect_simple_var);
7665 gassign *new_stmt
7666 = gimple_build_assign (var, (j & 1) ? VEC_UNPACK_HI_EXPR
7667 : VEC_UNPACK_LO_EXPR,
7668 mask_op);
7669 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7670 mask_arg = var;
7672 tree optype = TREE_TYPE (mask_arg);
7673 if (TYPE_MODE (masktype) == TYPE_MODE (optype))
7674 utype = masktype;
7675 else
7676 utype = lang_hooks.types.type_for_mode (TYPE_MODE (optype), 1);
7677 var = vect_get_new_ssa_name (utype, vect_scalar_var);
7678 mask_arg = build1 (VIEW_CONVERT_EXPR, utype, mask_arg);
7679 gassign *new_stmt
7680 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_arg);
7681 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7682 mask_arg = var;
7683 if (!useless_type_conversion_p (masktype, utype))
7685 gcc_assert (TYPE_PRECISION (utype)
7686 <= TYPE_PRECISION (masktype));
7687 var = vect_get_new_ssa_name (masktype, vect_scalar_var);
7688 new_stmt = gimple_build_assign (var, NOP_EXPR, mask_arg);
7689 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7690 mask_arg = var;
7694 gcall *new_stmt
7695 = gimple_build_call (gs_info.decl, 5, ptr, mask_arg, op, src, scale);
7696 stmt_vec_info new_stmt_info
7697 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7699 if (prev_stmt_info == NULL)
7700 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
7701 else
7702 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
7703 prev_stmt_info = new_stmt_info;
7705 return true;
7707 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) >= 3)
7708 return vectorizable_scan_store (stmt_info, gsi, vec_stmt, ncopies);
7710 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7711 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info))++;
7713 if (grouped_store)
7715 /* FORNOW */
7716 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt_info));
7718 /* We vectorize all the stmts of the interleaving group when we
7719 reach the last stmt in the group. */
7720 if (DR_GROUP_STORE_COUNT (first_stmt_info)
7721 < DR_GROUP_SIZE (first_stmt_info)
7722 && !slp)
7724 *vec_stmt = NULL;
7725 return true;
7728 if (slp)
7730 grouped_store = false;
7731 /* VEC_NUM is the number of vect stmts to be created for this
7732 group. */
7733 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7734 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7735 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info)
7736 == first_stmt_info);
7737 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
7738 op = vect_get_store_rhs (first_stmt_info);
7740 else
7741 /* VEC_NUM is the number of vect stmts to be created for this
7742 group. */
7743 vec_num = group_size;
7745 ref_type = get_group_alias_ptr_type (first_stmt_info);
7747 else
7748 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
7750 if (dump_enabled_p ())
7751 dump_printf_loc (MSG_NOTE, vect_location,
7752 "transform store. ncopies = %d\n", ncopies);
7754 if (memory_access_type == VMAT_ELEMENTWISE
7755 || memory_access_type == VMAT_STRIDED_SLP)
7757 gimple_stmt_iterator incr_gsi;
7758 bool insert_after;
7759 gimple *incr;
7760 tree offvar;
7761 tree ivstep;
7762 tree running_off;
7763 tree stride_base, stride_step, alias_off;
7764 tree vec_oprnd;
7765 tree dr_offset;
7766 unsigned int g;
7767 /* Checked by get_load_store_type. */
7768 unsigned int const_nunits = nunits.to_constant ();
7770 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7771 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
7773 dr_offset = get_dr_vinfo_offset (first_dr_info);
7774 stride_base
7775 = fold_build_pointer_plus
7776 (DR_BASE_ADDRESS (first_dr_info->dr),
7777 size_binop (PLUS_EXPR,
7778 convert_to_ptrofftype (dr_offset),
7779 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
7780 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
7782 /* For a store with loop-invariant (but other than power-of-2)
7783 stride (i.e. not a grouped access) like so:
7785 for (i = 0; i < n; i += stride)
7786 array[i] = ...;
7788 we generate a new induction variable and new stores from
7789 the components of the (vectorized) rhs:
7791 for (j = 0; ; j += VF*stride)
7792 vectemp = ...;
7793 tmp1 = vectemp[0];
7794 array[j] = tmp1;
7795 tmp2 = vectemp[1];
7796 array[j + stride] = tmp2;
7800 unsigned nstores = const_nunits;
7801 unsigned lnel = 1;
7802 tree ltype = elem_type;
7803 tree lvectype = vectype;
7804 if (slp)
7806 if (group_size < const_nunits
7807 && const_nunits % group_size == 0)
7809 nstores = const_nunits / group_size;
7810 lnel = group_size;
7811 ltype = build_vector_type (elem_type, group_size);
7812 lvectype = vectype;
7814 /* First check whether the vec_extract optab supports extraction
7815 of vector elts directly; if not, use the integer-type fallback below. */
7816 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
7817 machine_mode vmode;
7818 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7819 || !related_vector_mode (TYPE_MODE (vectype), elmode,
7820 group_size).exists (&vmode)
7821 || (convert_optab_handler (vec_extract_optab,
7822 TYPE_MODE (vectype), vmode)
7823 == CODE_FOR_nothing))
7825 /* Try to avoid emitting an extract of vector elements
7826 by performing the extracts using an integer type of the
7827 same size, extracting from a vector of those and then
7828 re-interpreting it as the original vector type if
7829 supported. */
7830 unsigned lsize
7831 = group_size * GET_MODE_BITSIZE (elmode);
7832 unsigned int lnunits = const_nunits / group_size;
7833 /* If we can't construct such a vector fall back to
7834 element extracts from the original vector type and
7835 element size stores. */
7836 if (int_mode_for_size (lsize, 0).exists (&elmode)
7837 && VECTOR_MODE_P (TYPE_MODE (vectype))
7838 && related_vector_mode (TYPE_MODE (vectype), elmode,
7839 lnunits).exists (&vmode)
7840 && (convert_optab_handler (vec_extract_optab,
7841 vmode, elmode)
7842 != CODE_FOR_nothing))
7844 nstores = lnunits;
7845 lnel = group_size;
7846 ltype = build_nonstandard_integer_type (lsize, 1);
7847 lvectype = build_vector_type (ltype, nstores);
7849 /* Else fall back to vector extraction anyway.
7850 Fewer stores are more important than avoiding spilling
7851 of the vector we extract from. Compared to the
7852 construction case in vectorizable_load no store-forwarding
7853 issue exists here for reasonable archs. */
7856 else if (group_size >= const_nunits
7857 && group_size % const_nunits == 0)
7859 nstores = 1;
7860 lnel = const_nunits;
7861 ltype = vectype;
7862 lvectype = vectype;
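   /* A worked example of the cases above (purely illustrative): for a group
      of 2 floats and a 4-element vectype, the first choice extracts
      vector(2) float pieces directly; if vec_extract cannot do that, the
      vector is instead viewed as two 64-bit integers and those are
      extracted; and once the group covers at least a whole vector, whole
      vectors are stored unchanged.  */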
7864 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
7865 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7868 ivstep = stride_step;
7869 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
7870 build_int_cst (TREE_TYPE (ivstep), vf));
7872 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7874 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
7875 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
7876 create_iv (stride_base, ivstep, NULL,
7877 loop, &incr_gsi, insert_after,
7878 &offvar, NULL);
7879 incr = gsi_stmt (incr_gsi);
7880 loop_vinfo->add_stmt (incr);
7882 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
7884 prev_stmt_info = NULL;
7885 alias_off = build_int_cst (ref_type, 0);
7886 stmt_vec_info next_stmt_info = first_stmt_info;
7887 for (g = 0; g < group_size; g++)
7889 running_off = offvar;
7890 if (g)
7892 tree size = TYPE_SIZE_UNIT (ltype);
7893 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
7894 size);
7895 tree newoff = copy_ssa_name (running_off, NULL);
7896 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7897 running_off, pos);
7898 vect_finish_stmt_generation (stmt_info, incr, gsi);
7899 running_off = newoff;
7901 unsigned int group_el = 0;
7902 unsigned HOST_WIDE_INT
7903 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7904 for (j = 0; j < ncopies; j++)
7906 /* We've set op and dt above, from vect_get_store_rhs,
7907 and first_stmt_info == stmt_info. */
7908 if (j == 0)
7910 if (slp)
7912 vect_get_vec_defs (op, NULL_TREE, stmt_info,
7913 &vec_oprnds, NULL, slp_node);
7914 vec_oprnd = vec_oprnds[0];
7916 else
7918 op = vect_get_store_rhs (next_stmt_info);
7919 vec_oprnd = vect_get_vec_def_for_operand
7920 (op, next_stmt_info);
7923 else
7925 if (slp)
7926 vec_oprnd = vec_oprnds[j];
7927 else
7928 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo,
7929 vec_oprnd);
7931 /* Pun the vector to extract from if necessary. */
7932 if (lvectype != vectype)
7934 tree tem = make_ssa_name (lvectype);
7935 gimple *pun
7936 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
7937 lvectype, vec_oprnd));
7938 vect_finish_stmt_generation (stmt_info, pun, gsi);
7939 vec_oprnd = tem;
7941 for (i = 0; i < nstores; i++)
7943 tree newref, newoff;
7944 gimple *incr, *assign;
7945 tree size = TYPE_SIZE (ltype);
7946 /* Extract the i'th component. */
7947 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
7948 bitsize_int (i), size);
7949 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
7950 size, pos);
7952 elem = force_gimple_operand_gsi (gsi, elem, true,
7953 NULL_TREE, true,
7954 GSI_SAME_STMT);
7956 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7957 group_el * elsz);
7958 newref = build2 (MEM_REF, ltype,
7959 running_off, this_off);
7960 vect_copy_ref_info (newref, DR_REF (first_dr_info->dr));
7962 /* And store it to *running_off. */
7963 assign = gimple_build_assign (newref, elem);
7964 stmt_vec_info assign_info
7965 = vect_finish_stmt_generation (stmt_info, assign, gsi);
7967 group_el += lnel;
7968 if (! slp
7969 || group_el == group_size)
7971 newoff = copy_ssa_name (running_off, NULL);
7972 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7973 running_off, stride_step);
7974 vect_finish_stmt_generation (stmt_info, incr, gsi);
7976 running_off = newoff;
7977 group_el = 0;
7979 if (g == group_size - 1
7980 && !slp)
7982 if (j == 0 && i == 0)
7983 STMT_VINFO_VEC_STMT (stmt_info)
7984 = *vec_stmt = assign_info;
7985 else
7986 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign_info;
7987 prev_stmt_info = assign_info;
7991 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
7992 if (slp)
7993 break;
7996 vec_oprnds.release ();
7997 return true;
8000 auto_vec<tree> dr_chain (group_size);
8001 oprnds.create (group_size);
8003 alignment_support_scheme
8004 = vect_supportable_dr_alignment (first_dr_info, false);
8005 gcc_assert (alignment_support_scheme);
8006 vec_loop_masks *loop_masks
8007 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8008 ? &LOOP_VINFO_MASKS (loop_vinfo)
8009 : NULL);
8010 /* Targets with store-lane instructions must not require explicit
8011 realignment. vect_supportable_dr_alignment always returns either
8012 dr_aligned or dr_unaligned_supported for masked operations. */
8013 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
8014 && !mask
8015 && !loop_masks)
8016 || alignment_support_scheme == dr_aligned
8017 || alignment_support_scheme == dr_unaligned_supported);
8019 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
8020 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8021 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
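   /* E.g. for a 4-element vector OFFSET is -3 here: with a decreasing
      (VMAT_CONTIGUOUS_DOWN/REVERSE) access the vector ending at the current
      scalar element starts NUNITS - 1 elements earlier, so the data pointer
      is set up that far back.  */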
8023 tree bump;
8024 tree vec_offset = NULL_TREE;
8025 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8027 aggr_type = NULL_TREE;
8028 bump = NULL_TREE;
8030 else if (memory_access_type == VMAT_GATHER_SCATTER)
8032 aggr_type = elem_type;
8033 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
8034 &bump, &vec_offset);
8036 else
8038 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8039 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
8040 else
8041 aggr_type = vectype;
8042 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
8043 memory_access_type);
8046 if (mask)
8047 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
8049 /* In case the vectorization factor (VF) is bigger than the number
8050 of elements that we can fit in a vectype (nunits), we have to generate
8051 more than one vector stmt -- i.e., we need to "unroll" the
8052 vector stmt by a factor VF/nunits. For more details see documentation in
8053 vect_get_vec_def_for_copy_stmt. */
8055 /* In case of interleaving (non-unit grouped access):
8057 S1: &base + 2 = x2
8058 S2: &base = x0
8059 S3: &base + 1 = x1
8060 S4: &base + 3 = x3
8062 We create vectorized stores starting from base address (the access of the
8063 first stmt in the chain (S2 in the above example), when the last store stmt
8064 of the chain (S4) is reached:
8066 VS1: &base = vx2
8067 VS2: &base + vec_size*1 = vx0
8068 VS3: &base + vec_size*2 = vx1
8069 VS4: &base + vec_size*3 = vx3
8071 Then permutation statements are generated:
8073 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
8074 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
8077 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8078 (the order of the data-refs in the output of vect_permute_store_chain
8079 corresponds to the order of scalar stmts in the interleaving chain - see
8080 the documentation of vect_permute_store_chain()).
8082 In case of both multiple types and interleaving, above vector stores and
8083 permutation stmts are created for every copy. The result vector stmts are
8084 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
8085 STMT_VINFO_RELATED_STMT for the next copies.
8088 prev_stmt_info = NULL;
8089 tree vec_mask = NULL_TREE;
8090 for (j = 0; j < ncopies; j++)
8092 stmt_vec_info new_stmt_info;
8093 if (j == 0)
8095 if (slp)
8097 /* Get vectorized arguments for SLP_NODE. */
8098 vect_get_vec_defs (op, NULL_TREE, stmt_info, &vec_oprnds,
8099 NULL, slp_node);
8101 vec_oprnd = vec_oprnds[0];
8103 else
8105 /* For interleaved stores we collect vectorized defs for all the
8106 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
8107 used as an input to vect_permute_store_chain(), and OPRNDS as
8108 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
8110 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8111 OPRNDS are of size 1. */
8112 stmt_vec_info next_stmt_info = first_stmt_info;
8113 for (i = 0; i < group_size; i++)
8115 /* Since gaps are not supported for interleaved stores,
8116 DR_GROUP_SIZE is the exact number of stmts in the chain.
8117 Therefore, NEXT_STMT_INFO can't be NULL_TREE. If there
8118 is no interleaving, DR_GROUP_SIZE is 1,
8119 and only one iteration of the loop will be executed. */
8120 op = vect_get_store_rhs (next_stmt_info);
8121 vec_oprnd = vect_get_vec_def_for_operand
8122 (op, next_stmt_info);
8123 dr_chain.quick_push (vec_oprnd);
8124 oprnds.quick_push (vec_oprnd);
8125 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8127 if (mask)
8128 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
8129 mask_vectype);
8132 /* We should have caught mismatched types earlier. */
8133 gcc_assert (useless_type_conversion_p (vectype,
8134 TREE_TYPE (vec_oprnd)));
8135 bool simd_lane_access_p
8136 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
8137 if (simd_lane_access_p
8138 && !loop_masks
8139 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
8140 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
8141 && integer_zerop (get_dr_vinfo_offset (first_dr_info))
8142 && integer_zerop (DR_INIT (first_dr_info->dr))
8143 && alias_sets_conflict_p (get_alias_set (aggr_type),
8144 get_alias_set (TREE_TYPE (ref_type))))
8146 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
8147 dataref_offset = build_int_cst (ref_type, 0);
8149 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8150 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
8151 &dataref_ptr, &vec_offset);
8152 else
8153 dataref_ptr
8154 = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
8155 simd_lane_access_p ? loop : NULL,
8156 offset, &dummy, gsi, &ptr_incr,
8157 simd_lane_access_p, NULL_TREE, bump);
8159 else
8161 /* For interleaved stores we created vectorized defs for all the
8162 defs stored in OPRNDS in the previous iteration (previous copy).
8163 DR_CHAIN is then used as an input to vect_permute_store_chain(),
8164 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
8165 next copy.
8166 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
8167 OPRNDS are of size 1. */
8168 for (i = 0; i < group_size; i++)
8170 op = oprnds[i];
8171 vec_oprnd = vect_get_vec_def_for_stmt_copy (vinfo, op);
8172 dr_chain[i] = vec_oprnd;
8173 oprnds[i] = vec_oprnd;
8175 if (mask)
8176 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
8177 if (dataref_offset)
8178 dataref_offset
8179 = int_const_binop (PLUS_EXPR, dataref_offset, bump);
8180 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
8181 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
8182 else
8183 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8184 stmt_info, bump);
8187 if (memory_access_type == VMAT_LOAD_STORE_LANES)
8189 tree vec_array;
8191 /* Get an array into which we can store the individual vectors. */
8192 vec_array = create_vector_array (vectype, vec_num);
8194 /* Invalidate the current contents of VEC_ARRAY. This should
8195 become an RTL clobber too, which prevents the vector registers
8196 from being upward-exposed. */
8197 vect_clobber_variable (stmt_info, gsi, vec_array);
8199 /* Store the individual vectors into the array. */
8200 for (i = 0; i < vec_num; i++)
8202 vec_oprnd = dr_chain[i];
8203 write_vector_array (stmt_info, gsi, vec_oprnd, vec_array, i);
8206 tree final_mask = NULL;
8207 if (loop_masks)
8208 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
8209 vectype, j);
8210 if (vec_mask)
8211 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8212 vec_mask, gsi);
8214 gcall *call;
8215 if (final_mask)
8217 /* Emit:
8218 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8219 VEC_ARRAY). */
8220 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
8221 tree alias_ptr = build_int_cst (ref_type, align);
8222 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
8223 dataref_ptr, alias_ptr,
8224 final_mask, vec_array);
8226 else
8228 /* Emit:
8229 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8230 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
8231 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
8232 vec_array);
8233 gimple_call_set_lhs (call, data_ref);
8235 gimple_call_set_nothrow (call, true);
8236 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
8238 /* Record that VEC_ARRAY is now dead. */
8239 vect_clobber_variable (stmt_info, gsi, vec_array);
8241 else
8243 new_stmt_info = NULL;
8244 if (grouped_store)
8246 if (j == 0)
8247 result_chain.create (group_size);
8248 /* Permute. */
8249 vect_permute_store_chain (dr_chain, group_size, stmt_info, gsi,
8250 &result_chain);
8253 stmt_vec_info next_stmt_info = first_stmt_info;
8254 for (i = 0; i < vec_num; i++)
8256 unsigned misalign;
8257 unsigned HOST_WIDE_INT align;
8259 tree final_mask = NULL_TREE;
8260 if (loop_masks)
8261 final_mask = vect_get_loop_mask (gsi, loop_masks,
8262 vec_num * ncopies,
8263 vectype, vec_num * j + i);
8264 if (vec_mask)
8265 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
8266 vec_mask, gsi);
8268 if (memory_access_type == VMAT_GATHER_SCATTER)
8270 tree scale = size_int (gs_info.scale);
8271 gcall *call;
8272 if (loop_masks)
8273 call = gimple_build_call_internal
8274 (IFN_MASK_SCATTER_STORE, 5, dataref_ptr, vec_offset,
8275 scale, vec_oprnd, final_mask);
8276 else
8277 call = gimple_build_call_internal
8278 (IFN_SCATTER_STORE, 4, dataref_ptr, vec_offset,
8279 scale, vec_oprnd);
8280 gimple_call_set_nothrow (call, true);
8281 new_stmt_info
8282 = vect_finish_stmt_generation (stmt_info, call, gsi);
8283 break;
8286 if (i > 0)
8287 /* Bump the vector pointer. */
8288 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
8289 stmt_info, bump);
8291 if (slp)
8292 vec_oprnd = vec_oprnds[i];
8293 else if (grouped_store)
8294 /* For grouped stores vectorized defs are interleaved in
8295 vect_permute_store_chain(). */
8296 vec_oprnd = result_chain[i];
8298 align = known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
8299 if (aligned_access_p (first_dr_info))
8300 misalign = 0;
8301 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8303 align = dr_alignment (vect_dr_behavior (first_dr_info));
8304 misalign = 0;
8306 else
8307 misalign = DR_MISALIGNMENT (first_dr_info);
8308 if (dataref_offset == NULL_TREE
8309 && TREE_CODE (dataref_ptr) == SSA_NAME)
8310 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
8311 misalign);
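   /* For VMAT_CONTIGUOUS_REVERSE the value being stored also has to be
      reversed element-wise, e.g. with a { 3, 2, 1, 0 } permutation for a
      4-element vector; that is what the block below emits.  */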
8313 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
8315 tree perm_mask = perm_mask_for_reverse (vectype);
8316 tree perm_dest = vect_create_destination_var
8317 (vect_get_store_rhs (stmt_info), vectype);
8318 tree new_temp = make_ssa_name (perm_dest);
8320 /* Generate the permute statement. */
8321 gimple *perm_stmt
8322 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
8323 vec_oprnd, perm_mask);
8324 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8326 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
8327 vec_oprnd = new_temp;
8330 /* Arguments are ready. Create the new vector stmt. */
8331 if (final_mask)
8333 align = least_bit_hwi (misalign | align);
8334 tree ptr = build_int_cst (ref_type, align);
8335 gcall *call
8336 = gimple_build_call_internal (IFN_MASK_STORE, 4,
8337 dataref_ptr, ptr,
8338 final_mask, vec_oprnd);
8339 gimple_call_set_nothrow (call, true);
8340 new_stmt_info
8341 = vect_finish_stmt_generation (stmt_info, call, gsi);
8343 else
8345 data_ref = fold_build2 (MEM_REF, vectype,
8346 dataref_ptr,
8347 dataref_offset
8348 ? dataref_offset
8349 : build_int_cst (ref_type, 0));
8350 if (aligned_access_p (first_dr_info))
8352 else if (DR_MISALIGNMENT (first_dr_info) == -1)
8353 TREE_TYPE (data_ref)
8354 = build_aligned_type (TREE_TYPE (data_ref),
8355 align * BITS_PER_UNIT);
8356 else
8357 TREE_TYPE (data_ref)
8358 = build_aligned_type (TREE_TYPE (data_ref),
8359 TYPE_ALIGN (elem_type));
8360 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
8361 gassign *new_stmt
8362 = gimple_build_assign (data_ref, vec_oprnd);
8363 new_stmt_info
8364 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
8367 if (slp)
8368 continue;
8370 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
8371 if (!next_stmt_info)
8372 break;
8375 if (!slp)
8377 if (j == 0)
8378 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8379 else
8380 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8381 prev_stmt_info = new_stmt_info;
8385 oprnds.release ();
8386 result_chain.release ();
8387 vec_oprnds.release ();
8389 return true;
8392 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8393 VECTOR_CST mask. No checks are made that the target platform supports the
8394 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8395 vect_gen_perm_mask_checked. */
8397 tree
8398 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
8400 tree mask_type;
8402 poly_uint64 nunits = sel.length ();
8403 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
8405 mask_type = build_vector_type (ssizetype, nunits);
8406 return vec_perm_indices_to_tree (mask_type, sel);
8409 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8410 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8412 tree
8413 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
8415 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
8416 return vect_gen_perm_mask_any (vectype, sel);
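/* A usage sketch (hypothetical, but mirroring the callers above): to build
   a reversal mask for a 4-element VECTYPE one would do

     vec_perm_builder sel (4, 4, 1);
     for (int i = 0; i < 4; ++i)
       sel.quick_push (3 - i);
     vec_perm_indices indices (sel, 1, 4);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   and use MASK as the third operand of a VEC_PERM_EXPR; the checked variant
   additionally asserts can_vec_perm_const_p for the vector mode.  */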
8419 /* Given vector variables X and Y that were generated for the scalar
8420 STMT_INFO, generate instructions to permute the vector elements of X and Y
8421 using permutation mask MASK_VEC, insert them at *GSI and return the
8422 permuted vector variable. */
8424 static tree
8425 permute_vec_elements (tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
8426 gimple_stmt_iterator *gsi)
8428 tree vectype = TREE_TYPE (x);
8429 tree perm_dest, data_ref;
8430 gimple *perm_stmt;
8432 tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
8433 if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
8434 perm_dest = vect_create_destination_var (scalar_dest, vectype);
8435 else
8436 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
8437 data_ref = make_ssa_name (perm_dest);
8439 /* Generate the permute statement. */
8440 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
8441 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
8443 return data_ref;
8446 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8447 inserting them on the loop's preheader edge. Returns true if we
8448 were successful in doing so (and thus STMT_INFO can then be moved),
8449 otherwise returns false. */
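/* For instance, when the invariant load is _3 = *_2 and its address
   _2 = p_6 + 16 is computed by a statement inside the loop whose own
   operands are all defined outside of it, that address computation is
   moved to the preheader so the load can subsequently be emitted there
   as well.  */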
8451 static bool
8452 hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
8454 ssa_op_iter i;
8455 tree op;
8456 bool any = false;
8458 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8460 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8461 if (!gimple_nop_p (def_stmt)
8462 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8464 /* Make sure we don't need to recurse. While we could do
8465 so in simple cases when there are more complex use webs
8466 we don't have an easy way to preserve stmt order to fulfil
8467 dependencies within them. */
8468 tree op2;
8469 ssa_op_iter i2;
8470 if (gimple_code (def_stmt) == GIMPLE_PHI)
8471 return false;
8472 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
8474 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
8475 if (!gimple_nop_p (def_stmt2)
8476 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
8477 return false;
8479 any = true;
8483 if (!any)
8484 return true;
8486 FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
8488 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
8489 if (!gimple_nop_p (def_stmt)
8490 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
8492 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
8493 gsi_remove (&gsi, false);
8494 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
8498 return true;
8501 /* vectorizable_load.
8503 Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
8504 that can be vectorized.
8505 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8506 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8507 Return true if STMT_INFO is vectorizable in this way. */
8509 static bool
8510 vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
8511 stmt_vec_info *vec_stmt, slp_tree slp_node,
8512 slp_instance slp_node_instance,
8513 stmt_vector_for_cost *cost_vec)
8515 tree scalar_dest;
8516 tree vec_dest = NULL;
8517 tree data_ref = NULL;
8518 stmt_vec_info prev_stmt_info;
8519 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8520 class loop *loop = NULL;
8521 class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
8522 bool nested_in_vect_loop = false;
8523 tree elem_type;
8524 tree new_temp;
8525 machine_mode mode;
8526 tree dummy;
8527 enum dr_alignment_support alignment_support_scheme;
8528 tree dataref_ptr = NULL_TREE;
8529 tree dataref_offset = NULL_TREE;
8530 gimple *ptr_incr = NULL;
8531 int ncopies;
8532 int i, j;
8533 unsigned int group_size;
8534 poly_uint64 group_gap_adj;
8535 tree msq = NULL_TREE, lsq;
8536 tree offset = NULL_TREE;
8537 tree byte_offset = NULL_TREE;
8538 tree realignment_token = NULL_TREE;
8539 gphi *phi = NULL;
8540 vec<tree> dr_chain = vNULL;
8541 bool grouped_load = false;
8542 stmt_vec_info first_stmt_info;
8543 stmt_vec_info first_stmt_info_for_drptr = NULL;
8544 bool compute_in_loop = false;
8545 class loop *at_loop;
8546 int vec_num;
8547 bool slp = (slp_node != NULL);
8548 bool slp_perm = false;
8549 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8550 poly_uint64 vf;
8551 tree aggr_type;
8552 gather_scatter_info gs_info;
8553 vec_info *vinfo = stmt_info->vinfo;
8554 tree ref_type;
8555 enum vect_def_type mask_dt = vect_unknown_def_type;
8557 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8558 return false;
8560 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8561 && ! vec_stmt)
8562 return false;
8564 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
8565 if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
8567 scalar_dest = gimple_assign_lhs (assign);
8568 if (TREE_CODE (scalar_dest) != SSA_NAME)
8569 return false;
8571 tree_code code = gimple_assign_rhs_code (assign);
8572 if (code != ARRAY_REF
8573 && code != BIT_FIELD_REF
8574 && code != INDIRECT_REF
8575 && code != COMPONENT_REF
8576 && code != IMAGPART_EXPR
8577 && code != REALPART_EXPR
8578 && code != MEM_REF
8579 && TREE_CODE_CLASS (code) != tcc_declaration)
8580 return false;
8582 else
8584 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
8585 if (!call || !gimple_call_internal_p (call))
8586 return false;
8588 internal_fn ifn = gimple_call_internal_fn (call);
8589 if (!internal_load_fn_p (ifn))
8590 return false;
8592 scalar_dest = gimple_call_lhs (call);
8593 if (!scalar_dest)
8594 return false;
8596 int mask_index = internal_fn_mask_index (ifn);
8597 if (mask_index >= 0)
8599 mask = gimple_call_arg (call, mask_index);
8600 if (!vect_check_scalar_mask (stmt_info, mask, &mask_dt,
8601 &mask_vectype))
8602 return false;
8606 if (!STMT_VINFO_DATA_REF (stmt_info))
8607 return false;
8609 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8610 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8612 if (loop_vinfo)
8614 loop = LOOP_VINFO_LOOP (loop_vinfo);
8615 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
8616 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8618 else
8619 vf = 1;
8621 /* Multiple types in SLP are handled by creating the appropriate number of
8622 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8623 case of SLP. */
8624 if (slp)
8625 ncopies = 1;
8626 else
8627 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8629 gcc_assert (ncopies >= 1);
8631 /* FORNOW. This restriction should be relaxed. */
8632 if (nested_in_vect_loop && ncopies > 1)
8634 if (dump_enabled_p ())
8635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8636 "multiple types in nested loop.\n");
8637 return false;
8640 /* Invalidate assumptions made by dependence analysis when vectorization
8641 on the unrolled body effectively re-orders stmts. */
8642 if (ncopies > 1
8643 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8644 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8645 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8647 if (dump_enabled_p ())
8648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8649 "cannot perform implicit CSE when unrolling "
8650 "with negative dependence distance\n");
8651 return false;
8654 elem_type = TREE_TYPE (vectype);
8655 mode = TYPE_MODE (vectype);
8657 /* FORNOW. In some cases can vectorize even if data-type not supported
8658 (e.g. - data copies). */
8659 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
8661 if (dump_enabled_p ())
8662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8663 "Aligned load, but unsupported type.\n");
8664 return false;
8667 /* Check if the load is a part of an interleaving chain. */
8668 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
8670 grouped_load = true;
8671 /* FORNOW */
8672 gcc_assert (!nested_in_vect_loop);
8673 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
8675 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8676 group_size = DR_GROUP_SIZE (first_stmt_info);
8678 /* Refuse non-SLP vectorization of SLP-only groups. */
8679 if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
8681 if (dump_enabled_p ())
8682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8683 "cannot vectorize load in non-SLP mode.\n");
8684 return false;
8687 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
8688 slp_perm = true;
8690 /* Invalidate assumptions made by dependence analysis when vectorization
8691 on the unrolled body effectively re-orders stmts. */
8692 if (!PURE_SLP_STMT (stmt_info)
8693 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
8694 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
8695 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
8697 if (dump_enabled_p ())
8698 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8699 "cannot perform implicit CSE when performing "
8700 "group loads with negative dependence distance\n");
8701 return false;
8704 else
8705 group_size = 1;
8707 vect_memory_access_type memory_access_type;
8708 if (!get_load_store_type (stmt_info, vectype, slp, mask, VLS_LOAD, ncopies,
8709 &memory_access_type, &gs_info))
8710 return false;
8712 if (mask)
8714 if (memory_access_type == VMAT_CONTIGUOUS)
8716 machine_mode vec_mode = TYPE_MODE (vectype);
8717 if (!VECTOR_MODE_P (vec_mode)
8718 || !can_vec_mask_load_store_p (vec_mode,
8719 TYPE_MODE (mask_vectype), true))
8720 return false;
8722 else if (memory_access_type != VMAT_LOAD_STORE_LANES
8723 && memory_access_type != VMAT_GATHER_SCATTER)
8725 if (dump_enabled_p ())
8726 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8727 "unsupported access type for masked load.\n");
8728 return false;
8732 if (!vec_stmt) /* transformation not required. */
8734 if (!slp)
8735 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
8737 if (loop_vinfo
8738 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
8739 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
8740 memory_access_type, &gs_info, mask);
8742 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
8743 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
8744 slp_node_instance, slp_node, cost_vec);
8745 return true;
8748 if (!slp)
8749 gcc_assert (memory_access_type
8750 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
8752 if (dump_enabled_p ())
8753 dump_printf_loc (MSG_NOTE, vect_location,
8754 "transform load. ncopies = %d\n", ncopies);
8756 /* Transform. */
8758 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
8759 ensure_base_align (dr_info);
8761 if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
8763 vect_build_gather_load_calls (stmt_info, gsi, vec_stmt, &gs_info, mask);
8764 return true;
8767 if (memory_access_type == VMAT_INVARIANT)
8769 gcc_assert (!grouped_load && !mask && !bb_vinfo);
8770 /* If we have versioned for aliasing or the loop doesn't
8771 have any data dependencies that would preclude this,
8772 then we are sure this is a loop invariant load and
8773 thus we can insert it on the preheader edge. */
8774 bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
8775 && !nested_in_vect_loop
8776 && hoist_defs_of_uses (stmt_info, loop));
8777 if (hoist_p)
8779 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
8780 if (dump_enabled_p ())
8781 dump_printf_loc (MSG_NOTE, vect_location,
8782 "hoisting out of the vectorized loop: %G", stmt);
8783 scalar_dest = copy_ssa_name (scalar_dest);
8784 tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
8785 gsi_insert_on_edge_immediate
8786 (loop_preheader_edge (loop),
8787 gimple_build_assign (scalar_dest, rhs));
8789 /* These copies are all equivalent, but currently the representation
8790 requires a separate STMT_VINFO_VEC_STMT for each one. */
8791 prev_stmt_info = NULL;
8792 gimple_stmt_iterator gsi2 = *gsi;
8793 gsi_next (&gsi2);
8794 for (j = 0; j < ncopies; j++)
8796 stmt_vec_info new_stmt_info;
8797 if (hoist_p)
8799 new_temp = vect_init_vector (stmt_info, scalar_dest,
8800 vectype, NULL);
8801 gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
8802 new_stmt_info = vinfo->add_stmt (new_stmt);
8804 else
8806 new_temp = vect_init_vector (stmt_info, scalar_dest,
8807 vectype, &gsi2);
8808 new_stmt_info = vinfo->lookup_def (new_temp);
8810 if (slp)
8811 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
8812 else if (j == 0)
8813 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
8814 else
8815 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
8816 prev_stmt_info = new_stmt_info;
8818 return true;
8821 if (memory_access_type == VMAT_ELEMENTWISE
8822 || memory_access_type == VMAT_STRIDED_SLP)
8824 gimple_stmt_iterator incr_gsi;
8825 bool insert_after;
8826 gimple *incr;
8827 tree offvar;
8828 tree ivstep;
8829 tree running_off;
8830 vec<constructor_elt, va_gc> *v = NULL;
8831 tree stride_base, stride_step, alias_off;
8832 /* Checked by get_load_store_type. */
8833 unsigned int const_nunits = nunits.to_constant ();
8834 unsigned HOST_WIDE_INT cst_offset = 0;
8835 tree dr_offset;
8837 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
8838 gcc_assert (!nested_in_vect_loop);
8840 if (grouped_load)
8842 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
8843 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
8845 else
8847 first_stmt_info = stmt_info;
8848 first_dr_info = dr_info;
8850 if (slp && grouped_load)
8852 group_size = DR_GROUP_SIZE (first_stmt_info);
8853 ref_type = get_group_alias_ptr_type (first_stmt_info);
8855 else
8857 if (grouped_load)
8858 cst_offset
8859 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)))
8860 * vect_get_place_in_interleaving_chain (stmt_info,
8861 first_stmt_info));
8862 group_size = 1;
8863 ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
8866 dr_offset = get_dr_vinfo_offset (first_dr_info);
8867 stride_base
8868 = fold_build_pointer_plus
8869 (DR_BASE_ADDRESS (first_dr_info->dr),
8870 size_binop (PLUS_EXPR,
8871 convert_to_ptrofftype (dr_offset),
8872 convert_to_ptrofftype (DR_INIT (first_dr_info->dr))));
8873 stride_step = fold_convert (sizetype, DR_STEP (first_dr_info->dr));
8875 /* For a load with loop-invariant (but other than power-of-2)
8876 stride (i.e. not a grouped access) like so:
8878 for (i = 0; i < n; i += stride)
8879 ... = array[i];
8881 we generate a new induction variable and new accesses to
8882 form a new vector (or vectors, depending on ncopies):
8884 for (j = 0; ; j += VF*stride)
8885 tmp1 = array[j];
8886 tmp2 = array[j + stride];
8888 vectemp = {tmp1, tmp2, ...}
8891 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
8892 build_int_cst (TREE_TYPE (stride_step), vf));
8894 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8896 stride_base = cse_and_gimplify_to_preheader (loop_vinfo, stride_base);
8897 ivstep = cse_and_gimplify_to_preheader (loop_vinfo, ivstep);
8898 create_iv (stride_base, ivstep, NULL,
8899 loop, &incr_gsi, insert_after,
8900 &offvar, NULL);
8901 incr = gsi_stmt (incr_gsi);
8902 loop_vinfo->add_stmt (incr);
8904 stride_step = cse_and_gimplify_to_preheader (loop_vinfo, stride_step);
8906 prev_stmt_info = NULL;
8907 running_off = offvar;
8908 alias_off = build_int_cst (ref_type, 0);
8909 int nloads = const_nunits;
8910 int lnel = 1;
8911 tree ltype = TREE_TYPE (vectype);
8912 tree lvectype = vectype;
8913 auto_vec<tree> dr_chain;
8914 if (memory_access_type == VMAT_STRIDED_SLP)
8916 if (group_size < const_nunits)
8918 /* First check if vec_init optab supports construction from
8919 vector elts directly. */
8920 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
8921 machine_mode vmode;
8922 if (VECTOR_MODE_P (TYPE_MODE (vectype))
8923 && related_vector_mode (TYPE_MODE (vectype), elmode,
8924 group_size).exists (&vmode)
8925 && (convert_optab_handler (vec_init_optab,
8926 TYPE_MODE (vectype), vmode)
8927 != CODE_FOR_nothing))
8929 nloads = const_nunits / group_size;
8930 lnel = group_size;
8931 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
8933 else
8935 /* Otherwise avoid emitting a constructor of vector elements
8936 by performing the loads using an integer type of the same
8937 size, constructing a vector of those and then
8938 re-interpreting it as the original vector type.
8939 This avoids a huge runtime penalty due to the general
8940 inability to perform store forwarding from smaller stores
8941 to a larger load. */
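/* For example (modes are illustrative): for a V8HI vectype and
group_size == 2, lsize is 32 bits and lnunits is 4; assuming the target
provides SImode and a V4SI vec_init from SImode elements, each group of
two shorts is loaded as a single 32-bit integer, a V4SI is built from
the four integers, and the VIEW_CONVERT_EXPR emitted below reinterprets
it as the original V8HI. */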
8942 unsigned lsize
8943 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
8944 unsigned int lnunits = const_nunits / group_size;
8945 /* If we can't construct such a vector fall back to
8946 element loads of the original vector type. */
8947 if (int_mode_for_size (lsize, 0).exists (&elmode)
8948 && VECTOR_MODE_P (TYPE_MODE (vectype))
8949 && related_vector_mode (TYPE_MODE (vectype), elmode,
8950 lnunits).exists (&vmode)
8951 && (convert_optab_handler (vec_init_optab, vmode, elmode)
8952 != CODE_FOR_nothing))
8954 nloads = lnunits;
8955 lnel = group_size;
8956 ltype = build_nonstandard_integer_type (lsize, 1);
8957 lvectype = build_vector_type (ltype, nloads);
8961 else
8963 nloads = 1;
8964 lnel = const_nunits;
8965 ltype = vectype;
8967 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
8969 /* Load vector(1) scalar_type if the vectype has just one element. */
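/* For example (numbers are illustrative): with group_size == 3, a
vectorization factor of 4 and const_nunits == 4, this gives
ncopies == CEIL (12, 4) == 3, i.e. three vectors covering all twelve
group elements, even if the permutation result itself would fit in
fewer vector stmts. */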
8970 else if (nloads == 1)
8971 ltype = vectype;
8973 if (slp)
8975 /* For SLP permutation support we need to load the whole group,
8976 not only the number of vector stmts the permutation result
8977 fits in. */
8978 if (slp_perm)
8980 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8981 variable VF. */
8982 unsigned int const_vf = vf.to_constant ();
8983 ncopies = CEIL (group_size * const_vf, const_nunits);
8984 dr_chain.create (ncopies);
8986 else
8987 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
8989 unsigned int group_el = 0;
8990 unsigned HOST_WIDE_INT
8991 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
8992 for (j = 0; j < ncopies; j++)
8994 if (nloads > 1)
8995 vec_alloc (v, nloads);
8996 stmt_vec_info new_stmt_info = NULL;
8997 for (i = 0; i < nloads; i++)
8999 tree this_off = build_int_cst (TREE_TYPE (alias_off),
9000 group_el * elsz + cst_offset);
9001 tree data_ref = build2 (MEM_REF, ltype, running_off, this_off);
9002 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9003 gassign *new_stmt
9004 = gimple_build_assign (make_ssa_name (ltype), data_ref);
9005 new_stmt_info
9006 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9007 if (nloads > 1)
9008 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9009 gimple_assign_lhs (new_stmt));
9011 group_el += lnel;
9012 if (! slp
9013 || group_el == group_size)
9015 tree newoff = copy_ssa_name (running_off);
9016 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
9017 running_off, stride_step);
9018 vect_finish_stmt_generation (stmt_info, incr, gsi);
9020 running_off = newoff;
9021 group_el = 0;
9024 if (nloads > 1)
9026 tree vec_inv = build_constructor (lvectype, v);
9027 new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
9028 new_stmt_info = vinfo->lookup_def (new_temp);
9029 if (lvectype != vectype)
9031 gassign *new_stmt
9032 = gimple_build_assign (make_ssa_name (vectype),
9033 VIEW_CONVERT_EXPR,
9034 build1 (VIEW_CONVERT_EXPR,
9035 vectype, new_temp));
9036 new_stmt_info
9037 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9041 if (slp)
9043 if (slp_perm)
9044 dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
9045 else
9046 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9048 else
9050 if (j == 0)
9051 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9052 else
9053 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9054 prev_stmt_info = new_stmt_info;
9057 if (slp_perm)
9059 unsigned n_perms;
9060 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9061 slp_node_instance, false, &n_perms);
9063 return true;
9066 if (memory_access_type == VMAT_GATHER_SCATTER
9067 || (!slp && memory_access_type == VMAT_CONTIGUOUS))
9068 grouped_load = false;
9070 if (grouped_load)
9072 first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
9073 group_size = DR_GROUP_SIZE (first_stmt_info);
9074 /* For SLP vectorization we directly vectorize a subchain
9075 without permutation. */
9076 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
9077 first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9078 /* For BB vectorization always use the first stmt to base
9079 the data ref pointer on. */
9080 if (bb_vinfo)
9081 first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9083 /* Check if the chain of loads is already vectorized. */
9084 if (STMT_VINFO_VEC_STMT (first_stmt_info)
9085 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
9086 ??? But we can only do so if there is exactly one
9087 as we have no way to get at the rest. Leave the CSE
9088 opportunity alone.
9089 ??? With the group load eventually participating
9090 in multiple different permutations (having multiple
9091 slp nodes which refer to the same group) the CSE
9092 is even wrong code. See PR56270. */
9093 && !slp)
9095 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9096 return true;
9098 first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
9099 group_gap_adj = 0;
9101 /* VEC_NUM is the number of vect stmts to be created for this group. */
9102 if (slp)
9104 grouped_load = false;
9105 /* If an SLP permutation is from N elements to N elements,
9106 and if one vector holds a whole number of N, we can load
9107 the inputs to the permutation in the same way as an
9108 unpermuted sequence. In other cases we need to load the
9109 whole group, not only the number of vector stmts the
9110 permutation result fits in. */
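/* For example (numbers are illustrative): a permutation within a group
of 2 elements with nunits == 8 satisfies multiple_p (nunits, group_size),
so the loads are generated exactly as for an unpermuted access; a group
of 3 with nunits == 4 does not, and the whole group must be loaded with
a possible group_gap_adj. */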
9111 if (slp_perm
9112 && (group_size != SLP_INSTANCE_GROUP_SIZE (slp_node_instance)
9113 || !multiple_p (nunits, group_size)))
9115 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
9116 variable VF; see vect_transform_slp_perm_load. */
9117 unsigned int const_vf = vf.to_constant ();
9118 unsigned int const_nunits = nunits.to_constant ();
9119 vec_num = CEIL (group_size * const_vf, const_nunits);
9120 group_gap_adj = vf * group_size - nunits * vec_num;
9122 else
9124 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9125 group_gap_adj
9126 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
9129 else
9130 vec_num = group_size;
9132 ref_type = get_group_alias_ptr_type (first_stmt_info);
9134 else
9136 first_stmt_info = stmt_info;
9137 first_dr_info = dr_info;
9138 group_size = vec_num = 1;
9139 group_gap_adj = 0;
9140 ref_type = reference_alias_ptr_type (DR_REF (first_dr_info->dr));
9143 alignment_support_scheme
9144 = vect_supportable_dr_alignment (first_dr_info, false);
9145 gcc_assert (alignment_support_scheme);
9146 vec_loop_masks *loop_masks
9147 = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
9148 ? &LOOP_VINFO_MASKS (loop_vinfo)
9149 : NULL);
9150 /* Targets with load-lanes instructions must not require explicit
9151 realignment. vect_supportable_dr_alignment always returns either
9152 dr_aligned or dr_unaligned_supported for masked operations. */
9153 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
9154 && !mask
9155 && !loop_masks)
9156 || alignment_support_scheme == dr_aligned
9157 || alignment_support_scheme == dr_unaligned_supported);
9159 /* In case the vectorization factor (VF) is bigger than the number
9160 of elements that we can fit in a vectype (nunits), we have to generate
9161 more than one vector stmt - i.e., we need to "unroll" the
9162 vector stmt by a factor VF/nunits. In doing so, we record a pointer
9163 from one copy of the vector stmt to the next, in the field
9164 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
9165 stages to find the correct vector defs to be used when vectorizing
9166 stmts that use the defs of the current stmt. The example below
9167 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
9168 need to create 4 vectorized stmts):
9170 before vectorization:
9171 RELATED_STMT VEC_STMT
9172 S1: x = memref - -
9173 S2: z = x + 1 - -
9175 step 1: vectorize stmt S1:
9176 We first create the vector stmt VS1_0, and, as usual, record a
9177 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
9178 Next, we create the vector stmt VS1_1, and record a pointer to
9179 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
9180 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
9181 stmts and pointers:
9182 RELATED_STMT VEC_STMT
9183 VS1_0: vx0 = memref0 VS1_1 -
9184 VS1_1: vx1 = memref1 VS1_2 -
9185 VS1_2: vx2 = memref2 VS1_3 -
9186 VS1_3: vx3 = memref3 - -
9187 S1: x = load - VS1_0
9188 S2: z = x + 1 - -
9190 See the documentation of vect_get_vec_def_for_stmt_copy for how the
9191 information recorded in the RELATED_STMT field is used to vectorize
9192 stmt S2. */
9194 /* In case of interleaving (non-unit grouped access):
9196 S1: x2 = &base + 2
9197 S2: x0 = &base
9198 S3: x1 = &base + 1
9199 S4: x3 = &base + 3
9201 Vectorized loads are created in the order of memory accesses
9202 starting from the access of the first stmt of the chain:
9204 VS1: vx0 = &base
9205 VS2: vx1 = &base + vec_size*1
9206 VS3: vx3 = &base + vec_size*2
9207 VS4: vx4 = &base + vec_size*3
9209 Then permutation statements are generated:
9211 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
9212 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
9215 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
9216 (the order of the data-refs in the output of vect_permute_load_chain
9217 corresponds to the order of scalar stmts in the interleaving chain - see
9218 the documentation of vect_permute_load_chain()).
9219 The generation of permutation stmts and recording them in
9220 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
9222 In case of both multiple types and interleaving, the vector loads and
9223 permutation stmts above are created for every copy. The result vector
9224 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
9225 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
9227 /* If the data reference is aligned (dr_aligned) or potentially unaligned
9228 on a target that supports unaligned accesses (dr_unaligned_supported)
9229 we generate the following code:
9230 p = initial_addr;
9231 indx = 0;
9232 loop {
9233 p = p + indx * vectype_size;
9234 vec_dest = *(p);
9235 indx = indx + 1;
9238 Otherwise, the data reference is potentially unaligned on a target that
9239 does not support unaligned accesses (dr_explicit_realign_optimized) -
9240 then generate the following code, in which the data in each iteration is
9241 obtained by two vector loads, one from the previous iteration, and one
9242 from the current iteration:
9243 p1 = initial_addr;
9244 msq_init = *(floor(p1))
9245 p2 = initial_addr + VS - 1;
9246 realignment_token = call target_builtin;
9247 indx = 0;
9248 loop {
9249 p2 = p2 + indx * vectype_size
9250 lsq = *(floor(p2))
9251 vec_dest = realign_load (msq, lsq, realignment_token)
9252 indx = indx + 1;
9253 msq = lsq;
9254 } */
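/* As a byte-granular illustration (numbers are hypothetical): with
16-byte vectors and initial_addr misaligned by 4, msq covers bytes
[addr-4, addr+12) and lsq covers [addr+12, addr+28); realign_load
combines the two aligned loads to produce the 16 bytes starting at
addr. */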
9256 /* If the misalignment remains the same throughout the execution of the
9257 loop, we can create the init_addr and permutation mask at the loop
9258 preheader. Otherwise, they need to be created inside the loop.
9259 This can only occur when vectorizing memory accesses in the inner-loop
9260 nested within an outer-loop that is being vectorized. */
9262 if (nested_in_vect_loop
9263 && !multiple_p (DR_STEP_ALIGNMENT (dr_info->dr),
9264 GET_MODE_SIZE (TYPE_MODE (vectype))))
9266 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
9267 compute_in_loop = true;
9270 bool diff_first_stmt_info
9271 = first_stmt_info_for_drptr && first_stmt_info != first_stmt_info_for_drptr;
9273 if ((alignment_support_scheme == dr_explicit_realign_optimized
9274 || alignment_support_scheme == dr_explicit_realign)
9275 && !compute_in_loop)
9277 /* If we have a different first_stmt_info, we can't set up realignment
9278 here, since we can't guarantee that the first_stmt_info DR has been
9279 initialized yet; instead use the first_stmt_info_for_drptr DR, bumping
9280 by the distance from the first_stmt_info DR as below.
9281 if (!diff_first_stmt_info)
9282 msq = vect_setup_realignment (first_stmt_info, gsi, &realignment_token,
9283 alignment_support_scheme, NULL_TREE,
9284 &at_loop);
9285 if (alignment_support_scheme == dr_explicit_realign_optimized)
9287 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
9288 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
9289 size_one_node);
9290 gcc_assert (!first_stmt_info_for_drptr);
9293 else
9294 at_loop = loop;
9296 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9297 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
9299 tree bump;
9300 tree vec_offset = NULL_TREE;
9301 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9303 aggr_type = NULL_TREE;
9304 bump = NULL_TREE;
9306 else if (memory_access_type == VMAT_GATHER_SCATTER)
9308 aggr_type = elem_type;
9309 vect_get_strided_load_store_ops (stmt_info, loop_vinfo, &gs_info,
9310 &bump, &vec_offset);
9312 else
9314 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9315 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
9316 else
9317 aggr_type = vectype;
9318 bump = vect_get_data_ptr_increment (dr_info, aggr_type,
9319 memory_access_type);
9322 tree vec_mask = NULL_TREE;
9323 prev_stmt_info = NULL;
9324 poly_uint64 group_elt = 0;
9325 for (j = 0; j < ncopies; j++)
9327 stmt_vec_info new_stmt_info = NULL;
9328 /* 1. Create the vector or array pointer update chain. */
9329 if (j == 0)
9331 bool simd_lane_access_p
9332 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) != 0;
9333 if (simd_lane_access_p
9334 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info->dr)) == ADDR_EXPR
9335 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info->dr), 0))
9336 && integer_zerop (get_dr_vinfo_offset (first_dr_info))
9337 && integer_zerop (DR_INIT (first_dr_info->dr))
9338 && alias_sets_conflict_p (get_alias_set (aggr_type),
9339 get_alias_set (TREE_TYPE (ref_type)))
9340 && (alignment_support_scheme == dr_aligned
9341 || alignment_support_scheme == dr_unaligned_supported))
9343 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
9344 dataref_offset = build_int_cst (ref_type, 0);
9346 else if (diff_first_stmt_info)
9348 dataref_ptr
9349 = vect_create_data_ref_ptr (first_stmt_info_for_drptr,
9350 aggr_type, at_loop, offset, &dummy,
9351 gsi, &ptr_incr, simd_lane_access_p,
9352 byte_offset, bump);
9353 /* Adjust the pointer by the difference to first_stmt. */
9354 data_reference_p ptrdr
9355 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr);
9356 tree diff
9357 = fold_convert (sizetype,
9358 size_binop (MINUS_EXPR,
9359 DR_INIT (first_dr_info->dr),
9360 DR_INIT (ptrdr)));
9361 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9362 stmt_info, diff);
9363 if (alignment_support_scheme == dr_explicit_realign)
9365 msq = vect_setup_realignment (first_stmt_info_for_drptr, gsi,
9366 &realignment_token,
9367 alignment_support_scheme,
9368 dataref_ptr, &at_loop);
9369 gcc_assert (!compute_in_loop);
9372 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9373 vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
9374 &dataref_ptr, &vec_offset);
9375 else
9376 dataref_ptr
9377 = vect_create_data_ref_ptr (first_stmt_info, aggr_type, at_loop,
9378 offset, &dummy, gsi, &ptr_incr,
9379 simd_lane_access_p,
9380 byte_offset, bump);
9381 if (mask)
9383 if (slp_node)
9385 auto_vec<vec<tree> > vec_defs (1);
9386 vect_get_slp_defs (slp_node, &vec_defs);
9387 vec_mask = vec_defs[0][0];
9389 else
9390 vec_mask = vect_get_vec_def_for_operand (mask, stmt_info,
9391 mask_vectype);
9394 else
9396 if (dataref_offset)
9397 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
9398 bump);
9399 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
9400 vec_offset = vect_get_vec_def_for_stmt_copy (vinfo, vec_offset);
9401 else
9402 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9403 stmt_info, bump);
9404 if (mask)
9405 vec_mask = vect_get_vec_def_for_stmt_copy (vinfo, vec_mask);
9408 if (grouped_load || slp_perm)
9409 dr_chain.create (vec_num);
9411 if (memory_access_type == VMAT_LOAD_STORE_LANES)
9413 tree vec_array;
9415 vec_array = create_vector_array (vectype, vec_num);
9417 tree final_mask = NULL_TREE;
9418 if (loop_masks)
9419 final_mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
9420 vectype, j);
9421 if (vec_mask)
9422 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9423 vec_mask, gsi);
9425 gcall *call;
9426 if (final_mask)
9428 /* Emit:
9429 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9430 VEC_MASK). */
9431 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
9432 tree alias_ptr = build_int_cst (ref_type, align);
9433 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
9434 dataref_ptr, alias_ptr,
9435 final_mask);
9437 else
9439 /* Emit:
9440 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9441 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
9442 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
9444 gimple_call_set_lhs (call, vec_array);
9445 gimple_call_set_nothrow (call, true);
9446 new_stmt_info = vect_finish_stmt_generation (stmt_info, call, gsi);
9448 /* Extract each vector into an SSA_NAME. */
9449 for (i = 0; i < vec_num; i++)
9451 new_temp = read_vector_array (stmt_info, gsi, scalar_dest,
9452 vec_array, i);
9453 dr_chain.quick_push (new_temp);
9456 /* Record the mapping between SSA_NAMEs and statements. */
9457 vect_record_grouped_load_vectors (stmt_info, dr_chain);
9459 /* Record that VEC_ARRAY is now dead. */
9460 vect_clobber_variable (stmt_info, gsi, vec_array);
9462 else
9464 for (i = 0; i < vec_num; i++)
9466 tree final_mask = NULL_TREE;
9467 if (loop_masks
9468 && memory_access_type != VMAT_INVARIANT)
9469 final_mask = vect_get_loop_mask (gsi, loop_masks,
9470 vec_num * ncopies,
9471 vectype, vec_num * j + i);
9472 if (vec_mask)
9473 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
9474 vec_mask, gsi);
9476 if (i > 0)
9477 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9478 stmt_info, bump);
9480 /* 2. Create the vector-load in the loop. */
9481 gimple *new_stmt = NULL;
9482 switch (alignment_support_scheme)
9484 case dr_aligned:
9485 case dr_unaligned_supported:
9487 unsigned int misalign;
9488 unsigned HOST_WIDE_INT align;
9490 if (memory_access_type == VMAT_GATHER_SCATTER)
9492 tree zero = build_zero_cst (vectype);
9493 tree scale = size_int (gs_info.scale);
9494 gcall *call;
9495 if (loop_masks)
9496 call = gimple_build_call_internal
9497 (IFN_MASK_GATHER_LOAD, 5, dataref_ptr,
9498 vec_offset, scale, zero, final_mask);
9499 else
9500 call = gimple_build_call_internal
9501 (IFN_GATHER_LOAD, 4, dataref_ptr,
9502 vec_offset, scale, zero);
9503 gimple_call_set_nothrow (call, true);
9504 new_stmt = call;
9505 data_ref = NULL_TREE;
9506 break;
9509 align =
9510 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info));
9511 if (alignment_support_scheme == dr_aligned)
9513 gcc_assert (aligned_access_p (first_dr_info));
9514 misalign = 0;
9516 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9518 align = dr_alignment
9519 (vect_dr_behavior (first_dr_info));
9520 misalign = 0;
9522 else
9523 misalign = DR_MISALIGNMENT (first_dr_info);
9524 if (dataref_offset == NULL_TREE
9525 && TREE_CODE (dataref_ptr) == SSA_NAME)
9526 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
9527 align, misalign);
9529 if (final_mask)
9531 align = least_bit_hwi (misalign | align);
9532 tree ptr = build_int_cst (ref_type, align);
9533 gcall *call
9534 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
9535 dataref_ptr, ptr,
9536 final_mask);
9537 gimple_call_set_nothrow (call, true);
9538 new_stmt = call;
9539 data_ref = NULL_TREE;
9541 else
9543 tree ltype = vectype;
9544 /* If there's no peeling for gaps but we have a gap
9545 with SLP loads, then load only the lower half of the
9546 vector. See get_group_load_store_type for
9547 when we apply this optimization. */
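/* For example (numbers are illustrative): with group_size == 4, a
group gap of 2 and nunits == 4, ltype becomes a two-element vector;
only the lower half is loaded and the CONSTRUCTOR built below pads the
upper half with zeros. */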
9548 if (slp
9549 && loop_vinfo
9550 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
9551 && DR_GROUP_GAP (first_stmt_info) != 0
9552 && known_eq (nunits,
9553 (group_size
9554 - DR_GROUP_GAP (first_stmt_info)) * 2)
9555 && known_eq (nunits, group_size))
9556 ltype = build_vector_type (TREE_TYPE (vectype),
9557 (group_size
9558 - DR_GROUP_GAP
9559 (first_stmt_info)));
9560 data_ref
9561 = fold_build2 (MEM_REF, ltype, dataref_ptr,
9562 dataref_offset
9563 ? dataref_offset
9564 : build_int_cst (ref_type, 0));
9565 if (alignment_support_scheme == dr_aligned)
9567 else if (DR_MISALIGNMENT (first_dr_info) == -1)
9568 TREE_TYPE (data_ref)
9569 = build_aligned_type (TREE_TYPE (data_ref),
9570 align * BITS_PER_UNIT);
9571 else
9572 TREE_TYPE (data_ref)
9573 = build_aligned_type (TREE_TYPE (data_ref),
9574 TYPE_ALIGN (elem_type));
9575 if (ltype != vectype)
9577 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9578 tree tem = make_ssa_name (ltype);
9579 new_stmt = gimple_build_assign (tem, data_ref);
9580 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9581 data_ref = NULL;
9582 vec<constructor_elt, va_gc> *v;
9583 vec_alloc (v, 2);
9584 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem);
9585 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
9586 build_zero_cst (ltype));
9587 new_stmt
9588 = gimple_build_assign (vec_dest,
9589 build_constructor
9590 (vectype, v));
9593 break;
9595 case dr_explicit_realign:
9597 tree ptr, bump;
9599 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
9601 if (compute_in_loop)
9602 msq = vect_setup_realignment (first_stmt_info, gsi,
9603 &realignment_token,
9604 dr_explicit_realign,
9605 dataref_ptr, NULL);
9607 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9608 ptr = copy_ssa_name (dataref_ptr);
9609 else
9610 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
9611 // For explicit realign the target alignment should be
9612 // known at compile time.
9613 unsigned HOST_WIDE_INT align =
9614 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9615 new_stmt = gimple_build_assign
9616 (ptr, BIT_AND_EXPR, dataref_ptr,
9617 build_int_cst
9618 (TREE_TYPE (dataref_ptr),
9619 -(HOST_WIDE_INT) align));
9620 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9621 data_ref
9622 = build2 (MEM_REF, vectype, ptr,
9623 build_int_cst (ref_type, 0));
9624 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9625 vec_dest = vect_create_destination_var (scalar_dest,
9626 vectype);
9627 new_stmt = gimple_build_assign (vec_dest, data_ref);
9628 new_temp = make_ssa_name (vec_dest, new_stmt);
9629 gimple_assign_set_lhs (new_stmt, new_temp);
9630 gimple_move_vops (new_stmt, stmt_info->stmt);
9631 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9632 msq = new_temp;
9634 bump = size_binop (MULT_EXPR, vs,
9635 TYPE_SIZE_UNIT (elem_type));
9636 bump = size_binop (MINUS_EXPR, bump, size_one_node);
9637 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi,
9638 stmt_info, bump);
9639 new_stmt = gimple_build_assign
9640 (NULL_TREE, BIT_AND_EXPR, ptr,
9641 build_int_cst
9642 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
9643 ptr = copy_ssa_name (ptr, new_stmt);
9644 gimple_assign_set_lhs (new_stmt, ptr);
9645 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9646 data_ref
9647 = build2 (MEM_REF, vectype, ptr,
9648 build_int_cst (ref_type, 0));
9649 break;
9651 case dr_explicit_realign_optimized:
9653 if (TREE_CODE (dataref_ptr) == SSA_NAME)
9654 new_temp = copy_ssa_name (dataref_ptr);
9655 else
9656 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
9657 // We should only be doing this if we know the target
9658 // alignment at compile time.
9659 unsigned HOST_WIDE_INT align =
9660 DR_TARGET_ALIGNMENT (first_dr_info).to_constant ();
9661 new_stmt = gimple_build_assign
9662 (new_temp, BIT_AND_EXPR, dataref_ptr,
9663 build_int_cst (TREE_TYPE (dataref_ptr),
9664 -(HOST_WIDE_INT) align));
9665 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9666 data_ref
9667 = build2 (MEM_REF, vectype, new_temp,
9668 build_int_cst (ref_type, 0));
9669 break;
9671 default:
9672 gcc_unreachable ();
9674 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9675 /* DATA_REF is null if we've already built the statement. */
9676 if (data_ref)
9678 vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr));
9679 new_stmt = gimple_build_assign (vec_dest, data_ref);
9681 new_temp = make_ssa_name (vec_dest, new_stmt);
9682 gimple_set_lhs (new_stmt, new_temp);
9683 new_stmt_info
9684 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9686 /* 3. Handle explicit realignment if necessary/supported.
9687 Create in loop:
9688 vec_dest = realign_load (msq, lsq, realignment_token) */
9689 if (alignment_support_scheme == dr_explicit_realign_optimized
9690 || alignment_support_scheme == dr_explicit_realign)
9692 lsq = gimple_assign_lhs (new_stmt);
9693 if (!realignment_token)
9694 realignment_token = dataref_ptr;
9695 vec_dest = vect_create_destination_var (scalar_dest, vectype);
9696 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
9697 msq, lsq, realignment_token);
9698 new_temp = make_ssa_name (vec_dest, new_stmt);
9699 gimple_assign_set_lhs (new_stmt, new_temp);
9700 new_stmt_info
9701 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
9703 if (alignment_support_scheme == dr_explicit_realign_optimized)
9705 gcc_assert (phi);
9706 if (i == vec_num - 1 && j == ncopies - 1)
9707 add_phi_arg (phi, lsq,
9708 loop_latch_edge (containing_loop),
9709 UNKNOWN_LOCATION);
9710 msq = lsq;
9714 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
9716 tree perm_mask = perm_mask_for_reverse (vectype);
9717 new_temp = permute_vec_elements (new_temp, new_temp,
9718 perm_mask, stmt_info, gsi);
9719 new_stmt_info = vinfo->lookup_def (new_temp);
9722 /* Collect vector loads and later create their permutation in
9723 vect_transform_grouped_load (). */
9724 if (grouped_load || slp_perm)
9725 dr_chain.quick_push (new_temp);
9727 /* Store vector loads in the corresponding SLP_NODE. */
9728 if (slp && !slp_perm)
9729 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
9731 /* With an SLP permutation we load the gaps as well; without one
9732 we need to skip the gaps after we manage to fully load
9733 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9734 group_elt += nunits;
9735 if (maybe_ne (group_gap_adj, 0U)
9736 && !slp_perm
9737 && known_eq (group_elt, group_size - group_gap_adj))
9739 poly_wide_int bump_val
9740 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9741 * group_gap_adj);
9742 tree bump = wide_int_to_tree (sizetype, bump_val);
9743 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9744 stmt_info, bump);
9745 group_elt = 0;
9748 /* Bump the vector pointer to account for a gap or for excess
9749 elements loaded for a permuted SLP load. */
9750 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
9752 poly_wide_int bump_val
9753 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
9754 * group_gap_adj);
9755 tree bump = wide_int_to_tree (sizetype, bump_val);
9756 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
9757 stmt_info, bump);
9761 if (slp && !slp_perm)
9762 continue;
9764 if (slp_perm)
9766 unsigned n_perms;
9767 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
9768 slp_node_instance, false,
9769 &n_perms))
9771 dr_chain.release ();
9772 return false;
9775 else
9777 if (grouped_load)
9779 if (memory_access_type != VMAT_LOAD_STORE_LANES)
9780 vect_transform_grouped_load (stmt_info, dr_chain,
9781 group_size, gsi);
9782 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9784 else
9786 if (j == 0)
9787 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
9788 else
9789 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
9790 prev_stmt_info = new_stmt_info;
9793 dr_chain.release ();
9796 return true;
9799 /* Function vect_is_simple_cond.
9801 Input:
9802 VINFO - the vectorization info for the loop or basic block being vectorized.
9803 COND - Condition that is checked for simple use.
9805 Output:
9806 *COMP_VECTYPE - the vector type for the comparison.
9807 *DTS - The def types for the arguments of the comparison
9809 Returns whether a COND can be vectorized. Checks whether
9810 condition operands are supportable using vect_is_simple_use. */
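/* For example (SSA names are illustrative): both a boolean mask such as
_5 used directly as the condition and a comparison such as a_1 < b_2,
whose operands may be SSA names or constants, are accepted here. */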
9812 static bool
9813 vect_is_simple_cond (tree cond, vec_info *vinfo, slp_tree slp_node,
9814 tree *comp_vectype, enum vect_def_type *dts,
9815 tree vectype)
9817 tree lhs, rhs;
9818 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9820 /* Mask case. */
9821 if (TREE_CODE (cond) == SSA_NAME
9822 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
9824 if (!vect_is_simple_use (cond, vinfo, &dts[0], comp_vectype)
9825 || !*comp_vectype
9826 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
9827 return false;
9828 return true;
9831 if (!COMPARISON_CLASS_P (cond))
9832 return false;
9834 lhs = TREE_OPERAND (cond, 0);
9835 rhs = TREE_OPERAND (cond, 1);
9837 if (TREE_CODE (lhs) == SSA_NAME)
9839 if (!vect_is_simple_use (lhs, vinfo, &dts[0], &vectype1))
9840 return false;
9842 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
9843 || TREE_CODE (lhs) == FIXED_CST)
9844 dts[0] = vect_constant_def;
9845 else
9846 return false;
9848 if (TREE_CODE (rhs) == SSA_NAME)
9850 if (!vect_is_simple_use (rhs, vinfo, &dts[1], &vectype2))
9851 return false;
9853 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
9854 || TREE_CODE (rhs) == FIXED_CST)
9855 dts[1] = vect_constant_def;
9856 else
9857 return false;
9859 if (vectype1 && vectype2
9860 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
9861 TYPE_VECTOR_SUBPARTS (vectype2)))
9862 return false;
9864 *comp_vectype = vectype1 ? vectype1 : vectype2;
9865 /* Invariant comparison. */
9866 if (! *comp_vectype)
9868 tree scalar_type = TREE_TYPE (lhs);
9869 /* If we can widen the comparison to match vectype do so. */
9870 if (INTEGRAL_TYPE_P (scalar_type)
9871 && vectype
9872 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
9873 TYPE_SIZE (TREE_TYPE (vectype))))
9874 scalar_type = build_nonstandard_integer_type
9875 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
9876 TYPE_UNSIGNED (scalar_type));
9877 *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
9878 slp_node);
9881 return true;
9884 /* vectorizable_condition.
9886 Check if STMT_INFO is a conditional modify expression that can be vectorized.
9887 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9888 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9889 at GSI.
9891 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9893 Return true if STMT_INFO is vectorizable in this way. */
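/* For example (names are illustrative): a scalar statement
x_5 = a_1 < b_2 ? c_3 : d_4;
is vectorized into a VEC_COND_EXPR
vect_x = VEC_COND_EXPR <vect_a < vect_b, vect_c, vect_d>;
possibly with the comparison lowered to bit operations as described
below. */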
9895 static bool
9896 vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
9897 stmt_vec_info *vec_stmt,
9898 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
9900 vec_info *vinfo = stmt_info->vinfo;
9901 tree scalar_dest = NULL_TREE;
9902 tree vec_dest = NULL_TREE;
9903 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
9904 tree then_clause, else_clause;
9905 tree comp_vectype = NULL_TREE;
9906 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
9907 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
9908 tree vec_compare;
9909 tree new_temp;
9910 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
9911 enum vect_def_type dts[4]
9912 = {vect_unknown_def_type, vect_unknown_def_type,
9913 vect_unknown_def_type, vect_unknown_def_type};
9914 int ndts = 4;
9915 int ncopies;
9916 int vec_num;
9917 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
9918 stmt_vec_info prev_stmt_info = NULL;
9919 int i, j;
9920 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9921 vec<tree> vec_oprnds0 = vNULL;
9922 vec<tree> vec_oprnds1 = vNULL;
9923 vec<tree> vec_oprnds2 = vNULL;
9924 vec<tree> vec_oprnds3 = vNULL;
9925 tree vec_cmp_type;
9926 bool masked = false;
9928 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
9929 return false;
9931 /* Is vectorizable conditional operation? */
9932 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
9933 if (!stmt)
9934 return false;
9936 code = gimple_assign_rhs_code (stmt);
9937 if (code != COND_EXPR)
9938 return false;
9940 stmt_vec_info reduc_info = NULL;
9941 int reduc_index = -1;
9942 vect_reduction_type reduction_type = TREE_CODE_REDUCTION;
9943 bool for_reduction
9944 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL;
9945 if (for_reduction)
9947 if (STMT_SLP_TYPE (stmt_info))
9948 return false;
9949 reduc_info = info_for_reduction (stmt_info);
9950 reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info);
9951 reduc_index = STMT_VINFO_REDUC_IDX (stmt_info);
9952 gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION
9953 || reduc_index != -1);
9955 else
9957 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
9958 return false;
9960 /* FORNOW: only supported as part of a reduction. */
9961 if (STMT_VINFO_LIVE_P (stmt_info))
9963 if (dump_enabled_p ())
9964 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9965 "value used after loop.\n");
9966 return false;
9970 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
9971 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
9973 if (slp_node)
9975 ncopies = 1;
9976 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
9978 else
9980 ncopies = vect_get_num_copies (loop_vinfo, vectype);
9981 vec_num = 1;
9984 gcc_assert (ncopies >= 1);
9985 if (for_reduction && ncopies > 1)
9986 return false; /* FORNOW */
9988 cond_expr = gimple_assign_rhs1 (stmt);
9989 then_clause = gimple_assign_rhs2 (stmt);
9990 else_clause = gimple_assign_rhs3 (stmt);
9992 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, slp_node,
9993 &comp_vectype, &dts[0], slp_node ? NULL : vectype)
9994 || !comp_vectype)
9995 return false;
9997 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &dts[2], &vectype1))
9998 return false;
9999 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &dts[3], &vectype2))
10000 return false;
10002 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
10003 return false;
10005 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
10006 return false;
10008 masked = !COMPARISON_CLASS_P (cond_expr);
10009 vec_cmp_type = truth_type_for (comp_vectype);
10011 if (vec_cmp_type == NULL_TREE)
10012 return false;
10014 cond_code = TREE_CODE (cond_expr);
10015 if (!masked)
10017 cond_expr0 = TREE_OPERAND (cond_expr, 0);
10018 cond_expr1 = TREE_OPERAND (cond_expr, 1);
10021 /* For conditional reductions, the "then" value needs to be the candidate
10022 value calculated by this iteration while the "else" value needs to be
10023 the result carried over from previous iterations. If the COND_EXPR
10024 is the other way around, we need to swap it. */
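/* For example (names are illustrative): for the scalar reduction
last_3 = c_1 ? last_2 : a_5;
the carried value sits in the "then" position, so the comparison is
inverted and the clauses swapped, giving the equivalent of
last_3 = !c_1 ? a_5 : last_2; */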
10025 bool must_invert_cmp_result = false;
10026 if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1)
10028 if (masked)
10029 must_invert_cmp_result = true;
10030 else
10032 bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0));
10033 tree_code new_code = invert_tree_comparison (cond_code, honor_nans);
10034 if (new_code == ERROR_MARK)
10035 must_invert_cmp_result = true;
10036 else
10038 cond_code = new_code;
10039 /* Make sure we don't accidentally use the old condition. */
10040 cond_expr = NULL_TREE;
10043 std::swap (then_clause, else_clause);
10046 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
10048 /* Boolean values may have another representation in vectors
10049 and therefore we prefer bit operations over comparison for
10050 them (which also works for scalar masks). We store opcodes
10051 to use in bitop1 and bitop2. Statement is vectorized as
10052 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
10053 depending on bitop1 and bitop2 arity. */
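/* For example, with boolean operands and true regarded as greater than
false, a > b is computed as a & ~b and a >= b as a | ~b; these bit
operations give the right answer for any vector mask representation of
true. LT_EXPR and LE_EXPR reuse the same opcodes with the comparison
operands swapped. */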
10054 switch (cond_code)
10056 case GT_EXPR:
10057 bitop1 = BIT_NOT_EXPR;
10058 bitop2 = BIT_AND_EXPR;
10059 break;
10060 case GE_EXPR:
10061 bitop1 = BIT_NOT_EXPR;
10062 bitop2 = BIT_IOR_EXPR;
10063 break;
10064 case LT_EXPR:
10065 bitop1 = BIT_NOT_EXPR;
10066 bitop2 = BIT_AND_EXPR;
10067 std::swap (cond_expr0, cond_expr1);
10068 break;
10069 case LE_EXPR:
10070 bitop1 = BIT_NOT_EXPR;
10071 bitop2 = BIT_IOR_EXPR;
10072 std::swap (cond_expr0, cond_expr1);
10073 break;
10074 case NE_EXPR:
10075 bitop1 = BIT_XOR_EXPR;
10076 break;
10077 case EQ_EXPR:
10078 bitop1 = BIT_XOR_EXPR;
10079 bitop2 = BIT_NOT_EXPR;
10080 break;
10081 default:
10082 return false;
10084 cond_code = SSA_NAME;
10087 if (TREE_CODE_CLASS (cond_code) == tcc_comparison
10088 && reduction_type == EXTRACT_LAST_REDUCTION
10089 && !expand_vec_cmp_expr_p (comp_vectype, vec_cmp_type, cond_code))
10091 if (dump_enabled_p ())
10092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10093 "reduction comparison operation not supported.\n");
10094 return false;
10097 if (!vec_stmt)
10099 if (bitop1 != NOP_EXPR)
10101 machine_mode mode = TYPE_MODE (comp_vectype);
10102 optab optab;
10104 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
10105 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10106 return false;
10108 if (bitop2 != NOP_EXPR)
10110 optab = optab_for_tree_code (bitop2, comp_vectype,
10111 optab_default);
10112 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10113 return false;
10117 if (loop_vinfo
10118 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
10119 && reduction_type == EXTRACT_LAST_REDUCTION)
10120 vect_record_loop_mask (loop_vinfo, &LOOP_VINFO_MASKS (loop_vinfo),
10121 ncopies * vec_num, vectype, NULL);
10123 vect_cost_for_stmt kind = vector_stmt;
10124 if (reduction_type == EXTRACT_LAST_REDUCTION)
10125 /* Count one reduction-like operation per vector. */
10126 kind = vec_to_scalar;
10127 else if (!expand_vec_cond_expr_p (vectype, comp_vectype, cond_code))
10128 return false;
10130 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
10131 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, slp_node,
10132 cost_vec, kind);
10133 return true;
10136 /* Transform. */
10138 if (!slp_node)
10140 vec_oprnds0.create (1);
10141 vec_oprnds1.create (1);
10142 vec_oprnds2.create (1);
10143 vec_oprnds3.create (1);
10146 /* Handle def. */
10147 scalar_dest = gimple_assign_lhs (stmt);
10148 if (reduction_type != EXTRACT_LAST_REDUCTION)
10149 vec_dest = vect_create_destination_var (scalar_dest, vectype);
10151 /* Handle cond expr. */
10152 for (j = 0; j < ncopies; j++)
10154 bool swap_cond_operands = false;
10156 /* See whether another part of the vectorized code applies a loop
10157 mask to the condition, or to its inverse. */
10159 vec_loop_masks *masks = NULL;
10160 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
10162 if (reduction_type == EXTRACT_LAST_REDUCTION)
10163 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10164 else
10166 scalar_cond_masked_key cond (cond_expr, ncopies);
10167 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10168 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10169 else
10171 bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0));
10172 cond.code = invert_tree_comparison (cond.code, honor_nans);
10173 if (loop_vinfo->scalar_cond_masked_set.contains (cond))
10175 masks = &LOOP_VINFO_MASKS (loop_vinfo);
10176 cond_code = cond.code;
10177 swap_cond_operands = true;
10183 stmt_vec_info new_stmt_info = NULL;
10184 if (j == 0)
10186 if (slp_node)
10188 auto_vec<vec<tree>, 4> vec_defs;
10189 vect_get_slp_defs (slp_node, &vec_defs);
10190 vec_oprnds3 = vec_defs.pop ();
10191 vec_oprnds2 = vec_defs.pop ();
10192 if (!masked)
10193 vec_oprnds1 = vec_defs.pop ();
10194 vec_oprnds0 = vec_defs.pop ();
10196 else
10198 if (masked)
10200 vec_cond_lhs
10201 = vect_get_vec_def_for_operand (cond_expr, stmt_info,
10202 comp_vectype);
10204 else
10206 vec_cond_lhs
10207 = vect_get_vec_def_for_operand (cond_expr0,
10208 stmt_info, comp_vectype);
10209 vec_cond_rhs
10210 = vect_get_vec_def_for_operand (cond_expr1,
10211 stmt_info, comp_vectype);
10213 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
10214 stmt_info);
10215 if (reduction_type != EXTRACT_LAST_REDUCTION)
10216 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
10217 stmt_info);
10220 else
10222 vec_cond_lhs
10223 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds0.pop ());
10224 if (!masked)
10225 vec_cond_rhs
10226 = vect_get_vec_def_for_stmt_copy (vinfo, vec_oprnds1.pop ());
10228 vec_then_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10229 vec_oprnds2.pop ());
10230 vec_else_clause = vect_get_vec_def_for_stmt_copy (vinfo,
10231 vec_oprnds3.pop ());
10234 if (!slp_node)
10236 vec_oprnds0.quick_push (vec_cond_lhs);
10237 if (!masked)
10238 vec_oprnds1.quick_push (vec_cond_rhs);
10239 vec_oprnds2.quick_push (vec_then_clause);
10240 vec_oprnds3.quick_push (vec_else_clause);
10243 /* Arguments are ready. Create the new vector stmt. */
10244 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
10246 vec_then_clause = vec_oprnds2[i];
10247 vec_else_clause = vec_oprnds3[i];
10249 if (swap_cond_operands)
10250 std::swap (vec_then_clause, vec_else_clause);
10252 if (masked)
10253 vec_compare = vec_cond_lhs;
10254 else
10256 vec_cond_rhs = vec_oprnds1[i];
10257 if (bitop1 == NOP_EXPR)
10258 vec_compare = build2 (cond_code, vec_cmp_type,
10259 vec_cond_lhs, vec_cond_rhs);
10260 else
10262 new_temp = make_ssa_name (vec_cmp_type);
10263 gassign *new_stmt;
10264 if (bitop1 == BIT_NOT_EXPR)
10265 new_stmt = gimple_build_assign (new_temp, bitop1,
10266 vec_cond_rhs);
10267 else
10268 new_stmt
10269 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
10270 vec_cond_rhs);
10271 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10272 if (bitop2 == NOP_EXPR)
10273 vec_compare = new_temp;
10274 else if (bitop2 == BIT_NOT_EXPR)
10276 /* Instead of doing ~x ? y : z do x ? z : y. */
10277 vec_compare = new_temp;
10278 std::swap (vec_then_clause, vec_else_clause);
10280 else
10282 vec_compare = make_ssa_name (vec_cmp_type);
10283 new_stmt
10284 = gimple_build_assign (vec_compare, bitop2,
10285 vec_cond_lhs, new_temp);
10286 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10291 /* If we decided to apply a loop mask to the result of the vector
10292 comparison, AND the comparison with the mask now. Later passes
10293 should then be able to reuse the AND results between multiple
10294 vector statements.
10296 For example:
10297 for (int i = 0; i < 100; ++i)
10298 x[i] = y[i] ? z[i] : 10;
10300 results in following optimized GIMPLE:
10302 mask__35.8_43 = vect__4.7_41 != { 0, ... };
10303 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
10304 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
10305 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
10306 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
10307 vect_iftmp.11_47, { 10, ... }>;
10309 instead of using a masked and unmasked forms of
10310 vec != { 0, ... } (masked in the MASK_LOAD,
10311 unmasked in the VEC_COND_EXPR). */
10313 /* Force vec_compare to be an SSA_NAME rather than a comparison,
10314 in cases where that's necessary. */
10316 if (masks || reduction_type == EXTRACT_LAST_REDUCTION)
10318 if (!is_gimple_val (vec_compare))
10320 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10321 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10322 vec_compare);
10323 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10324 vec_compare = vec_compare_name;
10327 if (must_invert_cmp_result)
10329 tree vec_compare_name = make_ssa_name (vec_cmp_type);
10330 gassign *new_stmt = gimple_build_assign (vec_compare_name,
10331 BIT_NOT_EXPR,
10332 vec_compare);
10333 vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10334 vec_compare = vec_compare_name;
10337 if (masks)
10339 unsigned vec_num = vec_oprnds0.length ();
10340 tree loop_mask
10341 = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
10342 vectype, vec_num * j + i);
10343 tree tmp2 = make_ssa_name (vec_cmp_type);
10344 gassign *g
10345 = gimple_build_assign (tmp2, BIT_AND_EXPR, vec_compare,
10346 loop_mask);
10347 vect_finish_stmt_generation (stmt_info, g, gsi);
10348 vec_compare = tmp2;
10352 if (reduction_type == EXTRACT_LAST_REDUCTION)
10354 gimple *old_stmt = vect_orig_stmt (stmt_info)->stmt;
10355 tree lhs = gimple_get_lhs (old_stmt);
10356 gcall *new_stmt = gimple_build_call_internal
10357 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
10358 vec_then_clause);
10359 gimple_call_set_lhs (new_stmt, lhs);
10360 SSA_NAME_DEF_STMT (lhs) = new_stmt;
10361 if (old_stmt == gsi_stmt (*gsi))
10362 new_stmt_info = vect_finish_replace_stmt (stmt_info, new_stmt);
10363 else
10365 /* In this case we're moving the definition to later in the
10366 block. That doesn't matter because the only uses of the
10367 lhs are in phi statements. */
10368 gimple_stmt_iterator old_gsi = gsi_for_stmt (old_stmt);
10369 gsi_remove (&old_gsi, true);
10370 new_stmt_info
10371 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10374 else
10376 new_temp = make_ssa_name (vec_dest);
10377 gassign *new_stmt
10378 = gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
10379 vec_then_clause, vec_else_clause);
10380 new_stmt_info
10381 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10383 if (slp_node)
10384 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10387 if (slp_node)
10388 continue;
10390 if (j == 0)
10391 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10392 else
10393 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10395 prev_stmt_info = new_stmt_info;
10398 vec_oprnds0.release ();
10399 vec_oprnds1.release ();
10400 vec_oprnds2.release ();
10401 vec_oprnds3.release ();
10403 return true;
10406 /* vectorizable_comparison.
10408 Check if STMT_INFO is a comparison expression that can be vectorized.
10409 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10410 comparison, put it in VEC_STMT, and insert it at GSI.
10412 Return true if STMT_INFO is vectorizable in this way. */
10414 static bool
10415 vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10416 stmt_vec_info *vec_stmt,
10417 slp_tree slp_node, stmt_vector_for_cost *cost_vec)
10419 vec_info *vinfo = stmt_info->vinfo;
10420 tree lhs, rhs1, rhs2;
10421 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
10422 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
10423 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
10424 tree new_temp;
10425 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
10426 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
10427 int ndts = 2;
10428 poly_uint64 nunits;
10429 int ncopies;
10430 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
10431 stmt_vec_info prev_stmt_info = NULL;
10432 int i, j;
10433 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10434 vec<tree> vec_oprnds0 = vNULL;
10435 vec<tree> vec_oprnds1 = vNULL;
10436 tree mask_type;
10437 tree mask;
10439 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
10440 return false;
10442 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
10443 return false;
10445 mask_type = vectype;
10446 nunits = TYPE_VECTOR_SUBPARTS (vectype);
10448 if (slp_node)
10449 ncopies = 1;
10450 else
10451 ncopies = vect_get_num_copies (loop_vinfo, vectype);
10453 gcc_assert (ncopies >= 1);
10454 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
10455 return false;
10457 if (STMT_VINFO_LIVE_P (stmt_info))
10459 if (dump_enabled_p ())
10460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
10461 "value used after loop.\n");
10462 return false;
10465 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
10466 if (!stmt)
10467 return false;
10469 code = gimple_assign_rhs_code (stmt);
10471 if (TREE_CODE_CLASS (code) != tcc_comparison)
10472 return false;
10474 rhs1 = gimple_assign_rhs1 (stmt);
10475 rhs2 = gimple_assign_rhs2 (stmt);
10477 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &dts[0], &vectype1))
10478 return false;
10480 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &dts[1], &vectype2))
10481 return false;
10483 if (vectype1 && vectype2
10484 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
10485 TYPE_VECTOR_SUBPARTS (vectype2)))
10486 return false;
10488 vectype = vectype1 ? vectype1 : vectype2;
10490 /* Invariant comparison. */
10491 if (!vectype)
10493 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1),
10494 slp_node);
10495 if (!vectype || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
10496 return false;
10498 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
10499 return false;
10501 /* Can't compare mask and non-mask types. */
10502 if (vectype1 && vectype2
10503 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
10504 return false;
10506 /* Boolean values may have another representation in vectors
10507 and therefore we prefer bit operations over comparison for
10508 them (which also works for scalar masks). We store opcodes
10509 to use in bitop1 and bitop2. Statement is vectorized as
10510 BITOP2 (rhs1 BITOP1 rhs2) or
10511 rhs1 BITOP2 (BITOP1 rhs2)
10512 depending on bitop1 and bitop2 arity. */
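/* For example, with true regarded as greater than false, a < b is
computed as b & ~a and a <= b as b | ~a, i.e. the GT_EXPR/GE_EXPR
sequences with the comparison operands swapped (swap_p below). */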
10513 bool swap_p = false;
10514 if (VECTOR_BOOLEAN_TYPE_P (vectype))
10516 if (code == GT_EXPR)
10518 bitop1 = BIT_NOT_EXPR;
10519 bitop2 = BIT_AND_EXPR;
10521 else if (code == GE_EXPR)
10523 bitop1 = BIT_NOT_EXPR;
10524 bitop2 = BIT_IOR_EXPR;
10526 else if (code == LT_EXPR)
10528 bitop1 = BIT_NOT_EXPR;
10529 bitop2 = BIT_AND_EXPR;
10530 swap_p = true;
10532 else if (code == LE_EXPR)
10534 bitop1 = BIT_NOT_EXPR;
10535 bitop2 = BIT_IOR_EXPR;
10536 swap_p = true;
10538 else
10540 bitop1 = BIT_XOR_EXPR;
10541 if (code == EQ_EXPR)
10542 bitop2 = BIT_NOT_EXPR;
10546 if (!vec_stmt)
10548 if (bitop1 == NOP_EXPR)
10550 if (!expand_vec_cmp_expr_p (vectype, mask_type, code))
10551 return false;
10553 else
10555 machine_mode mode = TYPE_MODE (vectype);
10556 optab optab;
10558 optab = optab_for_tree_code (bitop1, vectype, optab_default);
10559 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10560 return false;
10562 if (bitop2 != NOP_EXPR)
10564 optab = optab_for_tree_code (bitop2, vectype, optab_default);
10565 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
10566 return false;
10570 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
10571 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
10572 dts, ndts, slp_node, cost_vec);
10573 return true;
10576 /* Transform. */
10577 if (!slp_node)
10579 vec_oprnds0.create (1);
10580 vec_oprnds1.create (1);
10583 /* Handle def. */
10584 lhs = gimple_assign_lhs (stmt);
10585 mask = vect_create_destination_var (lhs, mask_type);
10587 /* Handle cmp expr. */
10588 for (j = 0; j < ncopies; j++)
10590 stmt_vec_info new_stmt_info = NULL;
10591 if (j == 0)
10593 if (slp_node)
10595 auto_vec<vec<tree>, 2> vec_defs;
10596 vect_get_slp_defs (slp_node, &vec_defs);
10597 vec_oprnds1 = vec_defs.pop ();
10598 vec_oprnds0 = vec_defs.pop ();
10599 if (swap_p)
10600 std::swap (vec_oprnds0, vec_oprnds1);
10602 else
10604 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt_info,
10605 vectype);
10606 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt_info,
10607 vectype);
10610 else
10612 vec_rhs1 = vect_get_vec_def_for_stmt_copy (vinfo,
10613 vec_oprnds0.pop ());
10614 vec_rhs2 = vect_get_vec_def_for_stmt_copy (vinfo,
10615 vec_oprnds1.pop ());
10618 if (!slp_node)
10620 if (swap_p && j == 0)
10621 std::swap (vec_rhs1, vec_rhs2);
10622 vec_oprnds0.quick_push (vec_rhs1);
10623 vec_oprnds1.quick_push (vec_rhs2);
10626 /* Arguments are ready. Create the new vector stmt. */
10627 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
10629 vec_rhs2 = vec_oprnds1[i];
10631 new_temp = make_ssa_name (mask);
10632 if (bitop1 == NOP_EXPR)
10634 gassign *new_stmt = gimple_build_assign (new_temp, code,
10635 vec_rhs1, vec_rhs2);
10636 new_stmt_info
10637 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10639 else
10641 gassign *new_stmt;
10642 if (bitop1 == BIT_NOT_EXPR)
10643 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
10644 else
10645 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
10646 vec_rhs2);
10647 new_stmt_info
10648 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10649 if (bitop2 != NOP_EXPR)
10651 tree res = make_ssa_name (mask);
10652 if (bitop2 == BIT_NOT_EXPR)
10653 new_stmt = gimple_build_assign (res, bitop2, new_temp);
10654 else
10655 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
10656 new_temp);
10657 new_stmt_info
10658 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
10661 if (slp_node)
10662 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
10665 if (slp_node)
10666 continue;
10668 if (j == 0)
10669 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
10670 else
10671 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
10673 prev_stmt_info = new_stmt_info;
10676 vec_oprnds0.release ();
10677 vec_oprnds1.release ();
10679 return true;
10682 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10683 can handle all live statements in the node. Otherwise return true
10684 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10685 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10687 static bool
10688 can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10689 slp_tree slp_node, slp_instance slp_node_instance,
10690 bool vec_stmt_p,
10691 stmt_vector_for_cost *cost_vec)
10693 if (slp_node)
10695 stmt_vec_info slp_stmt_info;
10696 unsigned int i;
10697 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info)
10699 if (STMT_VINFO_LIVE_P (slp_stmt_info)
10700 && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node,
10701 slp_node_instance, i,
10702 vec_stmt_p, cost_vec))
10703 return false;
10706 else if (STMT_VINFO_LIVE_P (stmt_info)
10707 && !vectorizable_live_operation (stmt_info, gsi, slp_node,
10708 slp_node_instance, -1,
10709 vec_stmt_p, cost_vec))
10710 return false;
10712 return true;
10715 /* Make sure the statement is vectorizable. */
10717 opt_result
10718 vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize,
10719 slp_tree node, slp_instance node_instance,
10720 stmt_vector_for_cost *cost_vec)
10722 vec_info *vinfo = stmt_info->vinfo;
10723 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
10724 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
10725 bool ok;
10726 gimple_seq pattern_def_seq;
10728 if (dump_enabled_p ())
10729 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
10730 stmt_info->stmt);
10732 if (gimple_has_volatile_ops (stmt_info->stmt))
10733 return opt_result::failure_at (stmt_info->stmt,
10734 "not vectorized:"
10735 " stmt has volatile operands: %G\n",
10736 stmt_info->stmt);
10738 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10739 && node == NULL
10740 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
10742 gimple_stmt_iterator si;
10744 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
10746 stmt_vec_info pattern_def_stmt_info
10747 = vinfo->lookup_stmt (gsi_stmt (si));
10748 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
10749 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
10751 /* Analyze def stmt of STMT if it's a pattern stmt. */
10752 if (dump_enabled_p ())
10753 dump_printf_loc (MSG_NOTE, vect_location,
10754 "==> examining pattern def statement: %G",
10755 pattern_def_stmt_info->stmt);
10757 opt_result res
10758 = vect_analyze_stmt (pattern_def_stmt_info,
10759 need_to_vectorize, node, node_instance,
10760 cost_vec);
10761 if (!res)
10762 return res;
10767 /* Skip stmts that do not need to be vectorized. In loops this is expected
10768 to include:
10769 - the COND_EXPR which is the loop exit condition
10770 - any LABEL_EXPRs in the loop
10771 - computations that are used only for array indexing or loop control.
10772 In basic blocks we only analyze statements that are a part of some SLP
10773 instance, therefore, all the statements are relevant.
10775 A pattern statement needs to be analyzed instead of the original statement
10776 if the original statement is not relevant. Otherwise, we analyze both
10777 statements. In basic blocks we are called from some SLP instance
10778 traversal; don't analyze pattern stmts instead, as the pattern stmts
10779 will already be part of an SLP instance. */
10781 stmt_vec_info pattern_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
10782 if (!STMT_VINFO_RELEVANT_P (stmt_info)
10783 && !STMT_VINFO_LIVE_P (stmt_info))
10785 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10786 && pattern_stmt_info
10787 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10788 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10790 /* Analyze PATTERN_STMT instead of the original stmt. */
10791 stmt_info = pattern_stmt_info;
10792 if (dump_enabled_p ())
10793 dump_printf_loc (MSG_NOTE, vect_location,
10794 "==> examining pattern statement: %G",
10795 stmt_info->stmt);
10797 else
10799 if (dump_enabled_p ())
10800 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
10802 return opt_result::success ();
10805 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
10806 && node == NULL
10807 && pattern_stmt_info
10808 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info)
10809 || STMT_VINFO_LIVE_P (pattern_stmt_info)))
10811 /* Analyze PATTERN_STMT too. */
10812 if (dump_enabled_p ())
10813 dump_printf_loc (MSG_NOTE, vect_location,
10814 "==> examining pattern statement: %G",
10815 pattern_stmt_info->stmt);
10817 opt_result res
10818 = vect_analyze_stmt (pattern_stmt_info, need_to_vectorize, node,
10819 node_instance, cost_vec);
10820 if (!res)
10821 return res;
10824 switch (STMT_VINFO_DEF_TYPE (stmt_info))
10826 case vect_internal_def:
10827 break;
10829 case vect_reduction_def:
10830 case vect_nested_cycle:
10831 gcc_assert (!bb_vinfo
10832 && (relevance == vect_used_in_outer
10833 || relevance == vect_used_in_outer_by_reduction
10834 || relevance == vect_used_by_reduction
10835 || relevance == vect_unused_in_scope
10836 || relevance == vect_used_only_live));
10837 break;
10839 case vect_induction_def:
10840 gcc_assert (!bb_vinfo);
10841 break;
10843 case vect_constant_def:
10844 case vect_external_def:
10845 case vect_unknown_def_type:
10846 default:
10847 gcc_unreachable ();
10850 if (STMT_VINFO_RELEVANT_P (stmt_info))
10852 tree type = gimple_expr_type (stmt_info->stmt);
10853 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type)));
10854 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
10855 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
10856 || (call && gimple_call_lhs (call) == NULL_TREE));
10857 *need_to_vectorize = true;
10860 if (PURE_SLP_STMT (stmt_info) && !node)
10862 if (dump_enabled_p ())
10863 dump_printf_loc (MSG_NOTE, vect_location,
10864 "handled only by SLP analysis\n");
10865 return opt_result::success ();
10868 ok = true;
10869 if (!bb_vinfo
10870 && (STMT_VINFO_RELEVANT_P (stmt_info)
10871 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
10872 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10873 -mveclibabi= takes preference over library functions with
10874 the simd attribute. */
10875 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10876 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10877 cost_vec)
10878 || vectorizable_conversion (stmt_info, NULL, NULL, node, cost_vec)
10879 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10880 || vectorizable_assignment (stmt_info, NULL, NULL, node, cost_vec)
10881 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10882 cost_vec)
10883 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10884 || vectorizable_reduction (stmt_info, node, node_instance, cost_vec)
10885 || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec)
10886 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10887 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10888 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10889 cost_vec)
10890 || vectorizable_lc_phi (stmt_info, NULL, node));
10891 else
10893 if (bb_vinfo)
10894 ok = (vectorizable_call (stmt_info, NULL, NULL, node, cost_vec)
10895 || vectorizable_simd_clone_call (stmt_info, NULL, NULL, node,
10896 cost_vec)
10897 || vectorizable_conversion (stmt_info, NULL, NULL, node,
10898 cost_vec)
10899 || vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec)
10900 || vectorizable_operation (stmt_info, NULL, NULL, node, cost_vec)
10901 || vectorizable_assignment (stmt_info, NULL, NULL, node,
10902 cost_vec)
10903 || vectorizable_load (stmt_info, NULL, NULL, node, node_instance,
10904 cost_vec)
10905 || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec)
10906 || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec)
10907 || vectorizable_comparison (stmt_info, NULL, NULL, node,
10908 cost_vec));
10911 if (!ok)
10912 return opt_result::failure_at (stmt_info->stmt,
10913 "not vectorized:"
10914 " relevant stmt not supported: %G",
10915 stmt_info->stmt);
10917 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
10918 need extra handling, except for vectorizable reductions. */
10919 if (!bb_vinfo
10920 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
10921 && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type
10922 && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance,
10923 false, cost_vec))
10924 return opt_result::failure_at (stmt_info->stmt,
10925 "not vectorized:"
10926 " live stmt not supported: %G",
10927 stmt_info->stmt);
10929 return opt_result::success ();
10933 /* Function vect_transform_stmt.
10935 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10937 bool
10938 vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
10939 slp_tree slp_node, slp_instance slp_node_instance)
10941 vec_info *vinfo = stmt_info->vinfo;
10942 bool is_store = false;
10943 stmt_vec_info vec_stmt = NULL;
10944 bool done;
10946 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
10947 stmt_vec_info old_vec_stmt_info = STMT_VINFO_VEC_STMT (stmt_info);
10949 bool nested_p = (STMT_VINFO_LOOP_VINFO (stmt_info)
10950 && nested_in_vect_loop_p
10951 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info)),
10952 stmt_info));
10954 gimple *stmt = stmt_info->stmt;
10955 switch (STMT_VINFO_TYPE (stmt_info))
10957 case type_demotion_vec_info_type:
10958 case type_promotion_vec_info_type:
10959 case type_conversion_vec_info_type:
10960 done = vectorizable_conversion (stmt_info, gsi, &vec_stmt, slp_node,
10961 NULL);
10962 gcc_assert (done);
10963 break;
10965 case induc_vec_info_type:
10966 done = vectorizable_induction (stmt_info, gsi, &vec_stmt, slp_node,
10967 NULL);
10968 gcc_assert (done);
10969 break;
10971 case shift_vec_info_type:
10972 done = vectorizable_shift (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10973 gcc_assert (done);
10974 break;
10976 case op_vec_info_type:
10977 done = vectorizable_operation (stmt_info, gsi, &vec_stmt, slp_node,
10978 NULL);
10979 gcc_assert (done);
10980 break;
10982 case assignment_vec_info_type:
10983 done = vectorizable_assignment (stmt_info, gsi, &vec_stmt, slp_node,
10984 NULL);
10985 gcc_assert (done);
10986 break;
10988 case load_vec_info_type:
10989 done = vectorizable_load (stmt_info, gsi, &vec_stmt, slp_node,
10990 slp_node_instance, NULL);
10991 gcc_assert (done);
10992 break;
10994 case store_vec_info_type:
10995 done = vectorizable_store (stmt_info, gsi, &vec_stmt, slp_node, NULL);
10996 gcc_assert (done);
10997 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
10999 /* In case of interleaving, the whole chain is vectorized when the
11000 last store in the chain is reached. Store stmts before the last
11001 one are skipped, and their vec_stmt_info shouldn't be freed
11002 meanwhile. */
11003 stmt_vec_info group_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
11004 if (DR_GROUP_STORE_COUNT (group_info) == DR_GROUP_SIZE (group_info))
11005 is_store = true;
11007 else
11008 is_store = true;
11009 break;
11011 case condition_vec_info_type:
11012 done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11013 gcc_assert (done);
11014 break;
11016 case comparison_vec_info_type:
11017 done = vectorizable_comparison (stmt_info, gsi, &vec_stmt,
11018 slp_node, NULL);
11019 gcc_assert (done);
11020 break;
11022 case call_vec_info_type:
11023 done = vectorizable_call (stmt_info, gsi, &vec_stmt, slp_node, NULL);
11024 stmt = gsi_stmt (*gsi);
11025 break;
11027 case call_simd_clone_vec_info_type:
11028 done = vectorizable_simd_clone_call (stmt_info, gsi, &vec_stmt,
11029 slp_node, NULL);
11030 stmt = gsi_stmt (*gsi);
11031 break;
11033 case reduc_vec_info_type:
11034 done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node);
11035 gcc_assert (done);
11036 break;
11038 case cycle_phi_info_type:
11039 done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node,
11040 slp_node_instance);
11041 gcc_assert (done);
11042 break;
11044 case lc_phi_info_type:
11045 done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node);
11046 gcc_assert (done);
11047 break;
11049 default:
11050 if (!STMT_VINFO_LIVE_P (stmt_info))
11052 if (dump_enabled_p ())
11053 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11054 "stmt not supported.\n");
11055 gcc_unreachable ();
11059 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
11060 This would break hybrid SLP vectorization. */
11061 if (slp_node)
11062 gcc_assert (!vec_stmt
11063 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt_info);
11065 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
11066 is being vectorized, but outside the immediately enclosing loop. */
11067 if (vec_stmt
11068 && nested_p
11069 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
11070 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
11071 || STMT_VINFO_RELEVANT (stmt_info) ==
11072 vect_used_in_outer_by_reduction))
11074 class loop *innerloop = LOOP_VINFO_LOOP (
11075 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
11076 imm_use_iterator imm_iter;
11077 use_operand_p use_p;
11078 tree scalar_dest;
11080 if (dump_enabled_p ())
11081 dump_printf_loc (MSG_NOTE, vect_location,
11082 "Record the vdef for outer-loop vectorization.\n");
11084 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
11085 (to be used when vectorizing outer-loop stmts that use the DEF of
11086 STMT). */
11087 if (gimple_code (stmt) == GIMPLE_PHI)
11088 scalar_dest = PHI_RESULT (stmt);
11089 else
11090 scalar_dest = gimple_get_lhs (stmt);
11092 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
11093 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
11095 stmt_vec_info exit_phi_info
11096 = vinfo->lookup_stmt (USE_STMT (use_p));
11097 STMT_VINFO_VEC_STMT (exit_phi_info) = vec_stmt;
11101 if (vec_stmt)
11102 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
11104 if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
11105 return is_store;
11107 /* If this stmt defines a value used on a backedge, update the
11108 vectorized PHIs. */
11109 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
11110 stmt_vec_info reduc_info;
11111 if (STMT_VINFO_REDUC_DEF (orig_stmt_info)
11112 && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info
11113 && (reduc_info = info_for_reduction (orig_stmt_info))
11114 && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION
11115 && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION)
11117 gphi *phi;
11118 edge e;
11119 if (!slp_node
11120 && (phi = dyn_cast <gphi *>
11121 (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt))
11122 && dominated_by_p (CDI_DOMINATORS,
11123 gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))
11124 && (e = loop_latch_edge (gimple_bb (phi)->loop_father))
11125 && (PHI_ARG_DEF_FROM_EDGE (phi, e)
11126 == gimple_get_lhs (orig_stmt_info->stmt)))
11128 stmt_vec_info phi_info
11129 = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info));
11130 stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
11133 add_phi_arg (as_a <gphi *> (phi_info->stmt),
11134 gimple_get_lhs (vec_stmt->stmt), e,
11135 gimple_phi_arg_location (phi, e->dest_idx));
11136 phi_info = STMT_VINFO_RELATED_STMT (phi_info);
11137 vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt);
11139 while (phi_info);
11140 gcc_assert (!vec_stmt);
11142 else if (slp_node
11143 && slp_node != slp_node_instance->reduc_phis)
11145 slp_tree phi_node = slp_node_instance->reduc_phis;
11146 gphi *phi = as_a <gphi *> (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt);
11147 e = loop_latch_edge (gimple_bb (phi)->loop_father);
11148 gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length ()
11149 == SLP_TREE_VEC_STMTS (slp_node).length ());
11150 for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i)
11151 add_phi_arg (as_a <gphi *> (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt),
11152 gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt),
11153 e, gimple_phi_arg_location (phi, e->dest_idx));
11157 /* Handle stmts whose DEF is used outside the loop-nest that is
11158 being vectorized. */
11159 done = can_vectorize_live_stmts (stmt_info, gsi, slp_node,
11160 slp_node_instance, true, NULL);
11161 gcc_assert (done);
11163 return false;
11167 /* Remove a group of stores (for SLP or interleaving), free their
11168 stmt_vec_info. */
11170 void
11171 vect_remove_stores (stmt_vec_info first_stmt_info)
11173 vec_info *vinfo = first_stmt_info->vinfo;
11174 stmt_vec_info next_stmt_info = first_stmt_info;
11176 while (next_stmt_info)
11178 stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
11179 next_stmt_info = vect_orig_stmt (next_stmt_info);
11180 /* Free the attached stmt_vec_info and remove the stmt. */
11181 vinfo->remove_stmt (next_stmt_info);
11182 next_stmt_info = tmp;
11186 /* If NUNITS is nonzero, return a vector type that contains NUNITS
11187 elements of type SCALAR_TYPE, or null if the target doesn't support
11188 such a type.
11190 If NUNITS is zero, return a vector type that contains elements of
11191 type SCALAR_TYPE, choosing whichever vector size the target prefers.
11193 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
11194 for this vectorization region and want to "autodetect" the best choice.
11195 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
11196 and we want the new type to be interoperable with it. PREVAILING_MODE
11197 in this case can be a scalar integer mode or a vector mode; when it
11198 is a vector mode, the function acts like a tree-level version of
11199 related_vector_mode. */
11201 tree
11202 get_related_vectype_for_scalar_type (machine_mode prevailing_mode,
11203 tree scalar_type, poly_uint64 nunits)
11205 tree orig_scalar_type = scalar_type;
11206 scalar_mode inner_mode;
11207 machine_mode simd_mode;
11208 tree vectype;
11210 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
11211 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
11212 return NULL_TREE;
11214 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
11216 /* For vector types of elements whose mode precision doesn't
11217 match their type's precision, we use an element type of mode
11218 precision. The vectorization routines will have to make sure
11219 they support the proper result truncation/extension.
11220 We also make sure to build vector types with INTEGER_TYPE
11221 component type only. */
11222 if (INTEGRAL_TYPE_P (scalar_type)
11223 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
11224 || TREE_CODE (scalar_type) != INTEGER_TYPE))
11225 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
11226 TYPE_UNSIGNED (scalar_type));
11228 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
11229 When the component mode passes the above test simply use a type
11230 corresponding to that mode. The theory is that any use that
11231 would cause problems with this will disable vectorization anyway. */
11232 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
11233 && !INTEGRAL_TYPE_P (scalar_type))
11234 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
11236 /* We can't build a vector type of elements with alignment bigger than
11237 their size. */
11238 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
11239 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
11240 TYPE_UNSIGNED (scalar_type));
11242 /* If we fell back to using the mode, fail if there was
11243 no scalar type for it. */
11244 if (scalar_type == NULL_TREE)
11245 return NULL_TREE;
11247 /* If no prevailing mode was supplied, use the mode the target prefers.
11248 Otherwise look up a vector mode based on the prevailing mode. */
11249 if (prevailing_mode == VOIDmode)
11251 gcc_assert (known_eq (nunits, 0U));
11252 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
11253 if (SCALAR_INT_MODE_P (simd_mode))
11255 /* Traditional behavior is not to take the integer mode
11256 literally, but simply to use it as a way of determining
11257 the vector size. It is up to mode_for_vector to decide
11258 what the TYPE_MODE should be.
11260 Note that nunits == 1 is allowed in order to support single
11261 element vector types. */
11262 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)
11263 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11264 return NULL_TREE;
11267 else if (SCALAR_INT_MODE_P (prevailing_mode)
11268 || !related_vector_mode (prevailing_mode,
11269 inner_mode, nunits).exists (&simd_mode))
11271 /* Fall back to using mode_for_vector, mostly in the hope of being
11272 able to use an integer mode. */
11273 if (known_eq (nunits, 0U)
11274 && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits))
11275 return NULL_TREE;
11277 if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode))
11278 return NULL_TREE;
11281 vectype = build_vector_type_for_mode (scalar_type, simd_mode);
11283 /* In cases where the mode was chosen by mode_for_vector, check that
11284 the target actually supports the chosen mode, or that it at least
11285 allows the vector mode to be replaced by a like-sized integer. */
11286 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
11287 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
11288 return NULL_TREE;
11290 /* Re-attach the address-space qualifier if we canonicalized the scalar
11291 type. */
11292 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
11293 return build_qualified_type
11294 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
11296 return vectype;
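/* Illustrative sketch, not part of the original file: the two typical ways
   a caller can use get_related_vectype_for_scalar_type, relying only on the
   signature defined above.  SCALAR_TYPE and PREV_MODE are hypothetical
   locals.

       tree natural
         = get_related_vectype_for_scalar_type (VOIDmode, scalar_type, 0);

       tree related
         = get_related_vectype_for_scalar_type (prev_mode, scalar_type, 4);

   The first call autodetects the vector size the target prefers for
   SCALAR_TYPE; the second asks for a four-element vector type that is
   interoperable with the previously-chosen PREV_MODE.  Either call can
   return NULL_TREE if no suitable vector mode exists.  */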
11299 /* Function get_vectype_for_scalar_type.
11301 Returns the vector type corresponding to SCALAR_TYPE as supported
11302 by the target. If GROUP_SIZE is nonzero and we're performing BB
11303 vectorization, make sure that the number of elements in the vector
11304 is no bigger than GROUP_SIZE. */
11306 tree
11307 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
11308 unsigned int group_size)
11310 /* For BB vectorization, we should always have a group size once we've
11311 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11312 are tentative requests during things like early data reference
11313 analysis and pattern recognition. */
11314 if (is_a <bb_vec_info> (vinfo))
11315 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
11316 else
11317 group_size = 0;
11319 tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11320 scalar_type);
11321 if (vectype && vinfo->vector_mode == VOIDmode)
11322 vinfo->vector_mode = TYPE_MODE (vectype);
11324 /* Register the natural choice of vector type, before the group size
11325 has been applied. */
11326 if (vectype)
11327 vinfo->used_vector_modes.add (TYPE_MODE (vectype));
11329 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
11330 try again with an explicit number of elements. */
11331 if (vectype
11332 && group_size
11333 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
11335 /* Start with the biggest number of units that fits within
11336 GROUP_SIZE and halve it until we find a valid vector type.
11337 Usually either the first attempt will succeed or all will
11338 fail (in the latter case because GROUP_SIZE is too small
11339 for the target), but it's possible that a target could have
11340 a hole between supported vector types.
11342 If GROUP_SIZE is not a power of 2, this has the effect of
11343 trying the largest power of 2 that fits within the group,
11344 even though the group is not a multiple of that vector size.
11345 The BB vectorizer will then try to carve up the group into
11346 smaller pieces. */
11347 unsigned int nunits = 1 << floor_log2 (group_size);
11350 vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
11351 scalar_type, nunits);
11352 nunits /= 2;
11354 while (nunits > 1 && !vectype);
11357 return vectype;
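/* Worked example (illustrative, not part of the original file): suppose the
   natural choice above is a 4-element vector of a 4-byte SCALAR_TYPE and the
   caller passes GROUP_SIZE == 3.  The natural type has too many elements for
   the group, so the code retries with an explicit element count of
   1 << floor_log2 (3) == 2:

       get_related_vectype_for_scalar_type (vinfo->vector_mode,
                                            scalar_type, 2);

   halving the count again on failure for as long as more than one element
   remains.  The concrete sizes here are assumptions chosen for the
   example.  */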
11360 /* Return the vector type corresponding to SCALAR_TYPE as supported
11361 by the target. NODE, if nonnull, is the SLP tree node that will
11362 use the returned vector type. */
11364 tree
11365 get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
11367 unsigned int group_size = 0;
11368 if (node)
11370 group_size = SLP_TREE_SCALAR_OPS (node).length ();
11371 if (group_size == 0)
11372 group_size = SLP_TREE_SCALAR_STMTS (node).length ();
11374 return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11377 /* Function get_mask_type_for_scalar_type.
11379 Returns the mask type corresponding to the result of a comparison
11380 of vectors of the specified SCALAR_TYPE, as supported by the target.
11381 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11382 make sure that the number of elements in the vector is no bigger
11383 than GROUP_SIZE. */
11385 tree
11386 get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
11387 unsigned int group_size)
11389 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
11391 if (!vectype)
11392 return NULL;
11394 return truth_type_for (vectype);
11397 /* Function get_same_sized_vectype
11399 Returns a vector type with elements of type SCALAR_TYPE and the
11400 same size as VECTOR_TYPE, if supported by the target. */
11402 tree
11403 get_same_sized_vectype (tree scalar_type, tree vector_type)
11405 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
11406 return truth_type_for (vector_type);
11408 poly_uint64 nunits;
11409 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
11410 GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
11411 return NULL_TREE;
11413 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
11414 scalar_type, nunits);
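/* Illustrative example, not part of the original file: on a target with
   16-byte vectors, if VECTOR_TYPE is a 4 x float type (16 bytes) and
   SCALAR_TYPE is double (8 bytes), NUNITS above becomes 16 / 8 = 2, so the
   call asks for a 2 x double type of the same 16-byte size.  For a boolean
   SCALAR_TYPE the function instead returns truth_type_for (VECTOR_TYPE).  */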
11417 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11418 would not change the chosen vector modes. */
11420 bool
11421 vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
11423 for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
11424 i != vinfo->used_vector_modes.end (); ++i)
11425 if (!VECTOR_MODE_P (*i)
11426 || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
11427 return false;
11428 return true;
11431 /* Function vect_is_simple_use.
11433 Input:
11434 VINFO - the vect info of the loop or basic block that is being vectorized.
11435 OPERAND - operand in the loop or bb.
11436 Output:
11437 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11438 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11439 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11440 the definition could be anywhere in the function
11441 DT - the type of definition
11443 Returns whether a stmt with OPERAND can be vectorized.
11444 For loops, supportable operands are constants, loop invariants, and operands
11445 that are defined by the current iteration of the loop. Unsupportable
11446 operands are those that are defined by a previous iteration of the loop (as
11447 is the case in reduction/induction computations).
11448 For basic blocks, supportable operands are constants and bb invariants.
11449 For now, operands defined outside the basic block are not supported. */
11451 bool
11452 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11453 stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
11455 if (def_stmt_info_out)
11456 *def_stmt_info_out = NULL;
11457 if (def_stmt_out)
11458 *def_stmt_out = NULL;
11459 *dt = vect_unknown_def_type;
11461 if (dump_enabled_p ())
11463 dump_printf_loc (MSG_NOTE, vect_location,
11464 "vect_is_simple_use: operand ");
11465 if (TREE_CODE (operand) == SSA_NAME
11466 && !SSA_NAME_IS_DEFAULT_DEF (operand))
11467 dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
11468 else
11469 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
11472 if (CONSTANT_CLASS_P (operand))
11473 *dt = vect_constant_def;
11474 else if (is_gimple_min_invariant (operand))
11475 *dt = vect_external_def;
11476 else if (TREE_CODE (operand) != SSA_NAME)
11477 *dt = vect_unknown_def_type;
11478 else if (SSA_NAME_IS_DEFAULT_DEF (operand))
11479 *dt = vect_external_def;
11480 else
11482 gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
11483 stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
11484 if (!stmt_vinfo)
11485 *dt = vect_external_def;
11486 else
11488 stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
11489 def_stmt = stmt_vinfo->stmt;
11490 switch (gimple_code (def_stmt))
11492 case GIMPLE_PHI:
11493 case GIMPLE_ASSIGN:
11494 case GIMPLE_CALL:
11495 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
11496 break;
11497 default:
11498 *dt = vect_unknown_def_type;
11499 break;
11501 if (def_stmt_info_out)
11502 *def_stmt_info_out = stmt_vinfo;
11504 if (def_stmt_out)
11505 *def_stmt_out = def_stmt;
11508 if (dump_enabled_p ())
11510 dump_printf (MSG_NOTE, ", type of def: ");
11511 switch (*dt)
11513 case vect_uninitialized_def:
11514 dump_printf (MSG_NOTE, "uninitialized\n");
11515 break;
11516 case vect_constant_def:
11517 dump_printf (MSG_NOTE, "constant\n");
11518 break;
11519 case vect_external_def:
11520 dump_printf (MSG_NOTE, "external\n");
11521 break;
11522 case vect_internal_def:
11523 dump_printf (MSG_NOTE, "internal\n");
11524 break;
11525 case vect_induction_def:
11526 dump_printf (MSG_NOTE, "induction\n");
11527 break;
11528 case vect_reduction_def:
11529 dump_printf (MSG_NOTE, "reduction\n");
11530 break;
11531 case vect_double_reduction_def:
11532 dump_printf (MSG_NOTE, "double reduction\n");
11533 break;
11534 case vect_nested_cycle:
11535 dump_printf (MSG_NOTE, "nested cycle\n");
11536 break;
11537 case vect_unknown_def_type:
11538 dump_printf (MSG_NOTE, "unknown\n");
11539 break;
11543 if (*dt == vect_unknown_def_type)
11545 if (dump_enabled_p ())
11546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
11547 "Unsupported pattern.\n");
11548 return false;
11551 return true;
11554 /* Function vect_is_simple_use.
11556 Same as vect_is_simple_use but also determines the vector operand
11557 type of OPERAND and stores it to *VECTYPE. If the definition of
11558 OPERAND is vect_uninitialized_def, vect_constant_def or
11559 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11560 is responsible for computing the best suited vector type for the
11561 scalar operand. */
11563 bool
11564 vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
11565 tree *vectype, stmt_vec_info *def_stmt_info_out,
11566 gimple **def_stmt_out)
11568 stmt_vec_info def_stmt_info;
11569 gimple *def_stmt;
11570 if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
11571 return false;
11573 if (def_stmt_out)
11574 *def_stmt_out = def_stmt;
11575 if (def_stmt_info_out)
11576 *def_stmt_info_out = def_stmt_info;
11578 /* Now get a vector type if the def is internal, otherwise supply
11579 NULL_TREE and leave it up to the caller to figure out a proper
11580 type for the use stmt. */
11581 if (*dt == vect_internal_def
11582 || *dt == vect_induction_def
11583 || *dt == vect_reduction_def
11584 || *dt == vect_double_reduction_def
11585 || *dt == vect_nested_cycle)
11587 *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
11588 gcc_assert (*vectype != NULL_TREE);
11589 if (dump_enabled_p ())
11590 dump_printf_loc (MSG_NOTE, vect_location,
11591 "vect_is_simple_use: vectype %T\n", *vectype);
11593 else if (*dt == vect_uninitialized_def
11594 || *dt == vect_constant_def
11595 || *dt == vect_external_def)
11596 *vectype = NULL_TREE;
11597 else
11598 gcc_unreachable ();
11600 return true;
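/* Illustrative sketch, not part of the original file: the usual calling
   pattern for the overload above, assuming OP, VINFO and the enclosing
   vectorizable_* context are in scope.

       enum vect_def_type dt;
       tree op_vectype;
       stmt_vec_info def_stmt_info;
       gimple *def_stmt;
       if (!vect_is_simple_use (op, vinfo, &dt, &op_vectype,
                                &def_stmt_info, &def_stmt))
         return false;

   On success DT classifies the definition of OP, and OP_VECTYPE is the
   vector type of an internal definition or NULL_TREE for constant and
   external definitions, in which case the caller chooses a suitable vector
   type itself.  */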
11604 /* Function supportable_widening_operation
11606 Check whether an operation represented by the code CODE is a
11607 widening operation that is supported by the target platform in
11608 vector form (i.e., when operating on arguments of type VECTYPE_IN
11609 producing a result of type VECTYPE_OUT).
11611 Widening operations we currently support are NOP (CONVERT), FLOAT,
11612 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11613 are supported by the target platform either directly (via vector
11614 tree-codes), or via target builtins.
11616 Output:
11617 - CODE1 and CODE2 are codes of vector operations to be used when
11618 vectorizing the operation, if available.
11619 - MULTI_STEP_CVT determines the number of required intermediate steps in
11620 case of multi-step conversion (like char->short->int - in that case
11621 MULTI_STEP_CVT will be 1).
11622 - INTERM_TYPES contains the intermediate type required to perform the
11623 widening operation (short in the above example). */
11625 bool
11626 supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info,
11627 tree vectype_out, tree vectype_in,
11628 enum tree_code *code1, enum tree_code *code2,
11629 int *multi_step_cvt,
11630 vec<tree> *interm_types)
11632 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
11633 class loop *vect_loop = NULL;
11634 machine_mode vec_mode;
11635 enum insn_code icode1, icode2;
11636 optab optab1, optab2;
11637 tree vectype = vectype_in;
11638 tree wide_vectype = vectype_out;
11639 enum tree_code c1, c2;
11640 int i;
11641 tree prev_type, intermediate_type;
11642 machine_mode intermediate_mode, prev_mode;
11643 optab optab3, optab4;
11645 *multi_step_cvt = 0;
11646 if (loop_info)
11647 vect_loop = LOOP_VINFO_LOOP (loop_info);
11649 switch (code)
11651 case WIDEN_MULT_EXPR:
11652 /* The result of a vectorized widening operation usually requires
11653 two vectors (because the widened results do not fit into one vector).
11654 The generated vector results would normally be expected to be
11655 generated in the same order as in the original scalar computation,
11656 i.e. if 8 results are generated in each vector iteration, they are
11657 to be organized as follows:
11658 vect1: [res1,res2,res3,res4],
11659 vect2: [res5,res6,res7,res8].
11661 However, in the special case that the result of the widening
11662 operation is used in a reduction computation only, the order doesn't
11663 matter (because when vectorizing a reduction we change the order of
11664 the computation). Some targets can take advantage of this and
11665 generate more efficient code. For example, targets like Altivec,
11666 that support widen_mult using a sequence of {mult_even,mult_odd}
11667 generate the following vectors:
11668 vect1: [res1,res3,res5,res7],
11669 vect2: [res2,res4,res6,res8].
11671 When vectorizing outer-loops, we execute the inner-loop sequentially
11672 (each vectorized inner-loop iteration contributes to VF outer-loop
11673 iterations in parallel). We therefore don't allow changing the
11674 order of the computation in the inner-loop during outer-loop
11675 vectorization. */
11676 /* TODO: Another case in which order doesn't *really* matter is when we
11677 widen and then contract again, e.g. (short)((int)x * y >> 8).
11678 Normally, pack_trunc performs an even/odd permute, whereas the
11679 repack from an even/odd expansion would be an interleave, which
11680 would be significantly simpler for e.g. AVX2. */
11681 /* In any case, in order to avoid duplicating the code below, recurse
11682 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11683 are properly set up for the caller. If we fail, we'll continue with
11684 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11685 if (vect_loop
11686 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
11687 && !nested_in_vect_loop_p (vect_loop, stmt_info)
11688 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
11689 stmt_info, vectype_out,
11690 vectype_in, code1, code2,
11691 multi_step_cvt, interm_types))
11693 /* Elements in a vector with vect_used_by_reduction property cannot
11694 be reordered if the use chain with this property does not have the
11695 same operation. One such example is s += a * b, where elements
11696 in a and b cannot be reordered. Here we check if the vector defined
11697 by STMT is only directly used in the reduction statement. */
11698 tree lhs = gimple_assign_lhs (stmt_info->stmt);
11699 stmt_vec_info use_stmt_info = loop_info->lookup_single_use (lhs);
11700 if (use_stmt_info
11701 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
11702 return true;
11704 c1 = VEC_WIDEN_MULT_LO_EXPR;
11705 c2 = VEC_WIDEN_MULT_HI_EXPR;
11706 break;
11708 case DOT_PROD_EXPR:
11709 c1 = DOT_PROD_EXPR;
11710 c2 = DOT_PROD_EXPR;
11711 break;
11713 case SAD_EXPR:
11714 c1 = SAD_EXPR;
11715 c2 = SAD_EXPR;
11716 break;
11718 case VEC_WIDEN_MULT_EVEN_EXPR:
11719 /* Support the recursion induced just above. */
11720 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
11721 c2 = VEC_WIDEN_MULT_ODD_EXPR;
11722 break;
11724 case WIDEN_LSHIFT_EXPR:
11725 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
11726 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
11727 break;
11729 CASE_CONVERT:
11730 c1 = VEC_UNPACK_LO_EXPR;
11731 c2 = VEC_UNPACK_HI_EXPR;
11732 break;
11734 case FLOAT_EXPR:
11735 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
11736 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
11737 break;
11739 case FIX_TRUNC_EXPR:
11740 c1 = VEC_UNPACK_FIX_TRUNC_LO_EXPR;
11741 c2 = VEC_UNPACK_FIX_TRUNC_HI_EXPR;
11742 break;
11744 default:
11745 gcc_unreachable ();
11748 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
11749 std::swap (c1, c2);
11751 if (code == FIX_TRUNC_EXPR)
11753 /* The signedness is determined from the output operand. */
11754 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11755 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
11757 else if (CONVERT_EXPR_CODE_P (code)
11758 && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
11759 && VECTOR_BOOLEAN_TYPE_P (vectype)
11760 && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
11761 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11763 /* If the input and result modes are the same, a different optab
11764 is needed where we pass in the number of units in vectype. */
11765 optab1 = vec_unpacks_sbool_lo_optab;
11766 optab2 = vec_unpacks_sbool_hi_optab;
11768 else
11770 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11771 optab2 = optab_for_tree_code (c2, vectype, optab_default);
11774 if (!optab1 || !optab2)
11775 return false;
11777 vec_mode = TYPE_MODE (vectype);
11778 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
11779 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
11780 return false;
11782 *code1 = c1;
11783 *code2 = c2;
11785 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11786 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11788 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11789 return true;
11790 /* For scalar masks we may have different boolean
11791 vector types having the same QImode. Thus we
11792 add an additional check on the number of elements. */
11793 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
11794 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11795 return true;
11798 /* Check if it's a multi-step conversion that can be done using intermediate
11799 types. */
11801 prev_type = vectype;
11802 prev_mode = vec_mode;
11804 if (!CONVERT_EXPR_CODE_P (code))
11805 return false;
11807 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11808 intermediate steps in the promotion sequence. We try
11809 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
11810 not. */
11811 interm_types->create (MAX_INTERM_CVT_STEPS);
11812 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11814 intermediate_mode = insn_data[icode1].operand[0].mode;
11815 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
11816 intermediate_type
11817 = vect_halve_mask_nunits (prev_type, intermediate_mode);
11818 else
11819 intermediate_type
11820 = lang_hooks.types.type_for_mode (intermediate_mode,
11821 TYPE_UNSIGNED (prev_type));
11823 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
11824 && VECTOR_BOOLEAN_TYPE_P (prev_type)
11825 && intermediate_mode == prev_mode
11826 && SCALAR_INT_MODE_P (prev_mode))
11828 /* If the input and result modes are the same, a different optab
11829 is needed where we pass in the number of units in vectype. */
11830 optab3 = vec_unpacks_sbool_lo_optab;
11831 optab4 = vec_unpacks_sbool_hi_optab;
11833 else
11835 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
11836 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
11839 if (!optab3 || !optab4
11840 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
11841 || insn_data[icode1].operand[0].mode != intermediate_mode
11842 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
11843 || insn_data[icode2].operand[0].mode != intermediate_mode
11844 || ((icode1 = optab_handler (optab3, intermediate_mode))
11845 == CODE_FOR_nothing)
11846 || ((icode2 = optab_handler (optab4, intermediate_mode))
11847 == CODE_FOR_nothing))
11848 break;
11850 interm_types->quick_push (intermediate_type);
11851 (*multi_step_cvt)++;
11853 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
11854 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
11856 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11857 return true;
11858 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
11859 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
11860 return true;
11863 prev_type = intermediate_type;
11864 prev_mode = intermediate_mode;
11867 interm_types->release ();
11868 return false;
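/* Illustrative sketch, not part of the original file: how a caller might
   query the function above for the char -> int widening used as an example
   in its comment, assuming hypothetical V16QI_VECTYPE and V4SI_VECTYPE
   trees are in scope.

       enum tree_code code1, code2;
       int multi_step_cvt;
       vec<tree> interm_types = vNULL;
       if (supportable_widening_operation (NOP_EXPR, stmt_info,
                                           v4si_vectype, v16qi_vectype,
                                           &code1, &code2, &multi_step_cvt,
                                           &interm_types))
         ...

   For a char -> short -> int promotion sequence a successful call returns
   with MULTI_STEP_CVT == 1 and the intermediate (short) vector type pushed
   onto INTERM_TYPES, matching the description above.  */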
11872 /* Function supportable_narrowing_operation
11874 Check whether an operation represented by the code CODE is a
11875 narrowing operation that is supported by the target platform in
11876 vector form (i.e., when operating on arguments of type VECTYPE_IN
11877 and producing a result of type VECTYPE_OUT).
11879 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11880 and FLOAT. This function checks if these operations are supported by
11881 the target platform directly via vector tree-codes.
11883 Output:
11884 - CODE1 is the code of a vector operation to be used when
11885 vectorizing the operation, if available.
11886 - MULTI_STEP_CVT determines the number of required intermediate steps in
11887 case of multi-step conversion (like int->short->char - in that case
11888 MULTI_STEP_CVT will be 1).
11889 - INTERM_TYPES contains the intermediate type required to perform the
11890 narrowing operation (short in the above example). */
11892 bool
11893 supportable_narrowing_operation (enum tree_code code,
11894 tree vectype_out, tree vectype_in,
11895 enum tree_code *code1, int *multi_step_cvt,
11896 vec<tree> *interm_types)
11898 machine_mode vec_mode;
11899 enum insn_code icode1;
11900 optab optab1, interm_optab;
11901 tree vectype = vectype_in;
11902 tree narrow_vectype = vectype_out;
11903 enum tree_code c1;
11904 tree intermediate_type, prev_type;
11905 machine_mode intermediate_mode, prev_mode;
11906 int i;
11907 bool uns;
11909 *multi_step_cvt = 0;
11910 switch (code)
11912 CASE_CONVERT:
11913 c1 = VEC_PACK_TRUNC_EXPR;
11914 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
11915 && VECTOR_BOOLEAN_TYPE_P (vectype)
11916 && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
11917 && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
11918 optab1 = vec_pack_sbool_trunc_optab;
11919 else
11920 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11921 break;
11923 case FIX_TRUNC_EXPR:
11924 c1 = VEC_PACK_FIX_TRUNC_EXPR;
11925 /* The signedness is determined from the output operand. */
11926 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
11927 break;
11929 case FLOAT_EXPR:
11930 c1 = VEC_PACK_FLOAT_EXPR;
11931 optab1 = optab_for_tree_code (c1, vectype, optab_default);
11932 break;
11934 default:
11935 gcc_unreachable ();
11938 if (!optab1)
11939 return false;
11941 vec_mode = TYPE_MODE (vectype);
11942 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
11943 return false;
11945 *code1 = c1;
11947 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
11949 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
11950 return true;
11951 /* For scalar masks we may have different boolean
11952 vector types having the same QImode. Thus we
11953 add an additional check on the number of elements. */
11954 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
11955 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
11956 return true;
11959 if (code == FLOAT_EXPR)
11960 return false;
11962 /* Check if it's a multi-step conversion that can be done using intermediate
11963 types. */
11964 prev_mode = vec_mode;
11965 prev_type = vectype;
11966 if (code == FIX_TRUNC_EXPR)
11967 uns = TYPE_UNSIGNED (vectype_out);
11968 else
11969 uns = TYPE_UNSIGNED (vectype);
11971 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11972 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11973 costly than signed. */
11974 if (code == FIX_TRUNC_EXPR && uns)
11976 enum insn_code icode2;
11978 intermediate_type
11979 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
11980 interm_optab
11981 = optab_for_tree_code (c1, intermediate_type, optab_default);
11982 if (interm_optab != unknown_optab
11983 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
11984 && insn_data[icode1].operand[0].mode
11985 == insn_data[icode2].operand[0].mode)
11987 uns = false;
11988 optab1 = interm_optab;
11989 icode1 = icode2;
11993 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11994 intermediate steps in the narrowing sequence. We try
11995 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11996 interm_types->create (MAX_INTERM_CVT_STEPS);
11997 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
11999 intermediate_mode = insn_data[icode1].operand[0].mode;
12000 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
12001 intermediate_type
12002 = vect_double_mask_nunits (prev_type, intermediate_mode);
12003 else
12004 intermediate_type
12005 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
12006 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
12007 && VECTOR_BOOLEAN_TYPE_P (prev_type)
12008 && intermediate_mode == prev_mode
12009 && SCALAR_INT_MODE_P (prev_mode))
12010 interm_optab = vec_pack_sbool_trunc_optab;
12011 else
12012 interm_optab
12013 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
12014 optab_default);
12015 if (!interm_optab
12016 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
12017 || insn_data[icode1].operand[0].mode != intermediate_mode
12018 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
12019 == CODE_FOR_nothing))
12020 break;
12022 interm_types->quick_push (intermediate_type);
12023 (*multi_step_cvt)++;
12025 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
12027 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12028 return true;
12029 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
12030 TYPE_VECTOR_SUBPARTS (narrow_vectype)))
12031 return true;
12034 prev_mode = intermediate_mode;
12035 prev_type = intermediate_type;
12036 optab1 = interm_optab;
12039 interm_types->release ();
12040 return false;
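/* Illustrative note, not part of the original file: the narrowing query
   mirrors the widening one.  For the int -> short -> char example in the
   comment above, a successful call sets *CODE1 to VEC_PACK_TRUNC_EXPR,
   sets *MULTI_STEP_CVT to 1 and pushes the intermediate (short) vector
   type onto INTERM_TYPES.  */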
12043 /* Generate and return a statement that sets vector mask MASK such that
12044 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
12046 gcall *
12047 vect_gen_while (tree mask, tree start_index, tree end_index)
12049 tree cmp_type = TREE_TYPE (start_index);
12050 tree mask_type = TREE_TYPE (mask);
12051 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
12052 cmp_type, mask_type,
12053 OPTIMIZE_FOR_SPEED));
12054 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
12055 start_index, end_index,
12056 build_zero_cst (mask_type));
12057 gimple_call_set_lhs (call, mask);
12058 return call;
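/* Worked example (illustrative, not part of the original file): a scalar
   model of the mask produced above, for a hypothetical 8-element MASK with
   START_INDEX == 13 and END_INDEX == 16.

       for (unsigned int i = 0; i < 8; ++i)
         mask_elt[i] = (13 + i < 16);

   gives {1, 1, 1, 0, 0, 0, 0, 0}: exactly the first
   END_INDEX - START_INDEX elements are active, which is how a final
   partial iteration of a masked loop is expressed.  */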
12061 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
12062 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
12064 tree
12065 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
12066 tree end_index)
12068 tree tmp = make_ssa_name (mask_type);
12069 gcall *call = vect_gen_while (tmp, start_index, end_index);
12070 gimple_seq_add_stmt (seq, call);
12071 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
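/* Continuing the illustrative example above (not part of the original
   file): for the same START_INDEX == 13, END_INDEX == 16 and an 8-element
   MASK_TYPE, the value built here is the bitwise complement of the
   WHILE_ULT mask, i.e. {0, 0, 0, 1, 1, 1, 1, 1}.  */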
12074 /* Try to compute the vector types required to vectorize STMT_INFO,
12075 returning true on success and false if vectorization isn't possible.
12076 If GROUP_SIZE is nonzero and we're performing BB vectorization,
12077 make sure that the number of elements in the vectors is no bigger
12078 than GROUP_SIZE.
12080 On success:
12082 - Set *STMT_VECTYPE_OUT to:
12083 - NULL_TREE if the statement doesn't need to be vectorized;
12084 - the equivalent of STMT_VINFO_VECTYPE otherwise.
12086 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
12087 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
12088 statement does not help to determine the overall number of units. */
12090 opt_result
12091 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info,
12092 tree *stmt_vectype_out,
12093 tree *nunits_vectype_out,
12094 unsigned int group_size)
12096 vec_info *vinfo = stmt_info->vinfo;
12097 gimple *stmt = stmt_info->stmt;
12099 /* For BB vectorization, we should always have a group size once we've
12100 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
12101 are tentative requests during things like early data reference
12102 analysis and pattern recognition. */
12103 if (is_a <bb_vec_info> (vinfo))
12104 gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
12105 else
12106 group_size = 0;
12108 *stmt_vectype_out = NULL_TREE;
12109 *nunits_vectype_out = NULL_TREE;
12111 if (gimple_get_lhs (stmt) == NULL_TREE
12112 /* MASK_STORE has no lhs, but is ok. */
12113 && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
12115 if (is_a <gcall *> (stmt))
12117 /* Ignore calls with no lhs. These must be calls to
12118 #pragma omp simd functions, and the vectorization factor
12119 they really need can't be determined until
12120 vectorizable_simd_clone_call. */
12121 if (dump_enabled_p ())
12122 dump_printf_loc (MSG_NOTE, vect_location,
12123 "defer to SIMD clone analysis.\n");
12124 return opt_result::success ();
12127 return opt_result::failure_at (stmt,
12128 "not vectorized: irregular stmt.%G", stmt);
12131 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
12132 return opt_result::failure_at (stmt,
12133 "not vectorized: vector stmt in loop:%G",
12134 stmt);
12136 tree vectype;
12137 tree scalar_type = NULL_TREE;
12138 if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
12140 vectype = STMT_VINFO_VECTYPE (stmt_info);
12141 if (dump_enabled_p ())
12142 dump_printf_loc (MSG_NOTE, vect_location,
12143 "precomputed vectype: %T\n", vectype);
12145 else if (vect_use_mask_type_p (stmt_info))
12147 unsigned int precision = stmt_info->mask_precision;
12148 scalar_type = build_nonstandard_integer_type (precision, 1);
12149 vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
12150 if (!vectype)
12151 return opt_result::failure_at (stmt, "not vectorized: unsupported"
12152 " data-type %T\n", scalar_type);
12153 if (dump_enabled_p ())
12154 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12156 else
12158 if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
12159 scalar_type = TREE_TYPE (DR_REF (dr));
12160 else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
12161 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
12162 else
12163 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
12165 if (dump_enabled_p ())
12167 if (group_size)
12168 dump_printf_loc (MSG_NOTE, vect_location,
12169 "get vectype for scalar type (group size %d):"
12170 " %T\n", group_size, scalar_type);
12171 else
12172 dump_printf_loc (MSG_NOTE, vect_location,
12173 "get vectype for scalar type: %T\n", scalar_type);
12175 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
12176 if (!vectype)
12177 return opt_result::failure_at (stmt,
12178 "not vectorized:"
12179 " unsupported data-type %T\n",
12180 scalar_type);
12182 if (dump_enabled_p ())
12183 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
12185 *stmt_vectype_out = vectype;
12187 /* Don't try to compute scalar types if the stmt produces a boolean
12188 vector; use the existing vector type instead. */
12189 tree nunits_vectype = vectype;
12190 if (!VECTOR_BOOLEAN_TYPE_P (vectype))
12192 /* The number of units is set according to the smallest scalar
12193 type (or the largest vector size, but we only support one
12194 vector size per vectorization). */
12195 HOST_WIDE_INT dummy;
12196 scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy);
12197 if (scalar_type != TREE_TYPE (vectype))
12199 if (dump_enabled_p ())
12200 dump_printf_loc (MSG_NOTE, vect_location,
12201 "get vectype for smallest scalar type: %T\n",
12202 scalar_type);
12203 nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
12204 group_size);
12205 if (!nunits_vectype)
12206 return opt_result::failure_at
12207 (stmt, "not vectorized: unsupported data-type %T\n",
12208 scalar_type);
12209 if (dump_enabled_p ())
12210 dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
12211 nunits_vectype);
12215 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
12216 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)));
12218 if (dump_enabled_p ())
12220 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
12221 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
12222 dump_printf (MSG_NOTE, "\n");
12225 *nunits_vectype_out = nunits_vectype;
12226 return opt_result::success ();
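/* Illustrative example, not part of the original file: for a conversion
   statement that widens a char value to int on a hypothetical 128-bit
   target, *STMT_VECTYPE_OUT above is the vector type of the int result
   (4 elements), while the smallest scalar type in the statement is char,
   so *NUNITS_VECTYPE_OUT is the corresponding 16-element char vector type.
   The final assertion holds because 16 is a multiple of 4.  */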