Add support for conditional reductions using SVE CLASTB
gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
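/* A typical use, as in the cost hooks further down in this file, looks
   roughly like

     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                      stmt_info, 0, vect_body);

   i.e. NCOPIES vector loads attributed to the body of the loop that
   contains STMT_INFO.  */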
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if ((kind == vector_load || kind == unaligned_load)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
96 kind = vector_gather_load;
97 if ((kind == vector_store || kind == unaligned_store)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
99 kind = vector_scatter_store;
100 if (body_cost_vec)
102 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
103 stmt_info_for_cost si = { count, kind,
104 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
105 misalign };
106 body_cost_vec->safe_push (si);
107 return (unsigned)
108 (builtin_vectorization_cost (kind, vectype, misalign) * count);
110 else
111 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
112 count, kind, stmt_info, misalign, where);
115 /* Return a variable of type ELEM_TYPE[NELEMS]. */
117 static tree
118 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
120 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
121 "vect_array");
124 /* ARRAY is an array of vectors created by create_vector_array.
125 Return an SSA_NAME for the vector in index N. The reference
126 is part of the vectorization of STMT and the vector is associated
127 with scalar destination SCALAR_DEST. */
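/* In GIMPLE terms this emits, roughly,

     vect_x.1 = ARRAY[N];

   where vect_x.1 stands for the new destination created from
   SCALAR_DEST, and returns that SSA name.  */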
129 static tree
130 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
131 tree array, unsigned HOST_WIDE_INT n)
133 tree vect_type, vect, vect_name, array_ref;
134 gimple *new_stmt;
136 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
137 vect_type = TREE_TYPE (TREE_TYPE (array));
138 vect = vect_create_destination_var (scalar_dest, vect_type);
139 array_ref = build4 (ARRAY_REF, vect_type, array,
140 build_int_cst (size_type_node, n),
141 NULL_TREE, NULL_TREE);
143 new_stmt = gimple_build_assign (vect, array_ref);
144 vect_name = make_ssa_name (vect, new_stmt);
145 gimple_assign_set_lhs (new_stmt, vect_name);
146 vect_finish_stmt_generation (stmt, new_stmt, gsi);
148 return vect_name;
151 /* ARRAY is an array of vectors created by create_vector_array.
152 Emit code to store SSA_NAME VECT in index N of the array.
153 The store is part of the vectorization of STMT. */
155 static void
156 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
157 tree array, unsigned HOST_WIDE_INT n)
159 tree array_ref;
160 gimple *new_stmt;
162 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
163 build_int_cst (size_type_node, n),
164 NULL_TREE, NULL_TREE);
166 new_stmt = gimple_build_assign (array_ref, vect);
167 vect_finish_stmt_generation (stmt, new_stmt, gsi);
170 /* PTR is a pointer to an array of type TYPE. Return a representation
171 of *PTR. The memory reference replaces those in FIRST_DR
172 (and its group). */
174 static tree
175 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
177 tree mem_ref;
179 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
180 /* Arrays have the same alignment as their type. */
181 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
182 return mem_ref;
185 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
187 /* Function vect_mark_relevant.
189 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
191 static void
192 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
193 enum vect_relevant relevant, bool live_p)
195 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
196 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
197 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
198 gimple *pattern_stmt;
200 if (dump_enabled_p ())
202 dump_printf_loc (MSG_NOTE, vect_location,
203 "mark relevant %d, live %d: ", relevant, live_p);
204 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
207 /* If this stmt is an original stmt in a pattern, we might need to mark its
208 related pattern stmt instead of the original stmt. However, such stmts
209 may have their own uses that are not in any pattern; in such cases the
210 stmt itself should be marked. */
211 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
213 /* This is the last stmt in a sequence that was detected as a
214 pattern that can potentially be vectorized. Don't mark the stmt
215 as relevant/live because it's not going to be vectorized.
216 Instead mark the pattern-stmt that replaces it. */
218 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE, vect_location,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_info = vinfo_for_stmt (pattern_stmt);
225 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
226 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
227 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
228 stmt = pattern_stmt;
231 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
232 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
233 STMT_VINFO_RELEVANT (stmt_info) = relevant;
235 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE, vect_location,
240 "already marked relevant/live.\n");
241 return;
244 worklist->safe_push (stmt);
248 /* Function is_simple_and_all_uses_invariant
250 Return true if STMT is simple and all uses of it are invariant. */
252 bool
253 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
255 tree op;
256 gimple *def_stmt;
257 ssa_op_iter iter;
259 if (!is_gimple_assign (stmt))
260 return false;
262 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
264 enum vect_def_type dt = vect_uninitialized_def;
266 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
268 if (dump_enabled_p ())
269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
270 "use not simple.\n");
271 return false;
274 if (dt != vect_external_def && dt != vect_constant_def)
275 return false;
277 return true;
280 /* Function vect_stmt_relevant_p.
282 Return true if STMT in loop that is represented by LOOP_VINFO is
283 "relevant for vectorization".
285 A stmt is considered "relevant for vectorization" if:
286 - it has uses outside the loop.
287 - it has vdefs (it alters memory).
288 - it is a control stmt in the loop (except for the exit condition).
290 CHECKME: what other side effects would the vectorizer allow? */
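/* For example, in

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;     <-- alters memory, hence relevant
         last = b[i];         <-- value used after the loop, hence live
       }

   both statements are picked up here: the first because it has a vdef,
   the second because its result is used outside the loop.  */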
292 static bool
293 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
294 enum vect_relevant *relevant, bool *live_p)
296 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
297 ssa_op_iter op_iter;
298 imm_use_iterator imm_iter;
299 use_operand_p use_p;
300 def_operand_p def_p;
302 *relevant = vect_unused_in_scope;
303 *live_p = false;
305 /* cond stmt other than loop exit cond. */
306 if (is_ctrl_stmt (stmt)
307 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
308 != loop_exit_ctrl_vec_info_type)
309 *relevant = vect_used_in_scope;
311 /* changing memory. */
312 if (gimple_code (stmt) != GIMPLE_PHI)
313 if (gimple_vdef (stmt)
314 && !gimple_clobber_p (stmt))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE, vect_location,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant = vect_used_in_scope;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
325 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
327 basic_block bb = gimple_bb (USE_STMT (use_p));
328 if (!flow_bb_inside_loop_p (loop, bb))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE, vect_location,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p)))
335 continue;
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
340 gcc_assert (bb == single_exit (loop)->dest);
342 *live_p = true;
347 if (*live_p && *relevant == vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE, vect_location,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant = vect_used_only_live;
356 return (*live_p || *relevant);
360 /* Function exist_non_indexing_operands_for_use_p
362 USE is one of the uses attached to STMT. Check if USE is
363 used in STMT for anything other than indexing an array. */
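/* For instance, in the store a[i_1] = x_2 the use of i_1 only feeds the
   address computation, whereas x_2 is the value being stored; only the
   latter counts as a non-indexing use here.  */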
365 static bool
366 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
368 tree operand;
369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
371 /* USE corresponds to some operand in STMT. If there is no data
372 reference in STMT, then any operand that corresponds to USE
373 is not indexing an array. */
374 if (!STMT_VINFO_DATA_REF (stmt_info))
375 return true;
377 /* STMT has a data_ref. FORNOW this means that it is of one of
378 the following forms:
379 -1- ARRAY_REF = var
380 -2- var = ARRAY_REF
381 (This should have been verified in analyze_data_refs).
383 'var' in the second case corresponds to a def, not a use,
384 so USE cannot correspond to any operands that are not used
385 for array indexing.
387 Therefore, all we need to check is if STMT falls into the
388 first case, and whether var corresponds to USE. */
390 if (!gimple_assign_copy_p (stmt))
392 if (is_gimple_call (stmt)
393 && gimple_call_internal_p (stmt))
394 switch (gimple_call_internal_fn (stmt))
396 case IFN_MASK_STORE:
397 operand = gimple_call_arg (stmt, 3);
398 if (operand == use)
399 return true;
400 /* FALLTHRU */
401 case IFN_MASK_LOAD:
402 operand = gimple_call_arg (stmt, 2);
403 if (operand == use)
404 return true;
405 break;
406 default:
407 break;
409 return false;
412 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
413 return false;
414 operand = gimple_assign_rhs1 (stmt);
415 if (TREE_CODE (operand) != SSA_NAME)
416 return false;
418 if (operand == use)
419 return true;
421 return false;
426 Function process_use.
428 Inputs:
429 - a USE in STMT in a loop represented by LOOP_VINFO
430 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
431 that defined USE. This is done by calling mark_relevant and passing it
432 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
433 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
434 be performed.
436 Outputs:
437 Generally, LIVE_P and RELEVANT are used to define the liveness and
438 relevance info of the DEF_STMT of this USE:
439 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
440 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
441 Exceptions:
442 - case 1: If USE is used only for address computations (e.g. array indexing),
443 which does not need to be directly vectorized, then the liveness/relevance
444 of the respective DEF_STMT is left unchanged.
445 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
446 skip DEF_STMT because it has already been processed.
447 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
448 be modified accordingly.
450 Return true if everything is as expected. Return false otherwise. */
452 static bool
453 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
454 enum vect_relevant relevant, vec<gimple *> *worklist,
455 bool force)
457 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
458 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
459 stmt_vec_info dstmt_vinfo;
460 basic_block bb, def_bb;
461 gimple *def_stmt;
462 enum vect_def_type dt;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
467 return true;
469 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
471 if (dump_enabled_p ())
472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
473 "not vectorized: unsupported use in stmt.\n");
474 return false;
477 if (!def_stmt || gimple_nop_p (def_stmt))
478 return true;
480 def_bb = gimple_bb (def_stmt);
481 if (!flow_bb_inside_loop_p (loop, def_bb))
483 if (dump_enabled_p ())
484 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
485 return true;
488 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
489 DEF_STMT must have already been processed, because this should be the
490 only way that STMT, which is a reduction-phi, was put in the worklist,
491 as there should be no other uses for DEF_STMT in the loop. So we just
492 check that everything is as expected, and we are done. */
493 dstmt_vinfo = vinfo_for_stmt (def_stmt);
494 bb = gimple_bb (stmt);
495 if (gimple_code (stmt) == GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
497 && gimple_code (def_stmt) != GIMPLE_PHI
498 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
499 && bb->loop_father == def_bb->loop_father)
501 if (dump_enabled_p ())
502 dump_printf_loc (MSG_NOTE, vect_location,
503 "reduc-stmt defining reduc-phi in the same nest.\n");
504 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
505 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
506 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
507 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
508 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
509 return true;
512 /* case 3a: outer-loop stmt defining an inner-loop stmt:
513 outer-loop-header-bb:
514 d = def_stmt
515 inner-loop:
516 stmt # use (d)
517 outer-loop-tail-bb:
518 ... */
519 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
521 if (dump_enabled_p ())
522 dump_printf_loc (MSG_NOTE, vect_location,
523 "outer-loop def-stmt defining inner-loop stmt.\n");
525 switch (relevant)
527 case vect_unused_in_scope:
528 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
529 vect_used_in_scope : vect_unused_in_scope;
530 break;
532 case vect_used_in_outer_by_reduction:
533 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
534 relevant = vect_used_by_reduction;
535 break;
537 case vect_used_in_outer:
538 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
539 relevant = vect_used_in_scope;
540 break;
542 case vect_used_in_scope:
543 break;
545 default:
546 gcc_unreachable ();
550 /* case 3b: inner-loop stmt defining an outer-loop stmt:
551 outer-loop-header-bb:
553 inner-loop:
554 d = def_stmt
555 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
556 stmt # use (d) */
557 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
559 if (dump_enabled_p ())
560 dump_printf_loc (MSG_NOTE, vect_location,
561 "inner-loop def-stmt defining outer-loop stmt.\n");
563 switch (relevant)
565 case vect_unused_in_scope:
566 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
567 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
568 vect_used_in_outer_by_reduction : vect_unused_in_scope;
569 break;
571 case vect_used_by_reduction:
572 case vect_used_only_live:
573 relevant = vect_used_in_outer_by_reduction;
574 break;
576 case vect_used_in_scope:
577 relevant = vect_used_in_outer;
578 break;
580 default:
581 gcc_unreachable ();
584 /* We are also not interested in uses on loop PHI backedges that are
585 inductions. Otherwise we'll needlessly vectorize the IV increment
586 and cause hybrid SLP for SLP inductions. Unless the PHI is live
587 of course. */
588 else if (gimple_code (stmt) == GIMPLE_PHI
589 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
590 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
591 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
592 == use))
594 if (dump_enabled_p ())
595 dump_printf_loc (MSG_NOTE, vect_location,
596 "induction value on backedge.\n");
597 return true;
601 vect_mark_relevant (worklist, def_stmt, relevant, false);
602 return true;
606 /* Function vect_mark_stmts_to_be_vectorized.
608 Not all stmts in the loop need to be vectorized. For example:
610 for i...
611 for j...
612 1. T0 = i + j
613 2. T1 = a[T0]
615 3. j = j + 1
617 Stmt 1 and 3 do not need to be vectorized, because loop control and
618 addressing of vectorized data-refs are handled differently.
620 This pass detects such stmts. */
622 bool
623 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
625 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
626 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
627 unsigned int nbbs = loop->num_nodes;
628 gimple_stmt_iterator si;
629 gimple *stmt;
630 unsigned int i;
631 stmt_vec_info stmt_vinfo;
632 basic_block bb;
633 gimple *phi;
634 bool live_p;
635 enum vect_relevant relevant;
637 if (dump_enabled_p ())
638 dump_printf_loc (MSG_NOTE, vect_location,
639 "=== vect_mark_stmts_to_be_vectorized ===\n");
641 auto_vec<gimple *, 64> worklist;
643 /* 1. Init worklist. */
644 for (i = 0; i < nbbs; i++)
646 bb = bbs[i];
647 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
649 phi = gsi_stmt (si);
650 if (dump_enabled_p ())
652 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
653 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
656 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
657 vect_mark_relevant (&worklist, phi, relevant, live_p);
659 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
661 stmt = gsi_stmt (si);
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
668 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
669 vect_mark_relevant (&worklist, stmt, relevant, live_p);
673 /* 2. Process_worklist */
674 while (worklist.length () > 0)
676 use_operand_p use_p;
677 ssa_op_iter iter;
679 stmt = worklist.pop ();
680 if (dump_enabled_p ())
682 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
683 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
686 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
687 (DEF_STMT) as relevant/irrelevant according to the relevance property
688 of STMT. */
689 stmt_vinfo = vinfo_for_stmt (stmt);
690 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
692 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
693 propagated as is to the DEF_STMTs of its USEs.
695 One exception is when STMT has been identified as defining a reduction
696 variable; in this case we set the relevance to vect_used_by_reduction.
697 This is because we distinguish between two kinds of relevant stmts -
698 those that are used by a reduction computation, and those that are
699 (also) used by a regular computation. This allows us later on to
700 identify stmts that are used solely by a reduction, and therefore the
701 order of the results that they produce does not have to be kept. */
703 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
705 case vect_reduction_def:
706 gcc_assert (relevant != vect_unused_in_scope);
707 if (relevant != vect_unused_in_scope
708 && relevant != vect_used_in_scope
709 && relevant != vect_used_by_reduction
710 && relevant != vect_used_only_live)
712 if (dump_enabled_p ())
713 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
714 "unsupported use of reduction.\n");
715 return false;
717 break;
719 case vect_nested_cycle:
720 if (relevant != vect_unused_in_scope
721 && relevant != vect_used_in_outer_by_reduction
722 && relevant != vect_used_in_outer)
724 if (dump_enabled_p ())
725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
726 "unsupported use of nested cycle.\n");
728 return false;
730 break;
732 case vect_double_reduction_def:
733 if (relevant != vect_unused_in_scope
734 && relevant != vect_used_by_reduction
735 && relevant != vect_used_only_live)
737 if (dump_enabled_p ())
738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
739 "unsupported use of double reduction.\n");
741 return false;
743 break;
745 default:
746 break;
749 if (is_pattern_stmt_p (stmt_vinfo))
751 /* Pattern statements are not inserted into the code, so
752 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
753 have to scan the RHS or function arguments instead. */
754 if (is_gimple_assign (stmt))
756 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
757 tree op = gimple_assign_rhs1 (stmt);
759 i = 1;
760 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
762 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
763 relevant, &worklist, false)
764 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
765 relevant, &worklist, false))
766 return false;
767 i = 2;
769 for (; i < gimple_num_ops (stmt); i++)
771 op = gimple_op (stmt, i);
772 if (TREE_CODE (op) == SSA_NAME
773 && !process_use (stmt, op, loop_vinfo, relevant,
774 &worklist, false))
775 return false;
778 else if (is_gimple_call (stmt))
780 for (i = 0; i < gimple_call_num_args (stmt); i++)
782 tree arg = gimple_call_arg (stmt, i);
783 if (!process_use (stmt, arg, loop_vinfo, relevant,
784 &worklist, false))
785 return false;
789 else
790 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
792 tree op = USE_FROM_PTR (use_p);
793 if (!process_use (stmt, op, loop_vinfo, relevant,
794 &worklist, false))
795 return false;
798 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
800 gather_scatter_info gs_info;
801 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
802 gcc_unreachable ();
803 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
804 &worklist, true))
805 return false;
807 } /* while worklist */
809 return true;
813 /* Function vect_model_simple_cost.
815 Models cost for simple operations, i.e. those that only emit ncopies of a
816 single op. Right now, this does not account for multiple insns that could
817 be generated for the single vector op. We will handle that shortly. */
819 void
820 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
821 enum vect_def_type *dt,
822 int ndts,
823 stmt_vector_for_cost *prologue_cost_vec,
824 stmt_vector_for_cost *body_cost_vec)
826 int i;
827 int inside_cost = 0, prologue_cost = 0;
829 /* The SLP costs were already calculated during SLP tree build. */
830 if (PURE_SLP_STMT (stmt_info))
831 return;
833 /* Cost the "broadcast" of a scalar operand into a vector operand.
834 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
835 cost model. */
836 for (i = 0; i < ndts; i++)
837 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
838 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
839 stmt_info, 0, vect_prologue);
841 /* Pass the inside-of-loop statements to the target-specific cost model. */
842 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
843 stmt_info, 0, vect_body);
845 if (dump_enabled_p ())
846 dump_printf_loc (MSG_NOTE, vect_location,
847 "vect_model_simple_cost: inside_cost = %d, "
848 "prologue_cost = %d .\n", inside_cost, prologue_cost);
852 /* Model cost for type demotion and promotion operations. PWR is normally
853 zero for single-step promotions and demotions. It will be one if
854 two-step promotion/demotion is required, and so on. Each additional
855 step doubles the number of instructions required. */
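/* For instance, widening a vector of chars all the way to a vector of
   ints is typically a two-step promotion, so PWR would be 1 and the
   second step is costed at twice the number of statements of the
   first.  */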
857 static void
858 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
859 enum vect_def_type *dt, int pwr)
861 int i, tmp;
862 int inside_cost = 0, prologue_cost = 0;
863 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
864 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
865 void *target_cost_data;
867 /* The SLP costs were already calculated during SLP tree build. */
868 if (PURE_SLP_STMT (stmt_info))
869 return;
871 if (loop_vinfo)
872 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
873 else
874 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
876 for (i = 0; i < pwr + 1; i++)
878 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
879 (i + 1) : i;
880 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
881 vec_promote_demote, stmt_info, 0,
882 vect_body);
885 /* FORNOW: Assuming a maximum of 2 args per stmt. */
886 for (i = 0; i < 2; i++)
887 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
888 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
889 stmt_info, 0, vect_prologue);
891 if (dump_enabled_p ())
892 dump_printf_loc (MSG_NOTE, vect_location,
893 "vect_model_promotion_demotion_cost: inside_cost = %d, "
894 "prologue_cost = %d .\n", inside_cost, prologue_cost);
897 /* Function vect_model_store_cost
899 Models cost for stores. In the case of grouped accesses, one access
900 has the overhead of the grouped access attributed to it. */
902 void
903 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
904 vect_memory_access_type memory_access_type,
905 vec_load_store_type vls_type, slp_tree slp_node,
906 stmt_vector_for_cost *prologue_cost_vec,
907 stmt_vector_for_cost *body_cost_vec)
909 unsigned int inside_cost = 0, prologue_cost = 0;
910 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
911 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
912 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
914 if (vls_type == VLS_STORE_INVARIANT)
915 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
916 stmt_info, 0, vect_prologue);
918 /* Grouped stores update all elements in the group at once,
919 so we want the DR for the first statement. */
920 if (!slp_node && grouped_access_p)
922 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
923 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
926 /* True if we should include any once-per-group costs as well as
927 the cost of the statement itself. For SLP we only get called
928 once per group anyhow. */
929 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
931 /* We assume that the cost of a single store-lanes instruction is
932 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
933 access is instead being provided by a permute-and-store operation,
934 include the cost of the permutes. */
935 if (first_stmt_p
936 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
938 /* Uses high and low interleave or shuffle operations for each
939 needed permute. */
940 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
941 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
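/* E.g. for a group of 4 stores this is ncopies * 2 * 4, i.e. 8 permute
   statements per copy.  */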
942 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
943 stmt_info, 0, vect_body);
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE, vect_location,
947 "vect_model_store_cost: strided group_size = %d .\n",
948 group_size);
951 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
952 /* Costs of the stores. */
953 if (memory_access_type == VMAT_ELEMENTWISE
954 || memory_access_type == VMAT_GATHER_SCATTER)
956 /* N scalar stores plus extracting the elements. */
957 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
958 inside_cost += record_stmt_cost (body_cost_vec,
959 ncopies * assumed_nunits,
960 scalar_store, stmt_info, 0, vect_body);
962 else
963 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
965 if (memory_access_type == VMAT_ELEMENTWISE
966 || memory_access_type == VMAT_STRIDED_SLP)
968 /* N scalar stores plus extracting the elements. */
969 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
970 inside_cost += record_stmt_cost (body_cost_vec,
971 ncopies * assumed_nunits,
972 vec_to_scalar, stmt_info, 0, vect_body);
975 if (dump_enabled_p ())
976 dump_printf_loc (MSG_NOTE, vect_location,
977 "vect_model_store_cost: inside_cost = %d, "
978 "prologue_cost = %d .\n", inside_cost, prologue_cost);
982 /* Calculate cost of DR's memory access. */
983 void
984 vect_get_store_cost (struct data_reference *dr, int ncopies,
985 unsigned int *inside_cost,
986 stmt_vector_for_cost *body_cost_vec)
988 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
989 gimple *stmt = DR_STMT (dr);
990 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
992 switch (alignment_support_scheme)
994 case dr_aligned:
996 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
997 vector_store, stmt_info, 0,
998 vect_body);
1000 if (dump_enabled_p ())
1001 dump_printf_loc (MSG_NOTE, vect_location,
1002 "vect_model_store_cost: aligned.\n");
1003 break;
1006 case dr_unaligned_supported:
1008 /* Here, we assign an additional cost for the unaligned store. */
1009 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1010 unaligned_store, stmt_info,
1011 DR_MISALIGNMENT (dr), vect_body);
1012 if (dump_enabled_p ())
1013 dump_printf_loc (MSG_NOTE, vect_location,
1014 "vect_model_store_cost: unaligned supported by "
1015 "hardware.\n");
1016 break;
1019 case dr_unaligned_unsupported:
1021 *inside_cost = VECT_MAX_COST;
1023 if (dump_enabled_p ())
1024 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1025 "vect_model_store_cost: unsupported access.\n");
1026 break;
1029 default:
1030 gcc_unreachable ();
1035 /* Function vect_model_load_cost
1037 Models cost for loads. In the case of grouped accesses, one access has
1038 the overhead of the grouped access attributed to it. Since unaligned
1039 accesses are supported for loads, we also account for the costs of the
1040 access scheme chosen. */
1042 void
1043 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1044 vect_memory_access_type memory_access_type,
1045 slp_tree slp_node,
1046 stmt_vector_for_cost *prologue_cost_vec,
1047 stmt_vector_for_cost *body_cost_vec)
1049 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1050 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1051 unsigned int inside_cost = 0, prologue_cost = 0;
1052 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1054 /* Grouped loads read all elements in the group at once,
1055 so we want the DR for the first statement. */
1056 if (!slp_node && grouped_access_p)
1058 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1059 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1062 /* True if we should include any once-per-group costs as well as
1063 the cost of the statement itself. For SLP we only get called
1064 once per group anyhow. */
1065 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1067 /* We assume that the cost of a single load-lanes instruction is
1068 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1069 access is instead being provided by a load-and-permute operation,
1070 include the cost of the permutes. */
1071 if (first_stmt_p
1072 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1074 /* Uses even and odd extract operations or shuffle operations
1075 for each needed permute. */
1076 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1077 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1078 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1079 stmt_info, 0, vect_body);
1081 if (dump_enabled_p ())
1082 dump_printf_loc (MSG_NOTE, vect_location,
1083 "vect_model_load_cost: strided group_size = %d .\n",
1084 group_size);
1087 /* The loads themselves. */
1088 if (memory_access_type == VMAT_ELEMENTWISE
1089 || memory_access_type == VMAT_GATHER_SCATTER)
1091 /* N scalar loads plus gathering them into a vector. */
1092 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1093 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1094 inside_cost += record_stmt_cost (body_cost_vec,
1095 ncopies * assumed_nunits,
1096 scalar_load, stmt_info, 0, vect_body);
1098 else
1099 vect_get_load_cost (dr, ncopies, first_stmt_p,
1100 &inside_cost, &prologue_cost,
1101 prologue_cost_vec, body_cost_vec, true);
1102 if (memory_access_type == VMAT_ELEMENTWISE
1103 || memory_access_type == VMAT_STRIDED_SLP)
1104 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1105 stmt_info, 0, vect_body);
1107 if (dump_enabled_p ())
1108 dump_printf_loc (MSG_NOTE, vect_location,
1109 "vect_model_load_cost: inside_cost = %d, "
1110 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1114 /* Calculate cost of DR's memory access. */
1115 void
1116 vect_get_load_cost (struct data_reference *dr, int ncopies,
1117 bool add_realign_cost, unsigned int *inside_cost,
1118 unsigned int *prologue_cost,
1119 stmt_vector_for_cost *prologue_cost_vec,
1120 stmt_vector_for_cost *body_cost_vec,
1121 bool record_prologue_costs)
1123 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1124 gimple *stmt = DR_STMT (dr);
1125 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1127 switch (alignment_support_scheme)
1129 case dr_aligned:
1131 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1132 stmt_info, 0, vect_body);
1134 if (dump_enabled_p ())
1135 dump_printf_loc (MSG_NOTE, vect_location,
1136 "vect_model_load_cost: aligned.\n");
1138 break;
1140 case dr_unaligned_supported:
1142 /* Here, we assign an additional cost for the unaligned load. */
1143 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1144 unaligned_load, stmt_info,
1145 DR_MISALIGNMENT (dr), vect_body);
1147 if (dump_enabled_p ())
1148 dump_printf_loc (MSG_NOTE, vect_location,
1149 "vect_model_load_cost: unaligned supported by "
1150 "hardware.\n");
1152 break;
1154 case dr_explicit_realign:
1156 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1157 vector_load, stmt_info, 0, vect_body);
1158 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1159 vec_perm, stmt_info, 0, vect_body);
1161 /* FIXME: If the misalignment remains fixed across the iterations of
1162 the containing loop, the following cost should be added to the
1163 prologue costs. */
1164 if (targetm.vectorize.builtin_mask_for_load)
1165 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1166 stmt_info, 0, vect_body);
1168 if (dump_enabled_p ())
1169 dump_printf_loc (MSG_NOTE, vect_location,
1170 "vect_model_load_cost: explicit realign\n");
1172 break;
1174 case dr_explicit_realign_optimized:
1176 if (dump_enabled_p ())
1177 dump_printf_loc (MSG_NOTE, vect_location,
1178 "vect_model_load_cost: unaligned software "
1179 "pipelined.\n");
1181 /* Unaligned software pipeline has a load of an address, an initial
1182 load, and possibly a mask operation to "prime" the loop. However,
1183 if this is an access in a group of loads, which provide grouped
1184 access, then the above cost should only be considered for one
1185 access in the group. Inside the loop, there is a load op
1186 and a realignment op. */
1188 if (add_realign_cost && record_prologue_costs)
1190 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1191 vector_stmt, stmt_info,
1192 0, vect_prologue);
1193 if (targetm.vectorize.builtin_mask_for_load)
1194 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1195 vector_stmt, stmt_info,
1196 0, vect_prologue);
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1200 stmt_info, 0, vect_body);
1201 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1202 stmt_info, 0, vect_body);
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_NOTE, vect_location,
1206 "vect_model_load_cost: explicit realign optimized"
1207 "\n");
1209 break;
1212 case dr_unaligned_unsupported:
1214 *inside_cost = VECT_MAX_COST;
1216 if (dump_enabled_p ())
1217 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1218 "vect_model_load_cost: unsupported access.\n");
1219 break;
1222 default:
1223 gcc_unreachable ();
1227 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1228 the loop preheader for the vectorized stmt STMT. */
1230 static void
1231 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1233 if (gsi)
1234 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1235 else
1237 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1238 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1240 if (loop_vinfo)
1242 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1243 basic_block new_bb;
1244 edge pe;
1246 if (nested_in_vect_loop_p (loop, stmt))
1247 loop = loop->inner;
1249 pe = loop_preheader_edge (loop);
1250 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1251 gcc_assert (!new_bb);
1253 else
1255 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1256 basic_block bb;
1257 gimple_stmt_iterator gsi_bb_start;
1259 gcc_assert (bb_vinfo);
1260 bb = BB_VINFO_BB (bb_vinfo);
1261 gsi_bb_start = gsi_after_labels (bb);
1262 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1266 if (dump_enabled_p ())
1268 dump_printf_loc (MSG_NOTE, vect_location,
1269 "created new init_stmt: ");
1270 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1274 /* Function vect_init_vector.
1276 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1277 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1278 vector type, a vector with all elements equal to VAL is created first.
1279 Place the initialization at GSI if it is not NULL. Otherwise, place the
1280 initialization at the loop preheader.
1281 Return the DEF of INIT_STMT.
1282 It will be used in the vectorization of STMT. */
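/* For example, initializing a four-element vector of ints from the
   scalar constant 5 would, roughly, emit

     cst_1 = { 5, 5, 5, 5 };

   in the preheader (or at GSI) and return cst_1.  */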
1284 tree
1285 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1287 gimple *init_stmt;
1288 tree new_temp;
1290 /* We abuse this function to push something to an SSA name with initial 'val'. */
1291 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1293 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1294 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1296 /* Scalar boolean value should be transformed into
1297 all zeros or all ones value before building a vector. */
1298 if (VECTOR_BOOLEAN_TYPE_P (type))
1300 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1301 tree false_val = build_zero_cst (TREE_TYPE (type));
1303 if (CONSTANT_CLASS_P (val))
1304 val = integer_zerop (val) ? false_val : true_val;
1305 else
1307 new_temp = make_ssa_name (TREE_TYPE (type));
1308 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1309 val, true_val, false_val);
1310 vect_init_vector_1 (stmt, init_stmt, gsi);
1311 val = new_temp;
1314 else if (CONSTANT_CLASS_P (val))
1315 val = fold_convert (TREE_TYPE (type), val);
1316 else
1318 new_temp = make_ssa_name (TREE_TYPE (type));
1319 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1320 init_stmt = gimple_build_assign (new_temp,
1321 fold_build1 (VIEW_CONVERT_EXPR,
1322 TREE_TYPE (type),
1323 val));
1324 else
1325 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1326 vect_init_vector_1 (stmt, init_stmt, gsi);
1327 val = new_temp;
1330 val = build_vector_from_val (type, val);
1333 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1334 init_stmt = gimple_build_assign (new_temp, val);
1335 vect_init_vector_1 (stmt, init_stmt, gsi);
1336 return new_temp;
1339 /* Function vect_get_vec_def_for_operand_1.
1341 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1342 DT that will be used in the vectorized stmt. */
1344 tree
1345 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1347 tree vec_oprnd;
1348 gimple *vec_stmt;
1349 stmt_vec_info def_stmt_info = NULL;
1351 switch (dt)
1353 /* operand is a constant or a loop invariant. */
1354 case vect_constant_def:
1355 case vect_external_def:
1356 /* Code should use vect_get_vec_def_for_operand. */
1357 gcc_unreachable ();
1359 /* operand is defined inside the loop. */
1360 case vect_internal_def:
1362 /* Get the def from the vectorized stmt. */
1363 def_stmt_info = vinfo_for_stmt (def_stmt);
1365 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1366 /* Get vectorized pattern statement. */
1367 if (!vec_stmt
1368 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1369 && !STMT_VINFO_RELEVANT (def_stmt_info))
1370 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1371 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1372 gcc_assert (vec_stmt);
1373 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1374 vec_oprnd = PHI_RESULT (vec_stmt);
1375 else if (is_gimple_call (vec_stmt))
1376 vec_oprnd = gimple_call_lhs (vec_stmt);
1377 else
1378 vec_oprnd = gimple_assign_lhs (vec_stmt);
1379 return vec_oprnd;
1382 /* operand is defined by a loop header phi. */
1383 case vect_reduction_def:
1384 case vect_double_reduction_def:
1385 case vect_nested_cycle:
1386 case vect_induction_def:
1388 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1390 /* Get the def from the vectorized stmt. */
1391 def_stmt_info = vinfo_for_stmt (def_stmt);
1392 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1393 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1394 vec_oprnd = PHI_RESULT (vec_stmt);
1395 else
1396 vec_oprnd = gimple_get_lhs (vec_stmt);
1397 return vec_oprnd;
1400 default:
1401 gcc_unreachable ();
1406 /* Function vect_get_vec_def_for_operand.
1408 OP is an operand in STMT. This function returns a (vector) def that will be
1409 used in the vectorized stmt for STMT.
1411 In the case that OP is an SSA_NAME which is defined in the loop, then
1412 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1414 In case OP is an invariant or constant, a new stmt that creates a vector def
1415 needs to be introduced. VECTYPE may be used to specify a required type for
1416 vector invariant. */
1418 tree
1419 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1421 gimple *def_stmt;
1422 enum vect_def_type dt;
1423 bool is_simple_use;
1424 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1425 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1427 if (dump_enabled_p ())
1429 dump_printf_loc (MSG_NOTE, vect_location,
1430 "vect_get_vec_def_for_operand: ");
1431 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1432 dump_printf (MSG_NOTE, "\n");
1435 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1436 gcc_assert (is_simple_use);
1437 if (def_stmt && dump_enabled_p ())
1439 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1440 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1443 if (dt == vect_constant_def || dt == vect_external_def)
1445 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1446 tree vector_type;
1448 if (vectype)
1449 vector_type = vectype;
1450 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1451 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1452 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1453 else
1454 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1456 gcc_assert (vector_type);
1457 return vect_init_vector (stmt, op, vector_type, NULL);
1459 else
1460 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1464 /* Function vect_get_vec_def_for_stmt_copy
1466 Return a vector-def for an operand. This function is used when the
1467 vectorized stmt to be created (by the caller to this function) is a "copy"
1468 created in case the vectorized result cannot fit in one vector, and several
1469 copies of the vector-stmt are required. In this case the vector-def is
1470 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1471 of the stmt that defines VEC_OPRND.
1472 DT is the type of the vector def VEC_OPRND.
1474 Context:
1475 In case the vectorization factor (VF) is bigger than the number
1476 of elements that can fit in a vectype (nunits), we have to generate
1477 more than one vector stmt to vectorize the scalar stmt. This situation
1478 arises when there are multiple data-types operated upon in the loop; the
1479 smallest data-type determines the VF, and as a result, when vectorizing
1480 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1481 vector stmt (each computing a vector of 'nunits' results, and together
1482 computing 'VF' results in each iteration). This function is called when
1483 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1484 which VF=16 and nunits=4, so the number of copies required is 4):
1486 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1488 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1489 VS1.1: vx.1 = memref1 VS1.2
1490 VS1.2: vx.2 = memref2 VS1.3
1491 VS1.3: vx.3 = memref3
1493 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1494 VSnew.1: vz1 = vx.1 + ... VSnew.2
1495 VSnew.2: vz2 = vx.2 + ... VSnew.3
1496 VSnew.3: vz3 = vx.3 + ...
1498 The vectorization of S1 is explained in vectorizable_load.
1499 The vectorization of S2:
1500 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1501 the function 'vect_get_vec_def_for_operand' is called to
1502 get the relevant vector-def for each operand of S2. For operand x it
1503 returns the vector-def 'vx.0'.
1505 To create the remaining copies of the vector-stmt (VSnew.j), this
1506 function is called to get the relevant vector-def for each operand. It is
1507 obtained from the respective VS1.j stmt, which is recorded in the
1508 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1510 For example, to obtain the vector-def 'vx.1' in order to create the
1511 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1512 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1513 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1514 and return its def ('vx.1').
1515 Overall, to create the above sequence this function will be called 3 times:
1516 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1517 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1518 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1520 tree
1521 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1523 gimple *vec_stmt_for_operand;
1524 stmt_vec_info def_stmt_info;
1526 /* Do nothing; can reuse same def. */
1527 if (dt == vect_external_def || dt == vect_constant_def )
1528 return vec_oprnd;
1530 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1531 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1532 gcc_assert (def_stmt_info);
1533 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1534 gcc_assert (vec_stmt_for_operand);
1535 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1536 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1537 else
1538 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1539 return vec_oprnd;
1543 /* Get vectorized definitions for the operands to create a copy of an original
1544 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1546 void
1547 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1548 vec<tree> *vec_oprnds0,
1549 vec<tree> *vec_oprnds1)
1551 tree vec_oprnd = vec_oprnds0->pop ();
1553 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1554 vec_oprnds0->quick_push (vec_oprnd);
1556 if (vec_oprnds1 && vec_oprnds1->length ())
1558 vec_oprnd = vec_oprnds1->pop ();
1559 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1560 vec_oprnds1->quick_push (vec_oprnd);
1565 /* Get vectorized definitions for OP0 and OP1. */
1567 void
1568 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1569 vec<tree> *vec_oprnds0,
1570 vec<tree> *vec_oprnds1,
1571 slp_tree slp_node)
1573 if (slp_node)
1575 int nops = (op1 == NULL_TREE) ? 1 : 2;
1576 auto_vec<tree> ops (nops);
1577 auto_vec<vec<tree> > vec_defs (nops);
1579 ops.quick_push (op0);
1580 if (op1)
1581 ops.quick_push (op1);
1583 vect_get_slp_defs (ops, slp_node, &vec_defs);
1585 *vec_oprnds0 = vec_defs[0];
1586 if (op1)
1587 *vec_oprnds1 = vec_defs[1];
1589 else
1591 tree vec_oprnd;
1593 vec_oprnds0->create (1);
1594 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1595 vec_oprnds0->quick_push (vec_oprnd);
1597 if (op1)
1599 vec_oprnds1->create (1);
1600 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1601 vec_oprnds1->quick_push (vec_oprnd);
1606 /* Helper function called by vect_finish_replace_stmt and
1607 vect_finish_stmt_generation. Set the location of the new
1608 statement and create a stmt_vec_info for it. */
1610 static void
1611 vect_finish_stmt_generation_1 (gimple *stmt, gimple *vec_stmt)
1613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1614 vec_info *vinfo = stmt_info->vinfo;
1616 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1618 if (dump_enabled_p ())
1620 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1621 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1624 gimple_set_location (vec_stmt, gimple_location (stmt));
1626 /* While EH edges will generally prevent vectorization, stmt might
1627 e.g. be in a must-not-throw region. Ensure newly created stmts
1628 that could throw are part of the same region. */
1629 int lp_nr = lookup_stmt_eh_lp (stmt);
1630 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1631 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1634 /* Replace the scalar statement STMT with a new vector statement VEC_STMT,
1635 which sets the same scalar result as STMT did. */
1637 void
1638 vect_finish_replace_stmt (gimple *stmt, gimple *vec_stmt)
1640 gcc_assert (gimple_get_lhs (stmt) == gimple_get_lhs (vec_stmt));
1642 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1643 gsi_replace (&gsi, vec_stmt, false);
1645 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1648 /* Function vect_finish_stmt_generation.
1650 Insert a new stmt. */
1652 void
1653 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1654 gimple_stmt_iterator *gsi)
1656 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1658 if (!gsi_end_p (*gsi)
1659 && gimple_has_mem_ops (vec_stmt))
1661 gimple *at_stmt = gsi_stmt (*gsi);
1662 tree vuse = gimple_vuse (at_stmt);
1663 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1665 tree vdef = gimple_vdef (at_stmt);
1666 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1667 /* If we have an SSA vuse and insert a store, update virtual
1668 SSA form to avoid triggering the renamer. Do so only
1669 if we can easily see all uses - which is what almost always
1670 happens with the way vectorized stmts are inserted. */
1671 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1672 && ((is_gimple_assign (vec_stmt)
1673 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1674 || (is_gimple_call (vec_stmt)
1675 && !(gimple_call_flags (vec_stmt)
1676 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1678 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1679 gimple_set_vdef (vec_stmt, new_vdef);
1680 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1684 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1685 vect_finish_stmt_generation_1 (stmt, vec_stmt);
1688 /* We want to vectorize a call to combined function CFN with function
1689 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1690 as the types of all inputs. Check whether this is possible using
1691 an internal function, returning its code if so or IFN_LAST if not. */
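/* For example, a call that maps to CFN_SQRT can be vectorized through
   IFN_SQRT, provided direct_internal_fn_supported_p reports that the
   target handles it for the given vector types.  */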
1693 static internal_fn
1694 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1695 tree vectype_out, tree vectype_in)
1697 internal_fn ifn;
1698 if (internal_fn_p (cfn))
1699 ifn = as_internal_fn (cfn);
1700 else
1701 ifn = associated_internal_fn (fndecl);
1702 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1704 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1705 if (info.vectorizable)
1707 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1708 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1709 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1710 OPTIMIZE_FOR_SPEED))
1711 return ifn;
1714 return IFN_LAST;
1718 static tree permute_vec_elements (tree, tree, tree, gimple *,
1719 gimple_stmt_iterator *);
1721 /* Check whether a load or store statement in the loop described by
1722 LOOP_VINFO is possible in a fully-masked loop. This is testing
1723 whether the vectorizer pass has the appropriate support, as well as
1724 whether the target does.
1726 VLS_TYPE says whether the statement is a load or store and VECTYPE
1727 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1728 says how the load or store is going to be implemented and GROUP_SIZE
1729 is the number of load or store statements in the containing group.
1731 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1732 supported, otherwise record the required mask types. */
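/* In a fully-masked loop every vector load and store is predicated, so
   that the final iteration can safely process a partial vector when the
   scalar iteration count is not a multiple of the vectorization
   factor.  */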
1734 static void
1735 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
1736 vec_load_store_type vls_type, int group_size,
1737 vect_memory_access_type memory_access_type)
1739 /* Invariant loads need no special support. */
1740 if (memory_access_type == VMAT_INVARIANT)
1741 return;
1743 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
1744 machine_mode vecmode = TYPE_MODE (vectype);
1745 bool is_load = (vls_type == VLS_LOAD);
1746 if (memory_access_type == VMAT_LOAD_STORE_LANES)
1748 if (is_load
1749 ? !vect_load_lanes_supported (vectype, group_size, true)
1750 : !vect_store_lanes_supported (vectype, group_size, true))
1752 if (dump_enabled_p ())
1753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1754 "can't use a fully-masked loop because the"
1755 " target doesn't have an appropriate masked"
1756 " load/store-lanes instruction.\n");
1757 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1758 return;
1760 unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
1761 vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
1762 return;
1765 if (memory_access_type != VMAT_CONTIGUOUS
1766 && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
1768 /* Element X of the data must come from iteration i * VF + X of the
1769 scalar loop. We need more work to support other mappings. */
1770 if (dump_enabled_p ())
1771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1772 "can't use a fully-masked loop because an access"
1773 " isn't contiguous.\n");
1774 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1775 return;
1778 machine_mode mask_mode;
1779 if (!(targetm.vectorize.get_mask_mode
1780 (GET_MODE_NUNITS (vecmode),
1781 GET_MODE_SIZE (vecmode)).exists (&mask_mode))
1782 || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
1784 if (dump_enabled_p ())
1785 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1786 "can't use a fully-masked loop because the target"
1787 " doesn't have the appropriate masked load or"
1788 " store.\n");
1789 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
1790 return;
1792 /* We might load more scalars than we need for permuting SLP loads.
1793 We checked in get_group_load_store_type that the extra elements
1794 don't leak into a new vector. */
1795 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1796 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1797 unsigned int nvectors;
1798 if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors))
1799 vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype);
1800 else
1801 gcc_unreachable ();
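/* Editorial worked example, not part of the original source: with
   GROUP_SIZE == 2, a vectorization factor of 8 and 8 elements per vector,
   can_div_away_from_zero_p computes nvectors = (2 * 8) / 8 rounded away
   from zero, i.e. 2, and two loop masks of the given vector type are
   recorded for the group.  */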
1804 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1805 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1806 that needs to be applied to all loads and stores in a vectorized loop.
1807 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1809 MASK_TYPE is the type of both masks. If new statements are needed,
1810 insert them before GSI. */
1812 static tree
1813 prepare_load_store_mask (tree mask_type, tree loop_mask, tree vec_mask,
1814 gimple_stmt_iterator *gsi)
1816 gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
1817 if (!loop_mask)
1818 return vec_mask;
1820 gcc_assert (TREE_TYPE (loop_mask) == mask_type);
1821 tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
1822 gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
1823 vec_mask, loop_mask);
1824 gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
1825 return and_res;
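/* Editorial sketch, not part of the original source: when LOOP_MASK is
   nonnull the function emits a single statement of the form

     vec_mask_and_N = vec_mask_M & loop_mask_K;

   before GSI and returns its result, so the memory access ends up predicated
   on both the scalar condition and the loop mask.  */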
1828 /* STMT is a non-strided load or store, meaning that it accesses
1829 elements with a known constant step. Return -1 if that step
1830 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1832 static int
1833 compare_step_with_zero (gimple *stmt)
1835 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1836 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1837 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1838 size_zero_node);
1841 /* If the target supports a permute mask that reverses the elements in
1842 a vector of type VECTYPE, return that mask, otherwise return null. */
1844 static tree
1845 perm_mask_for_reverse (tree vectype)
1847 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1849 /* The encoding has a single stepped pattern. */
1850 vec_perm_builder sel (nunits, 1, 3);
1851 for (int i = 0; i < 3; ++i)
1852 sel.quick_push (nunits - 1 - i);
1854 vec_perm_indices indices (sel, 1, nunits);
1855 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1856 return NULL_TREE;
1857 return vect_gen_perm_mask_checked (vectype, indices);
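/* Editorial worked example, not part of the original source: for a 4-element
   vector the three values pushed above are 3, 2 and 1; the single stepped
   pattern extends them to the full selector { 3, 2, 1, 0 }, i.e. a plain
   element reversal.  For variable-length vectors the same encoding stands
   for { NUNITS - 1, NUNITS - 2, ..., 0 }.  */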
1860 /* STMT is either a masked or unconditional store. Return the value
1861 being stored. */
1863 static tree
1864 vect_get_store_rhs (gimple *stmt)
1866 if (gassign *assign = dyn_cast <gassign *> (stmt))
1868 gcc_assert (gimple_assign_single_p (assign));
1869 return gimple_assign_rhs1 (assign);
1871 if (gcall *call = dyn_cast <gcall *> (stmt))
1873 internal_fn ifn = gimple_call_internal_fn (call);
1874 gcc_assert (ifn == IFN_MASK_STORE);
1875 return gimple_call_arg (stmt, 3);
1877 gcc_unreachable ();
1880 /* A subroutine of get_load_store_type, with a subset of the same
1881 arguments. Handle the case where STMT is part of a grouped load
1882 or store.
1884 For stores, the statements in the group are all consecutive
1885 and there is no gap at the end. For loads, the statements in the
1886 group might not be consecutive; there can be gaps between statements
1887 as well as at the end. */
1889 static bool
1890 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1891 bool masked_p, vec_load_store_type vls_type,
1892 vect_memory_access_type *memory_access_type)
1894 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1895 vec_info *vinfo = stmt_info->vinfo;
1896 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1897 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1898 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1899 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1900 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1901 bool single_element_p = (stmt == first_stmt
1902 && !GROUP_NEXT_ELEMENT (stmt_info));
1903 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1904 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1906 /* True if the vectorized statements would access beyond the last
1907 statement in the group. */
1908 bool overrun_p = false;
1910 /* True if we can cope with such overrun by peeling for gaps, so that
1911 there is at least one final scalar iteration after the vector loop. */
1912 bool can_overrun_p = (!masked_p
1913 && vls_type == VLS_LOAD
1914 && loop_vinfo
1915 && !loop->inner);
1917 /* There can only be a gap at the end of the group if the stride is
1918 known at compile time. */
1919 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1921 /* Stores can't yet have gaps. */
1922 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1924 if (slp)
1926 if (STMT_VINFO_STRIDED_P (stmt_info))
1928 /* Try to use consecutive accesses of GROUP_SIZE elements,
1929 separated by the stride, until we have a complete vector.
1930 Fall back to scalar accesses if that isn't possible. */
1931 if (multiple_p (nunits, group_size))
1932 *memory_access_type = VMAT_STRIDED_SLP;
1933 else
1934 *memory_access_type = VMAT_ELEMENTWISE;
1936 else
1938 overrun_p = loop_vinfo && gap != 0;
1939 if (overrun_p && vls_type != VLS_LOAD)
1941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1942 "Grouped store with gaps requires"
1943 " non-consecutive accesses\n");
1944 return false;
1946 /* An overrun is fine if the trailing elements are smaller
1947 than the alignment boundary B. Every vector access will
1948 be a multiple of B and so we are guaranteed to access a
1949 non-gap element in the same B-sized block. */
1950 if (overrun_p
1951 && gap < (vect_known_alignment_in_bytes (first_dr)
1952 / vect_get_scalar_dr_size (first_dr)))
1953 overrun_p = false;
1954 if (overrun_p && !can_overrun_p)
1956 if (dump_enabled_p ())
1957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1958 "Peeling for outer loop is not supported\n");
1959 return false;
1961 *memory_access_type = VMAT_CONTIGUOUS;
1964 else
1966 /* We can always handle this case using elementwise accesses,
1967 but see if something more efficient is available. */
1968 *memory_access_type = VMAT_ELEMENTWISE;
1970 /* If there is a gap at the end of the group then these optimizations
1971 would access excess elements in the last iteration. */
1972 bool would_overrun_p = (gap != 0);
1973 /* An overrun is fine if the trailing elements are smaller than the
1974 alignment boundary B. Every vector access will be a multiple of B
1975 and so we are guaranteed to access a non-gap element in the
1976 same B-sized block. */
1977 if (would_overrun_p
1978 && !masked_p
1979 && gap < (vect_known_alignment_in_bytes (first_dr)
1980 / vect_get_scalar_dr_size (first_dr)))
1981 would_overrun_p = false;
1983 if (!STMT_VINFO_STRIDED_P (stmt_info)
1984 && (can_overrun_p || !would_overrun_p)
1985 && compare_step_with_zero (stmt) > 0)
1987 /* First cope with the degenerate case of a single-element
1988 vector. */
1989 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
1990 *memory_access_type = VMAT_CONTIGUOUS;
1992 /* Otherwise try using LOAD/STORE_LANES. */
1993 if (*memory_access_type == VMAT_ELEMENTWISE
1994 && (vls_type == VLS_LOAD
1995 ? vect_load_lanes_supported (vectype, group_size, masked_p)
1996 : vect_store_lanes_supported (vectype, group_size,
1997 masked_p)))
1999 *memory_access_type = VMAT_LOAD_STORE_LANES;
2000 overrun_p = would_overrun_p;
2003 /* If that fails, try using permuting loads. */
2004 if (*memory_access_type == VMAT_ELEMENTWISE
2005 && (vls_type == VLS_LOAD
2006 ? vect_grouped_load_supported (vectype, single_element_p,
2007 group_size)
2008 : vect_grouped_store_supported (vectype, group_size)))
2010 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
2011 overrun_p = would_overrun_p;
2016 if (vls_type != VLS_LOAD && first_stmt == stmt)
2018 /* STMT is the leader of the group. Check the operands of all the
2019 stmts of the group. */
2020 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
2021 while (next_stmt)
2023 tree op = vect_get_store_rhs (next_stmt);
2024 gimple *def_stmt;
2025 enum vect_def_type dt;
2026 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
2028 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2030 "use not simple.\n");
2031 return false;
2033 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2037 if (overrun_p)
2039 gcc_assert (can_overrun_p);
2040 if (dump_enabled_p ())
2041 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2042 "Data access with gaps requires scalar "
2043 "epilogue loop\n");
2044 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2047 return true;
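/* Editorial worked example, not part of the original source: suppose a
   grouped load has a gap of 1 at the end, the first access is known to be
   16-byte aligned and each scalar element is 4 bytes.  Then
   gap (1) < 16 / 4, so every vector access stays inside an aligned 16-byte
   block that also contains a real group element, and the overrun is treated
   as safe without peeling for gaps.  */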
2050 /* A subroutine of get_load_store_type, with a subset of the same
2051 arguments. Handle the case where STMT is a load or store that
2052 accesses consecutive elements with a negative step. */
2054 static vect_memory_access_type
2055 get_negative_load_store_type (gimple *stmt, tree vectype,
2056 vec_load_store_type vls_type,
2057 unsigned int ncopies)
2059 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2060 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2061 dr_alignment_support alignment_support_scheme;
2063 if (ncopies > 1)
2065 if (dump_enabled_p ())
2066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2067 "multiple types with negative step.\n");
2068 return VMAT_ELEMENTWISE;
2071 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
2072 if (alignment_support_scheme != dr_aligned
2073 && alignment_support_scheme != dr_unaligned_supported)
2075 if (dump_enabled_p ())
2076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2077 "negative step but alignment required.\n");
2078 return VMAT_ELEMENTWISE;
2081 if (vls_type == VLS_STORE_INVARIANT)
2083 if (dump_enabled_p ())
2084 dump_printf_loc (MSG_NOTE, vect_location,
2085 "negative step with invariant source;"
2086 " no permute needed.\n");
2087 return VMAT_CONTIGUOUS_DOWN;
2090 if (!perm_mask_for_reverse (vectype))
2092 if (dump_enabled_p ())
2093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2094 "negative step and reversing not supported.\n");
2095 return VMAT_ELEMENTWISE;
2098 return VMAT_CONTIGUOUS_REVERSE;
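/* Editorial sketch, not part of the original source: for a loop such as

     for (i = n - 1; i >= 0; --i)
       sum += a[i];

   the load from a[] has a negative step.  With a single copy and a target
   that can reverse vectors of the given type, the function above returns
   VMAT_CONTIGUOUS_REVERSE: the vectorized loop loads a contiguous vector
   starting at the lowest address accessed in the iteration and then reverses
   it with the mask from perm_mask_for_reverse.  */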
2101 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
2102 if there is a memory access type that the vectorized form can use,
2103 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2104 or scatters, fill in GS_INFO accordingly.
2106 SLP says whether we're performing SLP rather than loop vectorization.
2107 MASKED_P is true if the statement is conditional on a vectorized mask.
2108 VECTYPE is the vector type that the vectorized statements will use.
2109 NCOPIES is the number of vector statements that will be needed. */
2111 static bool
2112 get_load_store_type (gimple *stmt, tree vectype, bool slp, bool masked_p,
2113 vec_load_store_type vls_type, unsigned int ncopies,
2114 vect_memory_access_type *memory_access_type,
2115 gather_scatter_info *gs_info)
2117 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2118 vec_info *vinfo = stmt_info->vinfo;
2119 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2120 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2121 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2123 *memory_access_type = VMAT_GATHER_SCATTER;
2124 gimple *def_stmt;
2125 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
2126 gcc_unreachable ();
2127 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
2128 &gs_info->offset_dt,
2129 &gs_info->offset_vectype))
2131 if (dump_enabled_p ())
2132 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2133 "%s index use not simple.\n",
2134 vls_type == VLS_LOAD ? "gather" : "scatter");
2135 return false;
2138 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2140 if (!get_group_load_store_type (stmt, vectype, slp, masked_p, vls_type,
2141 memory_access_type))
2142 return false;
2144 else if (STMT_VINFO_STRIDED_P (stmt_info))
2146 gcc_assert (!slp);
2147 *memory_access_type = VMAT_ELEMENTWISE;
2149 else
2151 int cmp = compare_step_with_zero (stmt);
2152 if (cmp < 0)
2153 *memory_access_type = get_negative_load_store_type
2154 (stmt, vectype, vls_type, ncopies);
2155 else if (cmp == 0)
2157 gcc_assert (vls_type == VLS_LOAD);
2158 *memory_access_type = VMAT_INVARIANT;
2160 else
2161 *memory_access_type = VMAT_CONTIGUOUS;
2164 if ((*memory_access_type == VMAT_ELEMENTWISE
2165 || *memory_access_type == VMAT_STRIDED_SLP)
2166 && !nunits.is_constant ())
2168 if (dump_enabled_p ())
2169 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2170 "Not using elementwise accesses due to variable "
2171 "vectorization factor.\n");
2172 return false;
2175 /* FIXME: At the moment the cost model seems to underestimate the
2176 cost of using elementwise accesses. This check preserves the
2177 traditional behavior until that can be fixed. */
2178 if (*memory_access_type == VMAT_ELEMENTWISE
2179 && !STMT_VINFO_STRIDED_P (stmt_info))
2181 if (dump_enabled_p ())
2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "not falling back to elementwise accesses\n");
2184 return false;
2186 return true;
2189 /* Return true if boolean argument MASK is suitable for vectorizing
2190 conditional load or store STMT. When returning true, store the
2191 type of the vectorized mask in *MASK_VECTYPE_OUT. */
2193 static bool
2194 vect_check_load_store_mask (gimple *stmt, tree mask, tree *mask_vectype_out)
2196 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2198 if (dump_enabled_p ())
2199 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2200 "mask argument is not a boolean.\n");
2201 return false;
2204 if (TREE_CODE (mask) != SSA_NAME)
2206 if (dump_enabled_p ())
2207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2208 "mask argument is not an SSA name.\n");
2209 return false;
2212 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2213 gimple *def_stmt;
2214 enum vect_def_type dt;
2215 tree mask_vectype;
2216 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &dt,
2217 &mask_vectype))
2219 if (dump_enabled_p ())
2220 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2221 "mask use not simple.\n");
2222 return false;
2225 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2226 if (!mask_vectype)
2227 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2229 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2231 if (dump_enabled_p ())
2232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2233 "could not find an appropriate vector mask type.\n");
2234 return false;
2237 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2238 TYPE_VECTOR_SUBPARTS (vectype)))
2240 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "vector mask type ");
2244 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2245 dump_printf (MSG_MISSED_OPTIMIZATION,
2246 " does not match vector data type ");
2247 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2248 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2250 return false;
2253 *mask_vectype_out = mask_vectype;
2254 return true;
2257 /* Return true if stored value RHS is suitable for vectorizing store
2258 statement STMT. When returning true, store the type of the
2259 vectorized store value in *RHS_VECTYPE_OUT and the type of the
2260 store in *VLS_TYPE_OUT. */
2262 static bool
2263 vect_check_store_rhs (gimple *stmt, tree rhs, tree *rhs_vectype_out,
2264 vec_load_store_type *vls_type_out)
2266 /* In the case this is a store from a constant make sure
2267 native_encode_expr can handle it. */
2268 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2270 if (dump_enabled_p ())
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2272 "cannot encode constant as a byte sequence.\n");
2273 return false;
2276 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2277 gimple *def_stmt;
2278 enum vect_def_type dt;
2279 tree rhs_vectype;
2280 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &dt,
2281 &rhs_vectype))
2283 if (dump_enabled_p ())
2284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2285 "use not simple.\n");
2286 return false;
2289 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2290 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2294 "incompatible vector types.\n");
2295 return false;
2298 *rhs_vectype_out = rhs_vectype;
2299 if (dt == vect_constant_def || dt == vect_external_def)
2300 *vls_type_out = VLS_STORE_INVARIANT;
2301 else
2302 *vls_type_out = VLS_STORE;
2303 return true;
2306 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2307 Note that we support masks with floating-point type, in which case the
2308 floats are interpreted as a bitmask. */
2310 static tree
2311 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2313 if (TREE_CODE (masktype) == INTEGER_TYPE)
2314 return build_int_cst (masktype, -1);
2315 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2317 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2318 mask = build_vector_from_val (masktype, mask);
2319 return vect_init_vector (stmt, mask, masktype, NULL);
2321 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2323 REAL_VALUE_TYPE r;
2324 long tmp[6];
2325 for (int j = 0; j < 6; ++j)
2326 tmp[j] = -1;
2327 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2328 tree mask = build_real (TREE_TYPE (masktype), r);
2329 mask = build_vector_from_val (masktype, mask);
2330 return vect_init_vector (stmt, mask, masktype, NULL);
2332 gcc_unreachable ();
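/* Editorial note, not part of the original source: in the floating-point
   case above the all-ones element is built by reinterpreting an all-ones bit
   pattern as a real value, e.g. for the V8SF mask operand of an x86 gather
   builtin, where the hardware is generally documented as inspecting only the
   most significant bit of each mask element to decide whether a lane is
   active.  */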
2335 /* Build an all-zero merge value of type VECTYPE while vectorizing
2336 STMT as a gather load. */
2338 static tree
2339 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2341 tree merge;
2342 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2343 merge = build_int_cst (TREE_TYPE (vectype), 0);
2344 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2346 REAL_VALUE_TYPE r;
2347 long tmp[6];
2348 for (int j = 0; j < 6; ++j)
2349 tmp[j] = 0;
2350 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2351 merge = build_real (TREE_TYPE (vectype), r);
2353 else
2354 gcc_unreachable ();
2355 merge = build_vector_from_val (vectype, merge);
2356 return vect_init_vector (stmt, merge, vectype, NULL);
2359 /* Build a gather load call while vectorizing STMT. Insert new instructions
2360 before GSI and add them to VEC_STMT. GS_INFO describes the gather load
2361 operation. If the load is conditional, MASK is the unvectorized
2362 condition, otherwise MASK is null. */
2364 static void
2365 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2366 gimple **vec_stmt, gather_scatter_info *gs_info,
2367 tree mask)
2369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2370 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2371 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2372 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2373 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2374 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2375 edge pe = loop_preheader_edge (loop);
2376 enum { NARROW, NONE, WIDEN } modifier;
2377 poly_uint64 gather_off_nunits
2378 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
2380 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2381 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2382 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2383 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2384 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2385 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2386 tree scaletype = TREE_VALUE (arglist);
2387 gcc_checking_assert (types_compatible_p (srctype, rettype)
2388 && (!mask || types_compatible_p (srctype, masktype)));
2390 tree perm_mask = NULL_TREE;
2391 tree mask_perm_mask = NULL_TREE;
2392 if (known_eq (nunits, gather_off_nunits))
2393 modifier = NONE;
2394 else if (known_eq (nunits * 2, gather_off_nunits))
2396 modifier = WIDEN;
2398 /* Currently widening gathers and scatters are only supported for
2399 fixed-length vectors. */
2400 int count = gather_off_nunits.to_constant ();
2401 vec_perm_builder sel (count, count, 1);
2402 for (int i = 0; i < count; ++i)
2403 sel.quick_push (i | (count / 2));
2405 vec_perm_indices indices (sel, 1, count);
2406 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2407 indices);
2409 else if (known_eq (nunits, gather_off_nunits * 2))
2411 modifier = NARROW;
2413 /* Currently narrowing gathers and scatters are only supported for
2414 fixed-length vectors. */
2415 int count = nunits.to_constant ();
2416 vec_perm_builder sel (count, count, 1);
2417 sel.quick_grow (count);
2418 for (int i = 0; i < count; ++i)
2419 sel[i] = i < count / 2 ? i : i + count / 2;
2420 vec_perm_indices indices (sel, 2, count);
2421 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
2423 ncopies *= 2;
2425 if (mask)
2427 for (int i = 0; i < count; ++i)
2428 sel[i] = i | (count / 2);
2429 indices.new_vector (sel, 2, count);
2430 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2433 else
2434 gcc_unreachable ();
2436 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2437 vectype);
2439 tree ptr = fold_convert (ptrtype, gs_info->base);
2440 if (!is_gimple_min_invariant (ptr))
2442 gimple_seq seq;
2443 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2444 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2445 gcc_assert (!new_bb);
2448 tree scale = build_int_cst (scaletype, gs_info->scale);
2450 tree vec_oprnd0 = NULL_TREE;
2451 tree vec_mask = NULL_TREE;
2452 tree src_op = NULL_TREE;
2453 tree mask_op = NULL_TREE;
2454 tree prev_res = NULL_TREE;
2455 stmt_vec_info prev_stmt_info = NULL;
2457 if (!mask)
2459 src_op = vect_build_zero_merge_argument (stmt, rettype);
2460 mask_op = vect_build_all_ones_mask (stmt, masktype);
2463 for (int j = 0; j < ncopies; ++j)
2465 tree op, var;
2466 gimple *new_stmt;
2467 if (modifier == WIDEN && (j & 1))
2468 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2469 perm_mask, stmt, gsi);
2470 else if (j == 0)
2471 op = vec_oprnd0
2472 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2473 else
2474 op = vec_oprnd0
2475 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
2477 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2479 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2480 TYPE_VECTOR_SUBPARTS (idxtype)));
2481 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2482 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2483 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2484 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2485 op = var;
2488 if (mask)
2490 if (mask_perm_mask && (j & 1))
2491 mask_op = permute_vec_elements (mask_op, mask_op,
2492 mask_perm_mask, stmt, gsi);
2493 else
2495 if (j == 0)
2496 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2497 else
2499 gimple *def_stmt;
2500 enum vect_def_type dt;
2501 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2502 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2505 mask_op = vec_mask;
2506 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2508 gcc_assert
2509 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2510 TYPE_VECTOR_SUBPARTS (masktype)));
2511 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2512 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2513 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2514 mask_op);
2515 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2516 mask_op = var;
2519 src_op = mask_op;
2522 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2523 mask_op, scale);
2525 if (!useless_type_conversion_p (vectype, rettype))
2527 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2528 TYPE_VECTOR_SUBPARTS (rettype)));
2529 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2530 gimple_call_set_lhs (new_stmt, op);
2531 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2532 var = make_ssa_name (vec_dest);
2533 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2534 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2536 else
2538 var = make_ssa_name (vec_dest, new_stmt);
2539 gimple_call_set_lhs (new_stmt, var);
2542 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2544 if (modifier == NARROW)
2546 if ((j & 1) == 0)
2548 prev_res = var;
2549 continue;
2551 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2552 new_stmt = SSA_NAME_DEF_STMT (var);
2555 if (prev_stmt_info == NULL)
2556 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2557 else
2558 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2559 prev_stmt_info = vinfo_for_stmt (new_stmt);
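/* Editorial worked example, not part of the original source: with 4-element
   data vectors and 8-element offset vectors (the WIDEN case), COUNT == 8 and
   the selector built above is { 4, 5, 6, 7, 4, 5, 6, 7 }, so odd copies pick
   up the high half of the current offset vector.  In the NARROW case with
   COUNT == 8 the selector is { 0, 1, 2, 3, 8, 9, 10, 11 }, which merges the
   low halves of two consecutive gather results into one full data vector.  */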
2563 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2565 static bool
2566 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2567 gimple **vec_stmt, slp_tree slp_node,
2568 tree vectype_in, enum vect_def_type *dt)
2570 tree op, vectype;
2571 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2572 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2573 unsigned ncopies;
2574 unsigned HOST_WIDE_INT nunits, num_bytes;
2576 op = gimple_call_arg (stmt, 0);
2577 vectype = STMT_VINFO_VECTYPE (stmt_info);
2579 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2580 return false;
2582 /* Multiple types in SLP are handled by creating the appropriate number of
2583 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2584 case of SLP. */
2585 if (slp_node)
2586 ncopies = 1;
2587 else
2588 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2590 gcc_assert (ncopies >= 1);
2592 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2593 if (! char_vectype)
2594 return false;
2596 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2597 return false;
2599 unsigned word_bytes = num_bytes / nunits;
2601 /* The encoding uses one stepped pattern for each byte in the word. */
2602 vec_perm_builder elts (num_bytes, word_bytes, 3);
2603 for (unsigned i = 0; i < 3; ++i)
2604 for (unsigned j = 0; j < word_bytes; ++j)
2605 elts.quick_push ((i + 1) * word_bytes - j - 1);
2607 vec_perm_indices indices (elts, 1, num_bytes);
2608 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2609 return false;
2611 if (! vec_stmt)
2613 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2614 if (dump_enabled_p ())
2615 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2616 "\n");
2617 if (! PURE_SLP_STMT (stmt_info))
2619 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2620 1, vector_stmt, stmt_info, 0, vect_prologue);
2621 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2622 ncopies, vec_perm, stmt_info, 0, vect_body);
2624 return true;
2627 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2629 /* Transform. */
2630 vec<tree> vec_oprnds = vNULL;
2631 gimple *new_stmt = NULL;
2632 stmt_vec_info prev_stmt_info = NULL;
2633 for (unsigned j = 0; j < ncopies; j++)
2635 /* Handle uses. */
2636 if (j == 0)
2637 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2638 else
2639 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2641 /* Arguments are ready. Create the new vector stmt. */
2642 unsigned i;
2643 tree vop;
2644 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2646 tree tem = make_ssa_name (char_vectype);
2647 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2648 char_vectype, vop));
2649 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2650 tree tem2 = make_ssa_name (char_vectype);
2651 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2652 tem, tem, bswap_vconst);
2653 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2654 tem = make_ssa_name (vectype);
2655 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2656 vectype, tem2));
2657 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2658 if (slp_node)
2659 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2662 if (slp_node)
2663 continue;
2665 if (j == 0)
2666 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2667 else
2668 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2670 prev_stmt_info = vinfo_for_stmt (new_stmt);
2673 vec_oprnds.release ();
2674 return true;
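/* Editorial worked example, not part of the original source: for
   __builtin_bswap32 on a vector of four 32-bit elements, NUM_BYTES == 16 and
   WORD_BYTES == 4, so the byte selector built above expands to

     { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }

   and each word is byte-reversed in place by one VEC_PERM_EXPR on the
   char-vector view of the operand.  */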
2677 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2678 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2679 in a single step. On success, store the binary pack code in
2680 *CONVERT_CODE. */
2682 static bool
2683 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2684 tree_code *convert_code)
2686 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2687 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2688 return false;
2690 tree_code code;
2691 int multi_step_cvt = 0;
2692 auto_vec <tree, 8> interm_types;
2693 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2694 &code, &multi_step_cvt,
2695 &interm_types)
2696 || multi_step_cvt)
2697 return false;
2699 *convert_code = code;
2700 return true;
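/* Editorial sketch, not part of the original source: assuming VECTYPE_IN is
   a vector of eight 32-bit integers and VECTYPE_OUT a vector of sixteen
   16-bit integers, the conversion is a single narrowing step and
   *CONVERT_CODE is typically VEC_PACK_TRUNC_EXPR, which the caller then uses
   to combine each pair of half-width results.  */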
2703 /* Function vectorizable_call.
2705 Check if GS performs a function call that can be vectorized.
2706 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2707 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2708 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2710 static bool
2711 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2712 slp_tree slp_node)
2714 gcall *stmt;
2715 tree vec_dest;
2716 tree scalar_dest;
2717 tree op, type;
2718 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2719 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2720 tree vectype_out, vectype_in;
2721 poly_uint64 nunits_in;
2722 poly_uint64 nunits_out;
2723 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2724 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2725 vec_info *vinfo = stmt_info->vinfo;
2726 tree fndecl, new_temp, rhs_type;
2727 gimple *def_stmt;
2728 enum vect_def_type dt[3]
2729 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2730 int ndts = 3;
2731 gimple *new_stmt = NULL;
2732 int ncopies, j;
2733 vec<tree> vargs = vNULL;
2734 enum { NARROW, NONE, WIDEN } modifier;
2735 size_t i, nargs;
2736 tree lhs;
2738 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2739 return false;
2741 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2742 && ! vec_stmt)
2743 return false;
2745 /* Is GS a vectorizable call? */
2746 stmt = dyn_cast <gcall *> (gs);
2747 if (!stmt)
2748 return false;
2750 if (gimple_call_internal_p (stmt)
2751 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2752 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2753 /* Handled by vectorizable_load and vectorizable_store. */
2754 return false;
2756 if (gimple_call_lhs (stmt) == NULL_TREE
2757 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2758 return false;
2760 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2762 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2764 /* Process function arguments. */
2765 rhs_type = NULL_TREE;
2766 vectype_in = NULL_TREE;
2767 nargs = gimple_call_num_args (stmt);
2769 /* Bail out if the function has more than three arguments; we do not have
2770 interesting builtin functions to vectorize with more than two arguments,
2771 except for fma. Calls with no arguments are not handled either. */
2772 if (nargs == 0 || nargs > 3)
2773 return false;
2775 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2776 if (gimple_call_internal_p (stmt)
2777 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2779 nargs = 0;
2780 rhs_type = unsigned_type_node;
2783 for (i = 0; i < nargs; i++)
2785 tree opvectype;
2787 op = gimple_call_arg (stmt, i);
2789 /* We can only handle calls with arguments of the same type. */
2790 if (rhs_type
2791 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2793 if (dump_enabled_p ())
2794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2795 "argument types differ.\n");
2796 return false;
2798 if (!rhs_type)
2799 rhs_type = TREE_TYPE (op);
2801 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2803 if (dump_enabled_p ())
2804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2805 "use not simple.\n");
2806 return false;
2809 if (!vectype_in)
2810 vectype_in = opvectype;
2811 else if (opvectype
2812 && opvectype != vectype_in)
2814 if (dump_enabled_p ())
2815 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2816 "argument vector types differ.\n");
2817 return false;
2820 /* If all arguments are external or constant defs use a vector type with
2821 the same size as the output vector type. */
2822 if (!vectype_in)
2823 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2824 if (vec_stmt)
2825 gcc_assert (vectype_in);
2826 if (!vectype_in)
2828 if (dump_enabled_p ())
2830 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2831 "no vectype for scalar type ");
2832 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2833 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2836 return false;
2839 /* FORNOW */
2840 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2841 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2842 if (known_eq (nunits_in * 2, nunits_out))
2843 modifier = NARROW;
2844 else if (known_eq (nunits_out, nunits_in))
2845 modifier = NONE;
2846 else if (known_eq (nunits_out * 2, nunits_in))
2847 modifier = WIDEN;
2848 else
2849 return false;
2851 /* We only handle functions that do not read or clobber memory. */
2852 if (gimple_vuse (stmt))
2854 if (dump_enabled_p ())
2855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2856 "function reads from or writes to memory.\n");
2857 return false;
2860 /* For now, we only vectorize functions if a target specific builtin
2861 is available. TODO -- in some cases, it might be profitable to
2862 insert the calls for pieces of the vector, in order to be able
2863 to vectorize other operations in the loop. */
2864 fndecl = NULL_TREE;
2865 internal_fn ifn = IFN_LAST;
2866 combined_fn cfn = gimple_call_combined_fn (stmt);
2867 tree callee = gimple_call_fndecl (stmt);
2869 /* First try using an internal function. */
2870 tree_code convert_code = ERROR_MARK;
2871 if (cfn != CFN_LAST
2872 && (modifier == NONE
2873 || (modifier == NARROW
2874 && simple_integer_narrowing (vectype_out, vectype_in,
2875 &convert_code))))
2876 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2877 vectype_in);
2879 /* If that fails, try asking for a target-specific built-in function. */
2880 if (ifn == IFN_LAST)
2882 if (cfn != CFN_LAST)
2883 fndecl = targetm.vectorize.builtin_vectorized_function
2884 (cfn, vectype_out, vectype_in);
2885 else
2886 fndecl = targetm.vectorize.builtin_md_vectorized_function
2887 (callee, vectype_out, vectype_in);
2890 if (ifn == IFN_LAST && !fndecl)
2892 if (cfn == CFN_GOMP_SIMD_LANE
2893 && !slp_node
2894 && loop_vinfo
2895 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2896 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2897 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2898 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2900 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2901 { 0, 1, 2, ... vf - 1 } vector. */
2902 gcc_assert (nargs == 0);
2904 else if (modifier == NONE
2905 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2906 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2907 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2908 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2909 vectype_in, dt);
2910 else
2912 if (dump_enabled_p ())
2913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2914 "function is not vectorizable.\n");
2915 return false;
2919 if (slp_node)
2920 ncopies = 1;
2921 else if (modifier == NARROW && ifn == IFN_LAST)
2922 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2923 else
2924 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2926 /* Sanity check: make sure that at least one copy of the vectorized stmt
2927 needs to be generated. */
2928 gcc_assert (ncopies >= 1);
2930 if (!vec_stmt) /* transformation not required. */
2932 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2933 if (dump_enabled_p ())
2934 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2935 "\n");
2936 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2937 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2938 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2939 vec_promote_demote, stmt_info, 0, vect_body);
2941 return true;
2944 /* Transform. */
2946 if (dump_enabled_p ())
2947 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2949 /* Handle def. */
2950 scalar_dest = gimple_call_lhs (stmt);
2951 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2953 prev_stmt_info = NULL;
2954 if (modifier == NONE || ifn != IFN_LAST)
2956 tree prev_res = NULL_TREE;
2957 for (j = 0; j < ncopies; ++j)
2959 /* Build argument list for the vectorized call. */
2960 if (j == 0)
2961 vargs.create (nargs);
2962 else
2963 vargs.truncate (0);
2965 if (slp_node)
2967 auto_vec<vec<tree> > vec_defs (nargs);
2968 vec<tree> vec_oprnds0;
2970 for (i = 0; i < nargs; i++)
2971 vargs.quick_push (gimple_call_arg (stmt, i));
2972 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2973 vec_oprnds0 = vec_defs[0];
2975 /* Arguments are ready. Create the new vector stmt. */
2976 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2978 size_t k;
2979 for (k = 0; k < nargs; k++)
2981 vec<tree> vec_oprndsk = vec_defs[k];
2982 vargs[k] = vec_oprndsk[i];
2984 if (modifier == NARROW)
2986 tree half_res = make_ssa_name (vectype_in);
2987 gcall *call
2988 = gimple_build_call_internal_vec (ifn, vargs);
2989 gimple_call_set_lhs (call, half_res);
2990 gimple_call_set_nothrow (call, true);
2991 new_stmt = call;
2992 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2993 if ((i & 1) == 0)
2995 prev_res = half_res;
2996 continue;
2998 new_temp = make_ssa_name (vec_dest);
2999 new_stmt = gimple_build_assign (new_temp, convert_code,
3000 prev_res, half_res);
3002 else
3004 gcall *call;
3005 if (ifn != IFN_LAST)
3006 call = gimple_build_call_internal_vec (ifn, vargs);
3007 else
3008 call = gimple_build_call_vec (fndecl, vargs);
3009 new_temp = make_ssa_name (vec_dest, call);
3010 gimple_call_set_lhs (call, new_temp);
3011 gimple_call_set_nothrow (call, true);
3012 new_stmt = call;
3014 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3015 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3018 for (i = 0; i < nargs; i++)
3020 vec<tree> vec_oprndsi = vec_defs[i];
3021 vec_oprndsi.release ();
3023 continue;
3026 for (i = 0; i < nargs; i++)
3028 op = gimple_call_arg (stmt, i);
3029 if (j == 0)
3030 vec_oprnd0
3031 = vect_get_vec_def_for_operand (op, stmt);
3032 else
3034 vec_oprnd0 = gimple_call_arg (new_stmt, i);
3035 vec_oprnd0
3036 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3039 vargs.quick_push (vec_oprnd0);
3042 if (gimple_call_internal_p (stmt)
3043 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
3045 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
3046 tree new_var
3047 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
3048 gimple *init_stmt = gimple_build_assign (new_var, cst);
3049 vect_init_vector_1 (stmt, init_stmt, NULL);
3050 new_temp = make_ssa_name (vec_dest);
3051 new_stmt = gimple_build_assign (new_temp, new_var);
3053 else if (modifier == NARROW)
3055 tree half_res = make_ssa_name (vectype_in);
3056 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
3057 gimple_call_set_lhs (call, half_res);
3058 gimple_call_set_nothrow (call, true);
3059 new_stmt = call;
3060 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3061 if ((j & 1) == 0)
3063 prev_res = half_res;
3064 continue;
3066 new_temp = make_ssa_name (vec_dest);
3067 new_stmt = gimple_build_assign (new_temp, convert_code,
3068 prev_res, half_res);
3070 else
3072 gcall *call;
3073 if (ifn != IFN_LAST)
3074 call = gimple_build_call_internal_vec (ifn, vargs);
3075 else
3076 call = gimple_build_call_vec (fndecl, vargs);
3077 new_temp = make_ssa_name (vec_dest, new_stmt);
3078 gimple_call_set_lhs (call, new_temp);
3079 gimple_call_set_nothrow (call, true);
3080 new_stmt = call;
3082 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3084 if (j == (modifier == NARROW ? 1 : 0))
3085 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3086 else
3087 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3089 prev_stmt_info = vinfo_for_stmt (new_stmt);
3092 else if (modifier == NARROW)
3094 for (j = 0; j < ncopies; ++j)
3096 /* Build argument list for the vectorized call. */
3097 if (j == 0)
3098 vargs.create (nargs * 2);
3099 else
3100 vargs.truncate (0);
3102 if (slp_node)
3104 auto_vec<vec<tree> > vec_defs (nargs);
3105 vec<tree> vec_oprnds0;
3107 for (i = 0; i < nargs; i++)
3108 vargs.quick_push (gimple_call_arg (stmt, i));
3109 vect_get_slp_defs (vargs, slp_node, &vec_defs);
3110 vec_oprnds0 = vec_defs[0];
3112 /* Arguments are ready. Create the new vector stmt. */
3113 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
3115 size_t k;
3116 vargs.truncate (0);
3117 for (k = 0; k < nargs; k++)
3119 vec<tree> vec_oprndsk = vec_defs[k];
3120 vargs.quick_push (vec_oprndsk[i]);
3121 vargs.quick_push (vec_oprndsk[i + 1]);
3123 gcall *call;
3124 if (ifn != IFN_LAST)
3125 call = gimple_build_call_internal_vec (ifn, vargs);
3126 else
3127 call = gimple_build_call_vec (fndecl, vargs);
3128 new_temp = make_ssa_name (vec_dest, call);
3129 gimple_call_set_lhs (call, new_temp);
3130 gimple_call_set_nothrow (call, true);
3131 new_stmt = call;
3132 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3133 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3136 for (i = 0; i < nargs; i++)
3138 vec<tree> vec_oprndsi = vec_defs[i];
3139 vec_oprndsi.release ();
3141 continue;
3144 for (i = 0; i < nargs; i++)
3146 op = gimple_call_arg (stmt, i);
3147 if (j == 0)
3149 vec_oprnd0
3150 = vect_get_vec_def_for_operand (op, stmt);
3151 vec_oprnd1
3152 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3154 else
3156 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3157 vec_oprnd0
3158 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3159 vec_oprnd1
3160 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3163 vargs.quick_push (vec_oprnd0);
3164 vargs.quick_push (vec_oprnd1);
3167 new_stmt = gimple_build_call_vec (fndecl, vargs);
3168 new_temp = make_ssa_name (vec_dest, new_stmt);
3169 gimple_call_set_lhs (new_stmt, new_temp);
3170 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3172 if (j == 0)
3173 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3174 else
3175 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3177 prev_stmt_info = vinfo_for_stmt (new_stmt);
3180 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3182 else
3183 /* No current target implements this case. */
3184 return false;
3186 vargs.release ();
3188 /* The call in STMT might prevent it from being removed in DCE.
3189 We however cannot remove it here, due to the way the SSA name
3190 it defines is mapped to the new definition. So just replace the
3191 RHS of the statement with something harmless. */
3193 if (slp_node)
3194 return true;
3196 type = TREE_TYPE (scalar_dest);
3197 if (is_pattern_stmt_p (stmt_info))
3198 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3199 else
3200 lhs = gimple_call_lhs (stmt);
3202 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3203 set_vinfo_for_stmt (new_stmt, stmt_info);
3204 set_vinfo_for_stmt (stmt, NULL);
3205 STMT_VINFO_STMT (stmt_info) = new_stmt;
3206 gsi_replace (gsi, new_stmt, false);
3208 return true;
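/* Editorial sketch, not part of the original source: in the narrowing
   internal-function case above, each pair of copies produces gimple along
   the lines of

     half_res.1 = IFN_FN (args of the even copy);
     half_res.2 = IFN_FN (args of the odd copy);
     vect_dest.3 = PACK_CODE <half_res.1, half_res.2>;

   where IFN_FN stands for the chosen internal function and PACK_CODE is the
   code returned by simple_integer_narrowing (all names hypothetical).  */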
3212 struct simd_call_arg_info
3214 tree vectype;
3215 tree op;
3216 HOST_WIDE_INT linear_step;
3217 enum vect_def_type dt;
3218 unsigned int align;
3219 bool simd_lane_linear;
3222 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3223 is linear within simd lane (but not within whole loop), note it in
3224 *ARGINFO. */
3226 static void
3227 vect_simd_lane_linear (tree op, struct loop *loop,
3228 struct simd_call_arg_info *arginfo)
3230 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3232 if (!is_gimple_assign (def_stmt)
3233 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3234 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3235 return;
3237 tree base = gimple_assign_rhs1 (def_stmt);
3238 HOST_WIDE_INT linear_step = 0;
3239 tree v = gimple_assign_rhs2 (def_stmt);
3240 while (TREE_CODE (v) == SSA_NAME)
3242 tree t;
3243 def_stmt = SSA_NAME_DEF_STMT (v);
3244 if (is_gimple_assign (def_stmt))
3245 switch (gimple_assign_rhs_code (def_stmt))
3247 case PLUS_EXPR:
3248 t = gimple_assign_rhs2 (def_stmt);
3249 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3250 return;
3251 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3252 v = gimple_assign_rhs1 (def_stmt);
3253 continue;
3254 case MULT_EXPR:
3255 t = gimple_assign_rhs2 (def_stmt);
3256 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3257 return;
3258 linear_step = tree_to_shwi (t);
3259 v = gimple_assign_rhs1 (def_stmt);
3260 continue;
3261 CASE_CONVERT:
3262 t = gimple_assign_rhs1 (def_stmt);
3263 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3264 || (TYPE_PRECISION (TREE_TYPE (v))
3265 < TYPE_PRECISION (TREE_TYPE (t))))
3266 return;
3267 if (!linear_step)
3268 linear_step = 1;
3269 v = t;
3270 continue;
3271 default:
3272 return;
3274 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3275 && loop->simduid
3276 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3277 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3278 == loop->simduid))
3280 if (!linear_step)
3281 linear_step = 1;
3282 arginfo->linear_step = linear_step;
3283 arginfo->op = base;
3284 arginfo->simd_lane_linear = true;
3285 return;
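/* Editorial sketch, not part of the original source: the walk above
   recognizes address computations of the form

     _1 = GOMP_SIMD_LANE (simduid.0_6);
     _2 = (sizetype) _1;
     _3 = _2 * 8;
     p_4 = &base + _3;

   (names hypothetical) and records op == &base, linear_step == 8 and
   simd_lane_linear == true in *ARGINFO.  */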
3290 /* Return the number of elements in vector type VECTYPE, which is associated
3291 with a SIMD clone. At present these vectors always have a constant
3292 length. */
3294 static unsigned HOST_WIDE_INT
3295 simd_clone_subparts (tree vectype)
3297 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3300 /* Function vectorizable_simd_clone_call.
3302 Check if STMT performs a function call that can be vectorized
3303 by calling a simd clone of the function.
3304 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3305 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3306 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3308 static bool
3309 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3310 gimple **vec_stmt, slp_tree slp_node)
3312 tree vec_dest;
3313 tree scalar_dest;
3314 tree op, type;
3315 tree vec_oprnd0 = NULL_TREE;
3316 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3317 tree vectype;
3318 unsigned int nunits;
3319 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3320 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3321 vec_info *vinfo = stmt_info->vinfo;
3322 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3323 tree fndecl, new_temp;
3324 gimple *def_stmt;
3325 gimple *new_stmt = NULL;
3326 int ncopies, j;
3327 auto_vec<simd_call_arg_info> arginfo;
3328 vec<tree> vargs = vNULL;
3329 size_t i, nargs;
3330 tree lhs, rtype, ratype;
3331 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3333 /* Is STMT a vectorizable call? */
3334 if (!is_gimple_call (stmt))
3335 return false;
3337 fndecl = gimple_call_fndecl (stmt);
3338 if (fndecl == NULL_TREE)
3339 return false;
3341 struct cgraph_node *node = cgraph_node::get (fndecl);
3342 if (node == NULL || node->simd_clones == NULL)
3343 return false;
3345 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3346 return false;
3348 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3349 && ! vec_stmt)
3350 return false;
3352 if (gimple_call_lhs (stmt)
3353 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3354 return false;
3356 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3358 vectype = STMT_VINFO_VECTYPE (stmt_info);
3360 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3361 return false;
3363 /* FORNOW */
3364 if (slp_node)
3365 return false;
3367 /* Process function arguments. */
3368 nargs = gimple_call_num_args (stmt);
3370 /* Bail out if the function has zero arguments. */
3371 if (nargs == 0)
3372 return false;
3374 arginfo.reserve (nargs, true);
3376 for (i = 0; i < nargs; i++)
3378 simd_call_arg_info thisarginfo;
3379 affine_iv iv;
3381 thisarginfo.linear_step = 0;
3382 thisarginfo.align = 0;
3383 thisarginfo.op = NULL_TREE;
3384 thisarginfo.simd_lane_linear = false;
3386 op = gimple_call_arg (stmt, i);
3387 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3388 &thisarginfo.vectype)
3389 || thisarginfo.dt == vect_uninitialized_def)
3391 if (dump_enabled_p ())
3392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3393 "use not simple.\n");
3394 return false;
3397 if (thisarginfo.dt == vect_constant_def
3398 || thisarginfo.dt == vect_external_def)
3399 gcc_assert (thisarginfo.vectype == NULL_TREE);
3400 else
3401 gcc_assert (thisarginfo.vectype != NULL_TREE);
3403 /* For linear arguments, the analyze phase should have saved
3404 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3405 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3406 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3408 gcc_assert (vec_stmt);
3409 thisarginfo.linear_step
3410 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3411 thisarginfo.op
3412 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3413 thisarginfo.simd_lane_linear
3414 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3415 == boolean_true_node);
3416 /* If loop has been peeled for alignment, we need to adjust it. */
3417 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3418 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3419 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3421 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3422 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3423 tree opt = TREE_TYPE (thisarginfo.op);
3424 bias = fold_convert (TREE_TYPE (step), bias);
3425 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3426 thisarginfo.op
3427 = fold_build2 (POINTER_TYPE_P (opt)
3428 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3429 thisarginfo.op, bias);
3432 else if (!vec_stmt
3433 && thisarginfo.dt != vect_constant_def
3434 && thisarginfo.dt != vect_external_def
3435 && loop_vinfo
3436 && TREE_CODE (op) == SSA_NAME
3437 && simple_iv (loop, loop_containing_stmt (stmt), op,
3438 &iv, false)
3439 && tree_fits_shwi_p (iv.step))
3441 thisarginfo.linear_step = tree_to_shwi (iv.step);
3442 thisarginfo.op = iv.base;
3444 else if ((thisarginfo.dt == vect_constant_def
3445 || thisarginfo.dt == vect_external_def)
3446 && POINTER_TYPE_P (TREE_TYPE (op)))
3447 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3448 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3449 linear too. */
3450 if (POINTER_TYPE_P (TREE_TYPE (op))
3451 && !thisarginfo.linear_step
3452 && !vec_stmt
3453 && thisarginfo.dt != vect_constant_def
3454 && thisarginfo.dt != vect_external_def
3455 && loop_vinfo
3456 && !slp_node
3457 && TREE_CODE (op) == SSA_NAME)
3458 vect_simd_lane_linear (op, loop, &thisarginfo);
3460 arginfo.quick_push (thisarginfo);
3463 unsigned HOST_WIDE_INT vf;
3464 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3466 if (dump_enabled_p ())
3467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3468 "not considering SIMD clones; not yet supported"
3469 " for variable-width vectors.\n");
3470 return false;
3473 unsigned int badness = 0;
3474 struct cgraph_node *bestn = NULL;
3475 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3476 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3477 else
3478 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3479 n = n->simdclone->next_clone)
3481 unsigned int this_badness = 0;
3482 if (n->simdclone->simdlen > vf
3483 || n->simdclone->nargs != nargs)
3484 continue;
3485 if (n->simdclone->simdlen < vf)
3486 this_badness += (exact_log2 (vf)
3487 - exact_log2 (n->simdclone->simdlen)) * 1024;
3488 if (n->simdclone->inbranch)
3489 this_badness += 2048;
3490 int target_badness = targetm.simd_clone.usable (n);
3491 if (target_badness < 0)
3492 continue;
3493 this_badness += target_badness * 512;
3494 /* FORNOW: Have to add code to add the mask argument. */
3495 if (n->simdclone->inbranch)
3496 continue;
3497 for (i = 0; i < nargs; i++)
3499 switch (n->simdclone->args[i].arg_type)
3501 case SIMD_CLONE_ARG_TYPE_VECTOR:
3502 if (!useless_type_conversion_p
3503 (n->simdclone->args[i].orig_type,
3504 TREE_TYPE (gimple_call_arg (stmt, i))))
3505 i = -1;
3506 else if (arginfo[i].dt == vect_constant_def
3507 || arginfo[i].dt == vect_external_def
3508 || arginfo[i].linear_step)
3509 this_badness += 64;
3510 break;
3511 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3512 if (arginfo[i].dt != vect_constant_def
3513 && arginfo[i].dt != vect_external_def)
3514 i = -1;
3515 break;
3516 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3517 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3518 if (arginfo[i].dt == vect_constant_def
3519 || arginfo[i].dt == vect_external_def
3520 || (arginfo[i].linear_step
3521 != n->simdclone->args[i].linear_step))
3522 i = -1;
3523 break;
3524 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3525 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3526 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3527 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3528 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3529 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3530 /* FORNOW */
3531 i = -1;
3532 break;
3533 case SIMD_CLONE_ARG_TYPE_MASK:
3534 gcc_unreachable ();
3536 if (i == (size_t) -1)
3537 break;
3538 if (n->simdclone->args[i].alignment > arginfo[i].align)
3540 i = -1;
3541 break;
3543 if (arginfo[i].align)
3544 this_badness += (exact_log2 (arginfo[i].align)
3545 - exact_log2 (n->simdclone->args[i].alignment));
3547 if (i == (size_t) -1)
3548 continue;
3549 if (bestn == NULL || this_badness < badness)
3551 bestn = n;
3552 badness = this_badness;
3556 if (bestn == NULL)
3557 return false;
3559 for (i = 0; i < nargs; i++)
3560 if ((arginfo[i].dt == vect_constant_def
3561 || arginfo[i].dt == vect_external_def)
3562 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3564 arginfo[i].vectype
3565 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3566 i)));
3567 if (arginfo[i].vectype == NULL
3568 || (simd_clone_subparts (arginfo[i].vectype)
3569 > bestn->simdclone->simdlen))
3570 return false;
3573 fndecl = bestn->decl;
3574 nunits = bestn->simdclone->simdlen;
3575 ncopies = vf / nunits;
3577 /* If the function isn't const, only allow it in simd loops where the
3578 user has asserted that at least nunits consecutive iterations can be
3579 performed using SIMD instructions. */
3580 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3581 && gimple_vuse (stmt))
3582 return false;
3584 /* Sanity check: make sure that at least one copy of the vectorized stmt
3585 needs to be generated. */
3586 gcc_assert (ncopies >= 1);
3588 if (!vec_stmt) /* transformation not required. */
3590 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3591 for (i = 0; i < nargs; i++)
3592 if ((bestn->simdclone->args[i].arg_type
3593 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3594 || (bestn->simdclone->args[i].arg_type
3595 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3597 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3598 + 1);
3599 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3600 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3601 ? size_type_node : TREE_TYPE (arginfo[i].op);
3602 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3603 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3604 tree sll = arginfo[i].simd_lane_linear
3605 ? boolean_true_node : boolean_false_node;
3606 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3608 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3609 if (dump_enabled_p ())
3610 dump_printf_loc (MSG_NOTE, vect_location,
3611 "=== vectorizable_simd_clone_call ===\n");
3612 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3613 return true;
3616 /* Transform. */
3618 if (dump_enabled_p ())
3619 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3621 /* Handle def. */
3622 scalar_dest = gimple_call_lhs (stmt);
3623 vec_dest = NULL_TREE;
3624 rtype = NULL_TREE;
3625 ratype = NULL_TREE;
3626 if (scalar_dest)
3628 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3629 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3630 if (TREE_CODE (rtype) == ARRAY_TYPE)
3632 ratype = rtype;
3633 rtype = TREE_TYPE (ratype);
3637 prev_stmt_info = NULL;
3638 for (j = 0; j < ncopies; ++j)
3640 /* Build argument list for the vectorized call. */
3641 if (j == 0)
3642 vargs.create (nargs);
3643 else
3644 vargs.truncate (0);
3646 for (i = 0; i < nargs; i++)
3648 unsigned int k, l, m, o;
3649 tree atype;
3650 op = gimple_call_arg (stmt, i);
3651 switch (bestn->simdclone->args[i].arg_type)
3653 case SIMD_CLONE_ARG_TYPE_VECTOR:
3654 atype = bestn->simdclone->args[i].vector_type;
3655 o = nunits / simd_clone_subparts (atype);
3656 for (m = j * o; m < (j + 1) * o; m++)
3658 if (simd_clone_subparts (atype)
3659 < simd_clone_subparts (arginfo[i].vectype))
3661 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3662 k = (simd_clone_subparts (arginfo[i].vectype)
3663 / simd_clone_subparts (atype));
3664 gcc_assert ((k & (k - 1)) == 0);
3665 if (m == 0)
3666 vec_oprnd0
3667 = vect_get_vec_def_for_operand (op, stmt);
3668 else
3670 vec_oprnd0 = arginfo[i].op;
3671 if ((m & (k - 1)) == 0)
3672 vec_oprnd0
3673 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3674 vec_oprnd0);
3676 arginfo[i].op = vec_oprnd0;
3677 vec_oprnd0
3678 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3679 bitsize_int (prec),
3680 bitsize_int ((m & (k - 1)) * prec));
3681 new_stmt
3682 = gimple_build_assign (make_ssa_name (atype),
3683 vec_oprnd0);
3684 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3685 vargs.safe_push (gimple_assign_lhs (new_stmt));
3687 else
3689 k = (simd_clone_subparts (atype)
3690 / simd_clone_subparts (arginfo[i].vectype));
3691 gcc_assert ((k & (k - 1)) == 0);
3692 vec<constructor_elt, va_gc> *ctor_elts;
3693 if (k != 1)
3694 vec_alloc (ctor_elts, k);
3695 else
3696 ctor_elts = NULL;
3697 for (l = 0; l < k; l++)
3699 if (m == 0 && l == 0)
3700 vec_oprnd0
3701 = vect_get_vec_def_for_operand (op, stmt);
3702 else
3703 vec_oprnd0
3704 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3705 arginfo[i].op);
3706 arginfo[i].op = vec_oprnd0;
3707 if (k == 1)
3708 break;
3709 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3710 vec_oprnd0);
3712 if (k == 1)
3713 vargs.safe_push (vec_oprnd0);
3714 else
3716 vec_oprnd0 = build_constructor (atype, ctor_elts);
3717 new_stmt
3718 = gimple_build_assign (make_ssa_name (atype),
3719 vec_oprnd0);
3720 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3721 vargs.safe_push (gimple_assign_lhs (new_stmt));
3725 break;
3726 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3727 vargs.safe_push (op);
3728 break;
3729 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3730 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3731 if (j == 0)
3733 gimple_seq stmts;
3734 arginfo[i].op
3735 = force_gimple_operand (arginfo[i].op, &stmts, true,
3736 NULL_TREE);
3737 if (stmts != NULL)
3739 basic_block new_bb;
3740 edge pe = loop_preheader_edge (loop);
3741 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3742 gcc_assert (!new_bb);
3744 if (arginfo[i].simd_lane_linear)
3746 vargs.safe_push (arginfo[i].op);
3747 break;
3749 tree phi_res = copy_ssa_name (op);
3750 gphi *new_phi = create_phi_node (phi_res, loop->header);
3751 set_vinfo_for_stmt (new_phi,
3752 new_stmt_vec_info (new_phi, loop_vinfo));
3753 add_phi_arg (new_phi, arginfo[i].op,
3754 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3755 enum tree_code code
3756 = POINTER_TYPE_P (TREE_TYPE (op))
3757 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3758 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3759 ? sizetype : TREE_TYPE (op);
3760 widest_int cst
3761 = wi::mul (bestn->simdclone->args[i].linear_step,
3762 ncopies * nunits);
3763 tree tcst = wide_int_to_tree (type, cst);
3764 tree phi_arg = copy_ssa_name (op);
3765 new_stmt
3766 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3767 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3768 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3769 set_vinfo_for_stmt (new_stmt,
3770 new_stmt_vec_info (new_stmt, loop_vinfo));
3771 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3772 UNKNOWN_LOCATION);
3773 arginfo[i].op = phi_res;
3774 vargs.safe_push (phi_res);
3776 else
3778 enum tree_code code
3779 = POINTER_TYPE_P (TREE_TYPE (op))
3780 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3781 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3782 ? sizetype : TREE_TYPE (op);
3783 widest_int cst
3784 = wi::mul (bestn->simdclone->args[i].linear_step,
3785 j * nunits);
3786 tree tcst = wide_int_to_tree (type, cst);
3787 new_temp = make_ssa_name (TREE_TYPE (op));
3788 new_stmt = gimple_build_assign (new_temp, code,
3789 arginfo[i].op, tcst);
3790 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3791 vargs.safe_push (new_temp);
3793 break;
3794 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3795 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3796 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3797 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3798 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3799 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3800 default:
3801 gcc_unreachable ();
3805 new_stmt = gimple_build_call_vec (fndecl, vargs);
3806 if (vec_dest)
3808 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
3809 if (ratype)
3810 new_temp = create_tmp_var (ratype);
3811 else if (simd_clone_subparts (vectype)
3812 == simd_clone_subparts (rtype))
3813 new_temp = make_ssa_name (vec_dest, new_stmt);
3814 else
3815 new_temp = make_ssa_name (rtype, new_stmt);
3816 gimple_call_set_lhs (new_stmt, new_temp);
3818 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3820 if (vec_dest)
3822 if (simd_clone_subparts (vectype) < nunits)
3824 unsigned int k, l;
3825 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3826 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
3827 k = nunits / simd_clone_subparts (vectype);
3828 gcc_assert ((k & (k - 1)) == 0);
3829 for (l = 0; l < k; l++)
3831 tree t;
3832 if (ratype)
3834 t = build_fold_addr_expr (new_temp);
3835 t = build2 (MEM_REF, vectype, t,
3836 build_int_cst (TREE_TYPE (t), l * bytes));
3838 else
3839 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3840 bitsize_int (prec), bitsize_int (l * prec));
3841 new_stmt
3842 = gimple_build_assign (make_ssa_name (vectype), t);
3843 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3844 if (j == 0 && l == 0)
3845 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3846 else
3847 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3849 prev_stmt_info = vinfo_for_stmt (new_stmt);
3852 if (ratype)
3854 tree clobber = build_constructor (ratype, NULL);
3855 TREE_THIS_VOLATILE (clobber) = 1;
3856 new_stmt = gimple_build_assign (new_temp, clobber);
3857 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3859 continue;
3861 else if (simd_clone_subparts (vectype) > nunits)
3863 unsigned int k = (simd_clone_subparts (vectype)
3864 / simd_clone_subparts (rtype));
3865 gcc_assert ((k & (k - 1)) == 0);
3866 if ((j & (k - 1)) == 0)
3867 vec_alloc (ret_ctor_elts, k);
3868 if (ratype)
3870 unsigned int m, o = nunits / simd_clone_subparts (rtype);
3871 for (m = 0; m < o; m++)
3873 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3874 size_int (m), NULL_TREE, NULL_TREE);
3875 new_stmt
3876 = gimple_build_assign (make_ssa_name (rtype), tem);
3877 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3878 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3879 gimple_assign_lhs (new_stmt));
3881 tree clobber = build_constructor (ratype, NULL);
3882 TREE_THIS_VOLATILE (clobber) = 1;
3883 new_stmt = gimple_build_assign (new_temp, clobber);
3884 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3886 else
3887 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3888 if ((j & (k - 1)) != k - 1)
3889 continue;
3890 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3891 new_stmt
3892 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3893 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3895 if ((unsigned) j == k - 1)
3896 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3897 else
3898 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3900 prev_stmt_info = vinfo_for_stmt (new_stmt);
3901 continue;
3903 else if (ratype)
3905 tree t = build_fold_addr_expr (new_temp);
3906 t = build2 (MEM_REF, vectype, t,
3907 build_int_cst (TREE_TYPE (t), 0));
3908 new_stmt
3909 = gimple_build_assign (make_ssa_name (vec_dest), t);
3910 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3911 tree clobber = build_constructor (ratype, NULL);
3912 TREE_THIS_VOLATILE (clobber) = 1;
3913 vect_finish_stmt_generation (stmt,
3914 gimple_build_assign (new_temp,
3915 clobber), gsi);
3919 if (j == 0)
3920 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3921 else
3922 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3924 prev_stmt_info = vinfo_for_stmt (new_stmt);
3927 vargs.release ();
3929 /* The call in STMT might prevent it from being removed in dce.
3930     However, we cannot remove it here, because of the way the SSA name
3931     it defines is mapped to the new definition.  So just replace the
3932     rhs of the statement with something harmless.  */
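  /* For instance (sketch): a scalar call  x_1 = foo (y_2);  is rewritten
     below as  x_1 = 0;  and a call with no lhs becomes a GIMPLE_NOP,
     leaving DCE free to drop it later.  */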
3934 if (slp_node)
3935 return true;
3937 if (scalar_dest)
3939 type = TREE_TYPE (scalar_dest);
3940 if (is_pattern_stmt_p (stmt_info))
3941 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3942 else
3943 lhs = gimple_call_lhs (stmt);
3944 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3946 else
3947 new_stmt = gimple_build_nop ();
3948 set_vinfo_for_stmt (new_stmt, stmt_info);
3949 set_vinfo_for_stmt (stmt, NULL);
3950 STMT_VINFO_STMT (stmt_info) = new_stmt;
3951 gsi_replace (gsi, new_stmt, true);
3952 unlink_stmt_vdef (stmt);
3954 return true;
3958 /* Function vect_gen_widened_results_half
3960    Create a vector stmt whose code, number of arguments, and result
3961    variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3962    VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
3963 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3964 needs to be created (DECL is a function-decl of a target-builtin).
3965 STMT is the original scalar stmt that we are vectorizing. */
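/* For example (illustrative only): widening a V8HI multiply into V4SI
   results is emitted as two halves, roughly

     vlo_3 = VEC_WIDEN_MULT_LO_EXPR <va_1, vb_2>;
     vhi_4 = VEC_WIDEN_MULT_HI_EXPR <va_1, vb_2>;

   and this helper builds one of those two statements per call.  */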
3967 static gimple *
3968 vect_gen_widened_results_half (enum tree_code code,
3969 tree decl,
3970 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3971 tree vec_dest, gimple_stmt_iterator *gsi,
3972 gimple *stmt)
3974 gimple *new_stmt;
3975 tree new_temp;
3977 /* Generate half of the widened result: */
3978 if (code == CALL_EXPR)
3980      /* Target-specific support.  */
3981 if (op_type == binary_op)
3982 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3983 else
3984 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3985 new_temp = make_ssa_name (vec_dest, new_stmt);
3986 gimple_call_set_lhs (new_stmt, new_temp);
3988 else
3990 /* Generic support */
3991 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3992 if (op_type != binary_op)
3993 vec_oprnd1 = NULL;
3994 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3995 new_temp = make_ssa_name (vec_dest, new_stmt);
3996 gimple_assign_set_lhs (new_stmt, new_temp);
3998 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4000 return new_stmt;
4004 /* Get vectorized definitions for loop-based vectorization. For the first
4005 operand we call vect_get_vec_def_for_operand() (with OPRND containing
4006    the scalar operand), and for the rest we get a copy with
4007 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4008 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4009 The vectors are collected into VEC_OPRNDS. */
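/* Sketch of the effect for a single step (names illustrative): starting
   from scalar OPRND s_1, the defs collected are

     vdef0 = vect_get_vec_def_for_operand (s_1, stmt);
     vdef1 = vect_get_vec_def_for_stmt_copy (dt, vdef0);

   and each further MULTI_STEP_CVT level appends two more copies derived
   the same way from the previous vector definition.  */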
4011 static void
4012 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
4013 vec<tree> *vec_oprnds, int multi_step_cvt)
4015 tree vec_oprnd;
4017 /* Get first vector operand. */
4018   /* All the vector operands except the very first one (which is the scalar oprnd)
4019 are stmt copies. */
4020 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
4021 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
4022 else
4023 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
4025 vec_oprnds->quick_push (vec_oprnd);
4027 /* Get second vector operand. */
4028 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
4029 vec_oprnds->quick_push (vec_oprnd);
4031 *oprnd = vec_oprnd;
4033 /* For conversion in multiple steps, continue to get operands
4034 recursively. */
4035 if (multi_step_cvt)
4036 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
4040 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4041 For multi-step conversions store the resulting vectors and call the function
4042 recursively. */
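/* Illustrative example: demoting int to short with V4SI inputs pairs the
   operands two at a time, e.g.

     vshort_3 = VEC_PACK_TRUNC_EXPR <vint_1, vint_2>;

   and a multi-step conversion repeats this on the packed results.  */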
4044 static void
4045 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
4046 int multi_step_cvt, gimple *stmt,
4047 vec<tree> vec_dsts,
4048 gimple_stmt_iterator *gsi,
4049 slp_tree slp_node, enum tree_code code,
4050 stmt_vec_info *prev_stmt_info)
4052 unsigned int i;
4053 tree vop0, vop1, new_tmp, vec_dest;
4054 gimple *new_stmt;
4055 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4057 vec_dest = vec_dsts.pop ();
4059 for (i = 0; i < vec_oprnds->length (); i += 2)
4061 /* Create demotion operation. */
4062 vop0 = (*vec_oprnds)[i];
4063 vop1 = (*vec_oprnds)[i + 1];
4064 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4065 new_tmp = make_ssa_name (vec_dest, new_stmt);
4066 gimple_assign_set_lhs (new_stmt, new_tmp);
4067 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4069 if (multi_step_cvt)
4070 /* Store the resulting vector for next recursive call. */
4071 (*vec_oprnds)[i/2] = new_tmp;
4072 else
4074 /* This is the last step of the conversion sequence. Store the
4075	     vectors in SLP_NODE or in the vector info of the scalar statement
4076	     (or in the STMT_VINFO_RELATED_STMT chain).  */
4077 if (slp_node)
4078 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4079 else
4081 if (!*prev_stmt_info)
4082 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4083 else
4084 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
4086 *prev_stmt_info = vinfo_for_stmt (new_stmt);
4091 /* For multi-step demotion operations we first generate demotion operations
4092 from the source type to the intermediate types, and then combine the
4093 results (stored in VEC_OPRNDS) in demotion operation to the destination
4094 type. */
4095 if (multi_step_cvt)
4097 /* At each level of recursion we have half of the operands we had at the
4098 previous level. */
4099 vec_oprnds->truncate ((i+1)/2);
4100 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
4101 stmt, vec_dsts, gsi, slp_node,
4102 VEC_PACK_TRUNC_EXPR,
4103 prev_stmt_info);
4106 vec_dsts.quick_push (vec_dest);
4110 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4111 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
4112 the resulting vectors and call the function recursively. */
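/* Illustrative example: promoting V8HI to V4SI produces two result
   vectors per input vector, roughly

     vlo_3 = [CODE1] vshort_1;   e.g. VEC_UNPACK_LO_EXPR
     vhi_4 = [CODE2] vshort_1;   e.g. VEC_UNPACK_HI_EXPR

   both of which are pushed back into VEC_OPRNDS0 for the next step.  */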
4114 static void
4115 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
4116 vec<tree> *vec_oprnds1,
4117 gimple *stmt, tree vec_dest,
4118 gimple_stmt_iterator *gsi,
4119 enum tree_code code1,
4120 enum tree_code code2, tree decl1,
4121 tree decl2, int op_type)
4123 int i;
4124 tree vop0, vop1, new_tmp1, new_tmp2;
4125 gimple *new_stmt1, *new_stmt2;
4126 vec<tree> vec_tmp = vNULL;
4128 vec_tmp.create (vec_oprnds0->length () * 2);
4129 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
4131 if (op_type == binary_op)
4132 vop1 = (*vec_oprnds1)[i];
4133 else
4134 vop1 = NULL_TREE;
4136 /* Generate the two halves of promotion operation. */
4137 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4138 op_type, vec_dest, gsi, stmt);
4139 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4140 op_type, vec_dest, gsi, stmt);
4141 if (is_gimple_call (new_stmt1))
4143 new_tmp1 = gimple_call_lhs (new_stmt1);
4144 new_tmp2 = gimple_call_lhs (new_stmt2);
4146 else
4148 new_tmp1 = gimple_assign_lhs (new_stmt1);
4149 new_tmp2 = gimple_assign_lhs (new_stmt2);
4152 /* Store the results for the next step. */
4153 vec_tmp.quick_push (new_tmp1);
4154 vec_tmp.quick_push (new_tmp2);
4157 vec_oprnds0->release ();
4158 *vec_oprnds0 = vec_tmp;
4162 /* Check if STMT performs a conversion operation, that can be vectorized.
4163 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4164 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4165 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
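/* Typical scalar statements handled here (purely for illustration):

     short_5 = (short) int_4;       NARROW  (more elements in the output)
     double_7 = (double) float_6;   WIDEN   (fewer elements in the output)
     int_9 = (int) float_8;         NONE    (same number of elements)  */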
4167 static bool
4168 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4169 gimple **vec_stmt, slp_tree slp_node)
4171 tree vec_dest;
4172 tree scalar_dest;
4173 tree op0, op1 = NULL_TREE;
4174 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4175 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4176 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4177 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4178 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4179 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4180 tree new_temp;
4181 gimple *def_stmt;
4182 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4183 int ndts = 2;
4184 gimple *new_stmt = NULL;
4185 stmt_vec_info prev_stmt_info;
4186 poly_uint64 nunits_in;
4187 poly_uint64 nunits_out;
4188 tree vectype_out, vectype_in;
4189 int ncopies, i, j;
4190 tree lhs_type, rhs_type;
4191 enum { NARROW, NONE, WIDEN } modifier;
4192 vec<tree> vec_oprnds0 = vNULL;
4193 vec<tree> vec_oprnds1 = vNULL;
4194 tree vop0;
4195 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4196 vec_info *vinfo = stmt_info->vinfo;
4197 int multi_step_cvt = 0;
4198 vec<tree> interm_types = vNULL;
4199 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4200 int op_type;
4201 unsigned short fltsz;
4203 /* Is STMT a vectorizable conversion? */
4205 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4206 return false;
4208 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4209 && ! vec_stmt)
4210 return false;
4212 if (!is_gimple_assign (stmt))
4213 return false;
4215 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4216 return false;
4218 code = gimple_assign_rhs_code (stmt);
4219 if (!CONVERT_EXPR_CODE_P (code)
4220 && code != FIX_TRUNC_EXPR
4221 && code != FLOAT_EXPR
4222 && code != WIDEN_MULT_EXPR
4223 && code != WIDEN_LSHIFT_EXPR)
4224 return false;
4226 op_type = TREE_CODE_LENGTH (code);
4228 /* Check types of lhs and rhs. */
4229 scalar_dest = gimple_assign_lhs (stmt);
4230 lhs_type = TREE_TYPE (scalar_dest);
4231 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4233 op0 = gimple_assign_rhs1 (stmt);
4234 rhs_type = TREE_TYPE (op0);
4236 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4237 && !((INTEGRAL_TYPE_P (lhs_type)
4238 && INTEGRAL_TYPE_P (rhs_type))
4239 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4240 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4241 return false;
4243 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4244 && ((INTEGRAL_TYPE_P (lhs_type)
4245 && !type_has_mode_precision_p (lhs_type))
4246 || (INTEGRAL_TYPE_P (rhs_type)
4247 && !type_has_mode_precision_p (rhs_type))))
4249 if (dump_enabled_p ())
4250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4251 "type conversion to/from bit-precision unsupported."
4252 "\n");
4253 return false;
4256 /* Check the operands of the operation. */
4257 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4259 if (dump_enabled_p ())
4260 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4261 "use not simple.\n");
4262 return false;
4264 if (op_type == binary_op)
4266 bool ok;
4268 op1 = gimple_assign_rhs2 (stmt);
4269 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4270 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4271 OP1. */
4272 if (CONSTANT_CLASS_P (op0))
4273 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4274 else
4275 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4277 if (!ok)
4279 if (dump_enabled_p ())
4280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4281 "use not simple.\n");
4282 return false;
4286  /* If op0 is an external or constant def, use a vector type of
4287 the same size as the output vector type. */
4288 if (!vectype_in)
4289 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4290 if (vec_stmt)
4291 gcc_assert (vectype_in);
4292 if (!vectype_in)
4294 if (dump_enabled_p ())
4296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4297 "no vectype for scalar type ");
4298 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4299 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4302 return false;
4305 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4306 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4308 if (dump_enabled_p ())
4310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4311 "can't convert between boolean and non "
4312 "boolean vectors");
4313 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4314 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4317 return false;
4320 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4321 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4322 if (known_eq (nunits_out, nunits_in))
4323 modifier = NONE;
4324 else if (multiple_p (nunits_out, nunits_in))
4325 modifier = NARROW;
4326 else
4328 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4329 modifier = WIDEN;
4332 /* Multiple types in SLP are handled by creating the appropriate number of
4333 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4334 case of SLP. */
4335 if (slp_node)
4336 ncopies = 1;
4337 else if (modifier == NARROW)
4338 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4339 else
4340 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4342 /* Sanity check: make sure that at least one copy of the vectorized stmt
4343 needs to be generated. */
4344 gcc_assert (ncopies >= 1);
4346 bool found_mode = false;
4347 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4348 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4349 opt_scalar_mode rhs_mode_iter;
4351 /* Supportable by target? */
4352 switch (modifier)
4354 case NONE:
4355 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4356 return false;
4357 if (supportable_convert_operation (code, vectype_out, vectype_in,
4358 &decl1, &code1))
4359 break;
4360 /* FALLTHRU */
4361 unsupported:
4362 if (dump_enabled_p ())
4363 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4364 "conversion not supported by target.\n");
4365 return false;
4367 case WIDEN:
4368 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4369 &code1, &code2, &multi_step_cvt,
4370 &interm_types))
4372 /* Binary widening operation can only be supported directly by the
4373 architecture. */
4374 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4375 break;
4378 if (code != FLOAT_EXPR
4379 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4380 goto unsupported;
4382 fltsz = GET_MODE_SIZE (lhs_mode);
4383 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4385 rhs_mode = rhs_mode_iter.require ();
4386 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4387 break;
4389 cvt_type
4390 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4391 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4392 if (cvt_type == NULL_TREE)
4393 goto unsupported;
4395 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4397 if (!supportable_convert_operation (code, vectype_out,
4398 cvt_type, &decl1, &codecvt1))
4399 goto unsupported;
4401 else if (!supportable_widening_operation (code, stmt, vectype_out,
4402 cvt_type, &codecvt1,
4403 &codecvt2, &multi_step_cvt,
4404 &interm_types))
4405 continue;
4406 else
4407 gcc_assert (multi_step_cvt == 0);
4409 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4410 vectype_in, &code1, &code2,
4411 &multi_step_cvt, &interm_types))
4413 found_mode = true;
4414 break;
4418 if (!found_mode)
4419 goto unsupported;
4421 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4422 codecvt2 = ERROR_MARK;
4423 else
4425 multi_step_cvt++;
4426 interm_types.safe_push (cvt_type);
4427 cvt_type = NULL_TREE;
4429 break;
4431 case NARROW:
4432 gcc_assert (op_type == unary_op);
4433 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4434 &code1, &multi_step_cvt,
4435 &interm_types))
4436 break;
4438 if (code != FIX_TRUNC_EXPR
4439 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4440 goto unsupported;
4442 cvt_type
4443 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4444 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4445 if (cvt_type == NULL_TREE)
4446 goto unsupported;
4447 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4448 &decl1, &codecvt1))
4449 goto unsupported;
4450 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4451 &code1, &multi_step_cvt,
4452 &interm_types))
4453 break;
4454 goto unsupported;
4456 default:
4457 gcc_unreachable ();
4460 if (!vec_stmt) /* transformation not required. */
4462 if (dump_enabled_p ())
4463 dump_printf_loc (MSG_NOTE, vect_location,
4464 "=== vectorizable_conversion ===\n");
4465 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4467 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4468 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4470 else if (modifier == NARROW)
4472 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4473 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4475 else
4477 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4478 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4480 interm_types.release ();
4481 return true;
4484 /* Transform. */
4485 if (dump_enabled_p ())
4486 dump_printf_loc (MSG_NOTE, vect_location,
4487 "transform conversion. ncopies = %d.\n", ncopies);
4489 if (op_type == binary_op)
4491 if (CONSTANT_CLASS_P (op0))
4492 op0 = fold_convert (TREE_TYPE (op1), op0);
4493 else if (CONSTANT_CLASS_P (op1))
4494 op1 = fold_convert (TREE_TYPE (op0), op1);
4497 /* In case of multi-step conversion, we first generate conversion operations
4498     to the intermediate types, and then from those types to the final one.
4499 We create vector destinations for the intermediate type (TYPES) received
4500 from supportable_*_operation, and store them in the correct order
4501 for future use in vect_create_vectorized_*_stmts (). */
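  /* For example (a sketch): a short -> double conversion may be done as
     short -> int -> double, so a destination variable is created for the
     intermediate int vectype as well as for the final one, in the order
     the vect_create_vectorized_*_stmts helpers expect.  */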
4502 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4503 vec_dest = vect_create_destination_var (scalar_dest,
4504 (cvt_type && modifier == WIDEN)
4505 ? cvt_type : vectype_out);
4506 vec_dsts.quick_push (vec_dest);
4508 if (multi_step_cvt)
4510 for (i = interm_types.length () - 1;
4511 interm_types.iterate (i, &intermediate_type); i--)
4513 vec_dest = vect_create_destination_var (scalar_dest,
4514 intermediate_type);
4515 vec_dsts.quick_push (vec_dest);
4519 if (cvt_type)
4520 vec_dest = vect_create_destination_var (scalar_dest,
4521 modifier == WIDEN
4522 ? vectype_out : cvt_type);
4524 if (!slp_node)
4526 if (modifier == WIDEN)
4528 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4529 if (op_type == binary_op)
4530 vec_oprnds1.create (1);
4532 else if (modifier == NARROW)
4533 vec_oprnds0.create (
4534 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4536 else if (code == WIDEN_LSHIFT_EXPR)
4537 vec_oprnds1.create (slp_node->vec_stmts_size);
4539 last_oprnd = op0;
4540 prev_stmt_info = NULL;
4541 switch (modifier)
4543 case NONE:
4544 for (j = 0; j < ncopies; j++)
4546 if (j == 0)
4547 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4548 else
4549 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4551 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4553 /* Arguments are ready, create the new vector stmt. */
4554 if (code1 == CALL_EXPR)
4556 new_stmt = gimple_build_call (decl1, 1, vop0);
4557 new_temp = make_ssa_name (vec_dest, new_stmt);
4558 gimple_call_set_lhs (new_stmt, new_temp);
4560 else
4562 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4563 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4564 new_temp = make_ssa_name (vec_dest, new_stmt);
4565 gimple_assign_set_lhs (new_stmt, new_temp);
4568 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4569 if (slp_node)
4570 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4571 else
4573 if (!prev_stmt_info)
4574 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4575 else
4576 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4577 prev_stmt_info = vinfo_for_stmt (new_stmt);
4581 break;
4583 case WIDEN:
4584 /* In case the vectorization factor (VF) is bigger than the number
4585 of elements that we can fit in a vectype (nunits), we have to
4586	 generate more than one vector stmt, i.e. we need to "unroll"
4587 the vector stmt by a factor VF/nunits. */
4588 for (j = 0; j < ncopies; j++)
4590 /* Handle uses. */
4591 if (j == 0)
4593 if (slp_node)
4595 if (code == WIDEN_LSHIFT_EXPR)
4597 unsigned int k;
4599 vec_oprnd1 = op1;
4600 /* Store vec_oprnd1 for every vector stmt to be created
4601 for SLP_NODE. We check during the analysis that all
4602 the shift arguments are the same. */
4603 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4604 vec_oprnds1.quick_push (vec_oprnd1);
4606 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4607 slp_node);
4609 else
4610 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4611 &vec_oprnds1, slp_node);
4613 else
4615 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4616 vec_oprnds0.quick_push (vec_oprnd0);
4617 if (op_type == binary_op)
4619 if (code == WIDEN_LSHIFT_EXPR)
4620 vec_oprnd1 = op1;
4621 else
4622 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4623 vec_oprnds1.quick_push (vec_oprnd1);
4627 else
4629 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4630 vec_oprnds0.truncate (0);
4631 vec_oprnds0.quick_push (vec_oprnd0);
4632 if (op_type == binary_op)
4634 if (code == WIDEN_LSHIFT_EXPR)
4635 vec_oprnd1 = op1;
4636 else
4637 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4638 vec_oprnd1);
4639 vec_oprnds1.truncate (0);
4640 vec_oprnds1.quick_push (vec_oprnd1);
4644 /* Arguments are ready. Create the new vector stmts. */
4645 for (i = multi_step_cvt; i >= 0; i--)
4647 tree this_dest = vec_dsts[i];
4648 enum tree_code c1 = code1, c2 = code2;
4649 if (i == 0 && codecvt2 != ERROR_MARK)
4651 c1 = codecvt1;
4652 c2 = codecvt2;
4654 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4655 &vec_oprnds1,
4656 stmt, this_dest, gsi,
4657 c1, c2, decl1, decl2,
4658 op_type);
4661 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4663 if (cvt_type)
4665 if (codecvt1 == CALL_EXPR)
4667 new_stmt = gimple_build_call (decl1, 1, vop0);
4668 new_temp = make_ssa_name (vec_dest, new_stmt);
4669 gimple_call_set_lhs (new_stmt, new_temp);
4671 else
4673 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4674 new_temp = make_ssa_name (vec_dest);
4675 new_stmt = gimple_build_assign (new_temp, codecvt1,
4676 vop0);
4679 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4681 else
4682 new_stmt = SSA_NAME_DEF_STMT (vop0);
4684 if (slp_node)
4685 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4686 else
4688 if (!prev_stmt_info)
4689 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4690 else
4691 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4692 prev_stmt_info = vinfo_for_stmt (new_stmt);
4697 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4698 break;
4700 case NARROW:
4701 /* In case the vectorization factor (VF) is bigger than the number
4702 of elements that we can fit in a vectype (nunits), we have to
4703	 generate more than one vector stmt, i.e. we need to "unroll"
4704 the vector stmt by a factor VF/nunits. */
4705 for (j = 0; j < ncopies; j++)
4707 /* Handle uses. */
4708 if (slp_node)
4709 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4710 slp_node);
4711 else
4713 vec_oprnds0.truncate (0);
4714 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4715 vect_pow2 (multi_step_cvt) - 1);
4718 /* Arguments are ready. Create the new vector stmts. */
4719 if (cvt_type)
4720 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4722 if (codecvt1 == CALL_EXPR)
4724 new_stmt = gimple_build_call (decl1, 1, vop0);
4725 new_temp = make_ssa_name (vec_dest, new_stmt);
4726 gimple_call_set_lhs (new_stmt, new_temp);
4728 else
4730 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4731 new_temp = make_ssa_name (vec_dest);
4732 new_stmt = gimple_build_assign (new_temp, codecvt1,
4733 vop0);
4736 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4737 vec_oprnds0[i] = new_temp;
4740 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4741 stmt, vec_dsts, gsi,
4742 slp_node, code1,
4743 &prev_stmt_info);
4746 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4747 break;
4750 vec_oprnds0.release ();
4751 vec_oprnds1.release ();
4752 interm_types.release ();
4754 return true;
4758 /* Function vectorizable_assignment.
4760 Check if STMT performs an assignment (copy) that can be vectorized.
4761 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4762    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4763 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4765 static bool
4766 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4767 gimple **vec_stmt, slp_tree slp_node)
4769 tree vec_dest;
4770 tree scalar_dest;
4771 tree op;
4772 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4773 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4774 tree new_temp;
4775 gimple *def_stmt;
4776 enum vect_def_type dt[1] = {vect_unknown_def_type};
4777 int ndts = 1;
4778 int ncopies;
4779 int i, j;
4780 vec<tree> vec_oprnds = vNULL;
4781 tree vop;
4782 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4783 vec_info *vinfo = stmt_info->vinfo;
4784 gimple *new_stmt = NULL;
4785 stmt_vec_info prev_stmt_info = NULL;
4786 enum tree_code code;
4787 tree vectype_in;
4789 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4790 return false;
4792 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4793 && ! vec_stmt)
4794 return false;
4796 /* Is vectorizable assignment? */
4797 if (!is_gimple_assign (stmt))
4798 return false;
4800 scalar_dest = gimple_assign_lhs (stmt);
4801 if (TREE_CODE (scalar_dest) != SSA_NAME)
4802 return false;
4804 code = gimple_assign_rhs_code (stmt);
4805 if (gimple_assign_single_p (stmt)
4806 || code == PAREN_EXPR
4807 || CONVERT_EXPR_CODE_P (code))
4808 op = gimple_assign_rhs1 (stmt);
4809 else
4810 return false;
4812 if (code == VIEW_CONVERT_EXPR)
4813 op = TREE_OPERAND (op, 0);
4815 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4816 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4818 /* Multiple types in SLP are handled by creating the appropriate number of
4819 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4820 case of SLP. */
4821 if (slp_node)
4822 ncopies = 1;
4823 else
4824 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4826 gcc_assert (ncopies >= 1);
4828 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4830 if (dump_enabled_p ())
4831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4832 "use not simple.\n");
4833 return false;
4836 /* We can handle NOP_EXPR conversions that do not change the number
4837 of elements or the vector size. */
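  /* For instance (illustrative): an unsigned int -> int conversion on
     V4SI keeps both the element count and the vector size, so it is
     simply emitted as a VIEW_CONVERT_EXPR of the operand below.  */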
4838 if ((CONVERT_EXPR_CODE_P (code)
4839 || code == VIEW_CONVERT_EXPR)
4840 && (!vectype_in
4841 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
4842 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
4843 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4844 return false;
4846 /* We do not handle bit-precision changes. */
4847 if ((CONVERT_EXPR_CODE_P (code)
4848 || code == VIEW_CONVERT_EXPR)
4849 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4850 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4851 || !type_has_mode_precision_p (TREE_TYPE (op)))
4852 /* But a conversion that does not change the bit-pattern is ok. */
4853 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4854 > TYPE_PRECISION (TREE_TYPE (op)))
4855 && TYPE_UNSIGNED (TREE_TYPE (op)))
4856 /* Conversion between boolean types of different sizes is
4857	 a simple assignment in case their vectypes are the same
4858 boolean vectors. */
4859 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4860 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4862 if (dump_enabled_p ())
4863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4864 "type conversion to/from bit-precision "
4865 "unsupported.\n");
4866 return false;
4869 if (!vec_stmt) /* transformation not required. */
4871 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4872 if (dump_enabled_p ())
4873 dump_printf_loc (MSG_NOTE, vect_location,
4874 "=== vectorizable_assignment ===\n");
4875 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4876 return true;
4879 /* Transform. */
4880 if (dump_enabled_p ())
4881 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4883 /* Handle def. */
4884 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4886 /* Handle use. */
4887 for (j = 0; j < ncopies; j++)
4889 /* Handle uses. */
4890 if (j == 0)
4891 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4892 else
4893 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4895      /* Arguments are ready.  Create the new vector stmt.  */
4896 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4898 if (CONVERT_EXPR_CODE_P (code)
4899 || code == VIEW_CONVERT_EXPR)
4900 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4901 new_stmt = gimple_build_assign (vec_dest, vop);
4902 new_temp = make_ssa_name (vec_dest, new_stmt);
4903 gimple_assign_set_lhs (new_stmt, new_temp);
4904 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4905 if (slp_node)
4906 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4909 if (slp_node)
4910 continue;
4912 if (j == 0)
4913 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4914 else
4915 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4917 prev_stmt_info = vinfo_for_stmt (new_stmt);
4920 vec_oprnds.release ();
4921 return true;
4925 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4926 either as shift by a scalar or by a vector. */
4928 bool
4929 vect_supportable_shift (enum tree_code code, tree scalar_type)
4932 machine_mode vec_mode;
4933 optab optab;
4934 int icode;
4935 tree vectype;
4937 vectype = get_vectype_for_scalar_type (scalar_type);
4938 if (!vectype)
4939 return false;
4941 optab = optab_for_tree_code (code, vectype, optab_scalar);
4942 if (!optab
4943 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4945 optab = optab_for_tree_code (code, vectype, optab_vector);
4946 if (!optab
4947 || (optab_handler (optab, TYPE_MODE (vectype))
4948 == CODE_FOR_nothing))
4949 return false;
4952 vec_mode = TYPE_MODE (vectype);
4953 icode = (int) optab_handler (optab, vec_mode);
4954 if (icode == CODE_FOR_nothing)
4955 return false;
4957 return true;
4961 /* Function vectorizable_shift.
4963 Check if STMT performs a shift operation that can be vectorized.
4964 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4965    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4966 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4968 static bool
4969 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4970 gimple **vec_stmt, slp_tree slp_node)
4972 tree vec_dest;
4973 tree scalar_dest;
4974 tree op0, op1 = NULL;
4975 tree vec_oprnd1 = NULL_TREE;
4976 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4977 tree vectype;
4978 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4979 enum tree_code code;
4980 machine_mode vec_mode;
4981 tree new_temp;
4982 optab optab;
4983 int icode;
4984 machine_mode optab_op2_mode;
4985 gimple *def_stmt;
4986 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4987 int ndts = 2;
4988 gimple *new_stmt = NULL;
4989 stmt_vec_info prev_stmt_info;
4990 poly_uint64 nunits_in;
4991 poly_uint64 nunits_out;
4992 tree vectype_out;
4993 tree op1_vectype;
4994 int ncopies;
4995 int j, i;
4996 vec<tree> vec_oprnds0 = vNULL;
4997 vec<tree> vec_oprnds1 = vNULL;
4998 tree vop0, vop1;
4999 unsigned int k;
5000 bool scalar_shift_arg = true;
5001 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5002 vec_info *vinfo = stmt_info->vinfo;
5004 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5005 return false;
5007 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5008 && ! vec_stmt)
5009 return false;
5011 /* Is STMT a vectorizable binary/unary operation? */
5012 if (!is_gimple_assign (stmt))
5013 return false;
5015 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5016 return false;
5018 code = gimple_assign_rhs_code (stmt);
5020 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5021 || code == RROTATE_EXPR))
5022 return false;
5024 scalar_dest = gimple_assign_lhs (stmt);
5025 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5026 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
5028 if (dump_enabled_p ())
5029 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5030 "bit-precision shifts not supported.\n");
5031 return false;
5034 op0 = gimple_assign_rhs1 (stmt);
5035 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5037 if (dump_enabled_p ())
5038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5039 "use not simple.\n");
5040 return false;
5042 /* If op0 is an external or constant def use a vector type with
5043 the same size as the output vector type. */
5044 if (!vectype)
5045 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5046 if (vec_stmt)
5047 gcc_assert (vectype);
5048 if (!vectype)
5050 if (dump_enabled_p ())
5051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5052 "no vectype for scalar type\n");
5053 return false;
5056 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5057 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5058 if (maybe_ne (nunits_out, nunits_in))
5059 return false;
5061 op1 = gimple_assign_rhs2 (stmt);
5062 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
5064 if (dump_enabled_p ())
5065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5066 "use not simple.\n");
5067 return false;
5070 /* Multiple types in SLP are handled by creating the appropriate number of
5071 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5072 case of SLP. */
5073 if (slp_node)
5074 ncopies = 1;
5075 else
5076 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5078 gcc_assert (ncopies >= 1);
5080 /* Determine whether the shift amount is a vector, or scalar. If the
5081 shift/rotate amount is a vector, use the vector/vector shift optabs. */
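  /* E.g. (illustrative):  a[i] << 3  or  a[i] << s  with loop-invariant s
     keeps a scalar shift argument, whereas  a[i] << b[i]  requires the
     vector/vector shift optab.  */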
5083 if ((dt[1] == vect_internal_def
5084 || dt[1] == vect_induction_def)
5085 && !slp_node)
5086 scalar_shift_arg = false;
5087 else if (dt[1] == vect_constant_def
5088 || dt[1] == vect_external_def
5089 || dt[1] == vect_internal_def)
5091      /* In SLP we need to check whether the shift count is the same;
5092	 in loops, if it is a constant or invariant, it is always
5093	 a scalar shift.  */
5094 if (slp_node)
5096 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
5097 gimple *slpstmt;
5099 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
5100 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
5101 scalar_shift_arg = false;
5104 /* If the shift amount is computed by a pattern stmt we cannot
5105	 use the scalar amount directly, so give up and use a vector
5106 shift. */
5107 if (dt[1] == vect_internal_def)
5109 gimple *def = SSA_NAME_DEF_STMT (op1);
5110 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
5111 scalar_shift_arg = false;
5114 else
5116 if (dump_enabled_p ())
5117 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5118 "operand mode requires invariant argument.\n");
5119 return false;
5122 /* Vector shifted by vector. */
5123 if (!scalar_shift_arg)
5125 optab = optab_for_tree_code (code, vectype, optab_vector);
5126 if (dump_enabled_p ())
5127 dump_printf_loc (MSG_NOTE, vect_location,
5128 "vector/vector shift/rotate found.\n");
5130 if (!op1_vectype)
5131 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
5132 if (op1_vectype == NULL_TREE
5133 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
5135 if (dump_enabled_p ())
5136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5137 "unusable type for last operand in"
5138 " vector/vector shift/rotate.\n");
5139 return false;
5142 /* See if the machine has a vector shifted by scalar insn and if not
5143 then see if it has a vector shifted by vector insn. */
5144 else
5146 optab = optab_for_tree_code (code, vectype, optab_scalar);
5147 if (optab
5148 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5150 if (dump_enabled_p ())
5151 dump_printf_loc (MSG_NOTE, vect_location,
5152 "vector/scalar shift/rotate found.\n");
5154 else
5156 optab = optab_for_tree_code (code, vectype, optab_vector);
5157 if (optab
5158 && (optab_handler (optab, TYPE_MODE (vectype))
5159 != CODE_FOR_nothing))
5161 scalar_shift_arg = false;
5163 if (dump_enabled_p ())
5164 dump_printf_loc (MSG_NOTE, vect_location,
5165 "vector/vector shift/rotate found.\n");
5167	      /* Unlike the other binary operators, shifts/rotates have
5168		 an int rhs rather than one of the same type as the lhs,
5169 so make sure the scalar is the right type if we are
5170 dealing with vectors of long long/long/short/char. */
5171 if (dt[1] == vect_constant_def)
5172 op1 = fold_convert (TREE_TYPE (vectype), op1);
5173 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5174 TREE_TYPE (op1)))
5176 if (slp_node
5177 && TYPE_MODE (TREE_TYPE (vectype))
5178 != TYPE_MODE (TREE_TYPE (op1)))
5180 if (dump_enabled_p ())
5181 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5182 "unusable type for last operand in"
5183 " vector/vector shift/rotate.\n");
5184 return false;
5186 if (vec_stmt && !slp_node)
5188 op1 = fold_convert (TREE_TYPE (vectype), op1);
5189 op1 = vect_init_vector (stmt, op1,
5190 TREE_TYPE (vectype), NULL);
5197 /* Supportable by target? */
5198 if (!optab)
5200 if (dump_enabled_p ())
5201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5202 "no optab.\n");
5203 return false;
5205 vec_mode = TYPE_MODE (vectype);
5206 icode = (int) optab_handler (optab, vec_mode);
5207 if (icode == CODE_FOR_nothing)
5209 if (dump_enabled_p ())
5210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5211 "op not supported by target.\n");
5212 /* Check only during analysis. */
5213 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5214 || (!vec_stmt
5215 && !vect_worthwhile_without_simd_p (vinfo, code)))
5216 return false;
5217 if (dump_enabled_p ())
5218 dump_printf_loc (MSG_NOTE, vect_location,
5219 "proceeding using word mode.\n");
5222 /* Worthwhile without SIMD support? Check only during analysis. */
5223 if (!vec_stmt
5224 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5225 && !vect_worthwhile_without_simd_p (vinfo, code))
5227 if (dump_enabled_p ())
5228 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5229 "not worthwhile without SIMD support.\n");
5230 return false;
5233 if (!vec_stmt) /* transformation not required. */
5235 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5236 if (dump_enabled_p ())
5237 dump_printf_loc (MSG_NOTE, vect_location,
5238 "=== vectorizable_shift ===\n");
5239 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5240 return true;
5243 /* Transform. */
5245 if (dump_enabled_p ())
5246 dump_printf_loc (MSG_NOTE, vect_location,
5247 "transform binary/unary operation.\n");
5249 /* Handle def. */
5250 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5252 prev_stmt_info = NULL;
5253 for (j = 0; j < ncopies; j++)
5255 /* Handle uses. */
5256 if (j == 0)
5258 if (scalar_shift_arg)
5260 /* Vector shl and shr insn patterns can be defined with scalar
5261	     operand 2 (shift operand).  In this case, use the constant or
5262	     loop-invariant op1 directly, without extending it to vector mode
5263 first. */
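	  /* Sketch: when the target's vector shift pattern takes the count
	     as a scalar (optab_op2_mode is not a vector mode), the single
	     op1 below is reused for every vector stmt instead of being
	     broadcast into a vector operand.  */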
5264 optab_op2_mode = insn_data[icode].operand[2].mode;
5265 if (!VECTOR_MODE_P (optab_op2_mode))
5267 if (dump_enabled_p ())
5268 dump_printf_loc (MSG_NOTE, vect_location,
5269 "operand 1 using scalar mode.\n");
5270 vec_oprnd1 = op1;
5271 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5272 vec_oprnds1.quick_push (vec_oprnd1);
5273 if (slp_node)
5275 /* Store vec_oprnd1 for every vector stmt to be created
5276 for SLP_NODE. We check during the analysis that all
5277 the shift arguments are the same.
5278 TODO: Allow different constants for different vector
5279 stmts generated for an SLP instance. */
5280 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5281 vec_oprnds1.quick_push (vec_oprnd1);
5286 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5287	 (a special case for certain kinds of vector shifts);
5288 operand 1 should be of a vector type (the usual case). */
5289 if (vec_oprnd1)
5290 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5291 slp_node);
5292 else
5293 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5294 slp_node);
5296 else
5297 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5299 /* Arguments are ready. Create the new vector stmt. */
5300 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5302 vop1 = vec_oprnds1[i];
5303 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5304 new_temp = make_ssa_name (vec_dest, new_stmt);
5305 gimple_assign_set_lhs (new_stmt, new_temp);
5306 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5307 if (slp_node)
5308 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5311 if (slp_node)
5312 continue;
5314 if (j == 0)
5315 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5316 else
5317 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5318 prev_stmt_info = vinfo_for_stmt (new_stmt);
5321 vec_oprnds0.release ();
5322 vec_oprnds1.release ();
5324 return true;
5328 /* Function vectorizable_operation.
5330 Check if STMT performs a binary, unary or ternary operation that can
5331 be vectorized.
5332 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5333    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5334 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5336 static bool
5337 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5338 gimple **vec_stmt, slp_tree slp_node)
5340 tree vec_dest;
5341 tree scalar_dest;
5342 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5343 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5344 tree vectype;
5345 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5346 enum tree_code code, orig_code;
5347 machine_mode vec_mode;
5348 tree new_temp;
5349 int op_type;
5350 optab optab;
5351 bool target_support_p;
5352 gimple *def_stmt;
5353 enum vect_def_type dt[3]
5354 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5355 int ndts = 3;
5356 gimple *new_stmt = NULL;
5357 stmt_vec_info prev_stmt_info;
5358 poly_uint64 nunits_in;
5359 poly_uint64 nunits_out;
5360 tree vectype_out;
5361 int ncopies;
5362 int j, i;
5363 vec<tree> vec_oprnds0 = vNULL;
5364 vec<tree> vec_oprnds1 = vNULL;
5365 vec<tree> vec_oprnds2 = vNULL;
5366 tree vop0, vop1, vop2;
5367 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5368 vec_info *vinfo = stmt_info->vinfo;
5370 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5371 return false;
5373 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5374 && ! vec_stmt)
5375 return false;
5377 /* Is STMT a vectorizable binary/unary operation? */
5378 if (!is_gimple_assign (stmt))
5379 return false;
5381 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5382 return false;
5384 orig_code = code = gimple_assign_rhs_code (stmt);
5386 /* For pointer addition and subtraction, we should use the normal
5387 plus and minus for the vector operation. */
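  /* E.g. (illustrative):  p_2 = p_1 + 4  as POINTER_PLUS_EXPR is
     vectorized with a plain PLUS_EXPR on the chosen vector type.  */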
5388 if (code == POINTER_PLUS_EXPR)
5389 code = PLUS_EXPR;
5390 if (code == POINTER_DIFF_EXPR)
5391 code = MINUS_EXPR;
5393 /* Support only unary or binary operations. */
5394 op_type = TREE_CODE_LENGTH (code);
5395 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5397 if (dump_enabled_p ())
5398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5399 "num. args = %d (not unary/binary/ternary op).\n",
5400 op_type);
5401 return false;
5404 scalar_dest = gimple_assign_lhs (stmt);
5405 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5407 /* Most operations cannot handle bit-precision types without extra
5408 truncations. */
5409 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5410 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5411 /* Exception are bitwise binary operations. */
5412 && code != BIT_IOR_EXPR
5413 && code != BIT_XOR_EXPR
5414 && code != BIT_AND_EXPR)
5416 if (dump_enabled_p ())
5417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5418 "bit-precision arithmetic not supported.\n");
5419 return false;
5422 op0 = gimple_assign_rhs1 (stmt);
5423 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5425 if (dump_enabled_p ())
5426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5427 "use not simple.\n");
5428 return false;
5430 /* If op0 is an external or constant def use a vector type with
5431 the same size as the output vector type. */
5432 if (!vectype)
5434      /* For a boolean type we cannot determine the vectype from an
5435	 invariant value (we don't know whether it is a vector
5436	 of booleans or a vector of integers).  We use the output
5437	 vectype because operations on booleans don't change the
5438	 type.  */
5439 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5441 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5443 if (dump_enabled_p ())
5444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5445 "not supported operation on bool value.\n");
5446 return false;
5448 vectype = vectype_out;
5450 else
5451 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5453 if (vec_stmt)
5454 gcc_assert (vectype);
5455 if (!vectype)
5457 if (dump_enabled_p ())
5459 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5460 "no vectype for scalar type ");
5461 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5462 TREE_TYPE (op0));
5463 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5466 return false;
5469 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5470 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5471 if (maybe_ne (nunits_out, nunits_in))
5472 return false;
5474 if (op_type == binary_op || op_type == ternary_op)
5476 op1 = gimple_assign_rhs2 (stmt);
5477 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5479 if (dump_enabled_p ())
5480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5481 "use not simple.\n");
5482 return false;
5485 if (op_type == ternary_op)
5487 op2 = gimple_assign_rhs3 (stmt);
5488 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5490 if (dump_enabled_p ())
5491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5492 "use not simple.\n");
5493 return false;
5497 /* Multiple types in SLP are handled by creating the appropriate number of
5498 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5499 case of SLP. */
5500 if (slp_node)
5501 ncopies = 1;
5502 else
5503 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5505 gcc_assert (ncopies >= 1);
5507 /* Shifts are handled in vectorizable_shift (). */
5508 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5509 || code == RROTATE_EXPR)
5510 return false;
5512 /* Supportable by target? */
5514 vec_mode = TYPE_MODE (vectype);
5515 if (code == MULT_HIGHPART_EXPR)
5516 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5517 else
5519 optab = optab_for_tree_code (code, vectype, optab_default);
5520 if (!optab)
5522 if (dump_enabled_p ())
5523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5524 "no optab.\n");
5525 return false;
5527 target_support_p = (optab_handler (optab, vec_mode)
5528 != CODE_FOR_nothing);
5531 if (!target_support_p)
5533 if (dump_enabled_p ())
5534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5535 "op not supported by target.\n");
5536 /* Check only during analysis. */
5537 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5538 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5539 return false;
5540 if (dump_enabled_p ())
5541 dump_printf_loc (MSG_NOTE, vect_location,
5542 "proceeding using word mode.\n");
5545 /* Worthwhile without SIMD support? Check only during analysis. */
5546 if (!VECTOR_MODE_P (vec_mode)
5547 && !vec_stmt
5548 && !vect_worthwhile_without_simd_p (vinfo, code))
5550 if (dump_enabled_p ())
5551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5552 "not worthwhile without SIMD support.\n");
5553 return false;
5556 if (!vec_stmt) /* transformation not required. */
5558 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5559 if (dump_enabled_p ())
5560 dump_printf_loc (MSG_NOTE, vect_location,
5561 "=== vectorizable_operation ===\n");
5562 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5563 return true;
5566 /* Transform. */
5568 if (dump_enabled_p ())
5569 dump_printf_loc (MSG_NOTE, vect_location,
5570 "transform binary/unary operation.\n");
5572 /* Handle def. */
5573 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5575 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5576 vectors with unsigned elements, but the result is signed. So we
5577 need to compute the MINUS_EXPR into a vectype temporary and
5578 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5579 tree vec_cvt_dest = NULL_TREE;
5580 if (orig_code == POINTER_DIFF_EXPR)
5581 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
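/* An illustrative sketch of what this produces (names here are invented
   for the example): for a scalar statement d = p1 - p2 of type ptrdiff_t,
   the loop below emits

     vect_tmp = vect_p1 - vect_p2;                         unsigned VECTYPE
     vect_d = VIEW_CONVERT_EXPR<vectype_out> (vect_tmp);   signed result

   i.e. the subtraction is carried out in the unsigned vector type and
   only the final view-convert yields the signed VECTYPE_OUT value.  */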
5583 /* In case the vectorization factor (VF) is bigger than the number
5584 of elements that we can fit in a vectype (nunits), we have to generate
5585 more than one vector stmt - i.e - we need to "unroll" the
5586 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5587 from one copy of the vector stmt to the next, in the field
5588 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5589 stages to find the correct vector defs to be used when vectorizing
5590 stmts that use the defs of the current stmt. The example below
5591 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5592 we need to create 4 vectorized stmts):
5594 before vectorization:
5595 RELATED_STMT VEC_STMT
5596 S1: x = memref - -
5597 S2: z = x + 1 - -
5599 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5600 there):
5601 RELATED_STMT VEC_STMT
5602 VS1_0: vx0 = memref0 VS1_1 -
5603 VS1_1: vx1 = memref1 VS1_2 -
5604 VS1_2: vx2 = memref2 VS1_3 -
5605 VS1_3: vx3 = memref3 - -
5606 S1: x = load - VS1_0
5607 S2: z = x + 1 - -
5609 step2: vectorize stmt S2 (done here):
5610 To vectorize stmt S2 we first need to find the relevant vector
5611 def for the first operand 'x'. This is, as usual, obtained from
5612 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5613 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5614 relevant vector def 'vx0'. Having found 'vx0' we can generate
5615 the vector stmt VS2_0, and as usual, record it in the
5616 STMT_VINFO_VEC_STMT of stmt S2.
5617 When creating the second copy (VS2_1), we obtain the relevant vector
5618 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5619 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5620 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5621 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5622 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5623 chain of stmts and pointers:
5624 RELATED_STMT VEC_STMT
5625 VS1_0: vx0 = memref0 VS1_1 -
5626 VS1_1: vx1 = memref1 VS1_2 -
5627 VS1_2: vx2 = memref2 VS1_3 -
5628 VS1_3: vx3 = memref3 - -
5629 S1: x = load - VS1_0
5630 VS2_0: vz0 = vx0 + v1 VS2_1 -
5631 VS2_1: vz1 = vx1 + v1 VS2_2 -
5632 VS2_2: vz2 = vx2 + v1 VS2_3 -
5633 VS2_3: vz3 = vx3 + v1 - -
5634 S2: z = x + 1 - VS2_0 */
5636 prev_stmt_info = NULL;
5637 for (j = 0; j < ncopies; j++)
5639 /* Handle uses. */
5640 if (j == 0)
5642 if (op_type == binary_op || op_type == ternary_op)
5643 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5644 slp_node);
5645 else
5646 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5647 slp_node);
5648 if (op_type == ternary_op)
5649 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5650 slp_node);
5652 else
5654 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5655 if (op_type == ternary_op)
5657 tree vec_oprnd = vec_oprnds2.pop ();
5658 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5659 vec_oprnd));
5663 /* Arguments are ready. Create the new vector stmt. */
5664 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5666 vop1 = ((op_type == binary_op || op_type == ternary_op)
5667 ? vec_oprnds1[i] : NULL_TREE);
5668 vop2 = ((op_type == ternary_op)
5669 ? vec_oprnds2[i] : NULL_TREE);
5670 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5671 new_temp = make_ssa_name (vec_dest, new_stmt);
5672 gimple_assign_set_lhs (new_stmt, new_temp);
5673 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5674 if (vec_cvt_dest)
5676 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5677 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5678 new_temp);
5679 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5680 gimple_assign_set_lhs (new_stmt, new_temp);
5681 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5683 if (slp_node)
5684 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5687 if (slp_node)
5688 continue;
5690 if (j == 0)
5691 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5692 else
5693 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5694 prev_stmt_info = vinfo_for_stmt (new_stmt);
5697 vec_oprnds0.release ();
5698 vec_oprnds1.release ();
5699 vec_oprnds2.release ();
5701 return true;
5704 /* A helper function to ensure data reference DR's base alignment. */
5706 static void
5707 ensure_base_align (struct data_reference *dr)
5709 if (!dr->aux)
5710 return;
5712 if (DR_VECT_AUX (dr)->base_misaligned)
5714 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5716 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
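/* E.g. a target alignment of 16 bytes becomes a request for
   16 * BITS_PER_UNIT = 128 bits of alignment on the base decl
   (assuming the usual 8-bit unit).  */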
5718 if (decl_in_symtab_p (base_decl))
5719 symtab_node::get (base_decl)->increase_alignment (align_base_to);
5720 else
5722 SET_DECL_ALIGN (base_decl, align_base_to);
5723 DECL_USER_ALIGN (base_decl) = 1;
5725 DR_VECT_AUX (dr)->base_misaligned = false;
5730 /* Function get_group_alias_ptr_type.
5732 Return the alias type for the group starting at FIRST_STMT. */
5734 static tree
5735 get_group_alias_ptr_type (gimple *first_stmt)
5737 struct data_reference *first_dr, *next_dr;
5738 gimple *next_stmt;
5740 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5741 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5742 while (next_stmt)
5744 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5745 if (get_alias_set (DR_REF (first_dr))
5746 != get_alias_set (DR_REF (next_dr)))
5748 if (dump_enabled_p ())
5749 dump_printf_loc (MSG_NOTE, vect_location,
5750 "conflicting alias set types.\n");
5751 return ptr_type_node;
5753 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5755 return reference_alias_ptr_type (DR_REF (first_dr));
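/* An illustrative example (not taken from the callers above): a grouped
   store to adjacent int and float members, say

     s->i = ...;  s->f = ...;

   has references with conflicting alias sets, so get_group_alias_ptr_type
   falls back to ptr_type_node rather than using the alias type of either
   member.  */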
5759 /* Function vectorizable_store.
5761 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5762 can be vectorized.
5763 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5764 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5765 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5767 static bool
5768 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5769 slp_tree slp_node)
5771 tree data_ref;
5772 tree op;
5773 tree vec_oprnd = NULL_TREE;
5774 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5775 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5776 tree elem_type;
5777 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5778 struct loop *loop = NULL;
5779 machine_mode vec_mode;
5780 tree dummy;
5781 enum dr_alignment_support alignment_support_scheme;
5782 gimple *def_stmt;
5783 enum vect_def_type dt;
5784 stmt_vec_info prev_stmt_info = NULL;
5785 tree dataref_ptr = NULL_TREE;
5786 tree dataref_offset = NULL_TREE;
5787 gimple *ptr_incr = NULL;
5788 int ncopies;
5789 int j;
5790 gimple *next_stmt, *first_stmt;
5791 bool grouped_store;
5792 unsigned int group_size, i;
5793 vec<tree> oprnds = vNULL;
5794 vec<tree> result_chain = vNULL;
5795 bool inv_p;
5796 tree offset = NULL_TREE;
5797 vec<tree> vec_oprnds = vNULL;
5798 bool slp = (slp_node != NULL);
5799 unsigned int vec_num;
5800 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5801 vec_info *vinfo = stmt_info->vinfo;
5802 tree aggr_type;
5803 gather_scatter_info gs_info;
5804 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5805 gimple *new_stmt;
5806 poly_uint64 vf;
5807 vec_load_store_type vls_type;
5808 tree ref_type;
5810 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5811 return false;
5813 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5814 && ! vec_stmt)
5815 return false;
5817 /* Is vectorizable store? */
5819 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
5820 if (is_gimple_assign (stmt))
5822 tree scalar_dest = gimple_assign_lhs (stmt);
5823 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5824 && is_pattern_stmt_p (stmt_info))
5825 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5826 if (TREE_CODE (scalar_dest) != ARRAY_REF
5827 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5828 && TREE_CODE (scalar_dest) != INDIRECT_REF
5829 && TREE_CODE (scalar_dest) != COMPONENT_REF
5830 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5831 && TREE_CODE (scalar_dest) != REALPART_EXPR
5832 && TREE_CODE (scalar_dest) != MEM_REF)
5833 return false;
5835 else
5837 gcall *call = dyn_cast <gcall *> (stmt);
5838 if (!call || !gimple_call_internal_p (call, IFN_MASK_STORE))
5839 return false;
5841 if (slp_node != NULL)
5843 if (dump_enabled_p ())
5844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5845 "SLP of masked stores not supported.\n");
5846 return false;
5849 ref_type = TREE_TYPE (gimple_call_arg (call, 1));
5850 mask = gimple_call_arg (call, 2);
5851 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
5852 return false;
5855 op = vect_get_store_rhs (stmt);
5857 /* Cannot have hybrid store SLP -- that would mean storing to the
5858 same location twice. */
5859 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5861 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5862 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5864 if (loop_vinfo)
5866 loop = LOOP_VINFO_LOOP (loop_vinfo);
5867 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5869 else
5870 vf = 1;
5872 /* Multiple types in SLP are handled by creating the appropriate number of
5873 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5874 case of SLP. */
5875 if (slp)
5876 ncopies = 1;
5877 else
5878 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5880 gcc_assert (ncopies >= 1);
5882 /* FORNOW. This restriction should be relaxed. */
5883 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5885 if (dump_enabled_p ())
5886 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5887 "multiple types in nested loop.\n");
5888 return false;
5891 if (!vect_check_store_rhs (stmt, op, &rhs_vectype, &vls_type))
5892 return false;
5894 elem_type = TREE_TYPE (vectype);
5895 vec_mode = TYPE_MODE (vectype);
5897 if (!STMT_VINFO_DATA_REF (stmt_info))
5898 return false;
5900 vect_memory_access_type memory_access_type;
5901 if (!get_load_store_type (stmt, vectype, slp, mask, vls_type, ncopies,
5902 &memory_access_type, &gs_info))
5903 return false;
5905 if (mask)
5907 if (memory_access_type == VMAT_CONTIGUOUS)
5909 if (!VECTOR_MODE_P (vec_mode)
5910 || !can_vec_mask_load_store_p (vec_mode,
5911 TYPE_MODE (mask_vectype), false))
5912 return false;
5914 else if (memory_access_type != VMAT_LOAD_STORE_LANES)
5916 if (dump_enabled_p ())
5917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5918 "unsupported access type for masked store.\n");
5919 return false;
5922 else
5924 /* FORNOW. In some cases we can vectorize even if the data type is not
5925 supported (e.g. array initialization with 0). */
5926 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5927 return false;
5930 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5931 if (grouped_store)
5933 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5934 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5935 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5937 else
5939 first_stmt = stmt;
5940 first_dr = dr;
5941 group_size = vec_num = 1;
5944 if (!vec_stmt) /* transformation not required. */
5946 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5948 if (loop_vinfo
5949 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
5950 check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
5951 memory_access_type);
5953 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5954 /* The SLP costs are calculated during SLP analysis. */
5955 if (!PURE_SLP_STMT (stmt_info))
5956 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
5957 vls_type, NULL, NULL, NULL);
5958 return true;
5960 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5962 /* Transform. */
5964 ensure_base_align (dr);
5966 if (memory_access_type == VMAT_GATHER_SCATTER)
5968 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
5969 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5970 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5971 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5972 edge pe = loop_preheader_edge (loop);
5973 gimple_seq seq;
5974 basic_block new_bb;
5975 enum { NARROW, NONE, WIDEN } modifier;
5976 poly_uint64 scatter_off_nunits
5977 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5979 if (known_eq (nunits, scatter_off_nunits))
5980 modifier = NONE;
5981 else if (known_eq (nunits * 2, scatter_off_nunits))
5983 modifier = WIDEN;
5985 /* Currently gathers and scatters are only supported for
5986 fixed-length vectors. */
5987 unsigned int count = scatter_off_nunits.to_constant ();
5988 vec_perm_builder sel (count, count, 1);
5989 for (i = 0; i < (unsigned int) count; ++i)
5990 sel.quick_push (i | (count / 2));
5992 vec_perm_indices indices (sel, 1, count);
5993 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5994 indices);
5995 gcc_assert (perm_mask != NULL_TREE);
5997 else if (known_eq (nunits, scatter_off_nunits * 2))
5999 modifier = NARROW;
6001 /* Currently gathers and scatters are only supported for
6002 fixed-length vectors. */
6003 unsigned int count = nunits.to_constant ();
6004 vec_perm_builder sel (count, count, 1);
6005 for (i = 0; i < (unsigned int) count; ++i)
6006 sel.quick_push (i | (count / 2));
6008 vec_perm_indices indices (sel, 2, count);
6009 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
6010 gcc_assert (perm_mask != NULL_TREE);
6011 ncopies *= 2;
6013 else
6014 gcc_unreachable ();
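/* A worked instance of the selector built above, for the fixed-length
   case COUNT == 4: pushing i | (COUNT / 2) for i = 0..3 gives
   { 2, 3, 2, 3 }, i.e. the high half of a vector repeated.  Permuting a
   vector with itself by this mask therefore places its high half in the
   low elements, which is what the second (widened or narrowed) copy
   consumes.  */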
6016 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6017 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6018 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6019 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6020 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6021 scaletype = TREE_VALUE (arglist);
6023 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
6024 && TREE_CODE (rettype) == VOID_TYPE);
6026 ptr = fold_convert (ptrtype, gs_info.base);
6027 if (!is_gimple_min_invariant (ptr))
6029 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6030 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6031 gcc_assert (!new_bb);
6034 /* Currently we support only unconditional scatter stores,
6035 so mask should be all ones. */
6036 mask = build_int_cst (masktype, -1);
6037 mask = vect_init_vector (stmt, mask, masktype, NULL);
6039 scale = build_int_cst (scaletype, gs_info.scale);
6041 prev_stmt_info = NULL;
6042 for (j = 0; j < ncopies; ++j)
6044 if (j == 0)
6046 src = vec_oprnd1
6047 = vect_get_vec_def_for_operand (op, stmt);
6048 op = vec_oprnd0
6049 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6051 else if (modifier != NONE && (j & 1))
6053 if (modifier == WIDEN)
6055 src = vec_oprnd1
6056 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
6057 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
6058 stmt, gsi);
6060 else if (modifier == NARROW)
6062 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
6063 stmt, gsi);
6064 op = vec_oprnd0
6065 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6066 vec_oprnd0);
6068 else
6069 gcc_unreachable ();
6071 else
6073 src = vec_oprnd1
6074 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
6075 op = vec_oprnd0
6076 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
6077 vec_oprnd0);
6080 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
6082 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
6083 TYPE_VECTOR_SUBPARTS (srctype)));
6084 var = vect_get_new_ssa_name (srctype, vect_simple_var);
6085 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
6086 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
6087 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6088 src = var;
6091 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6093 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
6094 TYPE_VECTOR_SUBPARTS (idxtype)));
6095 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6096 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6097 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6098 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6099 op = var;
6102 new_stmt
6103 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
6105 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6107 if (prev_stmt_info == NULL)
6108 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6109 else
6110 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6111 prev_stmt_info = vinfo_for_stmt (new_stmt);
6113 return true;
6116 if (grouped_store)
6118 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
6120 /* FORNOW */
6121 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
6123 /* We vectorize all the stmts of the interleaving group when we
6124 reach the last stmt in the group. */
6125 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
6126 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
6127 && !slp)
6129 *vec_stmt = NULL;
6130 return true;
6133 if (slp)
6135 grouped_store = false;
6136 /* VEC_NUM is the number of vect stmts to be created for this
6137 group. */
6138 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6139 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6140 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
6141 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6142 op = vect_get_store_rhs (first_stmt);
6144 else
6145 /* VEC_NUM is the number of vect stmts to be created for this
6146 group. */
6147 vec_num = group_size;
6149 ref_type = get_group_alias_ptr_type (first_stmt);
6151 else
6152 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6154 if (dump_enabled_p ())
6155 dump_printf_loc (MSG_NOTE, vect_location,
6156 "transform store. ncopies = %d\n", ncopies);
6158 if (memory_access_type == VMAT_ELEMENTWISE
6159 || memory_access_type == VMAT_STRIDED_SLP)
6161 gimple_stmt_iterator incr_gsi;
6162 bool insert_after;
6163 gimple *incr;
6164 tree offvar;
6165 tree ivstep;
6166 tree running_off;
6167 gimple_seq stmts = NULL;
6168 tree stride_base, stride_step, alias_off;
6169 tree vec_oprnd;
6170 unsigned int g;
6171 /* Checked by get_load_store_type. */
6172 unsigned int const_nunits = nunits.to_constant ();
6174 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6175 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6177 stride_base
6178 = fold_build_pointer_plus
6179 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
6180 size_binop (PLUS_EXPR,
6181 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
6182 convert_to_ptrofftype (DR_INIT (first_dr))));
6183 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
6185 /* For a store with loop-invariant (but other than power-of-2)
6186 stride (i.e. not a grouped access) like so:
6188 for (i = 0; i < n; i += stride)
6189 array[i] = ...;
6191 we generate a new induction variable and new stores from
6192 the components of the (vectorized) rhs:
6194 for (j = 0; ; j += VF*stride)
6195 vectemp = ...;
6196 tmp1 = vectemp[0];
6197 array[j] = tmp1;
6198 tmp2 = vectemp[1];
6199 array[j + stride] = tmp2;
6203 unsigned nstores = const_nunits;
6204 unsigned lnel = 1;
6205 tree ltype = elem_type;
6206 tree lvectype = vectype;
6207 if (slp)
6209 if (group_size < const_nunits
6210 && const_nunits % group_size == 0)
6212 nstores = const_nunits / group_size;
6213 lnel = group_size;
6214 ltype = build_vector_type (elem_type, group_size);
6215 lvectype = vectype;
6217 /* First check whether the vec_extract optab supports extracting
6218 vector elts directly; if not, fall back to the punning below. */
6219 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6220 machine_mode vmode;
6221 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6222 || !VECTOR_MODE_P (vmode)
6223 || (convert_optab_handler (vec_extract_optab,
6224 TYPE_MODE (vectype), vmode)
6225 == CODE_FOR_nothing))
6227 /* Try to avoid emitting an extract of vector elements
6228 by performing the extracts using an integer type of the
6229 same size, extracting from a vector of those and then
6230 re-interpreting it as the original vector type if
6231 supported. */
6232 unsigned lsize
6233 = group_size * GET_MODE_BITSIZE (elmode);
6234 elmode = int_mode_for_size (lsize, 0).require ();
6235 unsigned int lnunits = const_nunits / group_size;
6236 /* If we can't construct such a vector fall back to
6237 element extracts from the original vector type and
6238 element size stores. */
6239 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6240 && VECTOR_MODE_P (vmode)
6241 && (convert_optab_handler (vec_extract_optab,
6242 vmode, elmode)
6243 != CODE_FOR_nothing))
6245 nstores = lnunits;
6246 lnel = group_size;
6247 ltype = build_nonstandard_integer_type (lsize, 1);
6248 lvectype = build_vector_type (ltype, nstores);
6250 /* Else fall back to vector extraction anyway.
6251 Fewer stores are more important than avoiding spilling
6252 of the vector we extract from. Compared to the
6253 construction case in vectorizable_load no store-forwarding
6254 issue exists here for reasonable archs. */
6257 else if (group_size >= const_nunits
6258 && group_size % const_nunits == 0)
6260 nstores = 1;
6261 lnel = const_nunits;
6262 ltype = vectype;
6263 lvectype = vectype;
6265 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
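/* An illustrative instance of the integer punning above: storing groups
   of two 32-bit elements out of a four-element vector uses LSIZE == 64,
   so the vector is viewed as two 64-bit integers and each group is
   written with one 64-bit store instead of two 32-bit extracts and
   stores.  */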
6266 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6269 ivstep = stride_step;
6270 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6271 build_int_cst (TREE_TYPE (ivstep), vf));
6273 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6275 create_iv (stride_base, ivstep, NULL,
6276 loop, &incr_gsi, insert_after,
6277 &offvar, NULL);
6278 incr = gsi_stmt (incr_gsi);
6279 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6281 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6282 if (stmts)
6283 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6285 prev_stmt_info = NULL;
6286 alias_off = build_int_cst (ref_type, 0);
6287 next_stmt = first_stmt;
6288 for (g = 0; g < group_size; g++)
6290 running_off = offvar;
6291 if (g)
6293 tree size = TYPE_SIZE_UNIT (ltype);
6294 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6295 size);
6296 tree newoff = copy_ssa_name (running_off, NULL);
6297 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6298 running_off, pos);
6299 vect_finish_stmt_generation (stmt, incr, gsi);
6300 running_off = newoff;
6302 unsigned int group_el = 0;
6303 unsigned HOST_WIDE_INT
6304 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6305 for (j = 0; j < ncopies; j++)
6307 /* We've set op and dt above, from vect_get_store_rhs,
6308 and first_stmt == stmt. */
6309 if (j == 0)
6311 if (slp)
6313 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6314 slp_node);
6315 vec_oprnd = vec_oprnds[0];
6317 else
6319 op = vect_get_store_rhs (next_stmt);
6320 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6323 else
6325 if (slp)
6326 vec_oprnd = vec_oprnds[j];
6327 else
6329 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6330 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6333 /* Pun the vector to extract from if necessary. */
6334 if (lvectype != vectype)
6336 tree tem = make_ssa_name (lvectype);
6337 gimple *pun
6338 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6339 lvectype, vec_oprnd));
6340 vect_finish_stmt_generation (stmt, pun, gsi);
6341 vec_oprnd = tem;
6343 for (i = 0; i < nstores; i++)
6345 tree newref, newoff;
6346 gimple *incr, *assign;
6347 tree size = TYPE_SIZE (ltype);
6348 /* Extract the i'th component. */
6349 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6350 bitsize_int (i), size);
6351 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6352 size, pos);
6354 elem = force_gimple_operand_gsi (gsi, elem, true,
6355 NULL_TREE, true,
6356 GSI_SAME_STMT);
6358 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6359 group_el * elsz);
6360 newref = build2 (MEM_REF, ltype,
6361 running_off, this_off);
6363 /* And store it to *running_off. */
6364 assign = gimple_build_assign (newref, elem);
6365 vect_finish_stmt_generation (stmt, assign, gsi);
6367 group_el += lnel;
6368 if (! slp
6369 || group_el == group_size)
6371 newoff = copy_ssa_name (running_off, NULL);
6372 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6373 running_off, stride_step);
6374 vect_finish_stmt_generation (stmt, incr, gsi);
6376 running_off = newoff;
6377 group_el = 0;
6379 if (g == group_size - 1
6380 && !slp)
6382 if (j == 0 && i == 0)
6383 STMT_VINFO_VEC_STMT (stmt_info)
6384 = *vec_stmt = assign;
6385 else
6386 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6387 prev_stmt_info = vinfo_for_stmt (assign);
6391 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6392 if (slp)
6393 break;
6396 vec_oprnds.release ();
6397 return true;
6400 auto_vec<tree> dr_chain (group_size);
6401 oprnds.create (group_size);
6403 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6404 gcc_assert (alignment_support_scheme);
6405 bool masked_loop_p = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
6406 /* Targets with store-lane instructions must not require explicit
6407 realignment. vect_supportable_dr_alignment always returns either
6408 dr_aligned or dr_unaligned_supported for masked operations. */
6409 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
6410 && !mask
6411 && !masked_loop_p)
6412 || alignment_support_scheme == dr_aligned
6413 || alignment_support_scheme == dr_unaligned_supported);
6415 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6416 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6417 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
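/* For instance, with a four-element vectype OFFSET becomes -3 elements,
   placing the data-ref pointer at the lowest-addressed element covered
   by the first (downward or reversed) vector access.  */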
6419 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6420 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6421 else
6422 aggr_type = vectype;
6424 if (mask)
6425 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6427 /* In case the vectorization factor (VF) is bigger than the number
6428 of elements that we can fit in a vectype (nunits), we have to generate
6429 more than one vector stmt - i.e - we need to "unroll" the
6430 vector stmt by a factor VF/nunits. For more details see documentation in
6431 vect_get_vec_def_for_stmt_copy. */
6433 /* In case of interleaving (non-unit grouped access):
6435 S1: &base + 2 = x2
6436 S2: &base = x0
6437 S3: &base + 1 = x1
6438 S4: &base + 3 = x3
6440 We create vectorized stores starting from base address (the access of the
6441 first stmt in the chain (S2 in the above example), when the last store stmt
6442 of the chain (S4) is reached:
6444 VS1: &base = vx2
6445 VS2: &base + vec_size*1 = vx0
6446 VS3: &base + vec_size*2 = vx1
6447 VS4: &base + vec_size*3 = vx3
6449 Then permutation statements are generated:
6451 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6452 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6455 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6456 (the order of the data-refs in the output of vect_permute_store_chain
6457 corresponds to the order of scalar stmts in the interleaving chain - see
6458 the documentation of vect_permute_store_chain()).
6460 In case of both multiple types and interleaving, above vector stores and
6461 permutation stmts are created for every copy. The result vector stmts are
6462 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6463 STMT_VINFO_RELATED_STMT for the next copies.
6466 prev_stmt_info = NULL;
6467 tree vec_mask = NULL_TREE;
6468 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
6469 for (j = 0; j < ncopies; j++)
6472 if (j == 0)
6474 if (slp)
6476 /* Get vectorized arguments for SLP_NODE. */
6477 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6478 NULL, slp_node);
6480 vec_oprnd = vec_oprnds[0];
6482 else
6484 /* For interleaved stores we collect vectorized defs for all the
6485 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6486 used as an input to vect_permute_store_chain(), and OPRNDS as
6487 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6489 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6490 OPRNDS are of size 1. */
6491 next_stmt = first_stmt;
6492 for (i = 0; i < group_size; i++)
6494 /* Since gaps are not supported for interleaved stores,
6495 GROUP_SIZE is the exact number of stmts in the chain.
6496 Therefore, NEXT_STMT can't be NULL. If there is no
6497 interleaving, GROUP_SIZE is 1, and only one iteration
6498 of the loop will be executed. */
6499 op = vect_get_store_rhs (next_stmt);
6500 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6501 dr_chain.quick_push (vec_oprnd);
6502 oprnds.quick_push (vec_oprnd);
6503 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6505 if (mask)
6506 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6507 mask_vectype);
6510 /* We should have caught mismatched types earlier. */
6511 gcc_assert (useless_type_conversion_p (vectype,
6512 TREE_TYPE (vec_oprnd)));
6513 bool simd_lane_access_p
6514 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6515 if (simd_lane_access_p
6516 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6517 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6518 && integer_zerop (DR_OFFSET (first_dr))
6519 && integer_zerop (DR_INIT (first_dr))
6520 && alias_sets_conflict_p (get_alias_set (aggr_type),
6521 get_alias_set (TREE_TYPE (ref_type))))
6523 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6524 dataref_offset = build_int_cst (ref_type, 0);
6525 inv_p = false;
6527 else
6528 dataref_ptr
6529 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6530 simd_lane_access_p ? loop : NULL,
6531 offset, &dummy, gsi, &ptr_incr,
6532 simd_lane_access_p, &inv_p);
6533 gcc_assert (bb_vinfo || !inv_p);
6535 else
6537 /* For interleaved stores we created vectorized defs for all the
6538 defs stored in OPRNDS in the previous iteration (previous copy).
6539 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6540 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6541 next copy.
6542 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6543 OPRNDS are of size 1. */
6544 for (i = 0; i < group_size; i++)
6546 op = oprnds[i];
6547 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6548 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6549 dr_chain[i] = vec_oprnd;
6550 oprnds[i] = vec_oprnd;
6552 if (mask)
6554 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
6555 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
6557 if (dataref_offset)
6558 dataref_offset
6559 = int_const_binop (PLUS_EXPR, dataref_offset,
6560 TYPE_SIZE_UNIT (aggr_type));
6561 else
6562 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6563 TYPE_SIZE_UNIT (aggr_type));
6566 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6568 tree vec_array;
6570 /* Combine all the vectors into an array. */
6571 vec_array = create_vector_array (vectype, vec_num);
6572 for (i = 0; i < vec_num; i++)
6574 vec_oprnd = dr_chain[i];
6575 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6578 tree final_mask = NULL;
6579 if (masked_loop_p)
6580 final_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
6581 if (vec_mask)
6582 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6583 vec_mask, gsi);
6585 gcall *call;
6586 if (final_mask)
6588 /* Emit:
6589 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
6590 VEC_ARRAY). */
6591 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
6592 tree alias_ptr = build_int_cst (ref_type, align);
6593 call = gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
6594 dataref_ptr, alias_ptr,
6595 final_mask, vec_array);
6597 else
6599 /* Emit:
6600 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6601 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6602 call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6603 vec_array);
6604 gimple_call_set_lhs (call, data_ref);
6606 gimple_call_set_nothrow (call, true);
6607 new_stmt = call;
6608 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6610 else
6612 new_stmt = NULL;
6613 if (grouped_store)
6615 if (j == 0)
6616 result_chain.create (group_size);
6617 /* Permute. */
6618 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6619 &result_chain);
6622 next_stmt = first_stmt;
6623 for (i = 0; i < vec_num; i++)
6625 unsigned align, misalign;
6627 tree final_mask = NULL_TREE;
6628 if (masked_loop_p)
6629 final_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
6630 vectype, vec_num * j + i);
6631 if (vec_mask)
6632 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
6633 vec_mask, gsi);
6635 if (i > 0)
6636 /* Bump the vector pointer. */
6637 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6638 stmt, NULL_TREE);
6640 if (slp)
6641 vec_oprnd = vec_oprnds[i];
6642 else if (grouped_store)
6643 /* For grouped stores vectorized defs are interleaved in
6644 vect_permute_store_chain(). */
6645 vec_oprnd = result_chain[i];
6647 align = DR_TARGET_ALIGNMENT (first_dr);
6648 if (aligned_access_p (first_dr))
6649 misalign = 0;
6650 else if (DR_MISALIGNMENT (first_dr) == -1)
6652 align = dr_alignment (vect_dr_behavior (first_dr));
6653 misalign = 0;
6655 else
6656 misalign = DR_MISALIGNMENT (first_dr);
6657 if (dataref_offset == NULL_TREE
6658 && TREE_CODE (dataref_ptr) == SSA_NAME)
6659 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6660 misalign);
6662 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6664 tree perm_mask = perm_mask_for_reverse (vectype);
6665 tree perm_dest
6666 = vect_create_destination_var (vect_get_store_rhs (stmt),
6667 vectype);
6668 tree new_temp = make_ssa_name (perm_dest);
6670 /* Generate the permute statement. */
6671 gimple *perm_stmt
6672 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6673 vec_oprnd, perm_mask);
6674 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6676 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6677 vec_oprnd = new_temp;
6680 /* Arguments are ready. Create the new vector stmt. */
6681 if (final_mask)
6683 align = least_bit_hwi (misalign | align);
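/* Worked example: ALIGN == 16 with MISALIGN == 4 gives
   least_bit_hwi (20) == 4, so the pointer operand below only claims
   the 4-byte alignment that is actually known to hold.  */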
6684 tree ptr = build_int_cst (ref_type, align);
6685 gcall *call
6686 = gimple_build_call_internal (IFN_MASK_STORE, 4,
6687 dataref_ptr, ptr,
6688 final_mask, vec_oprnd);
6689 gimple_call_set_nothrow (call, true);
6690 new_stmt = call;
6692 else
6694 data_ref = fold_build2 (MEM_REF, vectype,
6695 dataref_ptr,
6696 dataref_offset
6697 ? dataref_offset
6698 : build_int_cst (ref_type, 0));
6699 if (aligned_access_p (first_dr))
6701 else if (DR_MISALIGNMENT (first_dr) == -1)
6702 TREE_TYPE (data_ref)
6703 = build_aligned_type (TREE_TYPE (data_ref),
6704 align * BITS_PER_UNIT);
6705 else
6706 TREE_TYPE (data_ref)
6707 = build_aligned_type (TREE_TYPE (data_ref),
6708 TYPE_ALIGN (elem_type));
6709 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6711 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6713 if (slp)
6714 continue;
6716 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6717 if (!next_stmt)
6718 break;
6721 if (!slp)
6723 if (j == 0)
6724 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6725 else
6726 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6727 prev_stmt_info = vinfo_for_stmt (new_stmt);
6731 oprnds.release ();
6732 result_chain.release ();
6733 vec_oprnds.release ();
6735 return true;
6738 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6739 VECTOR_CST mask. No checks are made that the target platform supports the
6740 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6741 vect_gen_perm_mask_checked. */
6743 tree
6744 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
6746 tree mask_type;
6748 poly_uint64 nunits = sel.length ();
6749 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
6751 mask_type = build_vector_type (ssizetype, nunits);
6752 return vec_perm_indices_to_tree (mask_type, sel);
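/* For instance, a selector of { 3, 2, 1, 0 } over a four-element vectype
   yields a four-element VECTOR_CST of ssizetype elements which, used in
   a VEC_PERM_EXPR, reverses its input vector.  */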
6755 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6756 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6758 tree
6759 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
6761 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
6762 return vect_gen_perm_mask_any (vectype, sel);
6765 /* Given vector variables X and Y that were generated for the scalar
6766 STMT, generate instructions to permute the vector elements of X and Y
6767 using the permutation mask MASK_VEC, insert them at *GSI and return the
6768 permuted vector variable. */
6770 static tree
6771 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6772 gimple_stmt_iterator *gsi)
6774 tree vectype = TREE_TYPE (x);
6775 tree perm_dest, data_ref;
6776 gimple *perm_stmt;
6778 tree scalar_dest = gimple_get_lhs (stmt);
6779 if (TREE_CODE (scalar_dest) == SSA_NAME)
6780 perm_dest = vect_create_destination_var (scalar_dest, vectype);
6781 else
6782 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
6783 data_ref = make_ssa_name (perm_dest);
6785 /* Generate the permute statement. */
6786 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6787 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6789 return data_ref;
6792 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6793 inserting them on the loop's preheader edge. Returns true if we
6794 were successful in doing so (and thus STMT can then be moved),
6795 otherwise returns false. */
6797 static bool
6798 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6800 ssa_op_iter i;
6801 tree op;
6802 bool any = false;
6804 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6806 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6807 if (!gimple_nop_p (def_stmt)
6808 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6810 /* Make sure we don't need to recurse. While we could do
6811 so in simple cases, when there are more complex use webs
6812 we don't have an easy way to preserve stmt order to fulfil
6813 dependencies within them. */
6814 tree op2;
6815 ssa_op_iter i2;
6816 if (gimple_code (def_stmt) == GIMPLE_PHI)
6817 return false;
6818 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6820 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6821 if (!gimple_nop_p (def_stmt2)
6822 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6823 return false;
6825 any = true;
6829 if (!any)
6830 return true;
6832 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6834 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6835 if (!gimple_nop_p (def_stmt)
6836 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6838 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6839 gsi_remove (&gsi, false);
6840 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6844 return true;
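/* An illustrative sketch of the situation hoist_defs_of_uses handles
   (SSA names invented for the example):

     loop:
       a_1 = invariant_2 + 4;
       x_3 = *a_1;              <-- STMT the caller wants to hoist

   the definition of a_1 is moved to the loop preheader, since its own
   operands are defined outside the loop, after which the caller is free
   to hoist the load itself.  */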
6847 /* vectorizable_load.
6849 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6850 can be vectorized.
6851 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6852 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6853 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6855 static bool
6856 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6857 slp_tree slp_node, slp_instance slp_node_instance)
6859 tree scalar_dest;
6860 tree vec_dest = NULL;
6861 tree data_ref = NULL;
6862 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6863 stmt_vec_info prev_stmt_info;
6864 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6865 struct loop *loop = NULL;
6866 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6867 bool nested_in_vect_loop = false;
6868 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6869 tree elem_type;
6870 tree new_temp;
6871 machine_mode mode;
6872 gimple *new_stmt = NULL;
6873 tree dummy;
6874 enum dr_alignment_support alignment_support_scheme;
6875 tree dataref_ptr = NULL_TREE;
6876 tree dataref_offset = NULL_TREE;
6877 gimple *ptr_incr = NULL;
6878 int ncopies;
6879 int i, j;
6880 unsigned int group_size;
6881 poly_uint64 group_gap_adj;
6882 tree msq = NULL_TREE, lsq;
6883 tree offset = NULL_TREE;
6884 tree byte_offset = NULL_TREE;
6885 tree realignment_token = NULL_TREE;
6886 gphi *phi = NULL;
6887 vec<tree> dr_chain = vNULL;
6888 bool grouped_load = false;
6889 gimple *first_stmt;
6890 gimple *first_stmt_for_drptr = NULL;
6891 bool inv_p;
6892 bool compute_in_loop = false;
6893 struct loop *at_loop;
6894 int vec_num;
6895 bool slp = (slp_node != NULL);
6896 bool slp_perm = false;
6897 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6898 poly_uint64 vf;
6899 tree aggr_type;
6900 gather_scatter_info gs_info;
6901 vec_info *vinfo = stmt_info->vinfo;
6902 tree ref_type;
6904 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6905 return false;
6907 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6908 && ! vec_stmt)
6909 return false;
6911 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6912 if (is_gimple_assign (stmt))
6914 scalar_dest = gimple_assign_lhs (stmt);
6915 if (TREE_CODE (scalar_dest) != SSA_NAME)
6916 return false;
6918 tree_code code = gimple_assign_rhs_code (stmt);
6919 if (code != ARRAY_REF
6920 && code != BIT_FIELD_REF
6921 && code != INDIRECT_REF
6922 && code != COMPONENT_REF
6923 && code != IMAGPART_EXPR
6924 && code != REALPART_EXPR
6925 && code != MEM_REF
6926 && TREE_CODE_CLASS (code) != tcc_declaration)
6927 return false;
6929 else
6931 gcall *call = dyn_cast <gcall *> (stmt);
6932 if (!call || !gimple_call_internal_p (call, IFN_MASK_LOAD))
6933 return false;
6935 scalar_dest = gimple_call_lhs (call);
6936 if (!scalar_dest)
6937 return false;
6939 if (slp_node != NULL)
6941 if (dump_enabled_p ())
6942 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6943 "SLP of masked loads not supported.\n");
6944 return false;
6947 mask = gimple_call_arg (call, 2);
6948 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
6949 return false;
6952 if (!STMT_VINFO_DATA_REF (stmt_info))
6953 return false;
6955 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6956 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6958 if (loop_vinfo)
6960 loop = LOOP_VINFO_LOOP (loop_vinfo);
6961 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6962 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6964 else
6965 vf = 1;
6967 /* Multiple types in SLP are handled by creating the appropriate number of
6968 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6969 case of SLP. */
6970 if (slp)
6971 ncopies = 1;
6972 else
6973 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6975 gcc_assert (ncopies >= 1);
6977 /* FORNOW. This restriction should be relaxed. */
6978 if (nested_in_vect_loop && ncopies > 1)
6980 if (dump_enabled_p ())
6981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6982 "multiple types in nested loop.\n");
6983 return false;
6986 /* Invalidate assumptions made by dependence analysis when vectorization
6987 on the unrolled body effectively re-orders stmts. */
6988 if (ncopies > 1
6989 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6990 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6991 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6993 if (dump_enabled_p ())
6994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6995 "cannot perform implicit CSE when unrolling "
6996 "with negative dependence distance\n");
6997 return false;
7000 elem_type = TREE_TYPE (vectype);
7001 mode = TYPE_MODE (vectype);
7003 /* FORNOW. In some cases we can vectorize even if the data type is not
7004 supported (e.g. data copies). */
7005 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
7007 if (dump_enabled_p ())
7008 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7009 "Aligned load, but unsupported type.\n");
7010 return false;
7013 /* Check if the load is a part of an interleaving chain. */
7014 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7016 grouped_load = true;
7017 /* FORNOW */
7018 gcc_assert (!nested_in_vect_loop);
7019 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
7021 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7022 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7024 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7025 slp_perm = true;
7027 /* Invalidate assumptions made by dependence analysis when vectorization
7028 on the unrolled body effectively re-orders stmts. */
7029 if (!PURE_SLP_STMT (stmt_info)
7030 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
7031 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
7032 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
7034 if (dump_enabled_p ())
7035 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7036 "cannot perform implicit CSE when performing "
7037 "group loads with negative dependence distance\n");
7038 return false;
7041 /* Similarly when the stmt is a load that is both part of a SLP
7042 instance and a loop vectorized stmt via the same-dr mechanism
7043 we have to give up. */
7044 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
7045 && (STMT_SLP_TYPE (stmt_info)
7046 != STMT_SLP_TYPE (vinfo_for_stmt
7047 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
7049 if (dump_enabled_p ())
7050 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7051 "conflicting SLP types for CSEd load\n");
7052 return false;
7055 else
7056 group_size = 1;
7058 vect_memory_access_type memory_access_type;
7059 if (!get_load_store_type (stmt, vectype, slp, mask, VLS_LOAD, ncopies,
7060 &memory_access_type, &gs_info))
7061 return false;
7063 if (mask)
7065 if (memory_access_type == VMAT_CONTIGUOUS)
7067 machine_mode vec_mode = TYPE_MODE (vectype);
7068 if (!VECTOR_MODE_P (vec_mode)
7069 || !can_vec_mask_load_store_p (vec_mode,
7070 TYPE_MODE (mask_vectype), true))
7071 return false;
7073 else if (memory_access_type == VMAT_GATHER_SCATTER)
7075 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
7076 tree masktype
7077 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
7078 if (TREE_CODE (masktype) == INTEGER_TYPE)
7080 if (dump_enabled_p ())
7081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7082 "masked gather with integer mask not"
7083 " supported.");
7084 return false;
7087 else if (memory_access_type != VMAT_LOAD_STORE_LANES)
7089 if (dump_enabled_p ())
7090 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7091 "unsupported access type for masked load.\n");
7092 return false;
7096 if (!vec_stmt) /* transformation not required. */
7098 if (!slp)
7099 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
7101 if (loop_vinfo
7102 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7103 check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
7104 memory_access_type);
7106 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
7107 /* The SLP costs are calculated during SLP analysis. */
7108 if (!PURE_SLP_STMT (stmt_info))
7109 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
7110 NULL, NULL, NULL);
7111 return true;
7114 if (!slp)
7115 gcc_assert (memory_access_type
7116 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
7118 if (dump_enabled_p ())
7119 dump_printf_loc (MSG_NOTE, vect_location,
7120 "transform load. ncopies = %d\n", ncopies);
7122 /* Transform. */
7124 ensure_base_align (dr);
7126 if (memory_access_type == VMAT_GATHER_SCATTER)
7128 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask);
7129 return true;
7132 if (memory_access_type == VMAT_ELEMENTWISE
7133 || memory_access_type == VMAT_STRIDED_SLP)
7135 gimple_stmt_iterator incr_gsi;
7136 bool insert_after;
7137 gimple *incr;
7138 tree offvar;
7139 tree ivstep;
7140 tree running_off;
7141 vec<constructor_elt, va_gc> *v = NULL;
7142 gimple_seq stmts = NULL;
7143 tree stride_base, stride_step, alias_off;
7144 /* Checked by get_load_store_type. */
7145 unsigned int const_nunits = nunits.to_constant ();
7147 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7148 gcc_assert (!nested_in_vect_loop);
7150 if (slp && grouped_load)
7152 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7153 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7154 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7155 ref_type = get_group_alias_ptr_type (first_stmt);
7157 else
7159 first_stmt = stmt;
7160 first_dr = dr;
7161 group_size = 1;
7162 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7165 stride_base
7166 = fold_build_pointer_plus
7167 (DR_BASE_ADDRESS (first_dr),
7168 size_binop (PLUS_EXPR,
7169 convert_to_ptrofftype (DR_OFFSET (first_dr)),
7170 convert_to_ptrofftype (DR_INIT (first_dr))));
7171 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
7173 /* For a load with loop-invariant (but other than power-of-2)
7174 stride (i.e. not a grouped access) like so:
7176 for (i = 0; i < n; i += stride)
7177 ... = array[i];
7179 we generate a new induction variable and new accesses to
7180 form a new vector (or vectors, depending on ncopies):
7182 for (j = 0; ; j += VF*stride)
7183 tmp1 = array[j];
7184 tmp2 = array[j + stride];
7186 vectemp = {tmp1, tmp2, ...}
7189 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
7190 build_int_cst (TREE_TYPE (stride_step), vf));
7192 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7194 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7195 loop, &incr_gsi, insert_after,
7196 &offvar, NULL);
7197 incr = gsi_stmt (incr_gsi);
7198 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7200 stride_step = force_gimple_operand (unshare_expr (stride_step),
7201 &stmts, true, NULL_TREE);
7202 if (stmts)
7203 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7205 prev_stmt_info = NULL;
7206 running_off = offvar;
7207 alias_off = build_int_cst (ref_type, 0);
7208 int nloads = const_nunits;
7209 int lnel = 1;
7210 tree ltype = TREE_TYPE (vectype);
7211 tree lvectype = vectype;
7212 auto_vec<tree> dr_chain;
7213 if (memory_access_type == VMAT_STRIDED_SLP)
7215 if (group_size < const_nunits)
7217 /* First check if vec_init optab supports construction from
7218 vector elts directly. */
7219 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7220 machine_mode vmode;
7221 if (mode_for_vector (elmode, group_size).exists (&vmode)
7222 && VECTOR_MODE_P (vmode)
7223 && (convert_optab_handler (vec_init_optab,
7224 TYPE_MODE (vectype), vmode)
7225 != CODE_FOR_nothing))
7227 nloads = const_nunits / group_size;
7228 lnel = group_size;
7229 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7231 else
7233 /* Otherwise avoid emitting a constructor of vector elements
7234 by performing the loads using an integer type of the same
7235 size, constructing a vector of those and then
7236 re-interpreting it as the original vector type.
7237 This avoids a huge runtime penalty due to the general
7238 inability to perform store forwarding from smaller stores
7239 to a larger load. */
7240 unsigned lsize
7241 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7242 elmode = int_mode_for_size (lsize, 0).require ();
7243 unsigned int lnunits = const_nunits / group_size;
7244 /* If we can't construct such a vector fall back to
7245 element loads of the original vector type. */
7246 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7247 && VECTOR_MODE_P (vmode)
7248 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7249 != CODE_FOR_nothing))
7251 nloads = lnunits;
7252 lnel = group_size;
7253 ltype = build_nonstandard_integer_type (lsize, 1);
7254 lvectype = build_vector_type (ltype, nloads);
7258 else
7260 nloads = 1;
7261 lnel = const_nunits;
7262 ltype = vectype;
7264 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
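/* An illustrative instance of the integer punning a few lines above:
   loading groups of two 16-bit elements into an eight-element vector
   uses LSIZE == 32, so four 32-bit scalar loads build a vector of four
   32-bit integers which is later view-converted back to the original
   eight-element vector type.  */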
7266 if (slp)
7268 /* For SLP permutation support we need to load the whole group,
7269 not only the number of vector stmts the permutation result
7270 fits in. */
7271 if (slp_perm)
7273 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7274 variable VF. */
7275 unsigned int const_vf = vf.to_constant ();
7276 ncopies = CEIL (group_size * const_vf, const_nunits);
7277 dr_chain.create (ncopies);
7279 else
7280 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7282 unsigned int group_el = 0;
7283 unsigned HOST_WIDE_INT
7284 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7285 for (j = 0; j < ncopies; j++)
7287 if (nloads > 1)
7288 vec_alloc (v, nloads);
7289 for (i = 0; i < nloads; i++)
7291 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7292 group_el * elsz);
7293 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7294 build2 (MEM_REF, ltype,
7295 running_off, this_off));
7296 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7297 if (nloads > 1)
7298 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7299 gimple_assign_lhs (new_stmt));
7301 group_el += lnel;
7302 if (! slp
7303 || group_el == group_size)
7305 tree newoff = copy_ssa_name (running_off);
7306 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7307 running_off, stride_step);
7308 vect_finish_stmt_generation (stmt, incr, gsi);
7310 running_off = newoff;
7311 group_el = 0;
7314 if (nloads > 1)
7316 tree vec_inv = build_constructor (lvectype, v);
7317 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7318 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7319 if (lvectype != vectype)
7321 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7322 VIEW_CONVERT_EXPR,
7323 build1 (VIEW_CONVERT_EXPR,
7324 vectype, new_temp));
7325 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7329 if (slp)
7331 if (slp_perm)
7332 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7333 else
7334 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7336 else
7338 if (j == 0)
7339 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7340 else
7341 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7342 prev_stmt_info = vinfo_for_stmt (new_stmt);
7345 if (slp_perm)
7347 unsigned n_perms;
7348 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7349 slp_node_instance, false, &n_perms);
7351 return true;
7354 if (grouped_load)
7356 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7357 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7358 /* For SLP vectorization we directly vectorize a subchain
7359 without permutation. */
7360 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7361 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7362 /* For BB vectorization always use the first stmt to base
7363 the data ref pointer on. */
7364 if (bb_vinfo)
7365 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7367 /* Check if the chain of loads is already vectorized. */
7368 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7369 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7370 ??? But we can only do so if there is exactly one
7371 as we have no way to get at the rest. Leave the CSE
7372 opportunity alone.
7373 ??? With the group load eventually participating
7374 in multiple different permutations (having multiple
7375 slp nodes which refer to the same group) the CSE
7376 would even produce wrong code. See PR56270. */
7377 && !slp)
7379 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7380 return true;
7382 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7383 group_gap_adj = 0;
7385 /* VEC_NUM is the number of vect stmts to be created for this group. */
7386 if (slp)
7388 grouped_load = false;
7389 /* For SLP permutation support we need to load the whole group,
7390 not only the number of vector stmts the permutation result
7391 fits in. */
7392 if (slp_perm)
7394 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7395 variable VF. */
7396 unsigned int const_vf = vf.to_constant ();
7397 unsigned int const_nunits = nunits.to_constant ();
7398 vec_num = CEIL (group_size * const_vf, const_nunits);
7399 group_gap_adj = vf * group_size - nunits * vec_num;
7401 else
7403 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7404 group_gap_adj
7405 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7408 else
7409 vec_num = group_size;
7411 ref_type = get_group_alias_ptr_type (first_stmt);
7413 else
7415 first_stmt = stmt;
7416 first_dr = dr;
7417 group_size = vec_num = 1;
7418 group_gap_adj = 0;
7419 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7422 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7423 gcc_assert (alignment_support_scheme);
7424 bool masked_loop_p = (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7425 /* Targets with store-lane instructions must not require explicit
7426 realignment. vect_supportable_dr_alignment always returns either
7427 dr_aligned or dr_unaligned_supported for masked operations. */
7428 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
7429 && !mask
7430 && !masked_loop_p)
7431 || alignment_support_scheme == dr_aligned
7432 || alignment_support_scheme == dr_unaligned_supported);
7434 /* In case the vectorization factor (VF) is bigger than the number
7435 of elements that we can fit in a vectype (nunits), we have to generate
7436 more than one vector stmt, i.e., we need to "unroll" the
7437 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7438 from one copy of the vector stmt to the next, in the field
7439 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7440 stages to find the correct vector defs to be used when vectorizing
7441 stmts that use the defs of the current stmt. The example below
7442 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7443 need to create 4 vectorized stmts):
7445 before vectorization:
7446 RELATED_STMT VEC_STMT
7447 S1: x = memref - -
7448 S2: z = x + 1 - -
7450 step 1: vectorize stmt S1:
7451 We first create the vector stmt VS1_0, and, as usual, record a
7452 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7453 Next, we create the vector stmt VS1_1, and record a pointer to
7454 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7455 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7456 stmts and pointers:
7457 RELATED_STMT VEC_STMT
7458 VS1_0: vx0 = memref0 VS1_1 -
7459 VS1_1: vx1 = memref1 VS1_2 -
7460 VS1_2: vx2 = memref2 VS1_3 -
7461 VS1_3: vx3 = memref3 - -
7462 S1: x = load - VS1_0
7463 S2: z = x + 1 - -
7465 See the documentation of vect_get_vec_def_for_stmt_copy for how the
7466 information we recorded in the RELATED_STMT field is used to vectorize
7467 stmt S2. */
7469 /* In case of interleaving (non-unit grouped access):
7471 S1: x2 = &base + 2
7472 S2: x0 = &base
7473 S3: x1 = &base + 1
7474 S4: x3 = &base + 3
7476 Vectorized loads are created in the order of memory accesses
7477 starting from the access of the first stmt of the chain:
7479 VS1: vx0 = &base
7480 VS2: vx1 = &base + vec_size*1
7481 VS3: vx3 = &base + vec_size*2
7482 VS4: vx4 = &base + vec_size*3
7484 Then permutation statements are generated:
7486 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7487 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7490 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7491 (the order of the data-refs in the output of vect_permute_load_chain
7492 corresponds to the order of scalar stmts in the interleaving chain - see
7493 the documentation of vect_permute_load_chain()).
7494 The generation of permutation stmts and recording them in
7495 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7497 In case of both multiple types and interleaving, the vector loads and
7498 permutation stmts above are created for every copy. The result vector
7499 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7500 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7502 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7503 on a target that supports unaligned accesses (dr_unaligned_supported)
7504 we generate the following code:
7505 p = initial_addr;
7506 indx = 0;
7507 loop {
7508 p = p + indx * vectype_size;
7509 vec_dest = *(p);
7510 indx = indx + 1;
7513 Otherwise, the data reference is potentially unaligned on a target that
7514 does not support unaligned accesses (dr_explicit_realign_optimized) -
7515 then generate the following code, in which the data in each iteration is
7516 obtained by two vector loads, one from the previous iteration, and one
7517 from the current iteration:
7518 p1 = initial_addr;
7519 msq_init = *(floor(p1))
7520 p2 = initial_addr + VS - 1;
7521 realignment_token = call target_builtin;
7522 indx = 0;
7523 loop {
7524 p2 = p2 + indx * vectype_size
7525 lsq = *(floor(p2))
7526 vec_dest = realign_load (msq, lsq, realignment_token)
7527 indx = indx + 1;
7528 msq = lsq;
7529 } */
7531 /* If the misalignment remains the same throughout the execution of the
7532 loop, we can create the init_addr and permutation mask at the loop
7533 preheader. Otherwise, it needs to be created inside the loop.
7534 This can only occur when vectorizing memory accesses in the inner-loop
7535 nested within an outer-loop that is being vectorized. */
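/* A rough illustration (assuming a simple row-major access pattern;
   the loop below is only a sketch): when vectorizing the outer loop of

       for (i = 0; i < n; i++)
         for (j = 0; j < m; j++)
           ... = a[i][j];

   the inner-loop access advances by one row of A per outer-loop
   iteration.  If that row size is not a multiple of the vector size,
   the misalignment of the access changes from one outer-loop iteration
   to the next, which is the compute_in_loop case checked for below.  */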
7537 if (nested_in_vect_loop
7538 && !multiple_p (DR_STEP_ALIGNMENT (dr),
7539 GET_MODE_SIZE (TYPE_MODE (vectype))))
7541 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7542 compute_in_loop = true;
7545 if ((alignment_support_scheme == dr_explicit_realign_optimized
7546 || alignment_support_scheme == dr_explicit_realign)
7547 && !compute_in_loop)
7549 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7550 alignment_support_scheme, NULL_TREE,
7551 &at_loop);
7552 if (alignment_support_scheme == dr_explicit_realign_optimized)
7554 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7555 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7556 size_one_node);
7559 else
7560 at_loop = loop;
7562 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7563 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7565 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7566 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7567 else
7568 aggr_type = vectype;
7570 tree vec_mask = NULL_TREE;
7571 prev_stmt_info = NULL;
7572 poly_uint64 group_elt = 0;
7573 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
7574 for (j = 0; j < ncopies; j++)
7576 /* 1. Create the vector or array pointer update chain. */
7577 if (j == 0)
7579 bool simd_lane_access_p
7580 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7581 if (simd_lane_access_p
7582 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7583 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7584 && integer_zerop (DR_OFFSET (first_dr))
7585 && integer_zerop (DR_INIT (first_dr))
7586 && alias_sets_conflict_p (get_alias_set (aggr_type),
7587 get_alias_set (TREE_TYPE (ref_type)))
7588 && (alignment_support_scheme == dr_aligned
7589 || alignment_support_scheme == dr_unaligned_supported))
7591 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7592 dataref_offset = build_int_cst (ref_type, 0);
7593 inv_p = false;
7595 else if (first_stmt_for_drptr
7596 && first_stmt != first_stmt_for_drptr)
7598 dataref_ptr
7599 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7600 at_loop, offset, &dummy, gsi,
7601 &ptr_incr, simd_lane_access_p,
7602 &inv_p, byte_offset);
7603 /* Adjust the pointer by the difference to first_stmt. */
7604 data_reference_p ptrdr
7605 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7606 tree diff = fold_convert (sizetype,
7607 size_binop (MINUS_EXPR,
7608 DR_INIT (first_dr),
7609 DR_INIT (ptrdr)));
7610 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7611 stmt, diff);
7613 else
7614 dataref_ptr
7615 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7616 offset, &dummy, gsi, &ptr_incr,
7617 simd_lane_access_p, &inv_p,
7618 byte_offset);
7619 if (mask)
7620 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
7621 mask_vectype);
7623 else
7625 if (dataref_offset)
7626 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7627 TYPE_SIZE_UNIT (aggr_type));
7628 else
7629 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7630 TYPE_SIZE_UNIT (aggr_type));
7631 if (mask)
7633 gimple *def_stmt;
7634 vect_def_type dt;
7635 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
7636 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
7640 if (grouped_load || slp_perm)
7641 dr_chain.create (vec_num);
7643 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7645 tree vec_array;
7647 vec_array = create_vector_array (vectype, vec_num);
7649 tree final_mask = NULL_TREE;
7650 if (masked_loop_p)
7651 final_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j);
7652 if (vec_mask)
7653 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7654 vec_mask, gsi);
7656 gcall *call;
7657 if (final_mask)
7659 /* Emit:
7660 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
7661 VEC_MASK). */
7662 unsigned int align = TYPE_ALIGN_UNIT (TREE_TYPE (vectype));
7663 tree alias_ptr = build_int_cst (ref_type, align);
7664 call = gimple_build_call_internal (IFN_MASK_LOAD_LANES, 3,
7665 dataref_ptr, alias_ptr,
7666 final_mask);
7668 else
7670 /* Emit:
7671 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7672 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7673 call = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7675 gimple_call_set_lhs (call, vec_array);
7676 gimple_call_set_nothrow (call, true);
7677 new_stmt = call;
7678 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7680 /* Extract each vector into an SSA_NAME. */
7681 for (i = 0; i < vec_num; i++)
7683 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7684 vec_array, i);
7685 dr_chain.quick_push (new_temp);
7688 /* Record the mapping between SSA_NAMEs and statements. */
7689 vect_record_grouped_load_vectors (stmt, dr_chain);
7691 else
7693 for (i = 0; i < vec_num; i++)
7695 tree final_mask = NULL_TREE;
7696 if (masked_loop_p
7697 && memory_access_type != VMAT_INVARIANT)
7698 final_mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
7699 vectype, vec_num * j + i);
7700 if (vec_mask)
7701 final_mask = prepare_load_store_mask (mask_vectype, final_mask,
7702 vec_mask, gsi);
7704 if (i > 0)
7705 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7706 stmt, NULL_TREE);
7708 /* 2. Create the vector-load in the loop. */
7709 switch (alignment_support_scheme)
7711 case dr_aligned:
7712 case dr_unaligned_supported:
7714 unsigned int align, misalign;
7716 align = DR_TARGET_ALIGNMENT (dr);
7717 if (alignment_support_scheme == dr_aligned)
7719 gcc_assert (aligned_access_p (first_dr));
7720 misalign = 0;
7722 else if (DR_MISALIGNMENT (first_dr) == -1)
7724 align = dr_alignment (vect_dr_behavior (first_dr));
7725 misalign = 0;
7727 else
7728 misalign = DR_MISALIGNMENT (first_dr);
7729 if (dataref_offset == NULL_TREE
7730 && TREE_CODE (dataref_ptr) == SSA_NAME)
7731 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7732 align, misalign);
7734 if (final_mask)
7736 align = least_bit_hwi (misalign | align);
7737 tree ptr = build_int_cst (ref_type, align);
7738 gcall *call
7739 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
7740 dataref_ptr, ptr,
7741 final_mask);
7742 gimple_call_set_nothrow (call, true);
7743 new_stmt = call;
7744 data_ref = NULL_TREE;
7746 else
7748 data_ref
7749 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7750 dataref_offset
7751 ? dataref_offset
7752 : build_int_cst (ref_type, 0));
7753 if (alignment_support_scheme == dr_aligned)
7755 else if (DR_MISALIGNMENT (first_dr) == -1)
7756 TREE_TYPE (data_ref)
7757 = build_aligned_type (TREE_TYPE (data_ref),
7758 align * BITS_PER_UNIT);
7759 else
7760 TREE_TYPE (data_ref)
7761 = build_aligned_type (TREE_TYPE (data_ref),
7762 TYPE_ALIGN (elem_type));
7764 break;
7766 case dr_explicit_realign:
7768 tree ptr, bump;
7770 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7772 if (compute_in_loop)
7773 msq = vect_setup_realignment (first_stmt, gsi,
7774 &realignment_token,
7775 dr_explicit_realign,
7776 dataref_ptr, NULL);
7778 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7779 ptr = copy_ssa_name (dataref_ptr);
7780 else
7781 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7782 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7783 new_stmt = gimple_build_assign
7784 (ptr, BIT_AND_EXPR, dataref_ptr,
7785 build_int_cst
7786 (TREE_TYPE (dataref_ptr),
7787 -(HOST_WIDE_INT) align));
7788 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7789 data_ref
7790 = build2 (MEM_REF, vectype, ptr,
7791 build_int_cst (ref_type, 0));
7792 vec_dest = vect_create_destination_var (scalar_dest,
7793 vectype);
7794 new_stmt = gimple_build_assign (vec_dest, data_ref);
7795 new_temp = make_ssa_name (vec_dest, new_stmt);
7796 gimple_assign_set_lhs (new_stmt, new_temp);
7797 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7798 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7799 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7800 msq = new_temp;
7802 bump = size_binop (MULT_EXPR, vs,
7803 TYPE_SIZE_UNIT (elem_type));
7804 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7805 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7806 new_stmt = gimple_build_assign
7807 (NULL_TREE, BIT_AND_EXPR, ptr,
7808 build_int_cst
7809 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
7810 ptr = copy_ssa_name (ptr, new_stmt);
7811 gimple_assign_set_lhs (new_stmt, ptr);
7812 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7813 data_ref
7814 = build2 (MEM_REF, vectype, ptr,
7815 build_int_cst (ref_type, 0));
7816 break;
7818 case dr_explicit_realign_optimized:
7820 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7821 new_temp = copy_ssa_name (dataref_ptr);
7822 else
7823 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7824 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7825 new_stmt = gimple_build_assign
7826 (new_temp, BIT_AND_EXPR, dataref_ptr,
7827 build_int_cst (TREE_TYPE (dataref_ptr),
7828 -(HOST_WIDE_INT) align));
7829 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7830 data_ref
7831 = build2 (MEM_REF, vectype, new_temp,
7832 build_int_cst (ref_type, 0));
7833 break;
7835 default:
7836 gcc_unreachable ();
7838 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7839 /* DATA_REF is null if we've already built the statement. */
7840 if (data_ref)
7841 new_stmt = gimple_build_assign (vec_dest, data_ref);
7842 new_temp = make_ssa_name (vec_dest, new_stmt);
7843 gimple_set_lhs (new_stmt, new_temp);
7844 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7846 /* 3. Handle explicit realignment if necessary/supported.
7847 Create in loop:
7848 vec_dest = realign_load (msq, lsq, realignment_token) */
7849 if (alignment_support_scheme == dr_explicit_realign_optimized
7850 || alignment_support_scheme == dr_explicit_realign)
7852 lsq = gimple_assign_lhs (new_stmt);
7853 if (!realignment_token)
7854 realignment_token = dataref_ptr;
7855 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7856 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7857 msq, lsq, realignment_token);
7858 new_temp = make_ssa_name (vec_dest, new_stmt);
7859 gimple_assign_set_lhs (new_stmt, new_temp);
7860 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7862 if (alignment_support_scheme == dr_explicit_realign_optimized)
7864 gcc_assert (phi);
7865 if (i == vec_num - 1 && j == ncopies - 1)
7866 add_phi_arg (phi, lsq,
7867 loop_latch_edge (containing_loop),
7868 UNKNOWN_LOCATION);
7869 msq = lsq;
7873 /* 4. Handle invariant-load. */
7874 if (inv_p && !bb_vinfo)
7876 gcc_assert (!grouped_load);
7877 /* If we have versioned for aliasing or the loop doesn't
7878 have any data dependencies that would preclude this,
7879 then we are sure this is a loop invariant load and
7880 thus we can insert it on the preheader edge. */
7881 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7882 && !nested_in_vect_loop
7883 && hoist_defs_of_uses (stmt, loop))
7885 if (dump_enabled_p ())
7887 dump_printf_loc (MSG_NOTE, vect_location,
7888 "hoisting out of the vectorized "
7889 "loop: ");
7890 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7892 tree tem = copy_ssa_name (scalar_dest);
7893 gsi_insert_on_edge_immediate
7894 (loop_preheader_edge (loop),
7895 gimple_build_assign (tem,
7896 unshare_expr
7897 (gimple_assign_rhs1 (stmt))));
7898 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7899 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7900 set_vinfo_for_stmt (new_stmt,
7901 new_stmt_vec_info (new_stmt, vinfo));
7903 else
7905 gimple_stmt_iterator gsi2 = *gsi;
7906 gsi_next (&gsi2);
7907 new_temp = vect_init_vector (stmt, scalar_dest,
7908 vectype, &gsi2);
7909 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7913 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7915 tree perm_mask = perm_mask_for_reverse (vectype);
7916 new_temp = permute_vec_elements (new_temp, new_temp,
7917 perm_mask, stmt, gsi);
7918 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7921 /* Collect vector loads and later create their permutation in
7922 vect_transform_grouped_load (). */
7923 if (grouped_load || slp_perm)
7924 dr_chain.quick_push (new_temp);
7926 /* Store vector loads in the corresponding SLP_NODE. */
7927 if (slp && !slp_perm)
7928 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7930 /* With SLP permutation we load the gaps as well; without it
7931 we need to skip the gaps after we manage to fully load
7932 all elements. group_gap_adj is GROUP_SIZE here. */
7933 group_elt += nunits;
7934 if (maybe_ne (group_gap_adj, 0U)
7935 && !slp_perm
7936 && known_eq (group_elt, group_size - group_gap_adj))
7938 poly_wide_int bump_val
7939 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7940 * group_gap_adj);
7941 tree bump = wide_int_to_tree (sizetype, bump_val);
7942 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7943 stmt, bump);
7944 group_elt = 0;
7947 /* Bump the vector pointer to account for a gap or for excess
7948 elements loaded for a permuted SLP load. */
7949 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
7951 poly_wide_int bump_val
7952 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7953 * group_gap_adj);
7954 tree bump = wide_int_to_tree (sizetype, bump_val);
7955 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7956 stmt, bump);
7960 if (slp && !slp_perm)
7961 continue;
7963 if (slp_perm)
7965 unsigned n_perms;
7966 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7967 slp_node_instance, false,
7968 &n_perms))
7970 dr_chain.release ();
7971 return false;
7974 else
7976 if (grouped_load)
7978 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7979 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7980 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7982 else
7984 if (j == 0)
7985 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7986 else
7987 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7988 prev_stmt_info = vinfo_for_stmt (new_stmt);
7991 dr_chain.release ();
7994 return true;
7997 /* Function vect_is_simple_cond.
7999 Input:
8000 LOOP - the loop that is being vectorized.
8001 COND - Condition that is checked for simple use.
8003 Output:
8004 *COMP_VECTYPE - the vector type for the comparison.
8005 *DTS - The def types for the arguments of the comparison
8007 Returns whether a COND can be vectorized. Checks whether
8008 condition operands are supportable using vect_is_simple_use. */
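/* A small sketch of the two accepted forms (with made-up SSA names):
   for a comparison COND such as "a_1 < b_2" both operands are checked
   with vect_is_simple_use and *COMP_VECTYPE is taken from (or, for an
   invariant comparison, derived from) their scalar type; for a mask
   COND, i.e. a boolean SSA name such as "mask_3", *COMP_VECTYPE must
   be a vector boolean type.  */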
8010 static bool
8011 vect_is_simple_cond (tree cond, vec_info *vinfo,
8012 tree *comp_vectype, enum vect_def_type *dts,
8013 tree vectype)
8015 tree lhs, rhs;
8016 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8018 /* Mask case. */
8019 if (TREE_CODE (cond) == SSA_NAME
8020 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
8022 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
8023 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
8024 &dts[0], comp_vectype)
8025 || !*comp_vectype
8026 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
8027 return false;
8028 return true;
8031 if (!COMPARISON_CLASS_P (cond))
8032 return false;
8034 lhs = TREE_OPERAND (cond, 0);
8035 rhs = TREE_OPERAND (cond, 1);
8037 if (TREE_CODE (lhs) == SSA_NAME)
8039 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
8040 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
8041 return false;
8043 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
8044 || TREE_CODE (lhs) == FIXED_CST)
8045 dts[0] = vect_constant_def;
8046 else
8047 return false;
8049 if (TREE_CODE (rhs) == SSA_NAME)
8051 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
8052 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
8053 return false;
8055 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
8056 || TREE_CODE (rhs) == FIXED_CST)
8057 dts[1] = vect_constant_def;
8058 else
8059 return false;
8061 if (vectype1 && vectype2
8062 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8063 TYPE_VECTOR_SUBPARTS (vectype2)))
8064 return false;
8066 *comp_vectype = vectype1 ? vectype1 : vectype2;
8067 /* Invariant comparison. */
8068 if (! *comp_vectype)
8070 tree scalar_type = TREE_TYPE (lhs);
8071 /* If we can widen the comparison to match vectype do so. */
8072 if (INTEGRAL_TYPE_P (scalar_type)
8073 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
8074 TYPE_SIZE (TREE_TYPE (vectype))))
8075 scalar_type = build_nonstandard_integer_type
8076 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
8077 TYPE_UNSIGNED (scalar_type));
8078 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
8081 return true;
8084 /* vectorizable_condition.
8086 Check if STMT is a conditional modify expression that can be vectorized.
8087 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8088 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8089 at GSI.
8091 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
8092 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
8093 the else clause if it is 2).
8095 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
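/* For example (an illustrative sketch with made-up SSA names):
   a scalar statement

       x_1 = a_2 < b_3 ? c_4 : d_5;

   is normally vectorized as

       vx_6 = VEC_COND_EXPR <va_7 < vb_8, vc_9, vd_10>;

   whereas for an EXTRACT_LAST_REDUCTION the COND_EXPR is instead
   replaced by a call to the IFN_FOLD_EXTRACT_LAST internal function
   (see the transform code below).  */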
8097 bool
8098 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
8099 gimple **vec_stmt, tree reduc_def, int reduc_index,
8100 slp_tree slp_node)
8102 tree scalar_dest = NULL_TREE;
8103 tree vec_dest = NULL_TREE;
8104 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
8105 tree then_clause, else_clause;
8106 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8107 tree comp_vectype = NULL_TREE;
8108 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
8109 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
8110 tree vec_compare;
8111 tree new_temp;
8112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8113 enum vect_def_type dts[4]
8114 = {vect_unknown_def_type, vect_unknown_def_type,
8115 vect_unknown_def_type, vect_unknown_def_type};
8116 int ndts = 4;
8117 int ncopies;
8118 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8119 stmt_vec_info prev_stmt_info = NULL;
8120 int i, j;
8121 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8122 vec<tree> vec_oprnds0 = vNULL;
8123 vec<tree> vec_oprnds1 = vNULL;
8124 vec<tree> vec_oprnds2 = vNULL;
8125 vec<tree> vec_oprnds3 = vNULL;
8126 tree vec_cmp_type;
8127 bool masked = false;
8129 if (reduc_index && STMT_SLP_TYPE (stmt_info))
8130 return false;
8132 vect_reduction_type reduction_type
8133 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
8134 if (reduction_type == TREE_CODE_REDUCTION)
8136 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8137 return false;
8139 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8140 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8141 && reduc_def))
8142 return false;
8144 /* FORNOW: not yet supported. */
8145 if (STMT_VINFO_LIVE_P (stmt_info))
8147 if (dump_enabled_p ())
8148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8149 "value used after loop.\n");
8150 return false;
8154 /* Is vectorizable conditional operation? */
8155 if (!is_gimple_assign (stmt))
8156 return false;
8158 code = gimple_assign_rhs_code (stmt);
8160 if (code != COND_EXPR)
8161 return false;
8163 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8164 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8166 if (slp_node)
8167 ncopies = 1;
8168 else
8169 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8171 gcc_assert (ncopies >= 1);
8172 if (reduc_index && ncopies > 1)
8173 return false; /* FORNOW */
8175 cond_expr = gimple_assign_rhs1 (stmt);
8176 then_clause = gimple_assign_rhs2 (stmt);
8177 else_clause = gimple_assign_rhs3 (stmt);
8179 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
8180 &comp_vectype, &dts[0], vectype)
8181 || !comp_vectype)
8182 return false;
8184 gimple *def_stmt;
8185 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
8186 &vectype1))
8187 return false;
8188 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
8189 &vectype2))
8190 return false;
8192 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
8193 return false;
8195 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
8196 return false;
8198 masked = !COMPARISON_CLASS_P (cond_expr);
8199 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
8201 if (vec_cmp_type == NULL_TREE)
8202 return false;
8204 cond_code = TREE_CODE (cond_expr);
8205 if (!masked)
8207 cond_expr0 = TREE_OPERAND (cond_expr, 0);
8208 cond_expr1 = TREE_OPERAND (cond_expr, 1);
8211 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
8213 /* Boolean values may have another representation in vectors
8214 and therefore we prefer bit operations over comparison for
8215 them (which also works for scalar masks). We store opcodes
8216 to use in bitop1 and bitop2. Statement is vectorized as
8217 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8218 depending on bitop1 and bitop2 arity. */
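/* For example (illustration only): with boolean operands a and b,
   GT_EXPR is emitted via bitop1/bitop2 as

       tmp_1 = ~b;
       cmp_2 = a & tmp_1;

   i.e. "a > b" on masks becomes "a & ~b".  For EQ_EXPR the XOR is
   computed and, since bitop2 is BIT_NOT_EXPR, the then/else clauses
   are swapped instead of negating the result (see below).  */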
8219 switch (cond_code)
8221 case GT_EXPR:
8222 bitop1 = BIT_NOT_EXPR;
8223 bitop2 = BIT_AND_EXPR;
8224 break;
8225 case GE_EXPR:
8226 bitop1 = BIT_NOT_EXPR;
8227 bitop2 = BIT_IOR_EXPR;
8228 break;
8229 case LT_EXPR:
8230 bitop1 = BIT_NOT_EXPR;
8231 bitop2 = BIT_AND_EXPR;
8232 std::swap (cond_expr0, cond_expr1);
8233 break;
8234 case LE_EXPR:
8235 bitop1 = BIT_NOT_EXPR;
8236 bitop2 = BIT_IOR_EXPR;
8237 std::swap (cond_expr0, cond_expr1);
8238 break;
8239 case NE_EXPR:
8240 bitop1 = BIT_XOR_EXPR;
8241 break;
8242 case EQ_EXPR:
8243 bitop1 = BIT_XOR_EXPR;
8244 bitop2 = BIT_NOT_EXPR;
8245 break;
8246 default:
8247 return false;
8249 cond_code = SSA_NAME;
8252 if (!vec_stmt)
8254 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8255 if (bitop1 != NOP_EXPR)
8257 machine_mode mode = TYPE_MODE (comp_vectype);
8258 optab optab;
8260 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8261 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8262 return false;
8264 if (bitop2 != NOP_EXPR)
8266 optab = optab_for_tree_code (bitop2, comp_vectype,
8267 optab_default);
8268 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8269 return false;
8272 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8273 cond_code))
8275 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8276 return true;
8278 return false;
8281 /* Transform. */
8283 if (!slp_node)
8285 vec_oprnds0.create (1);
8286 vec_oprnds1.create (1);
8287 vec_oprnds2.create (1);
8288 vec_oprnds3.create (1);
8291 /* Handle def. */
8292 scalar_dest = gimple_assign_lhs (stmt);
8293 if (reduction_type != EXTRACT_LAST_REDUCTION)
8294 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8296 /* Handle cond expr. */
8297 for (j = 0; j < ncopies; j++)
8299 gimple *new_stmt = NULL;
8300 if (j == 0)
8302 if (slp_node)
8304 auto_vec<tree, 4> ops;
8305 auto_vec<vec<tree>, 4> vec_defs;
8307 if (masked)
8308 ops.safe_push (cond_expr);
8309 else
8311 ops.safe_push (cond_expr0);
8312 ops.safe_push (cond_expr1);
8314 ops.safe_push (then_clause);
8315 ops.safe_push (else_clause);
8316 vect_get_slp_defs (ops, slp_node, &vec_defs);
8317 vec_oprnds3 = vec_defs.pop ();
8318 vec_oprnds2 = vec_defs.pop ();
8319 if (!masked)
8320 vec_oprnds1 = vec_defs.pop ();
8321 vec_oprnds0 = vec_defs.pop ();
8323 else
8325 gimple *gtemp;
8326 if (masked)
8328 vec_cond_lhs
8329 = vect_get_vec_def_for_operand (cond_expr, stmt,
8330 comp_vectype);
8331 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8332 &gtemp, &dts[0]);
8334 else
8336 vec_cond_lhs
8337 = vect_get_vec_def_for_operand (cond_expr0,
8338 stmt, comp_vectype);
8339 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8341 vec_cond_rhs
8342 = vect_get_vec_def_for_operand (cond_expr1,
8343 stmt, comp_vectype);
8344 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8346 if (reduc_index == 1)
8347 vec_then_clause = reduc_def;
8348 else
8350 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8351 stmt);
8352 vect_is_simple_use (then_clause, loop_vinfo,
8353 &gtemp, &dts[2]);
8355 if (reduc_index == 2)
8356 vec_else_clause = reduc_def;
8357 else
8359 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8360 stmt);
8361 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8365 else
8367 vec_cond_lhs
8368 = vect_get_vec_def_for_stmt_copy (dts[0],
8369 vec_oprnds0.pop ());
8370 if (!masked)
8371 vec_cond_rhs
8372 = vect_get_vec_def_for_stmt_copy (dts[1],
8373 vec_oprnds1.pop ());
8375 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8376 vec_oprnds2.pop ());
8377 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8378 vec_oprnds3.pop ());
8381 if (!slp_node)
8383 vec_oprnds0.quick_push (vec_cond_lhs);
8384 if (!masked)
8385 vec_oprnds1.quick_push (vec_cond_rhs);
8386 vec_oprnds2.quick_push (vec_then_clause);
8387 vec_oprnds3.quick_push (vec_else_clause);
8390 /* Arguments are ready. Create the new vector stmt. */
8391 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8393 vec_then_clause = vec_oprnds2[i];
8394 vec_else_clause = vec_oprnds3[i];
8396 if (masked)
8397 vec_compare = vec_cond_lhs;
8398 else
8400 vec_cond_rhs = vec_oprnds1[i];
8401 if (bitop1 == NOP_EXPR)
8402 vec_compare = build2 (cond_code, vec_cmp_type,
8403 vec_cond_lhs, vec_cond_rhs);
8404 else
8406 new_temp = make_ssa_name (vec_cmp_type);
8407 if (bitop1 == BIT_NOT_EXPR)
8408 new_stmt = gimple_build_assign (new_temp, bitop1,
8409 vec_cond_rhs);
8410 else
8411 new_stmt
8412 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8413 vec_cond_rhs);
8414 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8415 if (bitop2 == NOP_EXPR)
8416 vec_compare = new_temp;
8417 else if (bitop2 == BIT_NOT_EXPR)
8419 /* Instead of doing ~x ? y : z do x ? z : y. */
8420 vec_compare = new_temp;
8421 std::swap (vec_then_clause, vec_else_clause);
8423 else
8425 vec_compare = make_ssa_name (vec_cmp_type);
8426 new_stmt
8427 = gimple_build_assign (vec_compare, bitop2,
8428 vec_cond_lhs, new_temp);
8429 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8433 if (reduction_type == EXTRACT_LAST_REDUCTION)
8435 if (!is_gimple_val (vec_compare))
8437 tree vec_compare_name = make_ssa_name (vec_cmp_type);
8438 new_stmt = gimple_build_assign (vec_compare_name,
8439 vec_compare);
8440 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8441 vec_compare = vec_compare_name;
8443 gcc_assert (reduc_index == 2);
8444 new_stmt = gimple_build_call_internal
8445 (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare,
8446 vec_then_clause);
8447 gimple_call_set_lhs (new_stmt, scalar_dest);
8448 SSA_NAME_DEF_STMT (scalar_dest) = new_stmt;
8449 if (stmt == gsi_stmt (*gsi))
8450 vect_finish_replace_stmt (stmt, new_stmt);
8451 else
8453 /* In this case we're moving the definition to later in the
8454 block. That doesn't matter because the only uses of the
8455 lhs are in phi statements. */
8456 gimple_stmt_iterator old_gsi = gsi_for_stmt (stmt);
8457 gsi_remove (&old_gsi, true);
8458 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8461 else
8463 new_temp = make_ssa_name (vec_dest);
8464 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8465 vec_compare, vec_then_clause,
8466 vec_else_clause);
8467 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8469 if (slp_node)
8470 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8473 if (slp_node)
8474 continue;
8476 if (j == 0)
8477 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8478 else
8479 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8481 prev_stmt_info = vinfo_for_stmt (new_stmt);
8484 vec_oprnds0.release ();
8485 vec_oprnds1.release ();
8486 vec_oprnds2.release ();
8487 vec_oprnds3.release ();
8489 return true;
8492 /* vectorizable_comparison.
8494 Check if STMT is a comparison expression that can be vectorized.
8495 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8496 comparison, put it in VEC_STMT, and insert it at GSI.
8498 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
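/* For example (a rough sketch with made-up SSA names): a scalar mask
   definition

       m_1 = a_2 < b_3;

   becomes the vector comparison

       vm_4 = va_5 < vb_6;

   where the destination has a vector boolean (mask) type with the
   same number of elements as the operands' vector type.  */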
8500 static bool
8501 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8502 gimple **vec_stmt, tree reduc_def,
8503 slp_tree slp_node)
8505 tree lhs, rhs1, rhs2;
8506 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8507 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8508 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8509 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8510 tree new_temp;
8511 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8512 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8513 int ndts = 2;
8514 poly_uint64 nunits;
8515 int ncopies;
8516 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8517 stmt_vec_info prev_stmt_info = NULL;
8518 int i, j;
8519 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8520 vec<tree> vec_oprnds0 = vNULL;
8521 vec<tree> vec_oprnds1 = vNULL;
8522 gimple *def_stmt;
8523 tree mask_type;
8524 tree mask;
8526 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8527 return false;
8529 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8530 return false;
8532 mask_type = vectype;
8533 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8535 if (slp_node)
8536 ncopies = 1;
8537 else
8538 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8540 gcc_assert (ncopies >= 1);
8541 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8542 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8543 && reduc_def))
8544 return false;
8546 if (STMT_VINFO_LIVE_P (stmt_info))
8548 if (dump_enabled_p ())
8549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8550 "value used after loop.\n");
8551 return false;
8554 if (!is_gimple_assign (stmt))
8555 return false;
8557 code = gimple_assign_rhs_code (stmt);
8559 if (TREE_CODE_CLASS (code) != tcc_comparison)
8560 return false;
8562 rhs1 = gimple_assign_rhs1 (stmt);
8563 rhs2 = gimple_assign_rhs2 (stmt);
8565 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8566 &dts[0], &vectype1))
8567 return false;
8569 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8570 &dts[1], &vectype2))
8571 return false;
8573 if (vectype1 && vectype2
8574 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8575 TYPE_VECTOR_SUBPARTS (vectype2)))
8576 return false;
8578 vectype = vectype1 ? vectype1 : vectype2;
8580 /* Invariant comparison. */
8581 if (!vectype)
8583 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8584 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
8585 return false;
8587 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
8588 return false;
8590 /* Can't compare mask and non-mask types. */
8591 if (vectype1 && vectype2
8592 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8593 return false;
8595 /* Boolean values may have another representation in vectors
8596 and therefore we prefer bit operations over comparison for
8597 them (which also works for scalar masks). We store opcodes
8598 to use in bitop1 and bitop2. Statement is vectorized as
8599 BITOP2 (rhs1 BITOP1 rhs2) or
8600 rhs1 BITOP2 (BITOP1 rhs2)
8601 depending on bitop1 and bitop2 arity. */
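/* For instance (illustration only): when RHS1 and RHS2 are themselves
   masks, an equality test is emitted as

       tmp_1 = rhs1 ^ rhs2;
       res_2 = ~tmp_1;

   and an inequality test as just "rhs1 ^ rhs2", matching the
   bitop1/bitop2 selection below.  */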
8602 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8604 if (code == GT_EXPR)
8606 bitop1 = BIT_NOT_EXPR;
8607 bitop2 = BIT_AND_EXPR;
8609 else if (code == GE_EXPR)
8611 bitop1 = BIT_NOT_EXPR;
8612 bitop2 = BIT_IOR_EXPR;
8614 else if (code == LT_EXPR)
8616 bitop1 = BIT_NOT_EXPR;
8617 bitop2 = BIT_AND_EXPR;
8618 std::swap (rhs1, rhs2);
8619 std::swap (dts[0], dts[1]);
8621 else if (code == LE_EXPR)
8623 bitop1 = BIT_NOT_EXPR;
8624 bitop2 = BIT_IOR_EXPR;
8625 std::swap (rhs1, rhs2);
8626 std::swap (dts[0], dts[1]);
8628 else
8630 bitop1 = BIT_XOR_EXPR;
8631 if (code == EQ_EXPR)
8632 bitop2 = BIT_NOT_EXPR;
8636 if (!vec_stmt)
8638 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8639 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8640 dts, ndts, NULL, NULL);
8641 if (bitop1 == NOP_EXPR)
8642 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8643 else
8645 machine_mode mode = TYPE_MODE (vectype);
8646 optab optab;
8648 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8649 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8650 return false;
8652 if (bitop2 != NOP_EXPR)
8654 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8655 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8656 return false;
8658 return true;
8662 /* Transform. */
8663 if (!slp_node)
8665 vec_oprnds0.create (1);
8666 vec_oprnds1.create (1);
8669 /* Handle def. */
8670 lhs = gimple_assign_lhs (stmt);
8671 mask = vect_create_destination_var (lhs, mask_type);
8673 /* Handle cmp expr. */
8674 for (j = 0; j < ncopies; j++)
8676 gassign *new_stmt = NULL;
8677 if (j == 0)
8679 if (slp_node)
8681 auto_vec<tree, 2> ops;
8682 auto_vec<vec<tree>, 2> vec_defs;
8684 ops.safe_push (rhs1);
8685 ops.safe_push (rhs2);
8686 vect_get_slp_defs (ops, slp_node, &vec_defs);
8687 vec_oprnds1 = vec_defs.pop ();
8688 vec_oprnds0 = vec_defs.pop ();
8690 else
8692 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8693 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8696 else
8698 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8699 vec_oprnds0.pop ());
8700 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8701 vec_oprnds1.pop ());
8704 if (!slp_node)
8706 vec_oprnds0.quick_push (vec_rhs1);
8707 vec_oprnds1.quick_push (vec_rhs2);
8710 /* Arguments are ready. Create the new vector stmt. */
8711 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8713 vec_rhs2 = vec_oprnds1[i];
8715 new_temp = make_ssa_name (mask);
8716 if (bitop1 == NOP_EXPR)
8718 new_stmt = gimple_build_assign (new_temp, code,
8719 vec_rhs1, vec_rhs2);
8720 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8722 else
8724 if (bitop1 == BIT_NOT_EXPR)
8725 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8726 else
8727 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8728 vec_rhs2);
8729 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8730 if (bitop2 != NOP_EXPR)
8732 tree res = make_ssa_name (mask);
8733 if (bitop2 == BIT_NOT_EXPR)
8734 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8735 else
8736 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8737 new_temp);
8738 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8741 if (slp_node)
8742 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8745 if (slp_node)
8746 continue;
8748 if (j == 0)
8749 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8750 else
8751 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8753 prev_stmt_info = vinfo_for_stmt (new_stmt);
8756 vec_oprnds0.release ();
8757 vec_oprnds1.release ();
8759 return true;
8762 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8763 can handle all live statements in the node. Otherwise return true
8764 if STMT is not live or if vectorizable_live_operation can handle it.
8765 GSI and VEC_STMT are as for vectorizable_live_operation. */
8767 static bool
8768 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8769 slp_tree slp_node, gimple **vec_stmt)
8771 if (slp_node)
8773 gimple *slp_stmt;
8774 unsigned int i;
8775 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8777 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8778 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8779 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8780 vec_stmt))
8781 return false;
8784 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8785 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8786 return false;
8788 return true;
8791 /* Make sure the statement is vectorizable. */
8793 bool
8794 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8795 slp_instance node_instance)
8797 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8798 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8799 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8800 bool ok;
8801 gimple *pattern_stmt;
8802 gimple_seq pattern_def_seq;
8804 if (dump_enabled_p ())
8806 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8807 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8810 if (gimple_has_volatile_ops (stmt))
8812 if (dump_enabled_p ())
8813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8814 "not vectorized: stmt has volatile operands\n");
8816 return false;
8819 /* Skip stmts that do not need to be vectorized. In loops this is expected
8820 to include:
8821 - the COND_EXPR which is the loop exit condition
8822 - any LABEL_EXPRs in the loop
8823 - computations that are used only for array indexing or loop control.
8824 In basic blocks we only analyze statements that are a part of some SLP
8825 instance, therefore, all the statements are relevant.
8827 A pattern statement needs to be analyzed instead of the original statement
8828 if the original statement is not relevant. Otherwise, we analyze both
8829 statements. In basic blocks we are called from some SLP instance
8830 traversal; there we don't analyze pattern stmts instead of the original,
8831 because the pattern stmts will already be part of the SLP instance. */
8833 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8834 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8835 && !STMT_VINFO_LIVE_P (stmt_info))
8837 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8838 && pattern_stmt
8839 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8840 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8842 /* Analyze PATTERN_STMT instead of the original stmt. */
8843 stmt = pattern_stmt;
8844 stmt_info = vinfo_for_stmt (pattern_stmt);
8845 if (dump_enabled_p ())
8847 dump_printf_loc (MSG_NOTE, vect_location,
8848 "==> examining pattern statement: ");
8849 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8852 else
8854 if (dump_enabled_p ())
8855 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8857 return true;
8860 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8861 && node == NULL
8862 && pattern_stmt
8863 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8864 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8866 /* Analyze PATTERN_STMT too. */
8867 if (dump_enabled_p ())
8869 dump_printf_loc (MSG_NOTE, vect_location,
8870 "==> examining pattern statement: ");
8871 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8874 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8875 node_instance))
8876 return false;
8879 if (is_pattern_stmt_p (stmt_info)
8880 && node == NULL
8881 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8883 gimple_stmt_iterator si;
8885 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8887 gimple *pattern_def_stmt = gsi_stmt (si);
8888 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8889 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8891 /* Analyze def stmt of STMT if it's a pattern stmt. */
8892 if (dump_enabled_p ())
8894 dump_printf_loc (MSG_NOTE, vect_location,
8895 "==> examining pattern def statement: ");
8896 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8899 if (!vect_analyze_stmt (pattern_def_stmt,
8900 need_to_vectorize, node, node_instance))
8901 return false;
8906 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8908 case vect_internal_def:
8909 break;
8911 case vect_reduction_def:
8912 case vect_nested_cycle:
8913 gcc_assert (!bb_vinfo
8914 && (relevance == vect_used_in_outer
8915 || relevance == vect_used_in_outer_by_reduction
8916 || relevance == vect_used_by_reduction
8917 || relevance == vect_unused_in_scope
8918 || relevance == vect_used_only_live));
8919 break;
8921 case vect_induction_def:
8922 gcc_assert (!bb_vinfo);
8923 break;
8925 case vect_constant_def:
8926 case vect_external_def:
8927 case vect_unknown_def_type:
8928 default:
8929 gcc_unreachable ();
8932 if (STMT_VINFO_RELEVANT_P (stmt_info))
8934 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8935 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8936 || (is_gimple_call (stmt)
8937 && gimple_call_lhs (stmt) == NULL_TREE));
8938 *need_to_vectorize = true;
8941 if (PURE_SLP_STMT (stmt_info) && !node)
8943 dump_printf_loc (MSG_NOTE, vect_location,
8944 "handled only by SLP analysis\n");
8945 return true;
8948 ok = true;
8949 if (!bb_vinfo
8950 && (STMT_VINFO_RELEVANT_P (stmt_info)
8951 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8952 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8953 || vectorizable_conversion (stmt, NULL, NULL, node)
8954 || vectorizable_shift (stmt, NULL, NULL, node)
8955 || vectorizable_operation (stmt, NULL, NULL, node)
8956 || vectorizable_assignment (stmt, NULL, NULL, node)
8957 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8958 || vectorizable_call (stmt, NULL, NULL, node)
8959 || vectorizable_store (stmt, NULL, NULL, node)
8960 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
8961 || vectorizable_induction (stmt, NULL, NULL, node)
8962 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8963 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8964 else
8966 if (bb_vinfo)
8967 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8968 || vectorizable_conversion (stmt, NULL, NULL, node)
8969 || vectorizable_shift (stmt, NULL, NULL, node)
8970 || vectorizable_operation (stmt, NULL, NULL, node)
8971 || vectorizable_assignment (stmt, NULL, NULL, node)
8972 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8973 || vectorizable_call (stmt, NULL, NULL, node)
8974 || vectorizable_store (stmt, NULL, NULL, node)
8975 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8976 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8979 if (!ok)
8981 if (dump_enabled_p ())
8983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8984 "not vectorized: relevant stmt not ");
8985 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8986 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8989 return false;
8992 if (bb_vinfo)
8993 return true;
8995 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
8996 need extra handling, except for vectorizable reductions. */
8997 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8998 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
9000 if (dump_enabled_p ())
9002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9003 "not vectorized: live stmt not supported: ");
9004 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
9007 return false;
9010 return true;
9014 /* Function vect_transform_stmt.
9016 Create a vectorized stmt to replace STMT, and insert it at BSI. */
9018 bool
9019 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
9020 bool *grouped_store, slp_tree slp_node,
9021 slp_instance slp_node_instance)
9023 bool is_store = false;
9024 gimple *vec_stmt = NULL;
9025 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9026 bool done;
9028 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
9029 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
9031 switch (STMT_VINFO_TYPE (stmt_info))
9033 case type_demotion_vec_info_type:
9034 case type_promotion_vec_info_type:
9035 case type_conversion_vec_info_type:
9036 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
9037 gcc_assert (done);
9038 break;
9040 case induc_vec_info_type:
9041 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
9042 gcc_assert (done);
9043 break;
9045 case shift_vec_info_type:
9046 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
9047 gcc_assert (done);
9048 break;
9050 case op_vec_info_type:
9051 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
9052 gcc_assert (done);
9053 break;
9055 case assignment_vec_info_type:
9056 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
9057 gcc_assert (done);
9058 break;
9060 case load_vec_info_type:
9061 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
9062 slp_node_instance);
9063 gcc_assert (done);
9064 break;
9066 case store_vec_info_type:
9067 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
9068 gcc_assert (done);
9069 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
9071 /* In case of interleaving, the whole chain is vectorized when the
9072 last store in the chain is reached. Store stmts before the last
9073 one are skipped, and their stmt_vec_info shouldn't be freed
9074 meanwhile. */
9075 *grouped_store = true;
9076 if (STMT_VINFO_VEC_STMT (stmt_info))
9077 is_store = true;
9079 else
9080 is_store = true;
9081 break;
9083 case condition_vec_info_type:
9084 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
9085 gcc_assert (done);
9086 break;
9088 case comparison_vec_info_type:
9089 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
9090 gcc_assert (done);
9091 break;
9093 case call_vec_info_type:
9094 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
9095 stmt = gsi_stmt (*gsi);
9096 break;
9098 case call_simd_clone_vec_info_type:
9099 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
9100 stmt = gsi_stmt (*gsi);
9101 break;
9103 case reduc_vec_info_type:
9104 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
9105 slp_node_instance);
9106 gcc_assert (done);
9107 break;
9109 default:
9110 if (!STMT_VINFO_LIVE_P (stmt_info))
9112 if (dump_enabled_p ())
9113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9114 "stmt not supported.\n");
9115 gcc_unreachable ();
9119 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9120 This would break hybrid SLP vectorization. */
9121 if (slp_node)
9122 gcc_assert (!vec_stmt
9123 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
9125 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9126 is being vectorized, but outside the immediately enclosing loop. */
9127 if (vec_stmt
9128 && STMT_VINFO_LOOP_VINFO (stmt_info)
9129 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
9130 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
9131 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
9132 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
9133 || STMT_VINFO_RELEVANT (stmt_info) ==
9134 vect_used_in_outer_by_reduction))
9136 struct loop *innerloop = LOOP_VINFO_LOOP (
9137 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
9138 imm_use_iterator imm_iter;
9139 use_operand_p use_p;
9140 tree scalar_dest;
9141 gimple *exit_phi;
9143 if (dump_enabled_p ())
9144 dump_printf_loc (MSG_NOTE, vect_location,
9145 "Record the vdef for outer-loop vectorization.\n");
9147 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
9148 (to be used when vectorizing outer-loop stmts that use the DEF of
9149 STMT). */
9150 if (gimple_code (stmt) == GIMPLE_PHI)
9151 scalar_dest = PHI_RESULT (stmt);
9152 else
9153 scalar_dest = gimple_assign_lhs (stmt);
9155 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
9157 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
9159 exit_phi = USE_STMT (use_p);
9160 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
9165 /* Handle stmts whose DEF is used outside the loop-nest that is
9166 being vectorized. */
9167 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
9169 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
9170 gcc_assert (done);
9173 if (vec_stmt)
9174 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
9176 return is_store;
9180 /* Remove a group of stores (for SLP or interleaving), free their
9181 stmt_vec_info. */
9183 void
9184 vect_remove_stores (gimple *first_stmt)
9186 gimple *next = first_stmt;
9187 gimple *tmp;
9188 gimple_stmt_iterator next_si;
9190 while (next)
9192 stmt_vec_info stmt_info = vinfo_for_stmt (next);
9194 tmp = GROUP_NEXT_ELEMENT (stmt_info);
9195 if (is_pattern_stmt_p (stmt_info))
9196 next = STMT_VINFO_RELATED_STMT (stmt_info);
9197 /* Free the attached stmt_vec_info and remove the stmt. */
9198 next_si = gsi_for_stmt (next);
9199 unlink_stmt_vdef (next);
9200 gsi_remove (&next_si, true);
9201 release_defs (next);
9202 free_stmt_vec_info (next);
9203 next = tmp;
9208 /* Function new_stmt_vec_info.
9210 Create and initialize a new stmt_vec_info struct for STMT. */
9212 stmt_vec_info
9213 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
9215 stmt_vec_info res;
9216 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
9218 STMT_VINFO_TYPE (res) = undef_vec_info_type;
9219 STMT_VINFO_STMT (res) = stmt;
9220 res->vinfo = vinfo;
9221 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
9222 STMT_VINFO_LIVE_P (res) = false;
9223 STMT_VINFO_VECTYPE (res) = NULL;
9224 STMT_VINFO_VEC_STMT (res) = NULL;
9225 STMT_VINFO_VECTORIZABLE (res) = true;
9226 STMT_VINFO_IN_PATTERN_P (res) = false;
9227 STMT_VINFO_RELATED_STMT (res) = NULL;
9228 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
9229 STMT_VINFO_DATA_REF (res) = NULL;
9230 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
9231 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
9233 if (gimple_code (stmt) == GIMPLE_PHI
9234 && is_loop_header_bb_p (gimple_bb (stmt)))
9235 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
9236 else
9237 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
9239 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
9240 STMT_SLP_TYPE (res) = loop_vect;
9241 STMT_VINFO_NUM_SLP_USES (res) = 0;
9243 GROUP_FIRST_ELEMENT (res) = NULL;
9244 GROUP_NEXT_ELEMENT (res) = NULL;
9245 GROUP_SIZE (res) = 0;
9246 GROUP_STORE_COUNT (res) = 0;
9247 GROUP_GAP (res) = 0;
9248 GROUP_SAME_DR_STMT (res) = NULL;
9250 return res;
9254 /* Create the vector that holds the stmt_vec_info structs. */
9256 void
9257 init_stmt_vec_info_vec (void)
9259 gcc_assert (!stmt_vec_info_vec.exists ());
9260 stmt_vec_info_vec.create (50);
9264 /* Free the vector that holds the stmt_vec_info structs. */
9266 void
9267 free_stmt_vec_info_vec (void)
9269 unsigned int i;
9270 stmt_vec_info info;
9271 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9272 if (info != NULL)
9273 free_stmt_vec_info (STMT_VINFO_STMT (info));
9274 gcc_assert (stmt_vec_info_vec.exists ());
9275 stmt_vec_info_vec.release ();
9279 /* Free stmt vectorization related info. */
9281 void
9282 free_stmt_vec_info (gimple *stmt)
9284 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9286 if (!stmt_info)
9287 return;
9289 /* Check if this statement has a related "pattern stmt"
9290 (introduced by the vectorizer during the pattern recognition
9291 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9292 too. */
9293 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9295 stmt_vec_info patt_info
9296 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9297 if (patt_info)
9299 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9300 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9301 gimple_set_bb (patt_stmt, NULL);
9302 tree lhs = gimple_get_lhs (patt_stmt);
9303 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9304 release_ssa_name (lhs);
9305 if (seq)
9307 gimple_stmt_iterator si;
9308 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9310 gimple *seq_stmt = gsi_stmt (si);
9311 gimple_set_bb (seq_stmt, NULL);
9312 lhs = gimple_get_lhs (seq_stmt);
9313 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9314 release_ssa_name (lhs);
9315 free_stmt_vec_info (seq_stmt);
9318 free_stmt_vec_info (patt_stmt);
9322 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9323 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9324 set_vinfo_for_stmt (stmt, NULL);
9325 free (stmt_info);
9329 /* Function get_vectype_for_scalar_type_and_size.
9331 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9332 by the target. */
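/* For example, on a target that provides 16-byte integer vectors, a
   hypothetical caller could ask for the 16-byte vector form of "int"
   like so (an illustrative sketch only, not code used by the
   vectorizer itself):

     tree v = get_vectype_for_scalar_type_and_size (integer_type_node,
						    16);
     if (v != NULL_TREE)
       // On such a target V has 16 / 4 == 4 "int" elements.
       gcc_checking_assert (known_eq (TYPE_VECTOR_SUBPARTS (v), 4U));

   Passing a SIZE of zero instead lets the target choose its preferred
   SIMD mode for the element mode.  */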
9334 tree
9335 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9337 tree orig_scalar_type = scalar_type;
9338 scalar_mode inner_mode;
9339 machine_mode simd_mode;
9340 poly_uint64 nunits;
9341 tree vectype;
9343 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9344 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9345 return NULL_TREE;
9347 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9349 /* For vector types of elements whose mode precision doesn't
9350 match their type's precision, we use an element type of mode
9351 precision. The vectorization routines will have to make sure
9352 they support the proper result truncation/extension.
9353 We also make sure to build vector types with INTEGER_TYPE
9354 component type only. */
9355 if (INTEGRAL_TYPE_P (scalar_type)
9356 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9357 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9358 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9359 TYPE_UNSIGNED (scalar_type));
9361 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9362 When the component mode passes the above test simply use a type
9363 corresponding to that mode. The theory is that any use that
9364 would cause problems with this will disable vectorization anyway. */
9365 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9366 && !INTEGRAL_TYPE_P (scalar_type))
9367 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9369 /* We can't build a vector type of elements with alignment bigger than
9370 their size. */
9371 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9372 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9373 TYPE_UNSIGNED (scalar_type));
9375 /* If we fell back to using the mode, fail if there was
9376 no scalar type for it. */
9377 if (scalar_type == NULL_TREE)
9378 return NULL_TREE;
9380 /* If no size was supplied, use the mode the target prefers. Otherwise
9381 look up a vector mode of the specified size. */
9382 if (known_eq (size, 0U))
9383 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9384 else if (!multiple_p (size, nbytes, &nunits)
9385 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9386 return NULL_TREE;
9387 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9388 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9389 return NULL_TREE;
9391 vectype = build_vector_type (scalar_type, nunits);
9393 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9394 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9395 return NULL_TREE;
9397 /* Re-attach the address-space qualifier if we canonicalized the scalar
9398 type. */
9399 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9400 return build_qualified_type
9401 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9403 return vectype;
9406 poly_uint64 current_vector_size;
9408 /* Function get_vectype_for_scalar_type.
9410 Returns the vector type corresponding to SCALAR_TYPE as supported
9411 by the target. */
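/* Illustrative note: the first successful call latches
   current_vector_size, and later calls then return vector types of the
   same size in bytes.  A hedged sketch, assuming a target whose
   preferred SIMD width is 16 bytes:

     tree vi = get_vectype_for_scalar_type (integer_type_node);
     // current_vector_size is now 16 on such a target, so ...
     tree vs = get_vectype_for_scalar_type (short_integer_type_node);
     // ... VS has 8 elements, matching the latched 16-byte size.
   */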
9413 tree
9414 get_vectype_for_scalar_type (tree scalar_type)
9416 tree vectype;
9417 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9418 current_vector_size);
9419 if (vectype
9420 && known_eq (current_vector_size, 0U))
9421 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9422 return vectype;
9425 /* Function get_mask_type_for_scalar_type.
9427 Returns the mask type corresponding to the result of a comparison
9428 of vectors of the specified SCALAR_TYPE, as supported by the target. */
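/* For instance, with 16-byte vectors the comparison of two four-element
   "int" vectors yields a four-element mask; a hedged sketch (how the
   mask is represented depends on the target):

     tree mask_type = get_mask_type_for_scalar_type (integer_type_node);
     // MASK_TYPE is a boolean vector type with as many elements as the
     // "int" vector type, or NULL if no suitable vector type exists.
   */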
9430 tree
9431 get_mask_type_for_scalar_type (tree scalar_type)
9433 tree vectype = get_vectype_for_scalar_type (scalar_type);
9435 if (!vectype)
9436 return NULL;
9438 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9439 current_vector_size);
9442 /* Function get_same_sized_vectype
9444 Returns a vector type corresponding to SCALAR_TYPE of size
9445 VECTOR_TYPE if supported by the target. */
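/* For example, starting from a four-element "float" vector, asking for
   a same-sized "int" vector yields the corresponding four-element
   integer vector on targets that support it (illustrative only):

     tree vfloat = get_vectype_for_scalar_type (float_type_node);
     tree vint = get_same_sized_vectype (integer_type_node, vfloat);
     // When VINT is nonnull it occupies the same number of bytes as
     // VFLOAT, so values of the two types can be converted lane-wise.
   */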
9447 tree
9448 get_same_sized_vectype (tree scalar_type, tree vector_type)
9450 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9451 return build_same_sized_truth_vector_type (vector_type);
9453 return get_vectype_for_scalar_type_and_size
9454 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9457 /* Function vect_is_simple_use.
9459 Input:
9460 VINFO - the vect info of the loop or basic block that is being vectorized.
9461 OPERAND - operand in the loop or bb.
9462 Output:
9463 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9464 DT - the type of definition
9466 Returns whether a stmt with OPERAND can be vectorized.
9467 For loops, supportable operands are constants, loop invariants, and operands
9468 that are defined by the current iteration of the loop. Unsupportable
9469 operands are those that are defined by a previous iteration of the loop (as
9470 is the case in reduction/induction computations).
9471 For basic blocks, supportable operands are constants and bb invariants.
9472 For now, operands defined outside the basic block are not supported. */
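/* A hedged usage sketch (STMT is assumed to be a gimple assignment and
   VINFO a vec_info, both in scope in the caller; this is not a
   quotation of any particular caller):

     gimple *def_stmt;
     enum vect_def_type dt;
     if (!vect_is_simple_use (gimple_assign_rhs1 (stmt), vinfo,
			      &def_stmt, &dt))
       return false;
     bool invariant_p = (dt == vect_constant_def
			 || dt == vect_external_def);
     // INVARIANT_P is true when the operand does not vary inside the
     // region being vectorized.
   */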
9474 bool
9475 vect_is_simple_use (tree operand, vec_info *vinfo,
9476 gimple **def_stmt, enum vect_def_type *dt)
9478 *def_stmt = NULL;
9479 *dt = vect_unknown_def_type;
9481 if (dump_enabled_p ())
9483 dump_printf_loc (MSG_NOTE, vect_location,
9484 "vect_is_simple_use: operand ");
9485 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9486 dump_printf (MSG_NOTE, "\n");
9489 if (CONSTANT_CLASS_P (operand))
9491 *dt = vect_constant_def;
9492 return true;
9495 if (is_gimple_min_invariant (operand))
9497 *dt = vect_external_def;
9498 return true;
9501 if (TREE_CODE (operand) != SSA_NAME)
9503 if (dump_enabled_p ())
9504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9505 "not ssa-name.\n");
9506 return false;
9509 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9511 *dt = vect_external_def;
9512 return true;
9515 *def_stmt = SSA_NAME_DEF_STMT (operand);
9516 if (dump_enabled_p ())
9518 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9519 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9522 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9523 *dt = vect_external_def;
9524 else
9526 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9527 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9530 if (dump_enabled_p ())
9532 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9533 switch (*dt)
9535 case vect_uninitialized_def:
9536 dump_printf (MSG_NOTE, "uninitialized\n");
9537 break;
9538 case vect_constant_def:
9539 dump_printf (MSG_NOTE, "constant\n");
9540 break;
9541 case vect_external_def:
9542 dump_printf (MSG_NOTE, "external\n");
9543 break;
9544 case vect_internal_def:
9545 dump_printf (MSG_NOTE, "internal\n");
9546 break;
9547 case vect_induction_def:
9548 dump_printf (MSG_NOTE, "induction\n");
9549 break;
9550 case vect_reduction_def:
9551 dump_printf (MSG_NOTE, "reduction\n");
9552 break;
9553 case vect_double_reduction_def:
9554 dump_printf (MSG_NOTE, "double reduction\n");
9555 break;
9556 case vect_nested_cycle:
9557 dump_printf (MSG_NOTE, "nested cycle\n");
9558 break;
9559 case vect_unknown_def_type:
9560 dump_printf (MSG_NOTE, "unknown\n");
9561 break;
9565 if (*dt == vect_unknown_def_type)
9567 if (dump_enabled_p ())
9568 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9569 "Unsupported pattern.\n");
9570 return false;
9573 switch (gimple_code (*def_stmt))
9575 case GIMPLE_PHI:
9576 case GIMPLE_ASSIGN:
9577 case GIMPLE_CALL:
9578 break;
9579 default:
9580 if (dump_enabled_p ())
9581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9582 "unsupported defining stmt:\n");
9583 return false;
9586 return true;
9589 /* Function vect_is_simple_use.
9591 Same as vect_is_simple_use but also determines the vector operand
9592 type of OPERAND and stores it to *VECTYPE. If the definition of
9593 OPERAND is vect_uninitialized_def, vect_constant_def or
9594 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9595 is responsible for computing the best suited vector type for the
9596 scalar operand. */
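/* A sketch of the overload below (STMT and VINFO are assumed to be in
   scope; illustrative only):

     gimple *def_stmt;
     enum vect_def_type dt;
     tree vectype;
     if (!vect_is_simple_use (gimple_assign_rhs2 (stmt), vinfo,
			      &def_stmt, &dt, &vectype))
       return false;
     if (vectype == NULL_TREE)
       // Constant or external def: the caller picks a vector type.
       vectype = get_vectype_for_scalar_type
	 (TREE_TYPE (gimple_assign_rhs2 (stmt)));
   */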
9598 bool
9599 vect_is_simple_use (tree operand, vec_info *vinfo,
9600 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9602 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9603 return false;
9605 /* Now get a vector type if the def is internal, otherwise supply
9606 NULL_TREE and leave it up to the caller to figure out a proper
9607 type for the use stmt. */
9608 if (*dt == vect_internal_def
9609 || *dt == vect_induction_def
9610 || *dt == vect_reduction_def
9611 || *dt == vect_double_reduction_def
9612 || *dt == vect_nested_cycle)
9614 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9616 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9617 && !STMT_VINFO_RELEVANT (stmt_info)
9618 && !STMT_VINFO_LIVE_P (stmt_info))
9619 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9621 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9622 gcc_assert (*vectype != NULL_TREE);
9624 else if (*dt == vect_uninitialized_def
9625 || *dt == vect_constant_def
9626 || *dt == vect_external_def)
9627 *vectype = NULL_TREE;
9628 else
9629 gcc_unreachable ();
9631 return true;
9635 /* Function supportable_widening_operation
9637 Check whether an operation represented by the code CODE is a
9638 widening operation that is supported by the target platform in
9639 vector form (i.e., when operating on arguments of type VECTYPE_IN
9640 producing a result of type VECTYPE_OUT).
9642 Widening operations we currently support are NOP (CONVERT), FLOAT,
9643 WIDEN_MULT, WIDEN_LSHIFT, DOT_PROD and SAD. This function checks if these operations are supported
9644 by the target platform either directly (via vector tree-codes), or via
9645 target builtins.
9647 Output:
9648 - CODE1 and CODE2 are codes of vector operations to be used when
9649 vectorizing the operation, if available.
9650 - MULTI_STEP_CVT determines the number of required intermediate steps in
9651 case of multi-step conversion (like char->short->int - in that case
9652 MULTI_STEP_CVT will be 1).
9653 - INTERM_TYPES contains the intermediate type required to perform the
9654 widening operation (short in the above example). */
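/* A hedged sketch of querying a two-step widening, e.g. a char->int
   conversion STMT where VECTYPE_IN is a 16-element char vector and
   VECTYPE_OUT a 4-element int vector (the concrete modes depend on the
   target):

     enum tree_code code1, code2;
     int multi_step;
     vec<tree> interm_types = vNULL;
     if (supportable_widening_operation (NOP_EXPR, stmt, vectype_out,
					 vectype_in, &code1, &code2,
					 &multi_step, &interm_types))
       // On success MULTI_STEP counts the intermediate hops (1 for
       // char->short->int) and INTERM_TYPES records one vector type
       // per hop (the short vector type here).
       gcc_checking_assert (multi_step == (int) interm_types.length ());
   */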
9656 bool
9657 supportable_widening_operation (enum tree_code code, gimple *stmt,
9658 tree vectype_out, tree vectype_in,
9659 enum tree_code *code1, enum tree_code *code2,
9660 int *multi_step_cvt,
9661 vec<tree> *interm_types)
9663 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9664 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9665 struct loop *vect_loop = NULL;
9666 machine_mode vec_mode;
9667 enum insn_code icode1, icode2;
9668 optab optab1, optab2;
9669 tree vectype = vectype_in;
9670 tree wide_vectype = vectype_out;
9671 enum tree_code c1, c2;
9672 int i;
9673 tree prev_type, intermediate_type;
9674 machine_mode intermediate_mode, prev_mode;
9675 optab optab3, optab4;
9677 *multi_step_cvt = 0;
9678 if (loop_info)
9679 vect_loop = LOOP_VINFO_LOOP (loop_info);
9681 switch (code)
9683 case WIDEN_MULT_EXPR:
9684 /* The result of a vectorized widening operation usually requires
9685 two vectors (because the widened results do not fit into one vector).
9686 The generated vector results would normally be expected to be
9687 generated in the same order as in the original scalar computation,
9688 i.e. if 8 results are generated in each vector iteration, they are
9689 to be organized as follows:
9690 vect1: [res1,res2,res3,res4],
9691 vect2: [res5,res6,res7,res8].
9693 However, in the special case that the result of the widening
9694 operation is used in a reduction computation only, the order doesn't
9695 matter (because when vectorizing a reduction we change the order of
9696 the computation). Some targets can take advantage of this and
9697 generate more efficient code. For example, targets like Altivec,
9698 that support widen_mult using a sequence of {mult_even,mult_odd}
9699 generate the following vectors:
9700 vect1: [res1,res3,res5,res7],
9701 vect2: [res2,res4,res6,res8].
9703 When vectorizing outer-loops, we execute the inner-loop sequentially
9704 (each vectorized inner-loop iteration contributes to VF outer-loop
9705 iterations in parallel). We therefore don't allow changing the
9706 order of the computation in the inner-loop during outer-loop
9707 vectorization. */
9708 /* TODO: Another case in which order doesn't *really* matter is when we
9709 widen and then contract again, e.g. (short)((int)x * y >> 8).
9710 Normally, pack_trunc performs an even/odd permute, whereas the
9711 repack from an even/odd expansion would be an interleave, which
9712 would be significantly simpler for e.g. AVX2. */
9713 /* In any case, in order to avoid duplicating the code below, recurse
9714 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9715 are properly set up for the caller. If we fail, we'll continue with
9716 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9717 if (vect_loop
9718 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9719 && !nested_in_vect_loop_p (vect_loop, stmt)
9720 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9721 stmt, vectype_out, vectype_in,
9722 code1, code2, multi_step_cvt,
9723 interm_types))
9725 /* Elements in a vector with the vect_used_by_reduction property cannot
9726 be reordered if the use chain with this property does not have the
9727 same operation. One such example is s += a * b, where elements
9728 in a and b cannot be reordered. Here we check if the vector defined
9729 by STMT is only directly used in the reduction statement. */
9730 tree lhs = gimple_assign_lhs (stmt);
9731 use_operand_p dummy;
9732 gimple *use_stmt;
9733 stmt_vec_info use_stmt_info = NULL;
9734 if (single_imm_use (lhs, &dummy, &use_stmt)
9735 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9736 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9737 return true;
9739 c1 = VEC_WIDEN_MULT_LO_EXPR;
9740 c2 = VEC_WIDEN_MULT_HI_EXPR;
9741 break;
9743 case DOT_PROD_EXPR:
9744 c1 = DOT_PROD_EXPR;
9745 c2 = DOT_PROD_EXPR;
9746 break;
9748 case SAD_EXPR:
9749 c1 = SAD_EXPR;
9750 c2 = SAD_EXPR;
9751 break;
9753 case VEC_WIDEN_MULT_EVEN_EXPR:
9754 /* Support the recursion induced just above. */
9755 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9756 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9757 break;
9759 case WIDEN_LSHIFT_EXPR:
9760 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9761 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9762 break;
9764 CASE_CONVERT:
9765 c1 = VEC_UNPACK_LO_EXPR;
9766 c2 = VEC_UNPACK_HI_EXPR;
9767 break;
9769 case FLOAT_EXPR:
9770 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9771 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9772 break;
9774 case FIX_TRUNC_EXPR:
9775 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9776 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9777 computing the operation. */
9778 return false;
9780 default:
9781 gcc_unreachable ();
9784 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9785 std::swap (c1, c2);
9787 if (code == FIX_TRUNC_EXPR)
9790 /* The signedness is determined from the output operand. */
9790 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9791 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9793 else
9795 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9796 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9799 if (!optab1 || !optab2)
9800 return false;
9802 vec_mode = TYPE_MODE (vectype);
9803 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9804 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9805 return false;
9807 *code1 = c1;
9808 *code2 = c2;
9810 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9811 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9812 /* For scalar masks we may have different boolean
9813 vector types with the same QImode. Thus we add
9814 an additional check on the number of elements. */
9815 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9816 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
9817 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
9819 /* Check if it's a multi-step conversion that can be done using intermediate
9820 types. */
9822 prev_type = vectype;
9823 prev_mode = vec_mode;
9825 if (!CONVERT_EXPR_CODE_P (code))
9826 return false;
9828 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9829 intermediate steps in the promotion sequence. We try
9830 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9831 not. */
9832 interm_types->create (MAX_INTERM_CVT_STEPS);
9833 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9835 intermediate_mode = insn_data[icode1].operand[0].mode;
9836 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9838 intermediate_type = vect_halve_mask_nunits (prev_type);
9839 if (intermediate_mode != TYPE_MODE (intermediate_type))
9840 return false;
9842 else
9843 intermediate_type
9844 = lang_hooks.types.type_for_mode (intermediate_mode,
9845 TYPE_UNSIGNED (prev_type));
9847 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9848 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9850 if (!optab3 || !optab4
9851 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9852 || insn_data[icode1].operand[0].mode != intermediate_mode
9853 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9854 || insn_data[icode2].operand[0].mode != intermediate_mode
9855 || ((icode1 = optab_handler (optab3, intermediate_mode))
9856 == CODE_FOR_nothing)
9857 || ((icode2 = optab_handler (optab4, intermediate_mode))
9858 == CODE_FOR_nothing))
9859 break;
9861 interm_types->quick_push (intermediate_type);
9862 (*multi_step_cvt)++;
9864 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9865 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9866 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9867 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
9868 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
9870 prev_type = intermediate_type;
9871 prev_mode = intermediate_mode;
9874 interm_types->release ();
9875 return false;
9879 /* Function supportable_narrowing_operation
9881 Check whether an operation represented by the code CODE is a
9882 narrowing operation that is supported by the target platform in
9883 vector form (i.e., when operating on arguments of type VECTYPE_IN
9884 and producing a result of type VECTYPE_OUT).
9886 Narrowing operations we currently support are NOP (CONVERT) and
9887 FIX_TRUNC. This function checks if these operations are supported by
9888 the target platform directly via vector tree-codes.
9890 Output:
9891 - CODE1 is the code of a vector operation to be used when
9892 vectorizing the operation, if available.
9893 - MULTI_STEP_CVT determines the number of required intermediate steps in
9894 case of multi-step conversion (like int->short->char - in that case
9895 MULTI_STEP_CVT will be 1).
9896 - INTERM_TYPES contains the intermediate type required to perform the
9897 narrowing operation (short in the above example). */
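/* A hedged sketch for an int->char narrowing, with VECTYPE_IN a
   4-element int vector and VECTYPE_OUT a 16-element char vector
   (illustrative; the modes depend on the target):

     enum tree_code code1;
     int multi_step;
     vec<tree> interm_types = vNULL;
     bool ok = supportable_narrowing_operation (NOP_EXPR, vectype_out,
						vectype_in, &code1,
						&multi_step,
						&interm_types);
     // On success CODE1 is VEC_PACK_TRUNC_EXPR; MULTI_STEP is 1 when
     // the target has to pack int->short->char in two steps, with the
     // short vector type recorded in INTERM_TYPES.
   */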
9899 bool
9900 supportable_narrowing_operation (enum tree_code code,
9901 tree vectype_out, tree vectype_in,
9902 enum tree_code *code1, int *multi_step_cvt,
9903 vec<tree> *interm_types)
9905 machine_mode vec_mode;
9906 enum insn_code icode1;
9907 optab optab1, interm_optab;
9908 tree vectype = vectype_in;
9909 tree narrow_vectype = vectype_out;
9910 enum tree_code c1;
9911 tree intermediate_type, prev_type;
9912 machine_mode intermediate_mode, prev_mode;
9913 int i;
9914 bool uns;
9916 *multi_step_cvt = 0;
9917 switch (code)
9919 CASE_CONVERT:
9920 c1 = VEC_PACK_TRUNC_EXPR;
9921 break;
9923 case FIX_TRUNC_EXPR:
9924 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9925 break;
9927 case FLOAT_EXPR:
9928 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9929 tree code and optabs used for computing the operation. */
9930 return false;
9932 default:
9933 gcc_unreachable ();
9936 if (code == FIX_TRUNC_EXPR)
9938 /* The signedness is determined from the output operand. */
9938 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9939 else
9940 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9942 if (!optab1)
9943 return false;
9945 vec_mode = TYPE_MODE (vectype);
9946 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9947 return false;
9949 *code1 = c1;
9951 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9952 /* For scalar masks we may have different boolean
9953 vector types with the same QImode. Thus we add
9954 an additional check on the number of elements. */
9955 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9956 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
9957 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9959 /* Check if it's a multi-step conversion that can be done using intermediate
9960 types. */
9961 prev_mode = vec_mode;
9962 prev_type = vectype;
9963 if (code == FIX_TRUNC_EXPR)
9964 uns = TYPE_UNSIGNED (vectype_out);
9965 else
9966 uns = TYPE_UNSIGNED (vectype);
9968 /* For multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
9969 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
9970 more costly than signed. */
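/* For instance, when narrowing a float vector to an unsigned integer
   vector, the check below swaps in the signed variant of the
   pack-and-truncate optab whenever that variant exists and produces
   the same result mode, so the remaining steps use the cheaper signed
   truncations (an illustrative reading of the code below, not a
   guarantee about any particular target). */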
9971 if (code == FIX_TRUNC_EXPR && uns)
9973 enum insn_code icode2;
9975 intermediate_type
9976 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9977 interm_optab
9978 = optab_for_tree_code (c1, intermediate_type, optab_default);
9979 if (interm_optab != unknown_optab
9980 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9981 && insn_data[icode1].operand[0].mode
9982 == insn_data[icode2].operand[0].mode)
9984 uns = false;
9985 optab1 = interm_optab;
9986 icode1 = icode2;
9990 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9991 intermediate steps in the narrowing sequence. We try
9992 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9993 interm_types->create (MAX_INTERM_CVT_STEPS);
9994 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9996 intermediate_mode = insn_data[icode1].operand[0].mode;
9997 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9999 intermediate_type = vect_double_mask_nunits (prev_type);
10000 if (intermediate_mode != TYPE_MODE (intermediate_type))
10001 return false;
10003 else
10004 intermediate_type
10005 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
10006 interm_optab
10007 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
10008 optab_default);
10009 if (!interm_optab
10010 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
10011 || insn_data[icode1].operand[0].mode != intermediate_mode
10012 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
10013 == CODE_FOR_nothing))
10014 break;
10016 interm_types->quick_push (intermediate_type);
10017 (*multi_step_cvt)++;
10019 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
10020 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
10021 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
10022 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
10024 prev_mode = intermediate_mode;
10025 prev_type = intermediate_type;
10026 optab1 = interm_optab;
10029 interm_types->release ();
10030 return false;
10033 /* Generate and return a statement that sets vector mask MASK such that
10034 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
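/* For example, with START_INDEX 3 and END_INDEX 7, an 8-element MASK
   becomes {1, 1, 1, 1, 0, 0, 0, 0}: element I is set iff 3 + I < 7.
   A hedged usage sketch (GSI, LOOP_MASK_TYPE, START and END are
   assumed to be in scope, and the target must support WHILE_ULT for
   LOOP_MASK_TYPE, as SVE does):

     tree mask = make_ssa_name (loop_mask_type);
     gcall *call = vect_gen_while (mask, start, end);
     gsi_insert_before (&gsi, call, GSI_SAME_STMT);
   */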
10036 gcall *
10037 vect_gen_while (tree mask, tree start_index, tree end_index)
10039 tree cmp_type = TREE_TYPE (start_index);
10040 tree mask_type = TREE_TYPE (mask);
10041 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
10042 cmp_type, mask_type,
10043 OPTIMIZE_FOR_SPEED));
10044 gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
10045 start_index, end_index,
10046 build_zero_cst (mask_type));
10047 gimple_call_set_lhs (call, mask);
10048 return call;
10051 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10052 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
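/* Continuing the example above: with START_INDEX 3 and END_INDEX 7 an
   8-element result is {0, 0, 0, 0, 1, 1, 1, 1}.  A sketch, with
   MASK_TYPE, START, END and GSI assumed to be in scope:

     gimple_seq seq = NULL;
     tree inv_mask = vect_gen_while_not (&seq, mask_type, start, end);
     gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
   */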
10054 tree
10055 vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
10056 tree end_index)
10058 tree tmp = make_ssa_name (mask_type);
10059 gcall *call = vect_gen_while (tmp, start_index, end_index);
10060 gimple_seq_add_stmt (seq, call);
10061 return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);