[AArch64] SVE tests
[official-gcc.git] / gcc / tree-vect-stmts.c
blob50b35fc60af92a36cbba8544f5355f2ce194f9bc
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
54 /* For lang_hooks.types.type_for_mode. */
55 #include "langhooks.h"
57 /* Return the vectorized type for the given statement. */
59 tree
60 stmt_vectype (struct _stmt_vec_info *stmt_info)
62 return STMT_VINFO_VECTYPE (stmt_info);
65 /* Return TRUE iff the given statement is in an inner loop relative to
66 the loop being vectorized. */
67 bool
68 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
70 gimple *stmt = STMT_VINFO_STMT (stmt_info);
71 basic_block bb = gimple_bb (stmt);
72 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
73 struct loop* loop;
75 if (!loop_vinfo)
76 return false;
78 loop = LOOP_VINFO_LOOP (loop_vinfo);
80 return (bb->loop_father == loop->inner);
83 /* Record the cost of a statement, either by directly informing the
84 target model or by saving it in a vector for later processing.
85 Return a preliminary estimate of the statement's cost. */
87 unsigned
88 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
89 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
90 int misalign, enum vect_cost_model_location where)
92 if ((kind == vector_load || kind == unaligned_load)
93 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
94 kind = vector_gather_load;
95 if ((kind == vector_store || kind == unaligned_store)
96 && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
97 kind = vector_scatter_store;
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 tree mem_ref;
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
180 return mem_ref;
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
189 static void
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern, in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 stmt = pattern_stmt;
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
239 return;
242 worklist->safe_push (stmt);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
250 bool
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
253 tree op;
254 gimple *def_stmt;
255 ssa_op_iter iter;
257 if (!is_gimple_assign (stmt))
258 return false;
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
269 return false;
272 if (dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 return true;
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - control stmts in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
290 static bool
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop closed form) */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
340 *live_p = true;
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
363 static bool
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
366 tree operand;
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
373 return true;
375 /* STMT has a data_ref. FORNOW this means that its of one of
376 the following forms:
377 -1- ARRAY_REF = var
378 -2- var = ARRAY_REF
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
383 for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
394 case IFN_MASK_STORE:
395 operand = gimple_call_arg (stmt, 3);
396 if (operand == use)
397 return true;
398 /* FALLTHRU */
399 case IFN_MASK_LOAD:
400 operand = gimple_call_arg (stmt, 2);
401 if (operand == use)
402 return true;
403 break;
404 default:
405 break;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
424 Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT cause it had already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
582 /* We are also not interested in uses on loop PHI backedges that are
583 inductions. Otherwise we'll needlessly vectorize the IV increment
584 and cause hybrid SLP for SLP inductions. Unless the PHI is live
585 of course. */
586 else if (gimple_code (stmt) == GIMPLE_PHI
587 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
588 && ! STMT_VINFO_LIVE_P (stmt_vinfo)
589 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
590 == use))
592 if (dump_enabled_p ())
593 dump_printf_loc (MSG_NOTE, vect_location,
594 "induction value on backedge.\n");
595 return true;
599 vect_mark_relevant (worklist, def_stmt, relevant, false);
600 return true;
604 /* Function vect_mark_stmts_to_be_vectorized.
606 Not all stmts in the loop need to be vectorized. For example:
608 for i...
609 for j...
610 1. T0 = i + j
611 2. T1 = a[T0]
613 3. j = j + 1
615 Stmt 1 and 3 do not need to be vectorized, because loop control and
616 addressing of vectorized data-refs are handled differently.
618 This pass detects such stmts. */
620 bool
621 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
623 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
624 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
625 unsigned int nbbs = loop->num_nodes;
626 gimple_stmt_iterator si;
627 gimple *stmt;
628 unsigned int i;
629 stmt_vec_info stmt_vinfo;
630 basic_block bb;
631 gimple *phi;
632 bool live_p;
633 enum vect_relevant relevant;
635 if (dump_enabled_p ())
636 dump_printf_loc (MSG_NOTE, vect_location,
637 "=== vect_mark_stmts_to_be_vectorized ===\n");
639 auto_vec<gimple *, 64> worklist;
641 /* 1. Init worklist. */
642 for (i = 0; i < nbbs; i++)
644 bb = bbs[i];
645 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
647 phi = gsi_stmt (si);
648 if (dump_enabled_p ())
650 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
651 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
654 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
655 vect_mark_relevant (&worklist, phi, relevant, live_p);
657 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
659 stmt = gsi_stmt (si);
660 if (dump_enabled_p ())
662 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
663 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
666 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
667 vect_mark_relevant (&worklist, stmt, relevant, live_p);
671 /* 2. Process_worklist */
672 while (worklist.length () > 0)
674 use_operand_p use_p;
675 ssa_op_iter iter;
677 stmt = worklist.pop ();
678 if (dump_enabled_p ())
680 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
681 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
684 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
685 (DEF_STMT) as relevant/irrelevant according to the relevance property
686 of STMT. */
687 stmt_vinfo = vinfo_for_stmt (stmt);
688 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
690 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
691 propagated as is to the DEF_STMTs of its USEs.
693 One exception is when STMT has been identified as defining a reduction
694 variable; in this case we set the relevance to vect_used_by_reduction.
695 This is because we distinguish between two kinds of relevant stmts -
696 those that are used by a reduction computation, and those that are
697 (also) used by a regular computation. This allows us later on to
698 identify stmts that are used solely by a reduction, and therefore the
699 order of the results that they produce does not have to be kept. */
701 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
703 case vect_reduction_def:
704 gcc_assert (relevant != vect_unused_in_scope);
705 if (relevant != vect_unused_in_scope
706 && relevant != vect_used_in_scope
707 && relevant != vect_used_by_reduction
708 && relevant != vect_used_only_live)
710 if (dump_enabled_p ())
711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
712 "unsupported use of reduction.\n");
713 return false;
715 break;
717 case vect_nested_cycle:
718 if (relevant != vect_unused_in_scope
719 && relevant != vect_used_in_outer_by_reduction
720 && relevant != vect_used_in_outer)
722 if (dump_enabled_p ())
723 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
724 "unsupported use of nested cycle.\n");
726 return false;
728 break;
730 case vect_double_reduction_def:
731 if (relevant != vect_unused_in_scope
732 && relevant != vect_used_by_reduction
733 && relevant != vect_used_only_live)
735 if (dump_enabled_p ())
736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
737 "unsupported use of double reduction.\n");
739 return false;
741 break;
743 default:
744 break;
747 if (is_pattern_stmt_p (stmt_vinfo))
749 /* Pattern statements are not inserted into the code, so
750 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
751 have to scan the RHS or function arguments instead. */
752 if (is_gimple_assign (stmt))
754 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
755 tree op = gimple_assign_rhs1 (stmt);
757 i = 1;
758 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
760 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
761 relevant, &worklist, false)
762 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
763 relevant, &worklist, false))
764 return false;
765 i = 2;
767 for (; i < gimple_num_ops (stmt); i++)
769 op = gimple_op (stmt, i);
770 if (TREE_CODE (op) == SSA_NAME
771 && !process_use (stmt, op, loop_vinfo, relevant,
772 &worklist, false))
773 return false;
776 else if (is_gimple_call (stmt))
778 for (i = 0; i < gimple_call_num_args (stmt); i++)
780 tree arg = gimple_call_arg (stmt, i);
781 if (!process_use (stmt, arg, loop_vinfo, relevant,
782 &worklist, false))
783 return false;
787 else
788 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
790 tree op = USE_FROM_PTR (use_p);
791 if (!process_use (stmt, op, loop_vinfo, relevant,
792 &worklist, false))
793 return false;
796 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
798 gather_scatter_info gs_info;
799 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
800 gcc_unreachable ();
801 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
802 &worklist, true))
803 return false;
805 } /* while worklist */
807 return true;
811 /* Function vect_model_simple_cost.
813 Models cost for simple operations, i.e. those that only emit ncopies of a
814 single op. Right now, this does not account for multiple insns that could
815 be generated for the single vector op. We will handle that shortly. */
817 void
818 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
819 enum vect_def_type *dt,
820 int ndts,
821 stmt_vector_for_cost *prologue_cost_vec,
822 stmt_vector_for_cost *body_cost_vec)
824 int i;
825 int inside_cost = 0, prologue_cost = 0;
827 /* The SLP costs were already calculated during SLP tree build. */
828 if (PURE_SLP_STMT (stmt_info))
829 return;
831 /* Cost the "broadcast" of a scalar operand in to a vector operand.
832 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
833 cost model. */
834 for (i = 0; i < ndts; i++)
835 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
836 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
837 stmt_info, 0, vect_prologue);
839 /* Pass the inside-of-loop statements to the target-specific cost model. */
840 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
841 stmt_info, 0, vect_body);
843 if (dump_enabled_p ())
844 dump_printf_loc (MSG_NOTE, vect_location,
845 "vect_model_simple_cost: inside_cost = %d, "
846 "prologue_cost = %d .\n", inside_cost, prologue_cost);
850 /* Model cost for type demotion and promotion operations. PWR is normally
851 zero for single-step promotions and demotions. It will be one if
852 two-step promotion/demotion is required, and so on. Each additional
853 step doubles the number of instructions required. */
855 static void
856 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
857 enum vect_def_type *dt, int pwr)
859 int i, tmp;
860 int inside_cost = 0, prologue_cost = 0;
861 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
862 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
863 void *target_cost_data;
865 /* The SLP costs were already calculated during SLP tree build. */
866 if (PURE_SLP_STMT (stmt_info))
867 return;
869 if (loop_vinfo)
870 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
871 else
872 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
874 for (i = 0; i < pwr + 1; i++)
876 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
877 (i + 1) : i;
878 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
879 vec_promote_demote, stmt_info, 0,
880 vect_body);
883 /* FORNOW: Assuming maximum 2 args per stmts. */
884 for (i = 0; i < 2; i++)
885 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
886 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
887 stmt_info, 0, vect_prologue);
889 if (dump_enabled_p ())
890 dump_printf_loc (MSG_NOTE, vect_location,
891 "vect_model_promotion_demotion_cost: inside_cost = %d, "
892 "prologue_cost = %d .\n", inside_cost, prologue_cost);
895 /* Function vect_model_store_cost
897 Models cost for stores. In the case of grouped accesses, one access
898 has the overhead of the grouped access attributed to it. */
900 void
901 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
902 vect_memory_access_type memory_access_type,
903 vec_load_store_type vls_type, slp_tree slp_node,
904 stmt_vector_for_cost *prologue_cost_vec,
905 stmt_vector_for_cost *body_cost_vec)
907 unsigned int inside_cost = 0, prologue_cost = 0;
908 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
909 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
910 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
912 if (vls_type == VLS_STORE_INVARIANT)
913 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
914 stmt_info, 0, vect_prologue);
916 /* Grouped stores update all elements in the group at once,
917 so we want the DR for the first statement. */
918 if (!slp_node && grouped_access_p)
920 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
921 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
924 /* True if we should include any once-per-group costs as well as
925 the cost of the statement itself. For SLP we only get called
926 once per group anyhow. */
927 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
929 /* We assume that the cost of a single store-lanes instruction is
930 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
931 access is instead being provided by a permute-and-store operation,
932 include the cost of the permutes. */
933 if (first_stmt_p
934 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
936 /* Uses a high and low interleave or shuffle operations for each
937 needed permute. */
938 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
939 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
940 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
941 stmt_info, 0, vect_body);
943 if (dump_enabled_p ())
944 dump_printf_loc (MSG_NOTE, vect_location,
945 "vect_model_store_cost: strided group_size = %d .\n",
946 group_size);
949 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
950 /* Costs of the stores. */
951 if (memory_access_type == VMAT_ELEMENTWISE
952 || memory_access_type == VMAT_GATHER_SCATTER)
954 /* N scalar stores plus extracting the elements. */
955 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
956 inside_cost += record_stmt_cost (body_cost_vec,
957 ncopies * assumed_nunits,
958 scalar_store, stmt_info, 0, vect_body);
960 else
961 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
963 if (memory_access_type == VMAT_ELEMENTWISE
964 || memory_access_type == VMAT_STRIDED_SLP)
966 /* N scalar stores plus extracting the elements. */
967 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
968 inside_cost += record_stmt_cost (body_cost_vec,
969 ncopies * assumed_nunits,
970 vec_to_scalar, stmt_info, 0, vect_body);
973 if (dump_enabled_p ())
974 dump_printf_loc (MSG_NOTE, vect_location,
975 "vect_model_store_cost: inside_cost = %d, "
976 "prologue_cost = %d .\n", inside_cost, prologue_cost);
980 /* Calculate cost of DR's memory access. */
981 void
982 vect_get_store_cost (struct data_reference *dr, int ncopies,
983 unsigned int *inside_cost,
984 stmt_vector_for_cost *body_cost_vec)
986 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
987 gimple *stmt = DR_STMT (dr);
988 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
990 switch (alignment_support_scheme)
992 case dr_aligned:
994 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
995 vector_store, stmt_info, 0,
996 vect_body);
998 if (dump_enabled_p ())
999 dump_printf_loc (MSG_NOTE, vect_location,
1000 "vect_model_store_cost: aligned.\n");
1001 break;
1004 case dr_unaligned_supported:
1006 /* Here, we assign an additional cost for the unaligned store. */
1007 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1008 unaligned_store, stmt_info,
1009 DR_MISALIGNMENT (dr), vect_body);
1010 if (dump_enabled_p ())
1011 dump_printf_loc (MSG_NOTE, vect_location,
1012 "vect_model_store_cost: unaligned supported by "
1013 "hardware.\n");
1014 break;
1017 case dr_unaligned_unsupported:
1019 *inside_cost = VECT_MAX_COST;
1021 if (dump_enabled_p ())
1022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1023 "vect_model_store_cost: unsupported access.\n");
1024 break;
1027 default:
1028 gcc_unreachable ();
1033 /* Function vect_model_load_cost
1035 Models cost for loads. In the case of grouped accesses, one access has
1036 the overhead of the grouped access attributed to it. Since unaligned
1037 accesses are supported for loads, we also account for the costs of the
1038 access scheme chosen. */
1040 void
1041 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1042 vect_memory_access_type memory_access_type,
1043 slp_tree slp_node,
1044 stmt_vector_for_cost *prologue_cost_vec,
1045 stmt_vector_for_cost *body_cost_vec)
1047 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1048 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1049 unsigned int inside_cost = 0, prologue_cost = 0;
1050 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1052 /* Grouped loads read all elements in the group at once,
1053 so we want the DR for the first statement. */
1054 if (!slp_node && grouped_access_p)
1056 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1057 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1060 /* True if we should include any once-per-group costs as well as
1061 the cost of the statement itself. For SLP we only get called
1062 once per group anyhow. */
1063 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1065 /* We assume that the cost of a single load-lanes instruction is
1066 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1067 access is instead being provided by a load-and-permute operation,
1068 include the cost of the permutes. */
1069 if (first_stmt_p
1070 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1072 /* Uses an even and odd extract operations or shuffle operations
1073 for each needed permute. */
1074 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1075 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1076 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1077 stmt_info, 0, vect_body);
1079 if (dump_enabled_p ())
1080 dump_printf_loc (MSG_NOTE, vect_location,
1081 "vect_model_load_cost: strided group_size = %d .\n",
1082 group_size);
1085 /* The loads themselves. */
1086 if (memory_access_type == VMAT_ELEMENTWISE
1087 || memory_access_type == VMAT_GATHER_SCATTER)
1089 /* N scalar loads plus gathering them into a vector. */
1090 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1091 unsigned int assumed_nunits = vect_nunits_for_cost (vectype);
1092 inside_cost += record_stmt_cost (body_cost_vec,
1093 ncopies * assumed_nunits,
1094 scalar_load, stmt_info, 0, vect_body);
1096 else
1097 vect_get_load_cost (dr, ncopies, first_stmt_p,
1098 &inside_cost, &prologue_cost,
1099 prologue_cost_vec, body_cost_vec, true);
1100 if (memory_access_type == VMAT_ELEMENTWISE
1101 || memory_access_type == VMAT_STRIDED_SLP)
1102 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1103 stmt_info, 0, vect_body);
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE, vect_location,
1107 "vect_model_load_cost: inside_cost = %d, "
1108 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1112 /* Calculate cost of DR's memory access. */
1113 void
1114 vect_get_load_cost (struct data_reference *dr, int ncopies,
1115 bool add_realign_cost, unsigned int *inside_cost,
1116 unsigned int *prologue_cost,
1117 stmt_vector_for_cost *prologue_cost_vec,
1118 stmt_vector_for_cost *body_cost_vec,
1119 bool record_prologue_costs)
1121 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1122 gimple *stmt = DR_STMT (dr);
1123 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1125 switch (alignment_support_scheme)
1127 case dr_aligned:
1129 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1130 stmt_info, 0, vect_body);
1132 if (dump_enabled_p ())
1133 dump_printf_loc (MSG_NOTE, vect_location,
1134 "vect_model_load_cost: aligned.\n");
1136 break;
1138 case dr_unaligned_supported:
1140 /* Here, we assign an additional cost for the unaligned load. */
1141 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1142 unaligned_load, stmt_info,
1143 DR_MISALIGNMENT (dr), vect_body);
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned supported by "
1148 "hardware.\n");
1150 break;
1152 case dr_explicit_realign:
1154 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1155 vector_load, stmt_info, 0, vect_body);
1156 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1157 vec_perm, stmt_info, 0, vect_body);
1159 /* FIXME: If the misalignment remains fixed across the iterations of
1160 the containing loop, the following cost should be added to the
1161 prologue costs. */
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1164 stmt_info, 0, vect_body);
1166 if (dump_enabled_p ())
1167 dump_printf_loc (MSG_NOTE, vect_location,
1168 "vect_model_load_cost: explicit realign\n");
1170 break;
1172 case dr_explicit_realign_optimized:
1174 if (dump_enabled_p ())
1175 dump_printf_loc (MSG_NOTE, vect_location,
1176 "vect_model_load_cost: unaligned software "
1177 "pipelined.\n");
1179 /* Unaligned software pipeline has a load of an address, an initial
1180 load, and possibly a mask operation to "prime" the loop. However,
1181 if this is an access in a group of loads, which provide grouped
1182 access, then the above cost should only be considered for one
1183 access in the group. Inside the loop, there is a load op
1184 and a realignment op. */
1186 if (add_realign_cost && record_prologue_costs)
1188 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1189 vector_stmt, stmt_info,
1190 0, vect_prologue);
1191 if (targetm.vectorize.builtin_mask_for_load)
1192 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1193 vector_stmt, stmt_info,
1194 0, vect_prologue);
1197 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1198 stmt_info, 0, vect_body);
1199 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1200 stmt_info, 0, vect_body);
1202 if (dump_enabled_p ())
1203 dump_printf_loc (MSG_NOTE, vect_location,
1204 "vect_model_load_cost: explicit realign optimized"
1205 "\n");
1207 break;
1210 case dr_unaligned_unsupported:
1212 *inside_cost = VECT_MAX_COST;
1214 if (dump_enabled_p ())
1215 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1216 "vect_model_load_cost: unsupported access.\n");
1217 break;
1220 default:
1221 gcc_unreachable ();
1225 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1226 the loop preheader for the vectorized stmt STMT. */
1228 static void
1229 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1231 if (gsi)
1232 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1233 else
1235 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1236 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1238 if (loop_vinfo)
1240 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1241 basic_block new_bb;
1242 edge pe;
1244 if (nested_in_vect_loop_p (loop, stmt))
1245 loop = loop->inner;
1247 pe = loop_preheader_edge (loop);
1248 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1249 gcc_assert (!new_bb);
1251 else
1253 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1254 basic_block bb;
1255 gimple_stmt_iterator gsi_bb_start;
1257 gcc_assert (bb_vinfo);
1258 bb = BB_VINFO_BB (bb_vinfo);
1259 gsi_bb_start = gsi_after_labels (bb);
1260 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1264 if (dump_enabled_p ())
1266 dump_printf_loc (MSG_NOTE, vect_location,
1267 "created new init_stmt: ");
1268 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1272 /* Function vect_init_vector.
1274 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1275 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1276 vector type a vector with all elements equal to VAL is created first.
1277 Place the initialization at BSI if it is not NULL. Otherwise, place the
1278 initialization at the loop preheader.
1279 Return the DEF of INIT_STMT.
1280 It will be used in the vectorization of STMT. */
1282 tree
1283 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1285 gimple *init_stmt;
1286 tree new_temp;
1288 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1289 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1291 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1292 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1294 /* Scalar boolean value should be transformed into
1295 all zeros or all ones value before building a vector. */
1296 if (VECTOR_BOOLEAN_TYPE_P (type))
1298 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1299 tree false_val = build_zero_cst (TREE_TYPE (type));
1301 if (CONSTANT_CLASS_P (val))
1302 val = integer_zerop (val) ? false_val : true_val;
1303 else
1305 new_temp = make_ssa_name (TREE_TYPE (type));
1306 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1307 val, true_val, false_val);
1308 vect_init_vector_1 (stmt, init_stmt, gsi);
1309 val = new_temp;
1312 else if (CONSTANT_CLASS_P (val))
1313 val = fold_convert (TREE_TYPE (type), val);
1314 else
1316 new_temp = make_ssa_name (TREE_TYPE (type));
1317 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1318 init_stmt = gimple_build_assign (new_temp,
1319 fold_build1 (VIEW_CONVERT_EXPR,
1320 TREE_TYPE (type),
1321 val));
1322 else
1323 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1324 vect_init_vector_1 (stmt, init_stmt, gsi);
1325 val = new_temp;
1328 val = build_vector_from_val (type, val);
1331 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1332 init_stmt = gimple_build_assign (new_temp, val);
1333 vect_init_vector_1 (stmt, init_stmt, gsi);
1334 return new_temp;
1337 /* Function vect_get_vec_def_for_operand_1.
1339 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1340 DT that will be used in the vectorized stmt. */
1342 tree
1343 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1345 tree vec_oprnd;
1346 gimple *vec_stmt;
1347 stmt_vec_info def_stmt_info = NULL;
1349 switch (dt)
1351 /* operand is a constant or a loop invariant. */
1352 case vect_constant_def:
1353 case vect_external_def:
1354 /* Code should use vect_get_vec_def_for_operand. */
1355 gcc_unreachable ();
1357 /* operand is defined inside the loop. */
1358 case vect_internal_def:
1360 /* Get the def from the vectorized stmt. */
1361 def_stmt_info = vinfo_for_stmt (def_stmt);
1363 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1364 /* Get vectorized pattern statement. */
1365 if (!vec_stmt
1366 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1367 && !STMT_VINFO_RELEVANT (def_stmt_info))
1368 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1369 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1370 gcc_assert (vec_stmt);
1371 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1372 vec_oprnd = PHI_RESULT (vec_stmt);
1373 else if (is_gimple_call (vec_stmt))
1374 vec_oprnd = gimple_call_lhs (vec_stmt);
1375 else
1376 vec_oprnd = gimple_assign_lhs (vec_stmt);
1377 return vec_oprnd;
1380 /* operand is defined by a loop header phi. */
1381 case vect_reduction_def:
1382 case vect_double_reduction_def:
1383 case vect_nested_cycle:
1384 case vect_induction_def:
1386 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1388 /* Get the def from the vectorized stmt. */
1389 def_stmt_info = vinfo_for_stmt (def_stmt);
1390 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1391 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1392 vec_oprnd = PHI_RESULT (vec_stmt);
1393 else
1394 vec_oprnd = gimple_get_lhs (vec_stmt);
1395 return vec_oprnd;
1398 default:
1399 gcc_unreachable ();
1404 /* Function vect_get_vec_def_for_operand.
1406 OP is an operand in STMT. This function returns a (vector) def that will be
1407 used in the vectorized stmt for STMT.
1409 In the case that OP is an SSA_NAME which is defined in the loop, then
1410 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1412 In case OP is an invariant or constant, a new stmt that creates a vector def
1413 needs to be introduced. VECTYPE may be used to specify a required type for
1414 vector invariant. */
1416 tree
1417 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1419 gimple *def_stmt;
1420 enum vect_def_type dt;
1421 bool is_simple_use;
1422 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1423 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1425 if (dump_enabled_p ())
1427 dump_printf_loc (MSG_NOTE, vect_location,
1428 "vect_get_vec_def_for_operand: ");
1429 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1430 dump_printf (MSG_NOTE, "\n");
1433 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1434 gcc_assert (is_simple_use);
1435 if (def_stmt && dump_enabled_p ())
1437 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1438 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1441 if (dt == vect_constant_def || dt == vect_external_def)
1443 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1444 tree vector_type;
1446 if (vectype)
1447 vector_type = vectype;
1448 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1449 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1450 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1451 else
1452 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1454 gcc_assert (vector_type);
1455 return vect_init_vector (stmt, op, vector_type, NULL);
1457 else
1458 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1462 /* Function vect_get_vec_def_for_stmt_copy
1464 Return a vector-def for an operand. This function is used when the
1465 vectorized stmt to be created (by the caller to this function) is a "copy"
1466 created in case the vectorized result cannot fit in one vector, and several
1467 copies of the vector-stmt are required. In this case the vector-def is
1468 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1469 of the stmt that defines VEC_OPRND.
1470 DT is the type of the vector def VEC_OPRND.
1472 Context:
1473 In case the vectorization factor (VF) is bigger than the number
1474 of elements that can fit in a vectype (nunits), we have to generate
1475 more than one vector stmt to vectorize the scalar stmt. This situation
1476 arises when there are multiple data-types operated upon in the loop; the
1477 smallest data-type determines the VF, and as a result, when vectorizing
1478 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1479 vector stmt (each computing a vector of 'nunits' results, and together
1480 computing 'VF' results in each iteration). This function is called when
1481 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1482 which VF=16 and nunits=4, so the number of copies required is 4):
1484 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1486 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1487 VS1.1: vx.1 = memref1 VS1.2
1488 VS1.2: vx.2 = memref2 VS1.3
1489 VS1.3: vx.3 = memref3
1491 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1492 VSnew.1: vz1 = vx.1 + ... VSnew.2
1493 VSnew.2: vz2 = vx.2 + ... VSnew.3
1494 VSnew.3: vz3 = vx.3 + ...
1496 The vectorization of S1 is explained in vectorizable_load.
1497 The vectorization of S2:
1498 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1499 the function 'vect_get_vec_def_for_operand' is called to
1500 get the relevant vector-def for each operand of S2. For operand x it
1501 returns the vector-def 'vx.0'.
1503 To create the remaining copies of the vector-stmt (VSnew.j), this
1504 function is called to get the relevant vector-def for each operand. It is
1505 obtained from the respective VS1.j stmt, which is recorded in the
1506 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1508 For example, to obtain the vector-def 'vx.1' in order to create the
1509 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1510 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1511 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1512 and return its def ('vx.1').
1513 Overall, to create the above sequence this function will be called 3 times:
1514 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1515 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1516 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1518 tree
1519 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1521 gimple *vec_stmt_for_operand;
1522 stmt_vec_info def_stmt_info;
1524 /* Do nothing; can reuse same def. */
1525 if (dt == vect_external_def || dt == vect_constant_def )
1526 return vec_oprnd;
1528 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1529 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1530 gcc_assert (def_stmt_info);
1531 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1532 gcc_assert (vec_stmt_for_operand);
1533 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1534 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1535 else
1536 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1537 return vec_oprnd;
1541 /* Get vectorized definitions for the operands to create a copy of an original
1542 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1544 void
1545 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1546 vec<tree> *vec_oprnds0,
1547 vec<tree> *vec_oprnds1)
1549 tree vec_oprnd = vec_oprnds0->pop ();
1551 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1552 vec_oprnds0->quick_push (vec_oprnd);
1554 if (vec_oprnds1 && vec_oprnds1->length ())
1556 vec_oprnd = vec_oprnds1->pop ();
1557 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1558 vec_oprnds1->quick_push (vec_oprnd);
1563 /* Get vectorized definitions for OP0 and OP1. */
1565 void
1566 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1567 vec<tree> *vec_oprnds0,
1568 vec<tree> *vec_oprnds1,
1569 slp_tree slp_node)
1571 if (slp_node)
1573 int nops = (op1 == NULL_TREE) ? 1 : 2;
1574 auto_vec<tree> ops (nops);
1575 auto_vec<vec<tree> > vec_defs (nops);
1577 ops.quick_push (op0);
1578 if (op1)
1579 ops.quick_push (op1);
1581 vect_get_slp_defs (ops, slp_node, &vec_defs);
1583 *vec_oprnds0 = vec_defs[0];
1584 if (op1)
1585 *vec_oprnds1 = vec_defs[1];
1587 else
1589 tree vec_oprnd;
1591 vec_oprnds0->create (1);
1592 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1593 vec_oprnds0->quick_push (vec_oprnd);
1595 if (op1)
1597 vec_oprnds1->create (1);
1598 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1599 vec_oprnds1->quick_push (vec_oprnd);
1605 /* Function vect_finish_stmt_generation.
1607 Insert a new stmt. */
1609 void
1610 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1611 gimple_stmt_iterator *gsi)
1613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1614 vec_info *vinfo = stmt_info->vinfo;
1616 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1618 if (!gsi_end_p (*gsi)
1619 && gimple_has_mem_ops (vec_stmt))
1621 gimple *at_stmt = gsi_stmt (*gsi);
1622 tree vuse = gimple_vuse (at_stmt);
1623 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1625 tree vdef = gimple_vdef (at_stmt);
1626 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1627 /* If we have an SSA vuse and insert a store, update virtual
1628 SSA form to avoid triggering the renamer. Do so only
1629 if we can easily see all uses - which is what almost always
1630 happens with the way vectorized stmts are inserted. */
1631 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1632 && ((is_gimple_assign (vec_stmt)
1633 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1634 || (is_gimple_call (vec_stmt)
1635 && !(gimple_call_flags (vec_stmt)
1636 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1638 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1639 gimple_set_vdef (vec_stmt, new_vdef);
1640 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1644 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1646 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1648 if (dump_enabled_p ())
1650 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1651 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1654 gimple_set_location (vec_stmt, gimple_location (stmt));
1656 /* While EH edges will generally prevent vectorization, stmt might
1657 e.g. be in a must-not-throw region. Ensure newly created stmts
1658 that could throw are part of the same region. */
1659 int lp_nr = lookup_stmt_eh_lp (stmt);
1660 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1661 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1664 /* We want to vectorize a call to combined function CFN with function
1665 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1666 as the types of all inputs. Check whether this is possible using
1667 an internal function, returning its code if so or IFN_LAST if not. */
1669 static internal_fn
1670 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1671 tree vectype_out, tree vectype_in)
1673 internal_fn ifn;
1674 if (internal_fn_p (cfn))
1675 ifn = as_internal_fn (cfn);
1676 else
1677 ifn = associated_internal_fn (fndecl);
1678 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1680 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1681 if (info.vectorizable)
1683 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1684 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1685 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1686 OPTIMIZE_FOR_SPEED))
1687 return ifn;
1690 return IFN_LAST;
1694 static tree permute_vec_elements (tree, tree, tree, gimple *,
1695 gimple_stmt_iterator *);
1697 /* STMT is a non-strided load or store, meaning that it accesses
1698 elements with a known constant step. Return -1 if that step
1699 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1701 static int
1702 compare_step_with_zero (gimple *stmt)
1704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1705 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1706 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1707 size_zero_node);
1710 /* If the target supports a permute mask that reverses the elements in
1711 a vector of type VECTYPE, return that mask, otherwise return null. */
1713 static tree
1714 perm_mask_for_reverse (tree vectype)
1716 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1718 /* The encoding has a single stepped pattern. */
1719 vec_perm_builder sel (nunits, 1, 3);
1720 for (int i = 0; i < 3; ++i)
1721 sel.quick_push (nunits - 1 - i);
1723 vec_perm_indices indices (sel, 1, nunits);
1724 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
1725 return NULL_TREE;
1726 return vect_gen_perm_mask_checked (vectype, indices);
1729 /* STMT is either a masked or unconditional store. Return the value
1730 being stored. */
1732 static tree
1733 vect_get_store_rhs (gimple *stmt)
1735 if (gassign *assign = dyn_cast <gassign *> (stmt))
1737 gcc_assert (gimple_assign_single_p (assign));
1738 return gimple_assign_rhs1 (assign);
1740 if (gcall *call = dyn_cast <gcall *> (stmt))
1742 internal_fn ifn = gimple_call_internal_fn (call);
1743 gcc_assert (ifn == IFN_MASK_STORE);
1744 return gimple_call_arg (stmt, 3);
1746 gcc_unreachable ();
1749 /* A subroutine of get_load_store_type, with a subset of the same
1750 arguments. Handle the case where STMT is part of a grouped load
1751 or store.
1753 For stores, the statements in the group are all consecutive
1754 and there is no gap at the end. For loads, the statements in the
1755 group might not be consecutive; there can be gaps between statements
1756 as well as at the end. */
1758 static bool
1759 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1760 vec_load_store_type vls_type,
1761 vect_memory_access_type *memory_access_type)
1763 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1764 vec_info *vinfo = stmt_info->vinfo;
1765 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1766 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1767 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1768 data_reference *first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1769 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1770 bool single_element_p = (stmt == first_stmt
1771 && !GROUP_NEXT_ELEMENT (stmt_info));
1772 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1773 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1775 /* True if the vectorized statements would access beyond the last
1776 statement in the group. */
1777 bool overrun_p = false;
1779 /* True if we can cope with such overrun by peeling for gaps, so that
1780 there is at least one final scalar iteration after the vector loop. */
1781 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1783 /* There can only be a gap at the end of the group if the stride is
1784 known at compile time. */
1785 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1787 /* Stores can't yet have gaps. */
1788 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1790 if (slp)
1792 if (STMT_VINFO_STRIDED_P (stmt_info))
1794 /* Try to use consecutive accesses of GROUP_SIZE elements,
1795 separated by the stride, until we have a complete vector.
1796 Fall back to scalar accesses if that isn't possible. */
1797 if (multiple_p (nunits, group_size))
1798 *memory_access_type = VMAT_STRIDED_SLP;
1799 else
1800 *memory_access_type = VMAT_ELEMENTWISE;
1802 else
1804 overrun_p = loop_vinfo && gap != 0;
1805 if (overrun_p && vls_type != VLS_LOAD)
1807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1808 "Grouped store with gaps requires"
1809 " non-consecutive accesses\n");
1810 return false;
1812 /* An overrun is fine if the trailing elements are smaller
1813 than the alignment boundary B. Every vector access will
1814 be a multiple of B and so we are guaranteed to access a
1815 non-gap element in the same B-sized block. */
1816 if (overrun_p
1817 && gap < (vect_known_alignment_in_bytes (first_dr)
1818 / vect_get_scalar_dr_size (first_dr)))
1819 overrun_p = false;
1820 if (overrun_p && !can_overrun_p)
1822 if (dump_enabled_p ())
1823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1824 "Peeling for outer loop is not supported\n");
1825 return false;
1827 *memory_access_type = VMAT_CONTIGUOUS;
1830 else
1832 /* We can always handle this case using elementwise accesses,
1833 but see if something more efficient is available. */
1834 *memory_access_type = VMAT_ELEMENTWISE;
1836 /* If there is a gap at the end of the group then these optimizations
1837 would access excess elements in the last iteration. */
1838 bool would_overrun_p = (gap != 0);
1839 /* An overrun is fine if the trailing elements are smaller than the
1840 alignment boundary B. Every vector access will be a multiple of B
1841 and so we are guaranteed to access a non-gap element in the
1842 same B-sized block. */
1843 if (would_overrun_p
1844 && gap < (vect_known_alignment_in_bytes (first_dr)
1845 / vect_get_scalar_dr_size (first_dr)))
1846 would_overrun_p = false;
1848 if (!STMT_VINFO_STRIDED_P (stmt_info)
1849 && (can_overrun_p || !would_overrun_p)
1850 && compare_step_with_zero (stmt) > 0)
1852 /* First cope with the degenerate case of a single-element
1853 vector. */
1854 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U))
1855 *memory_access_type = VMAT_CONTIGUOUS;
1857 /* Otherwise try using LOAD/STORE_LANES. */
1858 if (*memory_access_type == VMAT_ELEMENTWISE
1859 && (vls_type == VLS_LOAD
1860 ? vect_load_lanes_supported (vectype, group_size)
1861 : vect_store_lanes_supported (vectype, group_size)))
1863 *memory_access_type = VMAT_LOAD_STORE_LANES;
1864 overrun_p = would_overrun_p;
1867 /* If that fails, try using permuting loads. */
1868 if (*memory_access_type == VMAT_ELEMENTWISE
1869 && (vls_type == VLS_LOAD
1870 ? vect_grouped_load_supported (vectype, single_element_p,
1871 group_size)
1872 : vect_grouped_store_supported (vectype, group_size)))
1874 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1875 overrun_p = would_overrun_p;
1880 if (vls_type != VLS_LOAD && first_stmt == stmt)
1882 /* STMT is the leader of the group. Check the operands of all the
1883 stmts of the group. */
1884 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1885 while (next_stmt)
1887 gcc_assert (gimple_assign_single_p (next_stmt));
1888 tree op = gimple_assign_rhs1 (next_stmt);
1889 gimple *def_stmt;
1890 enum vect_def_type dt;
1891 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1893 if (dump_enabled_p ())
1894 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1895 "use not simple.\n");
1896 return false;
1898 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1902 if (overrun_p)
1904 gcc_assert (can_overrun_p);
1905 if (dump_enabled_p ())
1906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1907 "Data access with gaps requires scalar "
1908 "epilogue loop\n");
1909 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1912 return true;
1915 /* A subroutine of get_load_store_type, with a subset of the same
1916 arguments. Handle the case where STMT is a load or store that
1917 accesses consecutive elements with a negative step. */
1919 static vect_memory_access_type
1920 get_negative_load_store_type (gimple *stmt, tree vectype,
1921 vec_load_store_type vls_type,
1922 unsigned int ncopies)
1924 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1925 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1926 dr_alignment_support alignment_support_scheme;
1928 if (ncopies > 1)
1930 if (dump_enabled_p ())
1931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1932 "multiple types with negative step.\n");
1933 return VMAT_ELEMENTWISE;
1936 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1937 if (alignment_support_scheme != dr_aligned
1938 && alignment_support_scheme != dr_unaligned_supported)
1940 if (dump_enabled_p ())
1941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1942 "negative step but alignment required.\n");
1943 return VMAT_ELEMENTWISE;
1946 if (vls_type == VLS_STORE_INVARIANT)
1948 if (dump_enabled_p ())
1949 dump_printf_loc (MSG_NOTE, vect_location,
1950 "negative step with invariant source;"
1951 " no permute needed.\n");
1952 return VMAT_CONTIGUOUS_DOWN;
1955 if (!perm_mask_for_reverse (vectype))
1957 if (dump_enabled_p ())
1958 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1959 "negative step and reversing not supported.\n");
1960 return VMAT_ELEMENTWISE;
1963 return VMAT_CONTIGUOUS_REVERSE;
1966 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1967 if there is a memory access type that the vectorized form can use,
1968 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1969 or scatters, fill in GS_INFO accordingly.
1971 SLP says whether we're performing SLP rather than loop vectorization.
1972 VECTYPE is the vector type that the vectorized statements will use.
1973 NCOPIES is the number of vector statements that will be needed. */
1975 static bool
1976 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1977 vec_load_store_type vls_type, unsigned int ncopies,
1978 vect_memory_access_type *memory_access_type,
1979 gather_scatter_info *gs_info)
1981 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1982 vec_info *vinfo = stmt_info->vinfo;
1983 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1984 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1985 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1987 *memory_access_type = VMAT_GATHER_SCATTER;
1988 gimple *def_stmt;
1989 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1990 gcc_unreachable ();
1991 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1992 &gs_info->offset_dt,
1993 &gs_info->offset_vectype))
1995 if (dump_enabled_p ())
1996 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1997 "%s index use not simple.\n",
1998 vls_type == VLS_LOAD ? "gather" : "scatter");
1999 return false;
2002 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2004 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
2005 memory_access_type))
2006 return false;
2008 else if (STMT_VINFO_STRIDED_P (stmt_info))
2010 gcc_assert (!slp);
2011 *memory_access_type = VMAT_ELEMENTWISE;
2013 else
2015 int cmp = compare_step_with_zero (stmt);
2016 if (cmp < 0)
2017 *memory_access_type = get_negative_load_store_type
2018 (stmt, vectype, vls_type, ncopies);
2019 else if (cmp == 0)
2021 gcc_assert (vls_type == VLS_LOAD);
2022 *memory_access_type = VMAT_INVARIANT;
2024 else
2025 *memory_access_type = VMAT_CONTIGUOUS;
2028 if ((*memory_access_type == VMAT_ELEMENTWISE
2029 || *memory_access_type == VMAT_STRIDED_SLP)
2030 && !nunits.is_constant ())
2032 if (dump_enabled_p ())
2033 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2034 "Not using elementwise accesses due to variable "
2035 "vectorization factor.\n");
2036 return false;
2039 /* FIXME: At the moment the cost model seems to underestimate the
2040 cost of using elementwise accesses. This check preserves the
2041 traditional behavior until that can be fixed. */
2042 if (*memory_access_type == VMAT_ELEMENTWISE
2043 && !STMT_VINFO_STRIDED_P (stmt_info))
2045 if (dump_enabled_p ())
2046 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2047 "not falling back to elementwise accesses\n");
2048 return false;
2050 return true;
2053 /* Return true if boolean argument MASK is suitable for vectorizing
2054 conditional load or store STMT. When returning true, store the
2055 type of the vectorized mask in *MASK_VECTYPE_OUT. */
2057 static bool
2058 vect_check_load_store_mask (gimple *stmt, tree mask, tree *mask_vectype_out)
2060 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2062 if (dump_enabled_p ())
2063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2064 "mask argument is not a boolean.\n");
2065 return false;
2068 if (TREE_CODE (mask) != SSA_NAME)
2070 if (dump_enabled_p ())
2071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2072 "mask argument is not an SSA name.\n");
2073 return false;
2076 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2077 gimple *def_stmt;
2078 enum vect_def_type dt;
2079 tree mask_vectype;
2080 if (!vect_is_simple_use (mask, stmt_info->vinfo, &def_stmt, &dt,
2081 &mask_vectype))
2083 if (dump_enabled_p ())
2084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2085 "mask use not simple.\n");
2086 return false;
2089 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2090 if (!mask_vectype)
2091 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2093 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype))
2095 if (dump_enabled_p ())
2096 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2097 "could not find an appropriate vector mask type.\n");
2098 return false;
2101 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype),
2102 TYPE_VECTOR_SUBPARTS (vectype)))
2104 if (dump_enabled_p ())
2106 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2107 "vector mask type ");
2108 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, mask_vectype);
2109 dump_printf (MSG_MISSED_OPTIMIZATION,
2110 " does not match vector data type ");
2111 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, vectype);
2112 dump_printf (MSG_MISSED_OPTIMIZATION, ".\n");
2114 return false;
2117 *mask_vectype_out = mask_vectype;
2118 return true;
2121 /* Return true if stored value RHS is suitable for vectorizing store
2122 statement STMT. When returning true, store the type of the
2123 vectorized store value in *RHS_VECTYPE_OUT and the type of the
2124 store in *VLS_TYPE_OUT. */
2126 static bool
2127 vect_check_store_rhs (gimple *stmt, tree rhs, tree *rhs_vectype_out,
2128 vec_load_store_type *vls_type_out)
2130 /* In the case this is a store from a constant make sure
2131 native_encode_expr can handle it. */
2132 if (CONSTANT_CLASS_P (rhs) && native_encode_expr (rhs, NULL, 64) == 0)
2134 if (dump_enabled_p ())
2135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2136 "cannot encode constant as a byte sequence.\n");
2137 return false;
2140 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2141 gimple *def_stmt;
2142 enum vect_def_type dt;
2143 tree rhs_vectype;
2144 if (!vect_is_simple_use (rhs, stmt_info->vinfo, &def_stmt, &dt,
2145 &rhs_vectype))
2147 if (dump_enabled_p ())
2148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2149 "use not simple.\n");
2150 return false;
2153 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2154 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
2156 if (dump_enabled_p ())
2157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2158 "incompatible vector types.\n");
2159 return false;
2162 *rhs_vectype_out = rhs_vectype;
2163 if (dt == vect_constant_def || dt == vect_external_def)
2164 *vls_type_out = VLS_STORE_INVARIANT;
2165 else
2166 *vls_type_out = VLS_STORE;
2167 return true;
2170 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT.
2171 Note that we support masks with floating-point type, in which case the
2172 floats are interpreted as a bitmask. */
2174 static tree
2175 vect_build_all_ones_mask (gimple *stmt, tree masktype)
2177 if (TREE_CODE (masktype) == INTEGER_TYPE)
2178 return build_int_cst (masktype, -1);
2179 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
2181 tree mask = build_int_cst (TREE_TYPE (masktype), -1);
2182 mask = build_vector_from_val (masktype, mask);
2183 return vect_init_vector (stmt, mask, masktype, NULL);
2185 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
2187 REAL_VALUE_TYPE r;
2188 long tmp[6];
2189 for (int j = 0; j < 6; ++j)
2190 tmp[j] = -1;
2191 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
2192 tree mask = build_real (TREE_TYPE (masktype), r);
2193 mask = build_vector_from_val (masktype, mask);
2194 return vect_init_vector (stmt, mask, masktype, NULL);
2196 gcc_unreachable ();
2199 /* Build an all-zero merge value of type VECTYPE while vectorizing
2200 STMT as a gather load. */
2202 static tree
2203 vect_build_zero_merge_argument (gimple *stmt, tree vectype)
2205 tree merge;
2206 if (TREE_CODE (TREE_TYPE (vectype)) == INTEGER_TYPE)
2207 merge = build_int_cst (TREE_TYPE (vectype), 0);
2208 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype)))
2210 REAL_VALUE_TYPE r;
2211 long tmp[6];
2212 for (int j = 0; j < 6; ++j)
2213 tmp[j] = 0;
2214 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (vectype)));
2215 merge = build_real (TREE_TYPE (vectype), r);
2217 else
2218 gcc_unreachable ();
2219 merge = build_vector_from_val (vectype, merge);
2220 return vect_init_vector (stmt, merge, vectype, NULL);
2223 /* Build a gather load call while vectorizing STMT.  Insert new instructions
   before GSI and add them to VEC_STMT.  GS_INFO describes the gather load
   operation.  If the load is conditional, MASK is the unvectorized
   condition, otherwise MASK is null.

   The calls are made to GS_INFO->decl, which takes five arguments:
   a source (merge) vector, the base pointer, the offset vector, the
   mask and the scale.  One call is emitted per copy; the first
   resulting statement is stored in *VEC_STMT and in
   STMT_VINFO_VEC_STMT, and later ones are chained through
   STMT_VINFO_RELATED_STMT.  */
2228 static void
2229 vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
2230 gimple **vec_stmt, gather_scatter_info *gs_info,
2231 tree mask)
2233 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2234 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2235 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2236 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2237 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2238 int ncopies = vect_get_num_copies (loop_vinfo, vectype);
2239 edge pe = loop_preheader_edge (loop);
2240 enum { NARROW, NONE, WIDEN } modifier;
2241 poly_uint64 gather_off_nunits
2242 = TYPE_VECTOR_SUBPARTS (gs_info->offset_vectype);
/* Pull the return type and the five argument types out of the
   signature of the gather function.  */
2244 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info->decl));
2245 tree rettype = TREE_TYPE (TREE_TYPE (gs_info->decl));
2246 tree srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2247 tree ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2248 tree idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2249 tree masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2250 tree scaletype = TREE_VALUE (arglist);
2251 gcc_checking_assert (types_compatible_p (srctype, rettype)
2252 && (!mask || types_compatible_p (srctype, masktype)));
/* Classify the relation between the number of data elements (NUNITS)
   and the number of offset elements (GATHER_OFF_NUNITS).  */
2254 tree perm_mask = NULL_TREE;
2255 tree mask_perm_mask = NULL_TREE;
2256 if (known_eq (nunits, gather_off_nunits))
2257 modifier = NONE;
2258 else if (known_eq (nunits * 2, gather_off_nunits))
2260 modifier = WIDEN;
2262 /* Currently widening gathers and scatters are only supported for
2263 fixed-length vectors. */
2264 int count = gather_off_nunits.to_constant ();
/* The offset vector has twice as many elements as the data vector, so
   each offset vector feeds two calls.  PERM_MASK is a single-input
   permutation of the offset vector with selector I | (COUNT / 2); it
   is applied on odd-numbered copies.  */
2265 vec_perm_builder sel (count, count, 1);
2266 for (int i = 0; i < count; ++i)
2267 sel.quick_push (i | (count / 2));
2269 vec_perm_indices indices (sel, 1, count);
2270 perm_mask = vect_gen_perm_mask_checked (gs_info->offset_vectype,
2271 indices);
2273 else if (known_eq (nunits, gather_off_nunits * 2))
2275 modifier = NARROW;
2277 /* Currently narrowing gathers and scatters are only supported for
2278 fixed-length vectors. */
2279 int count = nunits.to_constant ();
/* The data vector has twice as many elements as the offset vector.
   PERM_MASK is a two-input permutation that takes elements
   [0, COUNT/2) of the first input followed by elements [0, COUNT/2)
   of the second input; it is used below to combine each pair of call
   results.  */
2280 vec_perm_builder sel (count, count, 1);
2281 sel.quick_grow (count);
2282 for (int i = 0; i < count; ++i)
2283 sel[i] = i < count / 2 ? i : i + count / 2;
2284 vec_perm_indices indices (sel, 2, count);
2285 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
/* Two calls are needed for every data vector produced.  */
2287 ncopies *= 2;
2289 if (mask)
/* MASK_PERM_MASK uses selector I | (COUNT / 2) and is applied to the
   mask operand on odd-numbered copies.  */
2291 for (int i = 0; i < count; ++i)
2292 sel[i] = i | (count / 2);
2293 indices.new_vector (sel, 2, count);
2294 mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices);
2297 else
2298 gcc_unreachable ();
2300 tree vec_dest = vect_create_destination_var (gimple_get_lhs (stmt),
2301 vectype);
/* Convert the base to the pointer type the gather function expects.
   If that requires new statements, they are inserted on the loop
   preheader edge, which must not create a new basic block.  */
2303 tree ptr = fold_convert (ptrtype, gs_info->base);
2304 if (!is_gimple_min_invariant (ptr))
2306 gimple_seq seq;
2307 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2308 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2309 gcc_assert (!new_bb);
2312 tree scale = build_int_cst (scaletype, gs_info->scale);
2314 tree vec_oprnd0 = NULL_TREE;
2315 tree vec_mask = NULL_TREE;
2316 tree src_op = NULL_TREE;
2317 tree mask_op = NULL_TREE;
2318 tree prev_res = NULL_TREE;
2319 stmt_vec_info prev_stmt_info = NULL;
/* For an unconditional load the source and mask operands are the same
   for every copy: an all-zero source and an all-ones mask.  */
2321 if (!mask)
2323 src_op = vect_build_zero_merge_argument (stmt, rettype);
2324 mask_op = vect_build_all_ones_mask (stmt, masktype);
2327 for (int j = 0; j < ncopies; ++j)
2329 tree op, var;
2330 gimple *new_stmt;
/* Choose the offset vector for this copy.  When widening, odd copies
   reuse a permutation of the previous copy's offset vector instead of
   fetching a new vector definition.  */
2331 if (modifier == WIDEN && (j & 1))
2332 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2333 perm_mask, stmt, gsi);
2334 else if (j == 0)
2335 op = vec_oprnd0
2336 = vect_get_vec_def_for_operand (gs_info->offset, stmt);
2337 else
2338 op = vec_oprnd0
2339 = vect_get_vec_def_for_stmt_copy (gs_info->offset_dt, vec_oprnd0);
/* View-convert the offsets to the index type of the gather function
   when the types differ; the element counts must match.  */
2341 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2343 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
2344 TYPE_VECTOR_SUBPARTS (idxtype)));
2345 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2346 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2347 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2348 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2349 op = var;
2352 if (mask)
/* Conditional load: compute the mask operand for this copy.  On odd
   copies of a narrowing gather the previous mask operand is permuted;
   otherwise the next vector definition of MASK is fetched and, if
   necessary, view-converted to MASKTYPE.  */
2354 if (mask_perm_mask && (j & 1))
2355 mask_op = permute_vec_elements (mask_op, mask_op,
2356 mask_perm_mask, stmt, gsi);
2357 else
2359 if (j == 0)
2360 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2361 else
2363 gimple *def_stmt;
2364 enum vect_def_type dt;
2365 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2366 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2369 mask_op = vec_mask;
2370 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2372 gcc_assert
2373 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op)),
2374 TYPE_VECTOR_SUBPARTS (masktype)));
2375 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2376 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2377 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR,
2378 mask_op);
2379 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2380 mask_op = var;
/* The mask operand is also passed as the source (merge) operand; the
   assertion near the top of the function checks that SRCTYPE and
   MASKTYPE are compatible when MASK is nonnull.  */
2383 src_op = mask_op;
2386 new_stmt = gimple_build_call (gs_info->decl, 5, src_op, ptr, op,
2387 mask_op, scale);
/* If the gather function returns a type other than VECTYPE, emit the
   call into a temporary of the return type and view-convert the
   result; otherwise the call defines the result directly.  */
2389 if (!useless_type_conversion_p (vectype, rettype))
2391 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
2392 TYPE_VECTOR_SUBPARTS (rettype)));
2393 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2394 gimple_call_set_lhs (new_stmt, op);
2395 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2396 var = make_ssa_name (vec_dest);
2397 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2398 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2400 else
2402 var = make_ssa_name (vec_dest, new_stmt);
2403 gimple_call_set_lhs (new_stmt, var);
2406 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2408 if (modifier == NARROW)
/* Even copies only remember their result; odd copies combine it with
   the current result using PERM_MASK, and that permutation becomes the
   statement recorded for the pair.  */
2410 if ((j & 1) == 0)
2412 prev_res = var;
2413 continue;
2415 var = permute_vec_elements (prev_res, var, perm_mask, stmt, gsi);
2416 new_stmt = SSA_NAME_DEF_STMT (var);
/* Record the first statement as the vectorized form of STMT and chain
   each later one to its predecessor.  */
2419 if (prev_stmt_info == NULL)
2420 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2421 else
2422 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2423 prev_stmt_info = vinfo_for_stmt (new_stmt);
2427 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2429 static bool
2430 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2431 gimple **vec_stmt, slp_tree slp_node,
2432 tree vectype_in, enum vect_def_type *dt)
2434 tree op, vectype;
2435 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2436 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2437 unsigned ncopies;
2438 unsigned HOST_WIDE_INT nunits, num_bytes;
2440 op = gimple_call_arg (stmt, 0);
2441 vectype = STMT_VINFO_VECTYPE (stmt_info);
2443 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
2444 return false;
2446 /* Multiple types in SLP are handled by creating the appropriate number of
2447 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2448 case of SLP. */
2449 if (slp_node)
2450 ncopies = 1;
2451 else
2452 ncopies = vect_get_num_copies (loop_vinfo, vectype);
2454 gcc_assert (ncopies >= 1);
2456 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2457 if (! char_vectype)
2458 return false;
2460 if (!TYPE_VECTOR_SUBPARTS (char_vectype).is_constant (&num_bytes))
2461 return false;
2463 unsigned word_bytes = num_bytes / nunits;
2465 /* The encoding uses one stepped pattern for each byte in the word. */
2466 vec_perm_builder elts (num_bytes, word_bytes, 3);
2467 for (unsigned i = 0; i < 3; ++i)
2468 for (unsigned j = 0; j < word_bytes; ++j)
2469 elts.quick_push ((i + 1) * word_bytes - j - 1);
2471 vec_perm_indices indices (elts, 1, num_bytes);
2472 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2473 return false;
2475 if (! vec_stmt)
2477 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2478 if (dump_enabled_p ())
2479 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2480 "\n");
2481 if (! PURE_SLP_STMT (stmt_info))
2483 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2484 1, vector_stmt, stmt_info, 0, vect_prologue);
2485 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2486 ncopies, vec_perm, stmt_info, 0, vect_body);
2488 return true;
2491 tree bswap_vconst = vec_perm_indices_to_tree (char_vectype, indices);
2493 /* Transform. */
2494 vec<tree> vec_oprnds = vNULL;
2495 gimple *new_stmt = NULL;
2496 stmt_vec_info prev_stmt_info = NULL;
2497 for (unsigned j = 0; j < ncopies; j++)
2499 /* Handle uses. */
2500 if (j == 0)
2501 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2502 else
2503 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2505 /* Arguments are ready. create the new vector stmt. */
2506 unsigned i;
2507 tree vop;
2508 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2510 tree tem = make_ssa_name (char_vectype);
2511 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2512 char_vectype, vop));
2513 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2514 tree tem2 = make_ssa_name (char_vectype);
2515 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2516 tem, tem, bswap_vconst);
2517 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2518 tem = make_ssa_name (vectype);
2519 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2520 vectype, tem2));
2521 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2522 if (slp_node)
2523 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2526 if (slp_node)
2527 continue;
2529 if (j == 0)
2530 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2531 else
2532 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2534 prev_stmt_info = vinfo_for_stmt (new_stmt);
2537 vec_oprnds.release ();
2538 return true;
2541 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2542 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2543 in a single step. On success, store the binary pack code in
2544 *CONVERT_CODE. */
2546 static bool
2547 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2548 tree_code *convert_code)
2550 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2551 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2552 return false;
2554 tree_code code;
2555 int multi_step_cvt = 0;
2556 auto_vec <tree, 8> interm_types;
2557 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2558 &code, &multi_step_cvt,
2559 &interm_types)
2560 || multi_step_cvt)
2561 return false;
2563 *convert_code = code;
2564 return true;
2567 /* Function vectorizable_call.
2569 Check if GS performs a function call that can be vectorized.
2570 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2571 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2572 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2574 static bool
2575 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2576 slp_tree slp_node)
2578 gcall *stmt;
2579 tree vec_dest;
2580 tree scalar_dest;
2581 tree op, type;
2582 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2583 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2584 tree vectype_out, vectype_in;
2585 poly_uint64 nunits_in;
2586 poly_uint64 nunits_out;
2587 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2588 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2589 vec_info *vinfo = stmt_info->vinfo;
2590 tree fndecl, new_temp, rhs_type;
2591 gimple *def_stmt;
2592 enum vect_def_type dt[3]
2593 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2594 int ndts = 3;
2595 gimple *new_stmt = NULL;
2596 int ncopies, j;
2597 vec<tree> vargs = vNULL;
2598 enum { NARROW, NONE, WIDEN } modifier;
2599 size_t i, nargs;
2600 tree lhs;
2602 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2603 return false;
2605 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2606 && ! vec_stmt)
2607 return false;
2609 /* Is GS a vectorizable call? */
2610 stmt = dyn_cast <gcall *> (gs);
2611 if (!stmt)
2612 return false;
2614 if (gimple_call_internal_p (stmt)
2615 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2616 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2617 /* Handled by vectorizable_load and vectorizable_store. */
2618 return false;
2620 if (gimple_call_lhs (stmt) == NULL_TREE
2621 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2622 return false;
2624 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2626 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2628 /* Process function arguments. */
2629 rhs_type = NULL_TREE;
2630 vectype_in = NULL_TREE;
2631 nargs = gimple_call_num_args (stmt);
2633 /* Bail out if the function has more than three arguments, we do not have
2634 interesting builtin functions to vectorize with more than two arguments
2635 except for fma. No arguments is also not good. */
2636 if (nargs == 0 || nargs > 3)
2637 return false;
2639 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2640 if (gimple_call_internal_p (stmt)
2641 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2643 nargs = 0;
2644 rhs_type = unsigned_type_node;
2647 for (i = 0; i < nargs; i++)
2649 tree opvectype;
2651 op = gimple_call_arg (stmt, i);
2653 /* We can only handle calls with arguments of the same type. */
2654 if (rhs_type
2655 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2657 if (dump_enabled_p ())
2658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2659 "argument types differ.\n");
2660 return false;
2662 if (!rhs_type)
2663 rhs_type = TREE_TYPE (op);
2665 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2667 if (dump_enabled_p ())
2668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2669 "use not simple.\n");
2670 return false;
2673 if (!vectype_in)
2674 vectype_in = opvectype;
2675 else if (opvectype
2676 && opvectype != vectype_in)
2678 if (dump_enabled_p ())
2679 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2680 "argument vector types differ.\n");
2681 return false;
2684 /* If all arguments are external or constant defs use a vector type with
2685 the same size as the output vector type. */
2686 if (!vectype_in)
2687 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2688 if (vec_stmt)
2689 gcc_assert (vectype_in);
2690 if (!vectype_in)
2692 if (dump_enabled_p ())
2694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2695 "no vectype for scalar type ");
2696 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2697 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2700 return false;
2703 /* FORNOW */
2704 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2705 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2706 if (known_eq (nunits_in * 2, nunits_out))
2707 modifier = NARROW;
2708 else if (known_eq (nunits_out, nunits_in))
2709 modifier = NONE;
2710 else if (known_eq (nunits_out * 2, nunits_in))
2711 modifier = WIDEN;
2712 else
2713 return false;
2715 /* We only handle functions that do not read or clobber memory. */
2716 if (gimple_vuse (stmt))
2718 if (dump_enabled_p ())
2719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2720 "function reads from or writes to memory.\n");
2721 return false;
2724 /* For now, we only vectorize functions if a target specific builtin
2725 is available. TODO -- in some cases, it might be profitable to
2726 insert the calls for pieces of the vector, in order to be able
2727 to vectorize other operations in the loop. */
2728 fndecl = NULL_TREE;
2729 internal_fn ifn = IFN_LAST;
2730 combined_fn cfn = gimple_call_combined_fn (stmt);
2731 tree callee = gimple_call_fndecl (stmt);
2733 /* First try using an internal function. */
2734 tree_code convert_code = ERROR_MARK;
2735 if (cfn != CFN_LAST
2736 && (modifier == NONE
2737 || (modifier == NARROW
2738 && simple_integer_narrowing (vectype_out, vectype_in,
2739 &convert_code))))
2740 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2741 vectype_in);
2743 /* If that fails, try asking for a target-specific built-in function. */
2744 if (ifn == IFN_LAST)
2746 if (cfn != CFN_LAST)
2747 fndecl = targetm.vectorize.builtin_vectorized_function
2748 (cfn, vectype_out, vectype_in);
2749 else
2750 fndecl = targetm.vectorize.builtin_md_vectorized_function
2751 (callee, vectype_out, vectype_in);
2754 if (ifn == IFN_LAST && !fndecl)
2756 if (cfn == CFN_GOMP_SIMD_LANE
2757 && !slp_node
2758 && loop_vinfo
2759 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2760 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2761 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2762 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2764 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2765 { 0, 1, 2, ... vf - 1 } vector. */
2766 gcc_assert (nargs == 0);
2768 else if (modifier == NONE
2769 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2770 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2771 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2772 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2773 vectype_in, dt);
2774 else
2776 if (dump_enabled_p ())
2777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2778 "function is not vectorizable.\n");
2779 return false;
2783 if (slp_node)
2784 ncopies = 1;
2785 else if (modifier == NARROW && ifn == IFN_LAST)
2786 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
2787 else
2788 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
2790 /* Sanity check: make sure that at least one copy of the vectorized stmt
2791 needs to be generated. */
2792 gcc_assert (ncopies >= 1);
2794 if (!vec_stmt) /* transformation not required. */
2796 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2797 if (dump_enabled_p ())
2798 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2799 "\n");
2800 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2801 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2802 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2803 vec_promote_demote, stmt_info, 0, vect_body);
2805 return true;
2808 /* Transform. */
2810 if (dump_enabled_p ())
2811 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2813 /* Handle def. */
2814 scalar_dest = gimple_call_lhs (stmt);
2815 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2817 prev_stmt_info = NULL;
2818 if (modifier == NONE || ifn != IFN_LAST)
2820 tree prev_res = NULL_TREE;
2821 for (j = 0; j < ncopies; ++j)
2823 /* Build argument list for the vectorized call. */
2824 if (j == 0)
2825 vargs.create (nargs);
2826 else
2827 vargs.truncate (0);
2829 if (slp_node)
2831 auto_vec<vec<tree> > vec_defs (nargs);
2832 vec<tree> vec_oprnds0;
2834 for (i = 0; i < nargs; i++)
2835 vargs.quick_push (gimple_call_arg (stmt, i));
2836 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2837 vec_oprnds0 = vec_defs[0];
2839 /* Arguments are ready. Create the new vector stmt. */
2840 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2842 size_t k;
2843 for (k = 0; k < nargs; k++)
2845 vec<tree> vec_oprndsk = vec_defs[k];
2846 vargs[k] = vec_oprndsk[i];
2848 if (modifier == NARROW)
2850 tree half_res = make_ssa_name (vectype_in);
2851 gcall *call
2852 = gimple_build_call_internal_vec (ifn, vargs);
2853 gimple_call_set_lhs (call, half_res);
2854 gimple_call_set_nothrow (call, true);
2855 new_stmt = call;
2856 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2857 if ((i & 1) == 0)
2859 prev_res = half_res;
2860 continue;
2862 new_temp = make_ssa_name (vec_dest);
2863 new_stmt = gimple_build_assign (new_temp, convert_code,
2864 prev_res, half_res);
2866 else
2868 gcall *call;
2869 if (ifn != IFN_LAST)
2870 call = gimple_build_call_internal_vec (ifn, vargs);
2871 else
2872 call = gimple_build_call_vec (fndecl, vargs);
2873 new_temp = make_ssa_name (vec_dest, call);
2874 gimple_call_set_lhs (call, new_temp);
2875 gimple_call_set_nothrow (call, true);
2876 new_stmt = call;
2878 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2879 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2882 for (i = 0; i < nargs; i++)
2884 vec<tree> vec_oprndsi = vec_defs[i];
2885 vec_oprndsi.release ();
2887 continue;
2890 for (i = 0; i < nargs; i++)
2892 op = gimple_call_arg (stmt, i);
2893 if (j == 0)
2894 vec_oprnd0
2895 = vect_get_vec_def_for_operand (op, stmt);
2896 else
2898 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2899 vec_oprnd0
2900 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2903 vargs.quick_push (vec_oprnd0);
2906 if (gimple_call_internal_p (stmt)
2907 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2909 tree cst = build_index_vector (vectype_out, j * nunits_out, 1);
2910 tree new_var
2911 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2912 gimple *init_stmt = gimple_build_assign (new_var, cst);
2913 vect_init_vector_1 (stmt, init_stmt, NULL);
2914 new_temp = make_ssa_name (vec_dest);
2915 new_stmt = gimple_build_assign (new_temp, new_var);
2917 else if (modifier == NARROW)
2919 tree half_res = make_ssa_name (vectype_in);
2920 gcall *call = gimple_build_call_internal_vec (ifn, vargs);
2921 gimple_call_set_lhs (call, half_res);
2922 gimple_call_set_nothrow (call, true);
2923 new_stmt = call;
2924 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2925 if ((j & 1) == 0)
2927 prev_res = half_res;
2928 continue;
2930 new_temp = make_ssa_name (vec_dest);
2931 new_stmt = gimple_build_assign (new_temp, convert_code,
2932 prev_res, half_res);
2934 else
2936 gcall *call;
2937 if (ifn != IFN_LAST)
2938 call = gimple_build_call_internal_vec (ifn, vargs);
2939 else
2940 call = gimple_build_call_vec (fndecl, vargs);
2941 new_temp = make_ssa_name (vec_dest, new_stmt);
2942 gimple_call_set_lhs (call, new_temp);
2943 gimple_call_set_nothrow (call, true);
2944 new_stmt = call;
2946 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2948 if (j == (modifier == NARROW ? 1 : 0))
2949 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2950 else
2951 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2953 prev_stmt_info = vinfo_for_stmt (new_stmt);
2956 else if (modifier == NARROW)
2958 for (j = 0; j < ncopies; ++j)
2960 /* Build argument list for the vectorized call. */
2961 if (j == 0)
2962 vargs.create (nargs * 2);
2963 else
2964 vargs.truncate (0);
2966 if (slp_node)
2968 auto_vec<vec<tree> > vec_defs (nargs);
2969 vec<tree> vec_oprnds0;
2971 for (i = 0; i < nargs; i++)
2972 vargs.quick_push (gimple_call_arg (stmt, i));
2973 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2974 vec_oprnds0 = vec_defs[0];
2976 /* Arguments are ready. Create the new vector stmt. */
2977 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2979 size_t k;
2980 vargs.truncate (0);
2981 for (k = 0; k < nargs; k++)
2983 vec<tree> vec_oprndsk = vec_defs[k];
2984 vargs.quick_push (vec_oprndsk[i]);
2985 vargs.quick_push (vec_oprndsk[i + 1]);
2987 gcall *call;
2988 if (ifn != IFN_LAST)
2989 call = gimple_build_call_internal_vec (ifn, vargs);
2990 else
2991 call = gimple_build_call_vec (fndecl, vargs);
2992 new_temp = make_ssa_name (vec_dest, call);
2993 gimple_call_set_lhs (call, new_temp);
2994 gimple_call_set_nothrow (call, true);
2995 new_stmt = call;
2996 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2997 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3000 for (i = 0; i < nargs; i++)
3002 vec<tree> vec_oprndsi = vec_defs[i];
3003 vec_oprndsi.release ();
3005 continue;
3008 for (i = 0; i < nargs; i++)
3010 op = gimple_call_arg (stmt, i);
3011 if (j == 0)
3013 vec_oprnd0
3014 = vect_get_vec_def_for_operand (op, stmt);
3015 vec_oprnd1
3016 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3018 else
3020 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3021 vec_oprnd0
3022 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3023 vec_oprnd1
3024 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3027 vargs.quick_push (vec_oprnd0);
3028 vargs.quick_push (vec_oprnd1);
3031 new_stmt = gimple_build_call_vec (fndecl, vargs);
3032 new_temp = make_ssa_name (vec_dest, new_stmt);
3033 gimple_call_set_lhs (new_stmt, new_temp);
3034 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3036 if (j == 0)
3037 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3038 else
3039 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3041 prev_stmt_info = vinfo_for_stmt (new_stmt);
3044 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3046 else
3047 /* No current target implements this case. */
3048 return false;
3050 vargs.release ();
3052 /* The call in STMT might prevent it from being removed in dce.
3053 We however cannot remove it here, due to the way the ssa name
3054 it defines is mapped to the new definition. So just replace
3055 rhs of the statement with something harmless. */
3057 if (slp_node)
3058 return true;
3060 type = TREE_TYPE (scalar_dest);
3061 if (is_pattern_stmt_p (stmt_info))
3062 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3063 else
3064 lhs = gimple_call_lhs (stmt);
3066 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3067 set_vinfo_for_stmt (new_stmt, stmt_info);
3068 set_vinfo_for_stmt (stmt, NULL);
3069 STMT_VINFO_STMT (stmt_info) = new_stmt;
3070 gsi_replace (gsi, new_stmt, false);
3072 return true;
/* Description of one argument of a scalar call that
   vectorizable_simd_clone_call considers replacing with a simd clone.
   One entry is built per call argument and pushed onto ARGINFO.  */
3076 struct simd_call_arg_info
/* Vector type of the argument as reported by vect_is_simple_use; it is
   asserted to be NULL_TREE for constant and external definitions and is
   filled in later for those passed to a vector clone parameter.  */
3078 tree vectype;
/* For linear arguments, the base value (IV base, or the base found by
   vect_simd_lane_linear).  The transform phase reuses this field to keep
   the most recent vector definition or PHI result for the argument.  */
3079 tree op;
/* Step of a linear argument; 0 when the argument is not known to be
   linear.  */
3080 HOST_WIDE_INT linear_step;
/* Definition type of the argument as reported by vect_is_simple_use.  */
3081 enum vect_def_type dt;
/* For constant/external pointer arguments, get_pointer_alignment divided
   by BITS_PER_UNIT; 0 otherwise.  */
3082 unsigned int align;
/* True when vect_simd_lane_linear found the argument to be linear within
   a simd lane rather than across the whole loop.  */
3083 bool simd_lane_linear;
3086 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3087 is linear within simd lane (but not within whole loop), note it in
3088 *ARGINFO. */
3090 static void
3091 vect_simd_lane_linear (tree op, struct loop *loop,
3092 struct simd_call_arg_info *arginfo)
3094 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3096 if (!is_gimple_assign (def_stmt)
3097 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3098 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3099 return;
3101 tree base = gimple_assign_rhs1 (def_stmt);
3102 HOST_WIDE_INT linear_step = 0;
3103 tree v = gimple_assign_rhs2 (def_stmt);
3104 while (TREE_CODE (v) == SSA_NAME)
3106 tree t;
3107 def_stmt = SSA_NAME_DEF_STMT (v);
3108 if (is_gimple_assign (def_stmt))
3109 switch (gimple_assign_rhs_code (def_stmt))
3111 case PLUS_EXPR:
3112 t = gimple_assign_rhs2 (def_stmt);
3113 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3114 return;
3115 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3116 v = gimple_assign_rhs1 (def_stmt);
3117 continue;
3118 case MULT_EXPR:
3119 t = gimple_assign_rhs2 (def_stmt);
3120 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3121 return;
3122 linear_step = tree_to_shwi (t);
3123 v = gimple_assign_rhs1 (def_stmt);
3124 continue;
3125 CASE_CONVERT:
3126 t = gimple_assign_rhs1 (def_stmt);
3127 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3128 || (TYPE_PRECISION (TREE_TYPE (v))
3129 < TYPE_PRECISION (TREE_TYPE (t))))
3130 return;
3131 if (!linear_step)
3132 linear_step = 1;
3133 v = t;
3134 continue;
3135 default:
3136 return;
3138 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3139 && loop->simduid
3140 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3141 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3142 == loop->simduid))
3144 if (!linear_step)
3145 linear_step = 1;
3146 arginfo->linear_step = linear_step;
3147 arginfo->op = base;
3148 arginfo->simd_lane_linear = true;
3149 return;
3154 /* Return the number of elements in vector type VECTYPE, which is associated
3155 with a SIMD clone. At present these vectors always have a constant
3156 length. */
3158 static unsigned HOST_WIDE_INT
3159 simd_clone_subparts (tree vectype)
3161 return TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
3164 /* Function vectorizable_simd_clone_call.
3166 Check if STMT performs a function call that can be vectorized
3167 by calling a simd clone of the function.
3168 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3169 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3170 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3172 static bool
3173 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3174 gimple **vec_stmt, slp_tree slp_node)
/* Outline of the body:
   1. Reject statements that are not calls to a function with simd clones,
      statements for which nested_in_vect_loop_p holds, SLP nodes and
      calls without arguments.
   2. Describe every call argument in ARGINFO (definition type, vector
      type, and for linear arguments the base and step).
   3. Select the clone BESTN: either the one already saved in
      STMT_VINFO_SIMD_CLONE_INFO or the usable candidate with the lowest
      badness score.
   4. When VEC_STMT is null, save the selection and the linear-argument
      data in STMT_VINFO_SIMD_CLONE_INFO and return true.
   5. Otherwise emit NCOPIES calls to the clone, converting arguments and
      return values between the loop's vector types and the clone's.  */
3176 tree vec_dest;
3177 tree scalar_dest;
3178 tree op, type;
3179 tree vec_oprnd0 = NULL_TREE;
3180 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3181 tree vectype;
3182 unsigned int nunits;
3183 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3184 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3185 vec_info *vinfo = stmt_info->vinfo;
3186 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3187 tree fndecl, new_temp;
3188 gimple *def_stmt;
3189 gimple *new_stmt = NULL;
3190 int ncopies, j;
3191 auto_vec<simd_call_arg_info> arginfo;
3192 vec<tree> vargs = vNULL;
3193 size_t i, nargs;
3194 tree lhs, rtype, ratype;
3195 vec<constructor_elt, va_gc> *ret_ctor_elts = NULL;
3197 /* Is STMT a vectorizable call? */
3198 if (!is_gimple_call (stmt))
3199 return false;
3201 fndecl = gimple_call_fndecl (stmt);
3202 if (fndecl == NULL_TREE)
3203 return false;
/* Only callees whose cgraph node lists simd clones are candidates.  */
3205 struct cgraph_node *node = cgraph_node::get (fndecl);
3206 if (node == NULL || node->simd_clones == NULL)
3207 return false;
3209 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3210 return false;
3212 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3213 && ! vec_stmt)
3214 return false;
/* A call result, if there is one, must be an SSA name.  */
3216 if (gimple_call_lhs (stmt)
3217 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3218 return false;
3220 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3222 vectype = STMT_VINFO_VECTYPE (stmt_info);
3224 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3225 return false;
3227 /* FORNOW */
3228 if (slp_node)
3229 return false;
3231 /* Process function arguments. */
3232 nargs = gimple_call_num_args (stmt);
3234 /* Bail out if the function has zero arguments. */
3235 if (nargs == 0)
3236 return false;
3238 arginfo.reserve (nargs, true);
/* Fill in one simd_call_arg_info per call argument.  */
3240 for (i = 0; i < nargs; i++)
3242 simd_call_arg_info thisarginfo;
3243 affine_iv iv;
3245 thisarginfo.linear_step = 0;
3246 thisarginfo.align = 0;
3247 thisarginfo.op = NULL_TREE;
3248 thisarginfo.simd_lane_linear = false;
3250 op = gimple_call_arg (stmt, i);
3251 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3252 &thisarginfo.vectype)
3253 || thisarginfo.dt == vect_uninitialized_def)
3255 if (dump_enabled_p ())
3256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3257 "use not simple.\n");
3258 return false;
3261 if (thisarginfo.dt == vect_constant_def
3262 || thisarginfo.dt == vect_external_def)
3263 gcc_assert (thisarginfo.vectype == NULL_TREE);
3264 else
3265 gcc_assert (thisarginfo.vectype != NULL_TREE);
/* Layout of STMT_VINFO_SIMD_CLONE_INFO as written by the analysis code
   further down: element 0 is the chosen clone's decl; for argument I,
   elements I*3+1, I*3+2 and I*3+3 hold the linear base, the linear step
   and the simd-lane-linear flag (boolean_true_node/boolean_false_node).  */
3267 /* For linear arguments, the analyze phase should have saved
3268 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3269 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3270 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3272 gcc_assert (vec_stmt);
3273 thisarginfo.linear_step
3274 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3275 thisarginfo.op
3276 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3277 thisarginfo.simd_lane_linear
3278 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3279 == boolean_true_node);
3280 /* If loop has been peeled for alignment, we need to adjust it. */
3281 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3282 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3283 if (n1 != n2 && !thisarginfo.simd_lane_linear)
/* Advance the saved base by (N1 - N2) * STEP.  */
3285 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3286 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3287 tree opt = TREE_TYPE (thisarginfo.op);
3288 bias = fold_convert (TREE_TYPE (step), bias);
3289 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3290 thisarginfo.op
3291 = fold_build2 (POINTER_TYPE_P (opt)
3292 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3293 thisarginfo.op, bias);
/* During analysis, an SSA name that simple_iv recognises as an affine
   induction variable with a step fitting a signed HOST_WIDE_INT is
   recorded as linear with that base and step.  */
3296 else if (!vec_stmt
3297 && thisarginfo.dt != vect_constant_def
3298 && thisarginfo.dt != vect_external_def
3299 && loop_vinfo
3300 && TREE_CODE (op) == SSA_NAME
3301 && simple_iv (loop, loop_containing_stmt (stmt), op,
3302 &iv, false)
3303 && tree_fits_shwi_p (iv.step))
3305 thisarginfo.linear_step = tree_to_shwi (iv.step);
3306 thisarginfo.op = iv.base;
/* Constant/external pointers: remember their alignment in bytes.  */
3308 else if ((thisarginfo.dt == vect_constant_def
3309 || thisarginfo.dt == vect_external_def)
3310 && POINTER_TYPE_P (TREE_TYPE (op)))
3311 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3312 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3313 linear too. */
3314 if (POINTER_TYPE_P (TREE_TYPE (op))
3315 && !thisarginfo.linear_step
3316 && !vec_stmt
3317 && thisarginfo.dt != vect_constant_def
3318 && thisarginfo.dt != vect_external_def
3319 && loop_vinfo
3320 && !slp_node
3321 && TREE_CODE (op) == SSA_NAME)
3322 vect_simd_lane_linear (op, loop, &thisarginfo);
3324 arginfo.quick_push (thisarginfo);
/* NOTE(review): this function returns bool, yet the bail-out below uses
   `return NULL'.  It converts to false, but `return false' would match
   every other exit of the function.  */
3327 unsigned HOST_WIDE_INT vf;
3328 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
3330 if (dump_enabled_p ())
3331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3332 "not considering SIMD clones; not yet supported"
3333 " for variable-width vectors.\n");
3334 return NULL;
/* Choose the clone.  If STMT_VINFO_SIMD_CLONE_INFO already exists, its
   element 0 names the clone; otherwise walk the clone list and keep the
   usable candidate with the smallest badness.  */
3337 unsigned int badness = 0;
3338 struct cgraph_node *bestn = NULL;
3339 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3340 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3341 else
3342 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3343 n = n->simdclone->next_clone)
3345 unsigned int this_badness = 0;
/* Skip clones wider than VF or with a different argument count.  */
3346 if (n->simdclone->simdlen > vf
3347 || n->simdclone->nargs != nargs)
3348 continue;
/* A narrower clone costs 1024 per halving relative to VF.  */
3349 if (n->simdclone->simdlen < vf
3350 this_badness += (exact_log2 (vf)
3351 - exact_log2 (n->simdclone->simdlen)) * 1024;
3352 if (n->simdclone->inbranch)
3353 this_badness += 2048;
/* A negative answer from the target hook rules the clone out; otherwise
   the answer is weighted by 512.  */
3354 int target_badness = targetm.simd_clone.usable (n);
3355 if (target_badness < 0)
3356 continue;
3357 this_badness += target_badness * 512;
3358 /* FORNOW: Have to add code to add the mask argument. */
3359 if (n->simdclone->inbranch)
3360 continue;
/* Check each argument against the clone's parameter kind.  I is set to
   -1 (it is a size_t, so this wraps) to flag the candidate as unusable;
   the flag is tested after the switch and again after the loop.  */
3361 for (i = 0; i < nargs; i++)
3363 switch (n->simdclone->args[i].arg_type)
3365 case SIMD_CLONE_ARG_TYPE_VECTOR:
3366 if (!useless_type_conversion_p
3367 (n->simdclone->args[i].orig_type,
3368 TREE_TYPE (gimple_call_arg (stmt, i))))
3369 i = -1;
3370 else if (arginfo[i].dt == vect_constant_def
3371 || arginfo[i].dt == vect_external_def
3372 || arginfo[i].linear_step)
3373 this_badness += 64;
3374 break;
3375 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3376 if (arginfo[i].dt != vect_constant_def
3377 && arginfo[i].dt != vect_external_def)
3378 i = -1;
3379 break;
3380 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3381 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3382 if (arginfo[i].dt == vect_constant_def
3383 || arginfo[i].dt == vect_external_def
3384 || (arginfo[i].linear_step
3385 != n->simdclone->args[i].linear_step))
3386 i = -1;
3387 break;
3388 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3389 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3390 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3391 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3392 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3393 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3394 /* FORNOW */
3395 i = -1;
3396 break;
3397 case SIMD_CLONE_ARG_TYPE_MASK:
3398 gcc_unreachable ();
3400 if (i == (size_t) -1)
3401 break;
/* The clone must not require more alignment than the argument has.  */
3402 if (n->simdclone->args[i].alignment > arginfo[i].align)
3404 i = -1;
3405 break;
3407 if (arginfo[i].align)
3408 this_badness += (exact_log2 (arginfo[i].align)
3409 - exact_log2 (n->simdclone->args[i].alignment));
3411 if (i == (size_t) -1)
3412 continue;
3413 if (bestn == NULL || this_badness < badness)
3415 bestn = n;
3416 badness = this_badness;
3420 if (bestn == NULL)
3421 return false;
/* Constant/external operands passed to vector parameters have no vector
   type yet.  Compute one from the scalar type and give up if there is
   none or it has more elements than the clone's simdlen.  */
3423 for (i = 0; i < nargs; i++)
3424 if ((arginfo[i].dt == vect_constant_def
3425 || arginfo[i].dt == vect_external_def)
3426 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3428 arginfo[i].vectype
3429 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3430 i)));
3431 if (arginfo[i].vectype == NULL
3432 || (simd_clone_subparts (arginfo[i].vectype)
3433 > bestn->simdclone->simdlen))
3434 return false;
/* From here on FNDECL is the clone, NUNITS its simdlen and NCOPIES the
   number of clone calls needed to cover VF lanes.  */
3437 fndecl = bestn->decl;
3438 nunits = bestn->simdclone->simdlen;
3439 ncopies = vf / nunits;
3441 /* If the function isn't const, only allow it in simd loops where user
3442 has asserted that at least nunits consecutive iterations can be
3443 performed using SIMD instructions. */
3444 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3445 && gimple_vuse (stmt))
3446 return false;
3448 /* Sanity check: make sure that at least one copy of the vectorized stmt
3449 needs to be generated. */
3450 gcc_assert (ncopies >= 1);
3452 if (!vec_stmt) /* transformation not required. */
/* Save the chosen clone as element 0, then for every constant-step
   linear argument I grow the vector to I*3+1 entries and append base,
   step and simd-lane-linear flag, so they land at I*3+1 .. I*3+3.  */
3454 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3455 for (i = 0; i < nargs; i++)
3456 if ((bestn->simdclone->args[i].arg_type
3457 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3458 || (bestn->simdclone->args[i].arg_type
3459 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3461 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3462 + 1);
3463 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3464 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3465 ? size_type_node : TREE_TYPE (arginfo[i].op);
3466 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3467 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3468 tree sll = arginfo[i].simd_lane_linear
3469 ? boolean_true_node : boolean_false_node;
3470 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3472 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3473 if (dump_enabled_p ())
3474 dump_printf_loc (MSG_NOTE, vect_location,
3475 "=== vectorizable_simd_clone_call ===\n");
3476 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3477 return true;
3480 /* Transform. */
3482 if (dump_enabled_p ())
3483 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3485 /* Handle def. */
/* RTYPE is the clone's return type; when the clone returns an array,
   RATYPE is that array type and RTYPE its element type.  */
3486 scalar_dest = gimple_call_lhs (stmt);
3487 vec_dest = NULL_TREE;
3488 rtype = NULL_TREE;
3489 ratype = NULL_TREE;
3490 if (scalar_dest)
3492 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3493 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3494 if (TREE_CODE (rtype) == ARRAY_TYPE)
3496 ratype = rtype;
3497 rtype = TREE_TYPE (ratype);
/* Emit one clone call per iteration of this loop.  */
3501 prev_stmt_info = NULL;
3502 for (j = 0; j < ncopies; ++j)
3504 /* Build argument list for the vectorized call. */
3505 if (j == 0)
3506 vargs.create (nargs);
3507 else
3508 vargs.truncate (0);
3510 for (i = 0; i < nargs; i++)
3512 unsigned int k, l, m, o;
3513 tree atype;
3514 op = gimple_call_arg (stmt, i);
3515 switch (bestn->simdclone->args[i].arg_type)
3517 case SIMD_CLONE_ARG_TYPE_VECTOR:
/* O is the number of ATYPE vectors the clone takes for this argument;
   M counts them across all copies.  */
3518 atype = bestn->simdclone->args[i].vector_type;
3519 o = nunits / simd_clone_subparts (atype);
3520 for (m = j * o; m < (j + 1) * o; m++)
/* The clone's parameter vector has fewer elements than the operand's
   vector type: pass pieces extracted with BIT_FIELD_REF, fetching a new
   operand vector every K pieces.  */
3522 if (simd_clone_subparts (atype)
3523 < simd_clone_subparts (arginfo[i].vectype))
3525 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3526 k = (simd_clone_subparts (arginfo[i].vectype)
3527 / simd_clone_subparts (atype));
3528 gcc_assert ((k & (k - 1)) == 0);
3529 if (m == 0)
3530 vec_oprnd0
3531 = vect_get_vec_def_for_operand (op, stmt);
3532 else
3534 vec_oprnd0 = arginfo[i].op;
3535 if ((m & (k - 1)) == 0)
3536 vec_oprnd0
3537 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3538 vec_oprnd0);
3540 arginfo[i].op = vec_oprnd0;
3541 vec_oprnd0
3542 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3543 bitsize_int (prec),
3544 bitsize_int ((m & (k - 1)) * prec));
3545 new_stmt
3546 = gimple_build_assign (make_ssa_name (atype),
3547 vec_oprnd0);
3548 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3549 vargs.safe_push (gimple_assign_lhs (new_stmt));
/* Otherwise the parameter vector has at least as many elements: pass
   the operand vector directly when K == 1, else glue K operand vectors
   together with a CONSTRUCTOR.  */
3551 else
3553 k = (simd_clone_subparts (atype)
3554 / simd_clone_subparts (arginfo[i].vectype));
3555 gcc_assert ((k & (k - 1)) == 0);
3556 vec<constructor_elt, va_gc> *ctor_elts;
3557 if (k != 1)
3558 vec_alloc (ctor_elts, k);
3559 else
3560 ctor_elts = NULL;
3561 for (l = 0; l < k; l++)
3563 if (m == 0 && l == 0)
3564 vec_oprnd0
3565 = vect_get_vec_def_for_operand (op, stmt);
3566 else
3567 vec_oprnd0
3568 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3569 arginfo[i].op);
3570 arginfo[i].op = vec_oprnd0;
3571 if (k == 1)
3572 break;
3573 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3574 vec_oprnd0);
3576 if (k == 1)
3577 vargs.safe_push (vec_oprnd0);
3578 else
3580 vec_oprnd0 = build_constructor (atype, ctor_elts);
3581 new_stmt
3582 = gimple_build_assign (make_ssa_name (atype),
3583 vec_oprnd0);
3584 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3585 vargs.safe_push (gimple_assign_lhs (new_stmt));
3589 break;
/* Uniform parameters receive the scalar operand unchanged.  */
3590 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3591 vargs.safe_push (op);
3592 break;
3593 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3594 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
/* First copy: gimplify the base, inserting any needed statements on the
   preheader edge.  A simd-lane-linear argument is passed as that base.
   Otherwise a PHI in the loop header starts at the base and is advanced
   by linear_step * ncopies * nunits on the latch edge.  */
3595 if (j == 0)
3597 gimple_seq stmts;
3598 arginfo[i].op
3599 = force_gimple_operand (arginfo[i].op, &stmts, true,
3600 NULL_TREE);
3601 if (stmts != NULL)
3603 basic_block new_bb;
3604 edge pe = loop_preheader_edge (loop);
3605 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3606 gcc_assert (!new_bb);
3608 if (arginfo[i].simd_lane_linear)
3610 vargs.safe_push (arginfo[i].op);
3611 break;
3613 tree phi_res = copy_ssa_name (op);
3614 gphi *new_phi = create_phi_node (phi_res, loop->header);
3615 set_vinfo_for_stmt (new_phi,
3616 new_stmt_vec_info (new_phi, loop_vinfo));
3617 add_phi_arg (new_phi, arginfo[i].op,
3618 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3619 enum tree_code code
3620 = POINTER_TYPE_P (TREE_TYPE (op))
3621 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3622 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3623 ? sizetype : TREE_TYPE (op);
3624 widest_int cst
3625 = wi::mul (bestn->simdclone->args[i].linear_step,
3626 ncopies * nunits);
3627 tree tcst = wide_int_to_tree (type, cst);
3628 tree phi_arg = copy_ssa_name (op);
3629 new_stmt
3630 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3631 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3632 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3633 set_vinfo_for_stmt (new_stmt,
3634 new_stmt_vec_info (new_stmt, loop_vinfo));
3635 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3636 UNKNOWN_LOCATION);
3637 arginfo[i].op = phi_res;
3638 vargs.safe_push (phi_res);
/* Later copies: pass arginfo[i].op + linear_step * j * nunits.  */
3640 else
3642 enum tree_code code
3643 = POINTER_TYPE_P (TREE_TYPE (op))
3644 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3645 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3646 ? sizetype : TREE_TYPE (op);
3647 widest_int cst
3648 = wi::mul (bestn->simdclone->args[i].linear_step,
3649 j * nunits);
3650 tree tcst = wide_int_to_tree (type, cst);
3651 new_temp = make_ssa_name (TREE_TYPE (op));
3652 new_stmt = gimple_build_assign (new_temp, code,
3653 arginfo[i].op, tcst);
3654 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3655 vargs.safe_push (new_temp);
3657 break;
/* The clone selection above never picks a clone with these parameter
   kinds, so they are not expected here.  */
3658 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3659 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3660 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3661 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3662 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3663 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3664 default:
3665 gcc_unreachable ();
/* Build the call to the clone.  Its result goes to a temporary of the
   array type when the clone returns an array, otherwise to an SSA name.  */
3669 new_stmt = gimple_build_call_vec (fndecl, vargs);
3670 if (vec_dest)
3672 gcc_assert (ratype || simd_clone_subparts (rtype) == nunits);
3673 if (ratype)
3674 new_temp = create_tmp_var (ratype);
3675 else if (simd_clone_subparts (vectype)
3676 == simd_clone_subparts (rtype))
3677 new_temp = make_ssa_name (vec_dest, new_stmt);
3678 else
3679 new_temp = make_ssa_name (rtype, new_stmt);
3680 gimple_call_set_lhs (new_stmt, new_temp);
3682 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3684 if (vec_dest)
/* The clone produces more elements than VECTYPE holds: split the result
   into K VECTYPE vectors (MEM_REF into the returned array, or
   BIT_FIELD_REF of the returned vector) and chain them as copies.  */
3686 if (simd_clone_subparts (vectype) < nunits)
3688 unsigned int k, l;
3689 poly_uint64 prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3690 poly_uint64 bytes = GET_MODE_SIZE (TYPE_MODE (vectype));
3691 k = nunits / simd_clone_subparts (vectype);
3692 gcc_assert ((k & (k - 1)) == 0);
3693 for (l = 0; l < k; l++)
3695 tree t;
3696 if (ratype)
3698 t = build_fold_addr_expr (new_temp);
3699 t = build2 (MEM_REF, vectype, t,
3700 build_int_cst (TREE_TYPE (t), l * bytes));
3702 else
3703 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3704 bitsize_int (prec), bitsize_int (l * prec));
3705 new_stmt
3706 = gimple_build_assign (make_ssa_name (vectype), t);
3707 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3708 if (j == 0 && l == 0)
3709 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3710 else
3711 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3713 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Once read, the array temporary is clobbered.  */
3716 if (ratype)
3718 tree clobber = build_constructor (ratype, NULL);
3719 TREE_THIS_VOLATILE (clobber) = 1;
3720 new_stmt = gimple_build_assign (new_temp, clobber);
3721 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3723 continue;
/* The clone produces fewer elements than VECTYPE holds: collect the
   results of K consecutive copies in RET_CTOR_ELTS and build one VECTYPE
   vector from them after the last copy of each group.  */
3725 else if (simd_clone_subparts (vectype) > nunits)
3727 unsigned int k = (simd_clone_subparts (vectype)
3728 / simd_clone_subparts (rtype));
3729 gcc_assert ((k & (k - 1)) == 0);
3730 if ((j & (k - 1)) == 0)
3731 vec_alloc (ret_ctor_elts, k);
3732 if (ratype)
3734 unsigned int m, o = nunits / simd_clone_subparts (rtype);
3735 for (m = 0; m < o; m++)
3737 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3738 size_int (m), NULL_TREE, NULL_TREE);
3739 new_stmt
3740 = gimple_build_assign (make_ssa_name (rtype), tem);
3741 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3742 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3743 gimple_assign_lhs (new_stmt));
3745 tree clobber = build_constructor (ratype, NULL);
3746 TREE_THIS_VOLATILE (clobber) = 1;
3747 new_stmt = gimple_build_assign (new_temp, clobber);
3748 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3750 else
3751 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3752 if ((j & (k - 1)) != k - 1)
3753 continue;
3754 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3755 new_stmt
3756 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3757 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3759 if ((unsigned) j == k - 1)
3760 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3761 else
3762 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3764 prev_stmt_info = vinfo_for_stmt (new_stmt);
3765 continue;
/* Element counts match but the clone returns an array: load the vector
   from offset 0 of the array temporary, then clobber the temporary.  */
3767 else if (ratype)
3769 tree t = build_fold_addr_expr (new_temp);
3770 t = build2 (MEM_REF, vectype, t,
3771 build_int_cst (TREE_TYPE (t), 0));
3772 new_stmt
3773 = gimple_build_assign (make_ssa_name (vec_dest), t);
3774 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3775 tree clobber = build_constructor (ratype, NULL);
3776 TREE_THIS_VOLATILE (clobber) = 1;
3777 vect_finish_stmt_generation (stmt,
3778 gimple_build_assign (new_temp,
3779 clobber), gsi);
/* Record the first generated statement as the vectorized statement and
   link each later copy through STMT_VINFO_RELATED_STMT.  */
3783 if (j == 0)
3784 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3785 else
3786 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3788 prev_stmt_info = vinfo_for_stmt (new_stmt);
3791 vargs.release ();
3793 /* The call in STMT might prevent it from being removed in dce.
3794 We however cannot remove it here, due to the way the ssa name
3795 it defines is mapped to the new definition. So just replace
3796 rhs of the statement with something harmless. */
3798 if (slp_node)
3799 return true;
/* A call with a result becomes LHS = 0; a call without one becomes a
   GIMPLE nop.  The stmt_vec_info is moved over to the replacement.  */
3801 if (scalar_dest)
3803 type = TREE_TYPE (scalar_dest);
3804 if (is_pattern_stmt_p (stmt_info))
3805 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3806 else
3807 lhs = gimple_call_lhs (stmt);
3808 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3810 else
3811 new_stmt = gimple_build_nop ();
3812 set_vinfo_for_stmt (new_stmt, stmt_info);
3813 set_vinfo_for_stmt (stmt, NULL);
3814 STMT_VINFO_STMT (stmt_info) = new_stmt;
3815 gsi_replace (gsi, new_stmt, true);
3816 unlink_stmt_vdef (stmt);
3818 return true;
3822 /* Function vect_gen_widened_results_half
3824 Create a vector stmt whose code, type, number of arguments, and result
3825 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
3826 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3827 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3828 needs to be created (DECL is a function-decl of a target-builtin).
3829 STMT is the original scalar stmt that we are vectorizing. */
3831 static gimple *
3832 vect_gen_widened_results_half (enum tree_code code,
3833 tree decl,
3834 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3835 tree vec_dest, gimple_stmt_iterator *gsi,
3836 gimple *stmt)
3838 gimple *new_stmt;
3839 tree new_temp;
3841 /* Generate half of the widened result: */
3842 if (code == CALL_EXPR)
3844 /* Target specific support */
3845 if (op_type == binary_op)
3846 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3847 else
3848 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3849 new_temp = make_ssa_name (vec_dest, new_stmt);
3850 gimple_call_set_lhs (new_stmt, new_temp);
3852 else
3854 /* Generic support */
3855 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3856 if (op_type != binary_op)
3857 vec_oprnd1 = NULL;
3858 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3859 new_temp = make_ssa_name (vec_dest, new_stmt);
3860 gimple_assign_set_lhs (new_stmt, new_temp);
3862 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3864 return new_stmt;
3868 /* Get vectorized definitions for loop-based vectorization. For the first
3869 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3870 scalar operand), and for the rest we get a copy with
3871 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3872 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3873 The vectors are collected into VEC_OPRNDS. */
3875 static void
3876 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3877 vec<tree> *vec_oprnds, int multi_step_cvt)
3879 tree vec_oprnd;
3881 /* Get first vector operand. */
3882 /* All the vector operands except the very first one (that is scalar oprnd)
3883 are stmt copies. */
3884 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3885 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3886 else
3887 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3889 vec_oprnds->quick_push (vec_oprnd);
3891 /* Get second vector operand. */
3892 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3893 vec_oprnds->quick_push (vec_oprnd);
3895 *oprnd = vec_oprnd;
3897 /* For conversion in multiple steps, continue to get operands
3898 recursively. */
3899 if (multi_step_cvt)
3900 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3904 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3905 For multi-step conversions store the resulting vectors and call the function
3906 recursively. */
3908 static void
3909 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3910 int multi_step_cvt, gimple *stmt,
3911 vec<tree> vec_dsts,
3912 gimple_stmt_iterator *gsi,
3913 slp_tree slp_node, enum tree_code code,
3914 stmt_vec_info *prev_stmt_info)
3916 unsigned int i;
3917 tree vop0, vop1, new_tmp, vec_dest;
3918 gimple *new_stmt;
3919 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3921 vec_dest = vec_dsts.pop ();
3923 for (i = 0; i < vec_oprnds->length (); i += 2)
3925 /* Create demotion operation. */
3926 vop0 = (*vec_oprnds)[i];
3927 vop1 = (*vec_oprnds)[i + 1];
3928 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3929 new_tmp = make_ssa_name (vec_dest, new_stmt);
3930 gimple_assign_set_lhs (new_stmt, new_tmp);
3931 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3933 if (multi_step_cvt)
3934 /* Store the resulting vector for next recursive call. */
3935 (*vec_oprnds)[i/2] = new_tmp;
3936 else
3938 /* This is the last step of the conversion sequence. Store the
3939 vectors in SLP_NODE or in vector info of the scalar statement
3940 (or in STMT_VINFO_RELATED_STMT chain). */
3941 if (slp_node)
3942 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3943 else
3945 if (!*prev_stmt_info)
3946 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3947 else
3948 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3950 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3955 /* For multi-step demotion operations we first generate demotion operations
3956 from the source type to the intermediate types, and then combine the
3957 results (stored in VEC_OPRNDS) in demotion operation to the destination
3958 type. */
3959 if (multi_step_cvt)
3961 /* At each level of recursion we have half of the operands we had at the
3962 previous level. */
3963 vec_oprnds->truncate ((i+1)/2);
3964 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3965 stmt, vec_dsts, gsi, slp_node,
3966 VEC_PACK_TRUNC_EXPR,
3967 prev_stmt_info);
3970 vec_dsts.quick_push (vec_dest);
3974 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3975 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3976 the resulting vectors and call the function recursively. */
3978 static void
3979 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3980 vec<tree> *vec_oprnds1,
3981 gimple *stmt, tree vec_dest,
3982 gimple_stmt_iterator *gsi,
3983 enum tree_code code1,
3984 enum tree_code code2, tree decl1,
3985 tree decl2, int op_type)
3987 int i;
3988 tree vop0, vop1, new_tmp1, new_tmp2;
3989 gimple *new_stmt1, *new_stmt2;
3990 vec<tree> vec_tmp = vNULL;
3992 vec_tmp.create (vec_oprnds0->length () * 2);
3993 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3995 if (op_type == binary_op)
3996 vop1 = (*vec_oprnds1)[i];
3997 else
3998 vop1 = NULL_TREE;
4000 /* Generate the two halves of promotion operation. */
4001 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
4002 op_type, vec_dest, gsi, stmt);
4003 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
4004 op_type, vec_dest, gsi, stmt);
4005 if (is_gimple_call (new_stmt1))
4007 new_tmp1 = gimple_call_lhs (new_stmt1);
4008 new_tmp2 = gimple_call_lhs (new_stmt2);
4010 else
4012 new_tmp1 = gimple_assign_lhs (new_stmt1);
4013 new_tmp2 = gimple_assign_lhs (new_stmt2);
4016 /* Store the results for the next step. */
4017 vec_tmp.quick_push (new_tmp1);
4018 vec_tmp.quick_push (new_tmp2);
4021 vec_oprnds0->release ();
4022 *vec_oprnds0 = vec_tmp;
4026 /* Check if STMT performs a conversion operation, that can be vectorized.
4027 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4028 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4029 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4031 static bool
4032 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4033 gimple **vec_stmt, slp_tree slp_node)
4035 tree vec_dest;
4036 tree scalar_dest;
4037 tree op0, op1 = NULL_TREE;
4038 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4039 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4040 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4041 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4042 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4043 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4044 tree new_temp;
4045 gimple *def_stmt;
4046 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4047 int ndts = 2;
4048 gimple *new_stmt = NULL;
4049 stmt_vec_info prev_stmt_info;
4050 poly_uint64 nunits_in;
4051 poly_uint64 nunits_out;
4052 tree vectype_out, vectype_in;
4053 int ncopies, i, j;
4054 tree lhs_type, rhs_type;
4055 enum { NARROW, NONE, WIDEN } modifier;
4056 vec<tree> vec_oprnds0 = vNULL;
4057 vec<tree> vec_oprnds1 = vNULL;
4058 tree vop0;
4059 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4060 vec_info *vinfo = stmt_info->vinfo;
4061 int multi_step_cvt = 0;
4062 vec<tree> interm_types = vNULL;
4063 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4064 int op_type;
4065 unsigned short fltsz;
4067 /* Is STMT a vectorizable conversion? */
4069 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4070 return false;
4072 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4073 && ! vec_stmt)
4074 return false;
4076 if (!is_gimple_assign (stmt))
4077 return false;
4079 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4080 return false;
4082 code = gimple_assign_rhs_code (stmt);
4083 if (!CONVERT_EXPR_CODE_P (code)
4084 && code != FIX_TRUNC_EXPR
4085 && code != FLOAT_EXPR
4086 && code != WIDEN_MULT_EXPR
4087 && code != WIDEN_LSHIFT_EXPR)
4088 return false;
4090 op_type = TREE_CODE_LENGTH (code);
4092 /* Check types of lhs and rhs. */
4093 scalar_dest = gimple_assign_lhs (stmt);
4094 lhs_type = TREE_TYPE (scalar_dest);
4095 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4097 op0 = gimple_assign_rhs1 (stmt);
4098 rhs_type = TREE_TYPE (op0);
4100 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4101 && !((INTEGRAL_TYPE_P (lhs_type)
4102 && INTEGRAL_TYPE_P (rhs_type))
4103 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4104 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4105 return false;
4107 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4108 && ((INTEGRAL_TYPE_P (lhs_type)
4109 && !type_has_mode_precision_p (lhs_type))
4110 || (INTEGRAL_TYPE_P (rhs_type)
4111 && !type_has_mode_precision_p (rhs_type))))
4113 if (dump_enabled_p ())
4114 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4115 "type conversion to/from bit-precision unsupported."
4116 "\n");
4117 return false;
4120 /* Check the operands of the operation. */
4121 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4123 if (dump_enabled_p ())
4124 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4125 "use not simple.\n");
4126 return false;
4128 if (op_type == binary_op)
4130 bool ok;
4132 op1 = gimple_assign_rhs2 (stmt);
4133 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4134 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4135 OP1. */
4136 if (CONSTANT_CLASS_P (op0))
4137 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4138 else
4139 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4141 if (!ok)
4143 if (dump_enabled_p ())
4144 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4145 "use not simple.\n");
4146 return false;
4150 /* If op0 is an external or constant defs use a vector type of
4151 the same size as the output vector type. */
4152 if (!vectype_in)
4153 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4154 if (vec_stmt)
4155 gcc_assert (vectype_in);
4156 if (!vectype_in)
4158 if (dump_enabled_p ())
4160 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4161 "no vectype for scalar type ");
4162 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4163 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4166 return false;
4169 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4170 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4172 if (dump_enabled_p ())
4174 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4175 "can't convert between boolean and non "
4176 "boolean vectors");
4177 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4178 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4181 return false;
4184 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4185 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4186 if (known_eq (nunits_out, nunits_in))
4187 modifier = NONE;
4188 else if (multiple_p (nunits_out, nunits_in))
4189 modifier = NARROW;
4190 else
4192 gcc_checking_assert (multiple_p (nunits_in, nunits_out));
4193 modifier = WIDEN;
4196 /* Multiple types in SLP are handled by creating the appropriate number of
4197 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4198 case of SLP. */
4199 if (slp_node)
4200 ncopies = 1;
4201 else if (modifier == NARROW)
4202 ncopies = vect_get_num_copies (loop_vinfo, vectype_out);
4203 else
4204 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
4206 /* Sanity check: make sure that at least one copy of the vectorized stmt
4207 needs to be generated. */
4208 gcc_assert (ncopies >= 1);
4210 bool found_mode = false;
4211 scalar_mode lhs_mode = SCALAR_TYPE_MODE (lhs_type);
4212 scalar_mode rhs_mode = SCALAR_TYPE_MODE (rhs_type);
4213 opt_scalar_mode rhs_mode_iter;
4215 /* Supportable by target? */
4216 switch (modifier)
4218 case NONE:
4219 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4220 return false;
4221 if (supportable_convert_operation (code, vectype_out, vectype_in,
4222 &decl1, &code1))
4223 break;
4224 /* FALLTHRU */
4225 unsupported:
4226 if (dump_enabled_p ())
4227 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4228 "conversion not supported by target.\n");
4229 return false;
4231 case WIDEN:
4232 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4233 &code1, &code2, &multi_step_cvt,
4234 &interm_types))
4236 /* Binary widening operation can only be supported directly by the
4237 architecture. */
4238 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4239 break;
4242 if (code != FLOAT_EXPR
4243 || GET_MODE_SIZE (lhs_mode) <= GET_MODE_SIZE (rhs_mode))
4244 goto unsupported;
4246 fltsz = GET_MODE_SIZE (lhs_mode);
4247 FOR_EACH_2XWIDER_MODE (rhs_mode_iter, rhs_mode)
4249 rhs_mode = rhs_mode_iter.require ();
4250 if (GET_MODE_SIZE (rhs_mode) > fltsz)
4251 break;
4253 cvt_type
4254 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4255 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4256 if (cvt_type == NULL_TREE)
4257 goto unsupported;
4259 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4261 if (!supportable_convert_operation (code, vectype_out,
4262 cvt_type, &decl1, &codecvt1))
4263 goto unsupported;
4265 else if (!supportable_widening_operation (code, stmt, vectype_out,
4266 cvt_type, &codecvt1,
4267 &codecvt2, &multi_step_cvt,
4268 &interm_types))
4269 continue;
4270 else
4271 gcc_assert (multi_step_cvt == 0);
4273 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4274 vectype_in, &code1, &code2,
4275 &multi_step_cvt, &interm_types))
4277 found_mode = true;
4278 break;
4282 if (!found_mode)
4283 goto unsupported;
4285 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4286 codecvt2 = ERROR_MARK;
4287 else
4289 multi_step_cvt++;
4290 interm_types.safe_push (cvt_type);
4291 cvt_type = NULL_TREE;
4293 break;
4295 case NARROW:
4296 gcc_assert (op_type == unary_op);
4297 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4298 &code1, &multi_step_cvt,
4299 &interm_types))
4300 break;
4302 if (code != FIX_TRUNC_EXPR
4303 || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode))
4304 goto unsupported;
4306 cvt_type
4307 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4308 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4309 if (cvt_type == NULL_TREE)
4310 goto unsupported;
4311 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4312 &decl1, &codecvt1))
4313 goto unsupported;
4314 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4315 &code1, &multi_step_cvt,
4316 &interm_types))
4317 break;
4318 goto unsupported;
4320 default:
4321 gcc_unreachable ();
4324 if (!vec_stmt) /* transformation not required. */
4326 if (dump_enabled_p ())
4327 dump_printf_loc (MSG_NOTE, vect_location,
4328 "=== vectorizable_conversion ===\n");
4329 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4331 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4332 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4334 else if (modifier == NARROW)
4336 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4337 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4339 else
4341 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4342 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4344 interm_types.release ();
4345 return true;
4348 /* Transform. */
4349 if (dump_enabled_p ())
4350 dump_printf_loc (MSG_NOTE, vect_location,
4351 "transform conversion. ncopies = %d.\n", ncopies);
4353 if (op_type == binary_op)
4355 if (CONSTANT_CLASS_P (op0))
4356 op0 = fold_convert (TREE_TYPE (op1), op0);
4357 else if (CONSTANT_CLASS_P (op1))
4358 op1 = fold_convert (TREE_TYPE (op0), op1);
4361 /* In case of multi-step conversion, we first generate conversion operations
4362 to the intermediate types, and then from that types to the final one.
4363 We create vector destinations for the intermediate type (TYPES) received
4364 from supportable_*_operation, and store them in the correct order
4365 for future use in vect_create_vectorized_*_stmts (). */
4366 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4367 vec_dest = vect_create_destination_var (scalar_dest,
4368 (cvt_type && modifier == WIDEN)
4369 ? cvt_type : vectype_out);
4370 vec_dsts.quick_push (vec_dest);
4372 if (multi_step_cvt)
4374 for (i = interm_types.length () - 1;
4375 interm_types.iterate (i, &intermediate_type); i--)
4377 vec_dest = vect_create_destination_var (scalar_dest,
4378 intermediate_type);
4379 vec_dsts.quick_push (vec_dest);
4383 if (cvt_type)
4384 vec_dest = vect_create_destination_var (scalar_dest,
4385 modifier == WIDEN
4386 ? vectype_out : cvt_type);
4388 if (!slp_node)
4390 if (modifier == WIDEN)
4392 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4393 if (op_type == binary_op)
4394 vec_oprnds1.create (1);
4396 else if (modifier == NARROW)
4397 vec_oprnds0.create (
4398 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4400 else if (code == WIDEN_LSHIFT_EXPR)
4401 vec_oprnds1.create (slp_node->vec_stmts_size);
4403 last_oprnd = op0;
4404 prev_stmt_info = NULL;
4405 switch (modifier)
4407 case NONE:
4408 for (j = 0; j < ncopies; j++)
4410 if (j == 0)
4411 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4412 else
4413 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4415 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4417 /* Arguments are ready, create the new vector stmt. */
4418 if (code1 == CALL_EXPR)
4420 new_stmt = gimple_build_call (decl1, 1, vop0);
4421 new_temp = make_ssa_name (vec_dest, new_stmt);
4422 gimple_call_set_lhs (new_stmt, new_temp);
4424 else
4426 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4427 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4428 new_temp = make_ssa_name (vec_dest, new_stmt);
4429 gimple_assign_set_lhs (new_stmt, new_temp);
4432 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4433 if (slp_node)
4434 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4435 else
4437 if (!prev_stmt_info)
4438 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4439 else
4440 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4441 prev_stmt_info = vinfo_for_stmt (new_stmt);
4445 break;
4447 case WIDEN:
4448 /* In case the vectorization factor (VF) is bigger than the number
4449 of elements that we can fit in a vectype (nunits), we have to
4450 generate more than one vector stmt - i.e - we need to "unroll"
4451 the vector stmt by a factor VF/nunits. */
4452 for (j = 0; j < ncopies; j++)
4454 /* Handle uses. */
4455 if (j == 0)
4457 if (slp_node)
4459 if (code == WIDEN_LSHIFT_EXPR)
4461 unsigned int k;
4463 vec_oprnd1 = op1;
4464 /* Store vec_oprnd1 for every vector stmt to be created
4465 for SLP_NODE. We check during the analysis that all
4466 the shift arguments are the same. */
4467 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4468 vec_oprnds1.quick_push (vec_oprnd1);
4470 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4471 slp_node);
4473 else
4474 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4475 &vec_oprnds1, slp_node);
4477 else
4479 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4480 vec_oprnds0.quick_push (vec_oprnd0);
4481 if (op_type == binary_op)
4483 if (code == WIDEN_LSHIFT_EXPR)
4484 vec_oprnd1 = op1;
4485 else
4486 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4487 vec_oprnds1.quick_push (vec_oprnd1);
4491 else
4493 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4494 vec_oprnds0.truncate (0);
4495 vec_oprnds0.quick_push (vec_oprnd0);
4496 if (op_type == binary_op)
4498 if (code == WIDEN_LSHIFT_EXPR)
4499 vec_oprnd1 = op1;
4500 else
4501 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4502 vec_oprnd1);
4503 vec_oprnds1.truncate (0);
4504 vec_oprnds1.quick_push (vec_oprnd1);
4508 /* Arguments are ready. Create the new vector stmts. */
4509 for (i = multi_step_cvt; i >= 0; i--)
4511 tree this_dest = vec_dsts[i];
4512 enum tree_code c1 = code1, c2 = code2;
4513 if (i == 0 && codecvt2 != ERROR_MARK)
4515 c1 = codecvt1;
4516 c2 = codecvt2;
4518 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4519 &vec_oprnds1,
4520 stmt, this_dest, gsi,
4521 c1, c2, decl1, decl2,
4522 op_type);
4525 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4527 if (cvt_type)
4529 if (codecvt1 == CALL_EXPR)
4531 new_stmt = gimple_build_call (decl1, 1, vop0);
4532 new_temp = make_ssa_name (vec_dest, new_stmt);
4533 gimple_call_set_lhs (new_stmt, new_temp);
4535 else
4537 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4538 new_temp = make_ssa_name (vec_dest);
4539 new_stmt = gimple_build_assign (new_temp, codecvt1,
4540 vop0);
4543 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4545 else
4546 new_stmt = SSA_NAME_DEF_STMT (vop0);
4548 if (slp_node)
4549 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4550 else
4552 if (!prev_stmt_info)
4553 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4554 else
4555 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4556 prev_stmt_info = vinfo_for_stmt (new_stmt);
4561 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4562 break;
4564 case NARROW:
4565 /* In case the vectorization factor (VF) is bigger than the number
4566 of elements that we can fit in a vectype (nunits), we have to
4567 generate more than one vector stmt - i.e - we need to "unroll"
4568 the vector stmt by a factor VF/nunits. */
4569 for (j = 0; j < ncopies; j++)
4571 /* Handle uses. */
4572 if (slp_node)
4573 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4574 slp_node);
4575 else
4577 vec_oprnds0.truncate (0);
4578 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4579 vect_pow2 (multi_step_cvt) - 1);
4582 /* Arguments are ready. Create the new vector stmts. */
4583 if (cvt_type)
4584 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4586 if (codecvt1 == CALL_EXPR)
4588 new_stmt = gimple_build_call (decl1, 1, vop0);
4589 new_temp = make_ssa_name (vec_dest, new_stmt);
4590 gimple_call_set_lhs (new_stmt, new_temp);
4592 else
4594 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4595 new_temp = make_ssa_name (vec_dest);
4596 new_stmt = gimple_build_assign (new_temp, codecvt1,
4597 vop0);
4600 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4601 vec_oprnds0[i] = new_temp;
4604 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4605 stmt, vec_dsts, gsi,
4606 slp_node, code1,
4607 &prev_stmt_info);
4610 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4611 break;
4614 vec_oprnds0.release ();
4615 vec_oprnds1.release ();
4616 interm_types.release ();
4618 return true;
4622 /* Function vectorizable_assignment.
4624 Check if STMT performs an assignment (copy) that can be vectorized.
4625 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4626 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4627 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4629 static bool
4630 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4631 gimple **vec_stmt, slp_tree slp_node)
4633 tree vec_dest;
4634 tree scalar_dest;
4635 tree op;
4636 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4637 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4638 tree new_temp;
4639 gimple *def_stmt;
4640 enum vect_def_type dt[1] = {vect_unknown_def_type};
4641 int ndts = 1;
4642 int ncopies;
4643 int i, j;
4644 vec<tree> vec_oprnds = vNULL;
4645 tree vop;
4646 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4647 vec_info *vinfo = stmt_info->vinfo;
4648 gimple *new_stmt = NULL;
4649 stmt_vec_info prev_stmt_info = NULL;
4650 enum tree_code code;
4651 tree vectype_in;
4653 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4654 return false;
4656 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4657 && ! vec_stmt)
4658 return false;
4660 /* Is vectorizable assignment? */
4661 if (!is_gimple_assign (stmt))
4662 return false;
4664 scalar_dest = gimple_assign_lhs (stmt);
4665 if (TREE_CODE (scalar_dest) != SSA_NAME)
4666 return false;
4668 code = gimple_assign_rhs_code (stmt);
4669 if (gimple_assign_single_p (stmt)
4670 || code == PAREN_EXPR
4671 || CONVERT_EXPR_CODE_P (code))
4672 op = gimple_assign_rhs1 (stmt);
4673 else
4674 return false;
4676 if (code == VIEW_CONVERT_EXPR)
4677 op = TREE_OPERAND (op, 0);
4679 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4680 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4682 /* Multiple types in SLP are handled by creating the appropriate number of
4683 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4684 case of SLP. */
4685 if (slp_node)
4686 ncopies = 1;
4687 else
4688 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4690 gcc_assert (ncopies >= 1);
4692 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4694 if (dump_enabled_p ())
4695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4696 "use not simple.\n");
4697 return false;
4700 /* We can handle NOP_EXPR conversions that do not change the number
4701 of elements or the vector size. */
4702 if ((CONVERT_EXPR_CODE_P (code)
4703 || code == VIEW_CONVERT_EXPR)
4704 && (!vectype_in
4705 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
4706 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
4707 GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4708 return false;
4710 /* We do not handle bit-precision changes. */
4711 if ((CONVERT_EXPR_CODE_P (code)
4712 || code == VIEW_CONVERT_EXPR)
4713 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4714 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest))
4715 || !type_has_mode_precision_p (TREE_TYPE (op)))
4716 /* But a conversion that does not change the bit-pattern is ok. */
4717 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4718 > TYPE_PRECISION (TREE_TYPE (op)))
4719 && TYPE_UNSIGNED (TREE_TYPE (op)))
4720 /* Conversion between boolean types of different sizes is
4721 a simple assignment in case their vectypes are same
4722 boolean vectors. */
4723 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4724 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4726 if (dump_enabled_p ())
4727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4728 "type conversion to/from bit-precision "
4729 "unsupported.\n");
4730 return false;
4733 if (!vec_stmt) /* transformation not required. */
4735 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4736 if (dump_enabled_p ())
4737 dump_printf_loc (MSG_NOTE, vect_location,
4738 "=== vectorizable_assignment ===\n");
4739 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4740 return true;
4743 /* Transform. */
4744 if (dump_enabled_p ())
4745 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4747 /* Handle def. */
4748 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4750 /* Handle use. */
4751 for (j = 0; j < ncopies; j++)
4753 /* Handle uses. */
4754 if (j == 0)
4755 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4756 else
4757 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4759 /* Arguments are ready. create the new vector stmt. */
4760 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4762 if (CONVERT_EXPR_CODE_P (code)
4763 || code == VIEW_CONVERT_EXPR)
4764 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4765 new_stmt = gimple_build_assign (vec_dest, vop);
4766 new_temp = make_ssa_name (vec_dest, new_stmt);
4767 gimple_assign_set_lhs (new_stmt, new_temp);
4768 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4769 if (slp_node)
4770 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4773 if (slp_node)
4774 continue;
4776 if (j == 0)
4777 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4778 else
4779 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4781 prev_stmt_info = vinfo_for_stmt (new_stmt);
4784 vec_oprnds.release ();
4785 return true;
4789 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4790 either as shift by a scalar or by a vector. */
4792 bool
4793 vect_supportable_shift (enum tree_code code, tree scalar_type)
4796 machine_mode vec_mode;
4797 optab optab;
4798 int icode;
4799 tree vectype;
4801 vectype = get_vectype_for_scalar_type (scalar_type);
4802 if (!vectype)
4803 return false;
4805 optab = optab_for_tree_code (code, vectype, optab_scalar);
4806 if (!optab
4807 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4809 optab = optab_for_tree_code (code, vectype, optab_vector);
4810 if (!optab
4811 || (optab_handler (optab, TYPE_MODE (vectype))
4812 == CODE_FOR_nothing))
4813 return false;
4816 vec_mode = TYPE_MODE (vectype);
4817 icode = (int) optab_handler (optab, vec_mode);
4818 if (icode == CODE_FOR_nothing)
4819 return false;
4821 return true;
4825 /* Function vectorizable_shift.
4827 Check if STMT performs a shift operation that can be vectorized.
4828 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4829 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4830 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4832 static bool
4833 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4834 gimple **vec_stmt, slp_tree slp_node)
4836 tree vec_dest;
4837 tree scalar_dest;
4838 tree op0, op1 = NULL;
4839 tree vec_oprnd1 = NULL_TREE;
4840 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4841 tree vectype;
4842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4843 enum tree_code code;
4844 machine_mode vec_mode;
4845 tree new_temp;
4846 optab optab;
4847 int icode;
4848 machine_mode optab_op2_mode;
4849 gimple *def_stmt;
4850 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4851 int ndts = 2;
4852 gimple *new_stmt = NULL;
4853 stmt_vec_info prev_stmt_info;
4854 poly_uint64 nunits_in;
4855 poly_uint64 nunits_out;
4856 tree vectype_out;
4857 tree op1_vectype;
4858 int ncopies;
4859 int j, i;
4860 vec<tree> vec_oprnds0 = vNULL;
4861 vec<tree> vec_oprnds1 = vNULL;
4862 tree vop0, vop1;
4863 unsigned int k;
4864 bool scalar_shift_arg = true;
4865 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4866 vec_info *vinfo = stmt_info->vinfo;
4868 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4869 return false;
4871 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4872 && ! vec_stmt)
4873 return false;
4875 /* Is STMT a vectorizable binary/unary operation? */
4876 if (!is_gimple_assign (stmt))
4877 return false;
4879 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4880 return false;
4882 code = gimple_assign_rhs_code (stmt);
4884 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4885 || code == RROTATE_EXPR))
4886 return false;
4888 scalar_dest = gimple_assign_lhs (stmt);
4889 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4890 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
4892 if (dump_enabled_p ())
4893 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4894 "bit-precision shifts not supported.\n");
4895 return false;
4898 op0 = gimple_assign_rhs1 (stmt);
4899 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4901 if (dump_enabled_p ())
4902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4903 "use not simple.\n");
4904 return false;
4906 /* If op0 is an external or constant def use a vector type with
4907 the same size as the output vector type. */
4908 if (!vectype)
4909 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4910 if (vec_stmt)
4911 gcc_assert (vectype);
4912 if (!vectype)
4914 if (dump_enabled_p ())
4915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4916 "no vectype for scalar type\n");
4917 return false;
4920 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4921 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4922 if (maybe_ne (nunits_out, nunits_in))
4923 return false;
4925 op1 = gimple_assign_rhs2 (stmt);
4926 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4928 if (dump_enabled_p ())
4929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4930 "use not simple.\n");
4931 return false;
4934 /* Multiple types in SLP are handled by creating the appropriate number of
4935 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4936 case of SLP. */
4937 if (slp_node)
4938 ncopies = 1;
4939 else
4940 ncopies = vect_get_num_copies (loop_vinfo, vectype);
4942 gcc_assert (ncopies >= 1);
4944 /* Determine whether the shift amount is a vector, or scalar. If the
4945 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4947 if ((dt[1] == vect_internal_def
4948 || dt[1] == vect_induction_def)
4949 && !slp_node)
4950 scalar_shift_arg = false;
4951 else if (dt[1] == vect_constant_def
4952 || dt[1] == vect_external_def
4953 || dt[1] == vect_internal_def)
4955 /* In SLP, need to check whether the shift count is the same,
4956 in loops if it is a constant or invariant, it is always
4957 a scalar shift. */
4958 if (slp_node)
4960 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4961 gimple *slpstmt;
4963 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4964 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4965 scalar_shift_arg = false;
4968 /* If the shift amount is computed by a pattern stmt we cannot
4969 use the scalar amount directly thus give up and use a vector
4970 shift. */
4971 if (dt[1] == vect_internal_def)
4973 gimple *def = SSA_NAME_DEF_STMT (op1);
4974 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4975 scalar_shift_arg = false;
4978 else
4980 if (dump_enabled_p ())
4981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4982 "operand mode requires invariant argument.\n");
4983 return false;
4986 /* Vector shifted by vector. */
4987 if (!scalar_shift_arg)
4989 optab = optab_for_tree_code (code, vectype, optab_vector);
4990 if (dump_enabled_p ())
4991 dump_printf_loc (MSG_NOTE, vect_location,
4992 "vector/vector shift/rotate found.\n");
4994 if (!op1_vectype)
4995 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4996 if (op1_vectype == NULL_TREE
4997 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4999 if (dump_enabled_p ())
5000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5001 "unusable type for last operand in"
5002 " vector/vector shift/rotate.\n");
5003 return false;
5006 /* See if the machine has a vector shifted by scalar insn and if not
5007 then see if it has a vector shifted by vector insn. */
5008 else
5010 optab = optab_for_tree_code (code, vectype, optab_scalar);
5011 if (optab
5012 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5014 if (dump_enabled_p ())
5015 dump_printf_loc (MSG_NOTE, vect_location,
5016 "vector/scalar shift/rotate found.\n");
5018 else
5020 optab = optab_for_tree_code (code, vectype, optab_vector);
5021 if (optab
5022 && (optab_handler (optab, TYPE_MODE (vectype))
5023 != CODE_FOR_nothing))
5025 scalar_shift_arg = false;
5027 if (dump_enabled_p ())
5028 dump_printf_loc (MSG_NOTE, vect_location,
5029 "vector/vector shift/rotate found.\n");
5031 /* Unlike the other binary operators, shifts/rotates have
5032 the rhs being int, instead of the same type as the lhs,
5033 so make sure the scalar is the right type if we are
5034 dealing with vectors of long long/long/short/char. */
5035 if (dt[1] == vect_constant_def)
5036 op1 = fold_convert (TREE_TYPE (vectype), op1);
5037 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5038 TREE_TYPE (op1)))
5040 if (slp_node
5041 && TYPE_MODE (TREE_TYPE (vectype))
5042 != TYPE_MODE (TREE_TYPE (op1)))
5044 if (dump_enabled_p ())
5045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5046 "unusable type for last operand in"
5047 " vector/vector shift/rotate.\n");
5048 return false;
5050 if (vec_stmt && !slp_node)
5052 op1 = fold_convert (TREE_TYPE (vectype), op1);
5053 op1 = vect_init_vector (stmt, op1,
5054 TREE_TYPE (vectype), NULL);
5061 /* Supportable by target? */
5062 if (!optab)
5064 if (dump_enabled_p ())
5065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5066 "no optab.\n");
5067 return false;
5069 vec_mode = TYPE_MODE (vectype);
5070 icode = (int) optab_handler (optab, vec_mode);
5071 if (icode == CODE_FOR_nothing)
5073 if (dump_enabled_p ())
5074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5075 "op not supported by target.\n");
5076 /* Check only during analysis. */
5077 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5078 || (!vec_stmt
5079 && !vect_worthwhile_without_simd_p (vinfo, code)))
5080 return false;
5081 if (dump_enabled_p ())
5082 dump_printf_loc (MSG_NOTE, vect_location,
5083 "proceeding using word mode.\n");
5086 /* Worthwhile without SIMD support? Check only during analysis. */
5087 if (!vec_stmt
5088 && !VECTOR_MODE_P (TYPE_MODE (vectype))
5089 && !vect_worthwhile_without_simd_p (vinfo, code))
5091 if (dump_enabled_p ())
5092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5093 "not worthwhile without SIMD support.\n");
5094 return false;
5097 if (!vec_stmt) /* transformation not required. */
5099 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5100 if (dump_enabled_p ())
5101 dump_printf_loc (MSG_NOTE, vect_location,
5102 "=== vectorizable_shift ===\n");
5103 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5104 return true;
5107 /* Transform. */
5109 if (dump_enabled_p ())
5110 dump_printf_loc (MSG_NOTE, vect_location,
5111 "transform binary/unary operation.\n");
5113 /* Handle def. */
5114 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5116 prev_stmt_info = NULL;
5117 for (j = 0; j < ncopies; j++)
5119 /* Handle uses. */
5120 if (j == 0)
5122 if (scalar_shift_arg)
5124 /* Vector shl and shr insn patterns can be defined with scalar
5125 operand 2 (shift operand). In this case, use constant or loop
5126 invariant op1 directly, without extending it to vector mode
5127 first. */
5128 optab_op2_mode = insn_data[icode].operand[2].mode;
5129 if (!VECTOR_MODE_P (optab_op2_mode))
5131 if (dump_enabled_p ())
5132 dump_printf_loc (MSG_NOTE, vect_location,
5133 "operand 1 using scalar mode.\n");
5134 vec_oprnd1 = op1;
5135 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5136 vec_oprnds1.quick_push (vec_oprnd1);
5137 if (slp_node)
5139 /* Store vec_oprnd1 for every vector stmt to be created
5140 for SLP_NODE. We check during the analysis that all
5141 the shift arguments are the same.
5142 TODO: Allow different constants for different vector
5143 stmts generated for an SLP instance. */
5144 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5145 vec_oprnds1.quick_push (vec_oprnd1);
5150 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5151 (a special case for certain kind of vector shifts); otherwise,
5152 operand 1 should be of a vector type (the usual case). */
5153 if (vec_oprnd1)
5154 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5155 slp_node);
5156 else
5157 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5158 slp_node);
5160 else
5161 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5163 /* Arguments are ready. Create the new vector stmt. */
5164 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5166 vop1 = vec_oprnds1[i];
5167 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5168 new_temp = make_ssa_name (vec_dest, new_stmt);
5169 gimple_assign_set_lhs (new_stmt, new_temp);
5170 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5171 if (slp_node)
5172 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5175 if (slp_node)
5176 continue;
5178 if (j == 0)
5179 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5180 else
5181 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5182 prev_stmt_info = vinfo_for_stmt (new_stmt);
5185 vec_oprnds0.release ();
5186 vec_oprnds1.release ();
5188 return true;
5192 /* Function vectorizable_operation.
5194 Check if STMT performs a binary, unary or ternary operation that can
5195 be vectorized.
5196 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5197 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
If VEC_STMT is null only the analysis is performed: the statement type
is recorded as op_vec_info_type and the cost is modelled with
vect_model_simple_cost.
If SLP_NODE is nonnull, a single copy is generated and every new vector
stmt is pushed onto SLP_TREE_VEC_STMTS (SLP_NODE) instead of being
chained through STMT_VINFO_RELATED_STMT.
Shifts and rotates are rejected here (see vectorizable_shift).
5198 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5200 static bool
5201 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5202 gimple **vec_stmt, slp_tree slp_node)
5204 tree vec_dest;
5205 tree scalar_dest;
5206 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5207 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5208 tree vectype;
5209 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5210 enum tree_code code, orig_code;
5211 machine_mode vec_mode;
5212 tree new_temp;
5213 int op_type;
5214 optab optab;
5215 bool target_support_p;
5216 gimple *def_stmt;
5217 enum vect_def_type dt[3]
5218 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5219 int ndts = 3;
5220 gimple *new_stmt = NULL;
5221 stmt_vec_info prev_stmt_info;
5222 poly_uint64 nunits_in;
5223 poly_uint64 nunits_out;
5224 tree vectype_out;
5225 int ncopies;
5226 int j, i;
5227 vec<tree> vec_oprnds0 = vNULL;
5228 vec<tree> vec_oprnds1 = vNULL;
5229 vec<tree> vec_oprnds2 = vNULL;
5230 tree vop0, vop1, vop2;
5231 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5232 vec_info *vinfo = stmt_info->vinfo;
5234 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5235 return false;
5237 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5238 && ! vec_stmt)
5239 return false;
5241 /* Is STMT a vectorizable unary/binary/ternary operation? */
5242 if (!is_gimple_assign (stmt))
5243 return false;
5245 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5246 return false;
/* ORIG_CODE keeps the scalar code; CODE may be rewritten below and is
   the code actually used for the vector statement. */
5248 orig_code = code = gimple_assign_rhs_code (stmt);
5250 /* For pointer addition and subtraction, we should use the normal
5251 plus and minus for the vector operation. */
5252 if (code == POINTER_PLUS_EXPR)
5253 code = PLUS_EXPR;
5254 if (code == POINTER_DIFF_EXPR)
5255 code = MINUS_EXPR;
5257 /* Support only unary, binary or ternary operations. */
5258 op_type = TREE_CODE_LENGTH (code);
5259 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5261 if (dump_enabled_p ())
5262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5263 "num. args = %d (not unary/binary/ternary op).\n",
5264 op_type);
5265 return false;
5268 scalar_dest = gimple_assign_lhs (stmt);
5269 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5271 /* Most operations cannot handle bit-precision types without extra
5272 truncations. */
5273 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5274 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
5275 /* Exception are bitwise binary operations. */
5276 && code != BIT_IOR_EXPR
5277 && code != BIT_XOR_EXPR
5278 && code != BIT_AND_EXPR)
5280 if (dump_enabled_p ())
5281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5282 "bit-precision arithmetic not supported.\n");
5283 return false;
/* Check the first operand; this call also yields VECTYPE, the input
   vector type, which may come back null (handled just below). */
5286 op0 = gimple_assign_rhs1 (stmt);
5287 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5289 if (dump_enabled_p ())
5290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5291 "use not simple.\n");
5292 return false;
5294 /* If op0 is an external or constant def use a vector type with
5295 the same size as the output vector type. */
5296 if (!vectype)
5298 /* For boolean type we cannot determine vectype by
5299 invariant value (don't know whether it is a vector
5300 of booleans or vector of integers). We use output
5301 vectype because operations on boolean don't change
5302 type. */
5303 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5305 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5307 if (dump_enabled_p ())
5308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5309 "not supported operation on bool value.\n");
5310 return false;
5312 vectype = vectype_out;
5314 else
5315 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
/* During transformation a missing vectype is an internal error; during
   analysis it merely makes the statement non-vectorizable. */
5317 if (vec_stmt)
5318 gcc_assert (vectype);
5319 if (!vectype)
5321 if (dump_enabled_p ())
5323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5324 "no vectype for scalar type ");
5325 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5326 TREE_TYPE (op0));
5327 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5330 return false;
/* Input and output vectors must have the same number of elements. */
5333 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5334 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5335 if (maybe_ne (nunits_out, nunits_in))
5336 return false;
/* Check the remaining operands, recording their def types in DT. */
5338 if (op_type == binary_op || op_type == ternary_op)
5340 op1 = gimple_assign_rhs2 (stmt);
5341 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5343 if (dump_enabled_p ())
5344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5345 "use not simple.\n");
5346 return false;
5349 if (op_type == ternary_op)
5351 op2 = gimple_assign_rhs3 (stmt);
5352 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5354 if (dump_enabled_p ())
5355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5356 "use not simple.\n");
5357 return false;
5361 /* Multiple types in SLP are handled by creating the appropriate number of
5362 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5363 case of SLP. */
5364 if (slp_node)
5365 ncopies = 1;
5366 else
5367 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5369 gcc_assert (ncopies >= 1);
5371 /* Shifts are handled in vectorizable_shift (). */
5372 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5373 || code == RROTATE_EXPR)
5374 return false;
5376 /* Supportable by target? */
5378 vec_mode = TYPE_MODE (vectype);
5379 if (code == MULT_HIGHPART_EXPR)
5380 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5381 else
5383 optab = optab_for_tree_code (code, vectype, optab_default);
5384 if (!optab)
5386 if (dump_enabled_p ())
5387 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5388 "no optab.\n");
5389 return false;
5391 target_support_p = (optab_handler (optab, vec_mode)
5392 != CODE_FOR_nothing);
/* Without target support we can still proceed in word mode, but only
   if the mode is exactly one word in size and, during analysis,
   vect_worthwhile_without_simd_p accepts the operation. */
5395 if (!target_support_p)
5397 if (dump_enabled_p ())
5398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5399 "op not supported by target.\n");
5400 /* Check only during analysis. */
5401 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
5402 || (!vec_stmt && !vect_worthwhile_without_simd_p (vinfo, code)))
5403 return false;
5404 if (dump_enabled_p ())
5405 dump_printf_loc (MSG_NOTE, vect_location,
5406 "proceeding using word mode.\n");
5409 /* Worthwhile without SIMD support? Check only during analysis. */
5410 if (!VECTOR_MODE_P (vec_mode)
5411 && !vec_stmt
5412 && !vect_worthwhile_without_simd_p (vinfo, code))
5414 if (dump_enabled_p ())
5415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5416 "not worthwhile without SIMD support.\n");
5417 return false;
5420 if (!vec_stmt) /* transformation not required. */
5422 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5423 if (dump_enabled_p ())
5424 dump_printf_loc (MSG_NOTE, vect_location,
5425 "=== vectorizable_operation ===\n");
5426 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5427 return true;
5430 /* Transform. */
5432 if (dump_enabled_p ())
5433 dump_printf_loc (MSG_NOTE, vect_location,
5434 "transform binary/unary operation.\n");
5436 /* Handle def. */
5437 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5439 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5440 vectors with unsigned elements, but the result is signed. So, we
5441 need to compute the MINUS_EXPR into vectype temporary and
5442 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5443 tree vec_cvt_dest = NULL_TREE;
5444 if (orig_code == POINTER_DIFF_EXPR)
5445 vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
5447 /* In case the vectorization factor (VF) is bigger than the number
5448 of elements that we can fit in a vectype (nunits), we have to generate
5449 more than one vector stmt - i.e - we need to "unroll" the
5450 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5451 from one copy of the vector stmt to the next, in the field
5452 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5453 stages to find the correct vector defs to be used when vectorizing
5454 stmts that use the defs of the current stmt. The example below
5455 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5456 we need to create 4 vectorized stmts):
5458 before vectorization:
5459 RELATED_STMT VEC_STMT
5460 S1: x = memref - -
5461 S2: z = x + 1 - -
5463 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5464 there):
5465 RELATED_STMT VEC_STMT
5466 VS1_0: vx0 = memref0 VS1_1 -
5467 VS1_1: vx1 = memref1 VS1_2 -
5468 VS1_2: vx2 = memref2 VS1_3 -
5469 VS1_3: vx3 = memref3 - -
5470 S1: x = load - VS1_0
5471 S2: z = x + 1 - -
5473 step2: vectorize stmt S2 (done here):
5474 To vectorize stmt S2 we first need to find the relevant vector
5475 def for the first operand 'x'. This is, as usual, obtained from
5476 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5477 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5478 relevant vector def 'vx0'. Having found 'vx0' we can generate
5479 the vector stmt VS2_0, and as usual, record it in the
5480 STMT_VINFO_VEC_STMT of stmt S2.
5481 When creating the second copy (VS2_1), we obtain the relevant vector
5482 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5483 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5484 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5485 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5486 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5487 chain of stmts and pointers:
5488 RELATED_STMT VEC_STMT
5489 VS1_0: vx0 = memref0 VS1_1 -
5490 VS1_1: vx1 = memref1 VS1_2 -
5491 VS1_2: vx2 = memref2 VS1_3 -
5492 VS1_3: vx3 = memref3 - -
5493 S1: x = load - VS1_0
5494 VS2_0: vz0 = vx0 + v1 VS2_1 -
5495 VS2_1: vz1 = vx1 + v1 VS2_2 -
5496 VS2_2: vz2 = vx2 + v1 VS2_3 -
5497 VS2_3: vz3 = vx3 + v1 - -
5498 S2: z = x + 1 - VS2_0 */
5500 prev_stmt_info = NULL;
5501 for (j = 0; j < ncopies; j++)
5503 /* Handle uses. */
5504 if (j == 0)
5506 if (op_type == binary_op || op_type == ternary_op)
5507 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5508 slp_node);
5509 else
5510 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5511 slp_node);
5512 if (op_type == ternary_op)
5513 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5514 slp_node);
5516 else
5518 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5519 if (op_type == ternary_op)
/* Replace the last recorded def of the third operand with the def
   for this copy. */
5521 tree vec_oprnd = vec_oprnds2.pop ();
5522 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5523 vec_oprnd));
5527 /* Arguments are ready. Create the new vector stmt. */
5528 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5530 vop1 = ((op_type == binary_op || op_type == ternary_op)
5531 ? vec_oprnds1[i] : NULL_TREE);
5532 vop2 = ((op_type == ternary_op)
5533 ? vec_oprnds2[i] : NULL_TREE);
5534 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5535 new_temp = make_ssa_name (vec_dest, new_stmt);
5536 gimple_assign_set_lhs (new_stmt, new_temp);
5537 vect_finish_stmt_generation (stmt, new_stmt, gsi);
/* Only set for POINTER_DIFF_EXPR: view-convert the result computed in
   VECTYPE into VECTYPE_OUT. */
5538 if (vec_cvt_dest)
5540 new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
5541 new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
5542 new_temp);
5543 new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
5544 gimple_assign_set_lhs (new_stmt, new_temp);
5545 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5547 if (slp_node)
5548 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
/* For SLP the stmts were recorded in the SLP node above, so there is
   no RELATED_STMT chain to maintain. */
5551 if (slp_node)
5552 continue;
/* The first copy becomes the VEC_STMT of the scalar stmt; each later
   copy is linked from the previous one via STMT_VINFO_RELATED_STMT. */
5554 if (j == 0)
5555 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5556 else
5557 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5558 prev_stmt_info = vinfo_for_stmt (new_stmt);
5561 vec_oprnds0.release ();
5562 vec_oprnds1.release ();
5563 vec_oprnds2.release ();
5565 return true;
5568 /* A helper function to ensure data reference DR's base alignment. */
5570 static void
5571 ensure_base_align (struct data_reference *dr)
5573 if (!dr->aux)
5574 return;
5576 if (DR_VECT_AUX (dr)->base_misaligned)
5578 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5580 unsigned int align_base_to = DR_TARGET_ALIGNMENT (dr) * BITS_PER_UNIT;
5582 if (decl_in_symtab_p (base_decl))
5583 symtab_node::get (base_decl)->increase_alignment (align_base_to);
5584 else
5586 SET_DECL_ALIGN (base_decl, align_base_to);
5587 DECL_USER_ALIGN (base_decl) = 1;
5589 DR_VECT_AUX (dr)->base_misaligned = false;
5594 /* Function get_group_alias_ptr_type.
5596 Return the alias type for the group starting at FIRST_STMT. */
5598 static tree
5599 get_group_alias_ptr_type (gimple *first_stmt)
5601 struct data_reference *first_dr, *next_dr;
5602 gimple *next_stmt;
5604 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5605 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5606 while (next_stmt)
5608 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5609 if (get_alias_set (DR_REF (first_dr))
5610 != get_alias_set (DR_REF (next_dr)))
5612 if (dump_enabled_p ())
5613 dump_printf_loc (MSG_NOTE, vect_location,
5614 "conflicting alias set types.\n");
5615 return ptr_type_node;
5617 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5619 return reference_alias_ptr_type (DR_REF (first_dr));
5623 /* Function vectorizable_store.
5625 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
5626 can be vectorized.
5627 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5628 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5629 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5631 static bool
5632 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5633 slp_tree slp_node)
5635 tree data_ref;
5636 tree op;
5637 tree vec_oprnd = NULL_TREE;
5638 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5639 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5640 tree elem_type;
5641 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5642 struct loop *loop = NULL;
5643 machine_mode vec_mode;
5644 tree dummy;
5645 enum dr_alignment_support alignment_support_scheme;
5646 gimple *def_stmt;
5647 enum vect_def_type dt;
5648 stmt_vec_info prev_stmt_info = NULL;
5649 tree dataref_ptr = NULL_TREE;
5650 tree dataref_offset = NULL_TREE;
5651 gimple *ptr_incr = NULL;
5652 int ncopies;
5653 int j;
5654 gimple *next_stmt, *first_stmt;
5655 bool grouped_store;
5656 unsigned int group_size, i;
5657 vec<tree> oprnds = vNULL;
5658 vec<tree> result_chain = vNULL;
5659 bool inv_p;
5660 tree offset = NULL_TREE;
5661 vec<tree> vec_oprnds = vNULL;
5662 bool slp = (slp_node != NULL);
5663 unsigned int vec_num;
5664 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5665 vec_info *vinfo = stmt_info->vinfo;
5666 tree aggr_type;
5667 gather_scatter_info gs_info;
5668 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5669 gimple *new_stmt;
5670 poly_uint64 vf;
5671 vec_load_store_type vls_type;
5672 tree ref_type;
5674 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5675 return false;
5677 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5678 && ! vec_stmt)
5679 return false;
5681 /* Is vectorizable store? */
5683 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
5684 if (is_gimple_assign (stmt))
5686 tree scalar_dest = gimple_assign_lhs (stmt);
5687 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5688 && is_pattern_stmt_p (stmt_info))
5689 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5690 if (TREE_CODE (scalar_dest) != ARRAY_REF
5691 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5692 && TREE_CODE (scalar_dest) != INDIRECT_REF
5693 && TREE_CODE (scalar_dest) != COMPONENT_REF
5694 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5695 && TREE_CODE (scalar_dest) != REALPART_EXPR
5696 && TREE_CODE (scalar_dest) != MEM_REF)
5697 return false;
5699 else
5701 gcall *call = dyn_cast <gcall *> (stmt);
5702 if (!call || !gimple_call_internal_p (call, IFN_MASK_STORE))
5703 return false;
5705 if (slp_node != NULL)
5707 if (dump_enabled_p ())
5708 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5709 "SLP of masked stores not supported.\n");
5710 return false;
5713 ref_type = TREE_TYPE (gimple_call_arg (call, 1));
5714 mask = gimple_call_arg (call, 2);
5715 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
5716 return false;
5719 op = vect_get_store_rhs (stmt);
5721 /* Cannot have hybrid store SLP -- that would mean storing to the
5722 same location twice. */
5723 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5725 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5726 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5728 if (loop_vinfo)
5730 loop = LOOP_VINFO_LOOP (loop_vinfo);
5731 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5733 else
5734 vf = 1;
5736 /* Multiple types in SLP are handled by creating the appropriate number of
5737 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5738 case of SLP. */
5739 if (slp)
5740 ncopies = 1;
5741 else
5742 ncopies = vect_get_num_copies (loop_vinfo, vectype);
5744 gcc_assert (ncopies >= 1);
5746 /* FORNOW. This restriction should be relaxed. */
5747 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5749 if (dump_enabled_p ())
5750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5751 "multiple types in nested loop.\n");
5752 return false;
5755 if (!vect_check_store_rhs (stmt, op, &rhs_vectype, &vls_type))
5756 return false;
5758 elem_type = TREE_TYPE (vectype);
5759 vec_mode = TYPE_MODE (vectype);
5761 if (!STMT_VINFO_DATA_REF (stmt_info))
5762 return false;
5764 vect_memory_access_type memory_access_type;
5765 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5766 &memory_access_type, &gs_info))
5767 return false;
5769 if (mask)
5771 if (memory_access_type != VMAT_CONTIGUOUS)
5773 if (dump_enabled_p ())
5774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5775 "unsupported access type for masked store.\n");
5776 return false;
5778 if (!VECTOR_MODE_P (vec_mode)
5779 || !can_vec_mask_load_store_p (vec_mode, TYPE_MODE (mask_vectype),
5780 false))
5781 return false;
5783 else
5785 /* FORNOW. In some cases can vectorize even if data-type not supported
5786 (e.g. - array initialization with 0). */
5787 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5788 return false;
5791 if (!vec_stmt) /* transformation not required. */
5793 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5794 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5795 /* The SLP costs are calculated during SLP analysis. */
5796 if (!PURE_SLP_STMT (stmt_info))
5797 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
5798 vls_type, NULL, NULL, NULL);
5799 return true;
5801 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5803 /* Transform. */
5805 ensure_base_align (dr);
5807 if (memory_access_type == VMAT_GATHER_SCATTER)
5809 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, src;
5810 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5811 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5812 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5813 edge pe = loop_preheader_edge (loop);
5814 gimple_seq seq;
5815 basic_block new_bb;
5816 enum { NARROW, NONE, WIDEN } modifier;
5817 poly_uint64 scatter_off_nunits
5818 = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5820 if (known_eq (nunits, scatter_off_nunits))
5821 modifier = NONE;
5822 else if (known_eq (nunits * 2, scatter_off_nunits))
5824 modifier = WIDEN;
5826 /* Currently gathers and scatters are only supported for
5827 fixed-length vectors. */
5828 unsigned int count = scatter_off_nunits.to_constant ();
5829 vec_perm_builder sel (count, count, 1);
5830 for (i = 0; i < (unsigned int) count; ++i)
5831 sel.quick_push (i | (count / 2));
5833 vec_perm_indices indices (sel, 1, count);
5834 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype,
5835 indices);
5836 gcc_assert (perm_mask != NULL_TREE);
5838 else if (known_eq (nunits, scatter_off_nunits * 2))
5840 modifier = NARROW;
5842 /* Currently gathers and scatters are only supported for
5843 fixed-length vectors. */
5844 unsigned int count = nunits.to_constant ();
5845 vec_perm_builder sel (count, count, 1);
5846 for (i = 0; i < (unsigned int) count; ++i)
5847 sel.quick_push (i | (count / 2));
5849 vec_perm_indices indices (sel, 2, count);
5850 perm_mask = vect_gen_perm_mask_checked (vectype, indices);
5851 gcc_assert (perm_mask != NULL_TREE);
5852 ncopies *= 2;
5854 else
5855 gcc_unreachable ();
5857 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5858 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5859 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5860 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5861 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5862 scaletype = TREE_VALUE (arglist);
5864 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5865 && TREE_CODE (rettype) == VOID_TYPE);
5867 ptr = fold_convert (ptrtype, gs_info.base);
5868 if (!is_gimple_min_invariant (ptr))
5870 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5871 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5872 gcc_assert (!new_bb);
5875 /* Currently we support only unconditional scatter stores,
5876 so mask should be all ones. */
5877 mask = build_int_cst (masktype, -1);
5878 mask = vect_init_vector (stmt, mask, masktype, NULL);
5880 scale = build_int_cst (scaletype, gs_info.scale);
5882 prev_stmt_info = NULL;
5883 for (j = 0; j < ncopies; ++j)
5885 if (j == 0)
5887 src = vec_oprnd1
5888 = vect_get_vec_def_for_operand (op, stmt);
5889 op = vec_oprnd0
5890 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5892 else if (modifier != NONE && (j & 1))
5894 if (modifier == WIDEN)
5896 src = vec_oprnd1
5897 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5898 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5899 stmt, gsi);
5901 else if (modifier == NARROW)
5903 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5904 stmt, gsi);
5905 op = vec_oprnd0
5906 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5907 vec_oprnd0);
5909 else
5910 gcc_unreachable ();
5912 else
5914 src = vec_oprnd1
5915 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5916 op = vec_oprnd0
5917 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5918 vec_oprnd0);
5921 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5923 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src)),
5924 TYPE_VECTOR_SUBPARTS (srctype)));
5925 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5926 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5927 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5928 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5929 src = var;
5932 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5934 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op)),
5935 TYPE_VECTOR_SUBPARTS (idxtype)));
5936 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5937 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5938 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5940 op = var;
5943 new_stmt
5944 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5946 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5948 if (prev_stmt_info == NULL)
5949 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5950 else
5951 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5952 prev_stmt_info = vinfo_for_stmt (new_stmt);
5954 return true;
5957 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5958 if (grouped_store)
5960 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5961 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5962 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5964 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5966 /* FORNOW */
5967 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5969 /* We vectorize all the stmts of the interleaving group when we
5970 reach the last stmt in the group. */
5971 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5972 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5973 && !slp)
5975 *vec_stmt = NULL;
5976 return true;
5979 if (slp)
5981 grouped_store = false;
5982 /* VEC_NUM is the number of vect stmts to be created for this
5983 group. */
5984 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5985 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5986 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5987 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5988 op = vect_get_store_rhs (first_stmt);
5990 else
5991 /* VEC_NUM is the number of vect stmts to be created for this
5992 group. */
5993 vec_num = group_size;
5995 ref_type = get_group_alias_ptr_type (first_stmt);
5997 else
5999 first_stmt = stmt;
6000 first_dr = dr;
6001 group_size = vec_num = 1;
6002 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6005 if (dump_enabled_p ())
6006 dump_printf_loc (MSG_NOTE, vect_location,
6007 "transform store. ncopies = %d\n", ncopies);
6009 if (memory_access_type == VMAT_ELEMENTWISE
6010 || memory_access_type == VMAT_STRIDED_SLP)
6012 gimple_stmt_iterator incr_gsi;
6013 bool insert_after;
6014 gimple *incr;
6015 tree offvar;
6016 tree ivstep;
6017 tree running_off;
6018 gimple_seq stmts = NULL;
6019 tree stride_base, stride_step, alias_off;
6020 tree vec_oprnd;
6021 unsigned int g;
6022 /* Checked by get_load_store_type. */
6023 unsigned int const_nunits = nunits.to_constant ();
6025 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
6027 stride_base
6028 = fold_build_pointer_plus
6029 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
6030 size_binop (PLUS_EXPR,
6031 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
6032 convert_to_ptrofftype (DR_INIT (first_dr))));
6033 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
6035 /* For a store with loop-invariant (but other than power-of-2)
6036 stride (i.e. not a grouped access) like so:
6038 for (i = 0; i < n; i += stride)
6039 array[i] = ...;
6041 we generate a new induction variable and new stores from
6042 the components of the (vectorized) rhs:
6044 for (j = 0; ; j += VF*stride)
6045 vectemp = ...;
6046 tmp1 = vectemp[0];
6047 array[j] = tmp1;
6048 tmp2 = vectemp[1];
6049 array[j + stride] = tmp2;
6053 unsigned nstores = const_nunits;
6054 unsigned lnel = 1;
6055 tree ltype = elem_type;
6056 tree lvectype = vectype;
6057 if (slp)
6059 if (group_size < const_nunits
6060 && const_nunits % group_size == 0)
6062 nstores = const_nunits / group_size;
6063 lnel = group_size;
6064 ltype = build_vector_type (elem_type, group_size);
6065 lvectype = vectype;
6067 /* First check if vec_extract optab doesn't support extraction
6068 of vector elts directly. */
6069 scalar_mode elmode = SCALAR_TYPE_MODE (elem_type);
6070 machine_mode vmode;
6071 if (!mode_for_vector (elmode, group_size).exists (&vmode)
6072 || !VECTOR_MODE_P (vmode)
6073 || (convert_optab_handler (vec_extract_optab,
6074 TYPE_MODE (vectype), vmode)
6075 == CODE_FOR_nothing))
6077 /* Try to avoid emitting an extract of vector elements
6078 by performing the extracts using an integer type of the
6079 same size, extracting from a vector of those and then
6080 re-interpreting it as the original vector type if
6081 supported. */
6082 unsigned lsize
6083 = group_size * GET_MODE_BITSIZE (elmode);
6084 elmode = int_mode_for_size (lsize, 0).require ();
6085 unsigned int lnunits = const_nunits / group_size;
6086 /* If we can't construct such a vector fall back to
6087 element extracts from the original vector type and
6088 element size stores. */
6089 if (mode_for_vector (elmode, lnunits).exists (&vmode)
6090 && VECTOR_MODE_P (vmode)
6091 && (convert_optab_handler (vec_extract_optab,
6092 vmode, elmode)
6093 != CODE_FOR_nothing))
6095 nstores = lnunits;
6096 lnel = group_size;
6097 ltype = build_nonstandard_integer_type (lsize, 1);
6098 lvectype = build_vector_type (ltype, nstores);
6100 /* Else fall back to vector extraction anyway.
6101 Fewer stores are more important than avoiding spilling
6102 of the vector we extract from. Compared to the
6103 construction case in vectorizable_load no store-forwarding
6104 issue exists here for reasonable archs. */
6107 else if (group_size >= const_nunits
6108 && group_size % const_nunits == 0)
6110 nstores = 1;
6111 lnel = const_nunits;
6112 ltype = vectype;
6113 lvectype = vectype;
6115 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6116 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6119 ivstep = stride_step;
6120 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6121 build_int_cst (TREE_TYPE (ivstep), vf));
6123 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6125 create_iv (stride_base, ivstep, NULL,
6126 loop, &incr_gsi, insert_after,
6127 &offvar, NULL);
6128 incr = gsi_stmt (incr_gsi);
6129 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6131 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6132 if (stmts)
6133 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6135 prev_stmt_info = NULL;
6136 alias_off = build_int_cst (ref_type, 0);
6137 next_stmt = first_stmt;
6138 for (g = 0; g < group_size; g++)
6140 running_off = offvar;
6141 if (g)
6143 tree size = TYPE_SIZE_UNIT (ltype);
6144 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6145 size);
6146 tree newoff = copy_ssa_name (running_off, NULL);
6147 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6148 running_off, pos);
6149 vect_finish_stmt_generation (stmt, incr, gsi);
6150 running_off = newoff;
6152 unsigned int group_el = 0;
6153 unsigned HOST_WIDE_INT
6154 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6155 for (j = 0; j < ncopies; j++)
6157 /* We've set op and dt above, from vect_get_store_rhs,
6158 and first_stmt == stmt. */
6159 if (j == 0)
6161 if (slp)
6163 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6164 slp_node);
6165 vec_oprnd = vec_oprnds[0];
6167 else
6169 op = vect_get_store_rhs (next_stmt);
6170 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6173 else
6175 if (slp)
6176 vec_oprnd = vec_oprnds[j];
6177 else
6179 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6180 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6183 /* Pun the vector to extract from if necessary. */
6184 if (lvectype != vectype)
6186 tree tem = make_ssa_name (lvectype);
6187 gimple *pun
6188 = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
6189 lvectype, vec_oprnd));
6190 vect_finish_stmt_generation (stmt, pun, gsi);
6191 vec_oprnd = tem;
6193 for (i = 0; i < nstores; i++)
6195 tree newref, newoff;
6196 gimple *incr, *assign;
6197 tree size = TYPE_SIZE (ltype);
6198 /* Extract the i'th component. */
6199 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6200 bitsize_int (i), size);
6201 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6202 size, pos);
6204 elem = force_gimple_operand_gsi (gsi, elem, true,
6205 NULL_TREE, true,
6206 GSI_SAME_STMT);
6208 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6209 group_el * elsz);
6210 newref = build2 (MEM_REF, ltype,
6211 running_off, this_off);
6213 /* And store it to *running_off. */
6214 assign = gimple_build_assign (newref, elem);
6215 vect_finish_stmt_generation (stmt, assign, gsi);
6217 group_el += lnel;
6218 if (! slp
6219 || group_el == group_size)
6221 newoff = copy_ssa_name (running_off, NULL);
6222 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6223 running_off, stride_step);
6224 vect_finish_stmt_generation (stmt, incr, gsi);
6226 running_off = newoff;
6227 group_el = 0;
6229 if (g == group_size - 1
6230 && !slp)
6232 if (j == 0 && i == 0)
6233 STMT_VINFO_VEC_STMT (stmt_info)
6234 = *vec_stmt = assign;
6235 else
6236 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6237 prev_stmt_info = vinfo_for_stmt (assign);
6241 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6242 if (slp)
6243 break;
6246 vec_oprnds.release ();
6247 return true;
6250 auto_vec<tree> dr_chain (group_size);
6251 oprnds.create (group_size);
6253 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6254 gcc_assert (alignment_support_scheme);
6255 /* Targets with store-lane instructions must not require explicit
6256 realignment. vect_supportable_dr_alignment always returns either
6257 dr_aligned or dr_unaligned_supported for masked operations. */
6258 gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES && !mask)
6259 || alignment_support_scheme == dr_aligned
6260 || alignment_support_scheme == dr_unaligned_supported);
6262 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6263 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6264 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6266 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6267 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6268 else
6269 aggr_type = vectype;
6271 if (mask)
6272 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
6274 /* In case the vectorization factor (VF) is bigger than the number
6275 of elements that we can fit in a vectype (nunits), we have to generate
6276 more than one vector stmt - i.e - we need to "unroll" the
6277 vector stmt by a factor VF/nunits. For more details see documentation in
6278 vect_get_vec_def_for_stmt_copy. */
6280 /* In case of interleaving (non-unit grouped access):
6282 S1: &base + 2 = x2
6283 S2: &base = x0
6284 S3: &base + 1 = x1
6285 S4: &base + 3 = x3
6287 We create vectorized stores starting from base address (the access of the
6288 first stmt in the chain (S2 in the above example), when the last store stmt
6289 of the chain (S4) is reached:
6291 VS1: &base = vx2
6292 VS2: &base + vec_size*1 = vx0
6293 VS3: &base + vec_size*2 = vx1
6294 VS4: &base + vec_size*3 = vx3
6296 Then permutation statements are generated:
6298 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6299 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6302 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6303 (the order of the data-refs in the output of vect_permute_store_chain
6304 corresponds to the order of scalar stmts in the interleaving chain - see
6305 the documentation of vect_permute_store_chain()).
6307 In case of both multiple types and interleaving, above vector stores and
6308 permutation stmts are created for every copy. The result vector stmts are
6309 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6310 STMT_VINFO_RELATED_STMT for the next copies.
6313 prev_stmt_info = NULL;
6314 tree vec_mask = NULL_TREE;
6315 for (j = 0; j < ncopies; j++)
6318 if (j == 0)
6320 if (slp)
6322 /* Get vectorized arguments for SLP_NODE. */
6323 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6324 NULL, slp_node);
6326 vec_oprnd = vec_oprnds[0];
6328 else
6330 /* For interleaved stores we collect vectorized defs for all the
6331 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6332 used as an input to vect_permute_store_chain(), and OPRNDS as
6333 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6335 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6336 OPRNDS are of size 1. */
6337 next_stmt = first_stmt;
6338 for (i = 0; i < group_size; i++)
6340 /* Since gaps are not supported for interleaved stores,
6341 GROUP_SIZE is the exact number of stmts in the chain.
6342 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6343 there is no interleaving, GROUP_SIZE is 1, and only one
6344 iteration of the loop will be executed. */
6345 op = vect_get_store_rhs (next_stmt);
6346 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6347 dr_chain.quick_push (vec_oprnd);
6348 oprnds.quick_push (vec_oprnd);
6349 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6351 if (mask)
6352 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
6353 mask_vectype);
6356 /* We should have caught mismatched types earlier. */
6357 gcc_assert (useless_type_conversion_p (vectype,
6358 TREE_TYPE (vec_oprnd)));
6359 bool simd_lane_access_p
6360 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6361 if (simd_lane_access_p
6362 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6363 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6364 && integer_zerop (DR_OFFSET (first_dr))
6365 && integer_zerop (DR_INIT (first_dr))
6366 && alias_sets_conflict_p (get_alias_set (aggr_type),
6367 get_alias_set (TREE_TYPE (ref_type))))
6369 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6370 dataref_offset = build_int_cst (ref_type, 0);
6371 inv_p = false;
6373 else
6374 dataref_ptr
6375 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6376 simd_lane_access_p ? loop : NULL,
6377 offset, &dummy, gsi, &ptr_incr,
6378 simd_lane_access_p, &inv_p);
6379 gcc_assert (bb_vinfo || !inv_p);
6381 else
6383 /* For interleaved stores we created vectorized defs for all the
6384 defs stored in OPRNDS in the previous iteration (previous copy).
6385 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6386 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6387 next copy.
6388 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6389 OPRNDS are of size 1. */
6390 for (i = 0; i < group_size; i++)
6392 op = oprnds[i];
6393 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6394 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6395 dr_chain[i] = vec_oprnd;
6396 oprnds[i] = vec_oprnd;
6398 if (mask)
6400 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
6401 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
6403 if (dataref_offset)
6404 dataref_offset
6405 = int_const_binop (PLUS_EXPR, dataref_offset,
6406 TYPE_SIZE_UNIT (aggr_type));
6407 else
6408 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6409 TYPE_SIZE_UNIT (aggr_type));
6412 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6414 tree vec_array;
6416 /* Combine all the vectors into an array. */
6417 vec_array = create_vector_array (vectype, vec_num);
6418 for (i = 0; i < vec_num; i++)
6420 vec_oprnd = dr_chain[i];
6421 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6424 /* Emit:
6425 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6426 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6427 gcall *call = gimple_build_call_internal (IFN_STORE_LANES, 1,
6428 vec_array);
6429 gimple_call_set_lhs (call, data_ref);
6430 gimple_call_set_nothrow (call, true);
6431 new_stmt = call;
6432 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6434 else
6436 new_stmt = NULL;
6437 if (grouped_store)
6439 if (j == 0)
6440 result_chain.create (group_size);
6441 /* Permute. */
6442 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6443 &result_chain);
6446 next_stmt = first_stmt;
6447 for (i = 0; i < vec_num; i++)
6449 unsigned align, misalign;
6451 if (i > 0)
6452 /* Bump the vector pointer. */
6453 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6454 stmt, NULL_TREE);
6456 if (slp)
6457 vec_oprnd = vec_oprnds[i];
6458 else if (grouped_store)
6459 /* For grouped stores vectorized defs are interleaved in
6460 vect_permute_store_chain(). */
6461 vec_oprnd = result_chain[i];
6463 align = DR_TARGET_ALIGNMENT (first_dr);
6464 if (aligned_access_p (first_dr))
6465 misalign = 0;
6466 else if (DR_MISALIGNMENT (first_dr) == -1)
6468 align = dr_alignment (vect_dr_behavior (first_dr));
6469 misalign = 0;
6471 else
6472 misalign = DR_MISALIGNMENT (first_dr);
6473 if (dataref_offset == NULL_TREE
6474 && TREE_CODE (dataref_ptr) == SSA_NAME)
6475 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6476 misalign);
6478 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6480 tree perm_mask = perm_mask_for_reverse (vectype);
6481 tree perm_dest
6482 = vect_create_destination_var (vect_get_store_rhs (stmt),
6483 vectype);
6484 tree new_temp = make_ssa_name (perm_dest);
6486 /* Generate the permute statement. */
6487 gimple *perm_stmt
6488 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6489 vec_oprnd, perm_mask);
6490 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6492 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6493 vec_oprnd = new_temp;
6496 /* Arguments are ready. Create the new vector stmt. */
6497 if (mask)
6499 align = least_bit_hwi (misalign | align);
6500 tree ptr = build_int_cst (ref_type, align);
6501 gcall *call
6502 = gimple_build_call_internal (IFN_MASK_STORE, 4,
6503 dataref_ptr, ptr,
6504 vec_mask, vec_oprnd);
6505 gimple_call_set_nothrow (call, true);
6506 new_stmt = call;
6508 else
6510 data_ref = fold_build2 (MEM_REF, vectype,
6511 dataref_ptr,
6512 dataref_offset
6513 ? dataref_offset
6514 : build_int_cst (ref_type, 0));
6515 if (aligned_access_p (first_dr))
6517 else if (DR_MISALIGNMENT (first_dr) == -1)
6518 TREE_TYPE (data_ref)
6519 = build_aligned_type (TREE_TYPE (data_ref),
6520 align * BITS_PER_UNIT);
6521 else
6522 TREE_TYPE (data_ref)
6523 = build_aligned_type (TREE_TYPE (data_ref),
6524 TYPE_ALIGN (elem_type));
6525 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6527 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6529 if (slp)
6530 continue;
6532 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6533 if (!next_stmt)
6534 break;
6537 if (!slp)
6539 if (j == 0)
6540 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6541 else
6542 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6543 prev_stmt_info = vinfo_for_stmt (new_stmt);
6547 oprnds.release ();
6548 result_chain.release ();
6549 vec_oprnds.release ();
6551 return true;
6554 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6555 VECTOR_CST mask. No checks are made that the target platform supports the
6556 mask, so callers may wish to test can_vec_perm_const_p separately, or use
6557 vect_gen_perm_mask_checked. */
6559 tree
6560 vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
6562 tree mask_type;
6564 poly_uint64 nunits = sel.length ();
6565 gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
6567 mask_type = build_vector_type (ssizetype, nunits);
6568 return vec_perm_indices_to_tree (mask_type, sel);
6571 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
6572 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6574 tree
6575 vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
6577 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype), sel));
6578 return vect_gen_perm_mask_any (vectype, sel);
6581 /* Given a vector variable X and Y, that was generated for the scalar
6582 STMT, generate instructions to permute the vector elements of X and Y
6583 using permutation mask MASK_VEC, insert them at *GSI and return the
6584 permuted vector variable. */
6586 static tree
6587 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6588 gimple_stmt_iterator *gsi)
6590 tree vectype = TREE_TYPE (x);
6591 tree perm_dest, data_ref;
6592 gimple *perm_stmt;
6594 tree scalar_dest = gimple_get_lhs (stmt);
6595 if (TREE_CODE (scalar_dest) == SSA_NAME)
6596 perm_dest = vect_create_destination_var (scalar_dest, vectype);
6597 else
6598 perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
6599 data_ref = make_ssa_name (perm_dest);
6601 /* Generate the permute statement. */
6602 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6603 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6605 return data_ref;
6608 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6609 inserting them on the loops preheader edge. Returns true if we
6610 were successful in doing so (and thus STMT can be moved then),
6611 otherwise returns false. */
6613 static bool
6614 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6616 ssa_op_iter i;
6617 tree op;
6618 bool any = false;
6620 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6622 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6623 if (!gimple_nop_p (def_stmt)
6624 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6626 /* Make sure we don't need to recurse. While we could do
6627 so in simple cases when there are more complex use webs
6628 we don't have an easy way to preserve stmt order to fulfil
6629 dependencies within them. */
6630 tree op2;
6631 ssa_op_iter i2;
6632 if (gimple_code (def_stmt) == GIMPLE_PHI)
6633 return false;
6634 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6636 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6637 if (!gimple_nop_p (def_stmt2)
6638 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6639 return false;
6641 any = true;
6645 if (!any)
6646 return true;
6648 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6650 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6651 if (!gimple_nop_p (def_stmt)
6652 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6654 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6655 gsi_remove (&gsi, false);
6656 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6660 return true;
6663 /* vectorizable_load.
6665 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
6666 can be vectorized.
6667 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6668 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6669 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6671 static bool
6672 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6673 slp_tree slp_node, slp_instance slp_node_instance)
6675 tree scalar_dest;
6676 tree vec_dest = NULL;
6677 tree data_ref = NULL;
6678 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6679 stmt_vec_info prev_stmt_info;
6680 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6681 struct loop *loop = NULL;
6682 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6683 bool nested_in_vect_loop = false;
6684 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6685 tree elem_type;
6686 tree new_temp;
6687 machine_mode mode;
6688 gimple *new_stmt = NULL;
6689 tree dummy;
6690 enum dr_alignment_support alignment_support_scheme;
6691 tree dataref_ptr = NULL_TREE;
6692 tree dataref_offset = NULL_TREE;
6693 gimple *ptr_incr = NULL;
6694 int ncopies;
6695 int i, j;
6696 unsigned int group_size;
6697 poly_uint64 group_gap_adj;
6698 tree msq = NULL_TREE, lsq;
6699 tree offset = NULL_TREE;
6700 tree byte_offset = NULL_TREE;
6701 tree realignment_token = NULL_TREE;
6702 gphi *phi = NULL;
6703 vec<tree> dr_chain = vNULL;
6704 bool grouped_load = false;
6705 gimple *first_stmt;
6706 gimple *first_stmt_for_drptr = NULL;
6707 bool inv_p;
6708 bool compute_in_loop = false;
6709 struct loop *at_loop;
6710 int vec_num;
6711 bool slp = (slp_node != NULL);
6712 bool slp_perm = false;
6713 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6714 poly_uint64 vf;
6715 tree aggr_type;
6716 gather_scatter_info gs_info;
6717 vec_info *vinfo = stmt_info->vinfo;
6718 tree ref_type;
6720 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6721 return false;
6723 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6724 && ! vec_stmt)
6725 return false;
6727 tree mask = NULL_TREE, mask_vectype = NULL_TREE;
6728 if (is_gimple_assign (stmt))
6730 scalar_dest = gimple_assign_lhs (stmt);
6731 if (TREE_CODE (scalar_dest) != SSA_NAME)
6732 return false;
6734 tree_code code = gimple_assign_rhs_code (stmt);
6735 if (code != ARRAY_REF
6736 && code != BIT_FIELD_REF
6737 && code != INDIRECT_REF
6738 && code != COMPONENT_REF
6739 && code != IMAGPART_EXPR
6740 && code != REALPART_EXPR
6741 && code != MEM_REF
6742 && TREE_CODE_CLASS (code) != tcc_declaration)
6743 return false;
6745 else
6747 gcall *call = dyn_cast <gcall *> (stmt);
6748 if (!call || !gimple_call_internal_p (call, IFN_MASK_LOAD))
6749 return false;
6751 scalar_dest = gimple_call_lhs (call);
6752 if (!scalar_dest)
6753 return false;
6755 if (slp_node != NULL)
6757 if (dump_enabled_p ())
6758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6759 "SLP of masked loads not supported.\n");
6760 return false;
6763 mask = gimple_call_arg (call, 2);
6764 if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
6765 return false;
6768 if (!STMT_VINFO_DATA_REF (stmt_info))
6769 return false;
6771 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6772 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6774 if (loop_vinfo)
6776 loop = LOOP_VINFO_LOOP (loop_vinfo);
6777 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6778 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6780 else
6781 vf = 1;
6783 /* Multiple types in SLP are handled by creating the appropriate number of
6784 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6785 case of SLP. */
6786 if (slp)
6787 ncopies = 1;
6788 else
6789 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6791 gcc_assert (ncopies >= 1);
6793 /* FORNOW. This restriction should be relaxed. */
6794 if (nested_in_vect_loop && ncopies > 1)
6796 if (dump_enabled_p ())
6797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6798 "multiple types in nested loop.\n");
6799 return false;
6802 /* Invalidate assumptions made by dependence analysis when vectorization
6803 on the unrolled body effectively re-orders stmts. */
6804 if (ncopies > 1
6805 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6806 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6807 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6809 if (dump_enabled_p ())
6810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6811 "cannot perform implicit CSE when unrolling "
6812 "with negative dependence distance\n");
6813 return false;
6816 elem_type = TREE_TYPE (vectype);
6817 mode = TYPE_MODE (vectype);
6819 /* FORNOW. In some cases can vectorize even if data-type not supported
6820 (e.g. - data copies). */
6821 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6823 if (dump_enabled_p ())
6824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6825 "Aligned load, but unsupported type.\n");
6826 return false;
6829 /* Check if the load is a part of an interleaving chain. */
6830 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6832 grouped_load = true;
6833 /* FORNOW */
6834 gcc_assert (!nested_in_vect_loop);
6835 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6837 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6838 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6840 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6841 slp_perm = true;
6843 /* Invalidate assumptions made by dependence analysis when vectorization
6844 on the unrolled body effectively re-orders stmts. */
6845 if (!PURE_SLP_STMT (stmt_info)
6846 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6847 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
6848 STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6850 if (dump_enabled_p ())
6851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6852 "cannot perform implicit CSE when performing "
6853 "group loads with negative dependence distance\n");
6854 return false;
6857 /* Similarly when the stmt is a load that is both part of a SLP
6858 instance and a loop vectorized stmt via the same-dr mechanism
6859 we have to give up. */
6860 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6861 && (STMT_SLP_TYPE (stmt_info)
6862 != STMT_SLP_TYPE (vinfo_for_stmt
6863 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6865 if (dump_enabled_p ())
6866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6867 "conflicting SLP types for CSEd load\n");
6868 return false;
6872 vect_memory_access_type memory_access_type;
6873 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6874 &memory_access_type, &gs_info))
6875 return false;
6877 if (mask)
6879 if (memory_access_type == VMAT_CONTIGUOUS)
6881 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
6882 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
6883 TYPE_MODE (mask_vectype), true))
6884 return false;
6886 else if (memory_access_type == VMAT_GATHER_SCATTER)
6888 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6889 tree masktype
6890 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
6891 if (TREE_CODE (masktype) == INTEGER_TYPE)
6893 if (dump_enabled_p ())
6894 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6895 "masked gather with integer mask not"
6896 " supported.");
6897 return false;
6900 else
6902 if (dump_enabled_p ())
6903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6904 "unsupported access type for masked load.\n");
6905 return false;
6909 if (!vec_stmt) /* transformation not required. */
6911 if (!slp)
6912 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6913 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6914 /* The SLP costs are calculated during SLP analysis. */
6915 if (!PURE_SLP_STMT (stmt_info))
6916 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6917 NULL, NULL, NULL);
6918 return true;
6921 if (!slp)
6922 gcc_assert (memory_access_type
6923 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6925 if (dump_enabled_p ())
6926 dump_printf_loc (MSG_NOTE, vect_location,
6927 "transform load. ncopies = %d\n", ncopies);
6929 /* Transform. */
6931 ensure_base_align (dr);
6933 if (memory_access_type == VMAT_GATHER_SCATTER)
6935 vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask);
6936 return true;
6939 if (memory_access_type == VMAT_ELEMENTWISE
6940 || memory_access_type == VMAT_STRIDED_SLP)
6942 gimple_stmt_iterator incr_gsi;
6943 bool insert_after;
6944 gimple *incr;
6945 tree offvar;
6946 tree ivstep;
6947 tree running_off;
6948 vec<constructor_elt, va_gc> *v = NULL;
6949 gimple_seq stmts = NULL;
6950 tree stride_base, stride_step, alias_off;
6951 /* Checked by get_load_store_type. */
6952 unsigned int const_nunits = nunits.to_constant ();
6954 gcc_assert (!nested_in_vect_loop);
6956 if (slp && grouped_load)
6958 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6959 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6960 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6961 ref_type = get_group_alias_ptr_type (first_stmt);
6963 else
6965 first_stmt = stmt;
6966 first_dr = dr;
6967 group_size = 1;
6968 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6971 stride_base
6972 = fold_build_pointer_plus
6973 (DR_BASE_ADDRESS (first_dr),
6974 size_binop (PLUS_EXPR,
6975 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6976 convert_to_ptrofftype (DR_INIT (first_dr))));
6977 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6979 /* For a load with loop-invariant (but other than power-of-2)
6980 stride (i.e. not a grouped access) like so:
6982 for (i = 0; i < n; i += stride)
6983 ... = array[i];
6985 we generate a new induction variable and new accesses to
6986 form a new vector (or vectors, depending on ncopies):
6988 for (j = 0; ; j += VF*stride)
6989 tmp1 = array[j];
6990 tmp2 = array[j + stride];
6992 vectemp = {tmp1, tmp2, ...}
6995 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6996 build_int_cst (TREE_TYPE (stride_step), vf));
6998 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
7000 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
7001 loop, &incr_gsi, insert_after,
7002 &offvar, NULL);
7003 incr = gsi_stmt (incr_gsi);
7004 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
7006 stride_step = force_gimple_operand (unshare_expr (stride_step),
7007 &stmts, true, NULL_TREE);
7008 if (stmts)
7009 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
7011 prev_stmt_info = NULL;
7012 running_off = offvar;
7013 alias_off = build_int_cst (ref_type, 0);
7014 int nloads = const_nunits;
7015 int lnel = 1;
7016 tree ltype = TREE_TYPE (vectype);
7017 tree lvectype = vectype;
7018 auto_vec<tree> dr_chain;
7019 if (memory_access_type == VMAT_STRIDED_SLP)
7021 if (group_size < const_nunits)
7023 /* First check if vec_init optab supports construction from
7024 vector elts directly. */
7025 scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
7026 machine_mode vmode;
7027 if (mode_for_vector (elmode, group_size).exists (&vmode)
7028 && VECTOR_MODE_P (vmode)
7029 && (convert_optab_handler (vec_init_optab,
7030 TYPE_MODE (vectype), vmode)
7031 != CODE_FOR_nothing))
7033 nloads = const_nunits / group_size;
7034 lnel = group_size;
7035 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
7037 else
7039 /* Otherwise avoid emitting a constructor of vector elements
7040 by performing the loads using an integer type of the same
7041 size, constructing a vector of those and then
7042 re-interpreting it as the original vector type.
7043 This avoids a huge runtime penalty due to the general
7044 inability to perform store forwarding from smaller stores
7045 to a larger load. */
7046 unsigned lsize
7047 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7048 elmode = int_mode_for_size (lsize, 0).require ();
7049 unsigned int lnunits = const_nunits / group_size;
7050 /* If we can't construct such a vector fall back to
7051 element loads of the original vector type. */
7052 if (mode_for_vector (elmode, lnunits).exists (&vmode)
7053 && VECTOR_MODE_P (vmode)
7054 && (convert_optab_handler (vec_init_optab, vmode, elmode)
7055 != CODE_FOR_nothing))
7057 nloads = lnunits;
7058 lnel = group_size;
7059 ltype = build_nonstandard_integer_type (lsize, 1);
7060 lvectype = build_vector_type (ltype, nloads);
7064 else
7066 nloads = 1;
7067 lnel = const_nunits;
7068 ltype = vectype;
7070 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7072 if (slp)
7074 /* For SLP permutation support we need to load the whole group,
7075 not only the number of vector stmts the permutation result
7076 fits in. */
7077 if (slp_perm)
7079 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7080 variable VF. */
7081 unsigned int const_vf = vf.to_constant ();
7082 ncopies = CEIL (group_size * const_vf, const_nunits);
7083 dr_chain.create (ncopies);
7085 else
7086 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7088 unsigned int group_el = 0;
7089 unsigned HOST_WIDE_INT
7090 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7091 for (j = 0; j < ncopies; j++)
7093 if (nloads > 1)
7094 vec_alloc (v, nloads);
7095 for (i = 0; i < nloads; i++)
7097 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7098 group_el * elsz);
7099 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7100 build2 (MEM_REF, ltype,
7101 running_off, this_off));
7102 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7103 if (nloads > 1)
7104 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7105 gimple_assign_lhs (new_stmt));
7107 group_el += lnel;
7108 if (! slp
7109 || group_el == group_size)
7111 tree newoff = copy_ssa_name (running_off);
7112 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7113 running_off, stride_step);
7114 vect_finish_stmt_generation (stmt, incr, gsi);
7116 running_off = newoff;
7117 group_el = 0;
7120 if (nloads > 1)
7122 tree vec_inv = build_constructor (lvectype, v);
7123 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7124 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7125 if (lvectype != vectype)
7127 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7128 VIEW_CONVERT_EXPR,
7129 build1 (VIEW_CONVERT_EXPR,
7130 vectype, new_temp));
7131 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7135 if (slp)
7137 if (slp_perm)
7138 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7139 else
7140 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7142 else
7144 if (j == 0)
7145 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7146 else
7147 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7148 prev_stmt_info = vinfo_for_stmt (new_stmt);
7151 if (slp_perm)
7153 unsigned n_perms;
7154 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7155 slp_node_instance, false, &n_perms);
7157 return true;
7160 if (grouped_load)
7162 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7163 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7164 /* For SLP vectorization we directly vectorize a subchain
7165 without permutation. */
7166 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7167 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7168 /* For BB vectorization always use the first stmt to base
7169 the data ref pointer on. */
7170 if (bb_vinfo)
7171 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7173 /* Check if the chain of loads is already vectorized. */
7174 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7175 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7176 ??? But we can only do so if there is exactly one
7177 as we have no way to get at the rest. Leave the CSE
7178 opportunity alone.
7179 ??? With the group load eventually participating
7180 in multiple different permutations (having multiple
7181 slp nodes which refer to the same group) the CSE
7182 is even wrong code. See PR56270. */
7183 && !slp)
7185 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7186 return true;
7188 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7189 group_gap_adj = 0;
7191 /* VEC_NUM is the number of vect stmts to be created for this group. */
7192 if (slp)
7194 grouped_load = false;
7195 /* For SLP permutation support we need to load the whole group,
7196 not only the number of vector stmts the permutation result
7197 fits in. */
7198 if (slp_perm)
7200 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7201 variable VF. */
7202 unsigned int const_vf = vf.to_constant ();
7203 unsigned int const_nunits = nunits.to_constant ();
7204 vec_num = CEIL (group_size * const_vf, const_nunits);
7205 group_gap_adj = vf * group_size - nunits * vec_num;
7207 else
7209 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7210 group_gap_adj
7211 = group_size - SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
7214 else
7215 vec_num = group_size;
7217 ref_type = get_group_alias_ptr_type (first_stmt);
7219 else
7221 first_stmt = stmt;
7222 first_dr = dr;
7223 group_size = vec_num = 1;
7224 group_gap_adj = 0;
7225 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7228 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7229 gcc_assert (alignment_support_scheme);
7230 /* Targets with load-lane instructions must not require explicit
7231 realignment. */
7232 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7233 || alignment_support_scheme == dr_aligned
7234 || alignment_support_scheme == dr_unaligned_supported);
7236 /* In case the vectorization factor (VF) is bigger than the number
7237 of elements that we can fit in a vectype (nunits), we have to generate
7238 more than one vector stmt - i.e - we need to "unroll" the
7239 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7240 from one copy of the vector stmt to the next, in the field
7241 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7242 stages to find the correct vector defs to be used when vectorizing
7243 stmts that use the defs of the current stmt. The example below
7244 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7245 need to create 4 vectorized stmts):
7247 before vectorization:
7248 RELATED_STMT VEC_STMT
7249 S1: x = memref - -
7250 S2: z = x + 1 - -
7252 step 1: vectorize stmt S1:
7253 We first create the vector stmt VS1_0, and, as usual, record a
7254 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7255 Next, we create the vector stmt VS1_1, and record a pointer to
7256 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7257 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7258 stmts and pointers:
7259 RELATED_STMT VEC_STMT
7260 VS1_0: vx0 = memref0 VS1_1 -
7261 VS1_1: vx1 = memref1 VS1_2 -
7262 VS1_2: vx2 = memref2 VS1_3 -
7263 VS1_3: vx3 = memref3 - -
7264 S1: x = load - VS1_0
7265 S2: z = x + 1 - -
7267 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7268 information we recorded in RELATED_STMT field is used to vectorize
7269 stmt S2. */
7271 /* In case of interleaving (non-unit grouped access):
7273 S1: x2 = &base + 2
7274 S2: x0 = &base
7275 S3: x1 = &base + 1
7276 S4: x3 = &base + 3
7278 Vectorized loads are created in the order of memory accesses
7279 starting from the access of the first stmt of the chain:
7281 VS1: vx0 = &base
7282 VS2: vx1 = &base + vec_size*1
7283 VS3: vx2 = &base + vec_size*2
7284 VS4: vx3 = &base + vec_size*3
7286 Then permutation statements are generated:
7288 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7289 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7292 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7293 (the order of the data-refs in the output of vect_permute_load_chain
7294 corresponds to the order of scalar stmts in the interleaving chain - see
7295 the documentation of vect_permute_load_chain()).
7296 The generation of permutation stmts and recording them in
7297 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7299 In case of both multiple types and interleaving, the vector loads and
7300 permutation stmts above are created for every copy. The result vector
7301 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7302 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7304 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7305 on a target that supports unaligned accesses (dr_unaligned_supported)
7306 we generate the following code:
7307 p = initial_addr;
7308 indx = 0;
7309 loop {
7310 p = p + indx * vectype_size;
7311 vec_dest = *(p);
7312 indx = indx + 1;
7315 Otherwise, the data reference is potentially unaligned on a target that
7316 does not support unaligned accesses (dr_explicit_realign_optimized) -
7317 then generate the following code, in which the data in each iteration is
7318 obtained by two vector loads, one from the previous iteration, and one
7319 from the current iteration:
7320 p1 = initial_addr;
7321 msq_init = *(floor(p1))
7322 p2 = initial_addr + VS - 1;
7323 realignment_token = call target_builtin;
7324 indx = 0;
7325 loop {
7326 p2 = p2 + indx * vectype_size
7327 lsq = *(floor(p2))
7328 vec_dest = realign_load (msq, lsq, realignment_token)
7329 indx = indx + 1;
7330 msq = lsq;
7331 } */
7333 /* If the misalignment remains the same throughout the execution of the
7334 loop, we can create the init_addr and permutation mask at the loop
7335 preheader. Otherwise, it needs to be created inside the loop.
7336 This can only occur when vectorizing memory accesses in the inner-loop
7337 nested within an outer-loop that is being vectorized. */
7339 if (nested_in_vect_loop
7340 && !multiple_p (DR_STEP_ALIGNMENT (dr),
7341 GET_MODE_SIZE (TYPE_MODE (vectype))))
7343 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7344 compute_in_loop = true;
7347 if ((alignment_support_scheme == dr_explicit_realign_optimized
7348 || alignment_support_scheme == dr_explicit_realign)
7349 && !compute_in_loop)
7351 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7352 alignment_support_scheme, NULL_TREE,
7353 &at_loop);
7354 if (alignment_support_scheme == dr_explicit_realign_optimized)
7356 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7357 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7358 size_one_node);
7361 else
7362 at_loop = loop;
7364 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7365 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7367 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7368 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7369 else
7370 aggr_type = vectype;
7372 tree vec_mask = NULL_TREE;
7373 prev_stmt_info = NULL;
7374 poly_uint64 group_elt = 0;
7375 for (j = 0; j < ncopies; j++)
7377 /* 1. Create the vector or array pointer update chain. */
7378 if (j == 0)
7380 bool simd_lane_access_p
7381 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7382 if (simd_lane_access_p
7383 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7384 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7385 && integer_zerop (DR_OFFSET (first_dr))
7386 && integer_zerop (DR_INIT (first_dr))
7387 && alias_sets_conflict_p (get_alias_set (aggr_type),
7388 get_alias_set (TREE_TYPE (ref_type)))
7389 && (alignment_support_scheme == dr_aligned
7390 || alignment_support_scheme == dr_unaligned_supported))
7392 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7393 dataref_offset = build_int_cst (ref_type, 0);
7394 inv_p = false;
7396 else if (first_stmt_for_drptr
7397 && first_stmt != first_stmt_for_drptr)
7399 dataref_ptr
7400 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7401 at_loop, offset, &dummy, gsi,
7402 &ptr_incr, simd_lane_access_p,
7403 &inv_p, byte_offset);
7404 /* Adjust the pointer by the difference to first_stmt. */
7405 data_reference_p ptrdr
7406 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7407 tree diff = fold_convert (sizetype,
7408 size_binop (MINUS_EXPR,
7409 DR_INIT (first_dr),
7410 DR_INIT (ptrdr)));
7411 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7412 stmt, diff);
7414 else
7415 dataref_ptr
7416 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7417 offset, &dummy, gsi, &ptr_incr,
7418 simd_lane_access_p, &inv_p,
7419 byte_offset);
7420 if (mask)
7421 vec_mask = vect_get_vec_def_for_operand (mask, stmt,
7422 mask_vectype);
7424 else
7426 if (dataref_offset)
7427 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7428 TYPE_SIZE_UNIT (aggr_type));
7429 else
7430 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7431 TYPE_SIZE_UNIT (aggr_type));
7432 if (mask)
7434 gimple *def_stmt;
7435 vect_def_type dt;
7436 vect_is_simple_use (vec_mask, vinfo, &def_stmt, &dt);
7437 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
7441 if (grouped_load || slp_perm)
7442 dr_chain.create (vec_num);
7444 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7446 tree vec_array;
7448 vec_array = create_vector_array (vectype, vec_num);
7450 /* Emit:
7451 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7452 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7453 gcall *call = gimple_build_call_internal (IFN_LOAD_LANES, 1,
7454 data_ref);
7455 gimple_call_set_lhs (call, vec_array);
7456 gimple_call_set_nothrow (call, true);
7457 new_stmt = call;
7458 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7460 /* Extract each vector into an SSA_NAME. */
7461 for (i = 0; i < vec_num; i++)
7463 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7464 vec_array, i);
7465 dr_chain.quick_push (new_temp);
7468 /* Record the mapping between SSA_NAMEs and statements. */
7469 vect_record_grouped_load_vectors (stmt, dr_chain);
7471 else
7473 for (i = 0; i < vec_num; i++)
7475 if (i > 0)
7476 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7477 stmt, NULL_TREE);
7479 /* 2. Create the vector-load in the loop. */
7480 switch (alignment_support_scheme)
7482 case dr_aligned:
7483 case dr_unaligned_supported:
7485 unsigned int align, misalign;
7487 align = DR_TARGET_ALIGNMENT (dr);
7488 if (alignment_support_scheme == dr_aligned)
7490 gcc_assert (aligned_access_p (first_dr));
7491 misalign = 0;
7493 else if (DR_MISALIGNMENT (first_dr) == -1)
7495 align = dr_alignment (vect_dr_behavior (first_dr));
7496 misalign = 0;
7498 else
7499 misalign = DR_MISALIGNMENT (first_dr);
7500 if (dataref_offset == NULL_TREE
7501 && TREE_CODE (dataref_ptr) == SSA_NAME)
7502 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7503 align, misalign);
7505 if (mask)
7507 align = least_bit_hwi (misalign | align);
7508 tree ptr = build_int_cst (ref_type, align);
7509 gcall *call
7510 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
7511 dataref_ptr, ptr,
7512 vec_mask);
7513 gimple_call_set_nothrow (call, true);
7514 new_stmt = call;
7515 data_ref = NULL_TREE;
7517 else
7519 data_ref
7520 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7521 dataref_offset
7522 ? dataref_offset
7523 : build_int_cst (ref_type, 0));
7524 if (alignment_support_scheme == dr_aligned)
7526 else if (DR_MISALIGNMENT (first_dr) == -1)
7527 TREE_TYPE (data_ref)
7528 = build_aligned_type (TREE_TYPE (data_ref),
7529 align * BITS_PER_UNIT);
7530 else
7531 TREE_TYPE (data_ref)
7532 = build_aligned_type (TREE_TYPE (data_ref),
7533 TYPE_ALIGN (elem_type));
7535 break;
7537 case dr_explicit_realign:
7539 tree ptr, bump;
7541 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7543 if (compute_in_loop)
7544 msq = vect_setup_realignment (first_stmt, gsi,
7545 &realignment_token,
7546 dr_explicit_realign,
7547 dataref_ptr, NULL);
7549 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7550 ptr = copy_ssa_name (dataref_ptr);
7551 else
7552 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7553 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7554 new_stmt = gimple_build_assign
7555 (ptr, BIT_AND_EXPR, dataref_ptr,
7556 build_int_cst
7557 (TREE_TYPE (dataref_ptr),
7558 -(HOST_WIDE_INT) align));
7559 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7560 data_ref
7561 = build2 (MEM_REF, vectype, ptr,
7562 build_int_cst (ref_type, 0));
7563 vec_dest = vect_create_destination_var (scalar_dest,
7564 vectype);
7565 new_stmt = gimple_build_assign (vec_dest, data_ref);
7566 new_temp = make_ssa_name (vec_dest, new_stmt);
7567 gimple_assign_set_lhs (new_stmt, new_temp);
7568 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7569 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7570 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7571 msq = new_temp;
7573 bump = size_binop (MULT_EXPR, vs,
7574 TYPE_SIZE_UNIT (elem_type));
7575 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7576 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7577 new_stmt = gimple_build_assign
7578 (NULL_TREE, BIT_AND_EXPR, ptr,
7579 build_int_cst
7580 (TREE_TYPE (ptr), -(HOST_WIDE_INT) align));
7581 ptr = copy_ssa_name (ptr, new_stmt);
7582 gimple_assign_set_lhs (new_stmt, ptr);
7583 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7584 data_ref
7585 = build2 (MEM_REF, vectype, ptr,
7586 build_int_cst (ref_type, 0));
7587 break;
7589 case dr_explicit_realign_optimized:
7591 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7592 new_temp = copy_ssa_name (dataref_ptr);
7593 else
7594 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7595 unsigned int align = DR_TARGET_ALIGNMENT (first_dr);
7596 new_stmt = gimple_build_assign
7597 (new_temp, BIT_AND_EXPR, dataref_ptr,
7598 build_int_cst (TREE_TYPE (dataref_ptr),
7599 -(HOST_WIDE_INT) align));
7600 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7601 data_ref
7602 = build2 (MEM_REF, vectype, new_temp,
7603 build_int_cst (ref_type, 0));
7604 break;
7606 default:
7607 gcc_unreachable ();
7609 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7610 /* DATA_REF is null if we've already built the statement. */
7611 if (data_ref)
7612 new_stmt = gimple_build_assign (vec_dest, data_ref);
7613 new_temp = make_ssa_name (vec_dest, new_stmt);
7614 gimple_set_lhs (new_stmt, new_temp);
7615 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7617 /* 3. Handle explicit realignment if necessary/supported.
7618 Create in loop:
7619 vec_dest = realign_load (msq, lsq, realignment_token) */
7620 if (alignment_support_scheme == dr_explicit_realign_optimized
7621 || alignment_support_scheme == dr_explicit_realign)
7623 lsq = gimple_assign_lhs (new_stmt);
7624 if (!realignment_token)
7625 realignment_token = dataref_ptr;
7626 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7627 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7628 msq, lsq, realignment_token);
7629 new_temp = make_ssa_name (vec_dest, new_stmt);
7630 gimple_assign_set_lhs (new_stmt, new_temp);
7631 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7633 if (alignment_support_scheme == dr_explicit_realign_optimized)
7635 gcc_assert (phi);
7636 if (i == vec_num - 1 && j == ncopies - 1)
7637 add_phi_arg (phi, lsq,
7638 loop_latch_edge (containing_loop),
7639 UNKNOWN_LOCATION);
7640 msq = lsq;
7644 /* 4. Handle invariant-load. */
7645 if (inv_p && !bb_vinfo)
7647 gcc_assert (!grouped_load);
7648 /* If we have versioned for aliasing or the loop doesn't
7649 have any data dependencies that would preclude this,
7650 then we are sure this is a loop invariant load and
7651 thus we can insert it on the preheader edge. */
7652 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7653 && !nested_in_vect_loop
7654 && hoist_defs_of_uses (stmt, loop))
7656 if (dump_enabled_p ())
7658 dump_printf_loc (MSG_NOTE, vect_location,
7659 "hoisting out of the vectorized "
7660 "loop: ");
7661 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7663 tree tem = copy_ssa_name (scalar_dest);
7664 gsi_insert_on_edge_immediate
7665 (loop_preheader_edge (loop),
7666 gimple_build_assign (tem,
7667 unshare_expr
7668 (gimple_assign_rhs1 (stmt))));
7669 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7670 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7671 set_vinfo_for_stmt (new_stmt,
7672 new_stmt_vec_info (new_stmt, vinfo));
7674 else
7676 gimple_stmt_iterator gsi2 = *gsi;
7677 gsi_next (&gsi2);
7678 new_temp = vect_init_vector (stmt, scalar_dest,
7679 vectype, &gsi2);
7680 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7684 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7686 tree perm_mask = perm_mask_for_reverse (vectype);
7687 new_temp = permute_vec_elements (new_temp, new_temp,
7688 perm_mask, stmt, gsi);
7689 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7692 /* Collect vector loads and later create their permutation in
7693 vect_transform_grouped_load (). */
7694 if (grouped_load || slp_perm)
7695 dr_chain.quick_push (new_temp);
7697 /* Store vector loads in the corresponding SLP_NODE. */
7698 if (slp && !slp_perm)
7699 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7701 /* With SLP permutation we load the gaps as well; without it
7702 we need to skip the gaps once we have fully loaded
7703 all elements. group_gap_adj is GROUP_SIZE here. */
7704 group_elt += nunits;
7705 if (maybe_ne (group_gap_adj, 0U)
7706 && !slp_perm
7707 && known_eq (group_elt, group_size - group_gap_adj))
7709 poly_wide_int bump_val
7710 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7711 * group_gap_adj);
7712 tree bump = wide_int_to_tree (sizetype, bump_val);
7713 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7714 stmt, bump);
7715 group_elt = 0;
7718 /* Bump the vector pointer to account for a gap or for excess
7719 elements loaded for a permuted SLP load. */
7720 if (maybe_ne (group_gap_adj, 0U) && slp_perm)
7722 poly_wide_int bump_val
7723 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type))
7724 * group_gap_adj);
7725 tree bump = wide_int_to_tree (sizetype, bump_val);
7726 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7727 stmt, bump);
7731 if (slp && !slp_perm)
7732 continue;
7734 if (slp_perm)
7736 unsigned n_perms;
7737 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7738 slp_node_instance, false,
7739 &n_perms))
7741 dr_chain.release ();
7742 return false;
7745 else
7747 if (grouped_load)
7749 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7750 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7751 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7753 else
7755 if (j == 0)
7756 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7757 else
7758 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7759 prev_stmt_info = vinfo_for_stmt (new_stmt);
7762 dr_chain.release ();
7765 return true;
7768 /* Function vect_is_simple_cond.
7770 Input:
7771 LOOP - the loop that is being vectorized.
7772 COND - Condition that is checked for simple use.
7774 Output:
7775 *COMP_VECTYPE - the vector type for the comparison.
7776 *DTS - The def types for the arguments of the comparison
7778 Returns whether a COND can be vectorized. Checks whether
7779 condition operands are supportable using vec_is_simple_use. */
7781 static bool
7782 vect_is_simple_cond (tree cond, vec_info *vinfo,
7783 tree *comp_vectype, enum vect_def_type *dts,
7784 tree vectype)
7786 tree lhs, rhs;
7787 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7789 /* Mask case. */
7790 if (TREE_CODE (cond) == SSA_NAME
7791 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7793 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7794 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7795 &dts[0], comp_vectype)
7796 || !*comp_vectype
7797 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7798 return false;
7799 return true;
7802 if (!COMPARISON_CLASS_P (cond))
7803 return false;
7805 lhs = TREE_OPERAND (cond, 0);
7806 rhs = TREE_OPERAND (cond, 1);
7808 if (TREE_CODE (lhs) == SSA_NAME)
7810 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7811 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7812 return false;
7814 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7815 || TREE_CODE (lhs) == FIXED_CST)
7816 dts[0] = vect_constant_def;
7817 else
7818 return false;
7820 if (TREE_CODE (rhs) == SSA_NAME)
7822 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7823 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7824 return false;
7826 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7827 || TREE_CODE (rhs) == FIXED_CST)
7828 dts[1] = vect_constant_def;
7829 else
7830 return false;
7832 if (vectype1 && vectype2
7833 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
7834 TYPE_VECTOR_SUBPARTS (vectype2)))
7835 return false;
7837 *comp_vectype = vectype1 ? vectype1 : vectype2;
7838 /* Invariant comparison. */
7839 if (! *comp_vectype)
7841 tree scalar_type = TREE_TYPE (lhs);
7842 /* If we can widen the comparison to match vectype do so. */
7843 if (INTEGRAL_TYPE_P (scalar_type)
7844 && tree_int_cst_lt (TYPE_SIZE (scalar_type),
7845 TYPE_SIZE (TREE_TYPE (vectype))))
7846 scalar_type = build_nonstandard_integer_type
7847 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))),
7848 TYPE_UNSIGNED (scalar_type));
7849 *comp_vectype = get_vectype_for_scalar_type (scalar_type);
7852 return true;
7855 /* vectorizable_condition.
7857 Check if STMT is conditional modify expression that can be vectorized.
7858 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7859 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7860 at GSI.
7862 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
7863 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7864 else clause if it is 2).
7866 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
7868 bool
7869 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7870 gimple **vec_stmt, tree reduc_def, int reduc_index,
7871 slp_tree slp_node)
7873 tree scalar_dest = NULL_TREE;
7874 tree vec_dest = NULL_TREE;
7875 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7876 tree then_clause, else_clause;
7877 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7878 tree comp_vectype = NULL_TREE;
7879 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7880 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7881 tree vec_compare;
7882 tree new_temp;
7883 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7884 enum vect_def_type dts[4]
7885 = {vect_unknown_def_type, vect_unknown_def_type,
7886 vect_unknown_def_type, vect_unknown_def_type};
7887 int ndts = 4;
7888 int ncopies;
7889 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7890 stmt_vec_info prev_stmt_info = NULL;
7891 int i, j;
7892 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7893 vec<tree> vec_oprnds0 = vNULL;
7894 vec<tree> vec_oprnds1 = vNULL;
7895 vec<tree> vec_oprnds2 = vNULL;
7896 vec<tree> vec_oprnds3 = vNULL;
7897 tree vec_cmp_type;
7898 bool masked = false;
7900 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7901 return false;
7903 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7905 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7906 return false;
7908 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7909 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7910 && reduc_def))
7911 return false;
7913 /* FORNOW: not yet supported. */
7914 if (STMT_VINFO_LIVE_P (stmt_info))
7916 if (dump_enabled_p ())
7917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7918 "value used after loop.\n");
7919 return false;
7923 /* Is vectorizable conditional operation? */
7924 if (!is_gimple_assign (stmt))
7925 return false;
7927 code = gimple_assign_rhs_code (stmt);
7929 if (code != COND_EXPR)
7930 return false;
7932 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7933 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7935 if (slp_node)
7936 ncopies = 1;
7937 else
7938 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7940 gcc_assert (ncopies >= 1);
7941 if (reduc_index && ncopies > 1)
7942 return false; /* FORNOW */
7944 cond_expr = gimple_assign_rhs1 (stmt);
7945 then_clause = gimple_assign_rhs2 (stmt);
7946 else_clause = gimple_assign_rhs3 (stmt);
7948 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7949 &comp_vectype, &dts[0], vectype)
7950 || !comp_vectype)
7951 return false;
7953 gimple *def_stmt;
7954 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7955 &vectype1))
7956 return false;
7957 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7958 &vectype2))
7959 return false;
7961 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7962 return false;
7964 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7965 return false;
7967 masked = !COMPARISON_CLASS_P (cond_expr);
7968 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7970 if (vec_cmp_type == NULL_TREE)
7971 return false;
7973 cond_code = TREE_CODE (cond_expr);
7974 if (!masked)
7976 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7977 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7980 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7982 /* Boolean values may have another representation in vectors
7983 and therefore we prefer bit operations over comparison for
7984 them (which also works for scalar masks). We store opcodes
7985 to use in bitop1 and bitop2. Statement is vectorized as
7986 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7987 depending on bitop1 and bitop2 arity. */
7988 switch (cond_code)
7990 case GT_EXPR:
7991 bitop1 = BIT_NOT_EXPR;
7992 bitop2 = BIT_AND_EXPR;
7993 break;
7994 case GE_EXPR:
7995 bitop1 = BIT_NOT_EXPR;
7996 bitop2 = BIT_IOR_EXPR;
7997 break;
7998 case LT_EXPR:
7999 bitop1 = BIT_NOT_EXPR;
8000 bitop2 = BIT_AND_EXPR;
8001 std::swap (cond_expr0, cond_expr1);
8002 break;
8003 case LE_EXPR:
8004 bitop1 = BIT_NOT_EXPR;
8005 bitop2 = BIT_IOR_EXPR;
8006 std::swap (cond_expr0, cond_expr1);
8007 break;
8008 case NE_EXPR:
8009 bitop1 = BIT_XOR_EXPR;
8010 break;
8011 case EQ_EXPR:
8012 bitop1 = BIT_XOR_EXPR;
8013 bitop2 = BIT_NOT_EXPR;
8014 break;
8015 default:
8016 return false;
8018 cond_code = SSA_NAME;
8021 if (!vec_stmt)
8023 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
8024 if (bitop1 != NOP_EXPR)
8026 machine_mode mode = TYPE_MODE (comp_vectype);
8027 optab optab;
8029 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
8030 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8031 return false;
8033 if (bitop2 != NOP_EXPR)
8035 optab = optab_for_tree_code (bitop2, comp_vectype,
8036 optab_default);
8037 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8038 return false;
8041 if (expand_vec_cond_expr_p (vectype, comp_vectype,
8042 cond_code))
8044 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
8045 return true;
8047 return false;
8050 /* Transform. */
8052 if (!slp_node)
8054 vec_oprnds0.create (1);
8055 vec_oprnds1.create (1);
8056 vec_oprnds2.create (1);
8057 vec_oprnds3.create (1);
8060 /* Handle def. */
8061 scalar_dest = gimple_assign_lhs (stmt);
8062 vec_dest = vect_create_destination_var (scalar_dest, vectype);
8064 /* Handle cond expr. */
8065 for (j = 0; j < ncopies; j++)
8067 gassign *new_stmt = NULL;
8068 if (j == 0)
8070 if (slp_node)
8072 auto_vec<tree, 4> ops;
8073 auto_vec<vec<tree>, 4> vec_defs;
8075 if (masked)
8076 ops.safe_push (cond_expr);
8077 else
8079 ops.safe_push (cond_expr0);
8080 ops.safe_push (cond_expr1);
8082 ops.safe_push (then_clause);
8083 ops.safe_push (else_clause);
8084 vect_get_slp_defs (ops, slp_node, &vec_defs);
8085 vec_oprnds3 = vec_defs.pop ();
8086 vec_oprnds2 = vec_defs.pop ();
8087 if (!masked)
8088 vec_oprnds1 = vec_defs.pop ();
8089 vec_oprnds0 = vec_defs.pop ();
8091 else
8093 gimple *gtemp;
8094 if (masked)
8096 vec_cond_lhs
8097 = vect_get_vec_def_for_operand (cond_expr, stmt,
8098 comp_vectype);
8099 vect_is_simple_use (cond_expr, stmt_info->vinfo,
8100 &gtemp, &dts[0]);
8102 else
8104 vec_cond_lhs
8105 = vect_get_vec_def_for_operand (cond_expr0,
8106 stmt, comp_vectype);
8107 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8109 vec_cond_rhs
8110 = vect_get_vec_def_for_operand (cond_expr1,
8111 stmt, comp_vectype);
8112 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8114 if (reduc_index == 1)
8115 vec_then_clause = reduc_def;
8116 else
8118 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8119 stmt);
8120 vect_is_simple_use (then_clause, loop_vinfo,
8121 &gtemp, &dts[2]);
8123 if (reduc_index == 2)
8124 vec_else_clause = reduc_def;
8125 else
8127 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8128 stmt);
8129 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8133 else
8135 vec_cond_lhs
8136 = vect_get_vec_def_for_stmt_copy (dts[0],
8137 vec_oprnds0.pop ());
8138 if (!masked)
8139 vec_cond_rhs
8140 = vect_get_vec_def_for_stmt_copy (dts[1],
8141 vec_oprnds1.pop ());
8143 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8144 vec_oprnds2.pop ());
8145 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8146 vec_oprnds3.pop ());
8149 if (!slp_node)
8151 vec_oprnds0.quick_push (vec_cond_lhs);
8152 if (!masked)
8153 vec_oprnds1.quick_push (vec_cond_rhs);
8154 vec_oprnds2.quick_push (vec_then_clause);
8155 vec_oprnds3.quick_push (vec_else_clause);
8158 /* Arguments are ready. Create the new vector stmt. */
8159 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8161 vec_then_clause = vec_oprnds2[i];
8162 vec_else_clause = vec_oprnds3[i];
8164 if (masked)
8165 vec_compare = vec_cond_lhs;
8166 else
8168 vec_cond_rhs = vec_oprnds1[i];
8169 if (bitop1 == NOP_EXPR)
8170 vec_compare = build2 (cond_code, vec_cmp_type,
8171 vec_cond_lhs, vec_cond_rhs);
8172 else
8174 new_temp = make_ssa_name (vec_cmp_type);
8175 if (bitop1 == BIT_NOT_EXPR)
8176 new_stmt = gimple_build_assign (new_temp, bitop1,
8177 vec_cond_rhs);
8178 else
8179 new_stmt
8180 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8181 vec_cond_rhs);
8182 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8183 if (bitop2 == NOP_EXPR)
8184 vec_compare = new_temp;
8185 else if (bitop2 == BIT_NOT_EXPR)
8187 /* Instead of doing ~x ? y : z do x ? z : y. */
8188 vec_compare = new_temp;
8189 std::swap (vec_then_clause, vec_else_clause);
8191 else
8193 vec_compare = make_ssa_name (vec_cmp_type);
8194 new_stmt
8195 = gimple_build_assign (vec_compare, bitop2,
8196 vec_cond_lhs, new_temp);
8197 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8201 new_temp = make_ssa_name (vec_dest);
8202 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8203 vec_compare, vec_then_clause,
8204 vec_else_clause);
8205 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8206 if (slp_node)
8207 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8210 if (slp_node)
8211 continue;
8213 if (j == 0)
8214 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8215 else
8216 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8218 prev_stmt_info = vinfo_for_stmt (new_stmt);
8221 vec_oprnds0.release ();
8222 vec_oprnds1.release ();
8223 vec_oprnds2.release ();
8224 vec_oprnds3.release ();
8226 return true;
8229 /* vectorizable_comparison.
8231 Check if STMT is comparison expression that can be vectorized.
8232 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8233 comparison, put it in VEC_STMT, and insert it at GSI.
8235 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
8237 static bool
8238 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8239 gimple **vec_stmt, tree reduc_def,
8240 slp_tree slp_node)
8242 tree lhs, rhs1, rhs2;
8243 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8244 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8245 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8246 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8247 tree new_temp;
8248 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8249 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8250 int ndts = 2;
8251 poly_uint64 nunits;
8252 int ncopies;
8253 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8254 stmt_vec_info prev_stmt_info = NULL;
8255 int i, j;
8256 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8257 vec<tree> vec_oprnds0 = vNULL;
8258 vec<tree> vec_oprnds1 = vNULL;
8259 gimple *def_stmt;
8260 tree mask_type;
8261 tree mask;
8263 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8264 return false;
8266 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8267 return false;
8269 mask_type = vectype;
8270 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8272 if (slp_node)
8273 ncopies = 1;
8274 else
8275 ncopies = vect_get_num_copies (loop_vinfo, vectype);
8277 gcc_assert (ncopies >= 1);
8278 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8279 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8280 && reduc_def))
8281 return false;
8283 if (STMT_VINFO_LIVE_P (stmt_info))
8285 if (dump_enabled_p ())
8286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8287 "value used after loop.\n");
8288 return false;
8291 if (!is_gimple_assign (stmt))
8292 return false;
8294 code = gimple_assign_rhs_code (stmt);
8296 if (TREE_CODE_CLASS (code) != tcc_comparison)
8297 return false;
8299 rhs1 = gimple_assign_rhs1 (stmt);
8300 rhs2 = gimple_assign_rhs2 (stmt);
8302 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8303 &dts[0], &vectype1))
8304 return false;
8306 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8307 &dts[1], &vectype2))
8308 return false;
8310 if (vectype1 && vectype2
8311 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
8312 TYPE_VECTOR_SUBPARTS (vectype2)))
8313 return false;
8315 vectype = vectype1 ? vectype1 : vectype2;
8317 /* Invariant comparison. */
8318 if (!vectype)
8320 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8321 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits))
8322 return false;
8324 else if (maybe_ne (nunits, TYPE_VECTOR_SUBPARTS (vectype)))
8325 return false;
8327 /* Can't compare mask and non-mask types. */
8328 if (vectype1 && vectype2
8329 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8330 return false;
8332 /* Boolean values may have another representation in vectors
8333 and therefore we prefer bit operations over comparison for
8334 them (which also works for scalar masks). We store opcodes
8335 to use in bitop1 and bitop2. Statement is vectorized as
8336 BITOP2 (rhs1 BITOP1 rhs2) or
8337 rhs1 BITOP2 (BITOP1 rhs2)
8338 depending on bitop1 and bitop2 arity. */
8339 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8341 if (code == GT_EXPR)
8343 bitop1 = BIT_NOT_EXPR;
8344 bitop2 = BIT_AND_EXPR;
8346 else if (code == GE_EXPR)
8348 bitop1 = BIT_NOT_EXPR;
8349 bitop2 = BIT_IOR_EXPR;
8351 else if (code == LT_EXPR)
8353 bitop1 = BIT_NOT_EXPR;
8354 bitop2 = BIT_AND_EXPR;
8355 std::swap (rhs1, rhs2);
8356 std::swap (dts[0], dts[1]);
8358 else if (code == LE_EXPR)
8360 bitop1 = BIT_NOT_EXPR;
8361 bitop2 = BIT_IOR_EXPR;
8362 std::swap (rhs1, rhs2);
8363 std::swap (dts[0], dts[1]);
8365 else
8367 bitop1 = BIT_XOR_EXPR;
8368 if (code == EQ_EXPR)
8369 bitop2 = BIT_NOT_EXPR;
8373 if (!vec_stmt)
8375 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8376 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8377 dts, ndts, NULL, NULL);
8378 if (bitop1 == NOP_EXPR)
8379 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8380 else
8382 machine_mode mode = TYPE_MODE (vectype);
8383 optab optab;
8385 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8386 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8387 return false;
8389 if (bitop2 != NOP_EXPR)
8391 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8392 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8393 return false;
8395 return true;
8399 /* Transform. */
8400 if (!slp_node)
8402 vec_oprnds0.create (1);
8403 vec_oprnds1.create (1);
8406 /* Handle def. */
8407 lhs = gimple_assign_lhs (stmt);
8408 mask = vect_create_destination_var (lhs, mask_type);
8410 /* Handle cmp expr. */
8411 for (j = 0; j < ncopies; j++)
8413 gassign *new_stmt = NULL;
8414 if (j == 0)
8416 if (slp_node)
8418 auto_vec<tree, 2> ops;
8419 auto_vec<vec<tree>, 2> vec_defs;
8421 ops.safe_push (rhs1);
8422 ops.safe_push (rhs2);
8423 vect_get_slp_defs (ops, slp_node, &vec_defs);
8424 vec_oprnds1 = vec_defs.pop ();
8425 vec_oprnds0 = vec_defs.pop ();
8427 else
8429 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8430 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8433 else
8435 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8436 vec_oprnds0.pop ());
8437 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8438 vec_oprnds1.pop ());
8441 if (!slp_node)
8443 vec_oprnds0.quick_push (vec_rhs1);
8444 vec_oprnds1.quick_push (vec_rhs2);
8447 /* Arguments are ready. Create the new vector stmt. */
8448 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8450 vec_rhs2 = vec_oprnds1[i];
8452 new_temp = make_ssa_name (mask);
8453 if (bitop1 == NOP_EXPR)
8455 new_stmt = gimple_build_assign (new_temp, code,
8456 vec_rhs1, vec_rhs2);
8457 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8459 else
8461 if (bitop1 == BIT_NOT_EXPR)
8462 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8463 else
8464 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8465 vec_rhs2);
8466 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8467 if (bitop2 != NOP_EXPR)
8469 tree res = make_ssa_name (mask);
8470 if (bitop2 == BIT_NOT_EXPR)
8471 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8472 else
8473 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8474 new_temp);
8475 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8478 if (slp_node)
8479 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8482 if (slp_node)
8483 continue;
8485 if (j == 0)
8486 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8487 else
8488 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8490 prev_stmt_info = vinfo_for_stmt (new_stmt);
8493 vec_oprnds0.release ();
8494 vec_oprnds1.release ();
8496 return true;
8499 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
8500 can handle all live statements in the node. Otherwise return true
8501 if STMT is not live or if vectorizable_live_operation can handle it.
8502 GSI and VEC_STMT are as for vectorizable_live_operation. */
8504 static bool
8505 can_vectorize_live_stmts (gimple *stmt, gimple_stmt_iterator *gsi,
8506 slp_tree slp_node, gimple **vec_stmt)
8508 if (slp_node)
8510 gimple *slp_stmt;
8511 unsigned int i;
8512 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8514 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8515 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8516 && !vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8517 vec_stmt))
8518 return false;
8521 else if (STMT_VINFO_LIVE_P (vinfo_for_stmt (stmt))
8522 && !vectorizable_live_operation (stmt, gsi, slp_node, -1, vec_stmt))
8523 return false;
8525 return true;
8528 /* Make sure the statement is vectorizable. */
8530 bool
8531 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node,
8532 slp_instance node_instance)
8534 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8535 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8536 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8537 bool ok;
8538 gimple *pattern_stmt;
8539 gimple_seq pattern_def_seq;
8541 if (dump_enabled_p ())
8543 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8544 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8547 if (gimple_has_volatile_ops (stmt))
8549 if (dump_enabled_p ())
8550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8551 "not vectorized: stmt has volatile operands\n");
8553 return false;
8556 /* Skip stmts that do not need to be vectorized. In loops this is expected
8557 to include:
8558 - the COND_EXPR which is the loop exit condition
8559 - any LABEL_EXPRs in the loop
8560 - computations that are used only for array indexing or loop control.
8561 In basic blocks we only analyze statements that are a part of some SLP
8562 instance, therefore, all the statements are relevant.
8564 Pattern statement needs to be analyzed instead of the original statement
8565 if the original statement is not relevant. Otherwise, we analyze both
8566 statements. In basic blocks we are called from some SLP instance
8567 traversal, don't analyze pattern stmts instead, the pattern stmts
8568 already will be part of SLP instance. */
8570 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8571 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8572 && !STMT_VINFO_LIVE_P (stmt_info))
8574 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8575 && pattern_stmt
8576 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8577 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8579 /* Analyze PATTERN_STMT instead of the original stmt. */
8580 stmt = pattern_stmt;
8581 stmt_info = vinfo_for_stmt (pattern_stmt);
8582 if (dump_enabled_p ())
8584 dump_printf_loc (MSG_NOTE, vect_location,
8585 "==> examining pattern statement: ");
8586 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8589 else
8591 if (dump_enabled_p ())
8592 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8594 return true;
8597 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8598 && node == NULL
8599 && pattern_stmt
8600 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8601 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8603 /* Analyze PATTERN_STMT too. */
8604 if (dump_enabled_p ())
8606 dump_printf_loc (MSG_NOTE, vect_location,
8607 "==> examining pattern statement: ");
8608 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8611 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node,
8612 node_instance))
8613 return false;
8616 if (is_pattern_stmt_p (stmt_info)
8617 && node == NULL
8618 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8620 gimple_stmt_iterator si;
8622 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8624 gimple *pattern_def_stmt = gsi_stmt (si);
8625 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8626 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8628 /* Analyze def stmt of STMT if it's a pattern stmt. */
8629 if (dump_enabled_p ())
8631 dump_printf_loc (MSG_NOTE, vect_location,
8632 "==> examining pattern def statement: ");
8633 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8636 if (!vect_analyze_stmt (pattern_def_stmt,
8637 need_to_vectorize, node, node_instance))
8638 return false;
8643 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8645 case vect_internal_def:
8646 break;
8648 case vect_reduction_def:
8649 case vect_nested_cycle:
8650 gcc_assert (!bb_vinfo
8651 && (relevance == vect_used_in_outer
8652 || relevance == vect_used_in_outer_by_reduction
8653 || relevance == vect_used_by_reduction
8654 || relevance == vect_unused_in_scope
8655 || relevance == vect_used_only_live));
8656 break;
8658 case vect_induction_def:
8659 gcc_assert (!bb_vinfo);
8660 break;
8662 case vect_constant_def:
8663 case vect_external_def:
8664 case vect_unknown_def_type:
8665 default:
8666 gcc_unreachable ();
8669 if (STMT_VINFO_RELEVANT_P (stmt_info))
8671 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8672 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8673 || (is_gimple_call (stmt)
8674 && gimple_call_lhs (stmt) == NULL_TREE));
8675 *need_to_vectorize = true;
8678 if (PURE_SLP_STMT (stmt_info) && !node)
8680 dump_printf_loc (MSG_NOTE, vect_location,
8681 "handled only by SLP analysis\n");
8682 return true;
8685 ok = true;
8686 if (!bb_vinfo
8687 && (STMT_VINFO_RELEVANT_P (stmt_info)
8688 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8689 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8690 || vectorizable_conversion (stmt, NULL, NULL, node)
8691 || vectorizable_shift (stmt, NULL, NULL, node)
8692 || vectorizable_operation (stmt, NULL, NULL, node)
8693 || vectorizable_assignment (stmt, NULL, NULL, node)
8694 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8695 || vectorizable_call (stmt, NULL, NULL, node)
8696 || vectorizable_store (stmt, NULL, NULL, node)
8697 || vectorizable_reduction (stmt, NULL, NULL, node, node_instance)
8698 || vectorizable_induction (stmt, NULL, NULL, node)
8699 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8700 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8701 else
8703 if (bb_vinfo)
8704 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8705 || vectorizable_conversion (stmt, NULL, NULL, node)
8706 || vectorizable_shift (stmt, NULL, NULL, node)
8707 || vectorizable_operation (stmt, NULL, NULL, node)
8708 || vectorizable_assignment (stmt, NULL, NULL, node)
8709 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8710 || vectorizable_call (stmt, NULL, NULL, node)
8711 || vectorizable_store (stmt, NULL, NULL, node)
8712 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8713 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8716 if (!ok)
8718 if (dump_enabled_p ())
8720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8721 "not vectorized: relevant stmt not ");
8722 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8723 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8726 return false;
8729 if (bb_vinfo)
8730 return true;
8732 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
8733 need extra handling, except for vectorizable reductions. */
8734 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8735 && !can_vectorize_live_stmts (stmt, NULL, node, NULL))
8737 if (dump_enabled_p ())
8739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8740 "not vectorized: live stmt not supported: ");
8741 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8744 return false;
8747 return true;
8751 /* Function vect_transform_stmt.
8753 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8755 bool
8756 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8757 bool *grouped_store, slp_tree slp_node,
8758 slp_instance slp_node_instance)
8760 bool is_store = false;
8761 gimple *vec_stmt = NULL;
8762 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8763 bool done;
8765 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8766 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8768 switch (STMT_VINFO_TYPE (stmt_info))
8770 case type_demotion_vec_info_type:
8771 case type_promotion_vec_info_type:
8772 case type_conversion_vec_info_type:
8773 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8774 gcc_assert (done);
8775 break;
8777 case induc_vec_info_type:
8778 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8779 gcc_assert (done);
8780 break;
8782 case shift_vec_info_type:
8783 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8784 gcc_assert (done);
8785 break;
8787 case op_vec_info_type:
8788 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8789 gcc_assert (done);
8790 break;
8792 case assignment_vec_info_type:
8793 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8794 gcc_assert (done);
8795 break;
8797 case load_vec_info_type:
8798 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8799 slp_node_instance);
8800 gcc_assert (done);
8801 break;
8803 case store_vec_info_type:
8804 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8805 gcc_assert (done);
8806 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8808 /* In case of interleaving, the whole chain is vectorized when the
8809 last store in the chain is reached. Store stmts before the last
8810 one are skipped, and there vec_stmt_info shouldn't be freed
8811 meanwhile. */
8812 *grouped_store = true;
8813 if (STMT_VINFO_VEC_STMT (stmt_info))
8814 is_store = true;
8816 else
8817 is_store = true;
8818 break;
8820 case condition_vec_info_type:
8821 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8822 gcc_assert (done);
8823 break;
8825 case comparison_vec_info_type:
8826 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8827 gcc_assert (done);
8828 break;
8830 case call_vec_info_type:
8831 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8832 stmt = gsi_stmt (*gsi);
8833 break;
8835 case call_simd_clone_vec_info_type:
8836 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8837 stmt = gsi_stmt (*gsi);
8838 break;
8840 case reduc_vec_info_type:
8841 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node,
8842 slp_node_instance);
8843 gcc_assert (done);
8844 break;
8846 default:
8847 if (!STMT_VINFO_LIVE_P (stmt_info))
8849 if (dump_enabled_p ())
8850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8851 "stmt not supported.\n");
8852 gcc_unreachable ();
8856 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8857 This would break hybrid SLP vectorization. */
8858 if (slp_node)
8859 gcc_assert (!vec_stmt
8860 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8862 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8863 is being vectorized, but outside the immediately enclosing loop. */
8864 if (vec_stmt
8865 && STMT_VINFO_LOOP_VINFO (stmt_info)
8866 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8867 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8868 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8869 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8870 || STMT_VINFO_RELEVANT (stmt_info) ==
8871 vect_used_in_outer_by_reduction))
8873 struct loop *innerloop = LOOP_VINFO_LOOP (
8874 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8875 imm_use_iterator imm_iter;
8876 use_operand_p use_p;
8877 tree scalar_dest;
8878 gimple *exit_phi;
8880 if (dump_enabled_p ())
8881 dump_printf_loc (MSG_NOTE, vect_location,
8882 "Record the vdef for outer-loop vectorization.\n");
8884 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
8885 (to be used when vectorizing outer-loop stmts that use the DEF of
8886 STMT). */
8887 if (gimple_code (stmt) == GIMPLE_PHI)
8888 scalar_dest = PHI_RESULT (stmt);
8889 else
8890 scalar_dest = gimple_assign_lhs (stmt);
8892 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8894 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8896 exit_phi = USE_STMT (use_p);
8897 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8902 /* Handle stmts whose DEF is used outside the loop-nest that is
8903 being vectorized. */
8904 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8906 done = can_vectorize_live_stmts (stmt, gsi, slp_node, &vec_stmt);
8907 gcc_assert (done);
8910 if (vec_stmt)
8911 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8913 return is_store;
8917 /* Remove a group of stores (for SLP or interleaving), free their
8918 stmt_vec_info. */
8920 void
8921 vect_remove_stores (gimple *first_stmt)
8923 gimple *next = first_stmt;
8924 gimple *tmp;
8925 gimple_stmt_iterator next_si;
8927 while (next)
8929 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8931 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8932 if (is_pattern_stmt_p (stmt_info))
8933 next = STMT_VINFO_RELATED_STMT (stmt_info);
8934 /* Free the attached stmt_vec_info and remove the stmt. */
8935 next_si = gsi_for_stmt (next);
8936 unlink_stmt_vdef (next);
8937 gsi_remove (&next_si, true);
8938 release_defs (next);
8939 free_stmt_vec_info (next);
8940 next = tmp;
8945 /* Function new_stmt_vec_info.
8947 Create and initialize a new stmt_vec_info struct for STMT. */
8949 stmt_vec_info
8950 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8952 stmt_vec_info res;
8953 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8955 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8956 STMT_VINFO_STMT (res) = stmt;
8957 res->vinfo = vinfo;
8958 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8959 STMT_VINFO_LIVE_P (res) = false;
8960 STMT_VINFO_VECTYPE (res) = NULL;
8961 STMT_VINFO_VEC_STMT (res) = NULL;
8962 STMT_VINFO_VECTORIZABLE (res) = true;
8963 STMT_VINFO_IN_PATTERN_P (res) = false;
8964 STMT_VINFO_RELATED_STMT (res) = NULL;
8965 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8966 STMT_VINFO_DATA_REF (res) = NULL;
8967 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8968 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8970 if (gimple_code (stmt) == GIMPLE_PHI
8971 && is_loop_header_bb_p (gimple_bb (stmt)))
8972 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8973 else
8974 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8976 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8977 STMT_SLP_TYPE (res) = loop_vect;
8978 STMT_VINFO_NUM_SLP_USES (res) = 0;
8980 GROUP_FIRST_ELEMENT (res) = NULL;
8981 GROUP_NEXT_ELEMENT (res) = NULL;
8982 GROUP_SIZE (res) = 0;
8983 GROUP_STORE_COUNT (res) = 0;
8984 GROUP_GAP (res) = 0;
8985 GROUP_SAME_DR_STMT (res) = NULL;
8987 return res;
8991 /* Create a hash table for stmt_vec_info. */
8993 void
8994 init_stmt_vec_info_vec (void)
8996 gcc_assert (!stmt_vec_info_vec.exists ());
8997 stmt_vec_info_vec.create (50);
9001 /* Free hash table for stmt_vec_info. */
9003 void
9004 free_stmt_vec_info_vec (void)
9006 unsigned int i;
9007 stmt_vec_info info;
9008 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
9009 if (info != NULL)
9010 free_stmt_vec_info (STMT_VINFO_STMT (info));
9011 gcc_assert (stmt_vec_info_vec.exists ());
9012 stmt_vec_info_vec.release ();
9016 /* Free stmt vectorization related info. */
9018 void
9019 free_stmt_vec_info (gimple *stmt)
9021 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9023 if (!stmt_info)
9024 return;
9026 /* Check if this statement has a related "pattern stmt"
9027 (introduced by the vectorizer during the pattern recognition
9028 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
9029 too. */
9030 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
9032 stmt_vec_info patt_info
9033 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9034 if (patt_info)
9036 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
9037 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
9038 gimple_set_bb (patt_stmt, NULL);
9039 tree lhs = gimple_get_lhs (patt_stmt);
9040 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9041 release_ssa_name (lhs);
9042 if (seq)
9044 gimple_stmt_iterator si;
9045 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
9047 gimple *seq_stmt = gsi_stmt (si);
9048 gimple_set_bb (seq_stmt, NULL);
9049 lhs = gimple_get_lhs (seq_stmt);
9050 if (lhs && TREE_CODE (lhs) == SSA_NAME)
9051 release_ssa_name (lhs);
9052 free_stmt_vec_info (seq_stmt);
9055 free_stmt_vec_info (patt_stmt);
9059 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
9060 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
9061 set_vinfo_for_stmt (stmt, NULL);
9062 free (stmt_info);
9066 /* Function get_vectype_for_scalar_type_and_size.
9068 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9069 by the target. */
9071 tree
9072 get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size)
9074 tree orig_scalar_type = scalar_type;
9075 scalar_mode inner_mode;
9076 machine_mode simd_mode;
9077 poly_uint64 nunits;
9078 tree vectype;
9080 if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode)
9081 && !is_float_mode (TYPE_MODE (scalar_type), &inner_mode))
9082 return NULL_TREE;
9084 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
9086 /* For vector types of elements whose mode precision doesn't
9087 match their types precision we use a element type of mode
9088 precision. The vectorization routines will have to make sure
9089 they support the proper result truncation/extension.
9090 We also make sure to build vector types with INTEGER_TYPE
9091 component type only. */
9092 if (INTEGRAL_TYPE_P (scalar_type)
9093 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
9094 || TREE_CODE (scalar_type) != INTEGER_TYPE))
9095 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
9096 TYPE_UNSIGNED (scalar_type));
9098 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9099 When the component mode passes the above test simply use a type
9100 corresponding to that mode. The theory is that any use that
9101 would cause problems with this will disable vectorization anyway. */
9102 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
9103 && !INTEGRAL_TYPE_P (scalar_type))
9104 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
9106 /* We can't build a vector type of elements with alignment bigger than
9107 their size. */
9108 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9109 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9110 TYPE_UNSIGNED (scalar_type));
9112 /* If we felt back to using the mode fail if there was
9113 no scalar type for it. */
9114 if (scalar_type == NULL_TREE)
9115 return NULL_TREE;
9117 /* If no size was supplied use the mode the target prefers. Otherwise
9118 lookup a vector mode of the specified size. */
9119 if (known_eq (size, 0U))
9120 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9121 else if (!multiple_p (size, nbytes, &nunits)
9122 || !mode_for_vector (inner_mode, nunits).exists (&simd_mode))
9123 return NULL_TREE;
9124 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9125 if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits))
9126 return NULL_TREE;
9128 vectype = build_vector_type (scalar_type, nunits);
9130 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9131 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9132 return NULL_TREE;
9134 /* Re-attach the address-space qualifier if we canonicalized the scalar
9135 type. */
9136 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9137 return build_qualified_type
9138 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9140 return vectype;
9143 poly_uint64 current_vector_size;
9145 /* Function get_vectype_for_scalar_type.
9147 Returns the vector type corresponding to SCALAR_TYPE as supported
9148 by the target. */
9150 tree
9151 get_vectype_for_scalar_type (tree scalar_type)
9153 tree vectype;
9154 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9155 current_vector_size);
9156 if (vectype
9157 && known_eq (current_vector_size, 0U))
9158 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9159 return vectype;
9162 /* Function get_mask_type_for_scalar_type.
9164 Returns the mask type corresponding to a result of comparison
9165 of vectors of specified SCALAR_TYPE as supported by target. */
9167 tree
9168 get_mask_type_for_scalar_type (tree scalar_type)
9170 tree vectype = get_vectype_for_scalar_type (scalar_type);
9172 if (!vectype)
9173 return NULL;
9175 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9176 current_vector_size);
9179 /* Function get_same_sized_vectype
9181 Returns a vector type corresponding to SCALAR_TYPE of size
9182 VECTOR_TYPE if supported by the target. */
9184 tree
9185 get_same_sized_vectype (tree scalar_type, tree vector_type)
9187 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9188 return build_same_sized_truth_vector_type (vector_type);
9190 return get_vectype_for_scalar_type_and_size
9191 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9194 /* Function vect_is_simple_use.
9196 Input:
9197 VINFO - the vect info of the loop or basic block that is being vectorized.
9198 OPERAND - operand in the loop or bb.
9199 Output:
9200 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9201 DT - the type of definition
9203 Returns whether a stmt with OPERAND can be vectorized.
9204 For loops, supportable operands are constants, loop invariants, and operands
9205 that are defined by the current iteration of the loop. Unsupportable
9206 operands are those that are defined by a previous iteration of the loop (as
9207 is the case in reduction/induction computations).
9208 For basic blocks, supportable operands are constants and bb invariants.
9209 For now, operands defined outside the basic block are not supported. */
9211 bool
9212 vect_is_simple_use (tree operand, vec_info *vinfo,
9213 gimple **def_stmt, enum vect_def_type *dt)
9215 *def_stmt = NULL;
9216 *dt = vect_unknown_def_type;
9218 if (dump_enabled_p ())
9220 dump_printf_loc (MSG_NOTE, vect_location,
9221 "vect_is_simple_use: operand ");
9222 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9223 dump_printf (MSG_NOTE, "\n");
9226 if (CONSTANT_CLASS_P (operand))
9228 *dt = vect_constant_def;
9229 return true;
9232 if (is_gimple_min_invariant (operand))
9234 *dt = vect_external_def;
9235 return true;
9238 if (TREE_CODE (operand) != SSA_NAME)
9240 if (dump_enabled_p ())
9241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9242 "not ssa-name.\n");
9243 return false;
9246 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9248 *dt = vect_external_def;
9249 return true;
9252 *def_stmt = SSA_NAME_DEF_STMT (operand);
9253 if (dump_enabled_p ())
9255 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9256 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9259 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9260 *dt = vect_external_def;
9261 else
9263 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9264 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9267 if (dump_enabled_p ())
9269 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9270 switch (*dt)
9272 case vect_uninitialized_def:
9273 dump_printf (MSG_NOTE, "uninitialized\n");
9274 break;
9275 case vect_constant_def:
9276 dump_printf (MSG_NOTE, "constant\n");
9277 break;
9278 case vect_external_def:
9279 dump_printf (MSG_NOTE, "external\n");
9280 break;
9281 case vect_internal_def:
9282 dump_printf (MSG_NOTE, "internal\n");
9283 break;
9284 case vect_induction_def:
9285 dump_printf (MSG_NOTE, "induction\n");
9286 break;
9287 case vect_reduction_def:
9288 dump_printf (MSG_NOTE, "reduction\n");
9289 break;
9290 case vect_double_reduction_def:
9291 dump_printf (MSG_NOTE, "double reduction\n");
9292 break;
9293 case vect_nested_cycle:
9294 dump_printf (MSG_NOTE, "nested cycle\n");
9295 break;
9296 case vect_unknown_def_type:
9297 dump_printf (MSG_NOTE, "unknown\n");
9298 break;
9302 if (*dt == vect_unknown_def_type)
9304 if (dump_enabled_p ())
9305 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9306 "Unsupported pattern.\n");
9307 return false;
9310 switch (gimple_code (*def_stmt))
9312 case GIMPLE_PHI:
9313 case GIMPLE_ASSIGN:
9314 case GIMPLE_CALL:
9315 break;
9316 default:
9317 if (dump_enabled_p ())
9318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9319 "unsupported defining stmt:\n");
9320 return false;
9323 return true;
9326 /* Function vect_is_simple_use.
9328 Same as vect_is_simple_use but also determines the vector operand
9329 type of OPERAND and stores it to *VECTYPE. If the definition of
9330 OPERAND is vect_uninitialized_def, vect_constant_def or
9331 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
9332 is responsible to compute the best suited vector type for the
9333 scalar operand. */
9335 bool
9336 vect_is_simple_use (tree operand, vec_info *vinfo,
9337 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9339 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9340 return false;
9342 /* Now get a vector type if the def is internal, otherwise supply
9343 NULL_TREE and leave it up to the caller to figure out a proper
9344 type for the use stmt. */
9345 if (*dt == vect_internal_def
9346 || *dt == vect_induction_def
9347 || *dt == vect_reduction_def
9348 || *dt == vect_double_reduction_def
9349 || *dt == vect_nested_cycle)
9351 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9353 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9354 && !STMT_VINFO_RELEVANT (stmt_info)
9355 && !STMT_VINFO_LIVE_P (stmt_info))
9356 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9358 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9359 gcc_assert (*vectype != NULL_TREE);
9361 else if (*dt == vect_uninitialized_def
9362 || *dt == vect_constant_def
9363 || *dt == vect_external_def)
9364 *vectype = NULL_TREE;
9365 else
9366 gcc_unreachable ();
9368 return true;
9372 /* Function supportable_widening_operation
9374 Check whether an operation represented by the code CODE is a
9375 widening operation that is supported by the target platform in
9376 vector form (i.e., when operating on arguments of type VECTYPE_IN
9377 producing a result of type VECTYPE_OUT).
9379 Widening operations we currently support are NOP (CONVERT), FLOAT
9380 and WIDEN_MULT. This function checks if these operations are supported
9381 by the target platform either directly (via vector tree-codes), or via
9382 target builtins.
9384 Output:
9385 - CODE1 and CODE2 are codes of vector operations to be used when
9386 vectorizing the operation, if available.
9387 - MULTI_STEP_CVT determines the number of required intermediate steps in
9388 case of multi-step conversion (like char->short->int - in that case
9389 MULTI_STEP_CVT will be 1).
9390 - INTERM_TYPES contains the intermediate type required to perform the
9391 widening operation (short in the above example). */
9393 bool
9394 supportable_widening_operation (enum tree_code code, gimple *stmt,
9395 tree vectype_out, tree vectype_in,
9396 enum tree_code *code1, enum tree_code *code2,
9397 int *multi_step_cvt,
9398 vec<tree> *interm_types)
9400 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9401 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9402 struct loop *vect_loop = NULL;
9403 machine_mode vec_mode;
9404 enum insn_code icode1, icode2;
9405 optab optab1, optab2;
9406 tree vectype = vectype_in;
9407 tree wide_vectype = vectype_out;
9408 enum tree_code c1, c2;
9409 int i;
9410 tree prev_type, intermediate_type;
9411 machine_mode intermediate_mode, prev_mode;
9412 optab optab3, optab4;
9414 *multi_step_cvt = 0;
9415 if (loop_info)
9416 vect_loop = LOOP_VINFO_LOOP (loop_info);
9418 switch (code)
9420 case WIDEN_MULT_EXPR:
9421 /* The result of a vectorized widening operation usually requires
9422 two vectors (because the widened results do not fit into one vector).
9423 The generated vector results would normally be expected to be
9424 generated in the same order as in the original scalar computation,
9425 i.e. if 8 results are generated in each vector iteration, they are
9426 to be organized as follows:
9427 vect1: [res1,res2,res3,res4],
9428 vect2: [res5,res6,res7,res8].
9430 However, in the special case that the result of the widening
9431 operation is used in a reduction computation only, the order doesn't
9432 matter (because when vectorizing a reduction we change the order of
9433 the computation). Some targets can take advantage of this and
9434 generate more efficient code. For example, targets like Altivec,
9435 that support widen_mult using a sequence of {mult_even,mult_odd}
9436 generate the following vectors:
9437 vect1: [res1,res3,res5,res7],
9438 vect2: [res2,res4,res6,res8].
9440 When vectorizing outer-loops, we execute the inner-loop sequentially
9441 (each vectorized inner-loop iteration contributes to VF outer-loop
9442 iterations in parallel). We therefore don't allow to change the
9443 order of the computation in the inner-loop during outer-loop
9444 vectorization. */
9445 /* TODO: Another case in which order doesn't *really* matter is when we
9446 widen and then contract again, e.g. (short)((int)x * y >> 8).
9447 Normally, pack_trunc performs an even/odd permute, whereas the
9448 repack from an even/odd expansion would be an interleave, which
9449 would be significantly simpler for e.g. AVX2. */
9450 /* In any case, in order to avoid duplicating the code below, recurse
9451 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9452 are properly set up for the caller. If we fail, we'll continue with
9453 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9454 if (vect_loop
9455 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9456 && !nested_in_vect_loop_p (vect_loop, stmt)
9457 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9458 stmt, vectype_out, vectype_in,
9459 code1, code2, multi_step_cvt,
9460 interm_types))
9462 /* Elements in a vector with vect_used_by_reduction property cannot
9463 be reordered if the use chain with this property does not have the
9464 same operation. One such an example is s += a * b, where elements
9465 in a and b cannot be reordered. Here we check if the vector defined
9466 by STMT is only directly used in the reduction statement. */
9467 tree lhs = gimple_assign_lhs (stmt);
9468 use_operand_p dummy;
9469 gimple *use_stmt;
9470 stmt_vec_info use_stmt_info = NULL;
9471 if (single_imm_use (lhs, &dummy, &use_stmt)
9472 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9473 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9474 return true;
9476 c1 = VEC_WIDEN_MULT_LO_EXPR;
9477 c2 = VEC_WIDEN_MULT_HI_EXPR;
9478 break;
9480 case DOT_PROD_EXPR:
9481 c1 = DOT_PROD_EXPR;
9482 c2 = DOT_PROD_EXPR;
9483 break;
9485 case SAD_EXPR:
9486 c1 = SAD_EXPR;
9487 c2 = SAD_EXPR;
9488 break;
9490 case VEC_WIDEN_MULT_EVEN_EXPR:
9491 /* Support the recursion induced just above. */
9492 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9493 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9494 break;
9496 case WIDEN_LSHIFT_EXPR:
9497 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9498 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9499 break;
9501 CASE_CONVERT:
9502 c1 = VEC_UNPACK_LO_EXPR;
9503 c2 = VEC_UNPACK_HI_EXPR;
9504 break;
9506 case FLOAT_EXPR:
9507 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9508 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9509 break;
9511 case FIX_TRUNC_EXPR:
9512 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9513 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9514 computing the operation. */
9515 return false;
9517 default:
9518 gcc_unreachable ();
9521 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9522 std::swap (c1, c2);
9524 if (code == FIX_TRUNC_EXPR)
9526 /* The signedness is determined from output operand. */
9527 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9528 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9530 else
9532 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9533 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9536 if (!optab1 || !optab2)
9537 return false;
9539 vec_mode = TYPE_MODE (vectype);
9540 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9541 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9542 return false;
9544 *code1 = c1;
9545 *code2 = c2;
9547 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9548 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9549 /* For scalar masks we may have different boolean
9550 vector types having the same QImode. Thus we
9551 add additional check for elements number. */
9552 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9553 || known_eq (TYPE_VECTOR_SUBPARTS (vectype),
9554 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
9556 /* Check if it's a multi-step conversion that can be done using intermediate
9557 types. */
9559 prev_type = vectype;
9560 prev_mode = vec_mode;
9562 if (!CONVERT_EXPR_CODE_P (code))
9563 return false;
9565 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9566 intermediate steps in promotion sequence. We try
9567 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9568 not. */
9569 interm_types->create (MAX_INTERM_CVT_STEPS);
9570 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9572 intermediate_mode = insn_data[icode1].operand[0].mode;
9573 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9575 poly_uint64 intermediate_nelts
9576 = exact_div (TYPE_VECTOR_SUBPARTS (prev_type), 2);
9577 intermediate_type
9578 = build_truth_vector_type (intermediate_nelts,
9579 current_vector_size);
9580 if (intermediate_mode != TYPE_MODE (intermediate_type))
9581 return false;
9583 else
9584 intermediate_type
9585 = lang_hooks.types.type_for_mode (intermediate_mode,
9586 TYPE_UNSIGNED (prev_type));
9588 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9589 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9591 if (!optab3 || !optab4
9592 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9593 || insn_data[icode1].operand[0].mode != intermediate_mode
9594 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9595 || insn_data[icode2].operand[0].mode != intermediate_mode
9596 || ((icode1 = optab_handler (optab3, intermediate_mode))
9597 == CODE_FOR_nothing)
9598 || ((icode2 = optab_handler (optab4, intermediate_mode))
9599 == CODE_FOR_nothing))
9600 break;
9602 interm_types->quick_push (intermediate_type);
9603 (*multi_step_cvt)++;
9605 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9606 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9607 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9608 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
9609 TYPE_VECTOR_SUBPARTS (wide_vectype) * 2));
9611 prev_type = intermediate_type;
9612 prev_mode = intermediate_mode;
9615 interm_types->release ();
9616 return false;
9620 /* Function supportable_narrowing_operation
9622 Check whether an operation represented by the code CODE is a
9623 narrowing operation that is supported by the target platform in
9624 vector form (i.e., when operating on arguments of type VECTYPE_IN
9625 and producing a result of type VECTYPE_OUT).
9627 Narrowing operations we currently support are NOP (CONVERT) and
9628 FIX_TRUNC. This function checks if these operations are supported by
9629 the target platform directly via vector tree-codes.
9631 Output:
9632 - CODE1 is the code of a vector operation to be used when
9633 vectorizing the operation, if available.
9634 - MULTI_STEP_CVT determines the number of required intermediate steps in
9635 case of multi-step conversion (like int->short->char - in that case
9636 MULTI_STEP_CVT will be 1).
9637 - INTERM_TYPES contains the intermediate type required to perform the
9638 narrowing operation (short in the above example). */
9640 bool
9641 supportable_narrowing_operation (enum tree_code code,
9642 tree vectype_out, tree vectype_in,
9643 enum tree_code *code1, int *multi_step_cvt,
9644 vec<tree> *interm_types)
9646 machine_mode vec_mode;
9647 enum insn_code icode1;
9648 optab optab1, interm_optab;
9649 tree vectype = vectype_in;
9650 tree narrow_vectype = vectype_out;
9651 enum tree_code c1;
9652 tree intermediate_type, prev_type;
9653 machine_mode intermediate_mode, prev_mode;
9654 int i;
9655 bool uns;
9657 *multi_step_cvt = 0;
9658 switch (code)
9660 CASE_CONVERT:
9661 c1 = VEC_PACK_TRUNC_EXPR;
9662 break;
9664 case FIX_TRUNC_EXPR:
9665 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9666 break;
9668 case FLOAT_EXPR:
9669 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9670 tree code and optabs used for computing the operation. */
9671 return false;
9673 default:
9674 gcc_unreachable ();
9677 if (code == FIX_TRUNC_EXPR)
9678 /* The signedness is determined from output operand. */
9679 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9680 else
9681 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9683 if (!optab1)
9684 return false;
9686 vec_mode = TYPE_MODE (vectype);
9687 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9688 return false;
9690 *code1 = c1;
9692 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9693 /* For scalar masks we may have different boolean
9694 vector types having the same QImode. Thus we
9695 add additional check for elements number. */
9696 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9697 || known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
9698 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9700 /* Check if it's a multi-step conversion that can be done using intermediate
9701 types. */
9702 prev_mode = vec_mode;
9703 prev_type = vectype;
9704 if (code == FIX_TRUNC_EXPR)
9705 uns = TYPE_UNSIGNED (vectype_out);
9706 else
9707 uns = TYPE_UNSIGNED (vectype);
9709 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9710 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9711 costly than signed. */
9712 if (code == FIX_TRUNC_EXPR && uns)
9714 enum insn_code icode2;
9716 intermediate_type
9717 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9718 interm_optab
9719 = optab_for_tree_code (c1, intermediate_type, optab_default);
9720 if (interm_optab != unknown_optab
9721 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9722 && insn_data[icode1].operand[0].mode
9723 == insn_data[icode2].operand[0].mode)
9725 uns = false;
9726 optab1 = interm_optab;
9727 icode1 = icode2;
9731 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9732 intermediate steps in promotion sequence. We try
9733 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9734 interm_types->create (MAX_INTERM_CVT_STEPS);
9735 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9737 intermediate_mode = insn_data[icode1].operand[0].mode;
9738 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9740 intermediate_type
9741 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9742 current_vector_size);
9743 if (intermediate_mode != TYPE_MODE (intermediate_type))
9744 return false;
9746 else
9747 intermediate_type
9748 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9749 interm_optab
9750 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9751 optab_default);
9752 if (!interm_optab
9753 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9754 || insn_data[icode1].operand[0].mode != intermediate_mode
9755 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9756 == CODE_FOR_nothing))
9757 break;
9759 interm_types->quick_push (intermediate_type);
9760 (*multi_step_cvt)++;
9762 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9763 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9764 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
9765 TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9767 prev_mode = intermediate_mode;
9768 prev_type = intermediate_type;
9769 optab1 = interm_optab;
9772 interm_types->release ();
9773 return false;