1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
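/* Editor's addition, not in the original file: a minimal sketch of how a
   caller typically uses record_stmt_cost, mirroring the vect_model_*
   routines further down.  The wrapper name and parameters are
   illustrative only.  */

static unsigned
example_record_body_cost (stmt_vector_for_cost *body_cost_vec,
                          stmt_vec_info stmt_info, int ncopies)
{
  /* Charge NCOPIES generic vector statements to the loop body; the
     return value is only a preliminary estimate of their cost.  */
  return record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                           stmt_info, 0, vect_body);
}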
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
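/* Editor's addition, not in the original file: a sketch combining the
   three array helpers above.  A hypothetical caller creates a temporary
   array of two vectors, stores VEC into slot 0 and reads it back as a
   fresh SSA name; every identifier except the helpers is illustrative.  */

static tree
example_vector_array_roundtrip (gimple *stmt, gimple_stmt_iterator *gsi,
                                tree scalar_dest, tree vectype, tree vec)
{
  /* vectype array[2];  */
  tree array = create_vector_array (vectype, 2);
  /* array[0] = vec;  */
  write_vector_array (stmt, gsi, vec, array, 0);
  /* Return an SSA name holding array[0].  */
  return read_vector_array (stmt, gsi, scalar_dest, array, 0);
}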
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 tree mem_ref;
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
180 return mem_ref;
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
189 static void
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern; in such cases the
208 stmt itself should be marked.
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 stmt = pattern_stmt;
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
239 return;
242 worklist->safe_push (stmt);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
250 bool
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
253 tree op;
254 gimple *def_stmt;
255 ssa_op_iter iter;
257 if (!is_gimple_assign (stmt))
258 return false;
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
269 return false;
272 if (dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 return true;
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT in loop that is represented by LOOP_VINFO is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - control stmts in the loop (except for the exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
290 static bool
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop-closed SSA form). */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
340 *live_p = true;
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
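/* Editor's addition, not in the original file: an illustration of the
   criteria above.  In a scalar loop such as

	for (i = 0; i < n; i++)
	  {
	    a[i] = b[i] + 1;
	    s = b[i];
	  }
	use (s);

   the store to a[i] is relevant because it has a vdef (it alters memory),
   while the definition of s is live because its final value is used after
   the loop, through the loop-exit phi.  */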
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
363 static bool
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
366 tree operand;
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
373 return true;
375 /* STMT has a data_ref. FORNOW this means that it's one of
376 the following forms:
377 -1- ARRAY_REF = var
378 -2- var = ARRAY_REF
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
383 for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
394 case IFN_MASK_STORE:
395 operand = gimple_call_arg (stmt, 3);
396 if (operand == use)
397 return true;
398 /* FALLTHRU */
399 case IFN_MASK_LOAD:
400 operand = gimple_call_arg (stmt, 2);
401 if (operand == use)
402 return true;
403 break;
404 default:
405 break;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
424 /* Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT because it has already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
582 /* We are also not interested in uses on loop PHI backedges that are
583 inductions. Otherwise we'll needlessly vectorize the IV increment
584 and cause hybrid SLP for SLP inductions. */
585 else if (gimple_code (stmt) == GIMPLE_PHI
586 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
587 && (PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (bb->loop_father))
588 == use))
590 if (dump_enabled_p ())
591 dump_printf_loc (MSG_NOTE, vect_location,
592 "induction value on backedge.\n");
593 return true;
597 vect_mark_relevant (worklist, def_stmt, relevant, false);
598 return true;
602 /* Function vect_mark_stmts_to_be_vectorized.
604 Not all stmts in the loop need to be vectorized. For example:
606 for i...
607 for j...
608 1. T0 = i + j
609 2. T1 = a[T0]
611 3. j = j + 1
613 Stmts 1 and 3 do not need to be vectorized, because loop control and
614 addressing of vectorized data-refs are handled differently.
616 This pass detects such stmts. */
618 bool
619 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
621 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
622 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
623 unsigned int nbbs = loop->num_nodes;
624 gimple_stmt_iterator si;
625 gimple *stmt;
626 unsigned int i;
627 stmt_vec_info stmt_vinfo;
628 basic_block bb;
629 gimple *phi;
630 bool live_p;
631 enum vect_relevant relevant;
633 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location,
635 "=== vect_mark_stmts_to_be_vectorized ===\n");
637 auto_vec<gimple *, 64> worklist;
639 /* 1. Init worklist. */
640 for (i = 0; i < nbbs; i++)
642 bb = bbs[i];
643 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
645 phi = gsi_stmt (si);
646 if (dump_enabled_p ())
648 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
649 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
652 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
653 vect_mark_relevant (&worklist, phi, relevant, live_p);
655 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
657 stmt = gsi_stmt (si);
658 if (dump_enabled_p ())
660 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
661 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
664 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
665 vect_mark_relevant (&worklist, stmt, relevant, live_p);
669 /* 2. Process_worklist */
670 while (worklist.length () > 0)
672 use_operand_p use_p;
673 ssa_op_iter iter;
675 stmt = worklist.pop ();
676 if (dump_enabled_p ())
678 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
679 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
682 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
683 (DEF_STMT) as relevant/irrelevant according to the relevance property
684 of STMT. */
685 stmt_vinfo = vinfo_for_stmt (stmt);
686 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
688 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
689 propagated as is to the DEF_STMTs of its USEs.
691 One exception is when STMT has been identified as defining a reduction
692 variable; in this case we set the relevance to vect_used_by_reduction.
693 This is because we distinguish between two kinds of relevant stmts -
694 those that are used by a reduction computation, and those that are
695 (also) used by a regular computation. This allows us later on to
696 identify stmts that are used solely by a reduction, and therefore the
697 order of the results that they produce does not have to be kept. */
699 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
701 case vect_reduction_def:
702 gcc_assert (relevant != vect_unused_in_scope);
703 if (relevant != vect_unused_in_scope
704 && relevant != vect_used_in_scope
705 && relevant != vect_used_by_reduction
706 && relevant != vect_used_only_live)
708 if (dump_enabled_p ())
709 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
710 "unsupported use of reduction.\n");
711 return false;
713 break;
715 case vect_nested_cycle:
716 if (relevant != vect_unused_in_scope
717 && relevant != vect_used_in_outer_by_reduction
718 && relevant != vect_used_in_outer)
720 if (dump_enabled_p ())
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
722 "unsupported use of nested cycle.\n");
724 return false;
726 break;
728 case vect_double_reduction_def:
729 if (relevant != vect_unused_in_scope
730 && relevant != vect_used_by_reduction
731 && relevant != vect_used_only_live)
733 if (dump_enabled_p ())
734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
735 "unsupported use of double reduction.\n");
737 return false;
739 break;
741 default:
742 break;
745 if (is_pattern_stmt_p (stmt_vinfo))
747 /* Pattern statements are not inserted into the code, so
748 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
749 have to scan the RHS or function arguments instead. */
750 if (is_gimple_assign (stmt))
752 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
753 tree op = gimple_assign_rhs1 (stmt);
755 i = 1;
756 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
758 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
759 relevant, &worklist, false)
760 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
761 relevant, &worklist, false))
762 return false;
763 i = 2;
765 for (; i < gimple_num_ops (stmt); i++)
767 op = gimple_op (stmt, i);
768 if (TREE_CODE (op) == SSA_NAME
769 && !process_use (stmt, op, loop_vinfo, relevant,
770 &worklist, false))
771 return false;
774 else if (is_gimple_call (stmt))
776 for (i = 0; i < gimple_call_num_args (stmt); i++)
778 tree arg = gimple_call_arg (stmt, i);
779 if (!process_use (stmt, arg, loop_vinfo, relevant,
780 &worklist, false))
781 return false;
785 else
786 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
788 tree op = USE_FROM_PTR (use_p);
789 if (!process_use (stmt, op, loop_vinfo, relevant,
790 &worklist, false))
791 return false;
794 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
796 gather_scatter_info gs_info;
797 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
798 gcc_unreachable ();
799 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
800 &worklist, true))
801 return false;
803 } /* while worklist */
805 return true;
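/* Editor's addition, not in the original file: a minimal, hypothetical
   sketch of how the routine above is driven during loop analysis.  The
   wrapper name is an assumption for illustration only.  */

static bool
example_mark_relevant_stmts (loop_vec_info loop_vinfo)
{
  /* Seed the worklist with directly relevant stmts and propagate
     relevance to the defining stmts of their uses; any unsupported
     use makes the whole loop non-vectorizable.  */
  if (!vect_mark_stmts_to_be_vectorized (loop_vinfo))
    return false;
  return true;
}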
809 /* Function vect_model_simple_cost.
811 Models cost for simple operations, i.e. those that only emit ncopies of a
812 single op. Right now, this does not account for multiple insns that could
813 be generated for the single vector op. We will handle that shortly. */
815 void
816 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
817 enum vect_def_type *dt,
818 int ndts,
819 stmt_vector_for_cost *prologue_cost_vec,
820 stmt_vector_for_cost *body_cost_vec)
822 int i;
823 int inside_cost = 0, prologue_cost = 0;
825 /* The SLP costs were already calculated during SLP tree build. */
826 if (PURE_SLP_STMT (stmt_info))
827 return;
829 /* Cost the "broadcast" of a scalar operand into a vector operand.
830 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
831 cost model. */
832 for (i = 0; i < ndts; i++)
833 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
834 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
835 stmt_info, 0, vect_prologue);
837 /* Pass the inside-of-loop statements to the target-specific cost model. */
838 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
839 stmt_info, 0, vect_body);
841 if (dump_enabled_p ())
842 dump_printf_loc (MSG_NOTE, vect_location,
843 "vect_model_simple_cost: inside_cost = %d, "
844 "prologue_cost = %d .\n", inside_cost, prologue_cost);
848 /* Model cost for type demotion and promotion operations. PWR is normally
849 zero for single-step promotions and demotions. It will be one if
850 two-step promotion/demotion is required, and so on. Each additional
851 step doubles the number of instructions required. */
853 static void
854 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
855 enum vect_def_type *dt, int pwr)
857 int i, tmp;
858 int inside_cost = 0, prologue_cost = 0;
859 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
860 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
861 void *target_cost_data;
863 /* The SLP costs were already calculated during SLP tree build. */
864 if (PURE_SLP_STMT (stmt_info))
865 return;
867 if (loop_vinfo)
868 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
869 else
870 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
872 for (i = 0; i < pwr + 1; i++)
874 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
875 (i + 1) : i;
876 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
877 vec_promote_demote, stmt_info, 0,
878 vect_body);
881 /* FORNOW: Assuming maximum 2 args per stmts. */
882 for (i = 0; i < 2; i++)
883 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
884 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
885 stmt_info, 0, vect_prologue);
887 if (dump_enabled_p ())
888 dump_printf_loc (MSG_NOTE, vect_location,
889 "vect_model_promotion_demotion_cost: inside_cost = %d, "
890 "prologue_cost = %d .\n", inside_cost, prologue_cost);
893 /* Function vect_model_store_cost
895 Models cost for stores. In the case of grouped accesses, one access
896 has the overhead of the grouped access attributed to it. */
898 void
899 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
900 vect_memory_access_type memory_access_type,
901 enum vect_def_type dt, slp_tree slp_node,
902 stmt_vector_for_cost *prologue_cost_vec,
903 stmt_vector_for_cost *body_cost_vec)
905 unsigned int inside_cost = 0, prologue_cost = 0;
906 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
907 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
908 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
910 if (dt == vect_constant_def || dt == vect_external_def)
911 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
912 stmt_info, 0, vect_prologue);
914 /* Grouped stores update all elements in the group at once,
915 so we want the DR for the first statement. */
916 if (!slp_node && grouped_access_p)
918 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
919 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
922 /* True if we should include any once-per-group costs as well as
923 the cost of the statement itself. For SLP we only get called
924 once per group anyhow. */
925 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
927 /* We assume that the cost of a single store-lanes instruction is
928 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
929 access is instead being provided by a permute-and-store operation,
930 include the cost of the permutes. */
931 if (first_stmt_p
932 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
934 /* Uses high and low interleave or shuffle operations for each
935 needed permute. */
936 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
937 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
938 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
939 stmt_info, 0, vect_body);
941 if (dump_enabled_p ())
942 dump_printf_loc (MSG_NOTE, vect_location,
943 "vect_model_store_cost: strided group_size = %d .\n",
944 group_size);
947 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
948 /* Costs of the stores. */
949 if (memory_access_type == VMAT_ELEMENTWISE
950 || memory_access_type == VMAT_GATHER_SCATTER)
951 /* N scalar stores plus extracting the elements. */
952 inside_cost += record_stmt_cost (body_cost_vec,
953 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
954 scalar_store, stmt_info, 0, vect_body);
955 else
956 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
958 if (memory_access_type == VMAT_ELEMENTWISE
959 || memory_access_type == VMAT_STRIDED_SLP)
960 inside_cost += record_stmt_cost (body_cost_vec,
961 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
962 vec_to_scalar, stmt_info, 0, vect_body);
964 if (dump_enabled_p ())
965 dump_printf_loc (MSG_NOTE, vect_location,
966 "vect_model_store_cost: inside_cost = %d, "
967 "prologue_cost = %d .\n", inside_cost, prologue_cost);
971 /* Calculate cost of DR's memory access. */
972 void
973 vect_get_store_cost (struct data_reference *dr, int ncopies,
974 unsigned int *inside_cost,
975 stmt_vector_for_cost *body_cost_vec)
977 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
978 gimple *stmt = DR_STMT (dr);
979 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
981 switch (alignment_support_scheme)
983 case dr_aligned:
985 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
986 vector_store, stmt_info, 0,
987 vect_body);
989 if (dump_enabled_p ())
990 dump_printf_loc (MSG_NOTE, vect_location,
991 "vect_model_store_cost: aligned.\n");
992 break;
995 case dr_unaligned_supported:
997 /* Here, we assign an additional cost for the unaligned store. */
998 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
999 unaligned_store, stmt_info,
1000 DR_MISALIGNMENT (dr), vect_body);
1001 if (dump_enabled_p ())
1002 dump_printf_loc (MSG_NOTE, vect_location,
1003 "vect_model_store_cost: unaligned supported by "
1004 "hardware.\n");
1005 break;
1008 case dr_unaligned_unsupported:
1010 *inside_cost = VECT_MAX_COST;
1012 if (dump_enabled_p ())
1013 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1014 "vect_model_store_cost: unsupported access.\n");
1015 break;
1018 default:
1019 gcc_unreachable ();
1024 /* Function vect_model_load_cost
1026 Models cost for loads. In the case of grouped accesses, one access has
1027 the overhead of the grouped access attributed to it. Since unaligned
1028 accesses are supported for loads, we also account for the costs of the
1029 access scheme chosen. */
1031 void
1032 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1033 vect_memory_access_type memory_access_type,
1034 slp_tree slp_node,
1035 stmt_vector_for_cost *prologue_cost_vec,
1036 stmt_vector_for_cost *body_cost_vec)
1038 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1039 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1040 unsigned int inside_cost = 0, prologue_cost = 0;
1041 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1043 /* Grouped loads read all elements in the group at once,
1044 so we want the DR for the first statement. */
1045 if (!slp_node && grouped_access_p)
1047 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1048 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1051 /* True if we should include any once-per-group costs as well as
1052 the cost of the statement itself. For SLP we only get called
1053 once per group anyhow. */
1054 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1056 /* We assume that the cost of a single load-lanes instruction is
1057 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1058 access is instead being provided by a load-and-permute operation,
1059 include the cost of the permutes. */
1060 if (first_stmt_p
1061 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1063 /* Uses even and odd extract operations or shuffle operations
1064 for each needed permute. */
1065 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1066 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1067 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1068 stmt_info, 0, vect_body);
1070 if (dump_enabled_p ())
1071 dump_printf_loc (MSG_NOTE, vect_location,
1072 "vect_model_load_cost: strided group_size = %d .\n",
1073 group_size);
1076 /* The loads themselves. */
1077 if (memory_access_type == VMAT_ELEMENTWISE
1078 || memory_access_type == VMAT_GATHER_SCATTER)
1080 /* N scalar loads plus gathering them into a vector. */
1081 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1082 inside_cost += record_stmt_cost (body_cost_vec,
1083 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1084 scalar_load, stmt_info, 0, vect_body);
1086 else
1087 vect_get_load_cost (dr, ncopies, first_stmt_p,
1088 &inside_cost, &prologue_cost,
1089 prologue_cost_vec, body_cost_vec, true);
1090 if (memory_access_type == VMAT_ELEMENTWISE
1091 || memory_access_type == VMAT_STRIDED_SLP)
1092 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1093 stmt_info, 0, vect_body);
1095 if (dump_enabled_p ())
1096 dump_printf_loc (MSG_NOTE, vect_location,
1097 "vect_model_load_cost: inside_cost = %d, "
1098 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1102 /* Calculate cost of DR's memory access. */
1103 void
1104 vect_get_load_cost (struct data_reference *dr, int ncopies,
1105 bool add_realign_cost, unsigned int *inside_cost,
1106 unsigned int *prologue_cost,
1107 stmt_vector_for_cost *prologue_cost_vec,
1108 stmt_vector_for_cost *body_cost_vec,
1109 bool record_prologue_costs)
1111 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1112 gimple *stmt = DR_STMT (dr);
1113 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1115 switch (alignment_support_scheme)
1117 case dr_aligned:
1119 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1120 stmt_info, 0, vect_body);
1122 if (dump_enabled_p ())
1123 dump_printf_loc (MSG_NOTE, vect_location,
1124 "vect_model_load_cost: aligned.\n");
1126 break;
1128 case dr_unaligned_supported:
1130 /* Here, we assign an additional cost for the unaligned load. */
1131 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1132 unaligned_load, stmt_info,
1133 DR_MISALIGNMENT (dr), vect_body);
1135 if (dump_enabled_p ())
1136 dump_printf_loc (MSG_NOTE, vect_location,
1137 "vect_model_load_cost: unaligned supported by "
1138 "hardware.\n");
1140 break;
1142 case dr_explicit_realign:
1144 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1145 vector_load, stmt_info, 0, vect_body);
1146 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1147 vec_perm, stmt_info, 0, vect_body);
1149 /* FIXME: If the misalignment remains fixed across the iterations of
1150 the containing loop, the following cost should be added to the
1151 prologue costs. */
1152 if (targetm.vectorize.builtin_mask_for_load)
1153 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1154 stmt_info, 0, vect_body);
1156 if (dump_enabled_p ())
1157 dump_printf_loc (MSG_NOTE, vect_location,
1158 "vect_model_load_cost: explicit realign\n");
1160 break;
1162 case dr_explicit_realign_optimized:
1164 if (dump_enabled_p ())
1165 dump_printf_loc (MSG_NOTE, vect_location,
1166 "vect_model_load_cost: unaligned software "
1167 "pipelined.\n");
1169 /* Unaligned software pipeline has a load of an address, an initial
1170 load, and possibly a mask operation to "prime" the loop. However,
1171 if this is an access in a group of loads, which provide grouped
1172 access, then the above cost should only be considered for one
1173 access in the group. Inside the loop, there is a load op
1174 and a realignment op. */
1176 if (add_realign_cost && record_prologue_costs)
1178 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1179 vector_stmt, stmt_info,
1180 0, vect_prologue);
1181 if (targetm.vectorize.builtin_mask_for_load)
1182 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1183 vector_stmt, stmt_info,
1184 0, vect_prologue);
1187 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1188 stmt_info, 0, vect_body);
1189 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1190 stmt_info, 0, vect_body);
1192 if (dump_enabled_p ())
1193 dump_printf_loc (MSG_NOTE, vect_location,
1194 "vect_model_load_cost: explicit realign optimized"
1195 "\n");
1197 break;
1200 case dr_unaligned_unsupported:
1202 *inside_cost = VECT_MAX_COST;
1204 if (dump_enabled_p ())
1205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1206 "vect_model_load_cost: unsupported access.\n");
1207 break;
1210 default:
1211 gcc_unreachable ();
1215 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1216 the loop preheader for the vectorized stmt STMT. */
1218 static void
1219 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1221 if (gsi)
1222 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1223 else
1225 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1226 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1228 if (loop_vinfo)
1230 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1231 basic_block new_bb;
1232 edge pe;
1234 if (nested_in_vect_loop_p (loop, stmt))
1235 loop = loop->inner;
1237 pe = loop_preheader_edge (loop);
1238 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1239 gcc_assert (!new_bb);
1241 else
1243 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1244 basic_block bb;
1245 gimple_stmt_iterator gsi_bb_start;
1247 gcc_assert (bb_vinfo);
1248 bb = BB_VINFO_BB (bb_vinfo);
1249 gsi_bb_start = gsi_after_labels (bb);
1250 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1254 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE, vect_location,
1257 "created new init_stmt: ");
1258 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1262 /* Function vect_init_vector.
1264 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1265 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1266 vector type a vector with all elements equal to VAL is created first.
1267 Place the initialization at BSI if it is not NULL. Otherwise, place the
1268 initialization at the loop preheader.
1269 Return the DEF of INIT_STMT.
1270 It will be used in the vectorization of STMT. */
1272 tree
1273 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1275 gimple *init_stmt;
1276 tree new_temp;
1278 /* We abuse this function to push something to an SSA name with initial value VAL. */
1279 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1281 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1282 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1284 /* A scalar boolean value should be transformed into an
1285 all-zeros or all-ones value before building a vector. */
1286 if (VECTOR_BOOLEAN_TYPE_P (type))
1288 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1289 tree false_val = build_zero_cst (TREE_TYPE (type));
1291 if (CONSTANT_CLASS_P (val))
1292 val = integer_zerop (val) ? false_val : true_val;
1293 else
1295 new_temp = make_ssa_name (TREE_TYPE (type));
1296 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1297 val, true_val, false_val);
1298 vect_init_vector_1 (stmt, init_stmt, gsi);
1299 val = new_temp;
1302 else if (CONSTANT_CLASS_P (val))
1303 val = fold_convert (TREE_TYPE (type), val);
1304 else
1306 new_temp = make_ssa_name (TREE_TYPE (type));
1307 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1308 init_stmt = gimple_build_assign (new_temp,
1309 fold_build1 (VIEW_CONVERT_EXPR,
1310 TREE_TYPE (type),
1311 val));
1312 else
1313 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1314 vect_init_vector_1 (stmt, init_stmt, gsi);
1315 val = new_temp;
1318 val = build_vector_from_val (type, val);
1321 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1322 init_stmt = gimple_build_assign (new_temp, val);
1323 vect_init_vector_1 (stmt, init_stmt, gsi);
1324 return new_temp;
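/* Editor's addition, not in the original file: a hypothetical caller of
   vect_init_vector that materializes the invariant value 1 as a vector
   of type VECTYPE.  Passing a NULL iterator selects the loop-preheader
   placement described above; build_one_cst is assumed to be available
   from tree.h.  */

static tree
example_build_ones_vector (gimple *stmt, tree vectype)
{
  tree one = build_one_cst (TREE_TYPE (vectype));
  return vect_init_vector (stmt, one, vectype, NULL);
}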
1327 /* Function vect_get_vec_def_for_operand_1.
1329 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1330 DT that will be used in the vectorized stmt. */
1332 tree
1333 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1335 tree vec_oprnd;
1336 gimple *vec_stmt;
1337 stmt_vec_info def_stmt_info = NULL;
1339 switch (dt)
1341 /* operand is a constant or a loop invariant. */
1342 case vect_constant_def:
1343 case vect_external_def:
1344 /* Code should use vect_get_vec_def_for_operand. */
1345 gcc_unreachable ();
1347 /* operand is defined inside the loop. */
1348 case vect_internal_def:
1350 /* Get the def from the vectorized stmt. */
1351 def_stmt_info = vinfo_for_stmt (def_stmt);
1353 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1354 /* Get vectorized pattern statement. */
1355 if (!vec_stmt
1356 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1357 && !STMT_VINFO_RELEVANT (def_stmt_info))
1358 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1359 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1360 gcc_assert (vec_stmt);
1361 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1362 vec_oprnd = PHI_RESULT (vec_stmt);
1363 else if (is_gimple_call (vec_stmt))
1364 vec_oprnd = gimple_call_lhs (vec_stmt);
1365 else
1366 vec_oprnd = gimple_assign_lhs (vec_stmt);
1367 return vec_oprnd;
1370 /* operand is defined by a loop header phi. */
1371 case vect_reduction_def:
1372 case vect_double_reduction_def:
1373 case vect_nested_cycle:
1374 case vect_induction_def:
1376 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1378 /* Get the def from the vectorized stmt. */
1379 def_stmt_info = vinfo_for_stmt (def_stmt);
1380 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1381 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1382 vec_oprnd = PHI_RESULT (vec_stmt);
1383 else
1384 vec_oprnd = gimple_get_lhs (vec_stmt);
1385 return vec_oprnd;
1388 default:
1389 gcc_unreachable ();
1394 /* Function vect_get_vec_def_for_operand.
1396 OP is an operand in STMT. This function returns a (vector) def that will be
1397 used in the vectorized stmt for STMT.
1399 In the case that OP is an SSA_NAME which is defined in the loop, then
1400 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1402 In case OP is an invariant or constant, a new stmt that creates a vector def
1403 needs to be introduced. VECTYPE may be used to specify a required type for
1404 vector invariant. */
1406 tree
1407 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1409 gimple *def_stmt;
1410 enum vect_def_type dt;
1411 bool is_simple_use;
1412 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1413 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1415 if (dump_enabled_p ())
1417 dump_printf_loc (MSG_NOTE, vect_location,
1418 "vect_get_vec_def_for_operand: ");
1419 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1420 dump_printf (MSG_NOTE, "\n");
1423 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1424 gcc_assert (is_simple_use);
1425 if (def_stmt && dump_enabled_p ())
1427 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1428 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1431 if (dt == vect_constant_def || dt == vect_external_def)
1433 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1434 tree vector_type;
1436 if (vectype)
1437 vector_type = vectype;
1438 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
1439 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1440 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1441 else
1442 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1444 gcc_assert (vector_type);
1445 return vect_init_vector (stmt, op, vector_type, NULL);
1447 else
1448 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1452 /* Function vect_get_vec_def_for_stmt_copy
1454 Return a vector-def for an operand. This function is used when the
1455 vectorized stmt to be created (by the caller to this function) is a "copy"
1456 created in case the vectorized result cannot fit in one vector, and several
1457 copies of the vector-stmt are required. In this case the vector-def is
1458 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1459 of the stmt that defines VEC_OPRND.
1460 DT is the type of the vector def VEC_OPRND.
1462 Context:
1463 In case the vectorization factor (VF) is bigger than the number
1464 of elements that can fit in a vectype (nunits), we have to generate
1465 more than one vector stmt to vectorize the scalar stmt. This situation
1466 arises when there are multiple data-types operated upon in the loop; the
1467 smallest data-type determines the VF, and as a result, when vectorizing
1468 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1469 vector stmt (each computing a vector of 'nunits' results, and together
1470 computing 'VF' results in each iteration). This function is called when
1471 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1472 which VF=16 and nunits=4, so the number of copies required is 4):
1474 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1476 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1477 VS1.1: vx.1 = memref1 VS1.2
1478 VS1.2: vx.2 = memref2 VS1.3
1479 VS1.3: vx.3 = memref3
1481 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1482 VSnew.1: vz1 = vx.1 + ... VSnew.2
1483 VSnew.2: vz2 = vx.2 + ... VSnew.3
1484 VSnew.3: vz3 = vx.3 + ...
1486 The vectorization of S1 is explained in vectorizable_load.
1487 The vectorization of S2:
1488 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1489 the function 'vect_get_vec_def_for_operand' is called to
1490 get the relevant vector-def for each operand of S2. For operand x it
1491 returns the vector-def 'vx.0'.
1493 To create the remaining copies of the vector-stmt (VSnew.j), this
1494 function is called to get the relevant vector-def for each operand. It is
1495 obtained from the respective VS1.j stmt, which is recorded in the
1496 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1498 For example, to obtain the vector-def 'vx.1' in order to create the
1499 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1500 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1501 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1502 and return its def ('vx.1').
1503 Overall, to create the above sequence this function will be called 3 times:
1504 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1505 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1506 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1508 tree
1509 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1511 gimple *vec_stmt_for_operand;
1512 stmt_vec_info def_stmt_info;
1514 /* Do nothing; can reuse same def. */
1515 if (dt == vect_external_def || dt == vect_constant_def )
1516 return vec_oprnd;
1518 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1519 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1520 gcc_assert (def_stmt_info);
1521 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1522 gcc_assert (vec_stmt_for_operand);
1523 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1524 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1525 else
1526 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1527 return vec_oprnd;
1531 /* Get vectorized definitions for the operands to create a copy of an original
1532 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1534 void
1535 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1536 vec<tree> *vec_oprnds0,
1537 vec<tree> *vec_oprnds1)
1539 tree vec_oprnd = vec_oprnds0->pop ();
1541 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1542 vec_oprnds0->quick_push (vec_oprnd);
1544 if (vec_oprnds1 && vec_oprnds1->length ())
1546 vec_oprnd = vec_oprnds1->pop ();
1547 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1548 vec_oprnds1->quick_push (vec_oprnd);
1553 /* Get vectorized definitions for OP0 and OP1. */
1555 void
1556 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1557 vec<tree> *vec_oprnds0,
1558 vec<tree> *vec_oprnds1,
1559 slp_tree slp_node)
1561 if (slp_node)
1563 int nops = (op1 == NULL_TREE) ? 1 : 2;
1564 auto_vec<tree> ops (nops);
1565 auto_vec<vec<tree> > vec_defs (nops);
1567 ops.quick_push (op0);
1568 if (op1)
1569 ops.quick_push (op1);
1571 vect_get_slp_defs (ops, slp_node, &vec_defs);
1573 *vec_oprnds0 = vec_defs[0];
1574 if (op1)
1575 *vec_oprnds1 = vec_defs[1];
1577 else
1579 tree vec_oprnd;
1581 vec_oprnds0->create (1);
1582 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1583 vec_oprnds0->quick_push (vec_oprnd);
1585 if (op1)
1587 vec_oprnds1->create (1);
1588 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1589 vec_oprnds1->quick_push (vec_oprnd);
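/* Editor's addition, not in the original file: a sketch of the typical
   non-SLP use of vect_get_vec_defs for a two-operand statement.  The
   wrapper is illustrative; passing a NULL slp_node yields exactly one
   initial vector def per operand, later extended per copy by
   vect_get_vec_defs_for_stmt_copy.  */

static void
example_get_binary_operands (tree op0, tree op1, gimple *stmt,
                             vec<tree> *vec_oprnds0, vec<tree> *vec_oprnds1)
{
  vect_get_vec_defs (op0, op1, stmt, vec_oprnds0, vec_oprnds1, NULL);
}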
1595 /* Function vect_finish_stmt_generation.
1597 Insert a new stmt. */
1599 void
1600 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1601 gimple_stmt_iterator *gsi)
1603 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1604 vec_info *vinfo = stmt_info->vinfo;
1606 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1608 if (!gsi_end_p (*gsi)
1609 && gimple_has_mem_ops (vec_stmt))
1611 gimple *at_stmt = gsi_stmt (*gsi);
1612 tree vuse = gimple_vuse (at_stmt);
1613 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1615 tree vdef = gimple_vdef (at_stmt);
1616 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1617 /* If we have an SSA vuse and insert a store, update virtual
1618 SSA form to avoid triggering the renamer. Do so only
1619 if we can easily see all uses - which is what almost always
1620 happens with the way vectorized stmts are inserted. */
1621 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1622 && ((is_gimple_assign (vec_stmt)
1623 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1624 || (is_gimple_call (vec_stmt)
1625 && !(gimple_call_flags (vec_stmt)
1626 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1628 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1629 gimple_set_vdef (vec_stmt, new_vdef);
1630 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1634 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1636 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1638 if (dump_enabled_p ())
1640 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1641 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1644 gimple_set_location (vec_stmt, gimple_location (stmt));
1646 /* While EH edges will generally prevent vectorization, stmt might
1647 e.g. be in a must-not-throw region. Ensure newly created stmts
1648 that could throw are part of the same region. */
1649 int lp_nr = lookup_stmt_eh_lp (stmt);
1650 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1651 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1654 /* We want to vectorize a call to combined function CFN with function
1655 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1656 as the types of all inputs. Check whether this is possible using
1657 an internal function, returning its code if so or IFN_LAST if not. */
1659 static internal_fn
1660 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1661 tree vectype_out, tree vectype_in)
1663 internal_fn ifn;
1664 if (internal_fn_p (cfn))
1665 ifn = as_internal_fn (cfn);
1666 else
1667 ifn = associated_internal_fn (fndecl);
1668 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1670 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1671 if (info.vectorizable)
1673 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1674 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1675 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1676 OPTIMIZE_FOR_SPEED))
1677 return ifn;
1680 return IFN_LAST;
1684 static tree permute_vec_elements (tree, tree, tree, gimple *,
1685 gimple_stmt_iterator *);
1687 /* STMT is a non-strided load or store, meaning that it accesses
1688 elements with a known constant step. Return -1 if that step
1689 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1691 static int
1692 compare_step_with_zero (gimple *stmt)
1694 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1695 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1696 return tree_int_cst_compare (vect_dr_behavior (dr)->step,
1697 size_zero_node);
1700 /* If the target supports a permute mask that reverses the elements in
1701 a vector of type VECTYPE, return that mask, otherwise return null. */
1703 static tree
1704 perm_mask_for_reverse (tree vectype)
1706 int i, nunits;
1707 unsigned char *sel;
1709 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1710 sel = XALLOCAVEC (unsigned char, nunits);
1712 for (i = 0; i < nunits; ++i)
1713 sel[i] = nunits - 1 - i;
1715 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1716 return NULL_TREE;
1717 return vect_gen_perm_mask_checked (vectype, sel);
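/* Editor's addition, not in the original file: for a 4-element vector the
   selector built above is { 3, 2, 1, 0 }, i.e. sel[i] = nunits - 1 - i,
   so the resulting mask reverses the element order whenever the target
   supports such a permutation.  */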
1720 /* A subroutine of get_load_store_type, with a subset of the same
1721 arguments. Handle the case where STMT is part of a grouped load
1722 or store.
1724 For stores, the statements in the group are all consecutive
1725 and there is no gap at the end. For loads, the statements in the
1726 group might not be consecutive; there can be gaps between statements
1727 as well as at the end. */
1729 static bool
1730 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1731 vec_load_store_type vls_type,
1732 vect_memory_access_type *memory_access_type)
1734 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1735 vec_info *vinfo = stmt_info->vinfo;
1736 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1737 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1738 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1739 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1740 bool single_element_p = (stmt == first_stmt
1741 && !GROUP_NEXT_ELEMENT (stmt_info));
1742 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1743 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype);
1745 /* True if the vectorized statements would access beyond the last
1746 statement in the group. */
1747 bool overrun_p = false;
1749 /* True if we can cope with such overrun by peeling for gaps, so that
1750 there is at least one final scalar iteration after the vector loop. */
1751 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1753 /* There can only be a gap at the end of the group if the stride is
1754 known at compile time. */
1755 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1757 /* Stores can't yet have gaps. */
1758 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1760 if (slp)
1762 if (STMT_VINFO_STRIDED_P (stmt_info))
1764 /* Try to use consecutive accesses of GROUP_SIZE elements,
1765 separated by the stride, until we have a complete vector.
1766 Fall back to scalar accesses if that isn't possible. */
1767 if (nunits % group_size == 0)
1768 *memory_access_type = VMAT_STRIDED_SLP;
1769 else
1770 *memory_access_type = VMAT_ELEMENTWISE;
1772 else
1774 overrun_p = loop_vinfo && gap != 0;
1775 if (overrun_p && vls_type != VLS_LOAD)
1777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1778 "Grouped store with gaps requires"
1779 " non-consecutive accesses\n");
1780 return false;
1782 /* If the access is aligned an overrun is fine. */
1783 if (overrun_p
1784 && aligned_access_p
1785 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1786 overrun_p = false;
1787 if (overrun_p && !can_overrun_p)
1789 if (dump_enabled_p ())
1790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1791 "Peeling for outer loop is not supported\n");
1792 return false;
1794 *memory_access_type = VMAT_CONTIGUOUS;
1797 else
1799 /* We can always handle this case using elementwise accesses,
1800 but see if something more efficient is available. */
1801 *memory_access_type = VMAT_ELEMENTWISE;
1803 /* If there is a gap at the end of the group then these optimizations
1804 would access excess elements in the last iteration. */
1805 bool would_overrun_p = (gap != 0);
1806 /* If the access is aligned, an overrun is fine, but only if the
1807 overrun is not inside an unused vector (which happens when the gap
1808 is at least as large as a vector). */
1809 if (would_overrun_p
1810 && gap < nunits
1811 && aligned_access_p
1812 (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt))))
1813 would_overrun_p = false;
1814 if (!STMT_VINFO_STRIDED_P (stmt_info)
1815 && (can_overrun_p || !would_overrun_p)
1816 && compare_step_with_zero (stmt) > 0)
1818 /* First try using LOAD/STORE_LANES. */
1819 if (vls_type == VLS_LOAD
1820 ? vect_load_lanes_supported (vectype, group_size)
1821 : vect_store_lanes_supported (vectype, group_size))
1823 *memory_access_type = VMAT_LOAD_STORE_LANES;
1824 overrun_p = would_overrun_p;
1827 /* If that fails, try using permuting loads or stores. */
1828 if (*memory_access_type == VMAT_ELEMENTWISE
1829 && (vls_type == VLS_LOAD
1830 ? vect_grouped_load_supported (vectype, single_element_p,
1831 group_size)
1832 : vect_grouped_store_supported (vectype, group_size)))
1834 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1835 overrun_p = would_overrun_p;
1840 if (vls_type != VLS_LOAD && first_stmt == stmt)
1842 /* STMT is the leader of the group. Check the operands of all the
1843 stmts of the group. */
1844 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1845 while (next_stmt)
1847 gcc_assert (gimple_assign_single_p (next_stmt));
1848 tree op = gimple_assign_rhs1 (next_stmt);
1849 gimple *def_stmt;
1850 enum vect_def_type dt;
1851 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1853 if (dump_enabled_p ())
1854 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1855 "use not simple.\n");
1856 return false;
1858 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1862 if (overrun_p)
1864 gcc_assert (can_overrun_p);
1865 if (dump_enabled_p ())
1866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1867 "Data access with gaps requires scalar "
1868 "epilogue loop\n");
1869 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1872 return true;
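/* Illustrative sketch added for clarity (not part of the original source;
   the function and arrays below are hypothetical).  A grouped store of
   size 2 with no gap, as analyzed above, typically comes from an
   interleaved loop such as:

     void
     interleave (int *out, const int *a, const int *b, int n)
     {
       for (int i = 0; i < n; i++)
         {
           out[2 * i]     = a[i];    group leader, GROUP_SIZE == 2
           out[2 * i + 1] = b[i];    GROUP_NEXT_ELEMENT of the leader
         }
     }

   If the target has store-lanes instructions this becomes
   VMAT_LOAD_STORE_LANES; otherwise VMAT_CONTIGUOUS_PERMUTE uses
   interleaving permutes, with VMAT_ELEMENTWISE as the fallback.  A load
   group such as "... = a[2 * i]" that never reads a[2 * i + 1] has a gap
   of 1 at the end, which is the overrun case that may set
   LOOP_VINFO_PEELING_FOR_GAPS above.  */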
1875 /* A subroutine of get_load_store_type, with a subset of the same
1876 arguments. Handle the case where STMT is a load or store that
1877 accesses consecutive elements with a negative step. */
1879 static vect_memory_access_type
1880 get_negative_load_store_type (gimple *stmt, tree vectype,
1881 vec_load_store_type vls_type,
1882 unsigned int ncopies)
1884 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1885 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1886 dr_alignment_support alignment_support_scheme;
1888 if (ncopies > 1)
1890 if (dump_enabled_p ())
1891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1892 "multiple types with negative step.\n");
1893 return VMAT_ELEMENTWISE;
1896 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1897 if (alignment_support_scheme != dr_aligned
1898 && alignment_support_scheme != dr_unaligned_supported)
1900 if (dump_enabled_p ())
1901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1902 "negative step but alignment required.\n");
1903 return VMAT_ELEMENTWISE;
1906 if (vls_type == VLS_STORE_INVARIANT)
1908 if (dump_enabled_p ())
1909 dump_printf_loc (MSG_NOTE, vect_location,
1910 "negative step with invariant source;"
1911 " no permute needed.\n");
1912 return VMAT_CONTIGUOUS_DOWN;
1915 if (!perm_mask_for_reverse (vectype))
1917 if (dump_enabled_p ())
1918 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1919 "negative step and reversing not supported.\n");
1920 return VMAT_ELEMENTWISE;
1923 return VMAT_CONTIGUOUS_REVERSE;
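/* Illustrative sketch added for clarity (not part of the original source;
   the function and arrays below are hypothetical).  A consecutive access
   with a negative step comes from a loop that walks its arrays backwards:

     void
     reverse_copy (int *a, const int *b, int n)
     {
       for (int i = n - 1; i >= 0; i--)
         a[i] = b[i];    both data-refs have step == -4
     }

   When ncopies == 1, the alignment scheme is supported and
   perm_mask_for_reverse found a mask, both accesses are classified as
   VMAT_CONTIGUOUS_REVERSE: contiguous vector loads/stores whose lanes
   are reversed with VEC_PERM_EXPR.  Storing an invariant value instead
   gives VMAT_CONTIGUOUS_DOWN, since no reversal is needed.  */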
1926 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1927 if there is a memory access type that the vectorized form can use,
1928 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1929 or scatters, fill in GS_INFO accordingly.
1931 SLP says whether we're performing SLP rather than loop vectorization.
1932 VECTYPE is the vector type that the vectorized statements will use.
1933 NCOPIES is the number of vector statements that will be needed. */
1935 static bool
1936 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1937 vec_load_store_type vls_type, unsigned int ncopies,
1938 vect_memory_access_type *memory_access_type,
1939 gather_scatter_info *gs_info)
1941 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1942 vec_info *vinfo = stmt_info->vinfo;
1943 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1944 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1946 *memory_access_type = VMAT_GATHER_SCATTER;
1947 gimple *def_stmt;
1948 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1949 gcc_unreachable ();
1950 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1951 &gs_info->offset_dt,
1952 &gs_info->offset_vectype))
1954 if (dump_enabled_p ())
1955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1956 "%s index use not simple.\n",
1957 vls_type == VLS_LOAD ? "gather" : "scatter");
1958 return false;
1961 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1963 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1964 memory_access_type))
1965 return false;
1967 else if (STMT_VINFO_STRIDED_P (stmt_info))
1969 gcc_assert (!slp);
1970 *memory_access_type = VMAT_ELEMENTWISE;
1972 else
1974 int cmp = compare_step_with_zero (stmt);
1975 if (cmp < 0)
1976 *memory_access_type = get_negative_load_store_type
1977 (stmt, vectype, vls_type, ncopies);
1978 else if (cmp == 0)
1980 gcc_assert (vls_type == VLS_LOAD);
1981 *memory_access_type = VMAT_INVARIANT;
1983 else
1984 *memory_access_type = VMAT_CONTIGUOUS;
1987 /* FIXME: At the moment the cost model seems to underestimate the
1988 cost of using elementwise accesses. This check preserves the
1989 traditional behavior until that can be fixed. */
1990 if (*memory_access_type == VMAT_ELEMENTWISE
1991 && !STMT_VINFO_STRIDED_P (stmt_info))
1993 if (dump_enabled_p ())
1994 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1995 "not falling back to elementwise accesses\n");
1996 return false;
1998 return true;
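/* Summary added for clarity (not part of the original source; "a" and
   "i" are a hypothetical array and loop counter).  The classification
   above maps roughly as follows:

     gather/scatter data-ref              -> VMAT_GATHER_SCATTER
     grouped access                       -> get_group_load_store_type
     strided, non-SLP                     -> VMAT_ELEMENTWISE
     a[i] walked with a negative step     -> get_negative_load_store_type
     invariant load such as a[0]          -> VMAT_INVARIANT
     a[i] walked with a positive step     -> VMAT_CONTIGUOUS

   subject to the FIXME above, which rejects VMAT_ELEMENTWISE for
   non-strided accesses rather than risk a mis-costed fallback.  */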
2001 /* Function vectorizable_mask_load_store.
2003 Check if STMT performs a conditional load or store that can be vectorized.
2004 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2005 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2006 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2008 static bool
2009 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
2010 gimple **vec_stmt, slp_tree slp_node)
2012 tree vec_dest = NULL;
2013 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2014 stmt_vec_info prev_stmt_info;
2015 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2016 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2017 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2018 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2019 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2020 tree rhs_vectype = NULL_TREE;
2021 tree mask_vectype;
2022 tree elem_type;
2023 gimple *new_stmt;
2024 tree dummy;
2025 tree dataref_ptr = NULL_TREE;
2026 gimple *ptr_incr;
2027 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2028 int ncopies;
2029 int i, j;
2030 bool inv_p;
2031 gather_scatter_info gs_info;
2032 vec_load_store_type vls_type;
2033 tree mask;
2034 gimple *def_stmt;
2035 enum vect_def_type dt;
2037 if (slp_node != NULL)
2038 return false;
2040 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2041 gcc_assert (ncopies >= 1);
2043 mask = gimple_call_arg (stmt, 2);
2045 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask)))
2046 return false;
2048 /* FORNOW. This restriction should be relaxed. */
2049 if (nested_in_vect_loop && ncopies > 1)
2051 if (dump_enabled_p ())
2052 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2053 "multiple types in nested loop.");
2054 return false;
2057 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2058 return false;
2060 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2061 && ! vec_stmt)
2062 return false;
2064 if (!STMT_VINFO_DATA_REF (stmt_info))
2065 return false;
2067 elem_type = TREE_TYPE (vectype);
2069 if (TREE_CODE (mask) != SSA_NAME)
2070 return false;
2072 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2073 return false;
2075 if (!mask_vectype)
2076 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2078 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2079 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2080 return false;
2082 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2084 tree rhs = gimple_call_arg (stmt, 3);
2085 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2086 return false;
2087 if (dt == vect_constant_def || dt == vect_external_def)
2088 vls_type = VLS_STORE_INVARIANT;
2089 else
2090 vls_type = VLS_STORE;
2092 else
2093 vls_type = VLS_LOAD;
2095 vect_memory_access_type memory_access_type;
2096 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2097 &memory_access_type, &gs_info))
2098 return false;
2100 if (memory_access_type == VMAT_GATHER_SCATTER)
2102 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2103 tree masktype
2104 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2105 if (TREE_CODE (masktype) == INTEGER_TYPE)
2107 if (dump_enabled_p ())
2108 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2109 "masked gather with integer mask not supported.");
2110 return false;
2113 else if (memory_access_type != VMAT_CONTIGUOUS)
2115 if (dump_enabled_p ())
2116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2117 "unsupported access type for masked %s.\n",
2118 vls_type == VLS_LOAD ? "load" : "store");
2119 return false;
2121 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2122 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2123 TYPE_MODE (mask_vectype),
2124 vls_type == VLS_LOAD)
2125 || (rhs_vectype
2126 && !useless_type_conversion_p (vectype, rhs_vectype)))
2127 return false;
2129 if (!vec_stmt) /* transformation not required. */
2131 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2132 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2133 if (vls_type == VLS_LOAD)
2134 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2135 NULL, NULL, NULL);
2136 else
2137 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2138 dt, NULL, NULL, NULL);
2139 return true;
2141 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2143 /* Transform. */
2145 if (memory_access_type == VMAT_GATHER_SCATTER)
2147 tree vec_oprnd0 = NULL_TREE, op;
2148 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2149 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2150 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2151 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2152 tree mask_perm_mask = NULL_TREE;
2153 edge pe = loop_preheader_edge (loop);
2154 gimple_seq seq;
2155 basic_block new_bb;
2156 enum { NARROW, NONE, WIDEN } modifier;
2157 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2159 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2160 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2161 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2162 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2163 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2164 scaletype = TREE_VALUE (arglist);
2165 gcc_checking_assert (types_compatible_p (srctype, rettype)
2166 && types_compatible_p (srctype, masktype));
2168 if (nunits == gather_off_nunits)
2169 modifier = NONE;
2170 else if (nunits == gather_off_nunits / 2)
2172 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2173 modifier = WIDEN;
2175 for (i = 0; i < gather_off_nunits; ++i)
2176 sel[i] = i | nunits;
2178 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2180 else if (nunits == gather_off_nunits * 2)
2182 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2183 modifier = NARROW;
2185 for (i = 0; i < nunits; ++i)
2186 sel[i] = i < gather_off_nunits
2187 ? i : i + nunits - gather_off_nunits;
2189 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2190 ncopies *= 2;
2191 for (i = 0; i < nunits; ++i)
2192 sel[i] = i | gather_off_nunits;
2193 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2195 else
2196 gcc_unreachable ();
2198 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2200 ptr = fold_convert (ptrtype, gs_info.base);
2201 if (!is_gimple_min_invariant (ptr))
2203 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2204 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2205 gcc_assert (!new_bb);
2208 scale = build_int_cst (scaletype, gs_info.scale);
2210 prev_stmt_info = NULL;
2211 for (j = 0; j < ncopies; ++j)
2213 if (modifier == WIDEN && (j & 1))
2214 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2215 perm_mask, stmt, gsi);
2216 else if (j == 0)
2217 op = vec_oprnd0
2218 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2219 else
2220 op = vec_oprnd0
2221 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2223 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2225 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2226 == TYPE_VECTOR_SUBPARTS (idxtype));
2227 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2228 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2229 new_stmt
2230 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2231 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2232 op = var;
2235 if (mask_perm_mask && (j & 1))
2236 mask_op = permute_vec_elements (mask_op, mask_op,
2237 mask_perm_mask, stmt, gsi);
2238 else
2240 if (j == 0)
2241 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2242 else
2244 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2245 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2248 mask_op = vec_mask;
2249 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2251 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2252 == TYPE_VECTOR_SUBPARTS (masktype));
2253 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2254 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2255 new_stmt
2256 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2257 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2258 mask_op = var;
2262 new_stmt
2263 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2264 scale);
2266 if (!useless_type_conversion_p (vectype, rettype))
2268 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2269 == TYPE_VECTOR_SUBPARTS (rettype));
2270 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2271 gimple_call_set_lhs (new_stmt, op);
2272 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2273 var = make_ssa_name (vec_dest);
2274 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2275 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2277 else
2279 var = make_ssa_name (vec_dest, new_stmt);
2280 gimple_call_set_lhs (new_stmt, var);
2283 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2285 if (modifier == NARROW)
2287 if ((j & 1) == 0)
2289 prev_res = var;
2290 continue;
2292 var = permute_vec_elements (prev_res, var,
2293 perm_mask, stmt, gsi);
2294 new_stmt = SSA_NAME_DEF_STMT (var);
2297 if (prev_stmt_info == NULL)
2298 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2299 else
2300 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2301 prev_stmt_info = vinfo_for_stmt (new_stmt);
2304 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2305 from the IL. */
2306 if (STMT_VINFO_RELATED_STMT (stmt_info))
2308 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2309 stmt_info = vinfo_for_stmt (stmt);
2311 tree lhs = gimple_call_lhs (stmt);
2312 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2313 set_vinfo_for_stmt (new_stmt, stmt_info);
2314 set_vinfo_for_stmt (stmt, NULL);
2315 STMT_VINFO_STMT (stmt_info) = new_stmt;
2316 gsi_replace (gsi, new_stmt, true);
2317 return true;
2319 else if (vls_type != VLS_LOAD)
2321 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2322 prev_stmt_info = NULL;
2323 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2324 for (i = 0; i < ncopies; i++)
2326 unsigned align, misalign;
2328 if (i == 0)
2330 tree rhs = gimple_call_arg (stmt, 3);
2331 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2332 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2333 /* We should have caught mismatched types earlier. */
2334 gcc_assert (useless_type_conversion_p (vectype,
2335 TREE_TYPE (vec_rhs)));
2336 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2337 NULL_TREE, &dummy, gsi,
2338 &ptr_incr, false, &inv_p);
2339 gcc_assert (!inv_p);
2341 else
2343 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2344 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2345 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2346 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2347 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2348 TYPE_SIZE_UNIT (vectype));
2351 align = TYPE_ALIGN_UNIT (vectype);
2352 if (aligned_access_p (dr))
2353 misalign = 0;
2354 else if (DR_MISALIGNMENT (dr) == -1)
2356 align = TYPE_ALIGN_UNIT (elem_type);
2357 misalign = 0;
2359 else
2360 misalign = DR_MISALIGNMENT (dr);
2361 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2362 misalign);
2363 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2364 misalign ? least_bit_hwi (misalign) : align);
2365 new_stmt
2366 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2367 ptr, vec_mask, vec_rhs);
2368 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2369 if (i == 0)
2370 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2371 else
2372 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2373 prev_stmt_info = vinfo_for_stmt (new_stmt);
2376 else
2378 tree vec_mask = NULL_TREE;
2379 prev_stmt_info = NULL;
2380 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2381 for (i = 0; i < ncopies; i++)
2383 unsigned align, misalign;
2385 if (i == 0)
2387 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2388 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2389 NULL_TREE, &dummy, gsi,
2390 &ptr_incr, false, &inv_p);
2391 gcc_assert (!inv_p);
2393 else
2395 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2396 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2397 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2398 TYPE_SIZE_UNIT (vectype));
2401 align = TYPE_ALIGN_UNIT (vectype);
2402 if (aligned_access_p (dr))
2403 misalign = 0;
2404 else if (DR_MISALIGNMENT (dr) == -1)
2406 align = TYPE_ALIGN_UNIT (elem_type);
2407 misalign = 0;
2409 else
2410 misalign = DR_MISALIGNMENT (dr);
2411 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2412 misalign);
2413 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2414 misalign ? least_bit_hwi (misalign) : align);
2415 new_stmt
2416 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2417 ptr, vec_mask);
2418 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2419 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2420 if (i == 0)
2421 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2422 else
2423 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2424 prev_stmt_info = vinfo_for_stmt (new_stmt);
2428 if (vls_type == VLS_LOAD)
2430 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2431 from the IL. */
2432 if (STMT_VINFO_RELATED_STMT (stmt_info))
2434 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2435 stmt_info = vinfo_for_stmt (stmt);
2437 tree lhs = gimple_call_lhs (stmt);
2438 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2439 set_vinfo_for_stmt (new_stmt, stmt_info);
2440 set_vinfo_for_stmt (stmt, NULL);
2441 STMT_VINFO_STMT (stmt_info) = new_stmt;
2442 gsi_replace (gsi, new_stmt, true);
2445 return true;
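/* Illustrative sketch added for clarity (not part of the original source;
   the function and arrays below are hypothetical).  The IFN_MASK_LOAD /
   IFN_MASK_STORE calls handled above are created by if-conversion from
   conditional accesses such as:

     void
     cond_copy (int *a, const int *b, const int *c, int n)
     {
       for (int i = 0; i < n; i++)
         if (c[i])
           a[i] = b[i];
     }

   which becomes, conceptually,

     mask = c[i] != 0;
     tmp  = .MASK_LOAD (&b[i], align, mask);
     .MASK_STORE (&a[i], align, mask, tmp);

   and is vectorized here into the same internal calls on vector operands
   (or into a masked gather/scatter call when the access is
   VMAT_GATHER_SCATTER).  */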
2448 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2450 static bool
2451 vectorizable_bswap (gimple *stmt, gimple_stmt_iterator *gsi,
2452 gimple **vec_stmt, slp_tree slp_node,
2453 tree vectype_in, enum vect_def_type *dt)
2455 tree op, vectype;
2456 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2457 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2458 unsigned ncopies, nunits;
2460 op = gimple_call_arg (stmt, 0);
2461 vectype = STMT_VINFO_VECTYPE (stmt_info);
2462 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2464 /* Multiple types in SLP are handled by creating the appropriate number of
2465 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2466 case of SLP. */
2467 if (slp_node)
2468 ncopies = 1;
2469 else
2470 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2472 gcc_assert (ncopies >= 1);
2474 tree char_vectype = get_same_sized_vectype (char_type_node, vectype_in);
2475 if (! char_vectype)
2476 return false;
2478 unsigned char *elts
2479 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (char_vectype));
2480 unsigned char *elt = elts;
2481 unsigned word_bytes = TYPE_VECTOR_SUBPARTS (char_vectype) / nunits;
2482 for (unsigned i = 0; i < nunits; ++i)
2483 for (unsigned j = 0; j < word_bytes; ++j)
2484 *elt++ = (i + 1) * word_bytes - j - 1;
2486 if (! can_vec_perm_p (TYPE_MODE (char_vectype), false, elts))
2487 return false;
2489 if (! vec_stmt)
2491 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2492 if (dump_enabled_p ())
2493 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_bswap ==="
2494 "\n");
2495 if (! PURE_SLP_STMT (stmt_info))
2497 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2498 1, vector_stmt, stmt_info, 0, vect_prologue);
2499 add_stmt_cost (stmt_info->vinfo->target_cost_data,
2500 ncopies, vec_perm, stmt_info, 0, vect_body);
2502 return true;
2505 tree *telts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (char_vectype));
2506 for (unsigned i = 0; i < TYPE_VECTOR_SUBPARTS (char_vectype); ++i)
2507 telts[i] = build_int_cst (char_type_node, elts[i]);
2508 tree bswap_vconst = build_vector (char_vectype, telts);
2510 /* Transform. */
2511 vec<tree> vec_oprnds = vNULL;
2512 gimple *new_stmt = NULL;
2513 stmt_vec_info prev_stmt_info = NULL;
2514 for (unsigned j = 0; j < ncopies; j++)
2516 /* Handle uses. */
2517 if (j == 0)
2518 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
2519 else
2520 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
2522 /* Arguments are ready. Create the new vector stmt. */
2523 unsigned i;
2524 tree vop;
2525 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
2527 tree tem = make_ssa_name (char_vectype);
2528 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2529 char_vectype, vop));
2530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2531 tree tem2 = make_ssa_name (char_vectype);
2532 new_stmt = gimple_build_assign (tem2, VEC_PERM_EXPR,
2533 tem, tem, bswap_vconst);
2534 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2535 tem = make_ssa_name (vectype);
2536 new_stmt = gimple_build_assign (tem, build1 (VIEW_CONVERT_EXPR,
2537 vectype, tem2));
2538 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2539 if (slp_node)
2540 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2543 if (slp_node)
2544 continue;
2546 if (j == 0)
2547 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2548 else
2549 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2551 prev_stmt_info = vinfo_for_stmt (new_stmt);
2554 vec_oprnds.release ();
2555 return true;
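/* Worked example added for clarity (not part of the original source;
   the concrete types assume a V4SI input vector).  For __builtin_bswap32
   on V4SI, char_vectype is V16QI and word_bytes == 4, so the selector
   computed above is

     { 3, 2, 1, 0,  7, 6, 5, 4,  11, 10, 9, 8,  15, 14, 13, 12 }

   i.e. each 4-byte word is byte-reversed in place.  The transform then
   VIEW_CONVERTs each operand to V16QI, applies one VEC_PERM_EXPR with
   that constant, and VIEW_CONVERTs the result back to V4SI.  */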
2558 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2559 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2560 in a single step. On success, store the binary pack code in
2561 *CONVERT_CODE. */
2563 static bool
2564 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2565 tree_code *convert_code)
2567 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2568 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2569 return false;
2571 tree_code code;
2572 int multi_step_cvt = 0;
2573 auto_vec <tree, 8> interm_types;
2574 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2575 &code, &multi_step_cvt,
2576 &interm_types)
2577 || multi_step_cvt)
2578 return false;
2580 *convert_code = code;
2581 return true;
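/* Worked example added for clarity (not part of the original source;
   the types are hypothetical).  With vectype_in == V4SI and
   vectype_out == V8HI the narrowing is a single step, so *CONVERT_CODE
   is typically VEC_PACK_TRUNC_EXPR: two V4SI results are packed into one
   V8HI vector.  A conversion needing more than one step
   (multi_step_cvt != 0) is rejected.  */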
2584 /* Function vectorizable_call.
2586 Check if GS performs a function call that can be vectorized.
2587 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2588 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2589 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2591 static bool
2592 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2593 slp_tree slp_node)
2595 gcall *stmt;
2596 tree vec_dest;
2597 tree scalar_dest;
2598 tree op, type;
2599 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2600 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2601 tree vectype_out, vectype_in;
2602 int nunits_in;
2603 int nunits_out;
2604 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2605 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2606 vec_info *vinfo = stmt_info->vinfo;
2607 tree fndecl, new_temp, rhs_type;
2608 gimple *def_stmt;
2609 enum vect_def_type dt[3]
2610 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2611 int ndts = 3;
2612 gimple *new_stmt = NULL;
2613 int ncopies, j;
2614 vec<tree> vargs = vNULL;
2615 enum { NARROW, NONE, WIDEN } modifier;
2616 size_t i, nargs;
2617 tree lhs;
2619 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2620 return false;
2622 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2623 && ! vec_stmt)
2624 return false;
2626 /* Is GS a vectorizable call? */
2627 stmt = dyn_cast <gcall *> (gs);
2628 if (!stmt)
2629 return false;
2631 if (gimple_call_internal_p (stmt)
2632 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2633 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2634 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2635 slp_node);
2637 if (gimple_call_lhs (stmt) == NULL_TREE
2638 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2639 return false;
2641 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2643 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2645 /* Process function arguments. */
2646 rhs_type = NULL_TREE;
2647 vectype_in = NULL_TREE;
2648 nargs = gimple_call_num_args (stmt);
2650 /* Bail out if the function has more than three arguments; we do not have
2651 interesting builtin functions to vectorize with more than two arguments,
2652 except for fma. Calls with no arguments are not interesting either. */
2653 if (nargs == 0 || nargs > 3)
2654 return false;
2656 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2657 if (gimple_call_internal_p (stmt)
2658 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2660 nargs = 0;
2661 rhs_type = unsigned_type_node;
2664 for (i = 0; i < nargs; i++)
2666 tree opvectype;
2668 op = gimple_call_arg (stmt, i);
2670 /* We can only handle calls with arguments of the same type. */
2671 if (rhs_type
2672 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2674 if (dump_enabled_p ())
2675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2676 "argument types differ.\n");
2677 return false;
2679 if (!rhs_type)
2680 rhs_type = TREE_TYPE (op);
2682 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2684 if (dump_enabled_p ())
2685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2686 "use not simple.\n");
2687 return false;
2690 if (!vectype_in)
2691 vectype_in = opvectype;
2692 else if (opvectype
2693 && opvectype != vectype_in)
2695 if (dump_enabled_p ())
2696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2697 "argument vector types differ.\n");
2698 return false;
2701 /* If all arguments are external or constant defs, use a vector type with
2702 the same size as the output vector type. */
2703 if (!vectype_in)
2704 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2705 if (vec_stmt)
2706 gcc_assert (vectype_in);
2707 if (!vectype_in)
2709 if (dump_enabled_p ())
2711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2712 "no vectype for scalar type ");
2713 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2714 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2717 return false;
2720 /* FORNOW */
2721 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2722 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2723 if (nunits_in == nunits_out / 2)
2724 modifier = NARROW;
2725 else if (nunits_out == nunits_in)
2726 modifier = NONE;
2727 else if (nunits_out == nunits_in / 2)
2728 modifier = WIDEN;
2729 else
2730 return false;
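/* Illustrative note added for clarity (not part of the original source;
   the vector types are hypothetical).  With vectype_in == V4SI and
   vectype_out == V4SI the modifier is NONE; with vectype_out == V8HI
   (twice as many, narrower elements) it is NARROW and two input vectors
   feed each output vector; the mirror case gives WIDEN.  Other ratios
   were rejected just above.  */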
2732 /* We only handle functions that do not read or clobber memory. */
2733 if (gimple_vuse (stmt))
2735 if (dump_enabled_p ())
2736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2737 "function reads from or writes to memory.\n");
2738 return false;
2741 /* For now, we only vectorize functions if a target specific builtin
2742 is available. TODO -- in some cases, it might be profitable to
2743 insert the calls for pieces of the vector, in order to be able
2744 to vectorize other operations in the loop. */
2745 fndecl = NULL_TREE;
2746 internal_fn ifn = IFN_LAST;
2747 combined_fn cfn = gimple_call_combined_fn (stmt);
2748 tree callee = gimple_call_fndecl (stmt);
2750 /* First try using an internal function. */
2751 tree_code convert_code = ERROR_MARK;
2752 if (cfn != CFN_LAST
2753 && (modifier == NONE
2754 || (modifier == NARROW
2755 && simple_integer_narrowing (vectype_out, vectype_in,
2756 &convert_code))))
2757 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2758 vectype_in);
2760 /* If that fails, try asking for a target-specific built-in function. */
2761 if (ifn == IFN_LAST)
2763 if (cfn != CFN_LAST)
2764 fndecl = targetm.vectorize.builtin_vectorized_function
2765 (cfn, vectype_out, vectype_in);
2766 else
2767 fndecl = targetm.vectorize.builtin_md_vectorized_function
2768 (callee, vectype_out, vectype_in);
2771 if (ifn == IFN_LAST && !fndecl)
2773 if (cfn == CFN_GOMP_SIMD_LANE
2774 && !slp_node
2775 && loop_vinfo
2776 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2777 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2778 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2779 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2781 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2782 { 0, 1, 2, ... vf - 1 } vector. */
2783 gcc_assert (nargs == 0);
2785 else if (modifier == NONE
2786 && (gimple_call_builtin_p (stmt, BUILT_IN_BSWAP16)
2787 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP32)
2788 || gimple_call_builtin_p (stmt, BUILT_IN_BSWAP64)))
2789 return vectorizable_bswap (stmt, gsi, vec_stmt, slp_node,
2790 vectype_in, dt);
2791 else
2793 if (dump_enabled_p ())
2794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2795 "function is not vectorizable.\n");
2796 return false;
2800 if (slp_node)
2801 ncopies = 1;
2802 else if (modifier == NARROW && ifn == IFN_LAST)
2803 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2804 else
2805 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2807 /* Sanity check: make sure that at least one copy of the vectorized stmt
2808 needs to be generated. */
2809 gcc_assert (ncopies >= 1);
2811 if (!vec_stmt) /* transformation not required. */
2813 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2814 if (dump_enabled_p ())
2815 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2816 "\n");
2817 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
2818 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2819 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2820 vec_promote_demote, stmt_info, 0, vect_body);
2822 return true;
2825 /* Transform. */
2827 if (dump_enabled_p ())
2828 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2830 /* Handle def. */
2831 scalar_dest = gimple_call_lhs (stmt);
2832 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2834 prev_stmt_info = NULL;
2835 if (modifier == NONE || ifn != IFN_LAST)
2837 tree prev_res = NULL_TREE;
2838 for (j = 0; j < ncopies; ++j)
2840 /* Build argument list for the vectorized call. */
2841 if (j == 0)
2842 vargs.create (nargs);
2843 else
2844 vargs.truncate (0);
2846 if (slp_node)
2848 auto_vec<vec<tree> > vec_defs (nargs);
2849 vec<tree> vec_oprnds0;
2851 for (i = 0; i < nargs; i++)
2852 vargs.quick_push (gimple_call_arg (stmt, i));
2853 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2854 vec_oprnds0 = vec_defs[0];
2856 /* Arguments are ready. Create the new vector stmt. */
2857 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2859 size_t k;
2860 for (k = 0; k < nargs; k++)
2862 vec<tree> vec_oprndsk = vec_defs[k];
2863 vargs[k] = vec_oprndsk[i];
2865 if (modifier == NARROW)
2867 tree half_res = make_ssa_name (vectype_in);
2868 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2869 gimple_call_set_lhs (new_stmt, half_res);
2870 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2871 if ((i & 1) == 0)
2873 prev_res = half_res;
2874 continue;
2876 new_temp = make_ssa_name (vec_dest);
2877 new_stmt = gimple_build_assign (new_temp, convert_code,
2878 prev_res, half_res);
2880 else
2882 if (ifn != IFN_LAST)
2883 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2884 else
2885 new_stmt = gimple_build_call_vec (fndecl, vargs);
2886 new_temp = make_ssa_name (vec_dest, new_stmt);
2887 gimple_call_set_lhs (new_stmt, new_temp);
2889 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2890 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2893 for (i = 0; i < nargs; i++)
2895 vec<tree> vec_oprndsi = vec_defs[i];
2896 vec_oprndsi.release ();
2898 continue;
2901 for (i = 0; i < nargs; i++)
2903 op = gimple_call_arg (stmt, i);
2904 if (j == 0)
2905 vec_oprnd0
2906 = vect_get_vec_def_for_operand (op, stmt);
2907 else
2909 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2910 vec_oprnd0
2911 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2914 vargs.quick_push (vec_oprnd0);
2917 if (gimple_call_internal_p (stmt)
2918 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2920 tree *v = XALLOCAVEC (tree, nunits_out);
2921 int k;
2922 for (k = 0; k < nunits_out; ++k)
2923 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2924 tree cst = build_vector (vectype_out, v);
2925 tree new_var
2926 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2927 gimple *init_stmt = gimple_build_assign (new_var, cst);
2928 vect_init_vector_1 (stmt, init_stmt, NULL);
2929 new_temp = make_ssa_name (vec_dest);
2930 new_stmt = gimple_build_assign (new_temp, new_var);
2932 else if (modifier == NARROW)
2934 tree half_res = make_ssa_name (vectype_in);
2935 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2936 gimple_call_set_lhs (new_stmt, half_res);
2937 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2938 if ((j & 1) == 0)
2940 prev_res = half_res;
2941 continue;
2943 new_temp = make_ssa_name (vec_dest);
2944 new_stmt = gimple_build_assign (new_temp, convert_code,
2945 prev_res, half_res);
2947 else
2949 if (ifn != IFN_LAST)
2950 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2951 else
2952 new_stmt = gimple_build_call_vec (fndecl, vargs);
2953 new_temp = make_ssa_name (vec_dest, new_stmt);
2954 gimple_call_set_lhs (new_stmt, new_temp);
2956 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2958 if (j == (modifier == NARROW ? 1 : 0))
2959 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2960 else
2961 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2963 prev_stmt_info = vinfo_for_stmt (new_stmt);
2966 else if (modifier == NARROW)
2968 for (j = 0; j < ncopies; ++j)
2970 /* Build argument list for the vectorized call. */
2971 if (j == 0)
2972 vargs.create (nargs * 2);
2973 else
2974 vargs.truncate (0);
2976 if (slp_node)
2978 auto_vec<vec<tree> > vec_defs (nargs);
2979 vec<tree> vec_oprnds0;
2981 for (i = 0; i < nargs; i++)
2982 vargs.quick_push (gimple_call_arg (stmt, i));
2983 vect_get_slp_defs (vargs, slp_node, &vec_defs);
2984 vec_oprnds0 = vec_defs[0];
2986 /* Arguments are ready. Create the new vector stmt. */
2987 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2989 size_t k;
2990 vargs.truncate (0);
2991 for (k = 0; k < nargs; k++)
2993 vec<tree> vec_oprndsk = vec_defs[k];
2994 vargs.quick_push (vec_oprndsk[i]);
2995 vargs.quick_push (vec_oprndsk[i + 1]);
2997 if (ifn != IFN_LAST)
2998 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2999 else
3000 new_stmt = gimple_build_call_vec (fndecl, vargs);
3001 new_temp = make_ssa_name (vec_dest, new_stmt);
3002 gimple_call_set_lhs (new_stmt, new_temp);
3003 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3004 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3007 for (i = 0; i < nargs; i++)
3009 vec<tree> vec_oprndsi = vec_defs[i];
3010 vec_oprndsi.release ();
3012 continue;
3015 for (i = 0; i < nargs; i++)
3017 op = gimple_call_arg (stmt, i);
3018 if (j == 0)
3020 vec_oprnd0
3021 = vect_get_vec_def_for_operand (op, stmt);
3022 vec_oprnd1
3023 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3025 else
3027 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
3028 vec_oprnd0
3029 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
3030 vec_oprnd1
3031 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
3034 vargs.quick_push (vec_oprnd0);
3035 vargs.quick_push (vec_oprnd1);
3038 new_stmt = gimple_build_call_vec (fndecl, vargs);
3039 new_temp = make_ssa_name (vec_dest, new_stmt);
3040 gimple_call_set_lhs (new_stmt, new_temp);
3041 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3043 if (j == 0)
3044 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3045 else
3046 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3048 prev_stmt_info = vinfo_for_stmt (new_stmt);
3051 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3053 else
3054 /* No current target implements this case. */
3055 return false;
3057 vargs.release ();
3059 /* The call in STMT might prevent it from being removed in dce.
3060 However, we cannot remove it here, due to the way the ssa name
3061 it defines is mapped to the new definition. So just replace the
3062 rhs of the statement with something harmless. */
3064 if (slp_node)
3065 return true;
3067 type = TREE_TYPE (scalar_dest);
3068 if (is_pattern_stmt_p (stmt_info))
3069 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3070 else
3071 lhs = gimple_call_lhs (stmt);
3073 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3074 set_vinfo_for_stmt (new_stmt, stmt_info);
3075 set_vinfo_for_stmt (stmt, NULL);
3076 STMT_VINFO_STMT (stmt_info) = new_stmt;
3077 gsi_replace (gsi, new_stmt, false);
3079 return true;
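/* Illustrative sketch added for clarity (not part of the original source;
   the function and arrays below are hypothetical).  A typical call
   vectorized above is

     void
     fill_sqrt (float *y, const float *x, int n)
     {
       for (int i = 0; i < n; i++)
         y[i] = __builtin_sqrtf (x[i]);
     }

   The combined function is first matched against an internal function
   (here IFN_SQRT) if the target has the corresponding vector optab for
   V4SF; failing that, targetm.vectorize.builtin_vectorized_function may
   supply a target-specific built-in.  The original scalar call is
   finally replaced by "lhs = 0.0" so that DCE can delete it.  */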
3083 struct simd_call_arg_info
3085 tree vectype;
3086 tree op;
3087 HOST_WIDE_INT linear_step;
3088 enum vect_def_type dt;
3089 unsigned int align;
3090 bool simd_lane_linear;
3093 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3094 is linear within the simd lane (but not within the whole loop), note it in
3095 *ARGINFO. */
3097 static void
3098 vect_simd_lane_linear (tree op, struct loop *loop,
3099 struct simd_call_arg_info *arginfo)
3101 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
3103 if (!is_gimple_assign (def_stmt)
3104 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
3105 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
3106 return;
3108 tree base = gimple_assign_rhs1 (def_stmt);
3109 HOST_WIDE_INT linear_step = 0;
3110 tree v = gimple_assign_rhs2 (def_stmt);
3111 while (TREE_CODE (v) == SSA_NAME)
3113 tree t;
3114 def_stmt = SSA_NAME_DEF_STMT (v);
3115 if (is_gimple_assign (def_stmt))
3116 switch (gimple_assign_rhs_code (def_stmt))
3118 case PLUS_EXPR:
3119 t = gimple_assign_rhs2 (def_stmt);
3120 if (linear_step || TREE_CODE (t) != INTEGER_CST)
3121 return;
3122 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
3123 v = gimple_assign_rhs1 (def_stmt);
3124 continue;
3125 case MULT_EXPR:
3126 t = gimple_assign_rhs2 (def_stmt);
3127 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
3128 return;
3129 linear_step = tree_to_shwi (t);
3130 v = gimple_assign_rhs1 (def_stmt);
3131 continue;
3132 CASE_CONVERT:
3133 t = gimple_assign_rhs1 (def_stmt);
3134 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
3135 || (TYPE_PRECISION (TREE_TYPE (v))
3136 < TYPE_PRECISION (TREE_TYPE (t))))
3137 return;
3138 if (!linear_step)
3139 linear_step = 1;
3140 v = t;
3141 continue;
3142 default:
3143 return;
3145 else if (gimple_call_internal_p (def_stmt, IFN_GOMP_SIMD_LANE)
3146 && loop->simduid
3147 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3148 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3149 == loop->simduid))
3151 if (!linear_step)
3152 linear_step = 1;
3153 arginfo->linear_step = linear_step;
3154 arginfo->op = base;
3155 arginfo->simd_lane_linear = true;
3156 return;
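/* Illustrative sketch added for clarity (not part of the original source;
   the SSA names and array below are hypothetical).  The chain recognized
   above looks like:

     _1 = GOMP_SIMD_LANE (simduid_5);
     _2 = _1 * 8;
     _3 = (sizetype) _2;
     op = &base_array + _3;

   i.e. an address that advances by a constant step (here 8) per simd
   lane within one iteration of the simd loop.  ARGINFO then records the
   invariant base &base_array, linear_step 8 and simd_lane_linear set.  */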
3161 /* Function vectorizable_simd_clone_call.
3163 Check if STMT performs a function call that can be vectorized
3164 by calling a simd clone of the function.
3165 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3166 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3167 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
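/* Illustrative sketch added for clarity (not part of the original source;
   the declaration and loop below are hypothetical).  Simd clones are
   created for declarations such as

     #pragma omp declare simd notinbranch
     float my_op (float x, float y);

   so that a loop like "out[i] = my_op (a[i], b[i]);" can call a clone
   that takes and returns whole vectors.  The code below picks the best
   matching clone via the badness heuristic, passing vectors for VECTOR
   arguments, the scalar value for UNIFORM arguments, and a base plus a
   per-copy offset for the LINEAR variants.  */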
3169 static bool
3170 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3171 gimple **vec_stmt, slp_tree slp_node)
3173 tree vec_dest;
3174 tree scalar_dest;
3175 tree op, type;
3176 tree vec_oprnd0 = NULL_TREE;
3177 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3178 tree vectype;
3179 unsigned int nunits;
3180 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3181 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3182 vec_info *vinfo = stmt_info->vinfo;
3183 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3184 tree fndecl, new_temp;
3185 gimple *def_stmt;
3186 gimple *new_stmt = NULL;
3187 int ncopies, j;
3188 auto_vec<simd_call_arg_info> arginfo;
3189 vec<tree> vargs = vNULL;
3190 size_t i, nargs;
3191 tree lhs, rtype, ratype;
3192 vec<constructor_elt, va_gc> *ret_ctor_elts;
3194 /* Is STMT a vectorizable call? */
3195 if (!is_gimple_call (stmt))
3196 return false;
3198 fndecl = gimple_call_fndecl (stmt);
3199 if (fndecl == NULL_TREE)
3200 return false;
3202 struct cgraph_node *node = cgraph_node::get (fndecl);
3203 if (node == NULL || node->simd_clones == NULL)
3204 return false;
3206 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3207 return false;
3209 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3210 && ! vec_stmt)
3211 return false;
3213 if (gimple_call_lhs (stmt)
3214 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3215 return false;
3217 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3219 vectype = STMT_VINFO_VECTYPE (stmt_info);
3221 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3222 return false;
3224 /* FORNOW */
3225 if (slp_node)
3226 return false;
3228 /* Process function arguments. */
3229 nargs = gimple_call_num_args (stmt);
3231 /* Bail out if the function has zero arguments. */
3232 if (nargs == 0)
3233 return false;
3235 arginfo.reserve (nargs, true);
3237 for (i = 0; i < nargs; i++)
3239 simd_call_arg_info thisarginfo;
3240 affine_iv iv;
3242 thisarginfo.linear_step = 0;
3243 thisarginfo.align = 0;
3244 thisarginfo.op = NULL_TREE;
3245 thisarginfo.simd_lane_linear = false;
3247 op = gimple_call_arg (stmt, i);
3248 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3249 &thisarginfo.vectype)
3250 || thisarginfo.dt == vect_uninitialized_def)
3252 if (dump_enabled_p ())
3253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3254 "use not simple.\n");
3255 return false;
3258 if (thisarginfo.dt == vect_constant_def
3259 || thisarginfo.dt == vect_external_def)
3260 gcc_assert (thisarginfo.vectype == NULL_TREE);
3261 else
3262 gcc_assert (thisarginfo.vectype != NULL_TREE);
3264 /* For linear arguments, the analyze phase should have saved
3265 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3266 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3267 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3269 gcc_assert (vec_stmt);
3270 thisarginfo.linear_step
3271 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3272 thisarginfo.op
3273 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3274 thisarginfo.simd_lane_linear
3275 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3276 == boolean_true_node);
3277 /* If the loop has been peeled for alignment, we need to adjust it. */
3278 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3279 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3280 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3282 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3283 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3284 tree opt = TREE_TYPE (thisarginfo.op);
3285 bias = fold_convert (TREE_TYPE (step), bias);
3286 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3287 thisarginfo.op
3288 = fold_build2 (POINTER_TYPE_P (opt)
3289 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3290 thisarginfo.op, bias);
3293 else if (!vec_stmt
3294 && thisarginfo.dt != vect_constant_def
3295 && thisarginfo.dt != vect_external_def
3296 && loop_vinfo
3297 && TREE_CODE (op) == SSA_NAME
3298 && simple_iv (loop, loop_containing_stmt (stmt), op,
3299 &iv, false)
3300 && tree_fits_shwi_p (iv.step))
3302 thisarginfo.linear_step = tree_to_shwi (iv.step);
3303 thisarginfo.op = iv.base;
3305 else if ((thisarginfo.dt == vect_constant_def
3306 || thisarginfo.dt == vect_external_def)
3307 && POINTER_TYPE_P (TREE_TYPE (op)))
3308 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3309 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3310 linear too. */
3311 if (POINTER_TYPE_P (TREE_TYPE (op))
3312 && !thisarginfo.linear_step
3313 && !vec_stmt
3314 && thisarginfo.dt != vect_constant_def
3315 && thisarginfo.dt != vect_external_def
3316 && loop_vinfo
3317 && !slp_node
3318 && TREE_CODE (op) == SSA_NAME)
3319 vect_simd_lane_linear (op, loop, &thisarginfo);
3321 arginfo.quick_push (thisarginfo);
3324 unsigned int badness = 0;
3325 struct cgraph_node *bestn = NULL;
3326 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3327 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3328 else
3329 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3330 n = n->simdclone->next_clone)
3332 unsigned int this_badness = 0;
3333 if (n->simdclone->simdlen
3334 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3335 || n->simdclone->nargs != nargs)
3336 continue;
3337 if (n->simdclone->simdlen
3338 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3339 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3340 - exact_log2 (n->simdclone->simdlen)) * 1024;
3341 if (n->simdclone->inbranch)
3342 this_badness += 2048;
3343 int target_badness = targetm.simd_clone.usable (n);
3344 if (target_badness < 0)
3345 continue;
3346 this_badness += target_badness * 512;
3347 /* FORNOW: Have to add code to add the mask argument. */
3348 if (n->simdclone->inbranch)
3349 continue;
3350 for (i = 0; i < nargs; i++)
3352 switch (n->simdclone->args[i].arg_type)
3354 case SIMD_CLONE_ARG_TYPE_VECTOR:
3355 if (!useless_type_conversion_p
3356 (n->simdclone->args[i].orig_type,
3357 TREE_TYPE (gimple_call_arg (stmt, i))))
3358 i = -1;
3359 else if (arginfo[i].dt == vect_constant_def
3360 || arginfo[i].dt == vect_external_def
3361 || arginfo[i].linear_step)
3362 this_badness += 64;
3363 break;
3364 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3365 if (arginfo[i].dt != vect_constant_def
3366 && arginfo[i].dt != vect_external_def)
3367 i = -1;
3368 break;
3369 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3370 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3371 if (arginfo[i].dt == vect_constant_def
3372 || arginfo[i].dt == vect_external_def
3373 || (arginfo[i].linear_step
3374 != n->simdclone->args[i].linear_step))
3375 i = -1;
3376 break;
3377 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3378 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3379 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3380 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3381 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3382 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3383 /* FORNOW */
3384 i = -1;
3385 break;
3386 case SIMD_CLONE_ARG_TYPE_MASK:
3387 gcc_unreachable ();
3389 if (i == (size_t) -1)
3390 break;
3391 if (n->simdclone->args[i].alignment > arginfo[i].align)
3393 i = -1;
3394 break;
3396 if (arginfo[i].align)
3397 this_badness += (exact_log2 (arginfo[i].align)
3398 - exact_log2 (n->simdclone->args[i].alignment));
3400 if (i == (size_t) -1)
3401 continue;
3402 if (bestn == NULL || this_badness < badness)
3404 bestn = n;
3405 badness = this_badness;
3409 if (bestn == NULL)
3410 return false;
3412 for (i = 0; i < nargs; i++)
3413 if ((arginfo[i].dt == vect_constant_def
3414 || arginfo[i].dt == vect_external_def)
3415 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3417 arginfo[i].vectype
3418 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3419 i)));
3420 if (arginfo[i].vectype == NULL
3421 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3422 > bestn->simdclone->simdlen))
3423 return false;
3426 fndecl = bestn->decl;
3427 nunits = bestn->simdclone->simdlen;
3428 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3430 /* If the function isn't const, only allow it in simd loops where the user
3431 has asserted that at least nunits consecutive iterations can be
3432 performed using SIMD instructions. */
3433 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3434 && gimple_vuse (stmt))
3435 return false;
3437 /* Sanity check: make sure that at least one copy of the vectorized stmt
3438 needs to be generated. */
3439 gcc_assert (ncopies >= 1);
3441 if (!vec_stmt) /* transformation not required. */
3443 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3444 for (i = 0; i < nargs; i++)
3445 if ((bestn->simdclone->args[i].arg_type
3446 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3447 || (bestn->simdclone->args[i].arg_type
3448 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3450 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3451 + 1);
3452 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3453 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3454 ? size_type_node : TREE_TYPE (arginfo[i].op);
3455 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3456 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3457 tree sll = arginfo[i].simd_lane_linear
3458 ? boolean_true_node : boolean_false_node;
3459 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3461 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3462 if (dump_enabled_p ())
3463 dump_printf_loc (MSG_NOTE, vect_location,
3464 "=== vectorizable_simd_clone_call ===\n");
3465 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3466 return true;
3469 /* Transform. */
3471 if (dump_enabled_p ())
3472 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3474 /* Handle def. */
3475 scalar_dest = gimple_call_lhs (stmt);
3476 vec_dest = NULL_TREE;
3477 rtype = NULL_TREE;
3478 ratype = NULL_TREE;
3479 if (scalar_dest)
3481 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3482 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3483 if (TREE_CODE (rtype) == ARRAY_TYPE)
3485 ratype = rtype;
3486 rtype = TREE_TYPE (ratype);
3490 prev_stmt_info = NULL;
3491 for (j = 0; j < ncopies; ++j)
3493 /* Build argument list for the vectorized call. */
3494 if (j == 0)
3495 vargs.create (nargs);
3496 else
3497 vargs.truncate (0);
3499 for (i = 0; i < nargs; i++)
3501 unsigned int k, l, m, o;
3502 tree atype;
3503 op = gimple_call_arg (stmt, i);
3504 switch (bestn->simdclone->args[i].arg_type)
3506 case SIMD_CLONE_ARG_TYPE_VECTOR:
3507 atype = bestn->simdclone->args[i].vector_type;
3508 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3509 for (m = j * o; m < (j + 1) * o; m++)
3511 if (TYPE_VECTOR_SUBPARTS (atype)
3512 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3514 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3515 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3516 / TYPE_VECTOR_SUBPARTS (atype));
3517 gcc_assert ((k & (k - 1)) == 0);
3518 if (m == 0)
3519 vec_oprnd0
3520 = vect_get_vec_def_for_operand (op, stmt);
3521 else
3523 vec_oprnd0 = arginfo[i].op;
3524 if ((m & (k - 1)) == 0)
3525 vec_oprnd0
3526 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3527 vec_oprnd0);
3529 arginfo[i].op = vec_oprnd0;
3530 vec_oprnd0
3531 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3532 size_int (prec),
3533 bitsize_int ((m & (k - 1)) * prec));
3534 new_stmt
3535 = gimple_build_assign (make_ssa_name (atype),
3536 vec_oprnd0);
3537 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3538 vargs.safe_push (gimple_assign_lhs (new_stmt));
3540 else
3542 k = (TYPE_VECTOR_SUBPARTS (atype)
3543 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3544 gcc_assert ((k & (k - 1)) == 0);
3545 vec<constructor_elt, va_gc> *ctor_elts;
3546 if (k != 1)
3547 vec_alloc (ctor_elts, k);
3548 else
3549 ctor_elts = NULL;
3550 for (l = 0; l < k; l++)
3552 if (m == 0 && l == 0)
3553 vec_oprnd0
3554 = vect_get_vec_def_for_operand (op, stmt);
3555 else
3556 vec_oprnd0
3557 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3558 arginfo[i].op);
3559 arginfo[i].op = vec_oprnd0;
3560 if (k == 1)
3561 break;
3562 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3563 vec_oprnd0);
3565 if (k == 1)
3566 vargs.safe_push (vec_oprnd0);
3567 else
3569 vec_oprnd0 = build_constructor (atype, ctor_elts);
3570 new_stmt
3571 = gimple_build_assign (make_ssa_name (atype),
3572 vec_oprnd0);
3573 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3574 vargs.safe_push (gimple_assign_lhs (new_stmt));
3578 break;
3579 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3580 vargs.safe_push (op);
3581 break;
3582 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3583 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3584 if (j == 0)
3586 gimple_seq stmts;
3587 arginfo[i].op
3588 = force_gimple_operand (arginfo[i].op, &stmts, true,
3589 NULL_TREE);
3590 if (stmts != NULL)
3592 basic_block new_bb;
3593 edge pe = loop_preheader_edge (loop);
3594 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3595 gcc_assert (!new_bb);
3597 if (arginfo[i].simd_lane_linear)
3599 vargs.safe_push (arginfo[i].op);
3600 break;
3602 tree phi_res = copy_ssa_name (op);
3603 gphi *new_phi = create_phi_node (phi_res, loop->header);
3604 set_vinfo_for_stmt (new_phi,
3605 new_stmt_vec_info (new_phi, loop_vinfo));
3606 add_phi_arg (new_phi, arginfo[i].op,
3607 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3608 enum tree_code code
3609 = POINTER_TYPE_P (TREE_TYPE (op))
3610 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3611 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3612 ? sizetype : TREE_TYPE (op);
3613 widest_int cst
3614 = wi::mul (bestn->simdclone->args[i].linear_step,
3615 ncopies * nunits);
3616 tree tcst = wide_int_to_tree (type, cst);
3617 tree phi_arg = copy_ssa_name (op);
3618 new_stmt
3619 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3620 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3621 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3622 set_vinfo_for_stmt (new_stmt,
3623 new_stmt_vec_info (new_stmt, loop_vinfo));
3624 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3625 UNKNOWN_LOCATION);
3626 arginfo[i].op = phi_res;
3627 vargs.safe_push (phi_res);
3629 else
3631 enum tree_code code
3632 = POINTER_TYPE_P (TREE_TYPE (op))
3633 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3634 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3635 ? sizetype : TREE_TYPE (op);
3636 widest_int cst
3637 = wi::mul (bestn->simdclone->args[i].linear_step,
3638 j * nunits);
3639 tree tcst = wide_int_to_tree (type, cst);
3640 new_temp = make_ssa_name (TREE_TYPE (op));
3641 new_stmt = gimple_build_assign (new_temp, code,
3642 arginfo[i].op, tcst);
3643 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3644 vargs.safe_push (new_temp);
3646 break;
3647 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3648 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3649 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3650 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3651 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3652 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3653 default:
3654 gcc_unreachable ();
3658 new_stmt = gimple_build_call_vec (fndecl, vargs);
3659 if (vec_dest)
3661 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3662 if (ratype)
3663 new_temp = create_tmp_var (ratype);
3664 else if (TYPE_VECTOR_SUBPARTS (vectype)
3665 == TYPE_VECTOR_SUBPARTS (rtype))
3666 new_temp = make_ssa_name (vec_dest, new_stmt);
3667 else
3668 new_temp = make_ssa_name (rtype, new_stmt);
3669 gimple_call_set_lhs (new_stmt, new_temp);
3671 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3673 if (vec_dest)
3675 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3677 unsigned int k, l;
3678 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3679 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3680 gcc_assert ((k & (k - 1)) == 0);
3681 for (l = 0; l < k; l++)
3683 tree t;
3684 if (ratype)
3686 t = build_fold_addr_expr (new_temp);
3687 t = build2 (MEM_REF, vectype, t,
3688 build_int_cst (TREE_TYPE (t),
3689 l * prec / BITS_PER_UNIT));
3691 else
3692 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3693 size_int (prec), bitsize_int (l * prec));
3694 new_stmt
3695 = gimple_build_assign (make_ssa_name (vectype), t);
3696 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3697 if (j == 0 && l == 0)
3698 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3699 else
3700 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3702 prev_stmt_info = vinfo_for_stmt (new_stmt);
3705 if (ratype)
3707 tree clobber = build_constructor (ratype, NULL);
3708 TREE_THIS_VOLATILE (clobber) = 1;
3709 new_stmt = gimple_build_assign (new_temp, clobber);
3710 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3712 continue;
3714 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3716 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3717 / TYPE_VECTOR_SUBPARTS (rtype));
3718 gcc_assert ((k & (k - 1)) == 0);
3719 if ((j & (k - 1)) == 0)
3720 vec_alloc (ret_ctor_elts, k);
3721 if (ratype)
3723 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3724 for (m = 0; m < o; m++)
3726 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3727 size_int (m), NULL_TREE, NULL_TREE);
3728 new_stmt
3729 = gimple_build_assign (make_ssa_name (rtype), tem);
3730 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3731 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3732 gimple_assign_lhs (new_stmt));
3734 tree clobber = build_constructor (ratype, NULL);
3735 TREE_THIS_VOLATILE (clobber) = 1;
3736 new_stmt = gimple_build_assign (new_temp, clobber);
3737 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3739 else
3740 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3741 if ((j & (k - 1)) != k - 1)
3742 continue;
3743 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3744 new_stmt
3745 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3746 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3748 if ((unsigned) j == k - 1)
3749 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3750 else
3751 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3753 prev_stmt_info = vinfo_for_stmt (new_stmt);
3754 continue;
3756 else if (ratype)
3758 tree t = build_fold_addr_expr (new_temp);
3759 t = build2 (MEM_REF, vectype, t,
3760 build_int_cst (TREE_TYPE (t), 0));
3761 new_stmt
3762 = gimple_build_assign (make_ssa_name (vec_dest), t);
3763 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3764 tree clobber = build_constructor (ratype, NULL);
3765 TREE_THIS_VOLATILE (clobber) = 1;
3766 vect_finish_stmt_generation (stmt,
3767 gimple_build_assign (new_temp,
3768 clobber), gsi);
3772 if (j == 0)
3773 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3774 else
3775 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3777 prev_stmt_info = vinfo_for_stmt (new_stmt);
3780 vargs.release ();
3782 /* The call in STMT might prevent it from being removed in dce.
3783 However, we cannot remove it here, due to the way the ssa name
3784 it defines is mapped to the new definition. So just replace the
3785 rhs of the statement with something harmless. */
3787 if (slp_node)
3788 return true;
3790 if (scalar_dest)
3792 type = TREE_TYPE (scalar_dest);
3793 if (is_pattern_stmt_p (stmt_info))
3794 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3795 else
3796 lhs = gimple_call_lhs (stmt);
3797 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3799 else
3800 new_stmt = gimple_build_nop ();
3801 set_vinfo_for_stmt (new_stmt, stmt_info);
3802 set_vinfo_for_stmt (stmt, NULL);
3803 STMT_VINFO_STMT (stmt_info) = new_stmt;
3804 gsi_replace (gsi, new_stmt, true);
3805 unlink_stmt_vdef (stmt);
3807 return true;
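/* Editor's illustrative sketch (not part of the original sources): for a
   simd clone argument declared linear with a constant step, the scalar
   value seen by lane L of the J-th vector copy in one vectorized loop
   iteration is BASE + (J * NUNITS + L) * STEP, which is what the PHI and
   the per-copy additions above arrange.  The helper below is hypothetical
   and only models that numbering with plain integers.  */

static long
example_linear_lane_value (long base, long step, int nunits, int j, int l)
{
  /* Copy J starts NUNITS * J logical iterations after BASE; lane L adds
     L more steps on top of that.  */
  return base + (long) (j * nunits + l) * step;
}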
3811 /* Function vect_gen_widened_results_half
3813 Create a vector stmt whose code, number of arguments, and result
3814 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3815 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3816 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3817 needs to be created (DECL is a function-decl of a target-builtin).
3818 STMT is the original scalar stmt that we are vectorizing. */
3820 static gimple *
3821 vect_gen_widened_results_half (enum tree_code code,
3822 tree decl,
3823 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3824 tree vec_dest, gimple_stmt_iterator *gsi,
3825 gimple *stmt)
3827 gimple *new_stmt;
3828 tree new_temp;
3830 /* Generate half of the widened result: */
3831 if (code == CALL_EXPR)
3833 /* Target specific support */
3834 if (op_type == binary_op)
3835 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3836 else
3837 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3838 new_temp = make_ssa_name (vec_dest, new_stmt);
3839 gimple_call_set_lhs (new_stmt, new_temp);
3841 else
3843 /* Generic support */
3844 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3845 if (op_type != binary_op)
3846 vec_oprnd1 = NULL;
3847 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3848 new_temp = make_ssa_name (vec_dest, new_stmt);
3849 gimple_assign_set_lhs (new_stmt, new_temp);
3851 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3853 return new_stmt;
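/* Editor's illustrative sketch (hypothetical, not GCC API): a widening
   operation on a vector of N narrow elements yields two vectors of N/2
   wide elements, one per call to vect_gen_widened_results_half.  The
   plain-C loop below models a widening multiply split into its "lo" and
   "hi" halves; which input lanes land in which half is target-defined,
   so the low-lanes-first split used here is just one possible convention.  */

static void
example_widen_mult_halves (const short *a, const short *b,
                           int *lo, int *hi, int n)
{
  for (int i = 0; i < n / 2; i++)
    {
      lo[i] = (int) a[i] * b[i];                        /* first half stmt */
      hi[i] = (int) a[n / 2 + i] * b[n / 2 + i];        /* second half stmt */
    }
}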
3857 /* Get vectorized definitions for loop-based vectorization. For the first
3858 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3859 scalar operand), and for the rest we get a copy with
3860 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3861 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3862 The vectors are collected into VEC_OPRNDS. */
3864 static void
3865 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3866 vec<tree> *vec_oprnds, int multi_step_cvt)
3868 tree vec_oprnd;
3870 /* Get first vector operand. */
3871 /* All the vector operands except the very first one (which is the scalar
3872 operand) are stmt copies. */
3873 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3874 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3875 else
3876 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3878 vec_oprnds->quick_push (vec_oprnd);
3880 /* Get second vector operand. */
3881 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3882 vec_oprnds->quick_push (vec_oprnd);
3884 *oprnd = vec_oprnd;
3886 /* For conversion in multiple steps, continue to get operands
3887 recursively. */
3888 if (multi_step_cvt)
3889 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
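/* Editor's illustrative sketch (hypothetical): the recursion above pushes
   two vector defs per level and recurses MULTI_STEP_CVT more times, so it
   collects 2 * (MULTI_STEP_CVT + 1) operands in total, which the demotion
   code then combines pairwise.  */

static int
example_count_loop_based_defs (int multi_step_cvt)
{
  int count = 2;        /* the two defs pushed at this level */
  if (multi_step_cvt)
    count += example_count_loop_based_defs (multi_step_cvt - 1);
  return count;
}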
3893 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3894 For multi-step conversions store the resulting vectors and call the function
3895 recursively. */
3897 static void
3898 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3899 int multi_step_cvt, gimple *stmt,
3900 vec<tree> vec_dsts,
3901 gimple_stmt_iterator *gsi,
3902 slp_tree slp_node, enum tree_code code,
3903 stmt_vec_info *prev_stmt_info)
3905 unsigned int i;
3906 tree vop0, vop1, new_tmp, vec_dest;
3907 gimple *new_stmt;
3908 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3910 vec_dest = vec_dsts.pop ();
3912 for (i = 0; i < vec_oprnds->length (); i += 2)
3914 /* Create demotion operation. */
3915 vop0 = (*vec_oprnds)[i];
3916 vop1 = (*vec_oprnds)[i + 1];
3917 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3918 new_tmp = make_ssa_name (vec_dest, new_stmt);
3919 gimple_assign_set_lhs (new_stmt, new_tmp);
3920 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3922 if (multi_step_cvt)
3923 /* Store the resulting vector for next recursive call. */
3924 (*vec_oprnds)[i/2] = new_tmp;
3925 else
3927 /* This is the last step of the conversion sequence. Store the
3928 vectors in SLP_NODE or in vector info of the scalar statement
3929 (or in STMT_VINFO_RELATED_STMT chain). */
3930 if (slp_node)
3931 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3932 else
3934 if (!*prev_stmt_info)
3935 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3936 else
3937 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3939 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3944 /* For multi-step demotion operations we first generate demotion operations
3945 from the source type to the intermediate types, and then combine the
3946 results (stored in VEC_OPRNDS) with a demotion operation to the destination
3947 type. */
3948 if (multi_step_cvt)
3950 /* At each level of recursion we have half of the operands we had at the
3951 previous level. */
3952 vec_oprnds->truncate ((i+1)/2);
3953 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3954 stmt, vec_dsts, gsi, slp_node,
3955 VEC_PACK_TRUNC_EXPR,
3956 prev_stmt_info);
3959 vec_dsts.quick_push (vec_dest);
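/* Editor's illustrative sketch (hypothetical, plain C): one demotion step
   combines two input vectors of N wide elements into one vector of 2*N
   narrower elements, which is why each recursion level above halves the
   operand list (the truncate ((i+1)/2) call).  The loop models a single
   VEC_PACK_TRUNC-style step from int to short; the order in which the two
   inputs fill the result is target-defined.  */

static void
example_pack_trunc_step (const int *in0, const int *in1, short *out, int n)
{
  for (int i = 0; i < n; i++)
    {
      out[i] = (short) in0[i];          /* truncated elements of the first input */
      out[n + i] = (short) in1[i];      /* truncated elements of the second input */
    }
}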
3963 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3964 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3965 the resulting vectors and call the function recursively. */
3967 static void
3968 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3969 vec<tree> *vec_oprnds1,
3970 gimple *stmt, tree vec_dest,
3971 gimple_stmt_iterator *gsi,
3972 enum tree_code code1,
3973 enum tree_code code2, tree decl1,
3974 tree decl2, int op_type)
3976 int i;
3977 tree vop0, vop1, new_tmp1, new_tmp2;
3978 gimple *new_stmt1, *new_stmt2;
3979 vec<tree> vec_tmp = vNULL;
3981 vec_tmp.create (vec_oprnds0->length () * 2);
3982 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3984 if (op_type == binary_op)
3985 vop1 = (*vec_oprnds1)[i];
3986 else
3987 vop1 = NULL_TREE;
3989 /* Generate the two halves of promotion operation. */
3990 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3991 op_type, vec_dest, gsi, stmt);
3992 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3993 op_type, vec_dest, gsi, stmt);
3994 if (is_gimple_call (new_stmt1))
3996 new_tmp1 = gimple_call_lhs (new_stmt1);
3997 new_tmp2 = gimple_call_lhs (new_stmt2);
3999 else
4001 new_tmp1 = gimple_assign_lhs (new_stmt1);
4002 new_tmp2 = gimple_assign_lhs (new_stmt2);
4005 /* Store the results for the next step. */
4006 vec_tmp.quick_push (new_tmp1);
4007 vec_tmp.quick_push (new_tmp2);
4010 vec_oprnds0->release ();
4011 *vec_oprnds0 = vec_tmp;
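/* Editor's illustrative sketch (hypothetical): each input vector produces
   a "lo" and a "hi" widened result, and the two are pushed back to back,
   so N inputs become 2 * N results in the order lo0, hi0, lo1, hi1, ...
   The index mapping used by later steps is therefore:  */

static int
example_promoted_result_index (int input_index, int want_hi_half)
{
  /* vec_tmp receives new_tmp1 and then new_tmp2 for every input.  */
  return 2 * input_index + (want_hi_half != 0);
}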
4015 /* Check if STMT performs a conversion operation that can be vectorized.
4016 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4017 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4018 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4020 static bool
4021 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
4022 gimple **vec_stmt, slp_tree slp_node)
4024 tree vec_dest;
4025 tree scalar_dest;
4026 tree op0, op1 = NULL_TREE;
4027 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
4028 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4029 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4030 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4031 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
4032 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4033 tree new_temp;
4034 gimple *def_stmt;
4035 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4036 int ndts = 2;
4037 gimple *new_stmt = NULL;
4038 stmt_vec_info prev_stmt_info;
4039 int nunits_in;
4040 int nunits_out;
4041 tree vectype_out, vectype_in;
4042 int ncopies, i, j;
4043 tree lhs_type, rhs_type;
4044 enum { NARROW, NONE, WIDEN } modifier;
4045 vec<tree> vec_oprnds0 = vNULL;
4046 vec<tree> vec_oprnds1 = vNULL;
4047 tree vop0;
4048 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4049 vec_info *vinfo = stmt_info->vinfo;
4050 int multi_step_cvt = 0;
4051 vec<tree> interm_types = vNULL;
4052 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
4053 int op_type;
4054 machine_mode rhs_mode;
4055 unsigned short fltsz;
4057 /* Is STMT a vectorizable conversion? */
4059 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4060 return false;
4062 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4063 && ! vec_stmt)
4064 return false;
4066 if (!is_gimple_assign (stmt))
4067 return false;
4069 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4070 return false;
4072 code = gimple_assign_rhs_code (stmt);
4073 if (!CONVERT_EXPR_CODE_P (code)
4074 && code != FIX_TRUNC_EXPR
4075 && code != FLOAT_EXPR
4076 && code != WIDEN_MULT_EXPR
4077 && code != WIDEN_LSHIFT_EXPR)
4078 return false;
4080 op_type = TREE_CODE_LENGTH (code);
4082 /* Check types of lhs and rhs. */
4083 scalar_dest = gimple_assign_lhs (stmt);
4084 lhs_type = TREE_TYPE (scalar_dest);
4085 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4087 op0 = gimple_assign_rhs1 (stmt);
4088 rhs_type = TREE_TYPE (op0);
4090 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4091 && !((INTEGRAL_TYPE_P (lhs_type)
4092 && INTEGRAL_TYPE_P (rhs_type))
4093 || (SCALAR_FLOAT_TYPE_P (lhs_type)
4094 && SCALAR_FLOAT_TYPE_P (rhs_type))))
4095 return false;
4097 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
4098 && ((INTEGRAL_TYPE_P (lhs_type)
4099 && (TYPE_PRECISION (lhs_type)
4100 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
4101 || (INTEGRAL_TYPE_P (rhs_type)
4102 && (TYPE_PRECISION (rhs_type)
4103 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
4105 if (dump_enabled_p ())
4106 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4107 "type conversion to/from bit-precision unsupported."
4108 "\n");
4109 return false;
4112 /* Check the operands of the operation. */
4113 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
4115 if (dump_enabled_p ())
4116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4117 "use not simple.\n");
4118 return false;
4120 if (op_type == binary_op)
4122 bool ok;
4124 op1 = gimple_assign_rhs2 (stmt);
4125 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
4126 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4127 OP1. */
4128 if (CONSTANT_CLASS_P (op0))
4129 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
4130 else
4131 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
4133 if (!ok)
4135 if (dump_enabled_p ())
4136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4137 "use not simple.\n");
4138 return false;
4142 /* If op0 is an external or constant def, use a vector type of
4143 the same size as the output vector type. */
4144 if (!vectype_in)
4145 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4146 if (vec_stmt)
4147 gcc_assert (vectype_in);
4148 if (!vectype_in)
4150 if (dump_enabled_p ())
4152 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4153 "no vectype for scalar type ");
4154 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4155 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4158 return false;
4161 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4162 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4164 if (dump_enabled_p ())
4166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4167 "can't convert between boolean and non "
4168 "boolean vectors");
4169 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4170 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4173 return false;
4176 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4177 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4178 if (nunits_in < nunits_out)
4179 modifier = NARROW;
4180 else if (nunits_out == nunits_in)
4181 modifier = NONE;
4182 else
4183 modifier = WIDEN;
4185 /* Multiple types in SLP are handled by creating the appropriate number of
4186 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4187 case of SLP. */
4188 if (slp_node)
4189 ncopies = 1;
4190 else if (modifier == NARROW)
4191 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4192 else
4193 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4195 /* Sanity check: make sure that at least one copy of the vectorized stmt
4196 needs to be generated. */
4197 gcc_assert (ncopies >= 1);
4199 /* Supportable by target? */
4200 switch (modifier)
4202 case NONE:
4203 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4204 return false;
4205 if (supportable_convert_operation (code, vectype_out, vectype_in,
4206 &decl1, &code1))
4207 break;
4208 /* FALLTHRU */
4209 unsupported:
4210 if (dump_enabled_p ())
4211 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4212 "conversion not supported by target.\n");
4213 return false;
4215 case WIDEN:
4216 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4217 &code1, &code2, &multi_step_cvt,
4218 &interm_types))
4220 /* A binary widening operation can only be supported directly by the
4221 architecture. */
4222 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4223 break;
4226 if (code != FLOAT_EXPR
4227 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4228 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4229 goto unsupported;
4231 rhs_mode = TYPE_MODE (rhs_type);
4232 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4233 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4234 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4235 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4237 cvt_type
4238 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4239 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4240 if (cvt_type == NULL_TREE)
4241 goto unsupported;
4243 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4245 if (!supportable_convert_operation (code, vectype_out,
4246 cvt_type, &decl1, &codecvt1))
4247 goto unsupported;
4249 else if (!supportable_widening_operation (code, stmt, vectype_out,
4250 cvt_type, &codecvt1,
4251 &codecvt2, &multi_step_cvt,
4252 &interm_types))
4253 continue;
4254 else
4255 gcc_assert (multi_step_cvt == 0);
4257 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4258 vectype_in, &code1, &code2,
4259 &multi_step_cvt, &interm_types))
4260 break;
4263 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4264 goto unsupported;
4266 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4267 codecvt2 = ERROR_MARK;
4268 else
4270 multi_step_cvt++;
4271 interm_types.safe_push (cvt_type);
4272 cvt_type = NULL_TREE;
4274 break;
4276 case NARROW:
4277 gcc_assert (op_type == unary_op);
4278 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4279 &code1, &multi_step_cvt,
4280 &interm_types))
4281 break;
4283 if (code != FIX_TRUNC_EXPR
4284 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4285 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4286 goto unsupported;
4288 rhs_mode = TYPE_MODE (rhs_type);
4289 cvt_type
4290 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4291 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4292 if (cvt_type == NULL_TREE)
4293 goto unsupported;
4294 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4295 &decl1, &codecvt1))
4296 goto unsupported;
4297 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4298 &code1, &multi_step_cvt,
4299 &interm_types))
4300 break;
4301 goto unsupported;
4303 default:
4304 gcc_unreachable ();
4307 if (!vec_stmt) /* transformation not required. */
4309 if (dump_enabled_p ())
4310 dump_printf_loc (MSG_NOTE, vect_location,
4311 "=== vectorizable_conversion ===\n");
4312 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4314 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4315 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4317 else if (modifier == NARROW)
4319 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4320 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4322 else
4324 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4325 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4327 interm_types.release ();
4328 return true;
4331 /* Transform. */
4332 if (dump_enabled_p ())
4333 dump_printf_loc (MSG_NOTE, vect_location,
4334 "transform conversion. ncopies = %d.\n", ncopies);
4336 if (op_type == binary_op)
4338 if (CONSTANT_CLASS_P (op0))
4339 op0 = fold_convert (TREE_TYPE (op1), op0);
4340 else if (CONSTANT_CLASS_P (op1))
4341 op1 = fold_convert (TREE_TYPE (op0), op1);
4344 /* In case of multi-step conversion, we first generate conversion operations
4345 to the intermediate types, and then from those types to the final one.
4346 We create vector destinations for the intermediate type (TYPES) received
4347 from supportable_*_operation, and store them in the correct order
4348 for future use in vect_create_vectorized_*_stmts (). */
4349 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4350 vec_dest = vect_create_destination_var (scalar_dest,
4351 (cvt_type && modifier == WIDEN)
4352 ? cvt_type : vectype_out);
4353 vec_dsts.quick_push (vec_dest);
4355 if (multi_step_cvt)
4357 for (i = interm_types.length () - 1;
4358 interm_types.iterate (i, &intermediate_type); i--)
4360 vec_dest = vect_create_destination_var (scalar_dest,
4361 intermediate_type);
4362 vec_dsts.quick_push (vec_dest);
4366 if (cvt_type)
4367 vec_dest = vect_create_destination_var (scalar_dest,
4368 modifier == WIDEN
4369 ? vectype_out : cvt_type);
4371 if (!slp_node)
4373 if (modifier == WIDEN)
4375 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4376 if (op_type == binary_op)
4377 vec_oprnds1.create (1);
4379 else if (modifier == NARROW)
4380 vec_oprnds0.create (
4381 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4383 else if (code == WIDEN_LSHIFT_EXPR)
4384 vec_oprnds1.create (slp_node->vec_stmts_size);
4386 last_oprnd = op0;
4387 prev_stmt_info = NULL;
4388 switch (modifier)
4390 case NONE:
4391 for (j = 0; j < ncopies; j++)
4393 if (j == 0)
4394 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
4395 else
4396 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4398 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4400 /* Arguments are ready, create the new vector stmt. */
4401 if (code1 == CALL_EXPR)
4403 new_stmt = gimple_build_call (decl1, 1, vop0);
4404 new_temp = make_ssa_name (vec_dest, new_stmt);
4405 gimple_call_set_lhs (new_stmt, new_temp);
4407 else
4409 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4410 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4411 new_temp = make_ssa_name (vec_dest, new_stmt);
4412 gimple_assign_set_lhs (new_stmt, new_temp);
4415 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4416 if (slp_node)
4417 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4418 else
4420 if (!prev_stmt_info)
4421 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4422 else
4423 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4424 prev_stmt_info = vinfo_for_stmt (new_stmt);
4428 break;
4430 case WIDEN:
4431 /* In case the vectorization factor (VF) is bigger than the number
4432 of elements that we can fit in a vectype (nunits), we have to
4433 generate more than one vector stmt, i.e., we need to "unroll"
4434 the vector stmt by a factor VF/nunits. */
4435 for (j = 0; j < ncopies; j++)
4437 /* Handle uses. */
4438 if (j == 0)
4440 if (slp_node)
4442 if (code == WIDEN_LSHIFT_EXPR)
4444 unsigned int k;
4446 vec_oprnd1 = op1;
4447 /* Store vec_oprnd1 for every vector stmt to be created
4448 for SLP_NODE. We check during the analysis that all
4449 the shift arguments are the same. */
4450 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4451 vec_oprnds1.quick_push (vec_oprnd1);
4453 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4454 slp_node);
4456 else
4457 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4458 &vec_oprnds1, slp_node);
4460 else
4462 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4463 vec_oprnds0.quick_push (vec_oprnd0);
4464 if (op_type == binary_op)
4466 if (code == WIDEN_LSHIFT_EXPR)
4467 vec_oprnd1 = op1;
4468 else
4469 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4470 vec_oprnds1.quick_push (vec_oprnd1);
4474 else
4476 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4477 vec_oprnds0.truncate (0);
4478 vec_oprnds0.quick_push (vec_oprnd0);
4479 if (op_type == binary_op)
4481 if (code == WIDEN_LSHIFT_EXPR)
4482 vec_oprnd1 = op1;
4483 else
4484 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4485 vec_oprnd1);
4486 vec_oprnds1.truncate (0);
4487 vec_oprnds1.quick_push (vec_oprnd1);
4491 /* Arguments are ready. Create the new vector stmts. */
4492 for (i = multi_step_cvt; i >= 0; i--)
4494 tree this_dest = vec_dsts[i];
4495 enum tree_code c1 = code1, c2 = code2;
4496 if (i == 0 && codecvt2 != ERROR_MARK)
4498 c1 = codecvt1;
4499 c2 = codecvt2;
4501 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4502 &vec_oprnds1,
4503 stmt, this_dest, gsi,
4504 c1, c2, decl1, decl2,
4505 op_type);
4508 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4510 if (cvt_type)
4512 if (codecvt1 == CALL_EXPR)
4514 new_stmt = gimple_build_call (decl1, 1, vop0);
4515 new_temp = make_ssa_name (vec_dest, new_stmt);
4516 gimple_call_set_lhs (new_stmt, new_temp);
4518 else
4520 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4521 new_temp = make_ssa_name (vec_dest);
4522 new_stmt = gimple_build_assign (new_temp, codecvt1,
4523 vop0);
4526 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4528 else
4529 new_stmt = SSA_NAME_DEF_STMT (vop0);
4531 if (slp_node)
4532 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4533 else
4535 if (!prev_stmt_info)
4536 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4537 else
4538 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4539 prev_stmt_info = vinfo_for_stmt (new_stmt);
4544 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4545 break;
4547 case NARROW:
4548 /* In case the vectorization factor (VF) is bigger than the number
4549 of elements that we can fit in a vectype (nunits), we have to
4550 generate more than one vector stmt, i.e., we need to "unroll"
4551 the vector stmt by a factor VF/nunits. */
4552 for (j = 0; j < ncopies; j++)
4554 /* Handle uses. */
4555 if (slp_node)
4556 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4557 slp_node);
4558 else
4560 vec_oprnds0.truncate (0);
4561 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4562 vect_pow2 (multi_step_cvt) - 1);
4565 /* Arguments are ready. Create the new vector stmts. */
4566 if (cvt_type)
4567 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4569 if (codecvt1 == CALL_EXPR)
4571 new_stmt = gimple_build_call (decl1, 1, vop0);
4572 new_temp = make_ssa_name (vec_dest, new_stmt);
4573 gimple_call_set_lhs (new_stmt, new_temp);
4575 else
4577 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4578 new_temp = make_ssa_name (vec_dest);
4579 new_stmt = gimple_build_assign (new_temp, codecvt1,
4580 vop0);
4583 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4584 vec_oprnds0[i] = new_temp;
4587 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4588 stmt, vec_dsts, gsi,
4589 slp_node, code1,
4590 &prev_stmt_info);
4593 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4594 break;
4597 vec_oprnds0.release ();
4598 vec_oprnds1.release ();
4599 interm_types.release ();
4601 return true;
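/* Editor's illustrative sketch (hypothetical helper, not GCC API): the
   conversion modifier and the number of vector stmt copies chosen in
   vectorizable_conversion depend only on the unit counts of the input and
   output vector types and on the vectorization factor VF.  */

static int
example_conversion_ncopies (int vf, int nunits_in, int nunits_out)
{
  if (nunits_in < nunits_out)
    /* NARROW: the output has more (narrower) elements per vector.  */
    return vf / nunits_out;
  else
    /* NONE or WIDEN.  */
    return vf / nunits_in;
}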
4605 /* Function vectorizable_assignment.
4607 Check if STMT performs an assignment (copy) that can be vectorized.
4608 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4609 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4610 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4612 static bool
4613 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4614 gimple **vec_stmt, slp_tree slp_node)
4616 tree vec_dest;
4617 tree scalar_dest;
4618 tree op;
4619 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4620 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4621 tree new_temp;
4622 gimple *def_stmt;
4623 enum vect_def_type dt[1] = {vect_unknown_def_type};
4624 int ndts = 1;
4625 int ncopies;
4626 int i, j;
4627 vec<tree> vec_oprnds = vNULL;
4628 tree vop;
4629 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4630 vec_info *vinfo = stmt_info->vinfo;
4631 gimple *new_stmt = NULL;
4632 stmt_vec_info prev_stmt_info = NULL;
4633 enum tree_code code;
4634 tree vectype_in;
4636 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4637 return false;
4639 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4640 && ! vec_stmt)
4641 return false;
4643 /* Is vectorizable assignment? */
4644 if (!is_gimple_assign (stmt))
4645 return false;
4647 scalar_dest = gimple_assign_lhs (stmt);
4648 if (TREE_CODE (scalar_dest) != SSA_NAME)
4649 return false;
4651 code = gimple_assign_rhs_code (stmt);
4652 if (gimple_assign_single_p (stmt)
4653 || code == PAREN_EXPR
4654 || CONVERT_EXPR_CODE_P (code))
4655 op = gimple_assign_rhs1 (stmt);
4656 else
4657 return false;
4659 if (code == VIEW_CONVERT_EXPR)
4660 op = TREE_OPERAND (op, 0);
4662 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4663 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4665 /* Multiple types in SLP are handled by creating the appropriate number of
4666 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4667 case of SLP. */
4668 if (slp_node)
4669 ncopies = 1;
4670 else
4671 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4673 gcc_assert (ncopies >= 1);
4675 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4677 if (dump_enabled_p ())
4678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4679 "use not simple.\n");
4680 return false;
4683 /* We can handle NOP_EXPR conversions that do not change the number
4684 of elements or the vector size. */
4685 if ((CONVERT_EXPR_CODE_P (code)
4686 || code == VIEW_CONVERT_EXPR)
4687 && (!vectype_in
4688 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4689 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4690 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4691 return false;
4693 /* We do not handle bit-precision changes. */
4694 if ((CONVERT_EXPR_CODE_P (code)
4695 || code == VIEW_CONVERT_EXPR)
4696 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4697 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4698 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4699 || ((TYPE_PRECISION (TREE_TYPE (op))
4700 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4701 /* But a conversion that does not change the bit-pattern is ok. */
4702 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4703 > TYPE_PRECISION (TREE_TYPE (op)))
4704 && TYPE_UNSIGNED (TREE_TYPE (op)))
4705 /* Conversion between boolean types of different sizes is
4706 a simple assignment in case their vectypes are the same
4707 boolean vectors. */
4708 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4709 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4711 if (dump_enabled_p ())
4712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4713 "type conversion to/from bit-precision "
4714 "unsupported.\n");
4715 return false;
4718 if (!vec_stmt) /* transformation not required. */
4720 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4721 if (dump_enabled_p ())
4722 dump_printf_loc (MSG_NOTE, vect_location,
4723 "=== vectorizable_assignment ===\n");
4724 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
4725 return true;
4728 /* Transform. */
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4732 /* Handle def. */
4733 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4735 /* Handle use. */
4736 for (j = 0; j < ncopies; j++)
4738 /* Handle uses. */
4739 if (j == 0)
4740 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
4741 else
4742 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4744 /* Arguments are ready. Create the new vector stmt. */
4745 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4747 if (CONVERT_EXPR_CODE_P (code)
4748 || code == VIEW_CONVERT_EXPR)
4749 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4750 new_stmt = gimple_build_assign (vec_dest, vop);
4751 new_temp = make_ssa_name (vec_dest, new_stmt);
4752 gimple_assign_set_lhs (new_stmt, new_temp);
4753 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4754 if (slp_node)
4755 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4758 if (slp_node)
4759 continue;
4761 if (j == 0)
4762 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4763 else
4764 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4766 prev_stmt_info = vinfo_for_stmt (new_stmt);
4769 vec_oprnds.release ();
4770 return true;
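/* Editor's illustrative sketch (hypothetical, plain C): the conversions
   accepted by vectorizable_assignment preserve both the element count and
   the total vector size, so the emitted stmt is a bit-for-bit copy through
   a VIEW_CONVERT_EXPR.  The union below models such a same-size
   reinterpretation of four ints as four unsigned ints.  */

union example_same_size_vectors
{
  int as_int[4];
  unsigned int as_unsigned[4];
};

static void
example_view_convert_copy (const int *src, unsigned int *dst)
{
  union example_same_size_vectors u;
  for (int i = 0; i < 4; i++)
    u.as_int[i] = src[i];
  for (int i = 0; i < 4; i++)
    dst[i] = u.as_unsigned[i];  /* same bits, different element type */
}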
4774 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4775 either as shift by a scalar or by a vector. */
4777 bool
4778 vect_supportable_shift (enum tree_code code, tree scalar_type)
4781 machine_mode vec_mode;
4782 optab optab;
4783 int icode;
4784 tree vectype;
4786 vectype = get_vectype_for_scalar_type (scalar_type);
4787 if (!vectype)
4788 return false;
4790 optab = optab_for_tree_code (code, vectype, optab_scalar);
4791 if (!optab
4792 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4794 optab = optab_for_tree_code (code, vectype, optab_vector);
4795 if (!optab
4796 || (optab_handler (optab, TYPE_MODE (vectype))
4797 == CODE_FOR_nothing))
4798 return false;
4801 vec_mode = TYPE_MODE (vectype);
4802 icode = (int) optab_handler (optab, vec_mode);
4803 if (icode == CODE_FOR_nothing)
4804 return false;
4806 return true;
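/* Editor's illustrative sketch (hypothetical): the two optab queries above
   correspond to the two shift forms a target may provide, shifting every
   element by one scalar amount or by a per-element amount.  Either form is
   enough for vect_supportable_shift to answer true.  */

static void
example_shift_forms (const unsigned int *a, const unsigned int *amounts,
                     unsigned int scalar_amount,
                     unsigned int *by_scalar, unsigned int *by_vector, int n)
{
  for (int i = 0; i < n; i++)
    {
      by_scalar[i] = a[i] << scalar_amount;     /* vector/scalar form */
      by_vector[i] = a[i] << amounts[i];        /* vector/vector form */
    }
}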
4810 /* Function vectorizable_shift.
4812 Check if STMT performs a shift operation that can be vectorized.
4813 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4814 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4815 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4817 static bool
4818 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4819 gimple **vec_stmt, slp_tree slp_node)
4821 tree vec_dest;
4822 tree scalar_dest;
4823 tree op0, op1 = NULL;
4824 tree vec_oprnd1 = NULL_TREE;
4825 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4826 tree vectype;
4827 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4828 enum tree_code code;
4829 machine_mode vec_mode;
4830 tree new_temp;
4831 optab optab;
4832 int icode;
4833 machine_mode optab_op2_mode;
4834 gimple *def_stmt;
4835 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4836 int ndts = 2;
4837 gimple *new_stmt = NULL;
4838 stmt_vec_info prev_stmt_info;
4839 int nunits_in;
4840 int nunits_out;
4841 tree vectype_out;
4842 tree op1_vectype;
4843 int ncopies;
4844 int j, i;
4845 vec<tree> vec_oprnds0 = vNULL;
4846 vec<tree> vec_oprnds1 = vNULL;
4847 tree vop0, vop1;
4848 unsigned int k;
4849 bool scalar_shift_arg = true;
4850 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4851 vec_info *vinfo = stmt_info->vinfo;
4852 int vf;
4854 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4855 return false;
4857 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4858 && ! vec_stmt)
4859 return false;
4861 /* Is STMT a vectorizable binary/unary operation? */
4862 if (!is_gimple_assign (stmt))
4863 return false;
4865 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4866 return false;
4868 code = gimple_assign_rhs_code (stmt);
4870 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4871 || code == RROTATE_EXPR))
4872 return false;
4874 scalar_dest = gimple_assign_lhs (stmt);
4875 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4876 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4877 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4879 if (dump_enabled_p ())
4880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4881 "bit-precision shifts not supported.\n");
4882 return false;
4885 op0 = gimple_assign_rhs1 (stmt);
4886 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4888 if (dump_enabled_p ())
4889 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4890 "use not simple.\n");
4891 return false;
4893 /* If op0 is an external or constant def use a vector type with
4894 the same size as the output vector type. */
4895 if (!vectype)
4896 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4897 if (vec_stmt)
4898 gcc_assert (vectype);
4899 if (!vectype)
4901 if (dump_enabled_p ())
4902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4903 "no vectype for scalar type\n");
4904 return false;
4907 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4908 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4909 if (nunits_out != nunits_in)
4910 return false;
4912 op1 = gimple_assign_rhs2 (stmt);
4913 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4915 if (dump_enabled_p ())
4916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4917 "use not simple.\n");
4918 return false;
4921 if (loop_vinfo)
4922 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4923 else
4924 vf = 1;
4926 /* Multiple types in SLP are handled by creating the appropriate number of
4927 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4928 case of SLP. */
4929 if (slp_node)
4930 ncopies = 1;
4931 else
4932 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4934 gcc_assert (ncopies >= 1);
4936 /* Determine whether the shift amount is a vector, or scalar. If the
4937 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4939 if ((dt[1] == vect_internal_def
4940 || dt[1] == vect_induction_def)
4941 && !slp_node)
4942 scalar_shift_arg = false;
4943 else if (dt[1] == vect_constant_def
4944 || dt[1] == vect_external_def
4945 || dt[1] == vect_internal_def)
4947 /* In SLP, we need to check whether the shift count is the same;
4948 in loops, if it is a constant or invariant, it is always
4949 a scalar shift. */
4950 if (slp_node)
4952 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4953 gimple *slpstmt;
4955 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4956 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4957 scalar_shift_arg = false;
4960 /* If the shift amount is computed by a pattern stmt we cannot
4961 use the scalar amount directly, so give up and use a vector
4962 shift. */
4963 if (dt[1] == vect_internal_def)
4965 gimple *def = SSA_NAME_DEF_STMT (op1);
4966 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4967 scalar_shift_arg = false;
4970 else
4972 if (dump_enabled_p ())
4973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4974 "operand mode requires invariant argument.\n");
4975 return false;
4978 /* Vector shifted by vector. */
4979 if (!scalar_shift_arg)
4981 optab = optab_for_tree_code (code, vectype, optab_vector);
4982 if (dump_enabled_p ())
4983 dump_printf_loc (MSG_NOTE, vect_location,
4984 "vector/vector shift/rotate found.\n");
4986 if (!op1_vectype)
4987 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4988 if (op1_vectype == NULL_TREE
4989 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4991 if (dump_enabled_p ())
4992 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4993 "unusable type for last operand in"
4994 " vector/vector shift/rotate.\n");
4995 return false;
4998 /* See if the machine has a vector shifted by scalar insn and if not
4999 then see if it has a vector shifted by vector insn. */
5000 else
5002 optab = optab_for_tree_code (code, vectype, optab_scalar);
5003 if (optab
5004 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
5006 if (dump_enabled_p ())
5007 dump_printf_loc (MSG_NOTE, vect_location,
5008 "vector/scalar shift/rotate found.\n");
5010 else
5012 optab = optab_for_tree_code (code, vectype, optab_vector);
5013 if (optab
5014 && (optab_handler (optab, TYPE_MODE (vectype))
5015 != CODE_FOR_nothing))
5017 scalar_shift_arg = false;
5019 if (dump_enabled_p ())
5020 dump_printf_loc (MSG_NOTE, vect_location,
5021 "vector/vector shift/rotate found.\n");
5023 /* Unlike the other binary operators, shifts/rotates take an
5024 int rhs instead of one of the same type as the lhs, so make
5025 sure the scalar has the right type when we are dealing with
5026 vectors of long long/long/short/char. */
5027 if (dt[1] == vect_constant_def)
5028 op1 = fold_convert (TREE_TYPE (vectype), op1);
5029 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
5030 TREE_TYPE (op1)))
5032 if (slp_node
5033 && TYPE_MODE (TREE_TYPE (vectype))
5034 != TYPE_MODE (TREE_TYPE (op1)))
5036 if (dump_enabled_p ())
5037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5038 "unusable type for last operand in"
5039 " vector/vector shift/rotate.\n");
5040 return false;
5042 if (vec_stmt && !slp_node)
5044 op1 = fold_convert (TREE_TYPE (vectype), op1);
5045 op1 = vect_init_vector (stmt, op1,
5046 TREE_TYPE (vectype), NULL);
5053 /* Supportable by target? */
5054 if (!optab)
5056 if (dump_enabled_p ())
5057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5058 "no optab.\n");
5059 return false;
5061 vec_mode = TYPE_MODE (vectype);
5062 icode = (int) optab_handler (optab, vec_mode);
5063 if (icode == CODE_FOR_nothing)
5065 if (dump_enabled_p ())
5066 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5067 "op not supported by target.\n");
5068 /* Check only during analysis. */
5069 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5070 || (vf < vect_min_worthwhile_factor (code)
5071 && !vec_stmt))
5072 return false;
5073 if (dump_enabled_p ())
5074 dump_printf_loc (MSG_NOTE, vect_location,
5075 "proceeding using word mode.\n");
5078 /* Worthwhile without SIMD support? Check only during analysis. */
5079 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
5080 && vf < vect_min_worthwhile_factor (code)
5081 && !vec_stmt)
5083 if (dump_enabled_p ())
5084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5085 "not worthwhile without SIMD support.\n");
5086 return false;
5089 if (!vec_stmt) /* transformation not required. */
5091 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
5092 if (dump_enabled_p ())
5093 dump_printf_loc (MSG_NOTE, vect_location,
5094 "=== vectorizable_shift ===\n");
5095 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5096 return true;
5099 /* Transform. */
5101 if (dump_enabled_p ())
5102 dump_printf_loc (MSG_NOTE, vect_location,
5103 "transform binary/unary operation.\n");
5105 /* Handle def. */
5106 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5108 prev_stmt_info = NULL;
5109 for (j = 0; j < ncopies; j++)
5111 /* Handle uses. */
5112 if (j == 0)
5114 if (scalar_shift_arg)
5116 /* Vector shl and shr insn patterns can be defined with scalar
5117 operand 2 (shift operand). In this case, use constant or loop
5118 invariant op1 directly, without extending it to vector mode
5119 first. */
5120 optab_op2_mode = insn_data[icode].operand[2].mode;
5121 if (!VECTOR_MODE_P (optab_op2_mode))
5123 if (dump_enabled_p ())
5124 dump_printf_loc (MSG_NOTE, vect_location,
5125 "operand 1 using scalar mode.\n");
5126 vec_oprnd1 = op1;
5127 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
5128 vec_oprnds1.quick_push (vec_oprnd1);
5129 if (slp_node)
5131 /* Store vec_oprnd1 for every vector stmt to be created
5132 for SLP_NODE. We check during the analysis that all
5133 the shift arguments are the same.
5134 TODO: Allow different constants for different vector
5135 stmts generated for an SLP instance. */
5136 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
5137 vec_oprnds1.quick_push (vec_oprnd1);
5142 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5143 (a special case for certain kinds of vector shifts); otherwise,
5144 operand 1 should be of a vector type (the usual case). */
5145 if (vec_oprnd1)
5146 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5147 slp_node);
5148 else
5149 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5150 slp_node);
5152 else
5153 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5155 /* Arguments are ready. Create the new vector stmt. */
5156 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5158 vop1 = vec_oprnds1[i];
5159 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5160 new_temp = make_ssa_name (vec_dest, new_stmt);
5161 gimple_assign_set_lhs (new_stmt, new_temp);
5162 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5163 if (slp_node)
5164 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5167 if (slp_node)
5168 continue;
5170 if (j == 0)
5171 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5172 else
5173 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5174 prev_stmt_info = vinfo_for_stmt (new_stmt);
5177 vec_oprnds0.release ();
5178 vec_oprnds1.release ();
5180 return true;
5184 /* Function vectorizable_operation.
5186 Check if STMT performs a binary, unary or ternary operation that can
5187 be vectorized.
5188 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5189 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5190 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5192 static bool
5193 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5194 gimple **vec_stmt, slp_tree slp_node)
5196 tree vec_dest;
5197 tree scalar_dest;
5198 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5199 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5200 tree vectype;
5201 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5202 enum tree_code code;
5203 machine_mode vec_mode;
5204 tree new_temp;
5205 int op_type;
5206 optab optab;
5207 bool target_support_p;
5208 gimple *def_stmt;
5209 enum vect_def_type dt[3]
5210 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5211 int ndts = 3;
5212 gimple *new_stmt = NULL;
5213 stmt_vec_info prev_stmt_info;
5214 int nunits_in;
5215 int nunits_out;
5216 tree vectype_out;
5217 int ncopies;
5218 int j, i;
5219 vec<tree> vec_oprnds0 = vNULL;
5220 vec<tree> vec_oprnds1 = vNULL;
5221 vec<tree> vec_oprnds2 = vNULL;
5222 tree vop0, vop1, vop2;
5223 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5224 vec_info *vinfo = stmt_info->vinfo;
5225 int vf;
5227 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5228 return false;
5230 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5231 && ! vec_stmt)
5232 return false;
5234 /* Is STMT a vectorizable binary/unary operation? */
5235 if (!is_gimple_assign (stmt))
5236 return false;
5238 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5239 return false;
5241 code = gimple_assign_rhs_code (stmt);
5243 /* For pointer addition, we should use the normal plus for
5244 the vector addition. */
5245 if (code == POINTER_PLUS_EXPR)
5246 code = PLUS_EXPR;
5248 /* Support only unary, binary or ternary operations. */
5249 op_type = TREE_CODE_LENGTH (code);
5250 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5252 if (dump_enabled_p ())
5253 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5254 "num. args = %d (not unary/binary/ternary op).\n",
5255 op_type);
5256 return false;
5259 scalar_dest = gimple_assign_lhs (stmt);
5260 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5262 /* Most operations cannot handle bit-precision types without extra
5263 truncations. */
5264 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5265 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5266 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
5267 /* Exceptions are bitwise binary operations. */
5268 && code != BIT_IOR_EXPR
5269 && code != BIT_XOR_EXPR
5270 && code != BIT_AND_EXPR)
5272 if (dump_enabled_p ())
5273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5274 "bit-precision arithmetic not supported.\n");
5275 return false;
5278 op0 = gimple_assign_rhs1 (stmt);
5279 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5281 if (dump_enabled_p ())
5282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5283 "use not simple.\n");
5284 return false;
5286 /* If op0 is an external or constant def use a vector type with
5287 the same size as the output vector type. */
5288 if (!vectype)
5290 /* For a boolean type we cannot determine the vectype from an
5291 invariant value (we don't know whether it is a vector
5292 of booleans or a vector of integers). We use the output
5293 vectype because operations on booleans don't change the
5294 type. */
5295 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
5297 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
5299 if (dump_enabled_p ())
5300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5301 "not supported operation on bool value.\n");
5302 return false;
5304 vectype = vectype_out;
5306 else
5307 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5309 if (vec_stmt)
5310 gcc_assert (vectype);
5311 if (!vectype)
5313 if (dump_enabled_p ())
5315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5316 "no vectype for scalar type ");
5317 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5318 TREE_TYPE (op0));
5319 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5322 return false;
5325 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5326 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5327 if (nunits_out != nunits_in)
5328 return false;
5330 if (op_type == binary_op || op_type == ternary_op)
5332 op1 = gimple_assign_rhs2 (stmt);
5333 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5335 if (dump_enabled_p ())
5336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5337 "use not simple.\n");
5338 return false;
5341 if (op_type == ternary_op)
5343 op2 = gimple_assign_rhs3 (stmt);
5344 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5346 if (dump_enabled_p ())
5347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5348 "use not simple.\n");
5349 return false;
5353 if (loop_vinfo)
5354 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5355 else
5356 vf = 1;
5358 /* Multiple types in SLP are handled by creating the appropriate number of
5359 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5360 case of SLP. */
5361 if (slp_node)
5362 ncopies = 1;
5363 else
5364 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5366 gcc_assert (ncopies >= 1);
5368 /* Shifts are handled in vectorizable_shift (). */
5369 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5370 || code == RROTATE_EXPR)
5371 return false;
5373 /* Supportable by target? */
5375 vec_mode = TYPE_MODE (vectype);
5376 if (code == MULT_HIGHPART_EXPR)
5377 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5378 else
5380 optab = optab_for_tree_code (code, vectype, optab_default);
5381 if (!optab)
5383 if (dump_enabled_p ())
5384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5385 "no optab.\n");
5386 return false;
5388 target_support_p = (optab_handler (optab, vec_mode)
5389 != CODE_FOR_nothing);
5392 if (!target_support_p)
5394 if (dump_enabled_p ())
5395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5396 "op not supported by target.\n");
5397 /* Check only during analysis. */
5398 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5399 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5400 return false;
5401 if (dump_enabled_p ())
5402 dump_printf_loc (MSG_NOTE, vect_location,
5403 "proceeding using word mode.\n");
5406 /* Worthwhile without SIMD support? Check only during analysis. */
5407 if (!VECTOR_MODE_P (vec_mode)
5408 && !vec_stmt
5409 && vf < vect_min_worthwhile_factor (code))
5411 if (dump_enabled_p ())
5412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5413 "not worthwhile without SIMD support.\n");
5414 return false;
5417 if (!vec_stmt) /* transformation not required. */
5419 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5420 if (dump_enabled_p ())
5421 dump_printf_loc (MSG_NOTE, vect_location,
5422 "=== vectorizable_operation ===\n");
5423 vect_model_simple_cost (stmt_info, ncopies, dt, ndts, NULL, NULL);
5424 return true;
5427 /* Transform. */
5429 if (dump_enabled_p ())
5430 dump_printf_loc (MSG_NOTE, vect_location,
5431 "transform binary/unary operation.\n");
5433 /* Handle def. */
5434 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5436 /* In case the vectorization factor (VF) is bigger than the number
5437 of elements that we can fit in a vectype (nunits), we have to generate
5438 more than one vector stmt, i.e., we need to "unroll" the
5439 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5440 from one copy of the vector stmt to the next, in the field
5441 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5442 stages to find the correct vector defs to be used when vectorizing
5443 stmts that use the defs of the current stmt. The example below
5444 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5445 we need to create 4 vectorized stmts):
5447 before vectorization:
5448 RELATED_STMT VEC_STMT
5449 S1: x = memref - -
5450 S2: z = x + 1 - -
5452 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5453 there):
5454 RELATED_STMT VEC_STMT
5455 VS1_0: vx0 = memref0 VS1_1 -
5456 VS1_1: vx1 = memref1 VS1_2 -
5457 VS1_2: vx2 = memref2 VS1_3 -
5458 VS1_3: vx3 = memref3 - -
5459 S1: x = load - VS1_0
5460 S2: z = x + 1 - -
5462 step2: vectorize stmt S2 (done here):
5463 To vectorize stmt S2 we first need to find the relevant vector
5464 def for the first operand 'x'. This is, as usual, obtained from
5465 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5466 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5467 relevant vector def 'vx0'. Having found 'vx0' we can generate
5468 the vector stmt VS2_0, and as usual, record it in the
5469 STMT_VINFO_VEC_STMT of stmt S2.
5470 When creating the second copy (VS2_1), we obtain the relevant vector
5471 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5472 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5473 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5474 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5475 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5476 chain of stmts and pointers:
5477 RELATED_STMT VEC_STMT
5478 VS1_0: vx0 = memref0 VS1_1 -
5479 VS1_1: vx1 = memref1 VS1_2 -
5480 VS1_2: vx2 = memref2 VS1_3 -
5481 VS1_3: vx3 = memref3 - -
5482 S1: x = load - VS1_0
5483 VS2_0: vz0 = vx0 + v1 VS2_1 -
5484 VS2_1: vz1 = vx1 + v1 VS2_2 -
5485 VS2_2: vz2 = vx2 + v1 VS2_3 -
5486 VS2_3: vz3 = vx3 + v1 - -
5487 S2: z = x + 1 - VS2_0 */
5489 prev_stmt_info = NULL;
5490 for (j = 0; j < ncopies; j++)
5492 /* Handle uses. */
5493 if (j == 0)
5495 if (op_type == binary_op || op_type == ternary_op)
5496 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5497 slp_node);
5498 else
5499 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5500 slp_node);
5501 if (op_type == ternary_op)
5502 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5503 slp_node);
5505 else
5507 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5508 if (op_type == ternary_op)
5510 tree vec_oprnd = vec_oprnds2.pop ();
5511 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5512 vec_oprnd));
5516 /* Arguments are ready. Create the new vector stmt. */
5517 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5519 vop1 = ((op_type == binary_op || op_type == ternary_op)
5520 ? vec_oprnds1[i] : NULL_TREE);
5521 vop2 = ((op_type == ternary_op)
5522 ? vec_oprnds2[i] : NULL_TREE);
5523 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5524 new_temp = make_ssa_name (vec_dest, new_stmt);
5525 gimple_assign_set_lhs (new_stmt, new_temp);
5526 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5527 if (slp_node)
5528 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5531 if (slp_node)
5532 continue;
5534 if (j == 0)
5535 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5536 else
5537 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5538 prev_stmt_info = vinfo_for_stmt (new_stmt);
5541 vec_oprnds0.release ();
5542 vec_oprnds1.release ();
5543 vec_oprnds2.release ();
5545 return true;
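/* Editor's illustrative sketch (hypothetical types, not GCC API): the
   RELATED_STMT chain described in the comment inside
   vectorizable_operation is simply a singly linked list of the NCOPIES
   vector copies of one scalar stmt, with VEC_STMT pointing at the first
   copy.  */

struct example_vec_copy
{
  int copy_index;                               /* 0 .. ncopies - 1 */
  struct example_vec_copy *related_stmt;        /* next copy, NULL for the last */
};

static struct example_vec_copy *
example_nth_copy (struct example_vec_copy *first, int n)
{
  /* Walk the RELATED_STMT chain N times, as later stmts do when looking
     up the vector def that matches a particular copy.  */
  while (n-- > 0 && first)
    first = first->related_stmt;
  return first;
}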
5548 /* A helper function to ensure data reference DR's base alignment
5549 for STMT_INFO. */
5551 static void
5552 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5554 if (!dr->aux)
5555 return;
5557 if (DR_VECT_AUX (dr)->base_misaligned)
5559 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5560 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5562 if (decl_in_symtab_p (base_decl))
5563 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5564 else
5566 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5567 DECL_USER_ALIGN (base_decl) = 1;
5569 DR_VECT_AUX (dr)->base_misaligned = false;
5574 /* Function get_group_alias_ptr_type.
5576 Return the alias type for the group starting at FIRST_STMT. */
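/* Illustrative note: if every member of the group accesses the same array
   of int, the result is simply the alias pointer type of the first
   reference (an "int *"-like type).  If two members are accessed through
   lvalues with conflicting alias sets (e.g. one through the array type and
   one through a type-punned pointer), the function below conservatively
   falls back to ptr_type_node, which can alias anything.  */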
5578 static tree
5579 get_group_alias_ptr_type (gimple *first_stmt)
5581 struct data_reference *first_dr, *next_dr;
5582 gimple *next_stmt;
5584 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5585 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5586 while (next_stmt)
5588 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5589 if (get_alias_set (DR_REF (first_dr))
5590 != get_alias_set (DR_REF (next_dr)))
5592 if (dump_enabled_p ())
5593 dump_printf_loc (MSG_NOTE, vect_location,
5594 "conflicting alias set types.\n");
5595 return ptr_type_node;
5597 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5599 return reference_alias_ptr_type (DR_REF (first_dr));
5603 /* Function vectorizable_store.
5605 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5606 can be vectorized.
5607 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5608 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5609 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
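/* Illustrative usage sketch (not actual code from this file; the callers in
   practice are vect_analyze_stmt and vect_transform_stmt):

     if (!vectorizable_store (stmt, NULL, NULL, slp_node))
       ...not handled; some other vectorizable_* routine must match...

     gimple *vec_stmt = NULL;
     vectorizable_store (stmt, gsi, &vec_stmt, slp_node);

   The first call (VEC_STMT == NULL) only performs the checks and records
   the cost; the second call emits the vector stores at GSI and returns the
   first of them in VEC_STMT.  */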
5611 static bool
5612 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5613 slp_tree slp_node)
5615 tree scalar_dest;
5616 tree data_ref;
5617 tree op;
5618 tree vec_oprnd = NULL_TREE;
5619 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5620 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5621 tree elem_type;
5622 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5623 struct loop *loop = NULL;
5624 machine_mode vec_mode;
5625 tree dummy;
5626 enum dr_alignment_support alignment_support_scheme;
5627 gimple *def_stmt;
5628 enum vect_def_type dt;
5629 stmt_vec_info prev_stmt_info = NULL;
5630 tree dataref_ptr = NULL_TREE;
5631 tree dataref_offset = NULL_TREE;
5632 gimple *ptr_incr = NULL;
5633 int ncopies;
5634 int j;
5635 gimple *next_stmt, *first_stmt;
5636 bool grouped_store;
5637 unsigned int group_size, i;
5638 vec<tree> oprnds = vNULL;
5639 vec<tree> result_chain = vNULL;
5640 bool inv_p;
5641 tree offset = NULL_TREE;
5642 vec<tree> vec_oprnds = vNULL;
5643 bool slp = (slp_node != NULL);
5644 unsigned int vec_num;
5645 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5646 vec_info *vinfo = stmt_info->vinfo;
5647 tree aggr_type;
5648 gather_scatter_info gs_info;
5649 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5650 gimple *new_stmt;
5651 int vf;
5652 vec_load_store_type vls_type;
5653 tree ref_type;
5655 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5656 return false;
5658 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5659 && ! vec_stmt)
5660 return false;
5662 /* Is vectorizable store? */
5664 if (!is_gimple_assign (stmt))
5665 return false;
5667 scalar_dest = gimple_assign_lhs (stmt);
5668 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5669 && is_pattern_stmt_p (stmt_info))
5670 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5671 if (TREE_CODE (scalar_dest) != ARRAY_REF
5672 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5673 && TREE_CODE (scalar_dest) != INDIRECT_REF
5674 && TREE_CODE (scalar_dest) != COMPONENT_REF
5675 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5676 && TREE_CODE (scalar_dest) != REALPART_EXPR
5677 && TREE_CODE (scalar_dest) != MEM_REF)
5678 return false;
5680 /* Cannot have hybrid store SLP -- that would mean storing to the
5681 same location twice. */
5682 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5684 gcc_assert (gimple_assign_single_p (stmt));
5686 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5687 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5689 if (loop_vinfo)
5691 loop = LOOP_VINFO_LOOP (loop_vinfo);
5692 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5694 else
5695 vf = 1;
5697 /* Multiple types in SLP are handled by creating the appropriate number of
5698 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5699 case of SLP. */
5700 if (slp)
5701 ncopies = 1;
5702 else
5703 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5705 gcc_assert (ncopies >= 1);
5707 /* FORNOW. This restriction should be relaxed. */
5708 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5710 if (dump_enabled_p ())
5711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5712 "multiple types in nested loop.\n");
5713 return false;
5716 op = gimple_assign_rhs1 (stmt);
5718 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5720 if (dump_enabled_p ())
5721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5722 "use not simple.\n");
5723 return false;
5726 if (dt == vect_constant_def || dt == vect_external_def)
5727 vls_type = VLS_STORE_INVARIANT;
5728 else
5729 vls_type = VLS_STORE;
5731 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5732 return false;
5734 elem_type = TREE_TYPE (vectype);
5735 vec_mode = TYPE_MODE (vectype);
5737 /* FORNOW. In some cases we can vectorize even if the data type is not
5738 supported (e.g. array initialization with 0). */
5739 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5740 return false;
5742 if (!STMT_VINFO_DATA_REF (stmt_info))
5743 return false;
5745 vect_memory_access_type memory_access_type;
5746 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5747 &memory_access_type, &gs_info))
5748 return false;
5750 if (!vec_stmt) /* transformation not required. */
5752 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5753 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5754 /* The SLP costs are calculated during SLP analysis. */
5755 if (!PURE_SLP_STMT (stmt_info))
5756 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5757 NULL, NULL, NULL);
5758 return true;
5760 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5762 /* Transform. */
5764 ensure_base_align (stmt_info, dr);
5766 if (memory_access_type == VMAT_GATHER_SCATTER)
5768 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5769 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5770 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5771 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5772 edge pe = loop_preheader_edge (loop);
5773 gimple_seq seq;
5774 basic_block new_bb;
5775 enum { NARROW, NONE, WIDEN } modifier;
5776 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5778 if (nunits == (unsigned int) scatter_off_nunits)
5779 modifier = NONE;
5780 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5782 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5783 modifier = WIDEN;
5785 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5786 sel[i] = i | nunits;
5788 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5789 gcc_assert (perm_mask != NULL_TREE);
5791 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5793 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5794 modifier = NARROW;
5796 for (i = 0; i < (unsigned int) nunits; ++i)
5797 sel[i] = i | scatter_off_nunits;
5799 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5800 gcc_assert (perm_mask != NULL_TREE);
5801 ncopies *= 2;
5803 else
5804 gcc_unreachable ();
5806 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5807 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5808 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5809 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5810 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5811 scaletype = TREE_VALUE (arglist);
5813 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5814 && TREE_CODE (rettype) == VOID_TYPE);
5816 ptr = fold_convert (ptrtype, gs_info.base);
5817 if (!is_gimple_min_invariant (ptr))
5819 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5820 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5821 gcc_assert (!new_bb);
5824 /* Currently we support only unconditional scatter stores,
5825 so mask should be all ones. */
5826 mask = build_int_cst (masktype, -1);
5827 mask = vect_init_vector (stmt, mask, masktype, NULL);
5829 scale = build_int_cst (scaletype, gs_info.scale);
5831 prev_stmt_info = NULL;
5832 for (j = 0; j < ncopies; ++j)
5834 if (j == 0)
5836 src = vec_oprnd1
5837 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5838 op = vec_oprnd0
5839 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5841 else if (modifier != NONE && (j & 1))
5843 if (modifier == WIDEN)
5845 src = vec_oprnd1
5846 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5847 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5848 stmt, gsi);
5850 else if (modifier == NARROW)
5852 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5853 stmt, gsi);
5854 op = vec_oprnd0
5855 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5856 vec_oprnd0);
5858 else
5859 gcc_unreachable ();
5861 else
5863 src = vec_oprnd1
5864 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5865 op = vec_oprnd0
5866 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5867 vec_oprnd0);
5870 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5872 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5873 == TYPE_VECTOR_SUBPARTS (srctype));
5874 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5875 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5876 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5877 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5878 src = var;
5881 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5883 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5884 == TYPE_VECTOR_SUBPARTS (idxtype));
5885 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5886 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5887 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5888 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5889 op = var;
5892 new_stmt
5893 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5895 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5897 if (prev_stmt_info == NULL)
5898 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5899 else
5900 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5901 prev_stmt_info = vinfo_for_stmt (new_stmt);
5903 return true;
5906 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5907 if (grouped_store)
5909 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5910 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5911 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5913 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5915 /* FORNOW */
5916 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5918 /* We vectorize all the stmts of the interleaving group when we
5919 reach the last stmt in the group. */
5920 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5921 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5922 && !slp)
5924 *vec_stmt = NULL;
5925 return true;
5928 if (slp)
5930 grouped_store = false;
5931 /* VEC_NUM is the number of vect stmts to be created for this
5932 group. */
5933 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5934 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5935 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5936 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5937 op = gimple_assign_rhs1 (first_stmt);
5939 else
5940 /* VEC_NUM is the number of vect stmts to be created for this
5941 group. */
5942 vec_num = group_size;
5944 ref_type = get_group_alias_ptr_type (first_stmt);
5946 else
5948 first_stmt = stmt;
5949 first_dr = dr;
5950 group_size = vec_num = 1;
5951 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5954 if (dump_enabled_p ())
5955 dump_printf_loc (MSG_NOTE, vect_location,
5956 "transform store. ncopies = %d\n", ncopies);
5958 if (memory_access_type == VMAT_ELEMENTWISE
5959 || memory_access_type == VMAT_STRIDED_SLP)
5961 gimple_stmt_iterator incr_gsi;
5962 bool insert_after;
5963 gimple *incr;
5964 tree offvar;
5965 tree ivstep;
5966 tree running_off;
5967 gimple_seq stmts = NULL;
5968 tree stride_base, stride_step, alias_off;
5969 tree vec_oprnd;
5970 unsigned int g;
5972 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5974 stride_base
5975 = fold_build_pointer_plus
5976 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5977 size_binop (PLUS_EXPR,
5978 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5979 convert_to_ptrofftype (DR_INIT (first_dr))));
5980 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5982 /* For a store with loop-invariant (but other than power-of-2)
5983 stride (i.e. not a grouped access) like so:
5985 for (i = 0; i < n; i += stride)
5986 array[i] = ...;
5988 we generate a new induction variable and new stores from
5989 the components of the (vectorized) rhs:
5991 for (j = 0; ; j += VF*stride)
5992 vectemp = ...;
5993 tmp1 = vectemp[0];
5994 array[j] = tmp1;
5995 tmp2 = vectemp[1];
5996 array[j + stride] = tmp2;
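	 For instance, assuming a V4SI vectype (VF = 4) and stride = 3
	 (illustrative values), the scalar loop

	   for (i = 0; i < n; i += 3)
	     array[i] = ...;

	 becomes, conceptually,

	   for (j = 0; ; j += 12)
	     vectemp = ...;
	     array[j]     = vectemp[0];
	     array[j + 3] = vectemp[1];
	     array[j + 6] = vectemp[2];
	     array[j + 9] = vectemp[3];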
6000 unsigned nstores = nunits;
6001 unsigned lnel = 1;
6002 tree ltype = elem_type;
6003 if (slp)
6005 if (group_size < nunits
6006 && nunits % group_size == 0)
6008 nstores = nunits / group_size;
6009 lnel = group_size;
6010 ltype = build_vector_type (elem_type, group_size);
6012 else if (group_size >= nunits
6013 && group_size % nunits == 0)
6015 nstores = 1;
6016 lnel = nunits;
6017 ltype = vectype;
6019 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
6020 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6023 ivstep = stride_step;
6024 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6025 build_int_cst (TREE_TYPE (ivstep), vf));
6027 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6029 create_iv (stride_base, ivstep, NULL,
6030 loop, &incr_gsi, insert_after,
6031 &offvar, NULL);
6032 incr = gsi_stmt (incr_gsi);
6033 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6035 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6036 if (stmts)
6037 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6039 prev_stmt_info = NULL;
6040 alias_off = build_int_cst (ref_type, 0);
6041 next_stmt = first_stmt;
6042 for (g = 0; g < group_size; g++)
6044 running_off = offvar;
6045 if (g)
6047 tree size = TYPE_SIZE_UNIT (ltype);
6048 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
6049 size);
6050 tree newoff = copy_ssa_name (running_off, NULL);
6051 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6052 running_off, pos);
6053 vect_finish_stmt_generation (stmt, incr, gsi);
6054 running_off = newoff;
6056 unsigned int group_el = 0;
6057 unsigned HOST_WIDE_INT
6058 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6059 for (j = 0; j < ncopies; j++)
6061 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
6062 and first_stmt == stmt. */
6063 if (j == 0)
6065 if (slp)
6067 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
6068 slp_node);
6069 vec_oprnd = vec_oprnds[0];
6071 else
6073 gcc_assert (gimple_assign_single_p (next_stmt));
6074 op = gimple_assign_rhs1 (next_stmt);
6075 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6078 else
6080 if (slp)
6081 vec_oprnd = vec_oprnds[j];
6082 else
6084 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
6085 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
6089 for (i = 0; i < nstores; i++)
6091 tree newref, newoff;
6092 gimple *incr, *assign;
6093 tree size = TYPE_SIZE (ltype);
6094 /* Extract the i'th component. */
6095 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
6096 bitsize_int (i), size);
6097 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
6098 size, pos);
6100 elem = force_gimple_operand_gsi (gsi, elem, true,
6101 NULL_TREE, true,
6102 GSI_SAME_STMT);
6104 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6105 group_el * elsz);
6106 newref = build2 (MEM_REF, ltype,
6107 running_off, this_off);
6109 /* And store it to *running_off. */
6110 assign = gimple_build_assign (newref, elem);
6111 vect_finish_stmt_generation (stmt, assign, gsi);
6113 group_el += lnel;
6114 if (! slp
6115 || group_el == group_size)
6117 newoff = copy_ssa_name (running_off, NULL);
6118 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6119 running_off, stride_step);
6120 vect_finish_stmt_generation (stmt, incr, gsi);
6122 running_off = newoff;
6123 group_el = 0;
6125 if (g == group_size - 1
6126 && !slp)
6128 if (j == 0 && i == 0)
6129 STMT_VINFO_VEC_STMT (stmt_info)
6130 = *vec_stmt = assign;
6131 else
6132 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
6133 prev_stmt_info = vinfo_for_stmt (assign);
6137 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6138 if (slp)
6139 break;
6142 vec_oprnds.release ();
6143 return true;
6146 auto_vec<tree> dr_chain (group_size);
6147 oprnds.create (group_size);
6149 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6150 gcc_assert (alignment_support_scheme);
6151 /* Targets with store-lane instructions must not require explicit
6152 realignment. */
6153 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6154 || alignment_support_scheme == dr_aligned
6155 || alignment_support_scheme == dr_unaligned_supported);
6157 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6158 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6159 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6161 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6162 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6163 else
6164 aggr_type = vectype;
6166 /* In case the vectorization factor (VF) is bigger than the number
6167 of elements that we can fit in a vectype (nunits), we have to generate
6168 more than one vector stmt, i.e. we need to "unroll" the
6169 vector stmt by a factor VF/nunits. For more details see documentation in
6170 vect_get_vec_def_for_copy_stmt. */
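/* For instance (illustrative numbers): with VF = 8 and a V4SI vectype
   (nunits = 4), ncopies = 8 / 4 = 2, so every scalar store below gives
   rise to two vector stores per iteration of the vectorized loop.  */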
6172 /* In case of interleaving (non-unit grouped access):
6174 S1: &base + 2 = x2
6175 S2: &base = x0
6176 S3: &base + 1 = x1
6177 S4: &base + 3 = x3
6179 We create vectorized stores starting from the base address (the access of
6180 the first stmt in the chain -- S2 in the above example) when the last store stmt
6181 of the chain (S4) is reached:
6183 VS1: &base = vx2
6184 VS2: &base + vec_size*1 = vx0
6185 VS3: &base + vec_size*2 = vx1
6186 VS4: &base + vec_size*3 = vx3
6188 Then permutation statements are generated:
6190 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6191 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6194 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6195 (the order of the data-refs in the output of vect_permute_store_chain
6196 corresponds to the order of scalar stmts in the interleaving chain - see
6197 the documentation of vect_permute_store_chain()).
6199 In case of both multiple types and interleaving, above vector stores and
6200 permutation stmts are created for every copy. The result vector stmts are
6201 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6202 STMT_VINFO_RELATED_STMT for the next copies.
6205 prev_stmt_info = NULL;
6206 for (j = 0; j < ncopies; j++)
6209 if (j == 0)
6211 if (slp)
6213 /* Get vectorized arguments for SLP_NODE. */
6214 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6215 NULL, slp_node);
6217 vec_oprnd = vec_oprnds[0];
6219 else
6221 /* For interleaved stores we collect vectorized defs for all the
6222 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6223 used as an input to vect_permute_store_chain(), and OPRNDS as
6224 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6226 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6227 OPRNDS are of size 1. */
6228 next_stmt = first_stmt;
6229 for (i = 0; i < group_size; i++)
6231 /* Since gaps are not supported for interleaved stores,
6232 GROUP_SIZE is the exact number of stmts in the chain.
6233 Therefore, NEXT_STMT can't be NULL_TREE. In case that
6234 there is no interleaving, GROUP_SIZE is 1, and only one
6235 iteration of the loop will be executed. */
6236 gcc_assert (next_stmt
6237 && gimple_assign_single_p (next_stmt));
6238 op = gimple_assign_rhs1 (next_stmt);
6240 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6241 dr_chain.quick_push (vec_oprnd);
6242 oprnds.quick_push (vec_oprnd);
6243 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6247 /* We should have caught mismatched types earlier. */
6248 gcc_assert (useless_type_conversion_p (vectype,
6249 TREE_TYPE (vec_oprnd)));
6250 bool simd_lane_access_p
6251 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6252 if (simd_lane_access_p
6253 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6254 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6255 && integer_zerop (DR_OFFSET (first_dr))
6256 && integer_zerop (DR_INIT (first_dr))
6257 && alias_sets_conflict_p (get_alias_set (aggr_type),
6258 get_alias_set (TREE_TYPE (ref_type))))
6260 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6261 dataref_offset = build_int_cst (ref_type, 0);
6262 inv_p = false;
6264 else
6265 dataref_ptr
6266 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6267 simd_lane_access_p ? loop : NULL,
6268 offset, &dummy, gsi, &ptr_incr,
6269 simd_lane_access_p, &inv_p);
6270 gcc_assert (bb_vinfo || !inv_p);
6272 else
6274 /* For interleaved stores we created vectorized defs for all the
6275 defs stored in OPRNDS in the previous iteration (previous copy).
6276 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6277 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6278 next copy.
6279 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6280 OPRNDS are of size 1. */
6281 for (i = 0; i < group_size; i++)
6283 op = oprnds[i];
6284 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6285 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6286 dr_chain[i] = vec_oprnd;
6287 oprnds[i] = vec_oprnd;
6289 if (dataref_offset)
6290 dataref_offset
6291 = int_const_binop (PLUS_EXPR, dataref_offset,
6292 TYPE_SIZE_UNIT (aggr_type));
6293 else
6294 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6295 TYPE_SIZE_UNIT (aggr_type));
6298 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6300 tree vec_array;
6302 /* Combine all the vectors into an array. */
6303 vec_array = create_vector_array (vectype, vec_num);
6304 for (i = 0; i < vec_num; i++)
6306 vec_oprnd = dr_chain[i];
6307 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6310 /* Emit:
6311 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
6312 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6313 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6314 gimple_call_set_lhs (new_stmt, data_ref);
6315 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6317 else
6319 new_stmt = NULL;
6320 if (grouped_store)
6322 if (j == 0)
6323 result_chain.create (group_size);
6324 /* Permute. */
6325 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6326 &result_chain);
6329 next_stmt = first_stmt;
6330 for (i = 0; i < vec_num; i++)
6332 unsigned align, misalign;
6334 if (i > 0)
6335 /* Bump the vector pointer. */
6336 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6337 stmt, NULL_TREE);
6339 if (slp)
6340 vec_oprnd = vec_oprnds[i];
6341 else if (grouped_store)
6342 /* For grouped stores vectorized defs are interleaved in
6343 vect_permute_store_chain(). */
6344 vec_oprnd = result_chain[i];
6346 data_ref = fold_build2 (MEM_REF, vectype,
6347 dataref_ptr,
6348 dataref_offset
6349 ? dataref_offset
6350 : build_int_cst (ref_type, 0));
6351 align = TYPE_ALIGN_UNIT (vectype);
6352 if (aligned_access_p (first_dr))
6353 misalign = 0;
6354 else if (DR_MISALIGNMENT (first_dr) == -1)
6356 align = dr_alignment (vect_dr_behavior (first_dr));
6357 misalign = 0;
6358 TREE_TYPE (data_ref)
6359 = build_aligned_type (TREE_TYPE (data_ref),
6360 align * BITS_PER_UNIT);
6362 else
6364 TREE_TYPE (data_ref)
6365 = build_aligned_type (TREE_TYPE (data_ref),
6366 TYPE_ALIGN (elem_type));
6367 misalign = DR_MISALIGNMENT (first_dr);
6369 if (dataref_offset == NULL_TREE
6370 && TREE_CODE (dataref_ptr) == SSA_NAME)
6371 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6372 misalign);
6374 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6376 tree perm_mask = perm_mask_for_reverse (vectype);
6377 tree perm_dest
6378 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6379 vectype);
6380 tree new_temp = make_ssa_name (perm_dest);
6382 /* Generate the permute statement. */
6383 gimple *perm_stmt
6384 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6385 vec_oprnd, perm_mask);
6386 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6388 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6389 vec_oprnd = new_temp;
6392 /* Arguments are ready. Create the new vector stmt. */
6393 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6394 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6396 if (slp)
6397 continue;
6399 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6400 if (!next_stmt)
6401 break;
6404 if (!slp)
6406 if (j == 0)
6407 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6408 else
6409 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6410 prev_stmt_info = vinfo_for_stmt (new_stmt);
6414 oprnds.release ();
6415 result_chain.release ();
6416 vec_oprnds.release ();
6418 return true;
6421 /* Given a vector type VECTYPE, turn the permutation SEL into the equivalent
6422 VECTOR_CST mask. No checks are made that the target platform supports the
6423 mask, so callers may wish to test can_vec_perm_p separately, or use
6424 vect_gen_perm_mask_checked. */
6426 tree
6427 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6429 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6430 int i, nunits;
6432 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6434 mask_elt_type = lang_hooks.types.type_for_mode
6435 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6436 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6438 mask_elts = XALLOCAVEC (tree, nunits);
6439 for (i = nunits - 1; i >= 0; i--)
6440 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6441 mask_vec = build_vector (mask_type, mask_elts);
6443 return mask_vec;
6446 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6447 i.e. that the target supports the pattern _for arbitrary input vectors_. */
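/* Illustrative usage sketch (the values are hypothetical): to reverse a
   4-element vector, fill SEL with the reversed lane indices and let the
   checked variant assert target support:

     unsigned char sel[4] = { 3, 2, 1, 0 };
     tree mask = vect_gen_perm_mask_checked (vectype, sel);

   MASK can then be used as the selector operand of a VEC_PERM_EXPR with
   the same vector as both inputs, as e.g. permute_vec_elements below does
   with the masks it is handed.  */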
6449 tree
6450 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6452 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6453 return vect_gen_perm_mask_any (vectype, sel);
6456 /* Given vector variables X and Y that were generated for the scalar
6457 STMT, generate instructions to permute the vector elements of X and Y
6458 using permutation mask MASK_VEC, insert them at *GSI and return the
6459 permuted vector variable. */
6461 static tree
6462 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6463 gimple_stmt_iterator *gsi)
6465 tree vectype = TREE_TYPE (x);
6466 tree perm_dest, data_ref;
6467 gimple *perm_stmt;
6469 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6470 data_ref = make_ssa_name (perm_dest);
6472 /* Generate the permute statement. */
6473 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6474 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6476 return data_ref;
6479 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6480 inserting them on the loop's preheader edge. Returns true if we
6481 were successful in doing so (and thus STMT can then be moved),
6482 otherwise returns false. */
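/* For illustration (the SSA names are made up): if STMT is the invariant
   load

     _5 = *p_3;

   and p_3 is defined inside LOOP by

     p_3 = q_2 + 16;

   where q_2 is defined outside the loop, then the definition of p_3 is
   moved onto the preheader edge and true is returned, so the caller may
   hoist the load itself afterwards.  If p_3 were defined by a PHI node, or
   depended on yet another in-loop definition, false is returned and
   nothing is moved.  */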
6484 static bool
6485 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6487 ssa_op_iter i;
6488 tree op;
6489 bool any = false;
6491 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6493 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6494 if (!gimple_nop_p (def_stmt)
6495 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6497 /* Make sure we don't need to recurse. While we could do
6498 so in simple cases when there are more complex use webs
6499 we don't have an easy way to preserve stmt order to fulfil
6500 dependencies within them. */
6501 tree op2;
6502 ssa_op_iter i2;
6503 if (gimple_code (def_stmt) == GIMPLE_PHI)
6504 return false;
6505 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6507 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6508 if (!gimple_nop_p (def_stmt2)
6509 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6510 return false;
6512 any = true;
6516 if (!any)
6517 return true;
6519 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6521 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6522 if (!gimple_nop_p (def_stmt)
6523 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6525 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6526 gsi_remove (&gsi, false);
6527 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6531 return true;
6534 /* vectorizable_load.
6536 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6537 can be vectorized.
6538 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6539 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
6540 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6542 static bool
6543 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6544 slp_tree slp_node, slp_instance slp_node_instance)
6546 tree scalar_dest;
6547 tree vec_dest = NULL;
6548 tree data_ref = NULL;
6549 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6550 stmt_vec_info prev_stmt_info;
6551 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6552 struct loop *loop = NULL;
6553 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6554 bool nested_in_vect_loop = false;
6555 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6556 tree elem_type;
6557 tree new_temp;
6558 machine_mode mode;
6559 gimple *new_stmt = NULL;
6560 tree dummy;
6561 enum dr_alignment_support alignment_support_scheme;
6562 tree dataref_ptr = NULL_TREE;
6563 tree dataref_offset = NULL_TREE;
6564 gimple *ptr_incr = NULL;
6565 int ncopies;
6566 int i, j, group_size, group_gap_adj;
6567 tree msq = NULL_TREE, lsq;
6568 tree offset = NULL_TREE;
6569 tree byte_offset = NULL_TREE;
6570 tree realignment_token = NULL_TREE;
6571 gphi *phi = NULL;
6572 vec<tree> dr_chain = vNULL;
6573 bool grouped_load = false;
6574 gimple *first_stmt;
6575 gimple *first_stmt_for_drptr = NULL;
6576 bool inv_p;
6577 bool compute_in_loop = false;
6578 struct loop *at_loop;
6579 int vec_num;
6580 bool slp = (slp_node != NULL);
6581 bool slp_perm = false;
6582 enum tree_code code;
6583 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6584 int vf;
6585 tree aggr_type;
6586 gather_scatter_info gs_info;
6587 vec_info *vinfo = stmt_info->vinfo;
6588 tree ref_type;
6590 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6591 return false;
6593 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6594 && ! vec_stmt)
6595 return false;
6597 /* Is vectorizable load? */
6598 if (!is_gimple_assign (stmt))
6599 return false;
6601 scalar_dest = gimple_assign_lhs (stmt);
6602 if (TREE_CODE (scalar_dest) != SSA_NAME)
6603 return false;
6605 code = gimple_assign_rhs_code (stmt);
6606 if (code != ARRAY_REF
6607 && code != BIT_FIELD_REF
6608 && code != INDIRECT_REF
6609 && code != COMPONENT_REF
6610 && code != IMAGPART_EXPR
6611 && code != REALPART_EXPR
6612 && code != MEM_REF
6613 && TREE_CODE_CLASS (code) != tcc_declaration)
6614 return false;
6616 if (!STMT_VINFO_DATA_REF (stmt_info))
6617 return false;
6619 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6620 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6622 if (loop_vinfo)
6624 loop = LOOP_VINFO_LOOP (loop_vinfo);
6625 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6626 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6628 else
6629 vf = 1;
6631 /* Multiple types in SLP are handled by creating the appropriate number of
6632 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6633 case of SLP. */
6634 if (slp)
6635 ncopies = 1;
6636 else
6637 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6639 gcc_assert (ncopies >= 1);
6641 /* FORNOW. This restriction should be relaxed. */
6642 if (nested_in_vect_loop && ncopies > 1)
6644 if (dump_enabled_p ())
6645 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6646 "multiple types in nested loop.\n");
6647 return false;
6650 /* Invalidate assumptions made by dependence analysis when vectorization
6651 on the unrolled body effectively re-orders stmts. */
6652 if (ncopies > 1
6653 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6654 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6655 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6657 if (dump_enabled_p ())
6658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6659 "cannot perform implicit CSE when unrolling "
6660 "with negative dependence distance\n");
6661 return false;
6664 elem_type = TREE_TYPE (vectype);
6665 mode = TYPE_MODE (vectype);
6667 /* FORNOW. In some cases we can vectorize even if the data type is not
6668 supported (e.g. data copies). */
6669 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6671 if (dump_enabled_p ())
6672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6673 "Aligned load, but unsupported type.\n");
6674 return false;
6677 /* Check if the load is a part of an interleaving chain. */
6678 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6680 grouped_load = true;
6681 /* FORNOW */
6682 gcc_assert (!nested_in_vect_loop);
6683 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6685 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6686 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6688 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6689 slp_perm = true;
6691 /* Invalidate assumptions made by dependence analysis when vectorization
6692 on the unrolled body effectively re-orders stmts. */
6693 if (!PURE_SLP_STMT (stmt_info)
6694 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6695 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6696 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6698 if (dump_enabled_p ())
6699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6700 "cannot perform implicit CSE when performing "
6701 "group loads with negative dependence distance\n");
6702 return false;
6705 /* Similarly, when the stmt is a load that is both part of an SLP
6706 instance and a loop-vectorized stmt via the same-dr mechanism,
6707 we have to give up. */
6708 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6709 && (STMT_SLP_TYPE (stmt_info)
6710 != STMT_SLP_TYPE (vinfo_for_stmt
6711 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6713 if (dump_enabled_p ())
6714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6715 "conflicting SLP types for CSEd load\n");
6716 return false;
6720 vect_memory_access_type memory_access_type;
6721 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6722 &memory_access_type, &gs_info))
6723 return false;
6725 if (!vec_stmt) /* transformation not required. */
6727 if (!slp)
6728 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6729 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6730 /* The SLP costs are calculated during SLP analysis. */
6731 if (!PURE_SLP_STMT (stmt_info))
6732 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6733 NULL, NULL, NULL);
6734 return true;
6737 if (!slp)
6738 gcc_assert (memory_access_type
6739 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6741 if (dump_enabled_p ())
6742 dump_printf_loc (MSG_NOTE, vect_location,
6743 "transform load. ncopies = %d\n", ncopies);
6745 /* Transform. */
6747 ensure_base_align (stmt_info, dr);
6749 if (memory_access_type == VMAT_GATHER_SCATTER)
6751 tree vec_oprnd0 = NULL_TREE, op;
6752 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6753 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6754 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6755 edge pe = loop_preheader_edge (loop);
6756 gimple_seq seq;
6757 basic_block new_bb;
6758 enum { NARROW, NONE, WIDEN } modifier;
6759 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6761 if (nunits == gather_off_nunits)
6762 modifier = NONE;
6763 else if (nunits == gather_off_nunits / 2)
6765 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6766 modifier = WIDEN;
6768 for (i = 0; i < gather_off_nunits; ++i)
6769 sel[i] = i | nunits;
6771 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6773 else if (nunits == gather_off_nunits * 2)
6775 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6776 modifier = NARROW;
6778 for (i = 0; i < nunits; ++i)
6779 sel[i] = i < gather_off_nunits
6780 ? i : i + nunits - gather_off_nunits;
6782 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6783 ncopies *= 2;
6785 else
6786 gcc_unreachable ();
6788 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6789 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6790 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6791 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6792 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6793 scaletype = TREE_VALUE (arglist);
6794 gcc_checking_assert (types_compatible_p (srctype, rettype));
6796 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6798 ptr = fold_convert (ptrtype, gs_info.base);
6799 if (!is_gimple_min_invariant (ptr))
6801 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6802 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6803 gcc_assert (!new_bb);
6806 /* Currently we support only unconditional gather loads,
6807 so mask should be all ones. */
6808 if (TREE_CODE (masktype) == INTEGER_TYPE)
6809 mask = build_int_cst (masktype, -1);
6810 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6812 mask = build_int_cst (TREE_TYPE (masktype), -1);
6813 mask = build_vector_from_val (masktype, mask);
6814 mask = vect_init_vector (stmt, mask, masktype, NULL);
6816 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6818 REAL_VALUE_TYPE r;
6819 long tmp[6];
6820 for (j = 0; j < 6; ++j)
6821 tmp[j] = -1;
6822 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6823 mask = build_real (TREE_TYPE (masktype), r);
6824 mask = build_vector_from_val (masktype, mask);
6825 mask = vect_init_vector (stmt, mask, masktype, NULL);
6827 else
6828 gcc_unreachable ();
6830 scale = build_int_cst (scaletype, gs_info.scale);
6832 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6833 merge = build_int_cst (TREE_TYPE (rettype), 0);
6834 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6836 REAL_VALUE_TYPE r;
6837 long tmp[6];
6838 for (j = 0; j < 6; ++j)
6839 tmp[j] = 0;
6840 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6841 merge = build_real (TREE_TYPE (rettype), r);
6843 else
6844 gcc_unreachable ();
6845 merge = build_vector_from_val (rettype, merge);
6846 merge = vect_init_vector (stmt, merge, rettype, NULL);
6848 prev_stmt_info = NULL;
6849 for (j = 0; j < ncopies; ++j)
6851 if (modifier == WIDEN && (j & 1))
6852 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6853 perm_mask, stmt, gsi);
6854 else if (j == 0)
6855 op = vec_oprnd0
6856 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6857 else
6858 op = vec_oprnd0
6859 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6861 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6863 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6864 == TYPE_VECTOR_SUBPARTS (idxtype));
6865 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6866 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6867 new_stmt
6868 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6869 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6870 op = var;
6873 new_stmt
6874 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6876 if (!useless_type_conversion_p (vectype, rettype))
6878 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6879 == TYPE_VECTOR_SUBPARTS (rettype));
6880 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6881 gimple_call_set_lhs (new_stmt, op);
6882 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6883 var = make_ssa_name (vec_dest);
6884 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6885 new_stmt
6886 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6888 else
6890 var = make_ssa_name (vec_dest, new_stmt);
6891 gimple_call_set_lhs (new_stmt, var);
6894 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6896 if (modifier == NARROW)
6898 if ((j & 1) == 0)
6900 prev_res = var;
6901 continue;
6903 var = permute_vec_elements (prev_res, var,
6904 perm_mask, stmt, gsi);
6905 new_stmt = SSA_NAME_DEF_STMT (var);
6908 if (prev_stmt_info == NULL)
6909 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6910 else
6911 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6912 prev_stmt_info = vinfo_for_stmt (new_stmt);
6914 return true;
6917 if (memory_access_type == VMAT_ELEMENTWISE
6918 || memory_access_type == VMAT_STRIDED_SLP)
6920 gimple_stmt_iterator incr_gsi;
6921 bool insert_after;
6922 gimple *incr;
6923 tree offvar;
6924 tree ivstep;
6925 tree running_off;
6926 vec<constructor_elt, va_gc> *v = NULL;
6927 gimple_seq stmts = NULL;
6928 tree stride_base, stride_step, alias_off;
6930 gcc_assert (!nested_in_vect_loop);
6932 if (slp && grouped_load)
6934 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6935 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6936 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6937 ref_type = get_group_alias_ptr_type (first_stmt);
6939 else
6941 first_stmt = stmt;
6942 first_dr = dr;
6943 group_size = 1;
6944 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6947 stride_base
6948 = fold_build_pointer_plus
6949 (DR_BASE_ADDRESS (first_dr),
6950 size_binop (PLUS_EXPR,
6951 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6952 convert_to_ptrofftype (DR_INIT (first_dr))));
6953 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6955 /* For a load with loop-invariant (but other than power-of-2)
6956 stride (i.e. not a grouped access) like so:
6958 for (i = 0; i < n; i += stride)
6959 ... = array[i];
6961 we generate a new induction variable and new accesses to
6962 form a new vector (or vectors, depending on ncopies):
6964 for (j = 0; ; j += VF*stride)
6965 tmp1 = array[j];
6966 tmp2 = array[j + stride];
6968 vectemp = {tmp1, tmp2, ...}
6971 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6972 build_int_cst (TREE_TYPE (stride_step), vf));
6974 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6976 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6977 loop, &incr_gsi, insert_after,
6978 &offvar, NULL);
6979 incr = gsi_stmt (incr_gsi);
6980 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6982 stride_step = force_gimple_operand (unshare_expr (stride_step),
6983 &stmts, true, NULL_TREE);
6984 if (stmts)
6985 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6987 prev_stmt_info = NULL;
6988 running_off = offvar;
6989 alias_off = build_int_cst (ref_type, 0);
6990 int nloads = nunits;
6991 int lnel = 1;
6992 tree ltype = TREE_TYPE (vectype);
6993 tree lvectype = vectype;
6994 auto_vec<tree> dr_chain;
6995 if (memory_access_type == VMAT_STRIDED_SLP)
6997 if (group_size < nunits)
6999 /* Avoid emitting a constructor of vector elements by performing
7000 the loads using an integer type of the same size,
7001 constructing a vector of those and then re-interpreting it
7002 as the original vector type. This works around the fact
7003 that the vec_init optab was only designed for scalar
7004 element modes and thus expansion goes through memory.
7005 This avoids a huge runtime penalty due to the general
7006 inability to perform store forwarding from smaller stores
7007 to a larger load. */
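	  /* For instance (illustrative types): with a V8HI vectype and
	     group_size == 2, lsize is 32 bits, so the code below emits
	     four 32-bit integer loads, assembles them into a V4SI with a
	     CONSTRUCTOR and VIEW_CONVERTs the result back to V8HI, rather
	     than doing eight HImode element loads.  */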
7008 unsigned lsize
7009 = group_size * TYPE_PRECISION (TREE_TYPE (vectype));
7010 machine_mode elmode = mode_for_size (lsize, MODE_INT, 0);
7011 machine_mode vmode = mode_for_vector (elmode,
7012 nunits / group_size);
7013 /* If we can't construct such a vector fall back to
7014 element loads of the original vector type. */
7015 if (VECTOR_MODE_P (vmode)
7016 && optab_handler (vec_init_optab, vmode) != CODE_FOR_nothing)
7018 nloads = nunits / group_size;
7019 lnel = group_size;
7020 ltype = build_nonstandard_integer_type (lsize, 1);
7021 lvectype = build_vector_type (ltype, nloads);
7024 else
7026 nloads = 1;
7027 lnel = nunits;
7028 ltype = vectype;
7030 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
7032 if (slp)
7034 /* For SLP permutation support we need to load the whole group,
7035 not only the number of vector stmts the permutation result
7036 fits in. */
7037 if (slp_perm)
7039 ncopies = (group_size * vf + nunits - 1) / nunits;
7040 dr_chain.create (ncopies);
7042 else
7043 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7045 int group_el = 0;
7046 unsigned HOST_WIDE_INT
7047 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
7048 for (j = 0; j < ncopies; j++)
7050 if (nloads > 1)
7051 vec_alloc (v, nloads);
7052 for (i = 0; i < nloads; i++)
7054 tree this_off = build_int_cst (TREE_TYPE (alias_off),
7055 group_el * elsz);
7056 new_stmt = gimple_build_assign (make_ssa_name (ltype),
7057 build2 (MEM_REF, ltype,
7058 running_off, this_off));
7059 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7060 if (nloads > 1)
7061 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
7062 gimple_assign_lhs (new_stmt));
7064 group_el += lnel;
7065 if (! slp
7066 || group_el == group_size)
7068 tree newoff = copy_ssa_name (running_off);
7069 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
7070 running_off, stride_step);
7071 vect_finish_stmt_generation (stmt, incr, gsi);
7073 running_off = newoff;
7074 group_el = 0;
7077 if (nloads > 1)
7079 tree vec_inv = build_constructor (lvectype, v);
7080 new_temp = vect_init_vector (stmt, vec_inv, lvectype, gsi);
7081 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7082 if (lvectype != vectype)
7084 new_stmt = gimple_build_assign (make_ssa_name (vectype),
7085 VIEW_CONVERT_EXPR,
7086 build1 (VIEW_CONVERT_EXPR,
7087 vectype, new_temp));
7088 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7092 if (slp)
7094 if (slp_perm)
7095 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
7096 else
7097 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7099 else
7101 if (j == 0)
7102 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7103 else
7104 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7105 prev_stmt_info = vinfo_for_stmt (new_stmt);
7108 if (slp_perm)
7110 unsigned n_perms;
7111 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7112 slp_node_instance, false, &n_perms);
7114 return true;
7117 if (grouped_load)
7119 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
7120 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
7121 int group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
7122 /* For SLP vectorization we directly vectorize a subchain
7123 without permutation. */
7124 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
7125 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7126 /* For BB vectorization always use the first stmt to base
7127 the data ref pointer on. */
7128 if (bb_vinfo)
7129 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7131 /* Check if the chain of loads is already vectorized. */
7132 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
7133 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7134 ??? But we can only do so if there is exactly one
7135 as we have no way to get at the rest. Leave the CSE
7136 opportunity alone.
7137 ??? With the group load eventually participating
7138 in multiple different permutations (having multiple
7139 slp nodes which refer to the same group) the CSE
7140 is even wrong code. See PR56270. */
7141 && !slp)
7143 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7144 return true;
7146 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
7147 group_gap_adj = 0;
7149 /* VEC_NUM is the number of vect stmts to be created for this group. */
7150 if (slp)
7152 grouped_load = false;
7153 /* For SLP permutation support we need to load the whole group,
7154 not only the number of vector stmts the permutation result
7155 fits in. */
7156 if (slp_perm)
7158 vec_num = (group_size * vf + nunits - 1) / nunits;
7159 group_gap_adj = vf * group_size - nunits * vec_num;
7161 else
7163 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7164 group_gap_adj = group_gap;
7167 else
7168 vec_num = group_size;
7170 ref_type = get_group_alias_ptr_type (first_stmt);
7172 else
7174 first_stmt = stmt;
7175 first_dr = dr;
7176 group_size = vec_num = 1;
7177 group_gap_adj = 0;
7178 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7181 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7182 gcc_assert (alignment_support_scheme);
7183 /* Targets with load-lane instructions must not require explicit
7184 realignment. */
7185 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7186 || alignment_support_scheme == dr_aligned
7187 || alignment_support_scheme == dr_unaligned_supported);
7189 /* In case the vectorization factor (VF) is bigger than the number
7190 of elements that we can fit in a vectype (nunits), we have to generate
7191 more than one vector stmt, i.e. we need to "unroll" the
7192 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7193 from one copy of the vector stmt to the next, in the field
7194 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7195 stages to find the correct vector defs to be used when vectorizing
7196 stmts that use the defs of the current stmt. The example below
7197 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7198 need to create 4 vectorized stmts):
7200 before vectorization:
7201 RELATED_STMT VEC_STMT
7202 S1: x = memref - -
7203 S2: z = x + 1 - -
7205 step 1: vectorize stmt S1:
7206 We first create the vector stmt VS1_0, and, as usual, record a
7207 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7208 Next, we create the vector stmt VS1_1, and record a pointer to
7209 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7210 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7211 stmts and pointers:
7212 RELATED_STMT VEC_STMT
7213 VS1_0: vx0 = memref0 VS1_1 -
7214 VS1_1: vx1 = memref1 VS1_2 -
7215 VS1_2: vx2 = memref2 VS1_3 -
7216 VS1_3: vx3 = memref3 - -
7217 S1: x = load - VS1_0
7218 S2: z = x + 1 - -
7220 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7221 information we recorded in RELATED_STMT field is used to vectorize
7222 stmt S2. */
7224 /* In case of interleaving (non-unit grouped access):
7226 S1: x2 = &base + 2
7227 S2: x0 = &base
7228 S3: x1 = &base + 1
7229 S4: x3 = &base + 3
7231 Vectorized loads are created in the order of memory accesses
7232 starting from the access of the first stmt of the chain:
7234 VS1: vx0 = &base
7235 VS2: vx1 = &base + vec_size*1
7236 VS3: vx3 = &base + vec_size*2
7237 VS4: vx4 = &base + vec_size*3
7239 Then permutation statements are generated:
7241 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7242 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7245 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7246 (the order of the data-refs in the output of vect_permute_load_chain
7247 corresponds to the order of scalar stmts in the interleaving chain - see
7248 the documentation of vect_permute_load_chain()).
7249 The generation of permutation stmts and recording them in
7250 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7252 In case of both multiple types and interleaving, the vector loads and
7253 permutation stmts above are created for every copy. The result vector
7254 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7255 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
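/* For instance (illustrative V4SI case): for a group of two interleaved
   scalar loads whose data is laid out as x0 y0 x1 y1 x2 y2 x3 y3 in
   memory, the two contiguous vector loads give

     vx0 = { x0, y0, x1, y1 }	  vx1 = { x2, y2, x3, y3 }

   and the extract-even / extract-odd permutations

     VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>  ==>  { x0, x1, x2, x3 }
     VEC_PERM_EXPR <vx0, vx1, { 1, 3, 5, 7 }>  ==>  { y0, y1, y2, y3 }

   recover one vector per original scalar stream.  */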
7257 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7258 on a target that supports unaligned accesses (dr_unaligned_supported)
7259 we generate the following code:
7260 p = initial_addr;
7261 indx = 0;
7262 loop {
7263 p = p + indx * vectype_size;
7264 vec_dest = *(p);
7265 indx = indx + 1;
7268 Otherwise, the data reference is potentially unaligned on a target that
7269 does not support unaligned accesses (dr_explicit_realign_optimized) -
7270 then generate the following code, in which the data in each iteration is
7271 obtained by two vector loads, one from the previous iteration, and one
7272 from the current iteration:
7273 p1 = initial_addr;
7274 msq_init = *(floor(p1))
7275 p2 = initial_addr + VS - 1;
7276 realignment_token = call target_builtin;
7277 indx = 0;
7278 loop {
7279 p2 = p2 + indx * vectype_size
7280 lsq = *(floor(p2))
7281 vec_dest = realign_load (msq, lsq, realignment_token)
7282 indx = indx + 1;
7283 msq = lsq;
7284 } */
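/* As a concrete (illustrative) instance of the second scheme: with 16-byte
   vectors and initial_addr = A + 4 for some 16-byte-aligned A, the
   prologue load msq_init = *(floor(A + 4)) fetches bytes A .. A+15.  In
   the first iteration lsq = *(floor(A + 4 + 15)) fetches bytes
   A+16 .. A+31, and realign_load conceptually combines the upper 12 bytes
   of msq with the lower 4 bytes of lsq (under the control of
   realignment_token) to produce the desired unaligned vector A+4 .. A+19;
   msq = lsq then carries the second load over to the next iteration.  */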
7286 /* If the misalignment remains the same throughout the execution of the
7287 loop, we can create the init_addr and permutation mask at the loop
7288 preheader. Otherwise, it needs to be created inside the loop.
7289 This can only occur when vectorizing memory accesses in the inner-loop
7290 nested within an outer-loop that is being vectorized. */
7292 if (nested_in_vect_loop
7293 && (DR_STEP_ALIGNMENT (dr) % GET_MODE_SIZE (TYPE_MODE (vectype))) != 0)
7295 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7296 compute_in_loop = true;
7299 if ((alignment_support_scheme == dr_explicit_realign_optimized
7300 || alignment_support_scheme == dr_explicit_realign)
7301 && !compute_in_loop)
7303 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7304 alignment_support_scheme, NULL_TREE,
7305 &at_loop);
7306 if (alignment_support_scheme == dr_explicit_realign_optimized)
7308 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7309 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7310 size_one_node);
7313 else
7314 at_loop = loop;
7316 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7317 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7319 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7320 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7321 else
7322 aggr_type = vectype;
7324 prev_stmt_info = NULL;
7325 int group_elt = 0;
7326 for (j = 0; j < ncopies; j++)
7328 /* 1. Create the vector or array pointer update chain. */
7329 if (j == 0)
7331 bool simd_lane_access_p
7332 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7333 if (simd_lane_access_p
7334 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7335 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7336 && integer_zerop (DR_OFFSET (first_dr))
7337 && integer_zerop (DR_INIT (first_dr))
7338 && alias_sets_conflict_p (get_alias_set (aggr_type),
7339 get_alias_set (TREE_TYPE (ref_type)))
7340 && (alignment_support_scheme == dr_aligned
7341 || alignment_support_scheme == dr_unaligned_supported))
7343 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7344 dataref_offset = build_int_cst (ref_type, 0);
7345 inv_p = false;
7347 else if (first_stmt_for_drptr
7348 && first_stmt != first_stmt_for_drptr)
7350 dataref_ptr
7351 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7352 at_loop, offset, &dummy, gsi,
7353 &ptr_incr, simd_lane_access_p,
7354 &inv_p, byte_offset);
7355 /* Adjust the pointer by the difference to first_stmt. */
7356 data_reference_p ptrdr
7357 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7358 tree diff = fold_convert (sizetype,
7359 size_binop (MINUS_EXPR,
7360 DR_INIT (first_dr),
7361 DR_INIT (ptrdr)));
7362 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7363 stmt, diff);
7365 else
7366 dataref_ptr
7367 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7368 offset, &dummy, gsi, &ptr_incr,
7369 simd_lane_access_p, &inv_p,
7370 byte_offset);
7372 else if (dataref_offset)
7373 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7374 TYPE_SIZE_UNIT (aggr_type));
7375 else
7376 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7377 TYPE_SIZE_UNIT (aggr_type));
7379 if (grouped_load || slp_perm)
7380 dr_chain.create (vec_num);
7382 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7384 tree vec_array;
7386 vec_array = create_vector_array (vectype, vec_num);
7388 /* Emit:
7389 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
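/* For example, with vec_num == 3 and a V4SI vectype, LOAD_LANES reads
3 * 4 contiguous ints a0..a11 and de-interleaves them into the three
vectors {a0,a3,a6,a9}, {a1,a4,a7,a10} and {a2,a5,a8,a11}, one per
member of the interleaving group (roughly the semantics of AArch64
LD3 / ARM VLD3). */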
7390 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7391 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7392 gimple_call_set_lhs (new_stmt, vec_array);
7393 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7395 /* Extract each vector into an SSA_NAME. */
7396 for (i = 0; i < vec_num; i++)
7398 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7399 vec_array, i);
7400 dr_chain.quick_push (new_temp);
7403 /* Record the mapping between SSA_NAMEs and statements. */
7404 vect_record_grouped_load_vectors (stmt, dr_chain);
7406 else
7408 for (i = 0; i < vec_num; i++)
7410 if (i > 0)
7411 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7412 stmt, NULL_TREE);
7414 /* 2. Create the vector-load in the loop. */
7415 switch (alignment_support_scheme)
7417 case dr_aligned:
7418 case dr_unaligned_supported:
7420 unsigned int align, misalign;
7422 data_ref
7423 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7424 dataref_offset
7425 ? dataref_offset
7426 : build_int_cst (ref_type, 0));
7427 align = TYPE_ALIGN_UNIT (vectype);
7428 if (alignment_support_scheme == dr_aligned)
7430 gcc_assert (aligned_access_p (first_dr));
7431 misalign = 0;
7433 else if (DR_MISALIGNMENT (first_dr) == -1)
7435 align = dr_alignment (vect_dr_behavior (first_dr));
7436 misalign = 0;
7437 TREE_TYPE (data_ref)
7438 = build_aligned_type (TREE_TYPE (data_ref),
7439 align * BITS_PER_UNIT);
7441 else
7443 TREE_TYPE (data_ref)
7444 = build_aligned_type (TREE_TYPE (data_ref),
7445 TYPE_ALIGN (elem_type));
7446 misalign = DR_MISALIGNMENT (first_dr);
7448 if (dataref_offset == NULL_TREE
7449 && TREE_CODE (dataref_ptr) == SSA_NAME)
7450 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7451 align, misalign);
7452 break;
7454 case dr_explicit_realign:
7456 tree ptr, bump;
7458 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7460 if (compute_in_loop)
7461 msq = vect_setup_realignment (first_stmt, gsi,
7462 &realignment_token,
7463 dr_explicit_realign,
7464 dataref_ptr, NULL);
7466 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7467 ptr = copy_ssa_name (dataref_ptr);
7468 else
7469 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7470 new_stmt = gimple_build_assign
7471 (ptr, BIT_AND_EXPR, dataref_ptr,
7472 build_int_cst
7473 (TREE_TYPE (dataref_ptr),
7474 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7475 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7476 data_ref
7477 = build2 (MEM_REF, vectype, ptr,
7478 build_int_cst (ref_type, 0));
7479 vec_dest = vect_create_destination_var (scalar_dest,
7480 vectype);
7481 new_stmt = gimple_build_assign (vec_dest, data_ref);
7482 new_temp = make_ssa_name (vec_dest, new_stmt);
7483 gimple_assign_set_lhs (new_stmt, new_temp);
7484 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7485 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7486 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7487 msq = new_temp;
7489 bump = size_binop (MULT_EXPR, vs,
7490 TYPE_SIZE_UNIT (elem_type));
7491 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7492 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7493 new_stmt = gimple_build_assign
7494 (NULL_TREE, BIT_AND_EXPR, ptr,
7495 build_int_cst
7496 (TREE_TYPE (ptr),
7497 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7498 ptr = copy_ssa_name (ptr, new_stmt);
7499 gimple_assign_set_lhs (new_stmt, ptr);
7500 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7501 data_ref
7502 = build2 (MEM_REF, vectype, ptr,
7503 build_int_cst (ref_type, 0));
7504 break;
7506 case dr_explicit_realign_optimized:
7507 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7508 new_temp = copy_ssa_name (dataref_ptr);
7509 else
7510 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7511 new_stmt = gimple_build_assign
7512 (new_temp, BIT_AND_EXPR, dataref_ptr,
7513 build_int_cst
7514 (TREE_TYPE (dataref_ptr),
7515 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7516 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7517 data_ref
7518 = build2 (MEM_REF, vectype, new_temp,
7519 build_int_cst (ref_type, 0));
7520 break;
7521 default:
7522 gcc_unreachable ();
7524 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7525 new_stmt = gimple_build_assign (vec_dest, data_ref);
7526 new_temp = make_ssa_name (vec_dest, new_stmt);
7527 gimple_assign_set_lhs (new_stmt, new_temp);
7528 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7530 /* 3. Handle explicit realignment if necessary/supported.
7531 Create in loop:
7532 vec_dest = realign_load (msq, lsq, realignment_token) */
7533 if (alignment_support_scheme == dr_explicit_realign_optimized
7534 || alignment_support_scheme == dr_explicit_realign)
7536 lsq = gimple_assign_lhs (new_stmt);
7537 if (!realignment_token)
7538 realignment_token = dataref_ptr;
7539 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7540 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7541 msq, lsq, realignment_token);
7542 new_temp = make_ssa_name (vec_dest, new_stmt);
7543 gimple_assign_set_lhs (new_stmt, new_temp);
7544 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7546 if (alignment_support_scheme == dr_explicit_realign_optimized)
7548 gcc_assert (phi);
7549 if (i == vec_num - 1 && j == ncopies - 1)
7550 add_phi_arg (phi, lsq,
7551 loop_latch_edge (containing_loop),
7552 UNKNOWN_LOCATION);
7553 msq = lsq;
7557 /* 4. Handle invariant-load. */
7558 if (inv_p && !bb_vinfo)
7560 gcc_assert (!grouped_load);
7561 /* If we have versioned for aliasing or the loop doesn't
7562 have any data dependencies that would preclude this,
7563 then we are sure this is a loop invariant load and
7564 thus we can insert it on the preheader edge. */
7565 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7566 && !nested_in_vect_loop
7567 && hoist_defs_of_uses (stmt, loop))
7569 if (dump_enabled_p ())
7571 dump_printf_loc (MSG_NOTE, vect_location,
7572 "hoisting out of the vectorized "
7573 "loop: ");
7574 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7576 tree tem = copy_ssa_name (scalar_dest);
7577 gsi_insert_on_edge_immediate
7578 (loop_preheader_edge (loop),
7579 gimple_build_assign (tem,
7580 unshare_expr
7581 (gimple_assign_rhs1 (stmt))));
7582 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7583 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7584 set_vinfo_for_stmt (new_stmt,
7585 new_stmt_vec_info (new_stmt, vinfo));
7587 else
7589 gimple_stmt_iterator gsi2 = *gsi;
7590 gsi_next (&gsi2);
7591 new_temp = vect_init_vector (stmt, scalar_dest,
7592 vectype, &gsi2);
7593 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7597 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7599 tree perm_mask = perm_mask_for_reverse (vectype);
7600 new_temp = permute_vec_elements (new_temp, new_temp,
7601 perm_mask, stmt, gsi);
7602 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7605 /* Collect vector loads and later create their permutation in
7606 vect_transform_grouped_load (). */
7607 if (grouped_load || slp_perm)
7608 dr_chain.quick_push (new_temp);
7610 /* Store vector loads in the corresponding SLP_NODE. */
7611 if (slp && !slp_perm)
7612 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7614 /* With an SLP permutation we load the gaps as well; without one
7615 we need to skip the gaps once we have fully loaded
7616 all the elements. group_gap_adj is GROUP_SIZE here. */
7617 group_elt += nunits;
7618 if (group_gap_adj != 0 && ! slp_perm
7619 && group_elt == group_size - group_gap_adj)
7621 bool ovf;
7622 tree bump
7623 = wide_int_to_tree (sizetype,
7624 wi::smul (TYPE_SIZE_UNIT (elem_type),
7625 group_gap_adj, &ovf));
7626 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7627 stmt, bump);
7628 group_elt = 0;
7631 /* Bump the vector pointer to account for a gap or for excess
7632 elements loaded for a permuted SLP load. */
7633 if (group_gap_adj != 0 && slp_perm)
7635 bool ovf;
7636 tree bump
7637 = wide_int_to_tree (sizetype,
7638 wi::smul (TYPE_SIZE_UNIT (elem_type),
7639 group_gap_adj, &ovf));
7640 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7641 stmt, bump);
7645 if (slp && !slp_perm)
7646 continue;
7648 if (slp_perm)
7650 unsigned n_perms;
7651 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7652 slp_node_instance, false,
7653 &n_perms))
7655 dr_chain.release ();
7656 return false;
7659 else
7661 if (grouped_load)
7663 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7664 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7665 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7667 else
7669 if (j == 0)
7670 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7671 else
7672 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7673 prev_stmt_info = vinfo_for_stmt (new_stmt);
7676 dr_chain.release ();
7679 return true;
7682 /* Function vect_is_simple_cond.
7684 Input:
7685 LOOP - the loop that is being vectorized.
7686 COND - Condition that is checked for simple use.
7688 Output:
7689 *COMP_VECTYPE - the vector type for the comparison.
7690 *DTS - The def types for the arguments of the comparison
7692 Returns whether a COND can be vectorized. Checks whether
7693 the condition operands are supportable using vect_is_simple_use. */
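/* For instance (with illustrative SSA names), for the condition
a_1 < b_2 where a_1 and b_2 are ints defined inside the loop,
*COMP_VECTYPE is the vector type chosen for int (e.g. V4SI) and both
def types are vect_internal_def; for 5 < b_2 the first def type is
vect_constant_def instead. */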
7695 static bool
7696 vect_is_simple_cond (tree cond, vec_info *vinfo,
7697 tree *comp_vectype, enum vect_def_type *dts)
7699 tree lhs, rhs;
7700 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7702 /* Mask case. */
7703 if (TREE_CODE (cond) == SSA_NAME
7704 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond)))
7706 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7707 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7708 &dts[0], comp_vectype)
7709 || !*comp_vectype
7710 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7711 return false;
7712 return true;
7715 if (!COMPARISON_CLASS_P (cond))
7716 return false;
7718 lhs = TREE_OPERAND (cond, 0);
7719 rhs = TREE_OPERAND (cond, 1);
7721 if (TREE_CODE (lhs) == SSA_NAME)
7723 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7724 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dts[0], &vectype1))
7725 return false;
7727 else if (TREE_CODE (lhs) == INTEGER_CST || TREE_CODE (lhs) == REAL_CST
7728 || TREE_CODE (lhs) == FIXED_CST)
7729 dts[0] = vect_constant_def;
7730 else
7731 return false;
7733 if (TREE_CODE (rhs) == SSA_NAME)
7735 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7736 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dts[1], &vectype2))
7737 return false;
7739 else if (TREE_CODE (rhs) == INTEGER_CST || TREE_CODE (rhs) == REAL_CST
7740 || TREE_CODE (rhs) == FIXED_CST)
7741 dts[1] = vect_constant_def;
7742 else
7743 return false;
7745 if (vectype1 && vectype2
7746 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7747 return false;
7749 *comp_vectype = vectype1 ? vectype1 : vectype2;
7750 return true;
7753 /* vectorizable_condition.
7755 Check if STMT is a conditional modify expression that can be vectorized.
7756 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7757 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7758 at GSI.
7760 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7761 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7762 the else clause if it is 2).
7764 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
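/* For example (with illustrative SSA names), the scalar statement
x_1 = a_2 < b_3 ? c_4 : d_5;
is transformed into something like
vx_1 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;
where va_2 .. vd_5 are the vectorized defs of the operands. */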
7766 bool
7767 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7768 gimple **vec_stmt, tree reduc_def, int reduc_index,
7769 slp_tree slp_node)
7771 tree scalar_dest = NULL_TREE;
7772 tree vec_dest = NULL_TREE;
7773 tree cond_expr, cond_expr0 = NULL_TREE, cond_expr1 = NULL_TREE;
7774 tree then_clause, else_clause;
7775 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7776 tree comp_vectype = NULL_TREE;
7777 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7778 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7779 tree vec_compare;
7780 tree new_temp;
7781 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7782 enum vect_def_type dts[4]
7783 = {vect_unknown_def_type, vect_unknown_def_type,
7784 vect_unknown_def_type, vect_unknown_def_type};
7785 int ndts = 4;
7786 int ncopies;
7787 enum tree_code code, cond_code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7788 stmt_vec_info prev_stmt_info = NULL;
7789 int i, j;
7790 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7791 vec<tree> vec_oprnds0 = vNULL;
7792 vec<tree> vec_oprnds1 = vNULL;
7793 vec<tree> vec_oprnds2 = vNULL;
7794 vec<tree> vec_oprnds3 = vNULL;
7795 tree vec_cmp_type;
7796 bool masked = false;
7798 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7799 return false;
7801 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7803 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7804 return false;
7806 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7807 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7808 && reduc_def))
7809 return false;
7811 /* FORNOW: not yet supported. */
7812 if (STMT_VINFO_LIVE_P (stmt_info))
7814 if (dump_enabled_p ())
7815 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7816 "value used after loop.\n");
7817 return false;
7821 /* Is this a vectorizable conditional operation? */
7822 if (!is_gimple_assign (stmt))
7823 return false;
7825 code = gimple_assign_rhs_code (stmt);
7827 if (code != COND_EXPR)
7828 return false;
7830 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7831 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7832 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7834 if (slp_node)
7835 ncopies = 1;
7836 else
7837 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7839 gcc_assert (ncopies >= 1);
7840 if (reduc_index && ncopies > 1)
7841 return false; /* FORNOW */
7843 cond_expr = gimple_assign_rhs1 (stmt);
7844 then_clause = gimple_assign_rhs2 (stmt);
7845 else_clause = gimple_assign_rhs3 (stmt);
7847 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo,
7848 &comp_vectype, &dts[0])
7849 || !comp_vectype)
7850 return false;
7852 gimple *def_stmt;
7853 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dts[2],
7854 &vectype1))
7855 return false;
7856 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dts[3],
7857 &vectype2))
7858 return false;
7860 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7861 return false;
7863 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7864 return false;
7866 masked = !COMPARISON_CLASS_P (cond_expr);
7867 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7869 if (vec_cmp_type == NULL_TREE)
7870 return false;
7872 cond_code = TREE_CODE (cond_expr);
7873 if (!masked)
7875 cond_expr0 = TREE_OPERAND (cond_expr, 0);
7876 cond_expr1 = TREE_OPERAND (cond_expr, 1);
7879 if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype))
7881 /* Boolean values may have another representation in vectors
7882 and therefore we prefer bit operations over comparisons for
7883 them (which also works for scalar masks). We store the opcodes
7884 to use in bitop1 and bitop2. The statement is vectorized as
7885 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
7886 depending on bitop1 and bitop2 arity. */
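/* For example, for boolean rhs1 and rhs2:
rhs1 > rhs2 becomes rhs1 & ~rhs2
rhs1 >= rhs2 becomes rhs1 | ~rhs2
rhs1 == rhs2 becomes ~(rhs1 ^ rhs2)
rhs1 != rhs2 becomes rhs1 ^ rhs2
with LT/LE handled by swapping the operands first. */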
7887 switch (cond_code)
7889 case GT_EXPR:
7890 bitop1 = BIT_NOT_EXPR;
7891 bitop2 = BIT_AND_EXPR;
7892 break;
7893 case GE_EXPR:
7894 bitop1 = BIT_NOT_EXPR;
7895 bitop2 = BIT_IOR_EXPR;
7896 break;
7897 case LT_EXPR:
7898 bitop1 = BIT_NOT_EXPR;
7899 bitop2 = BIT_AND_EXPR;
7900 std::swap (cond_expr0, cond_expr1);
7901 break;
7902 case LE_EXPR:
7903 bitop1 = BIT_NOT_EXPR;
7904 bitop2 = BIT_IOR_EXPR;
7905 std::swap (cond_expr0, cond_expr1);
7906 break;
7907 case NE_EXPR:
7908 bitop1 = BIT_XOR_EXPR;
7909 break;
7910 case EQ_EXPR:
7911 bitop1 = BIT_XOR_EXPR;
7912 bitop2 = BIT_NOT_EXPR;
7913 break;
7914 default:
7915 return false;
7917 cond_code = SSA_NAME;
7920 if (!vec_stmt)
7922 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7923 if (bitop1 != NOP_EXPR)
7925 machine_mode mode = TYPE_MODE (comp_vectype);
7926 optab optab;
7928 optab = optab_for_tree_code (bitop1, comp_vectype, optab_default);
7929 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7930 return false;
7932 if (bitop2 != NOP_EXPR)
7934 optab = optab_for_tree_code (bitop2, comp_vectype,
7935 optab_default);
7936 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7937 return false;
7940 if (expand_vec_cond_expr_p (vectype, comp_vectype,
7941 cond_code))
7943 vect_model_simple_cost (stmt_info, ncopies, dts, ndts, NULL, NULL);
7944 return true;
7946 return false;
7949 /* Transform. */
7951 if (!slp_node)
7953 vec_oprnds0.create (1);
7954 vec_oprnds1.create (1);
7955 vec_oprnds2.create (1);
7956 vec_oprnds3.create (1);
7959 /* Handle def. */
7960 scalar_dest = gimple_assign_lhs (stmt);
7961 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7963 /* Handle cond expr. */
7964 for (j = 0; j < ncopies; j++)
7966 gassign *new_stmt = NULL;
7967 if (j == 0)
7969 if (slp_node)
7971 auto_vec<tree, 4> ops;
7972 auto_vec<vec<tree>, 4> vec_defs;
7974 if (masked)
7975 ops.safe_push (cond_expr);
7976 else
7978 ops.safe_push (cond_expr0);
7979 ops.safe_push (cond_expr1);
7981 ops.safe_push (then_clause);
7982 ops.safe_push (else_clause);
7983 vect_get_slp_defs (ops, slp_node, &vec_defs);
7984 vec_oprnds3 = vec_defs.pop ();
7985 vec_oprnds2 = vec_defs.pop ();
7986 if (!masked)
7987 vec_oprnds1 = vec_defs.pop ();
7988 vec_oprnds0 = vec_defs.pop ();
7990 else
7992 gimple *gtemp;
7993 if (masked)
7995 vec_cond_lhs
7996 = vect_get_vec_def_for_operand (cond_expr, stmt,
7997 comp_vectype);
7998 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7999 &gtemp, &dts[0]);
8001 else
8003 vec_cond_lhs
8004 = vect_get_vec_def_for_operand (cond_expr0,
8005 stmt, comp_vectype);
8006 vect_is_simple_use (cond_expr0, loop_vinfo, &gtemp, &dts[0]);
8008 vec_cond_rhs
8009 = vect_get_vec_def_for_operand (cond_expr1,
8010 stmt, comp_vectype);
8011 vect_is_simple_use (cond_expr1, loop_vinfo, &gtemp, &dts[1]);
8013 if (reduc_index == 1)
8014 vec_then_clause = reduc_def;
8015 else
8017 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
8018 stmt);
8019 vect_is_simple_use (then_clause, loop_vinfo,
8020 &gtemp, &dts[2]);
8022 if (reduc_index == 2)
8023 vec_else_clause = reduc_def;
8024 else
8026 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
8027 stmt);
8028 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
8032 else
8034 vec_cond_lhs
8035 = vect_get_vec_def_for_stmt_copy (dts[0],
8036 vec_oprnds0.pop ());
8037 if (!masked)
8038 vec_cond_rhs
8039 = vect_get_vec_def_for_stmt_copy (dts[1],
8040 vec_oprnds1.pop ());
8042 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
8043 vec_oprnds2.pop ());
8044 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
8045 vec_oprnds3.pop ());
8048 if (!slp_node)
8050 vec_oprnds0.quick_push (vec_cond_lhs);
8051 if (!masked)
8052 vec_oprnds1.quick_push (vec_cond_rhs);
8053 vec_oprnds2.quick_push (vec_then_clause);
8054 vec_oprnds3.quick_push (vec_else_clause);
8057 /* Arguments are ready. Create the new vector stmt. */
8058 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
8060 vec_then_clause = vec_oprnds2[i];
8061 vec_else_clause = vec_oprnds3[i];
8063 if (masked)
8064 vec_compare = vec_cond_lhs;
8065 else
8067 vec_cond_rhs = vec_oprnds1[i];
8068 if (bitop1 == NOP_EXPR)
8069 vec_compare = build2 (cond_code, vec_cmp_type,
8070 vec_cond_lhs, vec_cond_rhs);
8071 else
8073 new_temp = make_ssa_name (vec_cmp_type);
8074 if (bitop1 == BIT_NOT_EXPR)
8075 new_stmt = gimple_build_assign (new_temp, bitop1,
8076 vec_cond_rhs);
8077 else
8078 new_stmt
8079 = gimple_build_assign (new_temp, bitop1, vec_cond_lhs,
8080 vec_cond_rhs);
8081 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8082 if (bitop2 == NOP_EXPR)
8083 vec_compare = new_temp;
8084 else if (bitop2 == BIT_NOT_EXPR)
8086 /* Instead of doing ~x ? y : z do x ? z : y. */
8087 vec_compare = new_temp;
8088 std::swap (vec_then_clause, vec_else_clause);
8090 else
8092 vec_compare = make_ssa_name (vec_cmp_type);
8093 new_stmt
8094 = gimple_build_assign (vec_compare, bitop2,
8095 vec_cond_lhs, new_temp);
8096 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8100 new_temp = make_ssa_name (vec_dest);
8101 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
8102 vec_compare, vec_then_clause,
8103 vec_else_clause);
8104 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8105 if (slp_node)
8106 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8109 if (slp_node)
8110 continue;
8112 if (j == 0)
8113 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8114 else
8115 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8117 prev_stmt_info = vinfo_for_stmt (new_stmt);
8120 vec_oprnds0.release ();
8121 vec_oprnds1.release ();
8122 vec_oprnds2.release ();
8123 vec_oprnds3.release ();
8125 return true;
8128 /* vectorizable_comparison.
8130 Check if STMT is a comparison expression that can be vectorized.
8131 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
8132 comparison, put it in VEC_STMT, and insert it at GSI.
8134 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
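/* For example (with illustrative SSA names), the scalar statement
mask_1 = a_2 > b_3;
where mask_1 has a scalar boolean type is vectorized as a vector
comparison
vmask_1 = va_2 > vb_3;
whose result has a boolean vector (mask) type. */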
8136 static bool
8137 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
8138 gimple **vec_stmt, tree reduc_def,
8139 slp_tree slp_node)
8141 tree lhs, rhs1, rhs2;
8142 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8143 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
8144 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8145 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
8146 tree new_temp;
8147 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
8148 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
8149 int ndts = 2;
8150 unsigned nunits;
8151 int ncopies;
8152 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
8153 stmt_vec_info prev_stmt_info = NULL;
8154 int i, j;
8155 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8156 vec<tree> vec_oprnds0 = vNULL;
8157 vec<tree> vec_oprnds1 = vNULL;
8158 gimple *def_stmt;
8159 tree mask_type;
8160 tree mask;
8162 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
8163 return false;
8165 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
8166 return false;
8168 mask_type = vectype;
8169 nunits = TYPE_VECTOR_SUBPARTS (vectype);
8171 if (slp_node)
8172 ncopies = 1;
8173 else
8174 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
8176 gcc_assert (ncopies >= 1);
8177 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
8178 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
8179 && reduc_def))
8180 return false;
8182 if (STMT_VINFO_LIVE_P (stmt_info))
8184 if (dump_enabled_p ())
8185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8186 "value used after loop.\n");
8187 return false;
8190 if (!is_gimple_assign (stmt))
8191 return false;
8193 code = gimple_assign_rhs_code (stmt);
8195 if (TREE_CODE_CLASS (code) != tcc_comparison)
8196 return false;
8198 rhs1 = gimple_assign_rhs1 (stmt);
8199 rhs2 = gimple_assign_rhs2 (stmt);
8201 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
8202 &dts[0], &vectype1))
8203 return false;
8205 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
8206 &dts[1], &vectype2))
8207 return false;
8209 if (vectype1 && vectype2
8210 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
8211 return false;
8213 vectype = vectype1 ? vectype1 : vectype2;
8215 /* Invariant comparison. */
8216 if (!vectype)
8218 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
8219 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
8220 return false;
8222 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
8223 return false;
8225 /* Can't compare mask and non-mask types. */
8226 if (vectype1 && vectype2
8227 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
8228 return false;
8230 /* Boolean values may have another representation in vectors
8231 and therefore we prefer bit operations over comparisons for
8232 them (which also works for scalar masks). We store the opcodes
8233 to use in bitop1 and bitop2. The statement is vectorized as
8234 BITOP2 (rhs1 BITOP1 rhs2) or
8235 rhs1 BITOP2 (BITOP1 rhs2)
8236 depending on bitop1 and bitop2 arity. */
8237 if (VECTOR_BOOLEAN_TYPE_P (vectype))
8239 if (code == GT_EXPR)
8241 bitop1 = BIT_NOT_EXPR;
8242 bitop2 = BIT_AND_EXPR;
8244 else if (code == GE_EXPR)
8246 bitop1 = BIT_NOT_EXPR;
8247 bitop2 = BIT_IOR_EXPR;
8249 else if (code == LT_EXPR)
8251 bitop1 = BIT_NOT_EXPR;
8252 bitop2 = BIT_AND_EXPR;
8253 std::swap (rhs1, rhs2);
8254 std::swap (dts[0], dts[1]);
8256 else if (code == LE_EXPR)
8258 bitop1 = BIT_NOT_EXPR;
8259 bitop2 = BIT_IOR_EXPR;
8260 std::swap (rhs1, rhs2);
8261 std::swap (dts[0], dts[1]);
8263 else
8265 bitop1 = BIT_XOR_EXPR;
8266 if (code == EQ_EXPR)
8267 bitop2 = BIT_NOT_EXPR;
8271 if (!vec_stmt)
8273 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
8274 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
8275 dts, ndts, NULL, NULL);
8276 if (bitop1 == NOP_EXPR)
8277 return expand_vec_cmp_expr_p (vectype, mask_type, code);
8278 else
8280 machine_mode mode = TYPE_MODE (vectype);
8281 optab optab;
8283 optab = optab_for_tree_code (bitop1, vectype, optab_default);
8284 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8285 return false;
8287 if (bitop2 != NOP_EXPR)
8289 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8290 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8291 return false;
8293 return true;
8297 /* Transform. */
8298 if (!slp_node)
8300 vec_oprnds0.create (1);
8301 vec_oprnds1.create (1);
8304 /* Handle def. */
8305 lhs = gimple_assign_lhs (stmt);
8306 mask = vect_create_destination_var (lhs, mask_type);
8308 /* Handle cmp expr. */
8309 for (j = 0; j < ncopies; j++)
8311 gassign *new_stmt = NULL;
8312 if (j == 0)
8314 if (slp_node)
8316 auto_vec<tree, 2> ops;
8317 auto_vec<vec<tree>, 2> vec_defs;
8319 ops.safe_push (rhs1);
8320 ops.safe_push (rhs2);
8321 vect_get_slp_defs (ops, slp_node, &vec_defs);
8322 vec_oprnds1 = vec_defs.pop ();
8323 vec_oprnds0 = vec_defs.pop ();
8325 else
8327 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8328 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8331 else
8333 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8334 vec_oprnds0.pop ());
8335 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8336 vec_oprnds1.pop ());
8339 if (!slp_node)
8341 vec_oprnds0.quick_push (vec_rhs1);
8342 vec_oprnds1.quick_push (vec_rhs2);
8345 /* Arguments are ready. Create the new vector stmt. */
8346 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8348 vec_rhs2 = vec_oprnds1[i];
8350 new_temp = make_ssa_name (mask);
8351 if (bitop1 == NOP_EXPR)
8353 new_stmt = gimple_build_assign (new_temp, code,
8354 vec_rhs1, vec_rhs2);
8355 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8357 else
8359 if (bitop1 == BIT_NOT_EXPR)
8360 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8361 else
8362 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8363 vec_rhs2);
8364 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8365 if (bitop2 != NOP_EXPR)
8367 tree res = make_ssa_name (mask);
8368 if (bitop2 == BIT_NOT_EXPR)
8369 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8370 else
8371 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8372 new_temp);
8373 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8376 if (slp_node)
8377 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8380 if (slp_node)
8381 continue;
8383 if (j == 0)
8384 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8385 else
8386 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8388 prev_stmt_info = vinfo_for_stmt (new_stmt);
8391 vec_oprnds0.release ();
8392 vec_oprnds1.release ();
8394 return true;
8397 /* Make sure the statement is vectorizable. */
8399 bool
8400 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8402 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8403 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8404 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8405 bool ok;
8406 gimple *pattern_stmt;
8407 gimple_seq pattern_def_seq;
8409 if (dump_enabled_p ())
8411 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8412 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8415 if (gimple_has_volatile_ops (stmt))
8417 if (dump_enabled_p ())
8418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8419 "not vectorized: stmt has volatile operands\n");
8421 return false;
8424 /* Skip stmts that do not need to be vectorized. In loops this is expected
8425 to include:
8426 - the COND_EXPR which is the loop exit condition
8427 - any LABEL_EXPRs in the loop
8428 - computations that are used only for array indexing or loop control.
8429 In basic blocks we only analyze statements that are part of some SLP
8430 instance; therefore, all the statements are relevant.
8432 A pattern statement needs to be analyzed instead of the original statement
8433 if the original statement is not relevant. Otherwise, we analyze both
8434 statements. In basic blocks we are called from some SLP instance
8435 traversal; there we don't analyze the pattern stmts instead of the
8436 original ones, since the pattern stmts are already part of the SLP instance. */
8438 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8439 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8440 && !STMT_VINFO_LIVE_P (stmt_info))
8442 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8443 && pattern_stmt
8444 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8445 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8447 /* Analyze PATTERN_STMT instead of the original stmt. */
8448 stmt = pattern_stmt;
8449 stmt_info = vinfo_for_stmt (pattern_stmt);
8450 if (dump_enabled_p ())
8452 dump_printf_loc (MSG_NOTE, vect_location,
8453 "==> examining pattern statement: ");
8454 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8457 else
8459 if (dump_enabled_p ())
8460 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8462 return true;
8465 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8466 && node == NULL
8467 && pattern_stmt
8468 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8469 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8471 /* Analyze PATTERN_STMT too. */
8472 if (dump_enabled_p ())
8474 dump_printf_loc (MSG_NOTE, vect_location,
8475 "==> examining pattern statement: ");
8476 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8479 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8480 return false;
8483 if (is_pattern_stmt_p (stmt_info)
8484 && node == NULL
8485 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8487 gimple_stmt_iterator si;
8489 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8491 gimple *pattern_def_stmt = gsi_stmt (si);
8492 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8493 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8495 /* Analyze def stmt of STMT if it's a pattern stmt. */
8496 if (dump_enabled_p ())
8498 dump_printf_loc (MSG_NOTE, vect_location,
8499 "==> examining pattern def statement: ");
8500 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8503 if (!vect_analyze_stmt (pattern_def_stmt,
8504 need_to_vectorize, node))
8505 return false;
8510 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8512 case vect_internal_def:
8513 break;
8515 case vect_reduction_def:
8516 case vect_nested_cycle:
8517 gcc_assert (!bb_vinfo
8518 && (relevance == vect_used_in_outer
8519 || relevance == vect_used_in_outer_by_reduction
8520 || relevance == vect_used_by_reduction
8521 || relevance == vect_unused_in_scope
8522 || relevance == vect_used_only_live));
8523 break;
8525 case vect_induction_def:
8526 gcc_assert (!bb_vinfo);
8527 break;
8529 case vect_constant_def:
8530 case vect_external_def:
8531 case vect_unknown_def_type:
8532 default:
8533 gcc_unreachable ();
8536 if (STMT_VINFO_RELEVANT_P (stmt_info))
8538 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8539 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8540 || (is_gimple_call (stmt)
8541 && gimple_call_lhs (stmt) == NULL_TREE));
8542 *need_to_vectorize = true;
8545 if (PURE_SLP_STMT (stmt_info) && !node)
8547 dump_printf_loc (MSG_NOTE, vect_location,
8548 "handled only by SLP analysis\n");
8549 return true;
8552 ok = true;
8553 if (!bb_vinfo
8554 && (STMT_VINFO_RELEVANT_P (stmt_info)
8555 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8556 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8557 || vectorizable_conversion (stmt, NULL, NULL, node)
8558 || vectorizable_shift (stmt, NULL, NULL, node)
8559 || vectorizable_operation (stmt, NULL, NULL, node)
8560 || vectorizable_assignment (stmt, NULL, NULL, node)
8561 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8562 || vectorizable_call (stmt, NULL, NULL, node)
8563 || vectorizable_store (stmt, NULL, NULL, node)
8564 || vectorizable_reduction (stmt, NULL, NULL, node)
8565 || vectorizable_induction (stmt, NULL, NULL, node)
8566 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8567 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8568 else
8570 if (bb_vinfo)
8571 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8572 || vectorizable_conversion (stmt, NULL, NULL, node)
8573 || vectorizable_shift (stmt, NULL, NULL, node)
8574 || vectorizable_operation (stmt, NULL, NULL, node)
8575 || vectorizable_assignment (stmt, NULL, NULL, node)
8576 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8577 || vectorizable_call (stmt, NULL, NULL, node)
8578 || vectorizable_store (stmt, NULL, NULL, node)
8579 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8580 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8583 if (!ok)
8585 if (dump_enabled_p ())
8587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8588 "not vectorized: relevant stmt not ");
8589 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8590 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8593 return false;
8596 if (bb_vinfo)
8597 return true;
8599 /* Stmts that are (also) "live" (i.e. that are used outside of the loop)
8600 need extra handling, except for vectorizable reductions. */
8601 if (STMT_VINFO_LIVE_P (stmt_info)
8602 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8603 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8605 if (!ok)
8607 if (dump_enabled_p ())
8609 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8610 "not vectorized: live stmt not ");
8611 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8612 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8615 return false;
8618 return true;
8622 /* Function vect_transform_stmt.
8624 Create a vectorized stmt to replace STMT, and insert it at GSI. */
8626 bool
8627 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8628 bool *grouped_store, slp_tree slp_node,
8629 slp_instance slp_node_instance)
8631 bool is_store = false;
8632 gimple *vec_stmt = NULL;
8633 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8634 bool done;
8636 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8637 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8639 switch (STMT_VINFO_TYPE (stmt_info))
8641 case type_demotion_vec_info_type:
8642 case type_promotion_vec_info_type:
8643 case type_conversion_vec_info_type:
8644 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8645 gcc_assert (done);
8646 break;
8648 case induc_vec_info_type:
8649 done = vectorizable_induction (stmt, gsi, &vec_stmt, slp_node);
8650 gcc_assert (done);
8651 break;
8653 case shift_vec_info_type:
8654 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8655 gcc_assert (done);
8656 break;
8658 case op_vec_info_type:
8659 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8660 gcc_assert (done);
8661 break;
8663 case assignment_vec_info_type:
8664 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8665 gcc_assert (done);
8666 break;
8668 case load_vec_info_type:
8669 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8670 slp_node_instance);
8671 gcc_assert (done);
8672 break;
8674 case store_vec_info_type:
8675 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8676 gcc_assert (done);
8677 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8679 /* In case of interleaving, the whole chain is vectorized when the
8680 last store in the chain is reached. Store stmts before the last
8681 one are skipped, and their vec_stmt_info shouldn't be freed
8682 meanwhile. */
8683 *grouped_store = true;
8684 if (STMT_VINFO_VEC_STMT (stmt_info))
8685 is_store = true;
8687 else
8688 is_store = true;
8689 break;
8691 case condition_vec_info_type:
8692 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8693 gcc_assert (done);
8694 break;
8696 case comparison_vec_info_type:
8697 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8698 gcc_assert (done);
8699 break;
8701 case call_vec_info_type:
8702 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8703 stmt = gsi_stmt (*gsi);
8704 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
8705 is_store = true;
8706 break;
8708 case call_simd_clone_vec_info_type:
8709 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8710 stmt = gsi_stmt (*gsi);
8711 break;
8713 case reduc_vec_info_type:
8714 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8715 gcc_assert (done);
8716 break;
8718 default:
8719 if (!STMT_VINFO_LIVE_P (stmt_info))
8721 if (dump_enabled_p ())
8722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8723 "stmt not supported.\n");
8724 gcc_unreachable ();
8728 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8729 This would break hybrid SLP vectorization. */
8730 if (slp_node)
8731 gcc_assert (!vec_stmt
8732 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8734 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8735 is being vectorized, but outside the immediately enclosing loop. */
8736 if (vec_stmt
8737 && STMT_VINFO_LOOP_VINFO (stmt_info)
8738 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8739 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8740 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8741 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8742 || STMT_VINFO_RELEVANT (stmt_info) ==
8743 vect_used_in_outer_by_reduction))
8745 struct loop *innerloop = LOOP_VINFO_LOOP (
8746 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8747 imm_use_iterator imm_iter;
8748 use_operand_p use_p;
8749 tree scalar_dest;
8750 gimple *exit_phi;
8752 if (dump_enabled_p ())
8753 dump_printf_loc (MSG_NOTE, vect_location,
8754 "Record the vdef for outer-loop vectorization.\n");
8756 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8757 (to be used when vectorizing outer-loop stmts that use the DEF of
8758 STMT). */
8759 if (gimple_code (stmt) == GIMPLE_PHI)
8760 scalar_dest = PHI_RESULT (stmt);
8761 else
8762 scalar_dest = gimple_assign_lhs (stmt);
8764 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8766 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8768 exit_phi = USE_STMT (use_p);
8769 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8774 /* Handle stmts whose DEF is used outside the loop-nest that is
8775 being vectorized. */
8776 if (slp_node)
8778 gimple *slp_stmt;
8779 int i;
8780 if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8781 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8783 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8784 if (STMT_VINFO_LIVE_P (slp_stmt_info))
8786 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8787 &vec_stmt);
8788 gcc_assert (done);
8792 else if (STMT_VINFO_LIVE_P (stmt_info)
8793 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8795 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8796 gcc_assert (done);
8799 if (vec_stmt)
8800 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8802 return is_store;
8806 /* Remove a group of stores (for SLP or interleaving), free their
8807 stmt_vec_info. */
8809 void
8810 vect_remove_stores (gimple *first_stmt)
8812 gimple *next = first_stmt;
8813 gimple *tmp;
8814 gimple_stmt_iterator next_si;
8816 while (next)
8818 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8820 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8821 if (is_pattern_stmt_p (stmt_info))
8822 next = STMT_VINFO_RELATED_STMT (stmt_info);
8823 /* Free the attached stmt_vec_info and remove the stmt. */
8824 next_si = gsi_for_stmt (next);
8825 unlink_stmt_vdef (next);
8826 gsi_remove (&next_si, true);
8827 release_defs (next);
8828 free_stmt_vec_info (next);
8829 next = tmp;
8834 /* Function new_stmt_vec_info.
8836 Create and initialize a new stmt_vec_info struct for STMT. */
8838 stmt_vec_info
8839 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8841 stmt_vec_info res;
8842 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8844 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8845 STMT_VINFO_STMT (res) = stmt;
8846 res->vinfo = vinfo;
8847 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8848 STMT_VINFO_LIVE_P (res) = false;
8849 STMT_VINFO_VECTYPE (res) = NULL;
8850 STMT_VINFO_VEC_STMT (res) = NULL;
8851 STMT_VINFO_VECTORIZABLE (res) = true;
8852 STMT_VINFO_IN_PATTERN_P (res) = false;
8853 STMT_VINFO_RELATED_STMT (res) = NULL;
8854 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8855 STMT_VINFO_DATA_REF (res) = NULL;
8856 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8857 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8859 if (gimple_code (stmt) == GIMPLE_PHI
8860 && is_loop_header_bb_p (gimple_bb (stmt)))
8861 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8862 else
8863 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8865 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8866 STMT_SLP_TYPE (res) = loop_vect;
8867 STMT_VINFO_NUM_SLP_USES (res) = 0;
8869 GROUP_FIRST_ELEMENT (res) = NULL;
8870 GROUP_NEXT_ELEMENT (res) = NULL;
8871 GROUP_SIZE (res) = 0;
8872 GROUP_STORE_COUNT (res) = 0;
8873 GROUP_GAP (res) = 0;
8874 GROUP_SAME_DR_STMT (res) = NULL;
8876 return res;
8880 /* Create a vector for stmt_vec_info structs. */
8882 void
8883 init_stmt_vec_info_vec (void)
8885 gcc_assert (!stmt_vec_info_vec.exists ());
8886 stmt_vec_info_vec.create (50);
8890 /* Free the vector of stmt_vec_info structs. */
8892 void
8893 free_stmt_vec_info_vec (void)
8895 unsigned int i;
8896 stmt_vec_info info;
8897 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8898 if (info != NULL)
8899 free_stmt_vec_info (STMT_VINFO_STMT (info));
8900 gcc_assert (stmt_vec_info_vec.exists ());
8901 stmt_vec_info_vec.release ();
8905 /* Free stmt vectorization related info. */
8907 void
8908 free_stmt_vec_info (gimple *stmt)
8910 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8912 if (!stmt_info)
8913 return;
8915 /* Check if this statement has a related "pattern stmt"
8916 (introduced by the vectorizer during the pattern recognition
8917 pass). Free the pattern's stmt_vec_info and the def stmt's stmt_vec_info
8918 too. */
8919 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8921 stmt_vec_info patt_info
8922 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8923 if (patt_info)
8925 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8926 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8927 gimple_set_bb (patt_stmt, NULL);
8928 tree lhs = gimple_get_lhs (patt_stmt);
8929 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8930 release_ssa_name (lhs);
8931 if (seq)
8933 gimple_stmt_iterator si;
8934 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8936 gimple *seq_stmt = gsi_stmt (si);
8937 gimple_set_bb (seq_stmt, NULL);
8938 lhs = gimple_get_lhs (seq_stmt);
8939 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8940 release_ssa_name (lhs);
8941 free_stmt_vec_info (seq_stmt);
8944 free_stmt_vec_info (patt_stmt);
8948 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8949 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8950 set_vinfo_for_stmt (stmt, NULL);
8951 free (stmt_info);
8955 /* Function get_vectype_for_scalar_type_and_size.
8957 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8958 by the target. */
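/* For example, for SCALAR_TYPE int (4 bytes) and SIZE 16 this returns
a 4-element integer vector type (V4SImode on most targets); with
SIZE 0 the target's preferred SIMD mode for SImode is used instead. */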
8960 static tree
8961 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8963 tree orig_scalar_type = scalar_type;
8964 machine_mode inner_mode = TYPE_MODE (scalar_type);
8965 machine_mode simd_mode;
8966 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8967 int nunits;
8968 tree vectype;
8970 if (nbytes == 0)
8971 return NULL_TREE;
8973 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8974 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8975 return NULL_TREE;
8977 /* For vector types of elements whose mode precision doesn't
8978 match their type's precision we use an element type of mode
8979 precision. The vectorization routines will have to make sure
8980 they support the proper result truncation/extension.
8981 We also make sure to build vector types with INTEGER_TYPE
8982 component type only. */
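/* E.g. a boolean whose precision (typically 1) is smaller than the
bitsize of its QImode is vectorized using an 8-bit unsigned integer
element type instead. */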
8983 if (INTEGRAL_TYPE_P (scalar_type)
8984 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8985 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8986 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8987 TYPE_UNSIGNED (scalar_type));
8989 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8990 When the component mode passes the above test, simply use a type
8991 corresponding to that mode. The theory is that any use that
8992 would cause problems with this will disable vectorization anyway. */
8993 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8994 && !INTEGRAL_TYPE_P (scalar_type))
8995 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8997 /* We can't build a vector type of elements with alignment bigger than
8998 their size. */
8999 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
9000 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
9001 TYPE_UNSIGNED (scalar_type));
9003 /* If we fell back to using the mode, fail if there was
9004 no scalar type for it. */
9005 if (scalar_type == NULL_TREE)
9006 return NULL_TREE;
9008 /* If no size was supplied use the mode the target prefers. Otherwise
9009 look up a vector mode of the specified size. */
9010 if (size == 0)
9011 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
9012 else
9013 simd_mode = mode_for_vector (inner_mode, size / nbytes);
9014 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
9015 if (nunits <= 1)
9016 return NULL_TREE;
9018 vectype = build_vector_type (scalar_type, nunits);
9020 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
9021 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
9022 return NULL_TREE;
9024 /* Re-attach the address-space qualifier if we canonicalized the scalar
9025 type. */
9026 if (TYPE_ADDR_SPACE (orig_scalar_type) != TYPE_ADDR_SPACE (vectype))
9027 return build_qualified_type
9028 (vectype, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type)));
9030 return vectype;
9033 unsigned int current_vector_size;
9035 /* Function get_vectype_for_scalar_type.
9037 Returns the vector type corresponding to SCALAR_TYPE as supported
9038 by the target. */
9040 tree
9041 get_vectype_for_scalar_type (tree scalar_type)
9043 tree vectype;
9044 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
9045 current_vector_size);
9046 if (vectype
9047 && current_vector_size == 0)
9048 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
9049 return vectype;
9052 /* Function get_mask_type_for_scalar_type.
9054 Returns the mask type corresponding to the result of a comparison
9055 of vectors of the specified SCALAR_TYPE, as supported by the target. */
9057 tree
9058 get_mask_type_for_scalar_type (tree scalar_type)
9060 tree vectype = get_vectype_for_scalar_type (scalar_type);
9062 if (!vectype)
9063 return NULL;
9065 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
9066 current_vector_size);
9069 /* Function get_same_sized_vectype
9071 Returns a vector type corresponding to SCALAR_TYPE, with the same
9072 size as VECTOR_TYPE, if supported by the target. */
9074 tree
9075 get_same_sized_vectype (tree scalar_type, tree vector_type)
9077 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
9078 return build_same_sized_truth_vector_type (vector_type);
9080 return get_vectype_for_scalar_type_and_size
9081 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
9084 /* Function vect_is_simple_use.
9086 Input:
9087 VINFO - the vect info of the loop or basic block that is being vectorized.
9088 OPERAND - operand in the loop or bb.
9089 Output:
9090 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
9091 DT - the type of definition
9093 Returns whether a stmt with OPERAND can be vectorized.
9094 For loops, supportable operands are constants, loop invariants, and operands
9095 that are defined by the current iteration of the loop. Unsupportable
9096 operands are those that are defined by a previous iteration of the loop (as
9097 is the case in reduction/induction computations).
9098 For basic blocks, supportable operands are constants and bb invariants.
9099 For now, operands defined outside the basic block are not supported. */
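/* For example, in a loop being vectorized: a literal constant operand
is classified as vect_constant_def, an SSA name defined before the
loop as vect_external_def, and an SSA name defined by a statement
inside the loop as vect_internal_def (or one of the reduction /
induction / cycle kinds recorded in its stmt_vec_info). */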
9101 bool
9102 vect_is_simple_use (tree operand, vec_info *vinfo,
9103 gimple **def_stmt, enum vect_def_type *dt)
9105 *def_stmt = NULL;
9106 *dt = vect_unknown_def_type;
9108 if (dump_enabled_p ())
9110 dump_printf_loc (MSG_NOTE, vect_location,
9111 "vect_is_simple_use: operand ");
9112 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
9113 dump_printf (MSG_NOTE, "\n");
9116 if (CONSTANT_CLASS_P (operand))
9118 *dt = vect_constant_def;
9119 return true;
9122 if (is_gimple_min_invariant (operand))
9124 *dt = vect_external_def;
9125 return true;
9128 if (TREE_CODE (operand) != SSA_NAME)
9130 if (dump_enabled_p ())
9131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9132 "not ssa-name.\n");
9133 return false;
9136 if (SSA_NAME_IS_DEFAULT_DEF (operand))
9138 *dt = vect_external_def;
9139 return true;
9142 *def_stmt = SSA_NAME_DEF_STMT (operand);
9143 if (dump_enabled_p ())
9145 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
9146 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
9149 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
9150 *dt = vect_external_def;
9151 else
9153 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
9154 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
9157 if (dump_enabled_p ())
9159 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
9160 switch (*dt)
9162 case vect_uninitialized_def:
9163 dump_printf (MSG_NOTE, "uninitialized\n");
9164 break;
9165 case vect_constant_def:
9166 dump_printf (MSG_NOTE, "constant\n");
9167 break;
9168 case vect_external_def:
9169 dump_printf (MSG_NOTE, "external\n");
9170 break;
9171 case vect_internal_def:
9172 dump_printf (MSG_NOTE, "internal\n");
9173 break;
9174 case vect_induction_def:
9175 dump_printf (MSG_NOTE, "induction\n");
9176 break;
9177 case vect_reduction_def:
9178 dump_printf (MSG_NOTE, "reduction\n");
9179 break;
9180 case vect_double_reduction_def:
9181 dump_printf (MSG_NOTE, "double reduction\n");
9182 break;
9183 case vect_nested_cycle:
9184 dump_printf (MSG_NOTE, "nested cycle\n");
9185 break;
9186 case vect_unknown_def_type:
9187 dump_printf (MSG_NOTE, "unknown\n");
9188 break;
9192 if (*dt == vect_unknown_def_type)
9194 if (dump_enabled_p ())
9195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9196 "Unsupported pattern.\n");
9197 return false;
9200 switch (gimple_code (*def_stmt))
9202 case GIMPLE_PHI:
9203 case GIMPLE_ASSIGN:
9204 case GIMPLE_CALL:
9205 break;
9206 default:
9207 if (dump_enabled_p ())
9208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
9209 "unsupported defining stmt:\n");
9210 return false;
9213 return true;
9216 /* Function vect_is_simple_use.
9218 Same as vect_is_simple_use but also determines the vector operand
9219 type of OPERAND and stores it in *VECTYPE. If the definition of
9220 OPERAND is vect_uninitialized_def, vect_constant_def or
9221 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
9222 is responsible for computing the best-suited vector type for the
9223 scalar operand. */
9225 bool
9226 vect_is_simple_use (tree operand, vec_info *vinfo,
9227 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
9229 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
9230 return false;
9232 /* Now get a vector type if the def is internal, otherwise supply
9233 NULL_TREE and leave it up to the caller to figure out a proper
9234 type for the use stmt. */
9235 if (*dt == vect_internal_def
9236 || *dt == vect_induction_def
9237 || *dt == vect_reduction_def
9238 || *dt == vect_double_reduction_def
9239 || *dt == vect_nested_cycle)
9241 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
9243 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
9244 && !STMT_VINFO_RELEVANT (stmt_info)
9245 && !STMT_VINFO_LIVE_P (stmt_info))
9246 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
9248 *vectype = STMT_VINFO_VECTYPE (stmt_info);
9249 gcc_assert (*vectype != NULL_TREE);
9251 else if (*dt == vect_uninitialized_def
9252 || *dt == vect_constant_def
9253 || *dt == vect_external_def)
9254 *vectype = NULL_TREE;
9255 else
9256 gcc_unreachable ();
9258 return true;
9262 /* Function supportable_widening_operation
9264 Check whether an operation represented by the code CODE is a
9265 widening operation that is supported by the target platform in
9266 vector form (i.e., when operating on arguments of type VECTYPE_IN
9267 producing a result of type VECTYPE_OUT).
9269 Widening operations we currently support are NOP (CONVERT), FLOAT
9270 and WIDEN_MULT. This function checks if these operations are supported
9271 by the target platform either directly (via vector tree-codes), or via
9272 target builtins.
9274 Output:
9275 - CODE1 and CODE2 are codes of vector operations to be used when
9276 vectorizing the operation, if available.
9277 - MULTI_STEP_CVT determines the number of required intermediate steps in
9278 case of multi-step conversion (like char->short->int - in that case
9279 MULTI_STEP_CVT will be 1).
9280 - INTERM_TYPES contains the intermediate type required to perform the
9281 widening operation (short in the above example). */
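/* For example (assuming 128-bit vectors), widening 8 shorts (V8HI) to
ints (V4SI) needs two result vectors, produced by a LO/HI pair such as
VEC_UNPACK_LO_EXPR / VEC_UNPACK_HI_EXPR, with *MULTI_STEP_CVT == 0;
widening chars (V16QI) directly to ints would additionally go through
an intermediate short vector type, giving *MULTI_STEP_CVT == 1. */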
9283 bool
9284 supportable_widening_operation (enum tree_code code, gimple *stmt,
9285 tree vectype_out, tree vectype_in,
9286 enum tree_code *code1, enum tree_code *code2,
9287 int *multi_step_cvt,
9288 vec<tree> *interm_types)
9290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9291 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9292 struct loop *vect_loop = NULL;
9293 machine_mode vec_mode;
9294 enum insn_code icode1, icode2;
9295 optab optab1, optab2;
9296 tree vectype = vectype_in;
9297 tree wide_vectype = vectype_out;
9298 enum tree_code c1, c2;
9299 int i;
9300 tree prev_type, intermediate_type;
9301 machine_mode intermediate_mode, prev_mode;
9302 optab optab3, optab4;
9304 *multi_step_cvt = 0;
9305 if (loop_info)
9306 vect_loop = LOOP_VINFO_LOOP (loop_info);
9308 switch (code)
9310 case WIDEN_MULT_EXPR:
9311 /* The result of a vectorized widening operation usually requires
9312 two vectors (because the widened results do not fit into one vector).
9313 The generated vector results would normally be expected to be
9314 generated in the same order as in the original scalar computation,
9315 i.e. if 8 results are generated in each vector iteration, they are
9316 to be organized as follows:
9317 vect1: [res1,res2,res3,res4],
9318 vect2: [res5,res6,res7,res8].
9320 However, in the special case that the result of the widening
9321 operation is used in a reduction computation only, the order doesn't
9322 matter (because when vectorizing a reduction we change the order of
9323 the computation). Some targets can take advantage of this and
9324 generate more efficient code. For example, targets like Altivec,
9325 that support widen_mult using a sequence of {mult_even,mult_odd}
9326 generate the following vectors:
9327 vect1: [res1,res3,res5,res7],
9328 vect2: [res2,res4,res6,res8].
9330 When vectorizing outer-loops, we execute the inner-loop sequentially
9331 (each vectorized inner-loop iteration contributes to VF outer-loop
9332 iterations in parallel). We therefore don't allow changing the
9333 order of the computation in the inner-loop during outer-loop
9334 vectorization. */
9335 /* TODO: Another case in which order doesn't *really* matter is when we
9336 widen and then contract again, e.g. (short)((int)x * y >> 8).
9337 Normally, pack_trunc performs an even/odd permute, whereas the
9338 repack from an even/odd expansion would be an interleave, which
9339 would be significantly simpler for e.g. AVX2. */
9340 /* In any case, in order to avoid duplicating the code below, recurse
9341 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9342 are properly set up for the caller. If we fail, we'll continue with
9343 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9344 if (vect_loop
9345 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9346 && !nested_in_vect_loop_p (vect_loop, stmt)
9347 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9348 stmt, vectype_out, vectype_in,
9349 code1, code2, multi_step_cvt,
9350 interm_types))
9352 /* Elements in a vector with the vect_used_by_reduction property cannot
9353 be reordered if the use chain with this property does not have the
9354 same operation. One such example is s += a * b, where the elements
9355 in a and b cannot be reordered. Here we check that the vector defined
9356 by STMT is used directly in the reduction statement and nowhere else. */
9357 tree lhs = gimple_assign_lhs (stmt);
9358 use_operand_p dummy;
9359 gimple *use_stmt;
9360 stmt_vec_info use_stmt_info = NULL;
9361 if (single_imm_use (lhs, &dummy, &use_stmt)
9362 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9363 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9364 return true;
9366 c1 = VEC_WIDEN_MULT_LO_EXPR;
9367 c2 = VEC_WIDEN_MULT_HI_EXPR;
9368 break;
9370 case DOT_PROD_EXPR:
9371 c1 = DOT_PROD_EXPR;
9372 c2 = DOT_PROD_EXPR;
9373 break;
9375 case SAD_EXPR:
9376 c1 = SAD_EXPR;
9377 c2 = SAD_EXPR;
9378 break;
9380 case VEC_WIDEN_MULT_EVEN_EXPR:
9381 /* Support the recursion induced just above. */
9382 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9383 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9384 break;
9386 case WIDEN_LSHIFT_EXPR:
9387 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9388 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9389 break;
9391 CASE_CONVERT:
9392 c1 = VEC_UNPACK_LO_EXPR;
9393 c2 = VEC_UNPACK_HI_EXPR;
9394 break;
9396 case FLOAT_EXPR:
9397 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9398 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9399 break;
9401 case FIX_TRUNC_EXPR:
9402 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9403 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9404 computing the operation. */
9405 return false;
9407 default:
9408 gcc_unreachable ();
9411 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9412 std::swap (c1, c2);
9414 if (code == FIX_TRUNC_EXPR)
9416 /* The signedness is determined from the output operand. */
9417 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9418 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9420 else
9422 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9423 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9426 if (!optab1 || !optab2)
9427 return false;
9429 vec_mode = TYPE_MODE (vectype);
9430 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9431 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9432 return false;
9434 *code1 = c1;
9435 *code2 = c2;
9437 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9438 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9439 /* For scalar masks we may have different boolean
9440 vector types that share the same QImode. Thus we
9441 add an additional check on the number of elements. */
9442 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9443 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9444 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9446 /* Check if it's a multi-step conversion that can be done using intermediate
9447 types. */
9449 prev_type = vectype;
9450 prev_mode = vec_mode;
9452 if (!CONVERT_EXPR_CODE_P (code))
9453 return false;
9455 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9456 intermediate steps in the promotion sequence. We try up to
9457 MAX_INTERM_CVT_STEPS steps to get to WIDE_VECTYPE, and fail if we do
9458 not. */
9459 interm_types->create (MAX_INTERM_CVT_STEPS);
9460 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9462 intermediate_mode = insn_data[icode1].operand[0].mode;
9463 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9465 intermediate_type
9466 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9467 current_vector_size);
9468 if (intermediate_mode != TYPE_MODE (intermediate_type))
9469 return false;
9471 else
9472 intermediate_type
9473 = lang_hooks.types.type_for_mode (intermediate_mode,
9474 TYPE_UNSIGNED (prev_type));
9476 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9477 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9479 if (!optab3 || !optab4
9480 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9481 || insn_data[icode1].operand[0].mode != intermediate_mode
9482 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9483 || insn_data[icode2].operand[0].mode != intermediate_mode
9484 || ((icode1 = optab_handler (optab3, intermediate_mode))
9485 == CODE_FOR_nothing)
9486 || ((icode2 = optab_handler (optab4, intermediate_mode))
9487 == CODE_FOR_nothing))
9488 break;
9490 interm_types->quick_push (intermediate_type);
9491 (*multi_step_cvt)++;
9493 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9494 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9495 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9496 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9497 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9499 prev_type = intermediate_type;
9500 prev_mode = intermediate_mode;
9503 interm_types->release ();
9504 return false;
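/* Illustrative addition, not part of the original source: a scalar analogue
   of the reduction argument in the WIDEN_MULT_EXPR case above.  When the
   widened products res1..res8 only feed a sum reduction, accumulating them
   in lo/hi lane order ({res1..res4},{res5..res8}) or in even/odd lane order
   ({res1,res3,res5,res7},{res2,res4,res6,res8}) yields the same value, which
   is why the target may use the cheaper even/odd form.  */
#if 0
static bool
example_reduction_order_invariant (const int res[8])
{
  int lo_hi = 0, even_odd = 0;
  for (int i = 0; i < 8; i++)		/* lo/hi lane order.  */
    lo_hi += res[i];
  for (int i = 0; i < 8; i += 2)	/* even lanes ...  */
    even_odd += res[i];
  for (int i = 1; i < 8; i += 2)	/* ... then odd lanes.  */
    even_odd += res[i];
  return lo_hi == even_odd;		/* Always true for integer adds.  */
}
#endif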
9508 /* Function supportable_narrowing_operation
9510 Check whether an operation represented by the code CODE is a
9511 narrowing operation that is supported by the target platform in
9512 vector form (i.e., when operating on arguments of type VECTYPE_IN
9513 and producing a result of type VECTYPE_OUT).
9515 Narrowing operations we currently support are NOP (CONVERT) and
9516 FIX_TRUNC. This function checks whether these operations are supported
9517 by the target platform directly via vector tree-codes.
9519 Output:
9520 - CODE1 is the code of a vector operation to be used when
9521 vectorizing the operation, if available.
9522 - MULTI_STEP_CVT determines the number of required intermediate steps in
9523 case of multi-step conversion (like int->short->char - in that case
9524 MULTI_STEP_CVT will be 1).
9525 - INTERM_TYPES contains the intermediate type(s) required to perform the
9526 narrowing operation (short in the above example). */
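/* Illustrative addition, not part of the original source: a hedged sketch of
   how a caller might query supportable_narrowing_operation for an
   int -> char narrowing.  The function name and the CHAR_VECTYPE/INT_VECTYPE
   parameters are hypothetical placeholders.  */
#if 0
static void
example_query_narrowing (tree char_vectype, tree int_vectype)
{
  enum tree_code code1;
  int multi_step_cvt = 0;
  vec<tree> interm_types = vNULL;

  if (supportable_narrowing_operation (CONVERT_EXPR, char_vectype, int_vectype,
				       &code1, &multi_step_cvt, &interm_types))
    /* For an int -> char demotion one intermediate (short) vector type is
       typically reported: multi_step_cvt == 1, interm_types holds that short
       vector type, and code1 is VEC_PACK_TRUNC_EXPR.  */
    gcc_assert (multi_step_cvt <= MAX_INTERM_CVT_STEPS);

  interm_types.release ();
}
#endif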
9528 bool
9529 supportable_narrowing_operation (enum tree_code code,
9530 tree vectype_out, tree vectype_in,
9531 enum tree_code *code1, int *multi_step_cvt,
9532 vec<tree> *interm_types)
9534 machine_mode vec_mode;
9535 enum insn_code icode1;
9536 optab optab1, interm_optab;
9537 tree vectype = vectype_in;
9538 tree narrow_vectype = vectype_out;
9539 enum tree_code c1;
9540 tree intermediate_type, prev_type;
9541 machine_mode intermediate_mode, prev_mode;
9542 int i;
9543 bool uns;
9545 *multi_step_cvt = 0;
9546 switch (code)
9548 CASE_CONVERT:
9549 c1 = VEC_PACK_TRUNC_EXPR;
9550 break;
9552 case FIX_TRUNC_EXPR:
9553 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9554 break;
9556 case FLOAT_EXPR:
9557 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9558 tree code and optabs used for computing the operation. */
9559 return false;
9561 default:
9562 gcc_unreachable ();
9565 if (code == FIX_TRUNC_EXPR)
9566 /* The signedness is determined from the output operand. */
9567 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9568 else
9569 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9571 if (!optab1)
9572 return false;
9574 vec_mode = TYPE_MODE (vectype);
9575 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9576 return false;
9578 *code1 = c1;
9580 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9581 /* For scalar masks we may have different boolean
9582 vector types that share the same QImode. Thus we
9583 add an additional check on the number of elements. */
9584 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9585 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9586 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9588 /* Check if it's a multi-step conversion that can be done using intermediate
9589 types. */
9590 prev_mode = vec_mode;
9591 prev_type = vectype;
9592 if (code == FIX_TRUNC_EXPR)
9593 uns = TYPE_UNSIGNED (vectype_out);
9594 else
9595 uns = TYPE_UNSIGNED (vectype);
9597 /* For a multi-step FIX_TRUNC_EXPR prefer a signed float-to-integer
9598 conversion over an unsigned one, as unsigned FIX_TRUNC_EXPR is often
9599 more costly than signed; a scalar sketch of this appears after this function. */
9600 if (code == FIX_TRUNC_EXPR && uns)
9602 enum insn_code icode2;
9604 intermediate_type
9605 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9606 interm_optab
9607 = optab_for_tree_code (c1, intermediate_type, optab_default);
9608 if (interm_optab != unknown_optab
9609 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9610 && insn_data[icode1].operand[0].mode
9611 == insn_data[icode2].operand[0].mode)
9613 uns = false;
9614 optab1 = interm_optab;
9615 icode1 = icode2;
9619 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9620 intermediate steps in the narrowing sequence. We try up to
9621 MAX_INTERM_CVT_STEPS steps to get to NARROW_VECTYPE, and fail if we do not. */
9622 interm_types->create (MAX_INTERM_CVT_STEPS);
9623 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9625 intermediate_mode = insn_data[icode1].operand[0].mode;
9626 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9628 intermediate_type
9629 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9630 current_vector_size);
9631 if (intermediate_mode != TYPE_MODE (intermediate_type))
9632 return false;
9634 else
9635 intermediate_type
9636 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9637 interm_optab
9638 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9639 optab_default);
9640 if (!interm_optab
9641 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9642 || insn_data[icode1].operand[0].mode != intermediate_mode
9643 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9644 == CODE_FOR_nothing))
9645 break;
9647 interm_types->quick_push (intermediate_type);
9648 (*multi_step_cvt)++;
9650 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9651 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9652 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9653 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9655 prev_mode = intermediate_mode;
9656 prev_type = intermediate_type;
9657 optab1 = interm_optab;
9660 interm_types->release ();
9661 return false;
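/* Illustrative addition, not part of the original source: a scalar analogue
   of the signed-intermediate preference for multi-step unsigned
   FIX_TRUNC_EXPR above.  For inputs on which the original narrowing
   conversion is well defined (values that fit the final unsigned type), a
   signed and an unsigned intermediate produce the same low-order bits, so
   the usually cheaper signed conversion can be used.  */
#if 0
static unsigned short
example_narrow_via_signed (double d)	/* Assumes 0 <= d < 65536.  */
{
  int tmp = (int) d;			/* Signed float -> int step.  */
  return (unsigned short) tmp;		/* Final truncating pack step.  */
}

static unsigned short
example_narrow_via_unsigned (double d)	/* Assumes 0 <= d < 65536.  */
{
  unsigned int tmp = (unsigned int) d;	/* Unsigned float -> int step.  */
  return (unsigned short) tmp;		/* Same result as the signed path.  */
}
#endif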