gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "ssa.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
34 #include "cgraph.h"
35 #include "dumpfile.h"
36 #include "alias.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "tree-cfg.h"
44 #include "tree-ssa-loop-manip.h"
45 #include "cfgloop.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
49 #include "builtins.h"
50 #include "internal-fn.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Says whether a statement is a load, a store of a vectorized statement
56 result, or a store of an invariant value. */
57 enum vec_load_store_type {
58 VLS_LOAD,
59 VLS_STORE,
60 VLS_STORE_INVARIANT
63 /* Return the vectorized type for the given statement. */
65 tree
66 stmt_vectype (struct _stmt_vec_info *stmt_info)
68 return STMT_VINFO_VECTYPE (stmt_info);
71 /* Return TRUE iff the given statement is in an inner loop relative to
72 the loop being vectorized. */
73 bool
74 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
76 gimple *stmt = STMT_VINFO_STMT (stmt_info);
77 basic_block bb = gimple_bb (stmt);
78 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
79 struct loop* loop;
81 if (!loop_vinfo)
82 return false;
84 loop = LOOP_VINFO_LOOP (loop_vinfo);
86 return (bb->loop_father == loop->inner);
89 /* Record the cost of a statement, either by directly informing the
90 target model or by saving it in a vector for later processing.
91 Return a preliminary estimate of the statement's cost. */
93 unsigned
94 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
95 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
96 int misalign, enum vect_cost_model_location where)
98 if (body_cost_vec)
100 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
101 stmt_info_for_cost si = { count, kind,
102 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
103 misalign };
104 body_cost_vec->safe_push (si);
105 return (unsigned)
106 (builtin_vectorization_cost (kind, vectype, misalign) * count);
108 else
109 return add_stmt_cost (stmt_info->vinfo->target_cost_data,
110 count, kind, stmt_info, misalign, where);
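/* Editorial illustration (not part of the original source): callers either
   accumulate costs into a vector for later processing, e.g.

     inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
                                      stmt_info, 0, vect_body);

   or pass a NULL cost vector, in which case the cost is fed directly to the
   target via add_stmt_cost on stmt_info->vinfo->target_cost_data.  */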
113 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 static tree
116 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
118 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
119 "vect_array");
122 /* ARRAY is an array of vectors created by create_vector_array.
123 Return an SSA_NAME for the vector in index N. The reference
124 is part of the vectorization of STMT and the vector is associated
125 with scalar destination SCALAR_DEST. */
127 static tree
128 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
129 tree array, unsigned HOST_WIDE_INT n)
131 tree vect_type, vect, vect_name, array_ref;
132 gimple *new_stmt;
134 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
135 vect_type = TREE_TYPE (TREE_TYPE (array));
136 vect = vect_create_destination_var (scalar_dest, vect_type);
137 array_ref = build4 (ARRAY_REF, vect_type, array,
138 build_int_cst (size_type_node, n),
139 NULL_TREE, NULL_TREE);
141 new_stmt = gimple_build_assign (vect, array_ref);
142 vect_name = make_ssa_name (vect, new_stmt);
143 gimple_assign_set_lhs (new_stmt, vect_name);
144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
146 return vect_name;
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT. */
153 static void
154 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
155 tree array, unsigned HOST_WIDE_INT n)
157 tree array_ref;
158 gimple *new_stmt;
160 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
161 build_int_cst (size_type_node, n),
162 NULL_TREE, NULL_TREE);
164 new_stmt = gimple_build_assign (array_ref, vect);
165 vect_finish_stmt_generation (stmt, new_stmt, gsi);
168 /* PTR is a pointer to an array of type TYPE. Return a representation
169 of *PTR. The memory reference replaces those in FIRST_DR
170 (and its group). */
172 static tree
173 create_array_ref (tree type, tree ptr, tree alias_ptr_type)
175 tree mem_ref;
177 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
178 /* Arrays have the same alignment as their type. */
179 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
180 return mem_ref;
183 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
185 /* Function vect_mark_relevant.
187 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
189 static void
190 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
191 enum vect_relevant relevant, bool live_p)
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
195 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
196 gimple *pattern_stmt;
198 if (dump_enabled_p ())
200 dump_printf_loc (MSG_NOTE, vect_location,
201 "mark relevant %d, live %d: ", relevant, live_p);
202 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
205 /* If this stmt is an original stmt in a pattern, we might need to mark its
206 related pattern stmt instead of the original stmt. However, such stmts
207 may have their own uses that are not in any pattern, in such cases the
208 stmt itself should be marked. */
209 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
211 /* This is the last stmt in a sequence that was detected as a
212 pattern that can potentially be vectorized. Don't mark the stmt
213 as relevant/live because it's not going to be vectorized.
214 Instead mark the pattern-stmt that replaces it. */
216 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location,
220 "last stmt in pattern. don't mark"
221 " relevant/live.\n");
222 stmt_info = vinfo_for_stmt (pattern_stmt);
223 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
224 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
225 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
226 stmt = pattern_stmt;
229 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
230 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
231 STMT_VINFO_RELEVANT (stmt_info) = relevant;
233 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
234 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
236 if (dump_enabled_p ())
237 dump_printf_loc (MSG_NOTE, vect_location,
238 "already marked relevant/live.\n");
239 return;
242 worklist->safe_push (stmt);
246 /* Function is_simple_and_all_uses_invariant
248 Return true if STMT is simple and all uses of it are invariant. */
250 bool
251 is_simple_and_all_uses_invariant (gimple *stmt, loop_vec_info loop_vinfo)
253 tree op;
254 gimple *def_stmt;
255 ssa_op_iter iter;
257 if (!is_gimple_assign (stmt))
258 return false;
260 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
262 enum vect_def_type dt = vect_uninitialized_def;
264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt))
266 if (dump_enabled_p ())
267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
268 "use not simple.\n");
269 return false;
272 if (dt != vect_external_def && dt != vect_constant_def)
273 return false;
275 return true;
278 /* Function vect_stmt_relevant_p.
280 Return true if STMT, in the loop represented by LOOP_VINFO, is
281 "relevant for vectorization".
283 A stmt is considered "relevant for vectorization" if:
284 - it has uses outside the loop.
285 - it has vdefs (it alters memory).
286 - it is a control stmt in the loop (other than the loop exit condition).
288 CHECKME: what other side effects would the vectorizer allow? */
290 static bool
291 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
292 enum vect_relevant *relevant, bool *live_p)
294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
295 ssa_op_iter op_iter;
296 imm_use_iterator imm_iter;
297 use_operand_p use_p;
298 def_operand_p def_p;
300 *relevant = vect_unused_in_scope;
301 *live_p = false;
303 /* cond stmt other than loop exit cond. */
304 if (is_ctrl_stmt (stmt)
305 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
306 != loop_exit_ctrl_vec_info_type)
307 *relevant = vect_used_in_scope;
309 /* changing memory. */
310 if (gimple_code (stmt) != GIMPLE_PHI)
311 if (gimple_vdef (stmt)
312 && !gimple_clobber_p (stmt))
314 if (dump_enabled_p ())
315 dump_printf_loc (MSG_NOTE, vect_location,
316 "vec_stmt_relevant_p: stmt has vdefs.\n");
317 *relevant = vect_used_in_scope;
320 /* uses outside the loop. */
321 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
323 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
325 basic_block bb = gimple_bb (USE_STMT (use_p));
326 if (!flow_bb_inside_loop_p (loop, bb))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: used out of loop.\n");
332 if (is_gimple_debug (USE_STMT (use_p)))
333 continue;
335 /* We expect all such uses to be in the loop exit phis
336 (because of loop closed form) */
337 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
338 gcc_assert (bb == single_exit (loop)->dest);
340 *live_p = true;
345 if (*live_p && *relevant == vect_unused_in_scope
346 && !is_simple_and_all_uses_invariant (stmt, loop_vinfo))
348 if (dump_enabled_p ())
349 dump_printf_loc (MSG_NOTE, vect_location,
350 "vec_stmt_relevant_p: stmt live but not relevant.\n");
351 *relevant = vect_used_only_live;
354 return (*live_p || *relevant);
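/* Editorial example (a sketch, not in the original source): in

     for (i = 0; i < n; i++)
       {
         t = a[i] + b[i];
         c[i] = t;        <-- has a vdef: vect_used_in_scope
         sum += a[i];     <-- sum is used after the loop: live
       }

   the store to c[i] is relevant because it alters memory, while the update
   of sum is live because its def reaches a use outside the loop through the
   loop-closed exit phi.  */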
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
363 static bool
364 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
366 tree operand;
367 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info))
373 return true;
375 /* STMT has a data_ref. FORNOW this means that it's one of
376 the following forms:
377 -1- ARRAY_REF = var
378 -2- var = ARRAY_REF
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
383 for array indexing.
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt))
390 if (is_gimple_call (stmt)
391 && gimple_call_internal_p (stmt))
392 switch (gimple_call_internal_fn (stmt))
394 case IFN_MASK_STORE:
395 operand = gimple_call_arg (stmt, 3);
396 if (operand == use)
397 return true;
398 /* FALLTHRU */
399 case IFN_MASK_LOAD:
400 operand = gimple_call_arg (stmt, 2);
401 if (operand == use)
402 return true;
403 break;
404 default:
405 break;
407 return false;
410 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
411 return false;
412 operand = gimple_assign_rhs1 (stmt);
413 if (TREE_CODE (operand) != SSA_NAME)
414 return false;
416 if (operand == use)
417 return true;
419 return false;
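/* Editorial example (illustration only): for the statement

     a[i_1] = x_2;

   the use of x_2 is a non-indexing operand (it is the stored value), so the
   function returns true for it, whereas i_1 appears only inside the
   ARRAY_REF and therefore has no non-indexing use here.  */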
424 /* Function process_use.
426 Inputs:
427 - a USE in STMT in a loop represented by LOOP_VINFO
428 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
429 that defined USE. This is done by calling mark_relevant and passing it
430 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
431 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
432 be performed.
434 Outputs:
435 Generally, LIVE_P and RELEVANT are used to define the liveness and
436 relevance info of the DEF_STMT of this USE:
437 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
438 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
439 Exceptions:
440 - case 1: If USE is used only for address computations (e.g. array indexing),
441 which does not need to be directly vectorized, then the liveness/relevance
442 of the respective DEF_STMT is left unchanged.
443 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
444 skip DEF_STMT because it has already been processed.
445 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
446 be modified accordingly.
448 Return true if everything is as expected. Return false otherwise. */
450 static bool
451 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo,
452 enum vect_relevant relevant, vec<gimple *> *worklist,
453 bool force)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
457 stmt_vec_info dstmt_vinfo;
458 basic_block bb, def_bb;
459 gimple *def_stmt;
460 enum vect_def_type dt;
462 /* case 1: we are only interested in uses that need to be vectorized. Uses
463 that are used for address computation are not considered relevant. */
464 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
465 return true;
467 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &dt))
469 if (dump_enabled_p ())
470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
471 "not vectorized: unsupported use in stmt.\n");
472 return false;
475 if (!def_stmt || gimple_nop_p (def_stmt))
476 return true;
478 def_bb = gimple_bb (def_stmt);
479 if (!flow_bb_inside_loop_p (loop, def_bb))
481 if (dump_enabled_p ())
482 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
483 return true;
486 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
487 DEF_STMT must have already been processed, because this should be the
488 only way that STMT, which is a reduction-phi, was put in the worklist,
489 as there should be no other uses for DEF_STMT in the loop. So we just
490 check that everything is as expected, and we are done. */
491 dstmt_vinfo = vinfo_for_stmt (def_stmt);
492 bb = gimple_bb (stmt);
493 if (gimple_code (stmt) == GIMPLE_PHI
494 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
495 && gimple_code (def_stmt) != GIMPLE_PHI
496 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
497 && bb->loop_father == def_bb->loop_father)
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_NOTE, vect_location,
501 "reduc-stmt defining reduc-phi in the same nest.\n");
502 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
503 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
504 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
505 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
506 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
507 return true;
510 /* case 3a: outer-loop stmt defining an inner-loop stmt:
511 outer-loop-header-bb:
512 d = def_stmt
513 inner-loop:
514 stmt # use (d)
515 outer-loop-tail-bb:
516 ... */
517 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
519 if (dump_enabled_p ())
520 dump_printf_loc (MSG_NOTE, vect_location,
521 "outer-loop def-stmt defining inner-loop stmt.\n");
523 switch (relevant)
525 case vect_unused_in_scope:
526 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
527 vect_used_in_scope : vect_unused_in_scope;
528 break;
530 case vect_used_in_outer_by_reduction:
531 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
532 relevant = vect_used_by_reduction;
533 break;
535 case vect_used_in_outer:
536 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
537 relevant = vect_used_in_scope;
538 break;
540 case vect_used_in_scope:
541 break;
543 default:
544 gcc_unreachable ();
548 /* case 3b: inner-loop stmt defining an outer-loop stmt:
549 outer-loop-header-bb:
551 inner-loop:
552 d = def_stmt
553 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
554 stmt # use (d) */
555 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
557 if (dump_enabled_p ())
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "inner-loop def-stmt defining outer-loop stmt.\n");
561 switch (relevant)
563 case vect_unused_in_scope:
564 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
565 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
566 vect_used_in_outer_by_reduction : vect_unused_in_scope;
567 break;
569 case vect_used_by_reduction:
570 case vect_used_only_live:
571 relevant = vect_used_in_outer_by_reduction;
572 break;
574 case vect_used_in_scope:
575 relevant = vect_used_in_outer;
576 break;
578 default:
579 gcc_unreachable ();
583 vect_mark_relevant (worklist, def_stmt, relevant, false);
584 return true;
588 /* Function vect_mark_stmts_to_be_vectorized.
590 Not all stmts in the loop need to be vectorized. For example:
592 for i...
593 for j...
594 1. T0 = i + j
595 2. T1 = a[T0]
597 3. j = j + 1
599 Stmts 1 and 3 do not need to be vectorized, because loop control and
600 addressing of vectorized data-refs are handled differently.
602 This pass detects such stmts. */
604 bool
605 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
607 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
608 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
609 unsigned int nbbs = loop->num_nodes;
610 gimple_stmt_iterator si;
611 gimple *stmt;
612 unsigned int i;
613 stmt_vec_info stmt_vinfo;
614 basic_block bb;
615 gimple *phi;
616 bool live_p;
617 enum vect_relevant relevant;
619 if (dump_enabled_p ())
620 dump_printf_loc (MSG_NOTE, vect_location,
621 "=== vect_mark_stmts_to_be_vectorized ===\n");
623 auto_vec<gimple *, 64> worklist;
625 /* 1. Init worklist. */
626 for (i = 0; i < nbbs; i++)
628 bb = bbs[i];
629 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
631 phi = gsi_stmt (si);
632 if (dump_enabled_p ())
634 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
635 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
638 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
639 vect_mark_relevant (&worklist, phi, relevant, live_p);
641 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
643 stmt = gsi_stmt (si);
644 if (dump_enabled_p ())
646 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
647 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
650 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
651 vect_mark_relevant (&worklist, stmt, relevant, live_p);
655 /* 2. Process_worklist */
656 while (worklist.length () > 0)
658 use_operand_p use_p;
659 ssa_op_iter iter;
661 stmt = worklist.pop ();
662 if (dump_enabled_p ())
664 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
665 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
668 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
669 (DEF_STMT) as relevant/irrelevant according to the relevance property
670 of STMT. */
671 stmt_vinfo = vinfo_for_stmt (stmt);
672 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
674 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
675 propagated as is to the DEF_STMTs of its USEs.
677 One exception is when STMT has been identified as defining a reduction
678 variable; in this case we set the relevance to vect_used_by_reduction.
679 This is because we distinguish between two kinds of relevant stmts -
680 those that are used by a reduction computation, and those that are
681 (also) used by a regular computation. This allows us later on to
682 identify stmts that are used solely by a reduction, and therefore the
683 order of the results that they produce does not have to be kept. */
685 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
687 case vect_reduction_def:
688 gcc_assert (relevant != vect_unused_in_scope);
689 if (relevant != vect_unused_in_scope
690 && relevant != vect_used_in_scope
691 && relevant != vect_used_by_reduction
692 && relevant != vect_used_only_live)
694 if (dump_enabled_p ())
695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
696 "unsupported use of reduction.\n");
697 return false;
699 break;
701 case vect_nested_cycle:
702 if (relevant != vect_unused_in_scope
703 && relevant != vect_used_in_outer_by_reduction
704 && relevant != vect_used_in_outer)
706 if (dump_enabled_p ())
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
708 "unsupported use of nested cycle.\n");
710 return false;
712 break;
714 case vect_double_reduction_def:
715 if (relevant != vect_unused_in_scope
716 && relevant != vect_used_by_reduction
717 && relevant != vect_used_only_live)
719 if (dump_enabled_p ())
720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
721 "unsupported use of double reduction.\n");
723 return false;
725 break;
727 default:
728 break;
731 if (is_pattern_stmt_p (stmt_vinfo))
733 /* Pattern statements are not inserted into the code, so
734 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
735 have to scan the RHS or function arguments instead. */
736 if (is_gimple_assign (stmt))
738 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
739 tree op = gimple_assign_rhs1 (stmt);
741 i = 1;
742 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
744 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
745 relevant, &worklist, false)
746 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
747 relevant, &worklist, false))
748 return false;
749 i = 2;
751 for (; i < gimple_num_ops (stmt); i++)
753 op = gimple_op (stmt, i);
754 if (TREE_CODE (op) == SSA_NAME
755 && !process_use (stmt, op, loop_vinfo, relevant,
756 &worklist, false))
757 return false;
760 else if (is_gimple_call (stmt))
762 for (i = 0; i < gimple_call_num_args (stmt); i++)
764 tree arg = gimple_call_arg (stmt, i);
765 if (!process_use (stmt, arg, loop_vinfo, relevant,
766 &worklist, false))
767 return false;
771 else
772 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
774 tree op = USE_FROM_PTR (use_p);
775 if (!process_use (stmt, op, loop_vinfo, relevant,
776 &worklist, false))
777 return false;
780 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
782 gather_scatter_info gs_info;
783 if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info))
784 gcc_unreachable ();
785 if (!process_use (stmt, gs_info.offset, loop_vinfo, relevant,
786 &worklist, true))
787 return false;
789 } /* while worklist */
791 return true;
795 /* Function vect_model_simple_cost.
797 Models cost for simple operations, i.e. those that only emit ncopies of a
798 single op. Right now, this does not account for multiple insns that could
799 be generated for the single vector op. We will handle that shortly. */
801 void
802 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
803 enum vect_def_type *dt,
804 stmt_vector_for_cost *prologue_cost_vec,
805 stmt_vector_for_cost *body_cost_vec)
807 int i;
808 int inside_cost = 0, prologue_cost = 0;
810 /* The SLP costs were already calculated during SLP tree build. */
811 if (PURE_SLP_STMT (stmt_info))
812 return;
814 /* FORNOW: Assuming a maximum of 2 args per stmt. */
815 for (i = 0; i < 2; i++)
816 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
817 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
818 stmt_info, 0, vect_prologue);
820 /* Pass the inside-of-loop statements to the target-specific cost model. */
821 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
822 stmt_info, 0, vect_body);
824 if (dump_enabled_p ())
825 dump_printf_loc (MSG_NOTE, vect_location,
826 "vect_model_simple_cost: inside_cost = %d, "
827 "prologue_cost = %d .\n", inside_cost, prologue_cost);
831 /* Model cost for type demotion and promotion operations. PWR is normally
832 zero for single-step promotions and demotions. It will be one if
833 two-step promotion/demotion is required, and so on. Each additional
834 step doubles the number of instructions required. */
836 static void
837 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
838 enum vect_def_type *dt, int pwr)
840 int i, tmp;
841 int inside_cost = 0, prologue_cost = 0;
842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
843 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
844 void *target_cost_data;
846 /* The SLP costs were already calculated during SLP tree build. */
847 if (PURE_SLP_STMT (stmt_info))
848 return;
850 if (loop_vinfo)
851 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
852 else
853 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
855 for (i = 0; i < pwr + 1; i++)
857 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
858 (i + 1) : i;
859 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
860 vec_promote_demote, stmt_info, 0,
861 vect_body);
864 /* FORNOW: Assuming a maximum of 2 args per stmt. */
865 for (i = 0; i < 2; i++)
866 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
867 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
868 stmt_info, 0, vect_prologue);
870 if (dump_enabled_p ())
871 dump_printf_loc (MSG_NOTE, vect_location,
872 "vect_model_promotion_demotion_cost: inside_cost = %d, "
873 "prologue_cost = %d .\n", inside_cost, prologue_cost);
876 /* Function vect_model_store_cost
878 Models cost for stores. In the case of grouped accesses, one access
879 has the overhead of the grouped access attributed to it. */
881 void
882 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
883 vect_memory_access_type memory_access_type,
884 enum vect_def_type dt, slp_tree slp_node,
885 stmt_vector_for_cost *prologue_cost_vec,
886 stmt_vector_for_cost *body_cost_vec)
888 unsigned int inside_cost = 0, prologue_cost = 0;
889 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
890 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
891 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
893 if (dt == vect_constant_def || dt == vect_external_def)
894 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
895 stmt_info, 0, vect_prologue);
897 /* Grouped stores update all elements in the group at once,
898 so we want the DR for the first statement. */
899 if (!slp_node && grouped_access_p)
901 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
902 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
905 /* True if we should include any once-per-group costs as well as
906 the cost of the statement itself. For SLP we only get called
907 once per group anyhow. */
908 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
910 /* We assume that the cost of a single store-lanes instruction is
911 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
912 access is instead being provided by a permute-and-store operation,
913 include the cost of the permutes. */
914 if (first_stmt_p
915 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
917 /* Uses high and low interleave or shuffle operations for each
918 needed permute. */
919 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
920 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
921 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
922 stmt_info, 0, vect_body);
924 if (dump_enabled_p ())
925 dump_printf_loc (MSG_NOTE, vect_location,
926 "vect_model_store_cost: strided group_size = %d .\n",
927 group_size);
930 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
931 /* Costs of the stores. */
932 if (memory_access_type == VMAT_ELEMENTWISE)
933 /* N scalar stores plus extracting the elements. */
934 inside_cost += record_stmt_cost (body_cost_vec,
935 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
936 scalar_store, stmt_info, 0, vect_body);
937 else
938 vect_get_store_cost (dr, ncopies, &inside_cost, body_cost_vec);
940 if (memory_access_type == VMAT_ELEMENTWISE
941 || memory_access_type == VMAT_STRIDED_SLP)
942 inside_cost += record_stmt_cost (body_cost_vec,
943 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
944 vec_to_scalar, stmt_info, 0, vect_body);
946 if (dump_enabled_p ())
947 dump_printf_loc (MSG_NOTE, vect_location,
948 "vect_model_store_cost: inside_cost = %d, "
949 "prologue_cost = %d .\n", inside_cost, prologue_cost);
953 /* Calculate cost of DR's memory access. */
954 void
955 vect_get_store_cost (struct data_reference *dr, int ncopies,
956 unsigned int *inside_cost,
957 stmt_vector_for_cost *body_cost_vec)
959 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
960 gimple *stmt = DR_STMT (dr);
961 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
963 switch (alignment_support_scheme)
965 case dr_aligned:
967 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
968 vector_store, stmt_info, 0,
969 vect_body);
971 if (dump_enabled_p ())
972 dump_printf_loc (MSG_NOTE, vect_location,
973 "vect_model_store_cost: aligned.\n");
974 break;
977 case dr_unaligned_supported:
979 /* Here, we assign an additional cost for the unaligned store. */
980 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
981 unaligned_store, stmt_info,
982 DR_MISALIGNMENT (dr), vect_body);
983 if (dump_enabled_p ())
984 dump_printf_loc (MSG_NOTE, vect_location,
985 "vect_model_store_cost: unaligned supported by "
986 "hardware.\n");
987 break;
990 case dr_unaligned_unsupported:
992 *inside_cost = VECT_MAX_COST;
994 if (dump_enabled_p ())
995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
996 "vect_model_store_cost: unsupported access.\n");
997 break;
1000 default:
1001 gcc_unreachable ();
1006 /* Function vect_model_load_cost
1008 Models cost for loads. In the case of grouped accesses, one access has
1009 the overhead of the grouped access attributed to it. Since unaligned
1010 accesses are supported for loads, we also account for the costs of the
1011 access scheme chosen. */
1013 void
1014 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1015 vect_memory_access_type memory_access_type,
1016 slp_tree slp_node,
1017 stmt_vector_for_cost *prologue_cost_vec,
1018 stmt_vector_for_cost *body_cost_vec)
1020 gimple *first_stmt = STMT_VINFO_STMT (stmt_info);
1021 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1022 unsigned int inside_cost = 0, prologue_cost = 0;
1023 bool grouped_access_p = STMT_VINFO_GROUPED_ACCESS (stmt_info);
1025 /* Grouped loads read all elements in the group at once,
1026 so we want the DR for the first statement. */
1027 if (!slp_node && grouped_access_p)
1029 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1030 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1033 /* True if we should include any once-per-group costs as well as
1034 the cost of the statement itself. For SLP we only get called
1035 once per group anyhow. */
1036 bool first_stmt_p = (first_stmt == STMT_VINFO_STMT (stmt_info));
1038 /* We assume that the cost of a single load-lanes instruction is
1039 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1040 access is instead being provided by a load-and-permute operation,
1041 include the cost of the permutes. */
1042 if (first_stmt_p
1043 && memory_access_type == VMAT_CONTIGUOUS_PERMUTE)
1045 /* Uses even and odd extract operations or shuffle operations
1046 for each needed permute. */
1047 int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1048 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1049 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1050 stmt_info, 0, vect_body);
1052 if (dump_enabled_p ())
1053 dump_printf_loc (MSG_NOTE, vect_location,
1054 "vect_model_load_cost: strided group_size = %d .\n",
1055 group_size);
1058 /* The loads themselves. */
1059 if (memory_access_type == VMAT_ELEMENTWISE)
1061 /* N scalar loads plus gathering them into a vector. */
1062 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1063 inside_cost += record_stmt_cost (body_cost_vec,
1064 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1065 scalar_load, stmt_info, 0, vect_body);
1067 else
1068 vect_get_load_cost (dr, ncopies, first_stmt_p,
1069 &inside_cost, &prologue_cost,
1070 prologue_cost_vec, body_cost_vec, true);
1071 if (memory_access_type == VMAT_ELEMENTWISE
1072 || memory_access_type == VMAT_STRIDED_SLP)
1073 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1074 stmt_info, 0, vect_body);
1076 if (dump_enabled_p ())
1077 dump_printf_loc (MSG_NOTE, vect_location,
1078 "vect_model_load_cost: inside_cost = %d, "
1079 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1083 /* Calculate cost of DR's memory access. */
1084 void
1085 vect_get_load_cost (struct data_reference *dr, int ncopies,
1086 bool add_realign_cost, unsigned int *inside_cost,
1087 unsigned int *prologue_cost,
1088 stmt_vector_for_cost *prologue_cost_vec,
1089 stmt_vector_for_cost *body_cost_vec,
1090 bool record_prologue_costs)
1092 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1093 gimple *stmt = DR_STMT (dr);
1094 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1096 switch (alignment_support_scheme)
1098 case dr_aligned:
1100 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1101 stmt_info, 0, vect_body);
1103 if (dump_enabled_p ())
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "vect_model_load_cost: aligned.\n");
1107 break;
1109 case dr_unaligned_supported:
1111 /* Here, we assign an additional cost for the unaligned load. */
1112 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1113 unaligned_load, stmt_info,
1114 DR_MISALIGNMENT (dr), vect_body);
1116 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_NOTE, vect_location,
1118 "vect_model_load_cost: unaligned supported by "
1119 "hardware.\n");
1121 break;
1123 case dr_explicit_realign:
1125 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1126 vector_load, stmt_info, 0, vect_body);
1127 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1128 vec_perm, stmt_info, 0, vect_body);
1130 /* FIXME: If the misalignment remains fixed across the iterations of
1131 the containing loop, the following cost should be added to the
1132 prologue costs. */
1133 if (targetm.vectorize.builtin_mask_for_load)
1134 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1135 stmt_info, 0, vect_body);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE, vect_location,
1139 "vect_model_load_cost: explicit realign\n");
1141 break;
1143 case dr_explicit_realign_optimized:
1145 if (dump_enabled_p ())
1146 dump_printf_loc (MSG_NOTE, vect_location,
1147 "vect_model_load_cost: unaligned software "
1148 "pipelined.\n");
1150 /* Unaligned software pipeline has a load of an address, an initial
1151 load, and possibly a mask operation to "prime" the loop. However,
1152 if this is an access in a group of loads, which provide grouped
1153 access, then the above cost should only be considered for one
1154 access in the group. Inside the loop, there is a load op
1155 and a realignment op. */
1157 if (add_realign_cost && record_prologue_costs)
1159 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1160 vector_stmt, stmt_info,
1161 0, vect_prologue);
1162 if (targetm.vectorize.builtin_mask_for_load)
1163 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1164 vector_stmt, stmt_info,
1165 0, vect_prologue);
1168 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1169 stmt_info, 0, vect_body);
1170 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1171 stmt_info, 0, vect_body);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE, vect_location,
1175 "vect_model_load_cost: explicit realign optimized"
1176 "\n");
1178 break;
1181 case dr_unaligned_unsupported:
1183 *inside_cost = VECT_MAX_COST;
1185 if (dump_enabled_p ())
1186 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1187 "vect_model_load_cost: unsupported access.\n");
1188 break;
1191 default:
1192 gcc_unreachable ();
1196 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1197 the loop preheader for the vectorized stmt STMT. */
1199 static void
1200 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1202 if (gsi)
1203 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1204 else
1206 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1209 if (loop_vinfo)
1211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1212 basic_block new_bb;
1213 edge pe;
1215 if (nested_in_vect_loop_p (loop, stmt))
1216 loop = loop->inner;
1218 pe = loop_preheader_edge (loop);
1219 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1220 gcc_assert (!new_bb);
1222 else
1224 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1225 basic_block bb;
1226 gimple_stmt_iterator gsi_bb_start;
1228 gcc_assert (bb_vinfo);
1229 bb = BB_VINFO_BB (bb_vinfo);
1230 gsi_bb_start = gsi_after_labels (bb);
1231 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1235 if (dump_enabled_p ())
1237 dump_printf_loc (MSG_NOTE, vect_location,
1238 "created new init_stmt: ");
1239 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1243 /* Function vect_init_vector.
1245 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1246 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1247 a vector type, a vector with all elements equal to VAL is created first.
1248 Place the initialization at BSI if it is not NULL. Otherwise, place the
1249 initialization at the loop preheader.
1250 Return the DEF of INIT_STMT.
1251 It will be used in the vectorization of STMT. */
1253 tree
1254 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1256 gimple *init_stmt;
1257 tree new_temp;
1259 /* We abuse this function to push something to an SSA name with initial 'val'. */
1260 if (! useless_type_conversion_p (type, TREE_TYPE (val)))
1262 gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
1263 if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1265 /* Scalar boolean value should be transformed into
1266 all zeros or all ones value before building a vector. */
1267 if (VECTOR_BOOLEAN_TYPE_P (type))
1269 tree true_val = build_all_ones_cst (TREE_TYPE (type));
1270 tree false_val = build_zero_cst (TREE_TYPE (type));
1272 if (CONSTANT_CLASS_P (val))
1273 val = integer_zerop (val) ? false_val : true_val;
1274 else
1276 new_temp = make_ssa_name (TREE_TYPE (type));
1277 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
1278 val, true_val, false_val);
1279 vect_init_vector_1 (stmt, init_stmt, gsi);
1280 val = new_temp;
1283 else if (CONSTANT_CLASS_P (val))
1284 val = fold_convert (TREE_TYPE (type), val);
1285 else
1287 new_temp = make_ssa_name (TREE_TYPE (type));
1288 if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
1289 init_stmt = gimple_build_assign (new_temp,
1290 fold_build1 (VIEW_CONVERT_EXPR,
1291 TREE_TYPE (type),
1292 val));
1293 else
1294 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1295 vect_init_vector_1 (stmt, init_stmt, gsi);
1296 val = new_temp;
1299 val = build_vector_from_val (type, val);
1302 new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
1303 init_stmt = gimple_build_assign (new_temp, val);
1304 vect_init_vector_1 (stmt, init_stmt, gsi);
1305 return new_temp;
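/* Editorial sketch (not in the original source): a typical invariant
   operand is handled as

     tree vec_cst = vect_init_vector (stmt, op, vector_type, NULL);

   where OP is a scalar; the value is widened with build_vector_from_val
   and, because GSI is NULL, the init_stmt is emitted on the loop preheader
   edge (or at the start of the block for basic-block vectorization).  */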
1308 /* Function vect_get_vec_def_for_operand_1.
1310 For a defining stmt DEF_STMT of a scalar stmt, return a vector def with type
1311 DT that will be used in the vectorized stmt. */
1313 tree
1314 vect_get_vec_def_for_operand_1 (gimple *def_stmt, enum vect_def_type dt)
1316 tree vec_oprnd;
1317 gimple *vec_stmt;
1318 stmt_vec_info def_stmt_info = NULL;
1320 switch (dt)
1322 /* operand is a constant or a loop invariant. */
1323 case vect_constant_def:
1324 case vect_external_def:
1325 /* Code should use vect_get_vec_def_for_operand. */
1326 gcc_unreachable ();
1328 /* operand is defined inside the loop. */
1329 case vect_internal_def:
1331 /* Get the def from the vectorized stmt. */
1332 def_stmt_info = vinfo_for_stmt (def_stmt);
1334 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1335 /* Get vectorized pattern statement. */
1336 if (!vec_stmt
1337 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1338 && !STMT_VINFO_RELEVANT (def_stmt_info))
1339 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1340 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1341 gcc_assert (vec_stmt);
1342 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1343 vec_oprnd = PHI_RESULT (vec_stmt);
1344 else if (is_gimple_call (vec_stmt))
1345 vec_oprnd = gimple_call_lhs (vec_stmt);
1346 else
1347 vec_oprnd = gimple_assign_lhs (vec_stmt);
1348 return vec_oprnd;
1351 /* operand is defined by a loop header phi - reduction */
1352 case vect_reduction_def:
1353 case vect_double_reduction_def:
1354 case vect_nested_cycle:
1355 /* Code should use get_initial_def_for_reduction. */
1356 gcc_unreachable ();
1358 /* operand is defined by loop-header phi - induction. */
1359 case vect_induction_def:
1361 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1363 /* Get the def from the vectorized stmt. */
1364 def_stmt_info = vinfo_for_stmt (def_stmt);
1365 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1366 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1367 vec_oprnd = PHI_RESULT (vec_stmt);
1368 else
1369 vec_oprnd = gimple_get_lhs (vec_stmt);
1370 return vec_oprnd;
1373 default:
1374 gcc_unreachable ();
1379 /* Function vect_get_vec_def_for_operand.
1381 OP is an operand in STMT. This function returns a (vector) def that will be
1382 used in the vectorized stmt for STMT.
1384 In the case that OP is an SSA_NAME which is defined in the loop, then
1385 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1387 In case OP is an invariant or constant, a new stmt that creates a vector def
1388 needs to be introduced. VECTYPE may be used to specify a required type for
1389 vector invariant. */
1391 tree
1392 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree vectype)
1394 gimple *def_stmt;
1395 enum vect_def_type dt;
1396 bool is_simple_use;
1397 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1398 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1400 if (dump_enabled_p ())
1402 dump_printf_loc (MSG_NOTE, vect_location,
1403 "vect_get_vec_def_for_operand: ");
1404 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1405 dump_printf (MSG_NOTE, "\n");
1408 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &dt);
1409 gcc_assert (is_simple_use);
1410 if (def_stmt && dump_enabled_p ())
1412 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1413 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1416 if (dt == vect_constant_def || dt == vect_external_def)
1418 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1419 tree vector_type;
1421 if (vectype)
1422 vector_type = vectype;
1423 else if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
1424 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
1425 vector_type = build_same_sized_truth_vector_type (stmt_vectype);
1426 else
1427 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1429 gcc_assert (vector_type);
1430 return vect_init_vector (stmt, op, vector_type, NULL);
1432 else
1433 return vect_get_vec_def_for_operand_1 (def_stmt, dt);
1437 /* Function vect_get_vec_def_for_stmt_copy
1439 Return a vector-def for an operand. This function is used when the
1440 vectorized stmt to be created (by the caller to this function) is a "copy"
1441 created in case the vectorized result cannot fit in one vector, and several
1442 copies of the vector-stmt are required. In this case the vector-def is
1443 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1444 of the stmt that defines VEC_OPRND.
1445 DT is the type of the vector def VEC_OPRND.
1447 Context:
1448 In case the vectorization factor (VF) is bigger than the number
1449 of elements that can fit in a vectype (nunits), we have to generate
1450 more than one vector stmt to vectorize the scalar stmt. This situation
1451 arises when there are multiple data-types operated upon in the loop; the
1452 smallest data-type determines the VF, and as a result, when vectorizing
1453 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1454 vector stmt (each computing a vector of 'nunits' results, and together
1455 computing 'VF' results in each iteration). This function is called when
1456 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1457 which VF=16 and nunits=4, so the number of copies required is 4):
1459 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1461 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1462 VS1.1: vx.1 = memref1 VS1.2
1463 VS1.2: vx.2 = memref2 VS1.3
1464 VS1.3: vx.3 = memref3
1466 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1467 VSnew.1: vz1 = vx.1 + ... VSnew.2
1468 VSnew.2: vz2 = vx.2 + ... VSnew.3
1469 VSnew.3: vz3 = vx.3 + ...
1471 The vectorization of S1 is explained in vectorizable_load.
1472 The vectorization of S2:
1473 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1474 the function 'vect_get_vec_def_for_operand' is called to
1475 get the relevant vector-def for each operand of S2. For operand x it
1476 returns the vector-def 'vx.0'.
1478 To create the remaining copies of the vector-stmt (VSnew.j), this
1479 function is called to get the relevant vector-def for each operand. It is
1480 obtained from the respective VS1.j stmt, which is recorded in the
1481 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1483 For example, to obtain the vector-def 'vx.1' in order to create the
1484 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1485 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1486 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1487 and return its def ('vx.1').
1488 Overall, to create the above sequence this function will be called 3 times:
1489 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1490 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1491 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1493 tree
1494 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1496 gimple *vec_stmt_for_operand;
1497 stmt_vec_info def_stmt_info;
1499 /* Do nothing; can reuse same def. */
1500 if (dt == vect_external_def || dt == vect_constant_def )
1501 return vec_oprnd;
1503 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1504 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1505 gcc_assert (def_stmt_info);
1506 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1507 gcc_assert (vec_stmt_for_operand);
1508 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1509 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1510 else
1511 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1512 return vec_oprnd;
1516 /* Get vectorized definitions for the operands to create a copy of an original
1517 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1519 static void
1520 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1521 vec<tree> *vec_oprnds0,
1522 vec<tree> *vec_oprnds1)
1524 tree vec_oprnd = vec_oprnds0->pop ();
1526 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1527 vec_oprnds0->quick_push (vec_oprnd);
1529 if (vec_oprnds1 && vec_oprnds1->length ())
1531 vec_oprnd = vec_oprnds1->pop ();
1532 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1533 vec_oprnds1->quick_push (vec_oprnd);
1538 /* Get vectorized definitions for OP0 and OP1.
1539 REDUC_INDEX is the index of the reduction operand in case of reduction,
1540 and -1 otherwise. */
1542 void
1543 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1544 vec<tree> *vec_oprnds0,
1545 vec<tree> *vec_oprnds1,
1546 slp_tree slp_node, int reduc_index)
1548 if (slp_node)
1550 int nops = (op1 == NULL_TREE) ? 1 : 2;
1551 auto_vec<tree> ops (nops);
1552 auto_vec<vec<tree> > vec_defs (nops);
1554 ops.quick_push (op0);
1555 if (op1)
1556 ops.quick_push (op1);
1558 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1560 *vec_oprnds0 = vec_defs[0];
1561 if (op1)
1562 *vec_oprnds1 = vec_defs[1];
1564 else
1566 tree vec_oprnd;
1568 vec_oprnds0->create (1);
1569 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt);
1570 vec_oprnds0->quick_push (vec_oprnd);
1572 if (op1)
1574 vec_oprnds1->create (1);
1575 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt);
1576 vec_oprnds1->quick_push (vec_oprnd);
1582 /* Function vect_finish_stmt_generation.
1584 Insert a new stmt. */
1586 void
1587 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1588 gimple_stmt_iterator *gsi)
1590 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1591 vec_info *vinfo = stmt_info->vinfo;
1593 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1595 if (!gsi_end_p (*gsi)
1596 && gimple_has_mem_ops (vec_stmt))
1598 gimple *at_stmt = gsi_stmt (*gsi);
1599 tree vuse = gimple_vuse (at_stmt);
1600 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1602 tree vdef = gimple_vdef (at_stmt);
1603 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1604 /* If we have an SSA vuse and insert a store, update virtual
1605 SSA form to avoid triggering the renamer. Do so only
1606 if we can easily see all uses - which is what almost always
1607 happens with the way vectorized stmts are inserted. */
1608 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1609 && ((is_gimple_assign (vec_stmt)
1610 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1611 || (is_gimple_call (vec_stmt)
1612 && !(gimple_call_flags (vec_stmt)
1613 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1615 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1616 gimple_set_vdef (vec_stmt, new_vdef);
1617 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1621 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1623 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, vinfo));
1625 if (dump_enabled_p ())
1627 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1628 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1631 gimple_set_location (vec_stmt, gimple_location (stmt));
1633 /* While EH edges will generally prevent vectorization, stmt might
1634 e.g. be in a must-not-throw region. Ensure newly created stmts
1635 that could throw are part of the same region. */
1636 int lp_nr = lookup_stmt_eh_lp (stmt);
1637 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1638 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1641 /* We want to vectorize a call to combined function CFN with function
1642 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1643 as the types of all inputs. Check whether this is possible using
1644 an internal function, returning its code if so or IFN_LAST if not. */
1646 static internal_fn
1647 vectorizable_internal_function (combined_fn cfn, tree fndecl,
1648 tree vectype_out, tree vectype_in)
1650 internal_fn ifn;
1651 if (internal_fn_p (cfn))
1652 ifn = as_internal_fn (cfn);
1653 else
1654 ifn = associated_internal_fn (fndecl);
1655 if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
1657 const direct_internal_fn_info &info = direct_internal_fn (ifn);
1658 if (info.vectorizable)
1660 tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
1661 tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);
1662 if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
1663 OPTIMIZE_FOR_SPEED))
1664 return ifn;
1667 return IFN_LAST;
1671 static tree permute_vec_elements (tree, tree, tree, gimple *,
1672 gimple_stmt_iterator *);
1674 /* STMT is a non-strided load or store, meaning that it accesses
1675 elements with a known constant step. Return -1 if that step
1676 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1678 static int
1679 compare_step_with_zero (gimple *stmt)
1681 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1682 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1683 tree step;
1684 if (loop_vinfo && nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt))
1685 step = STMT_VINFO_DR_STEP (stmt_info);
1686 else
1687 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
1688 return tree_int_cst_compare (step, size_zero_node);
1691 /* If the target supports a permute mask that reverses the elements in
1692 a vector of type VECTYPE, return that mask, otherwise return null. */
1694 static tree
1695 perm_mask_for_reverse (tree vectype)
1697 int i, nunits;
1698 unsigned char *sel;
1700 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1701 sel = XALLOCAVEC (unsigned char, nunits);
1703 for (i = 0; i < nunits; ++i)
1704 sel[i] = nunits - 1 - i;
1706 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
1707 return NULL_TREE;
1708 return vect_gen_perm_mask_checked (vectype, sel);
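/* Editorial note (illustration only): for a 4-element vector the selector
   built above is { 3, 2, 1, 0 }, i.e. plain element reversal; the mask is
   only returned when can_vec_perm_p confirms the target supports it.  */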
1711 /* A subroutine of get_load_store_type, with a subset of the same
1712 arguments. Handle the case where STMT is part of a grouped load
1713 or store.
1715 For stores, the statements in the group are all consecutive
1716 and there is no gap at the end. For loads, the statements in the
1717 group might not be consecutive; there can be gaps between statements
1718 as well as at the end. */
1720 static bool
1721 get_group_load_store_type (gimple *stmt, tree vectype, bool slp,
1722 vec_load_store_type vls_type,
1723 vect_memory_access_type *memory_access_type)
1725 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1726 vec_info *vinfo = stmt_info->vinfo;
1727 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1728 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
1729 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1730 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
1731 bool single_element_p = (stmt == first_stmt
1732 && !GROUP_NEXT_ELEMENT (stmt_info));
1733 unsigned HOST_WIDE_INT gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
1734 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1736 /* True if the vectorized statements would access beyond the last
1737 statement in the group. */
1738 bool overrun_p = false;
1740 /* True if we can cope with such overrun by peeling for gaps, so that
1741 there is at least one final scalar iteration after the vector loop. */
1742 bool can_overrun_p = (vls_type == VLS_LOAD && loop_vinfo && !loop->inner);
1744 /* There can only be a gap at the end of the group if the stride is
1745 known at compile time. */
1746 gcc_assert (!STMT_VINFO_STRIDED_P (stmt_info) || gap == 0);
1748 /* Stores can't yet have gaps. */
1749 gcc_assert (slp || vls_type == VLS_LOAD || gap == 0);
1751 if (slp)
1753 if (STMT_VINFO_STRIDED_P (stmt_info))
1755 /* Try to use consecutive accesses of GROUP_SIZE elements,
1756 separated by the stride, until we have a complete vector.
1757 Fall back to scalar accesses if that isn't possible. */
1758 if (nunits % group_size == 0)
1759 *memory_access_type = VMAT_STRIDED_SLP;
1760 else
1761 *memory_access_type = VMAT_ELEMENTWISE;
1763 else
1765 overrun_p = loop_vinfo && gap != 0;
1766 if (overrun_p && vls_type != VLS_LOAD)
1768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1769 "Grouped store with gaps requires"
1770 " non-consecutive accesses\n");
1771 return false;
1773 if (overrun_p && !can_overrun_p)
1775 if (dump_enabled_p ())
1776 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1777 "Peeling for outer loop is not supported\n");
1778 return false;
1780 *memory_access_type = VMAT_CONTIGUOUS;
1783 else
1785 /* We can always handle this case using elementwise accesses,
1786 but see if something more efficient is available. */
1787 *memory_access_type = VMAT_ELEMENTWISE;
1789 /* If there is a gap at the end of the group then these optimizations
1790 would access excess elements in the last iteration. */
1791 bool would_overrun_p = (gap != 0);
1792 if (!STMT_VINFO_STRIDED_P (stmt_info)
1793 && (can_overrun_p || !would_overrun_p)
1794 && compare_step_with_zero (stmt) > 0)
1796 /* First try using LOAD/STORE_LANES. */
1797 if (vls_type == VLS_LOAD
1798 ? vect_load_lanes_supported (vectype, group_size)
1799 : vect_store_lanes_supported (vectype, group_size))
1801 *memory_access_type = VMAT_LOAD_STORE_LANES;
1802 overrun_p = would_overrun_p;
1806 /* If that fails, try using a permuting load or store. */
1806 if (*memory_access_type == VMAT_ELEMENTWISE
1807 && (vls_type == VLS_LOAD
1808 ? vect_grouped_load_supported (vectype, single_element_p,
1809 group_size)
1810 : vect_grouped_store_supported (vectype, group_size)))
1812 *memory_access_type = VMAT_CONTIGUOUS_PERMUTE;
1813 overrun_p = would_overrun_p;
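/* The order of preference is thus LOAD/STORE_LANES, then a contiguous
   access combined with a permutation, with elementwise accesses only as
   the fallback. */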
1818 if (vls_type != VLS_LOAD && first_stmt == stmt)
1820 /* STMT is the leader of the group. Check the operands of all the
1821 stmts of the group. */
1822 gimple *next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
1823 while (next_stmt)
1825 gcc_assert (gimple_assign_single_p (next_stmt));
1826 tree op = gimple_assign_rhs1 (next_stmt);
1827 gimple *def_stmt;
1828 enum vect_def_type dt;
1829 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt))
1831 if (dump_enabled_p ())
1832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1833 "use not simple.\n");
1834 return false;
1836 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
1840 if (overrun_p)
1842 gcc_assert (can_overrun_p);
1843 if (dump_enabled_p ())
1844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1845 "Data access with gaps requires scalar "
1846 "epilogue loop\n");
1847 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
1850 return true;
1853 /* A subroutine of get_load_store_type, with a subset of the same
1854 arguments. Handle the case where STMT is a load or store that
1855 accesses consecutive elements with a negative step. */
1857 static vect_memory_access_type
1858 get_negative_load_store_type (gimple *stmt, tree vectype,
1859 vec_load_store_type vls_type,
1860 unsigned int ncopies)
1862 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1863 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1864 dr_alignment_support alignment_support_scheme;
1866 if (ncopies > 1)
1868 if (dump_enabled_p ())
1869 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1870 "multiple types with negative step.\n");
1871 return VMAT_ELEMENTWISE;
1874 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1875 if (alignment_support_scheme != dr_aligned
1876 && alignment_support_scheme != dr_unaligned_supported)
1878 if (dump_enabled_p ())
1879 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1880 "negative step but alignment required.\n");
1881 return VMAT_ELEMENTWISE;
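/* An invariant source stores the same value in every element, so the
   access can simply proceed in decreasing address order; no reversing
   permutation is needed. */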
1884 if (vls_type == VLS_STORE_INVARIANT)
1886 if (dump_enabled_p ())
1887 dump_printf_loc (MSG_NOTE, vect_location,
1888 "negative step with invariant source;"
1889 " no permute needed.\n");
1890 return VMAT_CONTIGUOUS_DOWN;
1893 if (!perm_mask_for_reverse (vectype))
1895 if (dump_enabled_p ())
1896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1897 "negative step and reversing not supported.\n");
1898 return VMAT_ELEMENTWISE;
1901 return VMAT_CONTIGUOUS_REVERSE;
1904 /* Analyze load or store statement STMT of type VLS_TYPE. Return true
1905 if there is a memory access type that the vectorized form can use,
1906 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
1907 or scatters, fill in GS_INFO accordingly.
1909 SLP says whether we're performing SLP rather than loop vectorization.
1910 VECTYPE is the vector type that the vectorized statements will use.
1911 NCOPIES is the number of vector statements that will be needed. */
1913 static bool
1914 get_load_store_type (gimple *stmt, tree vectype, bool slp,
1915 vec_load_store_type vls_type, unsigned int ncopies,
1916 vect_memory_access_type *memory_access_type,
1917 gather_scatter_info *gs_info)
1919 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1920 vec_info *vinfo = stmt_info->vinfo;
1921 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1922 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1924 *memory_access_type = VMAT_GATHER_SCATTER;
1925 gimple *def_stmt;
1926 if (!vect_check_gather_scatter (stmt, loop_vinfo, gs_info))
1927 gcc_unreachable ();
1928 else if (!vect_is_simple_use (gs_info->offset, vinfo, &def_stmt,
1929 &gs_info->offset_dt,
1930 &gs_info->offset_vectype))
1932 if (dump_enabled_p ())
1933 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1934 "%s index use not simple.\n",
1935 vls_type == VLS_LOAD ? "gather" : "scatter");
1936 return false;
1939 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1941 if (!get_group_load_store_type (stmt, vectype, slp, vls_type,
1942 memory_access_type))
1943 return false;
1945 else if (STMT_VINFO_STRIDED_P (stmt_info))
1947 gcc_assert (!slp);
1948 *memory_access_type = VMAT_ELEMENTWISE;
1950 else
1952 int cmp = compare_step_with_zero (stmt);
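/* Classify by the sign of the step: a negative step needs the
   reverse-access analysis below, a zero step means every iteration
   accesses the same location (only valid for loads, hence VMAT_INVARIANT),
   and a positive step gives a plain contiguous access. */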
1953 if (cmp < 0)
1954 *memory_access_type = get_negative_load_store_type
1955 (stmt, vectype, vls_type, ncopies);
1956 else if (cmp == 0)
1958 gcc_assert (vls_type == VLS_LOAD);
1959 *memory_access_type = VMAT_INVARIANT;
1961 else
1962 *memory_access_type = VMAT_CONTIGUOUS;
1965 /* FIXME: At the moment the cost model seems to underestimate the
1966 cost of using elementwise accesses. This check preserves the
1967 traditional behavior until that can be fixed. */
1968 if (*memory_access_type == VMAT_ELEMENTWISE
1969 && !STMT_VINFO_STRIDED_P (stmt_info))
1971 if (dump_enabled_p ())
1972 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1973 "not falling back to elementwise accesses\n");
1974 return false;
1976 return true;
1979 /* Function vectorizable_mask_load_store.
1981 Check if STMT performs a conditional load or store that can be vectorized.
1982 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1983 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1984 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1986 static bool
1987 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1988 gimple **vec_stmt, slp_tree slp_node)
1990 tree vec_dest = NULL;
1991 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1992 stmt_vec_info prev_stmt_info;
1993 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1994 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1995 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1996 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1997 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1998 tree rhs_vectype = NULL_TREE;
1999 tree mask_vectype;
2000 tree elem_type;
2001 gimple *new_stmt;
2002 tree dummy;
2003 tree dataref_ptr = NULL_TREE;
2004 gimple *ptr_incr;
2005 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2006 int ncopies;
2007 int i, j;
2008 bool inv_p;
2009 gather_scatter_info gs_info;
2010 vec_load_store_type vls_type;
2011 tree mask;
2012 gimple *def_stmt;
2013 enum vect_def_type dt;
2015 if (slp_node != NULL)
2016 return false;
2018 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
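/* E.g. a vectorization factor of 16 with a four-element vector type
   requires four copies of the vector statement. */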
2019 gcc_assert (ncopies >= 1);
2021 mask = gimple_call_arg (stmt, 2);
2023 if (TREE_CODE (TREE_TYPE (mask)) != BOOLEAN_TYPE)
2024 return false;
2026 /* FORNOW. This restriction should be relaxed. */
2027 if (nested_in_vect_loop && ncopies > 1)
2029 if (dump_enabled_p ())
2030 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2031 "multiple types in nested loop.");
2032 return false;
2035 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2036 return false;
2038 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2039 && ! vec_stmt)
2040 return false;
2042 if (!STMT_VINFO_DATA_REF (stmt_info))
2043 return false;
2045 elem_type = TREE_TYPE (vectype);
2047 if (TREE_CODE (mask) != SSA_NAME)
2048 return false;
2050 if (!vect_is_simple_use (mask, loop_vinfo, &def_stmt, &dt, &mask_vectype))
2051 return false;
2053 if (!mask_vectype)
2054 mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype));
2056 if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)
2057 || TYPE_VECTOR_SUBPARTS (mask_vectype) != TYPE_VECTOR_SUBPARTS (vectype))
2058 return false;
2060 if (gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2062 tree rhs = gimple_call_arg (stmt, 3);
2063 if (!vect_is_simple_use (rhs, loop_vinfo, &def_stmt, &dt, &rhs_vectype))
2064 return false;
2065 if (dt == vect_constant_def || dt == vect_external_def)
2066 vls_type = VLS_STORE_INVARIANT;
2067 else
2068 vls_type = VLS_STORE;
2070 else
2071 vls_type = VLS_LOAD;
2073 vect_memory_access_type memory_access_type;
2074 if (!get_load_store_type (stmt, vectype, false, vls_type, ncopies,
2075 &memory_access_type, &gs_info))
2076 return false;
2078 if (memory_access_type == VMAT_GATHER_SCATTER)
2080 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2081 tree masktype
2082 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
2083 if (TREE_CODE (masktype) == INTEGER_TYPE)
2085 if (dump_enabled_p ())
2086 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2087 "masked gather with integer mask not supported.");
2088 return false;
2091 else if (memory_access_type != VMAT_CONTIGUOUS)
2093 if (dump_enabled_p ())
2094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2095 "unsupported access type for masked %s.\n",
2096 vls_type == VLS_LOAD ? "load" : "store");
2097 return false;
2099 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2100 || !can_vec_mask_load_store_p (TYPE_MODE (vectype),
2101 TYPE_MODE (mask_vectype),
2102 vls_type == VLS_LOAD)
2103 || (rhs_vectype
2104 && !useless_type_conversion_p (vectype, rhs_vectype)))
2105 return false;
2107 if (!vec_stmt) /* transformation not required. */
2109 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
2110 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2111 if (vls_type == VLS_LOAD)
2112 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
2113 NULL, NULL, NULL);
2114 else
2115 vect_model_store_cost (stmt_info, ncopies, memory_access_type,
2116 dt, NULL, NULL, NULL);
2117 return true;
2119 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
2121 /** Transform. **/
2123 if (memory_access_type == VMAT_GATHER_SCATTER)
2125 tree vec_oprnd0 = NULL_TREE, op;
2126 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
2127 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
2128 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
2129 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
2130 tree mask_perm_mask = NULL_TREE;
2131 edge pe = loop_preheader_edge (loop);
2132 gimple_seq seq;
2133 basic_block new_bb;
2134 enum { NARROW, NONE, WIDEN } modifier;
2135 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
2137 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
2138 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2139 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2140 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2141 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
2142 scaletype = TREE_VALUE (arglist);
2143 gcc_checking_assert (types_compatible_p (srctype, rettype)
2144 && types_compatible_p (srctype, masktype));
2146 if (nunits == gather_off_nunits)
2147 modifier = NONE;
2148 else if (nunits == gather_off_nunits / 2)
2150 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
2151 modifier = WIDEN;
2153 for (i = 0; i < gather_off_nunits; ++i)
2154 sel[i] = i | nunits;
2156 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
2158 else if (nunits == gather_off_nunits * 2)
2160 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
2161 modifier = NARROW;
2163 for (i = 0; i < nunits; ++i)
2164 sel[i] = i < gather_off_nunits
2165 ? i : i + nunits - gather_off_nunits;
2167 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
2168 ncopies *= 2;
2169 for (i = 0; i < nunits; ++i)
2170 sel[i] = i | gather_off_nunits;
2171 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
2173 else
2174 gcc_unreachable ();
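/* In summary: NONE means the data and offset vectors have the same
   number of elements; WIDEN means the offset vector has twice as many
   elements, so every odd copy permutes the upper half of the previous
   offset vector into position; NARROW means the data vector has twice as
   many elements, so two gather results are combined per data vector and
   the number of copies is doubled. */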
2176 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2178 ptr = fold_convert (ptrtype, gs_info.base);
2179 if (!is_gimple_min_invariant (ptr))
2181 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
2182 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2183 gcc_assert (!new_bb);
2186 scale = build_int_cst (scaletype, gs_info.scale);
2188 prev_stmt_info = NULL;
2189 for (j = 0; j < ncopies; ++j)
2191 if (modifier == WIDEN && (j & 1))
2192 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
2193 perm_mask, stmt, gsi);
2194 else if (j == 0)
2195 op = vec_oprnd0
2196 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
2197 else
2198 op = vec_oprnd0
2199 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
2201 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
2203 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
2204 == TYPE_VECTOR_SUBPARTS (idxtype));
2205 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
2206 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
2207 new_stmt
2208 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2209 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2210 op = var;
2213 if (mask_perm_mask && (j & 1))
2214 mask_op = permute_vec_elements (mask_op, mask_op,
2215 mask_perm_mask, stmt, gsi);
2216 else
2218 if (j == 0)
2219 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2220 else
2222 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2223 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2226 mask_op = vec_mask;
2227 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2229 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2230 == TYPE_VECTOR_SUBPARTS (masktype));
2231 var = vect_get_new_ssa_name (masktype, vect_simple_var);
2232 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2233 new_stmt
2234 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2235 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2236 mask_op = var;
2240 new_stmt
2241 = gimple_build_call (gs_info.decl, 5, mask_op, ptr, op, mask_op,
2242 scale);
2244 if (!useless_type_conversion_p (vectype, rettype))
2246 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2247 == TYPE_VECTOR_SUBPARTS (rettype));
2248 op = vect_get_new_ssa_name (rettype, vect_simple_var);
2249 gimple_call_set_lhs (new_stmt, op);
2250 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2251 var = make_ssa_name (vec_dest);
2252 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2253 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2255 else
2257 var = make_ssa_name (vec_dest, new_stmt);
2258 gimple_call_set_lhs (new_stmt, var);
2261 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2263 if (modifier == NARROW)
2265 if ((j & 1) == 0)
2267 prev_res = var;
2268 continue;
2270 var = permute_vec_elements (prev_res, var,
2271 perm_mask, stmt, gsi);
2272 new_stmt = SSA_NAME_DEF_STMT (var);
2275 if (prev_stmt_info == NULL)
2276 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2277 else
2278 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2279 prev_stmt_info = vinfo_for_stmt (new_stmt);
2282 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2283 from the IL. */
2284 if (STMT_VINFO_RELATED_STMT (stmt_info))
2286 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2287 stmt_info = vinfo_for_stmt (stmt);
2289 tree lhs = gimple_call_lhs (stmt);
2290 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2291 set_vinfo_for_stmt (new_stmt, stmt_info);
2292 set_vinfo_for_stmt (stmt, NULL);
2293 STMT_VINFO_STMT (stmt_info) = new_stmt;
2294 gsi_replace (gsi, new_stmt, true);
2295 return true;
2297 else if (vls_type != VLS_LOAD)
2299 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2300 prev_stmt_info = NULL;
2301 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo) = true;
2302 for (i = 0; i < ncopies; i++)
2304 unsigned align, misalign;
2306 if (i == 0)
2308 tree rhs = gimple_call_arg (stmt, 3);
2309 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt);
2310 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2311 /* We should have caught mismatched types earlier. */
2312 gcc_assert (useless_type_conversion_p (vectype,
2313 TREE_TYPE (vec_rhs)));
2314 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2315 NULL_TREE, &dummy, gsi,
2316 &ptr_incr, false, &inv_p);
2317 gcc_assert (!inv_p);
2319 else
2321 vect_is_simple_use (vec_rhs, loop_vinfo, &def_stmt, &dt);
2322 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2323 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2324 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2325 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2326 TYPE_SIZE_UNIT (vectype));
2329 align = TYPE_ALIGN_UNIT (vectype);
2330 if (aligned_access_p (dr))
2331 misalign = 0;
2332 else if (DR_MISALIGNMENT (dr) == -1)
2334 align = TYPE_ALIGN_UNIT (elem_type);
2335 misalign = 0;
2337 else
2338 misalign = DR_MISALIGNMENT (dr);
2339 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2340 misalign);
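/* The second argument of the IFN_MASK_STORE call encodes the guaranteed
   alignment of the access in bytes: the vector alignment when the access
   is known to be aligned, the element alignment when the misalignment is
   unknown, and otherwise the largest power of two dividing the known
   misalignment. */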
2341 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2342 misalign ? least_bit_hwi (misalign) : align);
2343 new_stmt
2344 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2345 ptr, vec_mask, vec_rhs);
2346 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2347 if (i == 0)
2348 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2349 else
2350 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2351 prev_stmt_info = vinfo_for_stmt (new_stmt);
2354 else
2356 tree vec_mask = NULL_TREE;
2357 prev_stmt_info = NULL;
2358 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2359 for (i = 0; i < ncopies; i++)
2361 unsigned align, misalign;
2363 if (i == 0)
2365 vec_mask = vect_get_vec_def_for_operand (mask, stmt);
2366 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2367 NULL_TREE, &dummy, gsi,
2368 &ptr_incr, false, &inv_p);
2369 gcc_assert (!inv_p);
2371 else
2373 vect_is_simple_use (vec_mask, loop_vinfo, &def_stmt, &dt);
2374 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2375 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2376 TYPE_SIZE_UNIT (vectype));
2379 align = TYPE_ALIGN_UNIT (vectype);
2380 if (aligned_access_p (dr))
2381 misalign = 0;
2382 else if (DR_MISALIGNMENT (dr) == -1)
2384 align = TYPE_ALIGN_UNIT (elem_type);
2385 misalign = 0;
2387 else
2388 misalign = DR_MISALIGNMENT (dr);
2389 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2390 misalign);
2391 tree ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)),
2392 misalign ? least_bit_hwi (misalign) : align);
2393 new_stmt
2394 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2395 ptr, vec_mask);
2396 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2397 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2398 if (i == 0)
2399 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2400 else
2401 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2402 prev_stmt_info = vinfo_for_stmt (new_stmt);
2406 if (vls_type == VLS_LOAD)
2408 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2409 from the IL. */
2410 if (STMT_VINFO_RELATED_STMT (stmt_info))
2412 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2413 stmt_info = vinfo_for_stmt (stmt);
2415 tree lhs = gimple_call_lhs (stmt);
2416 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2417 set_vinfo_for_stmt (new_stmt, stmt_info);
2418 set_vinfo_for_stmt (stmt, NULL);
2419 STMT_VINFO_STMT (stmt_info) = new_stmt;
2420 gsi_replace (gsi, new_stmt, true);
2423 return true;
2426 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
2427 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
2428 in a single step. On success, store the binary pack code in
2429 *CONVERT_CODE. */
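/* For example, narrowing int elements to short elements typically takes
   a single VEC_PACK_TRUNC_EXPR and qualifies, whereas int to char would
   be a two-step conversion and is rejected here. */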
2431 static bool
2432 simple_integer_narrowing (tree vectype_out, tree vectype_in,
2433 tree_code *convert_code)
2435 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
2436 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
2437 return false;
2439 tree_code code;
2440 int multi_step_cvt = 0;
2441 auto_vec <tree, 8> interm_types;
2442 if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
2443 &code, &multi_step_cvt,
2444 &interm_types)
2445 || multi_step_cvt)
2446 return false;
2448 *convert_code = code;
2449 return true;
2452 /* Function vectorizable_call.
2454 Check if GS performs a function call that can be vectorized.
2455 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2456 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2457 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2459 static bool
2460 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2461 slp_tree slp_node)
2463 gcall *stmt;
2464 tree vec_dest;
2465 tree scalar_dest;
2466 tree op, type;
2467 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2468 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2469 tree vectype_out, vectype_in;
2470 int nunits_in;
2471 int nunits_out;
2472 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2473 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2474 vec_info *vinfo = stmt_info->vinfo;
2475 tree fndecl, new_temp, rhs_type;
2476 gimple *def_stmt;
2477 enum vect_def_type dt[3]
2478 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2479 gimple *new_stmt = NULL;
2480 int ncopies, j;
2481 vec<tree> vargs = vNULL;
2482 enum { NARROW, NONE, WIDEN } modifier;
2483 size_t i, nargs;
2484 tree lhs;
2486 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2487 return false;
2489 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
2490 && ! vec_stmt)
2491 return false;
2493 /* Is GS a vectorizable call? */
2494 stmt = dyn_cast <gcall *> (gs);
2495 if (!stmt)
2496 return false;
2498 if (gimple_call_internal_p (stmt)
2499 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2500 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2501 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2502 slp_node);
2504 if (gimple_call_lhs (stmt) == NULL_TREE
2505 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2506 return false;
2508 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2510 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2512 /* Process function arguments. */
2513 rhs_type = NULL_TREE;
2514 vectype_in = NULL_TREE;
2515 nargs = gimple_call_num_args (stmt);
2517 /* Bail out if the function has more than three arguments; we do not have
2518 interesting builtin functions to vectorize with more than two arguments
2519 except for fma. Having no arguments is not supported either. */
2520 if (nargs == 0 || nargs > 3)
2521 return false;
2523 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
2524 if (gimple_call_internal_p (stmt)
2525 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2527 nargs = 0;
2528 rhs_type = unsigned_type_node;
2531 for (i = 0; i < nargs; i++)
2533 tree opvectype;
2535 op = gimple_call_arg (stmt, i);
2537 /* We can only handle calls with arguments of the same type. */
2538 if (rhs_type
2539 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2541 if (dump_enabled_p ())
2542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2543 "argument types differ.\n");
2544 return false;
2546 if (!rhs_type)
2547 rhs_type = TREE_TYPE (op);
2549 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[i], &opvectype))
2551 if (dump_enabled_p ())
2552 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2553 "use not simple.\n");
2554 return false;
2557 if (!vectype_in)
2558 vectype_in = opvectype;
2559 else if (opvectype
2560 && opvectype != vectype_in)
2562 if (dump_enabled_p ())
2563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2564 "argument vector types differ.\n");
2565 return false;
2568 /* If all arguments are external or constant defs use a vector type with
2569 the same size as the output vector type. */
2570 if (!vectype_in)
2571 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2572 if (vec_stmt)
2573 gcc_assert (vectype_in);
2574 if (!vectype_in)
2576 if (dump_enabled_p ())
2578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2579 "no vectype for scalar type ");
2580 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2581 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2584 return false;
2587 /* FORNOW */
2588 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2589 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2590 if (nunits_in == nunits_out / 2)
2591 modifier = NARROW;
2592 else if (nunits_out == nunits_in)
2593 modifier = NONE;
2594 else if (nunits_out == nunits_in / 2)
2595 modifier = WIDEN;
2596 else
2597 return false;
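/* I.e. the call narrows when its result vector has twice as many
   (narrower) elements as its argument vectors, e.g. V2DI arguments
   producing a V4SI result, and widens in the opposite case. */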
2599 /* We only handle functions that do not read or clobber memory. */
2600 if (gimple_vuse (stmt))
2602 if (dump_enabled_p ())
2603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2604 "function reads from or writes to memory.\n");
2605 return false;
2608 /* For now, we only vectorize functions if a target specific builtin
2609 is available. TODO -- in some cases, it might be profitable to
2610 insert the calls for pieces of the vector, in order to be able
2611 to vectorize other operations in the loop. */
2612 fndecl = NULL_TREE;
2613 internal_fn ifn = IFN_LAST;
2614 combined_fn cfn = gimple_call_combined_fn (stmt);
2615 tree callee = gimple_call_fndecl (stmt);
2617 /* First try using an internal function. */
2618 tree_code convert_code = ERROR_MARK;
2619 if (cfn != CFN_LAST
2620 && (modifier == NONE
2621 || (modifier == NARROW
2622 && simple_integer_narrowing (vectype_out, vectype_in,
2623 &convert_code))))
2624 ifn = vectorizable_internal_function (cfn, callee, vectype_out,
2625 vectype_in);
2627 /* If that fails, try asking for a target-specific built-in function. */
2628 if (ifn == IFN_LAST)
2630 if (cfn != CFN_LAST)
2631 fndecl = targetm.vectorize.builtin_vectorized_function
2632 (cfn, vectype_out, vectype_in);
2633 else
2634 fndecl = targetm.vectorize.builtin_md_vectorized_function
2635 (callee, vectype_out, vectype_in);
2638 if (ifn == IFN_LAST && !fndecl)
2640 if (cfn == CFN_GOMP_SIMD_LANE
2641 && !slp_node
2642 && loop_vinfo
2643 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2644 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2645 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2646 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2648 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2649 { 0, 1, 2, ... vf - 1 } vector. */
2650 gcc_assert (nargs == 0);
2652 else
2654 if (dump_enabled_p ())
2655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2656 "function is not vectorizable.\n");
2657 return false;
2661 if (slp_node)
2662 ncopies = 1;
2663 else if (modifier == NARROW && ifn == IFN_LAST)
2664 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2665 else
2666 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2668 /* Sanity check: make sure that at least one copy of the vectorized stmt
2669 needs to be generated. */
2670 gcc_assert (ncopies >= 1);
2672 if (!vec_stmt) /* transformation not required. */
2674 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2675 if (dump_enabled_p ())
2676 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2677 "\n");
2678 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2679 if (ifn != IFN_LAST && modifier == NARROW && !slp_node)
2680 add_stmt_cost (stmt_info->vinfo->target_cost_data, ncopies / 2,
2681 vec_promote_demote, stmt_info, 0, vect_body);
2683 return true;
2686 /** Transform. **/
2688 if (dump_enabled_p ())
2689 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2691 /* Handle def. */
2692 scalar_dest = gimple_call_lhs (stmt);
2693 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2695 prev_stmt_info = NULL;
2696 if (modifier == NONE || ifn != IFN_LAST)
2698 tree prev_res = NULL_TREE;
2699 for (j = 0; j < ncopies; ++j)
2701 /* Build argument list for the vectorized call. */
2702 if (j == 0)
2703 vargs.create (nargs);
2704 else
2705 vargs.truncate (0);
2707 if (slp_node)
2709 auto_vec<vec<tree> > vec_defs (nargs);
2710 vec<tree> vec_oprnds0;
2712 for (i = 0; i < nargs; i++)
2713 vargs.quick_push (gimple_call_arg (stmt, i));
2714 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2715 vec_oprnds0 = vec_defs[0];
2717 /* Arguments are ready. Create the new vector stmt. */
2718 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2720 size_t k;
2721 for (k = 0; k < nargs; k++)
2723 vec<tree> vec_oprndsk = vec_defs[k];
2724 vargs[k] = vec_oprndsk[i];
2726 if (modifier == NARROW)
2728 tree half_res = make_ssa_name (vectype_in);
2729 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2730 gimple_call_set_lhs (new_stmt, half_res);
2731 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2732 if ((i & 1) == 0)
2734 prev_res = half_res;
2735 continue;
2737 new_temp = make_ssa_name (vec_dest);
2738 new_stmt = gimple_build_assign (new_temp, convert_code,
2739 prev_res, half_res);
2741 else
2743 if (ifn != IFN_LAST)
2744 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2745 else
2746 new_stmt = gimple_build_call_vec (fndecl, vargs);
2747 new_temp = make_ssa_name (vec_dest, new_stmt);
2748 gimple_call_set_lhs (new_stmt, new_temp);
2750 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2751 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2754 for (i = 0; i < nargs; i++)
2756 vec<tree> vec_oprndsi = vec_defs[i];
2757 vec_oprndsi.release ();
2759 continue;
2762 for (i = 0; i < nargs; i++)
2764 op = gimple_call_arg (stmt, i);
2765 if (j == 0)
2766 vec_oprnd0
2767 = vect_get_vec_def_for_operand (op, stmt);
2768 else
2770 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2771 vec_oprnd0
2772 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2775 vargs.quick_push (vec_oprnd0);
2778 if (gimple_call_internal_p (stmt)
2779 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2781 tree *v = XALLOCAVEC (tree, nunits_out);
2782 int k;
2783 for (k = 0; k < nunits_out; ++k)
2784 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
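/* Copy number j of IFN_GOMP_SIMD_LANE is simply the constant lane-number
   vector { j * nunits_out, ..., j * nunits_out + nunits_out - 1 }. */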
2785 tree cst = build_vector (vectype_out, v);
2786 tree new_var
2787 = vect_get_new_ssa_name (vectype_out, vect_simple_var, "cst_");
2788 gimple *init_stmt = gimple_build_assign (new_var, cst);
2789 vect_init_vector_1 (stmt, init_stmt, NULL);
2790 new_temp = make_ssa_name (vec_dest);
2791 new_stmt = gimple_build_assign (new_temp, new_var);
2793 else if (modifier == NARROW)
2795 tree half_res = make_ssa_name (vectype_in);
2796 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2797 gimple_call_set_lhs (new_stmt, half_res);
2798 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2799 if ((j & 1) == 0)
2801 prev_res = half_res;
2802 continue;
2804 new_temp = make_ssa_name (vec_dest);
2805 new_stmt = gimple_build_assign (new_temp, convert_code,
2806 prev_res, half_res);
2808 else
2810 if (ifn != IFN_LAST)
2811 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2812 else
2813 new_stmt = gimple_build_call_vec (fndecl, vargs);
2814 new_temp = make_ssa_name (vec_dest, new_stmt);
2815 gimple_call_set_lhs (new_stmt, new_temp);
2817 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2819 if (j == (modifier == NARROW ? 1 : 0))
2820 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2821 else
2822 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2824 prev_stmt_info = vinfo_for_stmt (new_stmt);
2827 else if (modifier == NARROW)
2829 for (j = 0; j < ncopies; ++j)
2831 /* Build argument list for the vectorized call. */
2832 if (j == 0)
2833 vargs.create (nargs * 2);
2834 else
2835 vargs.truncate (0);
2837 if (slp_node)
2839 auto_vec<vec<tree> > vec_defs (nargs);
2840 vec<tree> vec_oprnds0;
2842 for (i = 0; i < nargs; i++)
2843 vargs.quick_push (gimple_call_arg (stmt, i));
2844 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2845 vec_oprnds0 = vec_defs[0];
2847 /* Arguments are ready. Create the new vector stmt. */
2848 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2850 size_t k;
2851 vargs.truncate (0);
2852 for (k = 0; k < nargs; k++)
2854 vec<tree> vec_oprndsk = vec_defs[k];
2855 vargs.quick_push (vec_oprndsk[i]);
2856 vargs.quick_push (vec_oprndsk[i + 1]);
2858 if (ifn != IFN_LAST)
2859 new_stmt = gimple_build_call_internal_vec (ifn, vargs);
2860 else
2861 new_stmt = gimple_build_call_vec (fndecl, vargs);
2862 new_temp = make_ssa_name (vec_dest, new_stmt);
2863 gimple_call_set_lhs (new_stmt, new_temp);
2864 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2865 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2868 for (i = 0; i < nargs; i++)
2870 vec<tree> vec_oprndsi = vec_defs[i];
2871 vec_oprndsi.release ();
2873 continue;
2876 for (i = 0; i < nargs; i++)
2878 op = gimple_call_arg (stmt, i);
2879 if (j == 0)
2881 vec_oprnd0
2882 = vect_get_vec_def_for_operand (op, stmt);
2883 vec_oprnd1
2884 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2886 else
2888 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2889 vec_oprnd0
2890 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2891 vec_oprnd1
2892 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2895 vargs.quick_push (vec_oprnd0);
2896 vargs.quick_push (vec_oprnd1);
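/* Each scalar argument thus contributes two vector operands per call;
   together they supply the 2 * nunits_in input elements that narrow into
   one nunits_out-element result vector. */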
2899 new_stmt = gimple_build_call_vec (fndecl, vargs);
2900 new_temp = make_ssa_name (vec_dest, new_stmt);
2901 gimple_call_set_lhs (new_stmt, new_temp);
2902 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2904 if (j == 0)
2905 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2906 else
2907 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2909 prev_stmt_info = vinfo_for_stmt (new_stmt);
2912 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2914 else
2915 /* No current target implements this case. */
2916 return false;
2918 vargs.release ();
2920 /* The call in STMT might prevent it from being removed in DCE.
2921 However, we cannot remove it here because of the way the SSA name
2922 it defines is mapped to the new definition. So just replace the
2923 rhs of the statement with something harmless. */
2925 if (slp_node)
2926 return true;
2928 type = TREE_TYPE (scalar_dest);
2929 if (is_pattern_stmt_p (stmt_info))
2930 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2931 else
2932 lhs = gimple_call_lhs (stmt);
2934 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2935 set_vinfo_for_stmt (new_stmt, stmt_info);
2936 set_vinfo_for_stmt (stmt, NULL);
2937 STMT_VINFO_STMT (stmt_info) = new_stmt;
2938 gsi_replace (gsi, new_stmt, false);
2940 return true;
2944 struct simd_call_arg_info
2946 tree vectype;
2947 tree op;
2948 enum vect_def_type dt;
2949 HOST_WIDE_INT linear_step;
2950 unsigned int align;
2951 bool simd_lane_linear;
2954 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2955 is linear within simd lane (but not within whole loop), note it in
2956 *ARGINFO. */
2958 static void
2959 vect_simd_lane_linear (tree op, struct loop *loop,
2960 struct simd_call_arg_info *arginfo)
2962 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2964 if (!is_gimple_assign (def_stmt)
2965 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2966 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2967 return;
2969 tree base = gimple_assign_rhs1 (def_stmt);
2970 HOST_WIDE_INT linear_step = 0;
2971 tree v = gimple_assign_rhs2 (def_stmt);
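/* Walk the definition chain of V, peeling off invariant PLUS_EXPR
   addends, at most one MULT_EXPR by a constant and widening or
   same-precision conversions, looking for the underlying GOMP_SIMD_LANE
   call; BASE accumulates the addends and LINEAR_STEP the multiplier. */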
2972 while (TREE_CODE (v) == SSA_NAME)
2974 tree t;
2975 def_stmt = SSA_NAME_DEF_STMT (v);
2976 if (is_gimple_assign (def_stmt))
2977 switch (gimple_assign_rhs_code (def_stmt))
2979 case PLUS_EXPR:
2980 t = gimple_assign_rhs2 (def_stmt);
2981 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2982 return;
2983 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2984 v = gimple_assign_rhs1 (def_stmt);
2985 continue;
2986 case MULT_EXPR:
2987 t = gimple_assign_rhs2 (def_stmt);
2988 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2989 return;
2990 linear_step = tree_to_shwi (t);
2991 v = gimple_assign_rhs1 (def_stmt);
2992 continue;
2993 CASE_CONVERT:
2994 t = gimple_assign_rhs1 (def_stmt);
2995 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2996 || (TYPE_PRECISION (TREE_TYPE (v))
2997 < TYPE_PRECISION (TREE_TYPE (t))))
2998 return;
2999 if (!linear_step)
3000 linear_step = 1;
3001 v = t;
3002 continue;
3003 default:
3004 return;
3006 else if (is_gimple_call (def_stmt)
3007 && gimple_call_internal_p (def_stmt)
3008 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
3009 && loop->simduid
3010 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
3011 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
3012 == loop->simduid))
3014 if (!linear_step)
3015 linear_step = 1;
3016 arginfo->linear_step = linear_step;
3017 arginfo->op = base;
3018 arginfo->simd_lane_linear = true;
3019 return;
3024 /* Function vectorizable_simd_clone_call.
3026 Check if STMT performs a function call that can be vectorized
3027 by calling a simd clone of the function.
3028 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3029 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3030 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3032 static bool
3033 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
3034 gimple **vec_stmt, slp_tree slp_node)
3036 tree vec_dest;
3037 tree scalar_dest;
3038 tree op, type;
3039 tree vec_oprnd0 = NULL_TREE;
3040 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3041 tree vectype;
3042 unsigned int nunits;
3043 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3044 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3045 vec_info *vinfo = stmt_info->vinfo;
3046 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
3047 tree fndecl, new_temp;
3048 gimple *def_stmt;
3049 gimple *new_stmt = NULL;
3050 int ncopies, j;
3051 auto_vec<simd_call_arg_info> arginfo;
3052 vec<tree> vargs = vNULL;
3053 size_t i, nargs;
3054 tree lhs, rtype, ratype;
3055 vec<constructor_elt, va_gc> *ret_ctor_elts;
3057 /* Is STMT a vectorizable call? */
3058 if (!is_gimple_call (stmt))
3059 return false;
3061 fndecl = gimple_call_fndecl (stmt);
3062 if (fndecl == NULL_TREE)
3063 return false;
3065 struct cgraph_node *node = cgraph_node::get (fndecl);
3066 if (node == NULL || node->simd_clones == NULL)
3067 return false;
3069 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3070 return false;
3072 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3073 && ! vec_stmt)
3074 return false;
3076 if (gimple_call_lhs (stmt)
3077 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3078 return false;
3080 gcc_checking_assert (!stmt_can_throw_internal (stmt));
3082 vectype = STMT_VINFO_VECTYPE (stmt_info);
3084 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
3085 return false;
3087 /* FORNOW */
3088 if (slp_node)
3089 return false;
3091 /* Process function arguments. */
3092 nargs = gimple_call_num_args (stmt);
3094 /* Bail out if the function has zero arguments. */
3095 if (nargs == 0)
3096 return false;
3098 arginfo.reserve (nargs, true);
3100 for (i = 0; i < nargs; i++)
3102 simd_call_arg_info thisarginfo;
3103 affine_iv iv;
3105 thisarginfo.linear_step = 0;
3106 thisarginfo.align = 0;
3107 thisarginfo.op = NULL_TREE;
3108 thisarginfo.simd_lane_linear = false;
3110 op = gimple_call_arg (stmt, i);
3111 if (!vect_is_simple_use (op, vinfo, &def_stmt, &thisarginfo.dt,
3112 &thisarginfo.vectype)
3113 || thisarginfo.dt == vect_uninitialized_def)
3115 if (dump_enabled_p ())
3116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3117 "use not simple.\n");
3118 return false;
3121 if (thisarginfo.dt == vect_constant_def
3122 || thisarginfo.dt == vect_external_def)
3123 gcc_assert (thisarginfo.vectype == NULL_TREE);
3124 else
3125 gcc_assert (thisarginfo.vectype != NULL_TREE);
3127 /* For linear arguments, the analyze phase should have saved
3128 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3129 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
3130 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
3132 gcc_assert (vec_stmt);
3133 thisarginfo.linear_step
3134 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
3135 thisarginfo.op
3136 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
3137 thisarginfo.simd_lane_linear
3138 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
3139 == boolean_true_node);
3140 /* If the loop has been peeled for alignment, we need to adjust the recorded base accordingly. */
3141 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
3142 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
3143 if (n1 != n2 && !thisarginfo.simd_lane_linear)
3145 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
3146 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
3147 tree opt = TREE_TYPE (thisarginfo.op);
3148 bias = fold_convert (TREE_TYPE (step), bias);
3149 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
3150 thisarginfo.op
3151 = fold_build2 (POINTER_TYPE_P (opt)
3152 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
3153 thisarginfo.op, bias);
3156 else if (!vec_stmt
3157 && thisarginfo.dt != vect_constant_def
3158 && thisarginfo.dt != vect_external_def
3159 && loop_vinfo
3160 && TREE_CODE (op) == SSA_NAME
3161 && simple_iv (loop, loop_containing_stmt (stmt), op,
3162 &iv, false)
3163 && tree_fits_shwi_p (iv.step))
3165 thisarginfo.linear_step = tree_to_shwi (iv.step);
3166 thisarginfo.op = iv.base;
3168 else if ((thisarginfo.dt == vect_constant_def
3169 || thisarginfo.dt == vect_external_def)
3170 && POINTER_TYPE_P (TREE_TYPE (op)))
3171 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
3172 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3173 linear too. */
3174 if (POINTER_TYPE_P (TREE_TYPE (op))
3175 && !thisarginfo.linear_step
3176 && !vec_stmt
3177 && thisarginfo.dt != vect_constant_def
3178 && thisarginfo.dt != vect_external_def
3179 && loop_vinfo
3180 && !slp_node
3181 && TREE_CODE (op) == SSA_NAME)
3182 vect_simd_lane_linear (op, loop, &thisarginfo);
3184 arginfo.quick_push (thisarginfo);
3187 unsigned int badness = 0;
3188 struct cgraph_node *bestn = NULL;
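/* Pick the most suitable simd clone, unless the analysis phase already
   recorded one in STMT_VINFO_SIMD_CLONE_INFO. Clones whose simdlen
   exceeds the vectorization factor or whose argument kinds cannot be
   matched are skipped; among the remaining candidates a smaller simdlen,
   an inbranch clone and target-reported reluctance all add to a badness
   score, and the clone with the lowest score wins. */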
3189 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
3190 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
3191 else
3192 for (struct cgraph_node *n = node->simd_clones; n != NULL;
3193 n = n->simdclone->next_clone)
3195 unsigned int this_badness = 0;
3196 if (n->simdclone->simdlen
3197 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3198 || n->simdclone->nargs != nargs)
3199 continue;
3200 if (n->simdclone->simdlen
3201 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3202 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
3203 - exact_log2 (n->simdclone->simdlen)) * 1024;
3204 if (n->simdclone->inbranch)
3205 this_badness += 2048;
3206 int target_badness = targetm.simd_clone.usable (n);
3207 if (target_badness < 0)
3208 continue;
3209 this_badness += target_badness * 512;
3210 /* FORNOW: Have to add code to add the mask argument. */
3211 if (n->simdclone->inbranch)
3212 continue;
3213 for (i = 0; i < nargs; i++)
3215 switch (n->simdclone->args[i].arg_type)
3217 case SIMD_CLONE_ARG_TYPE_VECTOR:
3218 if (!useless_type_conversion_p
3219 (n->simdclone->args[i].orig_type,
3220 TREE_TYPE (gimple_call_arg (stmt, i))))
3221 i = -1;
3222 else if (arginfo[i].dt == vect_constant_def
3223 || arginfo[i].dt == vect_external_def
3224 || arginfo[i].linear_step)
3225 this_badness += 64;
3226 break;
3227 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3228 if (arginfo[i].dt != vect_constant_def
3229 && arginfo[i].dt != vect_external_def)
3230 i = -1;
3231 break;
3232 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3233 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
3234 if (arginfo[i].dt == vect_constant_def
3235 || arginfo[i].dt == vect_external_def
3236 || (arginfo[i].linear_step
3237 != n->simdclone->args[i].linear_step))
3238 i = -1;
3239 break;
3240 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3241 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3242 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3243 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3244 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3245 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3246 /* FORNOW */
3247 i = -1;
3248 break;
3249 case SIMD_CLONE_ARG_TYPE_MASK:
3250 gcc_unreachable ();
3252 if (i == (size_t) -1)
3253 break;
3254 if (n->simdclone->args[i].alignment > arginfo[i].align)
3256 i = -1;
3257 break;
3259 if (arginfo[i].align)
3260 this_badness += (exact_log2 (arginfo[i].align)
3261 - exact_log2 (n->simdclone->args[i].alignment));
3263 if (i == (size_t) -1)
3264 continue;
3265 if (bestn == NULL || this_badness < badness)
3267 bestn = n;
3268 badness = this_badness;
3272 if (bestn == NULL)
3273 return false;
3275 for (i = 0; i < nargs; i++)
3276 if ((arginfo[i].dt == vect_constant_def
3277 || arginfo[i].dt == vect_external_def)
3278 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
3280 arginfo[i].vectype
3281 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
3282 i)));
3283 if (arginfo[i].vectype == NULL
3284 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3285 > bestn->simdclone->simdlen))
3286 return false;
3289 fndecl = bestn->decl;
3290 nunits = bestn->simdclone->simdlen;
3291 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3293 /* If the function isn't const, only allow it in simd loops where the user
3294 has asserted that at least nunits consecutive iterations can be
3295 performed using SIMD instructions. */
3296 if ((loop == NULL || (unsigned) loop->safelen < nunits)
3297 && gimple_vuse (stmt))
3298 return false;
3300 /* Sanity check: make sure that at least one copy of the vectorized stmt
3301 needs to be generated. */
3302 gcc_assert (ncopies >= 1);
3304 if (!vec_stmt) /* transformation not required. */
3306 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
3307 for (i = 0; i < nargs; i++)
3308 if ((bestn->simdclone->args[i].arg_type
3309 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
3310 || (bestn->simdclone->args[i].arg_type
3311 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP))
3313 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
3314 + 1);
3315 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
3316 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
3317 ? size_type_node : TREE_TYPE (arginfo[i].op);
3318 tree ls = build_int_cst (lst, arginfo[i].linear_step);
3319 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
3320 tree sll = arginfo[i].simd_lane_linear
3321 ? boolean_true_node : boolean_false_node;
3322 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
3324 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
3325 if (dump_enabled_p ())
3326 dump_printf_loc (MSG_NOTE, vect_location,
3327 "=== vectorizable_simd_clone_call ===\n");
3328 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3329 return true;
3332 /** Transform. **/
3334 if (dump_enabled_p ())
3335 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3337 /* Handle def. */
3338 scalar_dest = gimple_call_lhs (stmt);
3339 vec_dest = NULL_TREE;
3340 rtype = NULL_TREE;
3341 ratype = NULL_TREE;
3342 if (scalar_dest)
3344 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3345 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3346 if (TREE_CODE (rtype) == ARRAY_TYPE)
3348 ratype = rtype;
3349 rtype = TREE_TYPE (ratype);
3353 prev_stmt_info = NULL;
3354 for (j = 0; j < ncopies; ++j)
3356 /* Build argument list for the vectorized call. */
3357 if (j == 0)
3358 vargs.create (nargs);
3359 else
3360 vargs.truncate (0);
3362 for (i = 0; i < nargs; i++)
3364 unsigned int k, l, m, o;
3365 tree atype;
3366 op = gimple_call_arg (stmt, i);
3367 switch (bestn->simdclone->args[i].arg_type)
3369 case SIMD_CLONE_ARG_TYPE_VECTOR:
3370 atype = bestn->simdclone->args[i].vector_type;
3371 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
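/* The clone may take its vector argument in chunks of a different width
   than the loop's vector type: narrower chunks are extracted with
   BIT_FIELD_REFs, wider ones are assembled from several vector defs with
   a CONSTRUCTOR. */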
3372 for (m = j * o; m < (j + 1) * o; m++)
3374 if (TYPE_VECTOR_SUBPARTS (atype)
3375 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3377 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3378 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3379 / TYPE_VECTOR_SUBPARTS (atype));
3380 gcc_assert ((k & (k - 1)) == 0);
3381 if (m == 0)
3382 vec_oprnd0
3383 = vect_get_vec_def_for_operand (op, stmt);
3384 else
3386 vec_oprnd0 = arginfo[i].op;
3387 if ((m & (k - 1)) == 0)
3388 vec_oprnd0
3389 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3390 vec_oprnd0);
3392 arginfo[i].op = vec_oprnd0;
3393 vec_oprnd0
3394 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3395 size_int (prec),
3396 bitsize_int ((m & (k - 1)) * prec));
3397 new_stmt
3398 = gimple_build_assign (make_ssa_name (atype),
3399 vec_oprnd0);
3400 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3401 vargs.safe_push (gimple_assign_lhs (new_stmt));
3403 else
3405 k = (TYPE_VECTOR_SUBPARTS (atype)
3406 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3407 gcc_assert ((k & (k - 1)) == 0);
3408 vec<constructor_elt, va_gc> *ctor_elts;
3409 if (k != 1)
3410 vec_alloc (ctor_elts, k);
3411 else
3412 ctor_elts = NULL;
3413 for (l = 0; l < k; l++)
3415 if (m == 0 && l == 0)
3416 vec_oprnd0
3417 = vect_get_vec_def_for_operand (op, stmt);
3418 else
3419 vec_oprnd0
3420 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3421 arginfo[i].op);
3422 arginfo[i].op = vec_oprnd0;
3423 if (k == 1)
3424 break;
3425 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3426 vec_oprnd0);
3428 if (k == 1)
3429 vargs.safe_push (vec_oprnd0);
3430 else
3432 vec_oprnd0 = build_constructor (atype, ctor_elts);
3433 new_stmt
3434 = gimple_build_assign (make_ssa_name (atype),
3435 vec_oprnd0);
3436 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3437 vargs.safe_push (gimple_assign_lhs (new_stmt));
3441 break;
3442 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3443 vargs.safe_push (op);
3444 break;
3445 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3446 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP:
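/* A linear argument advances by a compile-time constant step per scalar
   lane. For the first copy, unless the value is already linear within
   the simd lane, build a PHI in the loop header that steps by
   linear_step * nunits * ncopies each vector iteration; subsequent
   copies simply add linear_step * nunits * j to the PHI result. */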
3447 if (j == 0)
3449 gimple_seq stmts;
3450 arginfo[i].op
3451 = force_gimple_operand (arginfo[i].op, &stmts, true,
3452 NULL_TREE);
3453 if (stmts != NULL)
3455 basic_block new_bb;
3456 edge pe = loop_preheader_edge (loop);
3457 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3458 gcc_assert (!new_bb);
3460 if (arginfo[i].simd_lane_linear)
3462 vargs.safe_push (arginfo[i].op);
3463 break;
3465 tree phi_res = copy_ssa_name (op);
3466 gphi *new_phi = create_phi_node (phi_res, loop->header);
3467 set_vinfo_for_stmt (new_phi,
3468 new_stmt_vec_info (new_phi, loop_vinfo));
3469 add_phi_arg (new_phi, arginfo[i].op,
3470 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3471 enum tree_code code
3472 = POINTER_TYPE_P (TREE_TYPE (op))
3473 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3474 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3475 ? sizetype : TREE_TYPE (op);
3476 widest_int cst
3477 = wi::mul (bestn->simdclone->args[i].linear_step,
3478 ncopies * nunits);
3479 tree tcst = wide_int_to_tree (type, cst);
3480 tree phi_arg = copy_ssa_name (op);
3481 new_stmt
3482 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3483 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3484 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3485 set_vinfo_for_stmt (new_stmt,
3486 new_stmt_vec_info (new_stmt, loop_vinfo));
3487 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3488 UNKNOWN_LOCATION);
3489 arginfo[i].op = phi_res;
3490 vargs.safe_push (phi_res);
3492 else
3494 enum tree_code code
3495 = POINTER_TYPE_P (TREE_TYPE (op))
3496 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3497 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3498 ? sizetype : TREE_TYPE (op);
3499 widest_int cst
3500 = wi::mul (bestn->simdclone->args[i].linear_step,
3501 j * nunits);
3502 tree tcst = wide_int_to_tree (type, cst);
3503 new_temp = make_ssa_name (TREE_TYPE (op));
3504 new_stmt = gimple_build_assign (new_temp, code,
3505 arginfo[i].op, tcst);
3506 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3507 vargs.safe_push (new_temp);
3509 break;
3510 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP:
3511 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP:
3512 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3513 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP:
3514 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP:
3515 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP:
3516 default:
3517 gcc_unreachable ();
3521 new_stmt = gimple_build_call_vec (fndecl, vargs);
3522 if (vec_dest)
3524 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3525 if (ratype)
3526 new_temp = create_tmp_var (ratype);
3527 else if (TYPE_VECTOR_SUBPARTS (vectype)
3528 == TYPE_VECTOR_SUBPARTS (rtype))
3529 new_temp = make_ssa_name (vec_dest, new_stmt);
3530 else
3531 new_temp = make_ssa_name (rtype, new_stmt);
3532 gimple_call_set_lhs (new_stmt, new_temp);
3534 vect_finish_stmt_generation (stmt, new_stmt, gsi);
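/* Now distribute the clone's return value into the loop's vector defs.
   If the clone returns more elements than one loop vector holds, split
   the result into nunits / TYPE_VECTOR_SUBPARTS (vectype) pieces, using
   MEM_REFs when it is returned in an array and BIT_FIELD_REFs otherwise;
   if it returns fewer, accumulate several call results into a
   CONSTRUCTOR. */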
3536 if (vec_dest)
3538 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3540 unsigned int k, l;
3541 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3542 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3543 gcc_assert ((k & (k - 1)) == 0);
3544 for (l = 0; l < k; l++)
3546 tree t;
3547 if (ratype)
3549 t = build_fold_addr_expr (new_temp);
3550 t = build2 (MEM_REF, vectype, t,
3551 build_int_cst (TREE_TYPE (t),
3552 l * prec / BITS_PER_UNIT));
3554 else
3555 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3556 size_int (prec), bitsize_int (l * prec));
3557 new_stmt
3558 = gimple_build_assign (make_ssa_name (vectype), t);
3559 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3560 if (j == 0 && l == 0)
3561 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3562 else
3563 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3565 prev_stmt_info = vinfo_for_stmt (new_stmt);
3568 if (ratype)
3570 tree clobber = build_constructor (ratype, NULL);
3571 TREE_THIS_VOLATILE (clobber) = 1;
3572 new_stmt = gimple_build_assign (new_temp, clobber);
3573 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3575 continue;
3577 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3579 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3580 / TYPE_VECTOR_SUBPARTS (rtype));
3581 gcc_assert ((k & (k - 1)) == 0);
3582 if ((j & (k - 1)) == 0)
3583 vec_alloc (ret_ctor_elts, k);
3584 if (ratype)
3586 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3587 for (m = 0; m < o; m++)
3589 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3590 size_int (m), NULL_TREE, NULL_TREE);
3591 new_stmt
3592 = gimple_build_assign (make_ssa_name (rtype), tem);
3593 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3594 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3595 gimple_assign_lhs (new_stmt));
3597 tree clobber = build_constructor (ratype, NULL);
3598 TREE_THIS_VOLATILE (clobber) = 1;
3599 new_stmt = gimple_build_assign (new_temp, clobber);
3600 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3602 else
3603 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3604 if ((j & (k - 1)) != k - 1)
3605 continue;
3606 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3607 new_stmt
3608 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3609 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3611 if ((unsigned) j == k - 1)
3612 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3613 else
3614 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3616 prev_stmt_info = vinfo_for_stmt (new_stmt);
3617 continue;
3619 else if (ratype)
3621 tree t = build_fold_addr_expr (new_temp);
3622 t = build2 (MEM_REF, vectype, t,
3623 build_int_cst (TREE_TYPE (t), 0));
3624 new_stmt
3625 = gimple_build_assign (make_ssa_name (vec_dest), t);
3626 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3627 tree clobber = build_constructor (ratype, NULL);
3628 TREE_THIS_VOLATILE (clobber) = 1;
3629 vect_finish_stmt_generation (stmt,
3630 gimple_build_assign (new_temp,
3631 clobber), gsi);
3635 if (j == 0)
3636 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3637 else
3638 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3640 prev_stmt_info = vinfo_for_stmt (new_stmt);
3643 vargs.release ();
3645 /* The call in STMT might prevent it from being removed in DCE.
3646 However, we cannot remove it here because of the way the SSA name
3647 it defines is mapped to the new definition. So just replace the
3648 rhs of the statement with something harmless. */
3650 if (slp_node)
3651 return true;
3653 if (scalar_dest)
3655 type = TREE_TYPE (scalar_dest);
3656 if (is_pattern_stmt_p (stmt_info))
3657 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3658 else
3659 lhs = gimple_call_lhs (stmt);
3660 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3662 else
3663 new_stmt = gimple_build_nop ();
3664 set_vinfo_for_stmt (new_stmt, stmt_info);
3665 set_vinfo_for_stmt (stmt, NULL);
3666 STMT_VINFO_STMT (stmt_info) = new_stmt;
3667 gsi_replace (gsi, new_stmt, true);
3668 unlink_stmt_vdef (stmt);
3670 return true;
3674 /* Function vect_gen_widened_results_half
3676 Create a vector stmt whose code, number of arguments, and result
3677 variable are CODE, OP_TYPE, and VEC_DEST, and whose operands are
3678 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3679 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3680 needs to be created (DECL is a function-decl of a target-builtin).
3681 STMT is the original scalar stmt that we are vectorizing. */
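/* Illustrative note (an assumption about typical callers, not taken from the
   code above): for a widening multiply the caller usually passes the
   VEC_WIDEN_MULT_LO_EXPR and VEC_WIDEN_MULT_HI_EXPR codes obtained from
   supportable_widening_operation, so two calls to this function produce the
   low and high halves of the widened result. */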
3683 static gimple *
3684 vect_gen_widened_results_half (enum tree_code code,
3685 tree decl,
3686 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3687 tree vec_dest, gimple_stmt_iterator *gsi,
3688 gimple *stmt)
3690 gimple *new_stmt;
3691 tree new_temp;
3693 /* Generate half of the widened result: */
3694 if (code == CALL_EXPR)
3696 /* Target specific support */
3697 if (op_type == binary_op)
3698 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3699 else
3700 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3701 new_temp = make_ssa_name (vec_dest, new_stmt);
3702 gimple_call_set_lhs (new_stmt, new_temp);
3704 else
3706 /* Generic support */
3707 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3708 if (op_type != binary_op)
3709 vec_oprnd1 = NULL;
3710 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3711 new_temp = make_ssa_name (vec_dest, new_stmt);
3712 gimple_assign_set_lhs (new_stmt, new_temp);
3714 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3716 return new_stmt;
3720 /* Get vectorized definitions for loop-based vectorization. For the first
3721 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3722 scalar operand), and for the rest we get a copy with
3723 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3724 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3725 The vectors are collected into VEC_OPRNDS. */
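/* Illustrative note: each invocation pushes two vector defs and then recurses
   MULTI_STEP_CVT more times, so a call with MULTI_STEP_CVT == N collects
   2 * (N + 1) defs in VEC_OPRNDS; the NARROW path of vectorizable_conversion
   relies on this when it sizes VEC_OPRNDS0. */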
3727 static void
3728 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3729 vec<tree> *vec_oprnds, int multi_step_cvt)
3731 tree vec_oprnd;
3733 /* Get first vector operand. */
3734 /* All the vector operands except the very first one (that is the scalar
3735 operand) are stmt copies. */
3736 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3737 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt);
3738 else
3739 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3741 vec_oprnds->quick_push (vec_oprnd);
3743 /* Get second vector operand. */
3744 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3745 vec_oprnds->quick_push (vec_oprnd);
3747 *oprnd = vec_oprnd;
3749 /* For conversion in multiple steps, continue to get operands
3750 recursively. */
3751 if (multi_step_cvt)
3752 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3756 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3757 For multi-step conversions store the resulting vectors and call the function
3758 recursively. */
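/* Worked example (illustrative, assuming 128-bit vector modes): an int -> char
   demotion with MULTI_STEP_CVT == 1 starts from four V4SI operands in
   VEC_OPRNDS; the first level packs them pairwise into two V8HI vectors and
   the recursive call packs those into the final V16QI vector using
   VEC_PACK_TRUNC_EXPR. */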
3760 static void
3761 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3762 int multi_step_cvt, gimple *stmt,
3763 vec<tree> vec_dsts,
3764 gimple_stmt_iterator *gsi,
3765 slp_tree slp_node, enum tree_code code,
3766 stmt_vec_info *prev_stmt_info)
3768 unsigned int i;
3769 tree vop0, vop1, new_tmp, vec_dest;
3770 gimple *new_stmt;
3771 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3773 vec_dest = vec_dsts.pop ();
3775 for (i = 0; i < vec_oprnds->length (); i += 2)
3777 /* Create demotion operation. */
3778 vop0 = (*vec_oprnds)[i];
3779 vop1 = (*vec_oprnds)[i + 1];
3780 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3781 new_tmp = make_ssa_name (vec_dest, new_stmt);
3782 gimple_assign_set_lhs (new_stmt, new_tmp);
3783 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3785 if (multi_step_cvt)
3786 /* Store the resulting vector for next recursive call. */
3787 (*vec_oprnds)[i/2] = new_tmp;
3788 else
3790 /* This is the last step of the conversion sequence. Store the
3791 vectors in SLP_NODE or in the vector info of the scalar statement
3792 (or in the STMT_VINFO_RELATED_STMT chain). */
3793 if (slp_node)
3794 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3795 else
3797 if (!*prev_stmt_info)
3798 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3799 else
3800 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3802 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3807 /* For multi-step demotion operations we first generate demotion operations
3808 from the source type to the intermediate types, and then combine the
3809 results (stored in VEC_OPRNDS) with a demotion operation to the destination
3810 type. */
3811 if (multi_step_cvt)
3813 /* At each level of recursion we have half of the operands we had at the
3814 previous level. */
3815 vec_oprnds->truncate ((i+1)/2);
3816 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3817 stmt, vec_dsts, gsi, slp_node,
3818 VEC_PACK_TRUNC_EXPR,
3819 prev_stmt_info);
3822 vec_dsts.quick_push (vec_dest);
3826 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3827 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3828 the resulting vectors and call the function recursively. */
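/* Illustrative note: each input vector yields a low half and a high half
   (see vect_gen_widened_results_half), so on return *VEC_OPRNDS0 holds twice
   as many vectors as on entry; a multi-step promotion simply repeats this
   doubling once per step. */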
3830 static void
3831 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3832 vec<tree> *vec_oprnds1,
3833 gimple *stmt, tree vec_dest,
3834 gimple_stmt_iterator *gsi,
3835 enum tree_code code1,
3836 enum tree_code code2, tree decl1,
3837 tree decl2, int op_type)
3839 int i;
3840 tree vop0, vop1, new_tmp1, new_tmp2;
3841 gimple *new_stmt1, *new_stmt2;
3842 vec<tree> vec_tmp = vNULL;
3844 vec_tmp.create (vec_oprnds0->length () * 2);
3845 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3847 if (op_type == binary_op)
3848 vop1 = (*vec_oprnds1)[i];
3849 else
3850 vop1 = NULL_TREE;
3852 /* Generate the two halves of promotion operation. */
3853 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3854 op_type, vec_dest, gsi, stmt);
3855 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3856 op_type, vec_dest, gsi, stmt);
3857 if (is_gimple_call (new_stmt1))
3859 new_tmp1 = gimple_call_lhs (new_stmt1);
3860 new_tmp2 = gimple_call_lhs (new_stmt2);
3862 else
3864 new_tmp1 = gimple_assign_lhs (new_stmt1);
3865 new_tmp2 = gimple_assign_lhs (new_stmt2);
3868 /* Store the results for the next step. */
3869 vec_tmp.quick_push (new_tmp1);
3870 vec_tmp.quick_push (new_tmp2);
3873 vec_oprnds0->release ();
3874 *vec_oprnds0 = vec_tmp;
3878 /* Check if STMT performs a conversion operation that can be vectorized.
3879 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3880 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3881 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3883 static bool
3884 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3885 gimple **vec_stmt, slp_tree slp_node)
3887 tree vec_dest;
3888 tree scalar_dest;
3889 tree op0, op1 = NULL_TREE;
3890 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3891 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3892 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3893 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3894 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3895 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3896 tree new_temp;
3897 gimple *def_stmt;
3898 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3899 gimple *new_stmt = NULL;
3900 stmt_vec_info prev_stmt_info;
3901 int nunits_in;
3902 int nunits_out;
3903 tree vectype_out, vectype_in;
3904 int ncopies, i, j;
3905 tree lhs_type, rhs_type;
3906 enum { NARROW, NONE, WIDEN } modifier;
3907 vec<tree> vec_oprnds0 = vNULL;
3908 vec<tree> vec_oprnds1 = vNULL;
3909 tree vop0;
3910 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3911 vec_info *vinfo = stmt_info->vinfo;
3912 int multi_step_cvt = 0;
3913 vec<tree> interm_types = vNULL;
3914 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3915 int op_type;
3916 machine_mode rhs_mode;
3917 unsigned short fltsz;
3919 /* Is STMT a vectorizable conversion? */
3921 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3922 return false;
3924 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
3925 && ! vec_stmt)
3926 return false;
3928 if (!is_gimple_assign (stmt))
3929 return false;
3931 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3932 return false;
3934 code = gimple_assign_rhs_code (stmt);
3935 if (!CONVERT_EXPR_CODE_P (code)
3936 && code != FIX_TRUNC_EXPR
3937 && code != FLOAT_EXPR
3938 && code != WIDEN_MULT_EXPR
3939 && code != WIDEN_LSHIFT_EXPR)
3940 return false;
3942 op_type = TREE_CODE_LENGTH (code);
3944 /* Check types of lhs and rhs. */
3945 scalar_dest = gimple_assign_lhs (stmt);
3946 lhs_type = TREE_TYPE (scalar_dest);
3947 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3949 op0 = gimple_assign_rhs1 (stmt);
3950 rhs_type = TREE_TYPE (op0);
3952 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3953 && !((INTEGRAL_TYPE_P (lhs_type)
3954 && INTEGRAL_TYPE_P (rhs_type))
3955 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3956 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3957 return false;
3959 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
3960 && ((INTEGRAL_TYPE_P (lhs_type)
3961 && (TYPE_PRECISION (lhs_type)
3962 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3963 || (INTEGRAL_TYPE_P (rhs_type)
3964 && (TYPE_PRECISION (rhs_type)
3965 != GET_MODE_PRECISION (TYPE_MODE (rhs_type))))))
3967 if (dump_enabled_p ())
3968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3969 "type conversion to/from bit-precision unsupported."
3970 "\n");
3971 return false;
3974 /* Check the operands of the operation. */
3975 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype_in))
3977 if (dump_enabled_p ())
3978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3979 "use not simple.\n");
3980 return false;
3982 if (op_type == binary_op)
3984 bool ok;
3986 op1 = gimple_assign_rhs2 (stmt);
3987 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3988 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3989 OP1. */
3990 if (CONSTANT_CLASS_P (op0))
3991 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &vectype_in);
3992 else
3993 ok = vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]);
3995 if (!ok)
3997 if (dump_enabled_p ())
3998 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3999 "use not simple.\n");
4000 return false;
4004 /* If op0 is an external or constant def, use a vector type of
4005 the same size as the output vector type. */
4006 if (!vectype_in)
4007 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
4008 if (vec_stmt)
4009 gcc_assert (vectype_in);
4010 if (!vectype_in)
4012 if (dump_enabled_p ())
4014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4015 "no vectype for scalar type ");
4016 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4017 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4020 return false;
4023 if (VECTOR_BOOLEAN_TYPE_P (vectype_out)
4024 && !VECTOR_BOOLEAN_TYPE_P (vectype_in))
4026 if (dump_enabled_p ())
4028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4029 "can't convert between boolean and non "
4030 "boolean vectors");
4031 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
4032 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4035 return false;
4038 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4039 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4040 if (nunits_in < nunits_out)
4041 modifier = NARROW;
4042 else if (nunits_out == nunits_in)
4043 modifier = NONE;
4044 else
4045 modifier = WIDEN;
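/* For example, assuming 128-bit vector modes: a short -> int promotion has
   vectype_in V8HI and vectype_out V4SI, so nunits_in (8) > nunits_out (4)
   and MODIFIER is WIDEN; an int -> short demotion reverses the inequality
   and MODIFIER is NARROW. */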
4047 /* Multiple types in SLP are handled by creating the appropriate number of
4048 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4049 case of SLP. */
4050 if (slp_node)
4051 ncopies = 1;
4052 else if (modifier == NARROW)
4053 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4054 else
4055 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
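/* E.g. with a vectorization factor of 16 and an int -> short demotion
   (V4SI inputs, V8HI outputs, assuming 128-bit vector modes) this gives
   ncopies == 16 / 8 == 2; each copy consumes two V4SI defs and produces
   one V8HI vector. */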
4057 /* Sanity check: make sure that at least one copy of the vectorized stmt
4058 needs to be generated. */
4059 gcc_assert (ncopies >= 1);
4061 /* Supportable by target? */
4062 switch (modifier)
4064 case NONE:
4065 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
4066 return false;
4067 if (supportable_convert_operation (code, vectype_out, vectype_in,
4068 &decl1, &code1))
4069 break;
4070 /* FALLTHRU */
4071 unsupported:
4072 if (dump_enabled_p ())
4073 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4074 "conversion not supported by target.\n");
4075 return false;
4077 case WIDEN:
4078 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
4079 &code1, &code2, &multi_step_cvt,
4080 &interm_types))
4082 /* Binary widening operation can only be supported directly by the
4083 architecture. */
4084 gcc_assert (!(multi_step_cvt && op_type == binary_op));
4085 break;
4088 if (code != FLOAT_EXPR
4089 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4090 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4091 goto unsupported;
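/* Otherwise try to split the conversion through an intermediate integer
   type: e.g. (where the target supports it) an SImode -> DFmode FLOAT_EXPR
   can be done as a widening SImode -> DImode step followed by a
   DImode -> DFmode FLOAT_EXPR, with the DImode vector type acting as
   CVT_TYPE below. */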
4093 rhs_mode = TYPE_MODE (rhs_type);
4094 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
4095 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
4096 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
4097 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
4099 cvt_type
4100 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4101 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4102 if (cvt_type == NULL_TREE)
4103 goto unsupported;
4105 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4107 if (!supportable_convert_operation (code, vectype_out,
4108 cvt_type, &decl1, &codecvt1))
4109 goto unsupported;
4111 else if (!supportable_widening_operation (code, stmt, vectype_out,
4112 cvt_type, &codecvt1,
4113 &codecvt2, &multi_step_cvt,
4114 &interm_types))
4115 continue;
4116 else
4117 gcc_assert (multi_step_cvt == 0);
4119 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
4120 vectype_in, &code1, &code2,
4121 &multi_step_cvt, &interm_types))
4122 break;
4125 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
4126 goto unsupported;
4128 if (GET_MODE_SIZE (rhs_mode) == fltsz)
4129 codecvt2 = ERROR_MARK;
4130 else
4132 multi_step_cvt++;
4133 interm_types.safe_push (cvt_type);
4134 cvt_type = NULL_TREE;
4136 break;
4138 case NARROW:
4139 gcc_assert (op_type == unary_op);
4140 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
4141 &code1, &multi_step_cvt,
4142 &interm_types))
4143 break;
4145 if (code != FIX_TRUNC_EXPR
4146 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
4147 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
4148 goto unsupported;
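/* Otherwise try a two-step sequence: e.g. (where the target supports it) a
   DFmode -> HImode FIX_TRUNC_EXPR can be done as a DFmode -> DImode
   FIX_TRUNC_EXPR followed by a DImode -> HImode narrowing; CVT_TYPE below
   is the intermediate integer type of the same width as the source. */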
4150 rhs_mode = TYPE_MODE (rhs_type);
4151 cvt_type
4152 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
4153 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
4154 if (cvt_type == NULL_TREE)
4155 goto unsupported;
4156 if (!supportable_convert_operation (code, cvt_type, vectype_in,
4157 &decl1, &codecvt1))
4158 goto unsupported;
4159 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
4160 &code1, &multi_step_cvt,
4161 &interm_types))
4162 break;
4163 goto unsupported;
4165 default:
4166 gcc_unreachable ();
4169 if (!vec_stmt) /* transformation not required. */
4171 if (dump_enabled_p ())
4172 dump_printf_loc (MSG_NOTE, vect_location,
4173 "=== vectorizable_conversion ===\n");
4174 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
4176 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
4177 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4179 else if (modifier == NARROW)
4181 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4182 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4184 else
4186 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4187 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
4189 interm_types.release ();
4190 return true;
4193 /** Transform. **/
4194 if (dump_enabled_p ())
4195 dump_printf_loc (MSG_NOTE, vect_location,
4196 "transform conversion. ncopies = %d.\n", ncopies);
4198 if (op_type == binary_op)
4200 if (CONSTANT_CLASS_P (op0))
4201 op0 = fold_convert (TREE_TYPE (op1), op0);
4202 else if (CONSTANT_CLASS_P (op1))
4203 op1 = fold_convert (TREE_TYPE (op0), op1);
4206 /* In case of multi-step conversion, we first generate conversion operations
4207 to the intermediate types, and then from those types to the final one.
4208 We create vector destinations for the intermediate types (TYPES) received
4209 from supportable_*_operation, and store them in the correct order
4210 for future use in vect_create_vectorized_*_stmts (). */
4211 auto_vec<tree> vec_dsts (multi_step_cvt + 1);
4212 vec_dest = vect_create_destination_var (scalar_dest,
4213 (cvt_type && modifier == WIDEN)
4214 ? cvt_type : vectype_out);
4215 vec_dsts.quick_push (vec_dest);
4217 if (multi_step_cvt)
4219 for (i = interm_types.length () - 1;
4220 interm_types.iterate (i, &intermediate_type); i--)
4222 vec_dest = vect_create_destination_var (scalar_dest,
4223 intermediate_type);
4224 vec_dsts.quick_push (vec_dest);
4228 if (cvt_type)
4229 vec_dest = vect_create_destination_var (scalar_dest,
4230 modifier == WIDEN
4231 ? vectype_out : cvt_type);
4233 if (!slp_node)
4235 if (modifier == WIDEN)
4237 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
4238 if (op_type == binary_op)
4239 vec_oprnds1.create (1);
4241 else if (modifier == NARROW)
4242 vec_oprnds0.create (
4243 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
4245 else if (code == WIDEN_LSHIFT_EXPR)
4246 vec_oprnds1.create (slp_node->vec_stmts_size);
4248 last_oprnd = op0;
4249 prev_stmt_info = NULL;
4250 switch (modifier)
4252 case NONE:
4253 for (j = 0; j < ncopies; j++)
4255 if (j == 0)
4256 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
4257 -1);
4258 else
4259 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
4261 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4263 /* Arguments are ready, create the new vector stmt. */
4264 if (code1 == CALL_EXPR)
4266 new_stmt = gimple_build_call (decl1, 1, vop0);
4267 new_temp = make_ssa_name (vec_dest, new_stmt);
4268 gimple_call_set_lhs (new_stmt, new_temp);
4270 else
4272 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
4273 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
4274 new_temp = make_ssa_name (vec_dest, new_stmt);
4275 gimple_assign_set_lhs (new_stmt, new_temp);
4278 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4279 if (slp_node)
4280 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4281 else
4283 if (!prev_stmt_info)
4284 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4285 else
4286 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4287 prev_stmt_info = vinfo_for_stmt (new_stmt);
4291 break;
4293 case WIDEN:
4294 /* In case the vectorization factor (VF) is bigger than the number
4295 of elements that we can fit in a vectype (nunits), we have to
4296 generate more than one vector stmt - i.e. - we need to "unroll"
4297 the vector stmt by a factor VF/nunits. */
4298 for (j = 0; j < ncopies; j++)
4300 /* Handle uses. */
4301 if (j == 0)
4303 if (slp_node)
4305 if (code == WIDEN_LSHIFT_EXPR)
4307 unsigned int k;
4309 vec_oprnd1 = op1;
4310 /* Store vec_oprnd1 for every vector stmt to be created
4311 for SLP_NODE. We check during the analysis that all
4312 the shift arguments are the same. */
4313 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4314 vec_oprnds1.quick_push (vec_oprnd1);
4316 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4317 slp_node, -1);
4319 else
4320 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
4321 &vec_oprnds1, slp_node, -1);
4323 else
4325 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
4326 vec_oprnds0.quick_push (vec_oprnd0);
4327 if (op_type == binary_op)
4329 if (code == WIDEN_LSHIFT_EXPR)
4330 vec_oprnd1 = op1;
4331 else
4332 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
4333 vec_oprnds1.quick_push (vec_oprnd1);
4337 else
4339 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4340 vec_oprnds0.truncate (0);
4341 vec_oprnds0.quick_push (vec_oprnd0);
4342 if (op_type == binary_op)
4344 if (code == WIDEN_LSHIFT_EXPR)
4345 vec_oprnd1 = op1;
4346 else
4347 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4348 vec_oprnd1);
4349 vec_oprnds1.truncate (0);
4350 vec_oprnds1.quick_push (vec_oprnd1);
4354 /* Arguments are ready. Create the new vector stmts. */
4355 for (i = multi_step_cvt; i >= 0; i--)
4357 tree this_dest = vec_dsts[i];
4358 enum tree_code c1 = code1, c2 = code2;
4359 if (i == 0 && codecvt2 != ERROR_MARK)
4361 c1 = codecvt1;
4362 c2 = codecvt2;
4364 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4365 &vec_oprnds1,
4366 stmt, this_dest, gsi,
4367 c1, c2, decl1, decl2,
4368 op_type);
4371 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4373 if (cvt_type)
4375 if (codecvt1 == CALL_EXPR)
4377 new_stmt = gimple_build_call (decl1, 1, vop0);
4378 new_temp = make_ssa_name (vec_dest, new_stmt);
4379 gimple_call_set_lhs (new_stmt, new_temp);
4381 else
4383 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4384 new_temp = make_ssa_name (vec_dest);
4385 new_stmt = gimple_build_assign (new_temp, codecvt1,
4386 vop0);
4389 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4391 else
4392 new_stmt = SSA_NAME_DEF_STMT (vop0);
4394 if (slp_node)
4395 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4396 else
4398 if (!prev_stmt_info)
4399 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4400 else
4401 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4402 prev_stmt_info = vinfo_for_stmt (new_stmt);
4407 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4408 break;
4410 case NARROW:
4411 /* In case the vectorization factor (VF) is bigger than the number
4412 of elements that we can fit in a vectype (nunits), we have to
4413 generate more than one vector stmt - i.e. - we need to "unroll"
4414 the vector stmt by a factor VF/nunits. */
4415 for (j = 0; j < ncopies; j++)
4417 /* Handle uses. */
4418 if (slp_node)
4419 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4420 slp_node, -1);
4421 else
4423 vec_oprnds0.truncate (0);
4424 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4425 vect_pow2 (multi_step_cvt) - 1);
4428 /* Arguments are ready. Create the new vector stmts. */
4429 if (cvt_type)
4430 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4432 if (codecvt1 == CALL_EXPR)
4434 new_stmt = gimple_build_call (decl1, 1, vop0);
4435 new_temp = make_ssa_name (vec_dest, new_stmt);
4436 gimple_call_set_lhs (new_stmt, new_temp);
4438 else
4440 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4441 new_temp = make_ssa_name (vec_dest);
4442 new_stmt = gimple_build_assign (new_temp, codecvt1,
4443 vop0);
4446 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4447 vec_oprnds0[i] = new_temp;
4450 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4451 stmt, vec_dsts, gsi,
4452 slp_node, code1,
4453 &prev_stmt_info);
4456 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4457 break;
4460 vec_oprnds0.release ();
4461 vec_oprnds1.release ();
4462 interm_types.release ();
4464 return true;
4468 /* Function vectorizable_assignment.
4470 Check if STMT performs an assignment (copy) that can be vectorized.
4471 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4472 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4473 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4475 static bool
4476 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4477 gimple **vec_stmt, slp_tree slp_node)
4479 tree vec_dest;
4480 tree scalar_dest;
4481 tree op;
4482 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4483 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4484 tree new_temp;
4485 gimple *def_stmt;
4486 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4487 int ncopies;
4488 int i, j;
4489 vec<tree> vec_oprnds = vNULL;
4490 tree vop;
4491 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4492 vec_info *vinfo = stmt_info->vinfo;
4493 gimple *new_stmt = NULL;
4494 stmt_vec_info prev_stmt_info = NULL;
4495 enum tree_code code;
4496 tree vectype_in;
4498 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4499 return false;
4501 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4502 && ! vec_stmt)
4503 return false;
4505 /* Is vectorizable assignment? */
4506 if (!is_gimple_assign (stmt))
4507 return false;
4509 scalar_dest = gimple_assign_lhs (stmt);
4510 if (TREE_CODE (scalar_dest) != SSA_NAME)
4511 return false;
4513 code = gimple_assign_rhs_code (stmt);
4514 if (gimple_assign_single_p (stmt)
4515 || code == PAREN_EXPR
4516 || CONVERT_EXPR_CODE_P (code))
4517 op = gimple_assign_rhs1 (stmt);
4518 else
4519 return false;
4521 if (code == VIEW_CONVERT_EXPR)
4522 op = TREE_OPERAND (op, 0);
4524 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4525 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4527 /* Multiple types in SLP are handled by creating the appropriate number of
4528 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4529 case of SLP. */
4530 if (slp_node)
4531 ncopies = 1;
4532 else
4533 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4535 gcc_assert (ncopies >= 1);
4537 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt[0], &vectype_in))
4539 if (dump_enabled_p ())
4540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4541 "use not simple.\n");
4542 return false;
4545 /* We can handle NOP_EXPR conversions that do not change the number
4546 of elements or the vector size. */
4547 if ((CONVERT_EXPR_CODE_P (code)
4548 || code == VIEW_CONVERT_EXPR)
4549 && (!vectype_in
4550 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4551 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4552 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4553 return false;
4555 /* We do not handle bit-precision changes. */
4556 if ((CONVERT_EXPR_CODE_P (code)
4557 || code == VIEW_CONVERT_EXPR)
4558 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4559 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4560 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4561 || ((TYPE_PRECISION (TREE_TYPE (op))
4562 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4563 /* But a conversion that does not change the bit-pattern is ok. */
4564 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4565 > TYPE_PRECISION (TREE_TYPE (op)))
4566 && TYPE_UNSIGNED (TREE_TYPE (op)))
4567 /* Conversion between boolean types of different sizes is
4568 a simple assignment in case their vectypes are the same
4569 boolean vector type. */
4570 && (!VECTOR_BOOLEAN_TYPE_P (vectype)
4571 || !VECTOR_BOOLEAN_TYPE_P (vectype_in)))
4573 if (dump_enabled_p ())
4574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4575 "type conversion to/from bit-precision "
4576 "unsupported.\n");
4577 return false;
4580 if (!vec_stmt) /* transformation not required. */
4582 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4583 if (dump_enabled_p ())
4584 dump_printf_loc (MSG_NOTE, vect_location,
4585 "=== vectorizable_assignment ===\n");
4586 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4587 return true;
4590 /** Transform. **/
4591 if (dump_enabled_p ())
4592 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4594 /* Handle def. */
4595 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4597 /* Handle use. */
4598 for (j = 0; j < ncopies; j++)
4600 /* Handle uses. */
4601 if (j == 0)
4602 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4603 else
4604 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4606 /* Arguments are ready. Create the new vector stmt. */
4607 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4609 if (CONVERT_EXPR_CODE_P (code)
4610 || code == VIEW_CONVERT_EXPR)
4611 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4612 new_stmt = gimple_build_assign (vec_dest, vop);
4613 new_temp = make_ssa_name (vec_dest, new_stmt);
4614 gimple_assign_set_lhs (new_stmt, new_temp);
4615 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4616 if (slp_node)
4617 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4620 if (slp_node)
4621 continue;
4623 if (j == 0)
4624 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4625 else
4626 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4628 prev_stmt_info = vinfo_for_stmt (new_stmt);
4631 vec_oprnds.release ();
4632 return true;
4636 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4637 either as shift by a scalar or by a vector. */
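/* Implementation note: the vector-shifted-by-scalar optab is tried first and
   the vector-shifted-by-vector optab is used as a fallback, mirroring the
   preference in vectorizable_shift. */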
4639 bool
4640 vect_supportable_shift (enum tree_code code, tree scalar_type)
4643 machine_mode vec_mode;
4644 optab optab;
4645 int icode;
4646 tree vectype;
4648 vectype = get_vectype_for_scalar_type (scalar_type);
4649 if (!vectype)
4650 return false;
4652 optab = optab_for_tree_code (code, vectype, optab_scalar);
4653 if (!optab
4654 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4656 optab = optab_for_tree_code (code, vectype, optab_vector);
4657 if (!optab
4658 || (optab_handler (optab, TYPE_MODE (vectype))
4659 == CODE_FOR_nothing))
4660 return false;
4663 vec_mode = TYPE_MODE (vectype);
4664 icode = (int) optab_handler (optab, vec_mode);
4665 if (icode == CODE_FOR_nothing)
4666 return false;
4668 return true;
4672 /* Function vectorizable_shift.
4674 Check if STMT performs a shift operation that can be vectorized.
4675 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4676 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4677 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4679 static bool
4680 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4681 gimple **vec_stmt, slp_tree slp_node)
4683 tree vec_dest;
4684 tree scalar_dest;
4685 tree op0, op1 = NULL;
4686 tree vec_oprnd1 = NULL_TREE;
4687 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4688 tree vectype;
4689 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4690 enum tree_code code;
4691 machine_mode vec_mode;
4692 tree new_temp;
4693 optab optab;
4694 int icode;
4695 machine_mode optab_op2_mode;
4696 gimple *def_stmt;
4697 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4698 gimple *new_stmt = NULL;
4699 stmt_vec_info prev_stmt_info;
4700 int nunits_in;
4701 int nunits_out;
4702 tree vectype_out;
4703 tree op1_vectype;
4704 int ncopies;
4705 int j, i;
4706 vec<tree> vec_oprnds0 = vNULL;
4707 vec<tree> vec_oprnds1 = vNULL;
4708 tree vop0, vop1;
4709 unsigned int k;
4710 bool scalar_shift_arg = true;
4711 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4712 vec_info *vinfo = stmt_info->vinfo;
4713 int vf;
4715 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4716 return false;
4718 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
4719 && ! vec_stmt)
4720 return false;
4722 /* Is STMT a vectorizable binary/unary operation? */
4723 if (!is_gimple_assign (stmt))
4724 return false;
4726 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4727 return false;
4729 code = gimple_assign_rhs_code (stmt);
4731 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4732 || code == RROTATE_EXPR))
4733 return false;
4735 scalar_dest = gimple_assign_lhs (stmt);
4736 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4737 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4738 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4740 if (dump_enabled_p ())
4741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4742 "bit-precision shifts not supported.\n");
4743 return false;
4746 op0 = gimple_assign_rhs1 (stmt);
4747 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
4749 if (dump_enabled_p ())
4750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4751 "use not simple.\n");
4752 return false;
4754 /* If op0 is an external or constant def, use a vector type with
4755 the same size as the output vector type. */
4756 if (!vectype)
4757 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4758 if (vec_stmt)
4759 gcc_assert (vectype);
4760 if (!vectype)
4762 if (dump_enabled_p ())
4763 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4764 "no vectype for scalar type\n");
4765 return false;
4768 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4769 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4770 if (nunits_out != nunits_in)
4771 return false;
4773 op1 = gimple_assign_rhs2 (stmt);
4774 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1], &op1_vectype))
4776 if (dump_enabled_p ())
4777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4778 "use not simple.\n");
4779 return false;
4782 if (loop_vinfo)
4783 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4784 else
4785 vf = 1;
4787 /* Multiple types in SLP are handled by creating the appropriate number of
4788 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4789 case of SLP. */
4790 if (slp_node)
4791 ncopies = 1;
4792 else
4793 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4795 gcc_assert (ncopies >= 1);
4797 /* Determine whether the shift amount is a vector or a scalar. If the
4798 shift/rotate amount is a vector, use the vector/vector shift optabs. */
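/* E.g. a[i] << 3, or a[i] << n with loop-invariant n, can use the
   vector/scalar optab, whereas a[i] << b[i] needs the vector/vector optab
   because every lane has its own shift count. */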
4800 if ((dt[1] == vect_internal_def
4801 || dt[1] == vect_induction_def)
4802 && !slp_node)
4803 scalar_shift_arg = false;
4804 else if (dt[1] == vect_constant_def
4805 || dt[1] == vect_external_def
4806 || dt[1] == vect_internal_def)
4808 /* In SLP, we need to check whether the shift count is the same for
4809 all the statements; in loops, if it is a constant or invariant, it
4810 is always a scalar shift. */
4811 if (slp_node)
4813 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4814 gimple *slpstmt;
4816 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4817 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4818 scalar_shift_arg = false;
4821 /* If the shift amount is computed by a pattern stmt we cannot
4822 use the scalar amount directly, so give up and use a vector
4823 shift. */
4824 if (dt[1] == vect_internal_def)
4826 gimple *def = SSA_NAME_DEF_STMT (op1);
4827 if (is_pattern_stmt_p (vinfo_for_stmt (def)))
4828 scalar_shift_arg = false;
4831 else
4833 if (dump_enabled_p ())
4834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4835 "operand mode requires invariant argument.\n");
4836 return false;
4839 /* Vector shifted by vector. */
4840 if (!scalar_shift_arg)
4842 optab = optab_for_tree_code (code, vectype, optab_vector);
4843 if (dump_enabled_p ())
4844 dump_printf_loc (MSG_NOTE, vect_location,
4845 "vector/vector shift/rotate found.\n");
4847 if (!op1_vectype)
4848 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4849 if (op1_vectype == NULL_TREE
4850 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4852 if (dump_enabled_p ())
4853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4854 "unusable type for last operand in"
4855 " vector/vector shift/rotate.\n");
4856 return false;
4859 /* See if the machine has a vector-shifted-by-scalar insn, and if not,
4860 whether it has a vector-shifted-by-vector insn. */
4861 else
4863 optab = optab_for_tree_code (code, vectype, optab_scalar);
4864 if (optab
4865 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4867 if (dump_enabled_p ())
4868 dump_printf_loc (MSG_NOTE, vect_location,
4869 "vector/scalar shift/rotate found.\n");
4871 else
4873 optab = optab_for_tree_code (code, vectype, optab_vector);
4874 if (optab
4875 && (optab_handler (optab, TYPE_MODE (vectype))
4876 != CODE_FOR_nothing))
4878 scalar_shift_arg = false;
4880 if (dump_enabled_p ())
4881 dump_printf_loc (MSG_NOTE, vect_location,
4882 "vector/vector shift/rotate found.\n");
4884 /* Unlike the other binary operators, shifts/rotates have
4885 an int rhs rather than one of the same type as the lhs,
4886 so make sure the scalar has the right type if we are
4887 dealing with vectors of long long/long/short/char. */
4888 if (dt[1] == vect_constant_def)
4889 op1 = fold_convert (TREE_TYPE (vectype), op1);
4890 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4891 TREE_TYPE (op1)))
4893 if (slp_node
4894 && TYPE_MODE (TREE_TYPE (vectype))
4895 != TYPE_MODE (TREE_TYPE (op1)))
4897 if (dump_enabled_p ())
4898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4899 "unusable type for last operand in"
4900 " vector/vector shift/rotate.\n");
4901 return false;
4903 if (vec_stmt && !slp_node)
4905 op1 = fold_convert (TREE_TYPE (vectype), op1);
4906 op1 = vect_init_vector (stmt, op1,
4907 TREE_TYPE (vectype), NULL);
4914 /* Supportable by target? */
4915 if (!optab)
4917 if (dump_enabled_p ())
4918 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4919 "no optab.\n");
4920 return false;
4922 vec_mode = TYPE_MODE (vectype);
4923 icode = (int) optab_handler (optab, vec_mode);
4924 if (icode == CODE_FOR_nothing)
4926 if (dump_enabled_p ())
4927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4928 "op not supported by target.\n");
4929 /* Check only during analysis. */
4930 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4931 || (vf < vect_min_worthwhile_factor (code)
4932 && !vec_stmt))
4933 return false;
4934 if (dump_enabled_p ())
4935 dump_printf_loc (MSG_NOTE, vect_location,
4936 "proceeding using word mode.\n");
4939 /* Worthwhile without SIMD support? Check only during analysis. */
4940 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4941 && vf < vect_min_worthwhile_factor (code)
4942 && !vec_stmt)
4944 if (dump_enabled_p ())
4945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4946 "not worthwhile without SIMD support.\n");
4947 return false;
4950 if (!vec_stmt) /* transformation not required. */
4952 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4953 if (dump_enabled_p ())
4954 dump_printf_loc (MSG_NOTE, vect_location,
4955 "=== vectorizable_shift ===\n");
4956 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4957 return true;
4960 /** Transform. **/
4962 if (dump_enabled_p ())
4963 dump_printf_loc (MSG_NOTE, vect_location,
4964 "transform binary/unary operation.\n");
4966 /* Handle def. */
4967 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4969 prev_stmt_info = NULL;
4970 for (j = 0; j < ncopies; j++)
4972 /* Handle uses. */
4973 if (j == 0)
4975 if (scalar_shift_arg)
4977 /* Vector shl and shr insn patterns can be defined with scalar
4978 operand 2 (shift operand). In this case, use constant or loop
4979 invariant op1 directly, without extending it to vector mode
4980 first. */
4981 optab_op2_mode = insn_data[icode].operand[2].mode;
4982 if (!VECTOR_MODE_P (optab_op2_mode))
4984 if (dump_enabled_p ())
4985 dump_printf_loc (MSG_NOTE, vect_location,
4986 "operand 1 using scalar mode.\n");
4987 vec_oprnd1 = op1;
4988 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4989 vec_oprnds1.quick_push (vec_oprnd1);
4990 if (slp_node)
4992 /* Store vec_oprnd1 for every vector stmt to be created
4993 for SLP_NODE. We check during the analysis that all
4994 the shift arguments are the same.
4995 TODO: Allow different constants for different vector
4996 stmts generated for an SLP instance. */
4997 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4998 vec_oprnds1.quick_push (vec_oprnd1);
5003 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5004 (a special case for certain kinds of vector shifts); otherwise,
5005 operand 1 should be of a vector type (the usual case). */
5006 if (vec_oprnd1)
5007 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5008 slp_node, -1);
5009 else
5010 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5011 slp_node, -1);
5013 else
5014 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5016 /* Arguments are ready. Create the new vector stmt. */
5017 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5019 vop1 = vec_oprnds1[i];
5020 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
5021 new_temp = make_ssa_name (vec_dest, new_stmt);
5022 gimple_assign_set_lhs (new_stmt, new_temp);
5023 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5024 if (slp_node)
5025 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5028 if (slp_node)
5029 continue;
5031 if (j == 0)
5032 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5033 else
5034 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5035 prev_stmt_info = vinfo_for_stmt (new_stmt);
5038 vec_oprnds0.release ();
5039 vec_oprnds1.release ();
5041 return true;
5045 /* Function vectorizable_operation.
5047 Check if STMT performs a binary, unary or ternary operation that can
5048 be vectorized.
5049 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5050 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5051 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5053 static bool
5054 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
5055 gimple **vec_stmt, slp_tree slp_node)
5057 tree vec_dest;
5058 tree scalar_dest;
5059 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
5060 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5061 tree vectype;
5062 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5063 enum tree_code code;
5064 machine_mode vec_mode;
5065 tree new_temp;
5066 int op_type;
5067 optab optab;
5068 bool target_support_p;
5069 gimple *def_stmt;
5070 enum vect_def_type dt[3]
5071 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
5072 gimple *new_stmt = NULL;
5073 stmt_vec_info prev_stmt_info;
5074 int nunits_in;
5075 int nunits_out;
5076 tree vectype_out;
5077 int ncopies;
5078 int j, i;
5079 vec<tree> vec_oprnds0 = vNULL;
5080 vec<tree> vec_oprnds1 = vNULL;
5081 vec<tree> vec_oprnds2 = vNULL;
5082 tree vop0, vop1, vop2;
5083 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5084 vec_info *vinfo = stmt_info->vinfo;
5085 int vf;
5087 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5088 return false;
5090 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5091 && ! vec_stmt)
5092 return false;
5094 /* Is STMT a vectorizable binary/unary operation? */
5095 if (!is_gimple_assign (stmt))
5096 return false;
5098 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5099 return false;
5101 code = gimple_assign_rhs_code (stmt);
5103 /* For pointer addition, we should use the normal plus for
5104 the vector addition. */
5105 if (code == POINTER_PLUS_EXPR)
5106 code = PLUS_EXPR;
5108 /* Support only unary, binary and ternary operations. */
5109 op_type = TREE_CODE_LENGTH (code);
5110 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
5112 if (dump_enabled_p ())
5113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5114 "num. args = %d (not unary/binary/ternary op).\n",
5115 op_type);
5116 return false;
5119 scalar_dest = gimple_assign_lhs (stmt);
5120 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5122 /* Most operations cannot handle bit-precision types without extra
5123 truncations. */
5124 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out)
5125 && (TYPE_PRECISION (TREE_TYPE (scalar_dest))
5126 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
5127 /* Exception are bitwise binary operations. */
5128 && code != BIT_IOR_EXPR
5129 && code != BIT_XOR_EXPR
5130 && code != BIT_AND_EXPR)
5132 if (dump_enabled_p ())
5133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5134 "bit-precision arithmetic not supported.\n");
5135 return false;
5138 op0 = gimple_assign_rhs1 (stmt);
5139 if (!vect_is_simple_use (op0, vinfo, &def_stmt, &dt[0], &vectype))
5141 if (dump_enabled_p ())
5142 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5143 "use not simple.\n");
5144 return false;
5146 /* If op0 is an external or constant def, use a vector type with
5147 the same size as the output vector type. */
5148 if (!vectype)
5150 /* For a boolean type we cannot determine the vectype from an
5151 invariant value (we don't know whether it is a vector of
5152 booleans or a vector of integers). We use the output
5153 vectype because operations on booleans don't change
5154 the type. */
5155 if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
5157 if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
5159 if (dump_enabled_p ())
5160 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5161 "not supported operation on bool value.\n");
5162 return false;
5164 vectype = vectype_out;
5166 else
5167 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
5169 if (vec_stmt)
5170 gcc_assert (vectype);
5171 if (!vectype)
5173 if (dump_enabled_p ())
5175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5176 "no vectype for scalar type ");
5177 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
5178 TREE_TYPE (op0));
5179 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5182 return false;
5185 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5186 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
5187 if (nunits_out != nunits_in)
5188 return false;
5190 if (op_type == binary_op || op_type == ternary_op)
5192 op1 = gimple_assign_rhs2 (stmt);
5193 if (!vect_is_simple_use (op1, vinfo, &def_stmt, &dt[1]))
5195 if (dump_enabled_p ())
5196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5197 "use not simple.\n");
5198 return false;
5201 if (op_type == ternary_op)
5203 op2 = gimple_assign_rhs3 (stmt);
5204 if (!vect_is_simple_use (op2, vinfo, &def_stmt, &dt[2]))
5206 if (dump_enabled_p ())
5207 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5208 "use not simple.\n");
5209 return false;
5213 if (loop_vinfo)
5214 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5215 else
5216 vf = 1;
5218 /* Multiple types in SLP are handled by creating the appropriate number of
5219 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5220 case of SLP. */
5221 if (slp_node)
5222 ncopies = 1;
5223 else
5224 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
5226 gcc_assert (ncopies >= 1);
5228 /* Shifts are handled in vectorizable_shift (). */
5229 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
5230 || code == RROTATE_EXPR)
5231 return false;
5233 /* Supportable by target? */
5235 vec_mode = TYPE_MODE (vectype);
5236 if (code == MULT_HIGHPART_EXPR)
5237 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
5238 else
5240 optab = optab_for_tree_code (code, vectype, optab_default);
5241 if (!optab)
5243 if (dump_enabled_p ())
5244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5245 "no optab.\n");
5246 return false;
5248 target_support_p = (optab_handler (optab, vec_mode)
5249 != CODE_FOR_nothing);
5252 if (!target_support_p)
5254 if (dump_enabled_p ())
5255 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5256 "op not supported by target.\n");
5257 /* Check only during analysis. */
5258 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5259 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
5260 return false;
5261 if (dump_enabled_p ())
5262 dump_printf_loc (MSG_NOTE, vect_location,
5263 "proceeding using word mode.\n");
5266 /* Worthwhile without SIMD support? Check only during analysis. */
5267 if (!VECTOR_MODE_P (vec_mode)
5268 && !vec_stmt
5269 && vf < vect_min_worthwhile_factor (code))
5271 if (dump_enabled_p ())
5272 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5273 "not worthwhile without SIMD support.\n");
5274 return false;
5277 if (!vec_stmt) /* transformation not required. */
5279 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
5280 if (dump_enabled_p ())
5281 dump_printf_loc (MSG_NOTE, vect_location,
5282 "=== vectorizable_operation ===\n");
5283 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
5284 return true;
5287 /** Transform. **/
5289 if (dump_enabled_p ())
5290 dump_printf_loc (MSG_NOTE, vect_location,
5291 "transform binary/unary operation.\n");
5293 /* Handle def. */
5294 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5296 /* In case the vectorization factor (VF) is bigger than the number
5297 of elements that we can fit in a vectype (nunits), we have to generate
5298 more than one vector stmt - i.e. - we need to "unroll" the
5299 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5300 from one copy of the vector stmt to the next, in the field
5301 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5302 stages to find the correct vector defs to be used when vectorizing
5303 stmts that use the defs of the current stmt. The example below
5304 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5305 we need to create 4 vectorized stmts):
5307 before vectorization:
5308 RELATED_STMT VEC_STMT
5309 S1: x = memref - -
5310 S2: z = x + 1 - -
5312 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5313 there):
5314 RELATED_STMT VEC_STMT
5315 VS1_0: vx0 = memref0 VS1_1 -
5316 VS1_1: vx1 = memref1 VS1_2 -
5317 VS1_2: vx2 = memref2 VS1_3 -
5318 VS1_3: vx3 = memref3 - -
5319 S1: x = load - VS1_0
5320 S2: z = x + 1 - -
5322 step2: vectorize stmt S2 (done here):
5323 To vectorize stmt S2 we first need to find the relevant vector
5324 def for the first operand 'x'. This is, as usual, obtained from
5325 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5326 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5327 relevant vector def 'vx0'. Having found 'vx0' we can generate
5328 the vector stmt VS2_0, and as usual, record it in the
5329 STMT_VINFO_VEC_STMT of stmt S2.
5330 When creating the second copy (VS2_1), we obtain the relevant vector
5331 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5332 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5333 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5334 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5335 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5336 chain of stmts and pointers:
5337 RELATED_STMT VEC_STMT
5338 VS1_0: vx0 = memref0 VS1_1 -
5339 VS1_1: vx1 = memref1 VS1_2 -
5340 VS1_2: vx2 = memref2 VS1_3 -
5341 VS1_3: vx3 = memref3 - -
5342 S1: x = load - VS1_0
5343 VS2_0: vz0 = vx0 + v1 VS2_1 -
5344 VS2_1: vz1 = vx1 + v1 VS2_2 -
5345 VS2_2: vz2 = vx2 + v1 VS2_3 -
5346 VS2_3: vz3 = vx3 + v1 - -
5347 S2: z = x + 1 - VS2_0 */
5349 prev_stmt_info = NULL;
5350 for (j = 0; j < ncopies; j++)
5352 /* Handle uses. */
5353 if (j == 0)
5355 if (op_type == binary_op || op_type == ternary_op)
5356 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5357 slp_node, -1);
5358 else
5359 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
5360 slp_node, -1);
5361 if (op_type == ternary_op)
5362 vect_get_vec_defs (op2, NULL_TREE, stmt, &vec_oprnds2, NULL,
5363 slp_node, -1);
5365 else
5367 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5368 if (op_type == ternary_op)
5370 tree vec_oprnd = vec_oprnds2.pop ();
5371 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5372 vec_oprnd));
5376 /* Arguments are ready. Create the new vector stmt. */
5377 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5379 vop1 = ((op_type == binary_op || op_type == ternary_op)
5380 ? vec_oprnds1[i] : NULL_TREE);
5381 vop2 = ((op_type == ternary_op)
5382 ? vec_oprnds2[i] : NULL_TREE);
5383 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5384 new_temp = make_ssa_name (vec_dest, new_stmt);
5385 gimple_assign_set_lhs (new_stmt, new_temp);
5386 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5387 if (slp_node)
5388 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5391 if (slp_node)
5392 continue;
5394 if (j == 0)
5395 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5396 else
5397 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5398 prev_stmt_info = vinfo_for_stmt (new_stmt);
5401 vec_oprnds0.release ();
5402 vec_oprnds1.release ();
5403 vec_oprnds2.release ();
5405 return true;
5408 /* A helper function to ensure data reference DR's base alignment
5409 for STMT_INFO. */
5411 static void
5412 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5414 if (!dr->aux)
5415 return;
5417 if (DR_VECT_AUX (dr)->base_misaligned)
5419 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5420 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5422 if (decl_in_symtab_p (base_decl))
5423 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5424 else
5426 SET_DECL_ALIGN (base_decl, TYPE_ALIGN (vectype));
5427 DECL_USER_ALIGN (base_decl) = 1;
5429 DR_VECT_AUX (dr)->base_misaligned = false;
5434 /* Function get_group_alias_ptr_type.
5436 Return the alias type for the group starting at FIRST_STMT. */
5438 static tree
5439 get_group_alias_ptr_type (gimple *first_stmt)
5441 struct data_reference *first_dr, *next_dr;
5442 gimple *next_stmt;
5444 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5445 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first_stmt));
5446 while (next_stmt)
5448 next_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (next_stmt));
5449 if (get_alias_set (DR_REF (first_dr))
5450 != get_alias_set (DR_REF (next_dr)))
5452 if (dump_enabled_p ())
5453 dump_printf_loc (MSG_NOTE, vect_location,
5454 "conflicting alias set types.\n");
5455 return ptr_type_node;
5457 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5459 return reference_alias_ptr_type (DR_REF (first_dr));
5463 /* Function vectorizable_store.
5465 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5466 can be vectorized.
5467 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5468 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5469 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5471 static bool
5472 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5473 slp_tree slp_node)
5475 tree scalar_dest;
5476 tree data_ref;
5477 tree op;
5478 tree vec_oprnd = NULL_TREE;
5479 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5480 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5481 tree elem_type;
5482 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5483 struct loop *loop = NULL;
5484 machine_mode vec_mode;
5485 tree dummy;
5486 enum dr_alignment_support alignment_support_scheme;
5487 gimple *def_stmt;
5488 enum vect_def_type dt;
5489 stmt_vec_info prev_stmt_info = NULL;
5490 tree dataref_ptr = NULL_TREE;
5491 tree dataref_offset = NULL_TREE;
5492 gimple *ptr_incr = NULL;
5493 int ncopies;
5494 int j;
5495 gimple *next_stmt, *first_stmt;
5496 bool grouped_store;
5497 unsigned int group_size, i;
5498 vec<tree> oprnds = vNULL;
5499 vec<tree> result_chain = vNULL;
5500 bool inv_p;
5501 tree offset = NULL_TREE;
5502 vec<tree> vec_oprnds = vNULL;
5503 bool slp = (slp_node != NULL);
5504 unsigned int vec_num;
5505 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5506 vec_info *vinfo = stmt_info->vinfo;
5507 tree aggr_type;
5508 gather_scatter_info gs_info;
5509 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5510 gimple *new_stmt;
5511 int vf;
5512 vec_load_store_type vls_type;
5513 tree ref_type;
5515 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5516 return false;
5518 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
5519 && ! vec_stmt)
5520 return false;
5522 /* Is vectorizable store? */
5524 if (!is_gimple_assign (stmt))
5525 return false;
5527 scalar_dest = gimple_assign_lhs (stmt);
5528 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5529 && is_pattern_stmt_p (stmt_info))
5530 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5531 if (TREE_CODE (scalar_dest) != ARRAY_REF
5532 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5533 && TREE_CODE (scalar_dest) != INDIRECT_REF
5534 && TREE_CODE (scalar_dest) != COMPONENT_REF
5535 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5536 && TREE_CODE (scalar_dest) != REALPART_EXPR
5537 && TREE_CODE (scalar_dest) != MEM_REF)
5538 return false;
5540 /* Cannot have hybrid store SLP -- that would mean storing to the
5541 same location twice. */
5542 gcc_assert (slp == PURE_SLP_STMT (stmt_info));
5544 gcc_assert (gimple_assign_single_p (stmt));
5546 tree vectype = STMT_VINFO_VECTYPE (stmt_info), rhs_vectype = NULL_TREE;
5547 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5549 if (loop_vinfo)
5551 loop = LOOP_VINFO_LOOP (loop_vinfo);
5552 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5554 else
5555 vf = 1;
5557 /* Multiple types in SLP are handled by creating the appropriate number of
5558 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5559 case of SLP. */
5560 if (slp)
5561 ncopies = 1;
5562 else
5563 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5565 gcc_assert (ncopies >= 1);
5567 /* FORNOW. This restriction should be relaxed. */
5568 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5570 if (dump_enabled_p ())
5571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5572 "multiple types in nested loop.\n");
5573 return false;
5576 op = gimple_assign_rhs1 (stmt);
5578 if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &rhs_vectype))
5580 if (dump_enabled_p ())
5581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5582 "use not simple.\n");
5583 return false;
5586 if (dt == vect_constant_def || dt == vect_external_def)
5587 vls_type = VLS_STORE_INVARIANT;
5588 else
5589 vls_type = VLS_STORE;
5591 if (rhs_vectype && !useless_type_conversion_p (vectype, rhs_vectype))
5592 return false;
5594 elem_type = TREE_TYPE (vectype);
5595 vec_mode = TYPE_MODE (vectype);
5597   /* FORNOW. In some cases we can vectorize even if the data-type is not
5598      supported (e.g. array initialization with 0).  */
5599 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5600 return false;
5602 if (!STMT_VINFO_DATA_REF (stmt_info))
5603 return false;
5605 vect_memory_access_type memory_access_type;
5606 if (!get_load_store_type (stmt, vectype, slp, vls_type, ncopies,
5607 &memory_access_type, &gs_info))
5608 return false;
5610 if (!vec_stmt) /* transformation not required. */
5612 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
5613 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5614 /* The SLP costs are calculated during SLP analysis. */
5615 if (!PURE_SLP_STMT (stmt_info))
5616 vect_model_store_cost (stmt_info, ncopies, memory_access_type, dt,
5617 NULL, NULL, NULL);
5618 return true;
5620 gcc_assert (memory_access_type == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
5622 /** Transform. **/
5624 ensure_base_align (stmt_info, dr);
5626 if (memory_access_type == VMAT_GATHER_SCATTER)
5628 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5629 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
5630 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5631 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5632 edge pe = loop_preheader_edge (loop);
5633 gimple_seq seq;
5634 basic_block new_bb;
5635 enum { NARROW, NONE, WIDEN } modifier;
5636 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
5638 if (nunits == (unsigned int) scatter_off_nunits)
5639 modifier = NONE;
5640 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5642 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5643 modifier = WIDEN;
5645 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5646 sel[i] = i | nunits;
5648 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
5649 gcc_assert (perm_mask != NULL_TREE);
5651 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5653 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5654 modifier = NARROW;
5656 for (i = 0; i < (unsigned int) nunits; ++i)
5657 sel[i] = i | scatter_off_nunits;
5659 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5660 gcc_assert (perm_mask != NULL_TREE);
5661 ncopies *= 2;
5663 else
5664 gcc_unreachable ();
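      /* Illustrative sketch (hypothetical sizes, for exposition only): with
	 nunits == 4 and scatter_off_nunits == 8 the WIDEN case above builds
	 sel = {4, 5, 6, 7, 4, 5, 6, 7}, i.e. a permutation that moves the
	 high half of the offset vector into the low positions for the
	 odd-numbered copies.  */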
5666 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
5667 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5668 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5669 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5670 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5671 scaletype = TREE_VALUE (arglist);
5673 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5674 && TREE_CODE (rettype) == VOID_TYPE);
5676 ptr = fold_convert (ptrtype, gs_info.base);
5677 if (!is_gimple_min_invariant (ptr))
5679 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5680 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5681 gcc_assert (!new_bb);
5684 /* Currently we support only unconditional scatter stores,
5685 so mask should be all ones. */
5686 mask = build_int_cst (masktype, -1);
5687 mask = vect_init_vector (stmt, mask, masktype, NULL);
5689 scale = build_int_cst (scaletype, gs_info.scale);
5691 prev_stmt_info = NULL;
5692 for (j = 0; j < ncopies; ++j)
5694 if (j == 0)
5696 src = vec_oprnd1
5697 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt);
5698 op = vec_oprnd0
5699 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
5701 else if (modifier != NONE && (j & 1))
5703 if (modifier == WIDEN)
5705 src = vec_oprnd1
5706 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5707 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5708 stmt, gsi);
5710 else if (modifier == NARROW)
5712 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5713 stmt, gsi);
5714 op = vec_oprnd0
5715 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5716 vec_oprnd0);
5718 else
5719 gcc_unreachable ();
5721 else
5723 src = vec_oprnd1
5724 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5725 op = vec_oprnd0
5726 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt,
5727 vec_oprnd0);
5730 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5732 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5733 == TYPE_VECTOR_SUBPARTS (srctype));
5734 var = vect_get_new_ssa_name (srctype, vect_simple_var);
5735 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5736 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5737 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5738 src = var;
5741 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5743 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5744 == TYPE_VECTOR_SUBPARTS (idxtype));
5745 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
5746 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5747 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5748 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5749 op = var;
5752 new_stmt
5753 = gimple_build_call (gs_info.decl, 5, ptr, mask, op, src, scale);
5755 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5757 if (prev_stmt_info == NULL)
5758 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5759 else
5760 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5761 prev_stmt_info = vinfo_for_stmt (new_stmt);
5763 return true;
5766 grouped_store = STMT_VINFO_GROUPED_ACCESS (stmt_info);
5767 if (grouped_store)
5769 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5770 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5771 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5773 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5775 /* FORNOW */
5776 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5778 /* We vectorize all the stmts of the interleaving group when we
5779 reach the last stmt in the group. */
5780 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5781 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5782 && !slp)
5784 *vec_stmt = NULL;
5785 return true;
5788 if (slp)
5790 grouped_store = false;
5791 /* VEC_NUM is the number of vect stmts to be created for this
5792 group. */
5793 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5794 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5795 gcc_assert (GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)) == first_stmt);
5796 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5797 op = gimple_assign_rhs1 (first_stmt);
5799 else
5800 /* VEC_NUM is the number of vect stmts to be created for this
5801 group. */
5802 vec_num = group_size;
5804 ref_type = get_group_alias_ptr_type (first_stmt);
5806 else
5808 first_stmt = stmt;
5809 first_dr = dr;
5810 group_size = vec_num = 1;
5811 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
5814 if (dump_enabled_p ())
5815 dump_printf_loc (MSG_NOTE, vect_location,
5816 "transform store. ncopies = %d\n", ncopies);
5818 if (memory_access_type == VMAT_ELEMENTWISE
5819 || memory_access_type == VMAT_STRIDED_SLP)
5821 gimple_stmt_iterator incr_gsi;
5822 bool insert_after;
5823 gimple *incr;
5824 tree offvar;
5825 tree ivstep;
5826 tree running_off;
5827 gimple_seq stmts = NULL;
5828 tree stride_base, stride_step, alias_off;
5829 tree vec_oprnd;
5830 unsigned int g;
5832 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5834 stride_base
5835 = fold_build_pointer_plus
5836 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5837 size_binop (PLUS_EXPR,
5838 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5839 convert_to_ptrofftype (DR_INIT (first_dr))));
5840 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5842 /* For a store with loop-invariant (but other than power-of-2)
5843 stride (i.e. not a grouped access) like so:
5845 for (i = 0; i < n; i += stride)
5846 array[i] = ...;
5848 we generate a new induction variable and new stores from
5849 the components of the (vectorized) rhs:
5851 for (j = 0; ; j += VF*stride)
5852 vectemp = ...;
5853 tmp1 = vectemp[0];
5854 array[j] = tmp1;
5855 tmp2 = vectemp[1];
5856 array[j + stride] = tmp2;
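     A concrete instantiation (assuming VF == nunits == 4, for exposition
     only):

       for (j = 0; ; j += 4 * stride)
         vectemp = ...;
         array[j]              = vectemp[0];
         array[j + stride]     = vectemp[1];
         array[j + 2 * stride] = vectemp[2];
         array[j + 3 * stride] = vectemp[3];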
5860 unsigned nstores = nunits;
5861 unsigned lnel = 1;
5862 tree ltype = elem_type;
5863 if (slp)
5865 if (group_size < nunits
5866 && nunits % group_size == 0)
5868 nstores = nunits / group_size;
5869 lnel = group_size;
5870 ltype = build_vector_type (elem_type, group_size);
5872 else if (group_size >= nunits
5873 && group_size % nunits == 0)
5875 nstores = 1;
5876 lnel = nunits;
5877 ltype = vectype;
5879 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5880 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5883 ivstep = stride_step;
5884 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5885 build_int_cst (TREE_TYPE (ivstep), vf));
5887 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5889 create_iv (stride_base, ivstep, NULL,
5890 loop, &incr_gsi, insert_after,
5891 &offvar, NULL);
5892 incr = gsi_stmt (incr_gsi);
5893 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
5895 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5896 if (stmts)
5897 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5899 prev_stmt_info = NULL;
5900 alias_off = build_int_cst (ref_type, 0);
5901 next_stmt = first_stmt;
5902 for (g = 0; g < group_size; g++)
5904 running_off = offvar;
5905 if (g)
5907 tree size = TYPE_SIZE_UNIT (ltype);
5908 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5909 size);
5910 tree newoff = copy_ssa_name (running_off, NULL);
5911 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5912 running_off, pos);
5913 vect_finish_stmt_generation (stmt, incr, gsi);
5914 running_off = newoff;
5916 unsigned int group_el = 0;
5917 unsigned HOST_WIDE_INT
5918 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
5919 for (j = 0; j < ncopies; j++)
5921	      /* We've set op and dt above, from gimple_assign_rhs1 (stmt),
5922 and first_stmt == stmt. */
5923 if (j == 0)
5925 if (slp)
5927 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5928 slp_node, -1);
5929 vec_oprnd = vec_oprnds[0];
5931 else
5933 gcc_assert (gimple_assign_single_p (next_stmt));
5934 op = gimple_assign_rhs1 (next_stmt);
5935 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
5938 else
5940 if (slp)
5941 vec_oprnd = vec_oprnds[j];
5942 else
5944 vect_is_simple_use (vec_oprnd, vinfo, &def_stmt, &dt);
5945 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5949 for (i = 0; i < nstores; i++)
5951 tree newref, newoff;
5952 gimple *incr, *assign;
5953 tree size = TYPE_SIZE (ltype);
5954 /* Extract the i'th component. */
5955 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5956 bitsize_int (i), size);
5957 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5958 size, pos);
5960 elem = force_gimple_operand_gsi (gsi, elem, true,
5961 NULL_TREE, true,
5962 GSI_SAME_STMT);
5964 tree this_off = build_int_cst (TREE_TYPE (alias_off),
5965 group_el * elsz);
5966 newref = build2 (MEM_REF, ltype,
5967 running_off, this_off);
5969 /* And store it to *running_off. */
5970 assign = gimple_build_assign (newref, elem);
5971 vect_finish_stmt_generation (stmt, assign, gsi);
5973 group_el += lnel;
5974 if (! slp
5975 || group_el == group_size)
5977 newoff = copy_ssa_name (running_off, NULL);
5978 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5979 running_off, stride_step);
5980 vect_finish_stmt_generation (stmt, incr, gsi);
5982 running_off = newoff;
5983 group_el = 0;
5985 if (g == group_size - 1
5986 && !slp)
5988 if (j == 0 && i == 0)
5989 STMT_VINFO_VEC_STMT (stmt_info)
5990 = *vec_stmt = assign;
5991 else
5992 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5993 prev_stmt_info = vinfo_for_stmt (assign);
5997 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5998 if (slp)
5999 break;
6001 return true;
6004 auto_vec<tree> dr_chain (group_size);
6005 oprnds.create (group_size);
6007 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6008 gcc_assert (alignment_support_scheme);
6009 /* Targets with store-lane instructions must not require explicit
6010 realignment. */
6011 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
6012 || alignment_support_scheme == dr_aligned
6013 || alignment_support_scheme == dr_unaligned_supported);
6015 if (memory_access_type == VMAT_CONTIGUOUS_DOWN
6016 || memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6017 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6019 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6020 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6021 else
6022 aggr_type = vectype;
6024 /* In case the vectorization factor (VF) is bigger than the number
6025 of elements that we can fit in a vectype (nunits), we have to generate
6026     more than one vector stmt, i.e. we need to "unroll" the
6027 vector stmt by a factor VF/nunits. For more details see documentation in
6028 vect_get_vec_def_for_copy_stmt. */
6030 /* In case of interleaving (non-unit grouped access):
6032 S1: &base + 2 = x2
6033 S2: &base = x0
6034 S3: &base + 1 = x1
6035 S4: &base + 3 = x3
6037 We create vectorized stores starting from base address (the access of the
6038 first stmt in the chain (S2 in the above example), when the last store stmt
6039 of the chain (S4) is reached:
6041 VS1: &base = vx2
6042 VS2: &base + vec_size*1 = vx0
6043 VS3: &base + vec_size*2 = vx1
6044 VS4: &base + vec_size*3 = vx3
6046 Then permutation statements are generated:
6048 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6049 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6052 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6053 (the order of the data-refs in the output of vect_permute_store_chain
6054 corresponds to the order of scalar stmts in the interleaving chain - see
6055 the documentation of vect_permute_store_chain()).
6057 In case of both multiple types and interleaving, above vector stores and
6058 permutation stmts are created for every copy. The result vector stmts are
6059 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6060 STMT_VINFO_RELATED_STMT for the next copies.
6063 prev_stmt_info = NULL;
6064 for (j = 0; j < ncopies; j++)
6067 if (j == 0)
6069 if (slp)
6071 /* Get vectorized arguments for SLP_NODE. */
6072 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
6073 NULL, slp_node, -1);
6075 vec_oprnd = vec_oprnds[0];
6077 else
6079 /* For interleaved stores we collect vectorized defs for all the
6080 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6081 used as an input to vect_permute_store_chain(), and OPRNDS as
6082 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6084 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6085 OPRNDS are of size 1. */
6086 next_stmt = first_stmt;
6087 for (i = 0; i < group_size; i++)
6089 /* Since gaps are not supported for interleaved stores,
6090 GROUP_SIZE is the exact number of stmts in the chain.
6091		 Therefore, NEXT_STMT can't be NULL_TREE. In case
6092 there is no interleaving, GROUP_SIZE is 1, and only one
6093 iteration of the loop will be executed. */
6094 gcc_assert (next_stmt
6095 && gimple_assign_single_p (next_stmt));
6096 op = gimple_assign_rhs1 (next_stmt);
6098 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt);
6099 dr_chain.quick_push (vec_oprnd);
6100 oprnds.quick_push (vec_oprnd);
6101 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6105	  /* We should have caught mismatched types earlier.  */
6106 gcc_assert (useless_type_conversion_p (vectype,
6107 TREE_TYPE (vec_oprnd)));
6108 bool simd_lane_access_p
6109 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6110 if (simd_lane_access_p
6111 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6112 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6113 && integer_zerop (DR_OFFSET (first_dr))
6114 && integer_zerop (DR_INIT (first_dr))
6115 && alias_sets_conflict_p (get_alias_set (aggr_type),
6116 get_alias_set (TREE_TYPE (ref_type))))
6118 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6119 dataref_offset = build_int_cst (ref_type, 0);
6120 inv_p = false;
6122 else
6123 dataref_ptr
6124 = vect_create_data_ref_ptr (first_stmt, aggr_type,
6125 simd_lane_access_p ? loop : NULL,
6126 offset, &dummy, gsi, &ptr_incr,
6127 simd_lane_access_p, &inv_p);
6128 gcc_assert (bb_vinfo || !inv_p);
6130 else
6132 /* For interleaved stores we created vectorized defs for all the
6133 defs stored in OPRNDS in the previous iteration (previous copy).
6134 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6135 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6136 next copy.
6137 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
6138 OPRNDS are of size 1. */
6139 for (i = 0; i < group_size; i++)
6141 op = oprnds[i];
6142 vect_is_simple_use (op, vinfo, &def_stmt, &dt);
6143 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
6144 dr_chain[i] = vec_oprnd;
6145 oprnds[i] = vec_oprnd;
6147 if (dataref_offset)
6148 dataref_offset
6149 = int_const_binop (PLUS_EXPR, dataref_offset,
6150 TYPE_SIZE_UNIT (aggr_type));
6151 else
6152 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6153 TYPE_SIZE_UNIT (aggr_type));
6156 if (memory_access_type == VMAT_LOAD_STORE_LANES)
6158 tree vec_array;
6160 /* Combine all the vectors into an array. */
6161 vec_array = create_vector_array (vectype, vec_num);
6162 for (i = 0; i < vec_num; i++)
6164 vec_oprnd = dr_chain[i];
6165 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
6168 /* Emit:
6169 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
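	  /* Informal note (not specific to any one target): STORE_LANES
	     writes the VEC_NUM vectors so that their elements end up
	     interleaved in memory; on targets such as AArch64 this is
	     typically expanded to st2/st3/st4 instructions.  */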
6170 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
6171 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
6172 gimple_call_set_lhs (new_stmt, data_ref);
6173 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6175 else
6177 new_stmt = NULL;
6178 if (grouped_store)
6180 if (j == 0)
6181 result_chain.create (group_size);
6182 /* Permute. */
6183 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
6184 &result_chain);
6187 next_stmt = first_stmt;
6188 for (i = 0; i < vec_num; i++)
6190 unsigned align, misalign;
6192 if (i > 0)
6193 /* Bump the vector pointer. */
6194 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6195 stmt, NULL_TREE);
6197 if (slp)
6198 vec_oprnd = vec_oprnds[i];
6199 else if (grouped_store)
6200 /* For grouped stores vectorized defs are interleaved in
6201 vect_permute_store_chain(). */
6202 vec_oprnd = result_chain[i];
6204 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
6205 dataref_ptr,
6206 dataref_offset
6207 ? dataref_offset
6208 : build_int_cst (ref_type, 0));
6209 align = TYPE_ALIGN_UNIT (vectype);
6210 if (aligned_access_p (first_dr))
6211 misalign = 0;
6212 else if (DR_MISALIGNMENT (first_dr) == -1)
6214 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6215 align = TYPE_ALIGN_UNIT (elem_type);
6216 else
6217 align = get_object_alignment (DR_REF (first_dr))
6218 / BITS_PER_UNIT;
6219 misalign = 0;
6220 TREE_TYPE (data_ref)
6221 = build_aligned_type (TREE_TYPE (data_ref),
6222 align * BITS_PER_UNIT);
6224 else
6226 TREE_TYPE (data_ref)
6227 = build_aligned_type (TREE_TYPE (data_ref),
6228 TYPE_ALIGN (elem_type));
6229 misalign = DR_MISALIGNMENT (first_dr);
6231 if (dataref_offset == NULL_TREE
6232 && TREE_CODE (dataref_ptr) == SSA_NAME)
6233 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
6234 misalign);
6236 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
6238 tree perm_mask = perm_mask_for_reverse (vectype);
6239 tree perm_dest
6240 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
6241 vectype);
6242 tree new_temp = make_ssa_name (perm_dest);
6244 /* Generate the permute statement. */
6245 gimple *perm_stmt
6246 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
6247 vec_oprnd, perm_mask);
6248 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6250 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
6251 vec_oprnd = new_temp;
6254 /* Arguments are ready. Create the new vector stmt. */
6255 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
6256 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6258 if (slp)
6259 continue;
6261 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
6262 if (!next_stmt)
6263 break;
6266 if (!slp)
6268 if (j == 0)
6269 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6270 else
6271 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6272 prev_stmt_info = vinfo_for_stmt (new_stmt);
6276 oprnds.release ();
6277 result_chain.release ();
6278 vec_oprnds.release ();
6280 return true;
6283 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
6284 VECTOR_CST mask. No checks are made that the target platform supports the
6285 mask, so callers may wish to test can_vec_perm_p separately, or use
6286 vect_gen_perm_mask_checked. */
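/* Illustrative usage (hypothetical values, for exposition only): for a
   four-element VECTYPE and sel = {1, 0, 3, 2} this returns the VECTOR_CST
   {1, 0, 3, 2} in the matching integer vector type, suitable as the third
   operand of a VEC_PERM_EXPR that swaps neighbouring elements.  */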
6288 tree
6289 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
6291 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
6292 int i, nunits;
6294 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6296 mask_elt_type = lang_hooks.types.type_for_mode
6297 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
6298 mask_type = get_vectype_for_scalar_type (mask_elt_type);
6300 mask_elts = XALLOCAVEC (tree, nunits);
6301 for (i = nunits - 1; i >= 0; i--)
6302 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
6303 mask_vec = build_vector (mask_type, mask_elts);
6305 return mask_vec;
6308 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
6309 i.e. that the target supports the pattern _for arbitrary input vectors_. */
6311 tree
6312 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
6314 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
6315 return vect_gen_perm_mask_any (vectype, sel);
6318 /* Given vector variables X and Y that were generated for the scalar
6319    STMT, generate instructions to permute the vector elements of X and Y
6320 using permutation mask MASK_VEC, insert them at *GSI and return the
6321 permuted vector variable. */
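/* Illustrative example (hypothetical values, for exposition only): with
   X = {x0, x1, x2, x3}, Y = {y0, y1, y2, y3} and MASK_VEC = {0, 4, 1, 5}
   the generated VEC_PERM_EXPR yields {x0, y0, x1, y1}; indices 0..3 select
   from X and indices 4..7 select from Y.  */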
6323 static tree
6324 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6325 gimple_stmt_iterator *gsi)
6327 tree vectype = TREE_TYPE (x);
6328 tree perm_dest, data_ref;
6329 gimple *perm_stmt;
6331 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6332 data_ref = make_ssa_name (perm_dest);
6334 /* Generate the permute statement. */
6335 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6336 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6338 return data_ref;
6341 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6342    inserting them on the loop's preheader edge.  Returns true if we
6343    were successful in doing so (and thus STMT can then be moved),
6344 otherwise returns false. */
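/* Hypothetical GIMPLE sketch (for exposition only):

     loop:
       _1 = &a[4];
       x_2 = *_1;      <-- STMT, recognized as a loop-invariant load

   the definition _1 = &a[4] is itself inside the loop, so it is moved to
   the preheader first, after which the caller can hoist x_2 = *_1 too.  */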
6346 static bool
6347 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6349 ssa_op_iter i;
6350 tree op;
6351 bool any = false;
6353 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6355 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6356 if (!gimple_nop_p (def_stmt)
6357 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6359 /* Make sure we don't need to recurse. While we could do
6360	     so in simple cases, when there are more complex use webs
6361 we don't have an easy way to preserve stmt order to fulfil
6362 dependencies within them. */
6363 tree op2;
6364 ssa_op_iter i2;
6365 if (gimple_code (def_stmt) == GIMPLE_PHI)
6366 return false;
6367 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6369 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6370 if (!gimple_nop_p (def_stmt2)
6371 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6372 return false;
6374 any = true;
6378 if (!any)
6379 return true;
6381 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6383 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6384 if (!gimple_nop_p (def_stmt)
6385 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6387 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6388 gsi_remove (&gsi, false);
6389 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6393 return true;
6396 /* vectorizable_load.
6398    Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6399 can be vectorized.
6400 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6401    stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6402 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
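/* Illustrative example (for exposition only): in

     for (i = 0; i < n; i++)
       sum += a[i];

   the GIMPLE load _1 = a[i] is what this function handles; as with
   vectorizable_store, a NULL VEC_STMT means only analysis and costing
   are performed.  */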
6404 static bool
6405 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6406 slp_tree slp_node, slp_instance slp_node_instance)
6408 tree scalar_dest;
6409 tree vec_dest = NULL;
6410 tree data_ref = NULL;
6411 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6412 stmt_vec_info prev_stmt_info;
6413 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6414 struct loop *loop = NULL;
6415 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6416 bool nested_in_vect_loop = false;
6417 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6418 tree elem_type;
6419 tree new_temp;
6420 machine_mode mode;
6421 gimple *new_stmt = NULL;
6422 tree dummy;
6423 enum dr_alignment_support alignment_support_scheme;
6424 tree dataref_ptr = NULL_TREE;
6425 tree dataref_offset = NULL_TREE;
6426 gimple *ptr_incr = NULL;
6427 int ncopies;
6428 int i, j, group_size, group_gap_adj;
6429 tree msq = NULL_TREE, lsq;
6430 tree offset = NULL_TREE;
6431 tree byte_offset = NULL_TREE;
6432 tree realignment_token = NULL_TREE;
6433 gphi *phi = NULL;
6434 vec<tree> dr_chain = vNULL;
6435 bool grouped_load = false;
6436 gimple *first_stmt;
6437 gimple *first_stmt_for_drptr = NULL;
6438 bool inv_p;
6439 bool compute_in_loop = false;
6440 struct loop *at_loop;
6441 int vec_num;
6442 bool slp = (slp_node != NULL);
6443 bool slp_perm = false;
6444 enum tree_code code;
6445 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6446 int vf;
6447 tree aggr_type;
6448 gather_scatter_info gs_info;
6449 vec_info *vinfo = stmt_info->vinfo;
6450 tree ref_type;
6452 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6453 return false;
6455 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6456 && ! vec_stmt)
6457 return false;
6459 /* Is vectorizable load? */
6460 if (!is_gimple_assign (stmt))
6461 return false;
6463 scalar_dest = gimple_assign_lhs (stmt);
6464 if (TREE_CODE (scalar_dest) != SSA_NAME)
6465 return false;
6467 code = gimple_assign_rhs_code (stmt);
6468 if (code != ARRAY_REF
6469 && code != BIT_FIELD_REF
6470 && code != INDIRECT_REF
6471 && code != COMPONENT_REF
6472 && code != IMAGPART_EXPR
6473 && code != REALPART_EXPR
6474 && code != MEM_REF
6475 && TREE_CODE_CLASS (code) != tcc_declaration)
6476 return false;
6478 if (!STMT_VINFO_DATA_REF (stmt_info))
6479 return false;
6481 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6482 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6484 if (loop_vinfo)
6486 loop = LOOP_VINFO_LOOP (loop_vinfo);
6487 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6488 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6490 else
6491 vf = 1;
6493 /* Multiple types in SLP are handled by creating the appropriate number of
6494 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6495 case of SLP. */
6496 if (slp)
6497 ncopies = 1;
6498 else
6499 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6501 gcc_assert (ncopies >= 1);
6503 /* FORNOW. This restriction should be relaxed. */
6504 if (nested_in_vect_loop && ncopies > 1)
6506 if (dump_enabled_p ())
6507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6508 "multiple types in nested loop.\n");
6509 return false;
6512 /* Invalidate assumptions made by dependence analysis when vectorization
6513 on the unrolled body effectively re-orders stmts. */
6514 if (ncopies > 1
6515 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6516 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6517 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6519 if (dump_enabled_p ())
6520 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6521 "cannot perform implicit CSE when unrolling "
6522 "with negative dependence distance\n");
6523 return false;
6526 elem_type = TREE_TYPE (vectype);
6527 mode = TYPE_MODE (vectype);
6529   /* FORNOW. In some cases we can vectorize even if the data-type is not
6530      supported (e.g. data copies).  */
6531 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6533 if (dump_enabled_p ())
6534 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6535 "Aligned load, but unsupported type.\n");
6536 return false;
6539 /* Check if the load is a part of an interleaving chain. */
6540 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6542 grouped_load = true;
6543 /* FORNOW */
6544 gcc_assert (!nested_in_vect_loop);
6545 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6547 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6548 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6550 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6551 slp_perm = true;
6553 /* ??? The following is overly pessimistic (as well as the loop
6554	     case above) in the case where we can statically determine that the
6555	     excess elements loaded are within the bounds of an accessed decl.
6556	     Likewise, for BB vectorization, using masked loads is a possibility.  */
6557 if (bb_vinfo && slp_perm && group_size % nunits != 0)
6559 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6560 "BB vectorization with gaps at the end of a load "
6561 "is not supported\n");
6562 return false;
6565 /* Invalidate assumptions made by dependence analysis when vectorization
6566 on the unrolled body effectively re-orders stmts. */
6567 if (!PURE_SLP_STMT (stmt_info)
6568 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6569 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6570 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6572 if (dump_enabled_p ())
6573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6574 "cannot perform implicit CSE when performing "
6575 "group loads with negative dependence distance\n");
6576 return false;
6579       /* Similarly, when the stmt is a load that is both part of an SLP
6580	      instance and a loop-vectorized stmt via the same-dr mechanism,
6581 we have to give up. */
6582 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6583 && (STMT_SLP_TYPE (stmt_info)
6584 != STMT_SLP_TYPE (vinfo_for_stmt
6585 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6587 if (dump_enabled_p ())
6588 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6589 "conflicting SLP types for CSEd load\n");
6590 return false;
6594 vect_memory_access_type memory_access_type;
6595 if (!get_load_store_type (stmt, vectype, slp, VLS_LOAD, ncopies,
6596 &memory_access_type, &gs_info))
6597 return false;
6599 if (!vec_stmt) /* transformation not required. */
6601 if (!slp)
6602 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;
6603 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6604 /* The SLP costs are calculated during SLP analysis. */
6605 if (!PURE_SLP_STMT (stmt_info))
6606 vect_model_load_cost (stmt_info, ncopies, memory_access_type,
6607 NULL, NULL, NULL);
6608 return true;
6611 if (!slp)
6612 gcc_assert (memory_access_type
6613 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));
6615 if (dump_enabled_p ())
6616 dump_printf_loc (MSG_NOTE, vect_location,
6617 "transform load. ncopies = %d\n", ncopies);
6619 /** Transform. **/
6621 ensure_base_align (stmt_info, dr);
6623 if (memory_access_type == VMAT_GATHER_SCATTER)
6625 tree vec_oprnd0 = NULL_TREE, op;
6626 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
6627 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6628 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6629 edge pe = loop_preheader_edge (loop);
6630 gimple_seq seq;
6631 basic_block new_bb;
6632 enum { NARROW, NONE, WIDEN } modifier;
6633 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gs_info.offset_vectype);
6635 if (nunits == gather_off_nunits)
6636 modifier = NONE;
6637 else if (nunits == gather_off_nunits / 2)
6639 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6640 modifier = WIDEN;
6642 for (i = 0; i < gather_off_nunits; ++i)
6643 sel[i] = i | nunits;
6645 perm_mask = vect_gen_perm_mask_checked (gs_info.offset_vectype, sel);
6647 else if (nunits == gather_off_nunits * 2)
6649 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6650 modifier = NARROW;
6652 for (i = 0; i < nunits; ++i)
6653 sel[i] = i < gather_off_nunits
6654 ? i : i + nunits - gather_off_nunits;
6656 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6657 ncopies *= 2;
6659 else
6660 gcc_unreachable ();
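      /* Illustrative sketch (hypothetical sizes, for exposition only): with
	 nunits == 8 and gather_off_nunits == 4 the NARROW case above builds
	 sel = {0, 1, 2, 3, 8, 9, 10, 11}, i.e. a mask that concatenates the
	 low halves of its two input vectors, which is how the results of two
	 gathers are merged into one data vector.  */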
6662 rettype = TREE_TYPE (TREE_TYPE (gs_info.decl));
6663 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6664 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6665 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6666 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6667 scaletype = TREE_VALUE (arglist);
6668 gcc_checking_assert (types_compatible_p (srctype, rettype));
6670 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6672 ptr = fold_convert (ptrtype, gs_info.base);
6673 if (!is_gimple_min_invariant (ptr))
6675 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6676 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6677 gcc_assert (!new_bb);
6680 /* Currently we support only unconditional gather loads,
6681 so mask should be all ones. */
6682 if (TREE_CODE (masktype) == INTEGER_TYPE)
6683 mask = build_int_cst (masktype, -1);
6684 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6686 mask = build_int_cst (TREE_TYPE (masktype), -1);
6687 mask = build_vector_from_val (masktype, mask);
6688 mask = vect_init_vector (stmt, mask, masktype, NULL);
6690 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6692 REAL_VALUE_TYPE r;
6693 long tmp[6];
6694 for (j = 0; j < 6; ++j)
6695 tmp[j] = -1;
6696 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6697 mask = build_real (TREE_TYPE (masktype), r);
6698 mask = build_vector_from_val (masktype, mask);
6699 mask = vect_init_vector (stmt, mask, masktype, NULL);
6701 else
6702 gcc_unreachable ();
6704 scale = build_int_cst (scaletype, gs_info.scale);
6706 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6707 merge = build_int_cst (TREE_TYPE (rettype), 0);
6708 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6710 REAL_VALUE_TYPE r;
6711 long tmp[6];
6712 for (j = 0; j < 6; ++j)
6713 tmp[j] = 0;
6714 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6715 merge = build_real (TREE_TYPE (rettype), r);
6717 else
6718 gcc_unreachable ();
6719 merge = build_vector_from_val (rettype, merge);
6720 merge = vect_init_vector (stmt, merge, rettype, NULL);
6722 prev_stmt_info = NULL;
6723 for (j = 0; j < ncopies; ++j)
6725 if (modifier == WIDEN && (j & 1))
6726 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6727 perm_mask, stmt, gsi);
6728 else if (j == 0)
6729 op = vec_oprnd0
6730 = vect_get_vec_def_for_operand (gs_info.offset, stmt);
6731 else
6732 op = vec_oprnd0
6733 = vect_get_vec_def_for_stmt_copy (gs_info.offset_dt, vec_oprnd0);
6735 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6737 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6738 == TYPE_VECTOR_SUBPARTS (idxtype));
6739 var = vect_get_new_ssa_name (idxtype, vect_simple_var);
6740 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6741 new_stmt
6742 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6743 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6744 op = var;
6747 new_stmt
6748 = gimple_build_call (gs_info.decl, 5, merge, ptr, op, mask, scale);
6750 if (!useless_type_conversion_p (vectype, rettype))
6752 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6753 == TYPE_VECTOR_SUBPARTS (rettype));
6754 op = vect_get_new_ssa_name (rettype, vect_simple_var);
6755 gimple_call_set_lhs (new_stmt, op);
6756 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6757 var = make_ssa_name (vec_dest);
6758 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6759 new_stmt
6760 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6762 else
6764 var = make_ssa_name (vec_dest, new_stmt);
6765 gimple_call_set_lhs (new_stmt, var);
6768 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6770 if (modifier == NARROW)
6772 if ((j & 1) == 0)
6774 prev_res = var;
6775 continue;
6777 var = permute_vec_elements (prev_res, var,
6778 perm_mask, stmt, gsi);
6779 new_stmt = SSA_NAME_DEF_STMT (var);
6782 if (prev_stmt_info == NULL)
6783 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6784 else
6785 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6786 prev_stmt_info = vinfo_for_stmt (new_stmt);
6788 return true;
6791 if (memory_access_type == VMAT_ELEMENTWISE
6792 || memory_access_type == VMAT_STRIDED_SLP)
6794 gimple_stmt_iterator incr_gsi;
6795 bool insert_after;
6796 gimple *incr;
6797 tree offvar;
6798 tree ivstep;
6799 tree running_off;
6800 vec<constructor_elt, va_gc> *v = NULL;
6801 gimple_seq stmts = NULL;
6802 tree stride_base, stride_step, alias_off;
6804 gcc_assert (!nested_in_vect_loop);
6806 if (slp && grouped_load)
6808 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6809 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6810 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6811 ref_type = get_group_alias_ptr_type (first_stmt);
6813 else
6815 first_stmt = stmt;
6816 first_dr = dr;
6817 group_size = 1;
6818 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
6821 stride_base
6822 = fold_build_pointer_plus
6823 (DR_BASE_ADDRESS (first_dr),
6824 size_binop (PLUS_EXPR,
6825 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6826 convert_to_ptrofftype (DR_INIT (first_dr))));
6827 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6829 /* For a load with loop-invariant (but other than power-of-2)
6830 stride (i.e. not a grouped access) like so:
6832 for (i = 0; i < n; i += stride)
6833 ... = array[i];
6835 we generate a new induction variable and new accesses to
6836 form a new vector (or vectors, depending on ncopies):
6838 for (j = 0; ; j += VF*stride)
6839 tmp1 = array[j];
6840 tmp2 = array[j + stride];
6842 vectemp = {tmp1, tmp2, ...}
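     A concrete instantiation (assuming VF == nunits == 4, for exposition
     only):

       for (j = 0; ; j += 4 * stride)
         tmp1 = array[j];
         tmp2 = array[j + stride];
         tmp3 = array[j + 2 * stride];
         tmp4 = array[j + 3 * stride];
         vectemp = {tmp1, tmp2, tmp3, tmp4};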
6845 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6846 build_int_cst (TREE_TYPE (stride_step), vf));
6848 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6850 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6851 loop, &incr_gsi, insert_after,
6852 &offvar, NULL);
6853 incr = gsi_stmt (incr_gsi);
6854 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
6856 stride_step = force_gimple_operand (unshare_expr (stride_step),
6857 &stmts, true, NULL_TREE);
6858 if (stmts)
6859 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6861 prev_stmt_info = NULL;
6862 running_off = offvar;
6863 alias_off = build_int_cst (ref_type, 0);
6864 int nloads = nunits;
6865 int lnel = 1;
6866 tree ltype = TREE_TYPE (vectype);
6867 auto_vec<tree> dr_chain;
6868 if (memory_access_type == VMAT_STRIDED_SLP)
6870 nloads = nunits / group_size;
6871 if (group_size < nunits)
6873 lnel = group_size;
6874 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6876 else
6878 lnel = nunits;
6879 ltype = vectype;
6881 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6883 if (slp)
6885 /* For SLP permutation support we need to load the whole group,
6886 not only the number of vector stmts the permutation result
6887 fits in. */
6888 if (slp_perm)
6890 ncopies = (group_size * vf + nunits - 1) / nunits;
6891 dr_chain.create (ncopies);
6893 else
6894 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6896 int group_el = 0;
6897 unsigned HOST_WIDE_INT
6898 elsz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6899 for (j = 0; j < ncopies; j++)
6901 if (nloads > 1)
6902 vec_alloc (v, nloads);
6903 for (i = 0; i < nloads; i++)
6905 tree this_off = build_int_cst (TREE_TYPE (alias_off),
6906 group_el * elsz);
6907 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6908 build2 (MEM_REF, ltype,
6909 running_off, this_off));
6910 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6911 if (nloads > 1)
6912 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE,
6913 gimple_assign_lhs (new_stmt));
6915 group_el += lnel;
6916 if (! slp
6917 || group_el == group_size)
6919 tree newoff = copy_ssa_name (running_off);
6920 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6921 running_off, stride_step);
6922 vect_finish_stmt_generation (stmt, incr, gsi);
6924 running_off = newoff;
6925 group_el = 0;
6928 if (nloads > 1)
6930 tree vec_inv = build_constructor (vectype, v);
6931 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6932 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6935 if (slp)
6937 if (slp_perm)
6938 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6939 else
6940 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6942 else
6944 if (j == 0)
6945 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6946 else
6947 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6948 prev_stmt_info = vinfo_for_stmt (new_stmt);
6951 if (slp_perm)
6952 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6953 slp_node_instance, false);
6954 return true;
6957 if (grouped_load)
6959 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6960 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6961 /* For SLP vectorization we directly vectorize a subchain
6962 without permutation. */
6963 if (slp && ! SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6964 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6965 /* For BB vectorization always use the first stmt to base
6966 the data ref pointer on. */
6967 if (bb_vinfo)
6968 first_stmt_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6970 /* Check if the chain of loads is already vectorized. */
6971 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6972 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6973	       ??? But we can only do so if there is exactly one,
6974 as we have no way to get at the rest. Leave the CSE
6975 opportunity alone.
6976 ??? With the group load eventually participating
6977 in multiple different permutations (having multiple
6978 slp nodes which refer to the same group) the CSE
6979	       would even produce wrong code.  See PR56270.  */
6980 && !slp)
6982 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6983 return true;
6985 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6986 group_gap_adj = 0;
6988 /* VEC_NUM is the number of vect stmts to be created for this group. */
6989 if (slp)
6991 grouped_load = false;
6992 /* For SLP permutation support we need to load the whole group,
6993 not only the number of vector stmts the permutation result
6994 fits in. */
6995 if (slp_perm)
6996 vec_num = (group_size * vf + nunits - 1) / nunits;
6997 else
6998 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6999 group_gap_adj = vf * group_size - nunits * vec_num;
7001 else
7002 vec_num = group_size;
7004 ref_type = get_group_alias_ptr_type (first_stmt);
7006 else
7008 first_stmt = stmt;
7009 first_dr = dr;
7010 group_size = vec_num = 1;
7011 group_gap_adj = 0;
7012 ref_type = reference_alias_ptr_type (DR_REF (first_dr));
7015 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
7016 gcc_assert (alignment_support_scheme);
7017 /* Targets with load-lane instructions must not require explicit
7018 realignment. */
7019 gcc_assert (memory_access_type != VMAT_LOAD_STORE_LANES
7020 || alignment_support_scheme == dr_aligned
7021 || alignment_support_scheme == dr_unaligned_supported);
7023 /* In case the vectorization factor (VF) is bigger than the number
7024 of elements that we can fit in a vectype (nunits), we have to generate
7025     more than one vector stmt, i.e. we need to "unroll" the
7026 vector stmt by a factor VF/nunits. In doing so, we record a pointer
7027 from one copy of the vector stmt to the next, in the field
7028 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
7029 stages to find the correct vector defs to be used when vectorizing
7030 stmts that use the defs of the current stmt. The example below
7031 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
7032 need to create 4 vectorized stmts):
7034 before vectorization:
7035 RELATED_STMT VEC_STMT
7036 S1: x = memref - -
7037 S2: z = x + 1 - -
7039 step 1: vectorize stmt S1:
7040 We first create the vector stmt VS1_0, and, as usual, record a
7041 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
7042 Next, we create the vector stmt VS1_1, and record a pointer to
7043 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
7044 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
7045 stmts and pointers:
7046 RELATED_STMT VEC_STMT
7047 VS1_0: vx0 = memref0 VS1_1 -
7048 VS1_1: vx1 = memref1 VS1_2 -
7049 VS1_2: vx2 = memref2 VS1_3 -
7050 VS1_3: vx3 = memref3 - -
7051 S1: x = load - VS1_0
7052 S2: z = x + 1 - -
7054 See in documentation in vect_get_vec_def_for_stmt_copy for how the
7055 information we recorded in RELATED_STMT field is used to vectorize
7056 stmt S2. */
7058 /* In case of interleaving (non-unit grouped access):
7060 S1: x2 = &base + 2
7061 S2: x0 = &base
7062 S3: x1 = &base + 1
7063 S4: x3 = &base + 3
7065 Vectorized loads are created in the order of memory accesses
7066 starting from the access of the first stmt of the chain:
7068 VS1: vx0 = &base
7069 VS2: vx1 = &base + vec_size*1
7070 VS3: vx3 = &base + vec_size*2
7071 VS4: vx4 = &base + vec_size*3
7073 Then permutation statements are generated:
7075 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
7076 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
7079 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7080 (the order of the data-refs in the output of vect_permute_load_chain
7081 corresponds to the order of scalar stmts in the interleaving chain - see
7082 the documentation of vect_permute_load_chain()).
7083 The generation of permutation stmts and recording them in
7084 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
7086 In case of both multiple types and interleaving, the vector loads and
7087 permutation stmts above are created for every copy. The result vector
7088 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
7089 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
7091 /* If the data reference is aligned (dr_aligned) or potentially unaligned
7092 on a target that supports unaligned accesses (dr_unaligned_supported)
7093 we generate the following code:
7094 p = initial_addr;
7095 indx = 0;
7096 loop {
7097 p = p + indx * vectype_size;
7098 vec_dest = *(p);
7099 indx = indx + 1;
7102 Otherwise, the data reference is potentially unaligned on a target that
7103 does not support unaligned accesses (dr_explicit_realign_optimized) -
7104 then generate the following code, in which the data in each iteration is
7105 obtained by two vector loads, one from the previous iteration, and one
7106 from the current iteration:
7107 p1 = initial_addr;
7108 msq_init = *(floor(p1))
7109 p2 = initial_addr + VS - 1;
7110 realignment_token = call target_builtin;
7111 indx = 0;
7112 loop {
7113 p2 = p2 + indx * vectype_size
7114 lsq = *(floor(p2))
7115 vec_dest = realign_load (msq, lsq, realignment_token)
7116 indx = indx + 1;
7117 msq = lsq;
7118 } */
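  /* Informal note (for exposition only): realign_load conceptually
     concatenates the two aligned vectors MSQ and LSQ and, guided by
     REALIGNMENT_TOKEN, extracts the misaligned vector spanning them;
     e.g. for 16-byte vectors misaligned by 4 bytes it selects bytes
     4..19 of the 32-byte concatenation.  */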
7120 /* If the misalignment remains the same throughout the execution of the
7121 loop, we can create the init_addr and permutation mask at the loop
7122     preheader.  Otherwise, they need to be created inside the loop.
7123 This can only occur when vectorizing memory accesses in the inner-loop
7124 nested within an outer-loop that is being vectorized. */
7126 if (nested_in_vect_loop
7127 && (TREE_INT_CST_LOW (DR_STEP (dr))
7128 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
7130 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
7131 compute_in_loop = true;
7134 if ((alignment_support_scheme == dr_explicit_realign_optimized
7135 || alignment_support_scheme == dr_explicit_realign)
7136 && !compute_in_loop)
7138 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
7139 alignment_support_scheme, NULL_TREE,
7140 &at_loop);
7141 if (alignment_support_scheme == dr_explicit_realign_optimized)
7143 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
7144 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
7145 size_one_node);
7148 else
7149 at_loop = loop;
7151 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7152 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
7154 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7155 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
7156 else
7157 aggr_type = vectype;
7159 prev_stmt_info = NULL;
7160 for (j = 0; j < ncopies; j++)
7162 /* 1. Create the vector or array pointer update chain. */
7163 if (j == 0)
7165 bool simd_lane_access_p
7166 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
7167 if (simd_lane_access_p
7168 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
7169 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
7170 && integer_zerop (DR_OFFSET (first_dr))
7171 && integer_zerop (DR_INIT (first_dr))
7172 && alias_sets_conflict_p (get_alias_set (aggr_type),
7173 get_alias_set (TREE_TYPE (ref_type)))
7174 && (alignment_support_scheme == dr_aligned
7175 || alignment_support_scheme == dr_unaligned_supported))
7177 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
7178 dataref_offset = build_int_cst (ref_type, 0);
7179 inv_p = false;
7181 else if (first_stmt_for_drptr
7182 && first_stmt != first_stmt_for_drptr)
7184 dataref_ptr
7185 = vect_create_data_ref_ptr (first_stmt_for_drptr, aggr_type,
7186 at_loop, offset, &dummy, gsi,
7187 &ptr_incr, simd_lane_access_p,
7188 &inv_p, byte_offset);
7189 /* Adjust the pointer by the difference to first_stmt. */
7190 data_reference_p ptrdr
7191 = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt_for_drptr));
7192 tree diff = fold_convert (sizetype,
7193 size_binop (MINUS_EXPR,
7194 DR_INIT (first_dr),
7195 DR_INIT (ptrdr)));
7196 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7197 stmt, diff);
7199 else
7200 dataref_ptr
7201 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
7202 offset, &dummy, gsi, &ptr_incr,
7203 simd_lane_access_p, &inv_p,
7204 byte_offset);
7206 else if (dataref_offset)
7207 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
7208 TYPE_SIZE_UNIT (aggr_type));
7209 else
7210 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
7211 TYPE_SIZE_UNIT (aggr_type));
7213 if (grouped_load || slp_perm)
7214 dr_chain.create (vec_num);
7216 if (memory_access_type == VMAT_LOAD_STORE_LANES)
7218 tree vec_array;
7220 vec_array = create_vector_array (vectype, vec_num);
7222 /* Emit:
7223 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
7224 data_ref = create_array_ref (aggr_type, dataref_ptr, ref_type);
7225 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
7226 gimple_call_set_lhs (new_stmt, vec_array);
7227 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7229 /* Extract each vector into an SSA_NAME. */
7230 for (i = 0; i < vec_num; i++)
7232 new_temp = read_vector_array (stmt, gsi, scalar_dest,
7233 vec_array, i);
7234 dr_chain.quick_push (new_temp);
7237 /* Record the mapping between SSA_NAMEs and statements. */
7238 vect_record_grouped_load_vectors (stmt, dr_chain);
7240 else
7242 for (i = 0; i < vec_num; i++)
7244 if (i > 0)
7245 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7246 stmt, NULL_TREE);
7248 /* 2. Create the vector-load in the loop. */
7249 switch (alignment_support_scheme)
7251 case dr_aligned:
7252 case dr_unaligned_supported:
7254 unsigned int align, misalign;
7256 data_ref
7257 = fold_build2 (MEM_REF, vectype, dataref_ptr,
7258 dataref_offset
7259 ? dataref_offset
7260 : build_int_cst (ref_type, 0));
7261 align = TYPE_ALIGN_UNIT (vectype);
7262 if (alignment_support_scheme == dr_aligned)
7264 gcc_assert (aligned_access_p (first_dr));
7265 misalign = 0;
7267 else if (DR_MISALIGNMENT (first_dr) == -1)
7269 if (DR_VECT_AUX (first_dr)->base_element_aligned)
7270 align = TYPE_ALIGN_UNIT (elem_type);
7271 else
7272 align = (get_object_alignment (DR_REF (first_dr))
7273 / BITS_PER_UNIT);
7274 misalign = 0;
7275 TREE_TYPE (data_ref)
7276 = build_aligned_type (TREE_TYPE (data_ref),
7277 align * BITS_PER_UNIT);
7279 else
7281 TREE_TYPE (data_ref)
7282 = build_aligned_type (TREE_TYPE (data_ref),
7283 TYPE_ALIGN (elem_type));
7284 misalign = DR_MISALIGNMENT (first_dr);
7286 if (dataref_offset == NULL_TREE
7287 && TREE_CODE (dataref_ptr) == SSA_NAME)
7288 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7289 align, misalign);
7290 break;
7292 case dr_explicit_realign:
7294 tree ptr, bump;
7296 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7298 if (compute_in_loop)
7299 msq = vect_setup_realignment (first_stmt, gsi,
7300 &realignment_token,
7301 dr_explicit_realign,
7302 dataref_ptr, NULL);
7304 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7305 ptr = copy_ssa_name (dataref_ptr);
7306 else
7307 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7308 new_stmt = gimple_build_assign
7309 (ptr, BIT_AND_EXPR, dataref_ptr,
7310 build_int_cst
7311 (TREE_TYPE (dataref_ptr),
7312 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7313 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7314 data_ref
7315 = build2 (MEM_REF, vectype, ptr,
7316 build_int_cst (ref_type, 0));
7317 vec_dest = vect_create_destination_var (scalar_dest,
7318 vectype);
7319 new_stmt = gimple_build_assign (vec_dest, data_ref);
7320 new_temp = make_ssa_name (vec_dest, new_stmt);
7321 gimple_assign_set_lhs (new_stmt, new_temp);
7322 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7323 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7324 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7325 msq = new_temp;
7327 bump = size_binop (MULT_EXPR, vs,
7328 TYPE_SIZE_UNIT (elem_type));
7329 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7330 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7331 new_stmt = gimple_build_assign
7332 (NULL_TREE, BIT_AND_EXPR, ptr,
7333 build_int_cst
7334 (TREE_TYPE (ptr),
7335 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7336 ptr = copy_ssa_name (ptr, new_stmt);
7337 gimple_assign_set_lhs (new_stmt, ptr);
7338 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7339 data_ref
7340 = build2 (MEM_REF, vectype, ptr,
7341 build_int_cst (ref_type, 0));
7342 break;
7344 case dr_explicit_realign_optimized:
7345 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7346 new_temp = copy_ssa_name (dataref_ptr);
7347 else
7348 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7349 new_stmt = gimple_build_assign
7350 (new_temp, BIT_AND_EXPR, dataref_ptr,
7351 build_int_cst
7352 (TREE_TYPE (dataref_ptr),
7353 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7354 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7355 data_ref
7356 = build2 (MEM_REF, vectype, new_temp,
7357 build_int_cst (ref_type, 0));
7358 break;
7359 default:
7360 gcc_unreachable ();
7362 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7363 new_stmt = gimple_build_assign (vec_dest, data_ref);
7364 new_temp = make_ssa_name (vec_dest, new_stmt);
7365 gimple_assign_set_lhs (new_stmt, new_temp);
7366 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7368 /* 3. Handle explicit realignment if necessary/supported.
7369 Create in loop:
7370 vec_dest = realign_load (msq, lsq, realignment_token) */
7371 if (alignment_support_scheme == dr_explicit_realign_optimized
7372 || alignment_support_scheme == dr_explicit_realign)
7374 lsq = gimple_assign_lhs (new_stmt);
7375 if (!realignment_token)
7376 realignment_token = dataref_ptr;
7377 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7378 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7379 msq, lsq, realignment_token);
7380 new_temp = make_ssa_name (vec_dest, new_stmt);
7381 gimple_assign_set_lhs (new_stmt, new_temp);
7382 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7384 if (alignment_support_scheme == dr_explicit_realign_optimized)
7386 gcc_assert (phi);
7387 if (i == vec_num - 1 && j == ncopies - 1)
7388 add_phi_arg (phi, lsq,
7389 loop_latch_edge (containing_loop),
7390 UNKNOWN_LOCATION);
7391 msq = lsq;
7395 /* 4. Handle invariant-load. */
7396 if (inv_p && !bb_vinfo)
7398 gcc_assert (!grouped_load);
7399 /* If we have versioned for aliasing or the loop doesn't
7400 have any data dependencies that would preclude this,
7401 then we are sure this is a loop invariant load and
7402 thus we can insert it on the preheader edge. */
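   /* For example (a sketch; x, t and vx are placeholder names), for
        loop:  ... = x;        // load invariant in the loop
      the scalar load is hoisted and splat once:
        preheader:  t  = x;
                    vx = { t, t, ... };
        loop:       ... uses vx ...
      Otherwise the load is left in place and only the vector splat is
      emitted right after it.  */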
7403 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7404 && !nested_in_vect_loop
7405 && hoist_defs_of_uses (stmt, loop))
7407 if (dump_enabled_p ())
7409 dump_printf_loc (MSG_NOTE, vect_location,
7410 "hoisting out of the vectorized "
7411 "loop: ");
7412 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7414 tree tem = copy_ssa_name (scalar_dest);
7415 gsi_insert_on_edge_immediate
7416 (loop_preheader_edge (loop),
7417 gimple_build_assign (tem,
7418 unshare_expr
7419 (gimple_assign_rhs1 (stmt))));
7420 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7421 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7422 set_vinfo_for_stmt (new_stmt,
7423 new_stmt_vec_info (new_stmt, vinfo));
7425 else
7427 gimple_stmt_iterator gsi2 = *gsi;
7428 gsi_next (&gsi2);
7429 new_temp = vect_init_vector (stmt, scalar_dest,
7430 vectype, &gsi2);
7431 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7435 if (memory_access_type == VMAT_CONTIGUOUS_REVERSE)
7437 tree perm_mask = perm_mask_for_reverse (vectype);
7438 new_temp = permute_vec_elements (new_temp, new_temp,
7439 perm_mask, stmt, gsi);
7440 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7443 /* Collect vector loads and later create their permutation in
7444 vect_transform_grouped_load (). */
7445 if (grouped_load || slp_perm)
7446 dr_chain.quick_push (new_temp);
7448 /* Store vector loads in the corresponding SLP_NODE. */
7449 if (slp && !slp_perm)
7450 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7452 /* Bump the vector pointer to account for a gap or for excess
7453 elements loaded for a permuted SLP load. */
7454 if (group_gap_adj != 0)
7456 bool ovf;
7457 tree bump
7458 = wide_int_to_tree (sizetype,
7459 wi::smul (TYPE_SIZE_UNIT (elem_type),
7460 group_gap_adj, &ovf));
7461 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7462 stmt, bump);
7466 if (slp && !slp_perm)
7467 continue;
7469 if (slp_perm)
7471 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7472 slp_node_instance, false))
7474 dr_chain.release ();
7475 return false;
7478 else
7480 if (grouped_load)
7482 if (memory_access_type != VMAT_LOAD_STORE_LANES)
7483 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7484 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7486 else
7488 if (j == 0)
7489 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7490 else
7491 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7492 prev_stmt_info = vinfo_for_stmt (new_stmt);
7495 dr_chain.release ();
7498 return true;
7501 /* Function vect_is_simple_cond.
7503 Input:
7504 VINFO - the vect info of the loop or basic block that is being vectorized.
7505 COND - Condition that is checked for simple use.
7507 Output:
7508 *COMP_VECTYPE - the vector type for the comparison.
7510 Returns whether a COND can be vectorized. Checks whether the
7511 condition operands are supportable using vect_is_simple_use. */
7513 static bool
7514 vect_is_simple_cond (tree cond, vec_info *vinfo, tree *comp_vectype)
7516 tree lhs, rhs;
7517 enum vect_def_type dt;
7518 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7520 /* Mask case. */
7521 if (TREE_CODE (cond) == SSA_NAME
7522 && TREE_CODE (TREE_TYPE (cond)) == BOOLEAN_TYPE)
7524 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (cond);
7525 if (!vect_is_simple_use (cond, vinfo, &lhs_def_stmt,
7526 &dt, comp_vectype)
7527 || !*comp_vectype
7528 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype))
7529 return false;
7530 return true;
7533 if (!COMPARISON_CLASS_P (cond))
7534 return false;
7536 lhs = TREE_OPERAND (cond, 0);
7537 rhs = TREE_OPERAND (cond, 1);
7539 if (TREE_CODE (lhs) == SSA_NAME)
7541 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7542 if (!vect_is_simple_use (lhs, vinfo, &lhs_def_stmt, &dt, &vectype1))
7543 return false;
7545 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7546 && TREE_CODE (lhs) != FIXED_CST)
7547 return false;
7549 if (TREE_CODE (rhs) == SSA_NAME)
7551 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7552 if (!vect_is_simple_use (rhs, vinfo, &rhs_def_stmt, &dt, &vectype2))
7553 return false;
7555 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7556 && TREE_CODE (rhs) != FIXED_CST)
7557 return false;
7559 if (vectype1 && vectype2
7560 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7561 return false;
7563 *comp_vectype = vectype1 ? vectype1 : vectype2;
7564 return true;
7567 /* vectorizable_condition.
7569 Check if STMT is a conditional modify expression that can be vectorized.
7570 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7571 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7572 at GSI.
7574 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7575 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
7576 the else clause if it is 2).
7578 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
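/* For example (a sketch; the v-prefixed names denote the vector defs of
   the corresponding scalar operands), a scalar statement
     x = a < b ? c : d;
   is transformed into
     mask = va < vb;                       (or an existing mask operand)
     vx   = VEC_COND_EXPR <mask, vc, vd>;
   with one such pair generated per vector copy.  */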
7580 bool
7581 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7582 gimple **vec_stmt, tree reduc_def, int reduc_index,
7583 slp_tree slp_node)
7585 tree scalar_dest = NULL_TREE;
7586 tree vec_dest = NULL_TREE;
7587 tree cond_expr, then_clause, else_clause;
7588 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7589 tree comp_vectype = NULL_TREE;
7590 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7591 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7592 tree vec_compare;
7593 tree new_temp;
7594 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7595 enum vect_def_type dt, dts[4];
7596 int ncopies;
7597 enum tree_code code;
7598 stmt_vec_info prev_stmt_info = NULL;
7599 int i, j;
7600 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7601 vec<tree> vec_oprnds0 = vNULL;
7602 vec<tree> vec_oprnds1 = vNULL;
7603 vec<tree> vec_oprnds2 = vNULL;
7604 vec<tree> vec_oprnds3 = vNULL;
7605 tree vec_cmp_type;
7606 bool masked = false;
7608 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7609 return false;
7611 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == TREE_CODE_REDUCTION)
7613 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7614 return false;
7616 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7617 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7618 && reduc_def))
7619 return false;
7621 /* FORNOW: not yet supported. */
7622 if (STMT_VINFO_LIVE_P (stmt_info))
7624 if (dump_enabled_p ())
7625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7626 "value used after loop.\n");
7627 return false;
7631 /* Is this a vectorizable conditional operation? */
7632 if (!is_gimple_assign (stmt))
7633 return false;
7635 code = gimple_assign_rhs_code (stmt);
7637 if (code != COND_EXPR)
7638 return false;
7640 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7641 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7642 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7644 if (slp_node)
7645 ncopies = 1;
7646 else
7647 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7649 gcc_assert (ncopies >= 1);
7650 if (reduc_index && ncopies > 1)
7651 return false; /* FORNOW */
7653 cond_expr = gimple_assign_rhs1 (stmt);
7654 then_clause = gimple_assign_rhs2 (stmt);
7655 else_clause = gimple_assign_rhs3 (stmt);
7657 if (!vect_is_simple_cond (cond_expr, stmt_info->vinfo, &comp_vectype)
7658 || !comp_vectype)
7659 return false;
7661 gimple *def_stmt;
7662 if (!vect_is_simple_use (then_clause, stmt_info->vinfo, &def_stmt, &dt,
7663 &vectype1))
7664 return false;
7665 if (!vect_is_simple_use (else_clause, stmt_info->vinfo, &def_stmt, &dt,
7666 &vectype2))
7667 return false;
7669 if (vectype1 && !useless_type_conversion_p (vectype, vectype1))
7670 return false;
7672 if (vectype2 && !useless_type_conversion_p (vectype, vectype2))
7673 return false;
7675 masked = !COMPARISON_CLASS_P (cond_expr);
7676 vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype);
7678 if (vec_cmp_type == NULL_TREE)
7679 return false;
7681 if (!vec_stmt)
7683 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7684 return expand_vec_cond_expr_p (vectype, comp_vectype);
7687 /* Transform. */
7689 if (!slp_node)
7691 vec_oprnds0.create (1);
7692 vec_oprnds1.create (1);
7693 vec_oprnds2.create (1);
7694 vec_oprnds3.create (1);
7697 /* Handle def. */
7698 scalar_dest = gimple_assign_lhs (stmt);
7699 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7701 /* Handle cond expr. */
7702 for (j = 0; j < ncopies; j++)
7704 gassign *new_stmt = NULL;
7705 if (j == 0)
7707 if (slp_node)
7709 auto_vec<tree, 4> ops;
7710 auto_vec<vec<tree>, 4> vec_defs;
7712 if (masked)
7713 ops.safe_push (cond_expr);
7714 else
7716 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7717 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7719 ops.safe_push (then_clause);
7720 ops.safe_push (else_clause);
7721 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7722 vec_oprnds3 = vec_defs.pop ();
7723 vec_oprnds2 = vec_defs.pop ();
7724 if (!masked)
7725 vec_oprnds1 = vec_defs.pop ();
7726 vec_oprnds0 = vec_defs.pop ();
7728 else
7730 gimple *gtemp;
7731 if (masked)
7733 vec_cond_lhs
7734 = vect_get_vec_def_for_operand (cond_expr, stmt,
7735 comp_vectype);
7736 vect_is_simple_use (cond_expr, stmt_info->vinfo,
7737 &gtemp, &dts[0]);
7739 else
7741 vec_cond_lhs =
7742 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7743 stmt, comp_vectype);
7744 vect_is_simple_use (TREE_OPERAND (cond_expr, 0),
7745 loop_vinfo, &gtemp, &dts[0]);
7747 vec_cond_rhs =
7748 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7749 stmt, comp_vectype);
7750 vect_is_simple_use (TREE_OPERAND (cond_expr, 1),
7751 loop_vinfo, &gtemp, &dts[1]);
7753 if (reduc_index == 1)
7754 vec_then_clause = reduc_def;
7755 else
7757 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7758 stmt);
7759 vect_is_simple_use (then_clause, loop_vinfo,
7760 &gtemp, &dts[2]);
7762 if (reduc_index == 2)
7763 vec_else_clause = reduc_def;
7764 else
7766 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7767 stmt);
7768 vect_is_simple_use (else_clause, loop_vinfo, &gtemp, &dts[3]);
7772 else
7774 vec_cond_lhs
7775 = vect_get_vec_def_for_stmt_copy (dts[0],
7776 vec_oprnds0.pop ());
7777 if (!masked)
7778 vec_cond_rhs
7779 = vect_get_vec_def_for_stmt_copy (dts[1],
7780 vec_oprnds1.pop ());
7782 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7783 vec_oprnds2.pop ());
7784 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7785 vec_oprnds3.pop ());
7788 if (!slp_node)
7790 vec_oprnds0.quick_push (vec_cond_lhs);
7791 if (!masked)
7792 vec_oprnds1.quick_push (vec_cond_rhs);
7793 vec_oprnds2.quick_push (vec_then_clause);
7794 vec_oprnds3.quick_push (vec_else_clause);
7797 /* Arguments are ready. Create the new vector stmt. */
7798 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7800 vec_then_clause = vec_oprnds2[i];
7801 vec_else_clause = vec_oprnds3[i];
7803 if (masked)
7804 vec_compare = vec_cond_lhs;
7805 else
7807 vec_cond_rhs = vec_oprnds1[i];
7808 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7809 vec_cond_lhs, vec_cond_rhs);
7811 new_temp = make_ssa_name (vec_dest);
7812 new_stmt = gimple_build_assign (new_temp, VEC_COND_EXPR,
7813 vec_compare, vec_then_clause,
7814 vec_else_clause);
7815 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7816 if (slp_node)
7817 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7820 if (slp_node)
7821 continue;
7823 if (j == 0)
7824 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7825 else
7826 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7828 prev_stmt_info = vinfo_for_stmt (new_stmt);
7831 vec_oprnds0.release ();
7832 vec_oprnds1.release ();
7833 vec_oprnds2.release ();
7834 vec_oprnds3.release ();
7836 return true;
7839 /* vectorizable_comparison.
7841 Check if STMT is a comparison expression that can be vectorized.
7842 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7843 comparison, put it in VEC_STMT, and insert it at GSI.
7845 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
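/* For example (a sketch; vm, va and vb denote the vector defs of the
   scalar names), a scalar mask definition
     m = a < b;
   becomes a vector comparison producing a boolean vector
     vm = va < vb;
   or, for boolean operands, the equivalent bit operations chosen via
   bitop1/bitop2 below.  */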
7847 static bool
7848 vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
7849 gimple **vec_stmt, tree reduc_def,
7850 slp_tree slp_node)
7852 tree lhs, rhs1, rhs2;
7853 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7854 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7855 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7856 tree vec_rhs1 = NULL_TREE, vec_rhs2 = NULL_TREE;
7857 tree new_temp;
7858 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7859 enum vect_def_type dts[2] = {vect_unknown_def_type, vect_unknown_def_type};
7860 unsigned nunits;
7861 int ncopies;
7862 enum tree_code code, bitop1 = NOP_EXPR, bitop2 = NOP_EXPR;
7863 stmt_vec_info prev_stmt_info = NULL;
7864 int i, j;
7865 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7866 vec<tree> vec_oprnds0 = vNULL;
7867 vec<tree> vec_oprnds1 = vNULL;
7868 gimple *def_stmt;
7869 tree mask_type;
7870 tree mask;
7872 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7873 return false;
7875 if (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))
7876 return false;
7878 mask_type = vectype;
7879 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7881 if (slp_node)
7882 ncopies = 1;
7883 else
7884 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7886 gcc_assert (ncopies >= 1);
7887 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7888 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7889 && reduc_def))
7890 return false;
7892 if (STMT_VINFO_LIVE_P (stmt_info))
7894 if (dump_enabled_p ())
7895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7896 "value used after loop.\n");
7897 return false;
7900 if (!is_gimple_assign (stmt))
7901 return false;
7903 code = gimple_assign_rhs_code (stmt);
7905 if (TREE_CODE_CLASS (code) != tcc_comparison)
7906 return false;
7908 rhs1 = gimple_assign_rhs1 (stmt);
7909 rhs2 = gimple_assign_rhs2 (stmt);
7911 if (!vect_is_simple_use (rhs1, stmt_info->vinfo, &def_stmt,
7912 &dts[0], &vectype1))
7913 return false;
7915 if (!vect_is_simple_use (rhs2, stmt_info->vinfo, &def_stmt,
7916 &dts[1], &vectype2))
7917 return false;
7919 if (vectype1 && vectype2
7920 && TYPE_VECTOR_SUBPARTS (vectype1) != TYPE_VECTOR_SUBPARTS (vectype2))
7921 return false;
7923 vectype = vectype1 ? vectype1 : vectype2;
7925 /* Invariant comparison. */
7926 if (!vectype)
7928 vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
7929 if (TYPE_VECTOR_SUBPARTS (vectype) != nunits)
7930 return false;
7932 else if (nunits != TYPE_VECTOR_SUBPARTS (vectype))
7933 return false;
7935 /* Can't compare mask and non-mask types. */
7936 if (vectype1 && vectype2
7937 && (VECTOR_BOOLEAN_TYPE_P (vectype1) ^ VECTOR_BOOLEAN_TYPE_P (vectype2)))
7938 return false;
7940 /* Boolean values may have another representation in vectors
7941 and therefore we prefer bit operations over comparisons for
7942 them (which also works for scalar masks). We store the opcodes
7943 to use in bitop1 and bitop2. The statement is vectorized as
7944 BITOP2 (rhs1 BITOP1 rhs2) or
7945 rhs1 BITOP2 (BITOP1 rhs2)
7946 depending on bitop1 and bitop2 arity. */
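   /* For boolean operands this amounts to, e.g. (illustrative only):
        a >  b   ->  a & ~b
        a >= b   ->  a | ~b
        a <  b   ->  ~a & b   (operands swapped, then handled as GT)
        a <= b   ->  ~a | b   (operands swapped, then handled as GE)
        a == b   ->  ~(a ^ b)
        a != b   ->  a ^ b  */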
7947 if (VECTOR_BOOLEAN_TYPE_P (vectype))
7949 if (code == GT_EXPR)
7951 bitop1 = BIT_NOT_EXPR;
7952 bitop2 = BIT_AND_EXPR;
7954 else if (code == GE_EXPR)
7956 bitop1 = BIT_NOT_EXPR;
7957 bitop2 = BIT_IOR_EXPR;
7959 else if (code == LT_EXPR)
7961 bitop1 = BIT_NOT_EXPR;
7962 bitop2 = BIT_AND_EXPR;
7963 std::swap (rhs1, rhs2);
7964 std::swap (dts[0], dts[1]);
7966 else if (code == LE_EXPR)
7968 bitop1 = BIT_NOT_EXPR;
7969 bitop2 = BIT_IOR_EXPR;
7970 std::swap (rhs1, rhs2);
7971 std::swap (dts[0], dts[1]);
7973 else
7975 bitop1 = BIT_XOR_EXPR;
7976 if (code == EQ_EXPR)
7977 bitop2 = BIT_NOT_EXPR;
7981 if (!vec_stmt)
7983 STMT_VINFO_TYPE (stmt_info) = comparison_vec_info_type;
7984 vect_model_simple_cost (stmt_info, ncopies * (1 + (bitop2 != NOP_EXPR)),
7985 dts, NULL, NULL);
7986 if (bitop1 == NOP_EXPR)
7987 return expand_vec_cmp_expr_p (vectype, mask_type);
7988 else
7990 machine_mode mode = TYPE_MODE (vectype);
7991 optab optab;
7993 optab = optab_for_tree_code (bitop1, vectype, optab_default);
7994 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
7995 return false;
7997 if (bitop2 != NOP_EXPR)
7999 optab = optab_for_tree_code (bitop2, vectype, optab_default);
8000 if (!optab || optab_handler (optab, mode) == CODE_FOR_nothing)
8001 return false;
8003 return true;
8007 /* Transform. */
8008 if (!slp_node)
8010 vec_oprnds0.create (1);
8011 vec_oprnds1.create (1);
8014 /* Handle def. */
8015 lhs = gimple_assign_lhs (stmt);
8016 mask = vect_create_destination_var (lhs, mask_type);
8018 /* Handle cmp expr. */
8019 for (j = 0; j < ncopies; j++)
8021 gassign *new_stmt = NULL;
8022 if (j == 0)
8024 if (slp_node)
8026 auto_vec<tree, 2> ops;
8027 auto_vec<vec<tree>, 2> vec_defs;
8029 ops.safe_push (rhs1);
8030 ops.safe_push (rhs2);
8031 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
8032 vec_oprnds1 = vec_defs.pop ();
8033 vec_oprnds0 = vec_defs.pop ();
8035 else
8037 vec_rhs1 = vect_get_vec_def_for_operand (rhs1, stmt, vectype);
8038 vec_rhs2 = vect_get_vec_def_for_operand (rhs2, stmt, vectype);
8041 else
8043 vec_rhs1 = vect_get_vec_def_for_stmt_copy (dts[0],
8044 vec_oprnds0.pop ());
8045 vec_rhs2 = vect_get_vec_def_for_stmt_copy (dts[1],
8046 vec_oprnds1.pop ());
8049 if (!slp_node)
8051 vec_oprnds0.quick_push (vec_rhs1);
8052 vec_oprnds1.quick_push (vec_rhs2);
8055 /* Arguments are ready. Create the new vector stmt. */
8056 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_rhs1)
8058 vec_rhs2 = vec_oprnds1[i];
8060 new_temp = make_ssa_name (mask);
8061 if (bitop1 == NOP_EXPR)
8063 new_stmt = gimple_build_assign (new_temp, code,
8064 vec_rhs1, vec_rhs2);
8065 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8067 else
8069 if (bitop1 == BIT_NOT_EXPR)
8070 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs2);
8071 else
8072 new_stmt = gimple_build_assign (new_temp, bitop1, vec_rhs1,
8073 vec_rhs2);
8074 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8075 if (bitop2 != NOP_EXPR)
8077 tree res = make_ssa_name (mask);
8078 if (bitop2 == BIT_NOT_EXPR)
8079 new_stmt = gimple_build_assign (res, bitop2, new_temp);
8080 else
8081 new_stmt = gimple_build_assign (res, bitop2, vec_rhs1,
8082 new_temp);
8083 vect_finish_stmt_generation (stmt, new_stmt, gsi);
8086 if (slp_node)
8087 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
8090 if (slp_node)
8091 continue;
8093 if (j == 0)
8094 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
8095 else
8096 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
8098 prev_stmt_info = vinfo_for_stmt (new_stmt);
8101 vec_oprnds0.release ();
8102 vec_oprnds1.release ();
8104 return true;
8107 /* Make sure the statement is vectorizable. */
8109 bool
8110 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
8112 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8113 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
8114 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
8115 bool ok;
8116 tree scalar_type, vectype;
8117 gimple *pattern_stmt;
8118 gimple_seq pattern_def_seq;
8120 if (dump_enabled_p ())
8122 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
8123 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8126 if (gimple_has_volatile_ops (stmt))
8128 if (dump_enabled_p ())
8129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8130 "not vectorized: stmt has volatile operands\n");
8132 return false;
8135 /* Skip stmts that do not need to be vectorized. In loops this is expected
8136 to include:
8137 - the COND_EXPR which is the loop exit condition
8138 - any LABEL_EXPRs in the loop
8139 - computations that are used only for array indexing or loop control.
8140 In basic blocks we only analyze statements that are a part of some SLP
8141 instance; therefore, all the statements are relevant.
8143 A pattern statement needs to be analyzed instead of the original statement
8144 if the original statement is not relevant. Otherwise, we analyze both
8145 statements. In basic blocks we are called from some SLP instance
8146 traversal; there we don't analyze pattern stmts, since the pattern stmts
8147 will already be part of an SLP instance. */
8149 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
8150 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8151 && !STMT_VINFO_LIVE_P (stmt_info))
8153 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8154 && pattern_stmt
8155 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8156 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8158 /* Analyze PATTERN_STMT instead of the original stmt. */
8159 stmt = pattern_stmt;
8160 stmt_info = vinfo_for_stmt (pattern_stmt);
8161 if (dump_enabled_p ())
8163 dump_printf_loc (MSG_NOTE, vect_location,
8164 "==> examining pattern statement: ");
8165 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8168 else
8170 if (dump_enabled_p ())
8171 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
8173 return true;
8176 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8177 && node == NULL
8178 && pattern_stmt
8179 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
8180 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
8182 /* Analyze PATTERN_STMT too. */
8183 if (dump_enabled_p ())
8185 dump_printf_loc (MSG_NOTE, vect_location,
8186 "==> examining pattern statement: ");
8187 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
8190 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
8191 return false;
8194 if (is_pattern_stmt_p (stmt_info)
8195 && node == NULL
8196 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
8198 gimple_stmt_iterator si;
8200 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
8202 gimple *pattern_def_stmt = gsi_stmt (si);
8203 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
8204 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
8206 /* Analyze def stmt of STMT if it's a pattern stmt. */
8207 if (dump_enabled_p ())
8209 dump_printf_loc (MSG_NOTE, vect_location,
8210 "==> examining pattern def statement: ");
8211 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
8214 if (!vect_analyze_stmt (pattern_def_stmt,
8215 need_to_vectorize, node))
8216 return false;
8221 switch (STMT_VINFO_DEF_TYPE (stmt_info))
8223 case vect_internal_def:
8224 break;
8226 case vect_reduction_def:
8227 case vect_nested_cycle:
8228 gcc_assert (!bb_vinfo
8229 && (relevance == vect_used_in_outer
8230 || relevance == vect_used_in_outer_by_reduction
8231 || relevance == vect_used_by_reduction
8232 || relevance == vect_unused_in_scope
8233 || relevance == vect_used_only_live));
8234 break;
8236 case vect_induction_def:
8237 case vect_constant_def:
8238 case vect_external_def:
8239 case vect_unknown_def_type:
8240 default:
8241 gcc_unreachable ();
8244 if (bb_vinfo)
8246 gcc_assert (PURE_SLP_STMT (stmt_info));
8248 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
8249 if (dump_enabled_p ())
8251 dump_printf_loc (MSG_NOTE, vect_location,
8252 "get vectype for scalar type: ");
8253 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
8254 dump_printf (MSG_NOTE, "\n");
8257 vectype = get_vectype_for_scalar_type (scalar_type);
8258 if (!vectype)
8260 if (dump_enabled_p ())
8262 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8263 "not SLPed: unsupported data-type ");
8264 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
8265 scalar_type);
8266 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8268 return false;
8271 if (dump_enabled_p ())
8273 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
8274 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
8275 dump_printf (MSG_NOTE, "\n");
8278 STMT_VINFO_VECTYPE (stmt_info) = vectype;
8281 if (STMT_VINFO_RELEVANT_P (stmt_info))
8283 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
8284 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
8285 || (is_gimple_call (stmt)
8286 && gimple_call_lhs (stmt) == NULL_TREE));
8287 *need_to_vectorize = true;
8290 if (PURE_SLP_STMT (stmt_info) && !node)
8292 dump_printf_loc (MSG_NOTE, vect_location,
8293 "handled only by SLP analysis\n");
8294 return true;
8297 ok = true;
8298 if (!bb_vinfo
8299 && (STMT_VINFO_RELEVANT_P (stmt_info)
8300 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
8301 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8302 || vectorizable_conversion (stmt, NULL, NULL, node)
8303 || vectorizable_shift (stmt, NULL, NULL, node)
8304 || vectorizable_operation (stmt, NULL, NULL, node)
8305 || vectorizable_assignment (stmt, NULL, NULL, node)
8306 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8307 || vectorizable_call (stmt, NULL, NULL, node)
8308 || vectorizable_store (stmt, NULL, NULL, node)
8309 || vectorizable_reduction (stmt, NULL, NULL, node)
8310 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8311 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8312 else
8314 if (bb_vinfo)
8315 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
8316 || vectorizable_conversion (stmt, NULL, NULL, node)
8317 || vectorizable_shift (stmt, NULL, NULL, node)
8318 || vectorizable_operation (stmt, NULL, NULL, node)
8319 || vectorizable_assignment (stmt, NULL, NULL, node)
8320 || vectorizable_load (stmt, NULL, NULL, node, NULL)
8321 || vectorizable_call (stmt, NULL, NULL, node)
8322 || vectorizable_store (stmt, NULL, NULL, node)
8323 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node)
8324 || vectorizable_comparison (stmt, NULL, NULL, NULL, node));
8327 if (!ok)
8329 if (dump_enabled_p ())
8331 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8332 "not vectorized: relevant stmt not ");
8333 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8334 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8337 return false;
8340 if (bb_vinfo)
8341 return true;
8343 /* Stmts that are (also) "live" (i.e., used outside of the loop)
8344 need extra handling, except for vectorizable reductions. */
8345 if (STMT_VINFO_LIVE_P (stmt_info)
8346 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8347 ok = vectorizable_live_operation (stmt, NULL, NULL, -1, NULL);
8349 if (!ok)
8351 if (dump_enabled_p ())
8353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8354 "not vectorized: live stmt not ");
8355 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
8356 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
8359 return false;
8362 return true;
8366 /* Function vect_transform_stmt.
8368 Create a vectorized stmt to replace STMT, and insert it at BSI. */
8370 bool
8371 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
8372 bool *grouped_store, slp_tree slp_node,
8373 slp_instance slp_node_instance)
8375 bool is_store = false;
8376 gimple *vec_stmt = NULL;
8377 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8378 bool done;
8380 gcc_assert (slp_node || !PURE_SLP_STMT (stmt_info));
8381 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
8383 switch (STMT_VINFO_TYPE (stmt_info))
8385 case type_demotion_vec_info_type:
8386 case type_promotion_vec_info_type:
8387 case type_conversion_vec_info_type:
8388 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
8389 gcc_assert (done);
8390 break;
8392 case induc_vec_info_type:
8393 gcc_assert (!slp_node);
8394 done = vectorizable_induction (stmt, gsi, &vec_stmt);
8395 gcc_assert (done);
8396 break;
8398 case shift_vec_info_type:
8399 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
8400 gcc_assert (done);
8401 break;
8403 case op_vec_info_type:
8404 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
8405 gcc_assert (done);
8406 break;
8408 case assignment_vec_info_type:
8409 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
8410 gcc_assert (done);
8411 break;
8413 case load_vec_info_type:
8414 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
8415 slp_node_instance);
8416 gcc_assert (done);
8417 break;
8419 case store_vec_info_type:
8420 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
8421 gcc_assert (done);
8422 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
8424 /* In case of interleaving, the whole chain is vectorized when the
8425 last store in the chain is reached. Store stmts before the last
8426 one are skipped, and their vec_stmt_info shouldn't be freed
8427 meanwhile. */
8428 *grouped_store = true;
8429 if (STMT_VINFO_VEC_STMT (stmt_info))
8430 is_store = true;
8432 else
8433 is_store = true;
8434 break;
8436 case condition_vec_info_type:
8437 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
8438 gcc_assert (done);
8439 break;
8441 case comparison_vec_info_type:
8442 done = vectorizable_comparison (stmt, gsi, &vec_stmt, NULL, slp_node);
8443 gcc_assert (done);
8444 break;
8446 case call_vec_info_type:
8447 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
8448 stmt = gsi_stmt (*gsi);
8449 if (is_gimple_call (stmt)
8450 && gimple_call_internal_p (stmt)
8451 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
8452 is_store = true;
8453 break;
8455 case call_simd_clone_vec_info_type:
8456 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
8457 stmt = gsi_stmt (*gsi);
8458 break;
8460 case reduc_vec_info_type:
8461 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
8462 gcc_assert (done);
8463 break;
8465 default:
8466 if (!STMT_VINFO_LIVE_P (stmt_info))
8468 if (dump_enabled_p ())
8469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8470 "stmt not supported.\n");
8471 gcc_unreachable ();
8475 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
8476 This would break hybrid SLP vectorization. */
8477 if (slp_node)
8478 gcc_assert (!vec_stmt
8479 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
8481 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
8482 is being vectorized, but outside the immediately enclosing loop. */
8483 if (vec_stmt
8484 && STMT_VINFO_LOOP_VINFO (stmt_info)
8485 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
8486 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
8487 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
8488 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
8489 || STMT_VINFO_RELEVANT (stmt_info) ==
8490 vect_used_in_outer_by_reduction))
8492 struct loop *innerloop = LOOP_VINFO_LOOP (
8493 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
8494 imm_use_iterator imm_iter;
8495 use_operand_p use_p;
8496 tree scalar_dest;
8497 gimple *exit_phi;
8499 if (dump_enabled_p ())
8500 dump_printf_loc (MSG_NOTE, vect_location,
8501 "Record the vdef for outer-loop vectorization.\n");
8503 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
8504 (to be used when vectorizing outer-loop stmts that use the DEF of
8505 STMT). */
8506 if (gimple_code (stmt) == GIMPLE_PHI)
8507 scalar_dest = PHI_RESULT (stmt);
8508 else
8509 scalar_dest = gimple_assign_lhs (stmt);
8511 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
8513 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
8515 exit_phi = USE_STMT (use_p);
8516 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
8521 /* Handle stmts whose DEF is used outside the loop-nest that is
8522 being vectorized. */
8523 if (slp_node)
8525 gimple *slp_stmt;
8526 int i;
8527 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt)
8529 stmt_vec_info slp_stmt_info = vinfo_for_stmt (slp_stmt);
8530 if (STMT_VINFO_LIVE_P (slp_stmt_info)
8531 && STMT_VINFO_TYPE (slp_stmt_info) != reduc_vec_info_type)
8533 done = vectorizable_live_operation (slp_stmt, gsi, slp_node, i,
8534 &vec_stmt);
8535 gcc_assert (done);
8539 else if (STMT_VINFO_LIVE_P (stmt_info)
8540 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
8542 done = vectorizable_live_operation (stmt, gsi, slp_node, -1, &vec_stmt);
8543 gcc_assert (done);
8546 if (vec_stmt)
8547 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
8549 return is_store;
8553 /* Remove a group of stores (for SLP or interleaving), free their
8554 stmt_vec_info. */
8556 void
8557 vect_remove_stores (gimple *first_stmt)
8559 gimple *next = first_stmt;
8560 gimple *tmp;
8561 gimple_stmt_iterator next_si;
8563 while (next)
8565 stmt_vec_info stmt_info = vinfo_for_stmt (next);
8567 tmp = GROUP_NEXT_ELEMENT (stmt_info);
8568 if (is_pattern_stmt_p (stmt_info))
8569 next = STMT_VINFO_RELATED_STMT (stmt_info);
8570 /* Free the attached stmt_vec_info and remove the stmt. */
8571 next_si = gsi_for_stmt (next);
8572 unlink_stmt_vdef (next);
8573 gsi_remove (&next_si, true);
8574 release_defs (next);
8575 free_stmt_vec_info (next);
8576 next = tmp;
8581 /* Function new_stmt_vec_info.
8583 Create and initialize a new stmt_vec_info struct for STMT. */
8585 stmt_vec_info
8586 new_stmt_vec_info (gimple *stmt, vec_info *vinfo)
8588 stmt_vec_info res;
8589 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
8591 STMT_VINFO_TYPE (res) = undef_vec_info_type;
8592 STMT_VINFO_STMT (res) = stmt;
8593 res->vinfo = vinfo;
8594 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
8595 STMT_VINFO_LIVE_P (res) = false;
8596 STMT_VINFO_VECTYPE (res) = NULL;
8597 STMT_VINFO_VEC_STMT (res) = NULL;
8598 STMT_VINFO_VECTORIZABLE (res) = true;
8599 STMT_VINFO_IN_PATTERN_P (res) = false;
8600 STMT_VINFO_RELATED_STMT (res) = NULL;
8601 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8602 STMT_VINFO_DATA_REF (res) = NULL;
8603 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION;
8604 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK;
8606 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8607 STMT_VINFO_DR_OFFSET (res) = NULL;
8608 STMT_VINFO_DR_INIT (res) = NULL;
8609 STMT_VINFO_DR_STEP (res) = NULL;
8610 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8612 if (gimple_code (stmt) == GIMPLE_PHI
8613 && is_loop_header_bb_p (gimple_bb (stmt)))
8614 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8615 else
8616 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8618 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8619 STMT_SLP_TYPE (res) = loop_vect;
8620 STMT_VINFO_NUM_SLP_USES (res) = 0;
8622 GROUP_FIRST_ELEMENT (res) = NULL;
8623 GROUP_NEXT_ELEMENT (res) = NULL;
8624 GROUP_SIZE (res) = 0;
8625 GROUP_STORE_COUNT (res) = 0;
8626 GROUP_GAP (res) = 0;
8627 GROUP_SAME_DR_STMT (res) = NULL;
8629 return res;
8633 /* Create a hash table for stmt_vec_info. */
8635 void
8636 init_stmt_vec_info_vec (void)
8638 gcc_assert (!stmt_vec_info_vec.exists ());
8639 stmt_vec_info_vec.create (50);
8643 /* Free hash table for stmt_vec_info. */
8645 void
8646 free_stmt_vec_info_vec (void)
8648 unsigned int i;
8649 stmt_vec_info info;
8650 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8651 if (info != NULL)
8652 free_stmt_vec_info (STMT_VINFO_STMT (info));
8653 gcc_assert (stmt_vec_info_vec.exists ());
8654 stmt_vec_info_vec.release ();
8658 /* Free stmt vectorization related info. */
8660 void
8661 free_stmt_vec_info (gimple *stmt)
8663 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8665 if (!stmt_info)
8666 return;
8668 /* Check if this statement has a related "pattern stmt"
8669 (introduced by the vectorizer during the pattern recognition
8670 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8671 too. */
8672 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8674 stmt_vec_info patt_info
8675 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8676 if (patt_info)
8678 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8679 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8680 gimple_set_bb (patt_stmt, NULL);
8681 tree lhs = gimple_get_lhs (patt_stmt);
8682 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8683 release_ssa_name (lhs);
8684 if (seq)
8686 gimple_stmt_iterator si;
8687 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8689 gimple *seq_stmt = gsi_stmt (si);
8690 gimple_set_bb (seq_stmt, NULL);
8691 lhs = gimple_get_lhs (seq_stmt);
8692 if (lhs && TREE_CODE (lhs) == SSA_NAME)
8693 release_ssa_name (lhs);
8694 free_stmt_vec_info (seq_stmt);
8697 free_stmt_vec_info (patt_stmt);
8701 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8702 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8703 set_vinfo_for_stmt (stmt, NULL);
8704 free (stmt_info);
8708 /* Function get_vectype_for_scalar_type_and_size.
8710 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8711 by the target. */
8713 static tree
8714 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8716 machine_mode inner_mode = TYPE_MODE (scalar_type);
8717 machine_mode simd_mode;
8718 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8719 int nunits;
8720 tree vectype;
8722 if (nbytes == 0)
8723 return NULL_TREE;
8725 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8726 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8727 return NULL_TREE;
8729 /* For vector types of elements whose mode precision doesn't
8730 match their type's precision we use an element type of mode
8731 precision. The vectorization routines will have to make sure
8732 they support the proper result truncation/extension.
8733 We also make sure to build vector types with INTEGER_TYPE
8734 component type only. */
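  /* For instance (illustrative), a C 'bool' typically has QImode but
     TYPE_PRECISION 1, and an ENUMERAL_TYPE is not an INTEGER_TYPE;
     in both cases the element type is replaced below by the plain
     INTEGER_TYPE of the corresponding mode before the vector type is
     built.  */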
8735 if (INTEGRAL_TYPE_P (scalar_type)
8736 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8737 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8738 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8739 TYPE_UNSIGNED (scalar_type));
8741 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8742 When the component mode passes the above test, simply use a type
8743 corresponding to that mode. The theory is that any use that
8744 would cause problems with this will disable vectorization anyway. */
8745 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8746 && !INTEGRAL_TYPE_P (scalar_type))
8747 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8749 /* We can't build a vector type of elements with alignment bigger than
8750 their size. */
8751 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8752 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8753 TYPE_UNSIGNED (scalar_type));
8755 /* If we fell back to using the mode, fail if there was
8756 no scalar type for it. */
8757 if (scalar_type == NULL_TREE)
8758 return NULL_TREE;
8760 /* If no size was supplied, use the mode the target prefers. Otherwise
8761 look up a vector mode of the specified size. */
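  /* E.g. (illustrative), for SImode elements and SIZE == 0 a 128-bit
     vector target would typically hand back V4SImode here, whereas
     SIZE == 32 asks specifically for a 32-byte mode such as V8SImode,
     if one exists.  */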
8762 if (size == 0)
8763 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8764 else
8765 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8766 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8767 if (nunits <= 1)
8768 return NULL_TREE;
8770 vectype = build_vector_type (scalar_type, nunits);
8772 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8773 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8774 return NULL_TREE;
8776 return vectype;
8779 unsigned int current_vector_size;
8781 /* Function get_vectype_for_scalar_type.
8783 Returns the vector type corresponding to SCALAR_TYPE as supported
8784 by the target. */
8786 tree
8787 get_vectype_for_scalar_type (tree scalar_type)
8789 tree vectype;
8790 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8791 current_vector_size);
8792 if (vectype
8793 && current_vector_size == 0)
8794 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8795 return vectype;
8798 /* Function get_mask_type_for_scalar_type.
8800 Returns the mask type corresponding to a result of comparison
8801 of vectors of specified SCALAR_TYPE as supported by target. */
8803 tree
8804 get_mask_type_for_scalar_type (tree scalar_type)
8806 tree vectype = get_vectype_for_scalar_type (scalar_type);
8808 if (!vectype)
8809 return NULL;
8811 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype),
8812 current_vector_size);
8815 /* Function get_same_sized_vectype
8817 Returns a vector type corresponding to SCALAR_TYPE of size
8818 VECTOR_TYPE if supported by the target. */
8820 tree
8821 get_same_sized_vectype (tree scalar_type, tree vector_type)
8823 if (TREE_CODE (scalar_type) == BOOLEAN_TYPE)
8824 return build_same_sized_truth_vector_type (vector_type);
8826 return get_vectype_for_scalar_type_and_size
8827 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8830 /* Function vect_is_simple_use.
8832 Input:
8833 VINFO - the vect info of the loop or basic block that is being vectorized.
8834 OPERAND - operand in the loop or bb.
8835 Output:
8836 DEF_STMT - the defining stmt in case OPERAND is an SSA_NAME.
8837 DT - the type of definition
8839 Returns whether a stmt with OPERAND can be vectorized.
8840 For loops, supportable operands are constants, loop invariants, and operands
8841 that are defined by the current iteration of the loop. Unsupportable
8842 operands are those that are defined by a previous iteration of the loop (as
8843 is the case in reduction/induction computations).
8844 For basic blocks, supportable operands are constants and bb invariants.
8845 For now, operands defined outside the basic block are not supported. */
8847 bool
8848 vect_is_simple_use (tree operand, vec_info *vinfo,
8849 gimple **def_stmt, enum vect_def_type *dt)
8851 *def_stmt = NULL;
8852 *dt = vect_unknown_def_type;
8854 if (dump_enabled_p ())
8856 dump_printf_loc (MSG_NOTE, vect_location,
8857 "vect_is_simple_use: operand ");
8858 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8859 dump_printf (MSG_NOTE, "\n");
8862 if (CONSTANT_CLASS_P (operand))
8864 *dt = vect_constant_def;
8865 return true;
8868 if (is_gimple_min_invariant (operand))
8870 *dt = vect_external_def;
8871 return true;
8874 if (TREE_CODE (operand) != SSA_NAME)
8876 if (dump_enabled_p ())
8877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8878 "not ssa-name.\n");
8879 return false;
8882 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8884 *dt = vect_external_def;
8885 return true;
8888 *def_stmt = SSA_NAME_DEF_STMT (operand);
8889 if (dump_enabled_p ())
8891 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8892 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8895 if (! vect_stmt_in_region_p (vinfo, *def_stmt))
8896 *dt = vect_external_def;
8897 else
8899 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8900 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8903 if (dump_enabled_p ())
8905 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8906 switch (*dt)
8908 case vect_uninitialized_def:
8909 dump_printf (MSG_NOTE, "uninitialized\n");
8910 break;
8911 case vect_constant_def:
8912 dump_printf (MSG_NOTE, "constant\n");
8913 break;
8914 case vect_external_def:
8915 dump_printf (MSG_NOTE, "external\n");
8916 break;
8917 case vect_internal_def:
8918 dump_printf (MSG_NOTE, "internal\n");
8919 break;
8920 case vect_induction_def:
8921 dump_printf (MSG_NOTE, "induction\n");
8922 break;
8923 case vect_reduction_def:
8924 dump_printf (MSG_NOTE, "reduction\n");
8925 break;
8926 case vect_double_reduction_def:
8927 dump_printf (MSG_NOTE, "double reduction\n");
8928 break;
8929 case vect_nested_cycle:
8930 dump_printf (MSG_NOTE, "nested cycle\n");
8931 break;
8932 case vect_unknown_def_type:
8933 dump_printf (MSG_NOTE, "unknown\n");
8934 break;
8938 if (*dt == vect_unknown_def_type)
8940 if (dump_enabled_p ())
8941 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8942 "Unsupported pattern.\n");
8943 return false;
8946 switch (gimple_code (*def_stmt))
8948 case GIMPLE_PHI:
8949 case GIMPLE_ASSIGN:
8950 case GIMPLE_CALL:
8951 break;
8952 default:
8953 if (dump_enabled_p ())
8954 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8955 "unsupported defining stmt:\n");
8956 return false;
8959 return true;
8962 /* Function vect_is_simple_use.
8964 Same as vect_is_simple_use but also determines the vector operand
8965 type of OPERAND and stores it to *VECTYPE. If the definition of
8966 OPERAND is vect_uninitialized_def, vect_constant_def or
8967 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
8968 is responsible for computing the best suited vector type for the
8969 scalar operand. */
8971 bool
8972 vect_is_simple_use (tree operand, vec_info *vinfo,
8973 gimple **def_stmt, enum vect_def_type *dt, tree *vectype)
8975 if (!vect_is_simple_use (operand, vinfo, def_stmt, dt))
8976 return false;
8978 /* Now get a vector type if the def is internal, otherwise supply
8979 NULL_TREE and leave it up to the caller to figure out a proper
8980 type for the use stmt. */
8981 if (*dt == vect_internal_def
8982 || *dt == vect_induction_def
8983 || *dt == vect_reduction_def
8984 || *dt == vect_double_reduction_def
8985 || *dt == vect_nested_cycle)
8987 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8989 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8990 && !STMT_VINFO_RELEVANT (stmt_info)
8991 && !STMT_VINFO_LIVE_P (stmt_info))
8992 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8994 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8995 gcc_assert (*vectype != NULL_TREE);
8997 else if (*dt == vect_uninitialized_def
8998 || *dt == vect_constant_def
8999 || *dt == vect_external_def)
9000 *vectype = NULL_TREE;
9001 else
9002 gcc_unreachable ();
9004 return true;
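/* A typical use of the overload above (a sketch; OP stands for the
   operand being examined and VINFO for the enclosing vec_info):

     gimple *def_stmt;
     enum vect_def_type dt;
     tree vectype = NULL_TREE;
     if (!vect_is_simple_use (op, vinfo, &def_stmt, &dt, &vectype))
       return false;
     if (!vectype)
       vectype = get_vectype_for_scalar_type (TREE_TYPE (op));

   i.e. callers must be prepared to pick a vector type themselves for
   constant and external defs.  */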
9008 /* Function supportable_widening_operation
9010 Check whether an operation represented by the code CODE is a
9011 widening operation that is supported by the target platform in
9012 vector form (i.e., when operating on arguments of type VECTYPE_IN
9013 producing a result of type VECTYPE_OUT).
9015 Widening operations we currently support are NOP (CONVERT), FLOAT
9016 and WIDEN_MULT. This function checks if these operations are supported
9017 by the target platform either directly (via vector tree-codes), or via
9018 target builtins.
9020 Output:
9021 - CODE1 and CODE2 are codes of vector operations to be used when
9022 vectorizing the operation, if available.
9023 - MULTI_STEP_CVT determines the number of required intermediate steps in
9024 case of multi-step conversion (like char->short->int - in that case
9025 MULTI_STEP_CVT will be 1).
9026 - INTERM_TYPES contains the intermediate type required to perform the
9027 widening operation (short in the above example). */
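/* For instance (illustrative), widening chars to ints on a 128-bit
   vector target: CODE1/CODE2 come back as VEC_UNPACK_LO_EXPR and
   VEC_UNPACK_HI_EXPR, MULTI_STEP_CVT as 1, and INTERM_TYPES holds the
   short vector type, so one V16QI input expands to two V8HI vectors
   and then to four V4SI results.  */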
9029 bool
9030 supportable_widening_operation (enum tree_code code, gimple *stmt,
9031 tree vectype_out, tree vectype_in,
9032 enum tree_code *code1, enum tree_code *code2,
9033 int *multi_step_cvt,
9034 vec<tree> *interm_types)
9036 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
9037 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
9038 struct loop *vect_loop = NULL;
9039 machine_mode vec_mode;
9040 enum insn_code icode1, icode2;
9041 optab optab1, optab2;
9042 tree vectype = vectype_in;
9043 tree wide_vectype = vectype_out;
9044 enum tree_code c1, c2;
9045 int i;
9046 tree prev_type, intermediate_type;
9047 machine_mode intermediate_mode, prev_mode;
9048 optab optab3, optab4;
9050 *multi_step_cvt = 0;
9051 if (loop_info)
9052 vect_loop = LOOP_VINFO_LOOP (loop_info);
9054 switch (code)
9056 case WIDEN_MULT_EXPR:
9057 /* The result of a vectorized widening operation usually requires
9058 two vectors (because the widened results do not fit into one vector).
9059 The generated vector results would normally be expected to be
9060 generated in the same order as in the original scalar computation,
9061 i.e. if 8 results are generated in each vector iteration, they are
9062 to be organized as follows:
9063 vect1: [res1,res2,res3,res4],
9064 vect2: [res5,res6,res7,res8].
9066 However, in the special case that the result of the widening
9067 operation is used in a reduction computation only, the order doesn't
9068 matter (because when vectorizing a reduction we change the order of
9069 the computation). Some targets can take advantage of this and
9070 generate more efficient code. For example, targets like Altivec,
9071 that support widen_mult using a sequence of {mult_even,mult_odd}
9072 generate the following vectors:
9073 vect1: [res1,res3,res5,res7],
9074 vect2: [res2,res4,res6,res8].
9076 When vectorizing outer-loops, we execute the inner-loop sequentially
9077 (each vectorized inner-loop iteration contributes to VF outer-loop
9078 iterations in parallel). We therefore don't allow changing the
9079 order of the computation in the inner-loop during outer-loop
9080 vectorization. */
9081 /* TODO: Another case in which order doesn't *really* matter is when we
9082 widen and then contract again, e.g. (short)((int)x * y >> 8).
9083 Normally, pack_trunc performs an even/odd permute, whereas the
9084 repack from an even/odd expansion would be an interleave, which
9085 would be significantly simpler for e.g. AVX2. */
9086 /* In any case, in order to avoid duplicating the code below, recurse
9087 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
9088 are properly set up for the caller. If we fail, we'll continue with
9089 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
9090 if (vect_loop
9091 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
9092 && !nested_in_vect_loop_p (vect_loop, stmt)
9093 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
9094 stmt, vectype_out, vectype_in,
9095 code1, code2, multi_step_cvt,
9096 interm_types))
9098 /* Elements in a vector with the vect_used_by_reduction property cannot
9099 be reordered if the use chain with this property does not have the
9100 same operation. One such example is s += a * b, where elements
9101 in a and b cannot be reordered. Here we check if the vector defined
9102 by STMT is only directly used in the reduction statement. */
9103 tree lhs = gimple_assign_lhs (stmt);
9104 use_operand_p dummy;
9105 gimple *use_stmt;
9106 stmt_vec_info use_stmt_info = NULL;
9107 if (single_imm_use (lhs, &dummy, &use_stmt)
9108 && (use_stmt_info = vinfo_for_stmt (use_stmt))
9109 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
9110 return true;
9112 c1 = VEC_WIDEN_MULT_LO_EXPR;
9113 c2 = VEC_WIDEN_MULT_HI_EXPR;
9114 break;
9116 case DOT_PROD_EXPR:
9117 c1 = DOT_PROD_EXPR;
9118 c2 = DOT_PROD_EXPR;
9119 break;
9121 case SAD_EXPR:
9122 c1 = SAD_EXPR;
9123 c2 = SAD_EXPR;
9124 break;
9126 case VEC_WIDEN_MULT_EVEN_EXPR:
9127 /* Support the recursion induced just above. */
9128 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
9129 c2 = VEC_WIDEN_MULT_ODD_EXPR;
9130 break;
9132 case WIDEN_LSHIFT_EXPR:
9133 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
9134 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
9135 break;
9137 CASE_CONVERT:
9138 c1 = VEC_UNPACK_LO_EXPR;
9139 c2 = VEC_UNPACK_HI_EXPR;
9140 break;
9142 case FLOAT_EXPR:
9143 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
9144 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
9145 break;
9147 case FIX_TRUNC_EXPR:
9148 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
9149 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
9150 computing the operation. */
9151 return false;
9153 default:
9154 gcc_unreachable ();
9157 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
9158 std::swap (c1, c2);
9160 if (code == FIX_TRUNC_EXPR)
9162 /* The signedness is determined from output operand. */
9163 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9164 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
9166 else
9168 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9169 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9172 if (!optab1 || !optab2)
9173 return false;
9175 vec_mode = TYPE_MODE (vectype);
9176 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
9177 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
9178 return false;
9180 *code1 = c1;
9181 *code2 = c2;
9183 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9184 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9185 /* For scalar masks we may have different boolean
9186 vector types having the same QImode. Thus we
9187 add an additional check on the number of elements. */
9188 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9189 || (TYPE_VECTOR_SUBPARTS (vectype) / 2
9190 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9192 /* Check if it's a multi-step conversion that can be done using intermediate
9193 types. */
9195 prev_type = vectype;
9196 prev_mode = vec_mode;
9198 if (!CONVERT_EXPR_CODE_P (code))
9199 return false;
9201 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9202 intermediate steps in the promotion sequence. We try
9203 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
9204 not. */
9205 interm_types->create (MAX_INTERM_CVT_STEPS);
9206 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9208 intermediate_mode = insn_data[icode1].operand[0].mode;
9209 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9211 intermediate_type
9212 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) / 2,
9213 current_vector_size);
9214 if (intermediate_mode != TYPE_MODE (intermediate_type))
9215 return false;
9217 else
9218 intermediate_type
9219 = lang_hooks.types.type_for_mode (intermediate_mode,
9220 TYPE_UNSIGNED (prev_type));
9222 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
9223 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
9225 if (!optab3 || !optab4
9226 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
9227 || insn_data[icode1].operand[0].mode != intermediate_mode
9228 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
9229 || insn_data[icode2].operand[0].mode != intermediate_mode
9230 || ((icode1 = optab_handler (optab3, intermediate_mode))
9231 == CODE_FOR_nothing)
9232 || ((icode2 = optab_handler (optab4, intermediate_mode))
9233 == CODE_FOR_nothing))
9234 break;
9236 interm_types->quick_push (intermediate_type);
9237 (*multi_step_cvt)++;
9239 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
9240 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
9241 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9242 || (TYPE_VECTOR_SUBPARTS (intermediate_type) / 2
9243 == TYPE_VECTOR_SUBPARTS (wide_vectype)));
9245 prev_type = intermediate_type;
9246 prev_mode = intermediate_mode;
9249 interm_types->release ();
9250 return false;
9254 /* Function supportable_narrowing_operation
9256 Check whether an operation represented by the code CODE is a
9257 narrowing operation that is supported by the target platform in
9258 vector form (i.e., when operating on arguments of type VECTYPE_IN
9259 and producing a result of type VECTYPE_OUT).
9261 Narrowing operations we currently support are NOP (CONVERT) and
9262 FIX_TRUNC. This function checks if these operations are supported by
9263 the target platform directly via vector tree-codes.
9265 Output:
9266 - CODE1 is the code of a vector operation to be used when
9267 vectorizing the operation, if available.
9268 - MULTI_STEP_CVT determines the number of required intermediate steps in
9269 case of multi-step conversion (like int->short->char - in that case
9270 MULTI_STEP_CVT will be 1).
9271 - INTERM_TYPES contains the intermediate type required to perform the
9272 narrowing operation (short in the above example). */
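/* For instance (illustrative), narrowing ints to chars on a 128-bit
   vector target: CODE1 comes back as VEC_PACK_TRUNC_EXPR,
   MULTI_STEP_CVT as 1, and INTERM_TYPES holds the short vector type,
   so four V4SI inputs are packed pairwise into two V8HI vectors and
   then into a single V16QI result.  */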
9274 bool
9275 supportable_narrowing_operation (enum tree_code code,
9276 tree vectype_out, tree vectype_in,
9277 enum tree_code *code1, int *multi_step_cvt,
9278 vec<tree> *interm_types)
9280 machine_mode vec_mode;
9281 enum insn_code icode1;
9282 optab optab1, interm_optab;
9283 tree vectype = vectype_in;
9284 tree narrow_vectype = vectype_out;
9285 enum tree_code c1;
9286 tree intermediate_type, prev_type;
9287 machine_mode intermediate_mode, prev_mode;
9288 int i;
9289 bool uns;
9291 *multi_step_cvt = 0;
9292 switch (code)
9294 CASE_CONVERT:
9295 c1 = VEC_PACK_TRUNC_EXPR;
9296 break;
9298 case FIX_TRUNC_EXPR:
9299 c1 = VEC_PACK_FIX_TRUNC_EXPR;
9300 break;
9302 case FLOAT_EXPR:
9303 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
9304 tree code and optabs used for computing the operation. */
9305 return false;
9307 default:
9308 gcc_unreachable ();
9311 if (code == FIX_TRUNC_EXPR)
9312 /* The signedness is determined from output operand. */
9313 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
9314 else
9315 optab1 = optab_for_tree_code (c1, vectype, optab_default);
9317 if (!optab1)
9318 return false;
9320 vec_mode = TYPE_MODE (vectype);
9321 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
9322 return false;
9324 *code1 = c1;
9326 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9327 /* For scalar masks we may have different boolean
9328 vector types having the same QImode. Thus we
9329 add an additional check on the number of elements. */
9330 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9331 || (TYPE_VECTOR_SUBPARTS (vectype) * 2
9332 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9334 /* Check if it's a multi-step conversion that can be done using intermediate
9335 types. */
9336 prev_mode = vec_mode;
9337 prev_type = vectype;
9338 if (code == FIX_TRUNC_EXPR)
9339 uns = TYPE_UNSIGNED (vectype_out);
9340 else
9341 uns = TYPE_UNSIGNED (vectype);
9343 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
9344 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
9345 costly than signed. */
9346 if (code == FIX_TRUNC_EXPR && uns)
9348 enum insn_code icode2;
9350 intermediate_type
9351 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
9352 interm_optab
9353 = optab_for_tree_code (c1, intermediate_type, optab_default);
9354 if (interm_optab != unknown_optab
9355 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
9356 && insn_data[icode1].operand[0].mode
9357 == insn_data[icode2].operand[0].mode)
9359 uns = false;
9360 optab1 = interm_optab;
9361 icode1 = icode2;
9365 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
9366 intermediate steps in the narrowing sequence. We try
9367 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
9368 interm_types->create (MAX_INTERM_CVT_STEPS);
9369 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
9371 intermediate_mode = insn_data[icode1].operand[0].mode;
9372 if (VECTOR_BOOLEAN_TYPE_P (prev_type))
9374 intermediate_type
9375 = build_truth_vector_type (TYPE_VECTOR_SUBPARTS (prev_type) * 2,
9376 current_vector_size);
9377 if (intermediate_mode != TYPE_MODE (intermediate_type))
9378 return false;
9380 else
9381 intermediate_type
9382 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
9383 interm_optab
9384 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
9385 optab_default);
9386 if (!interm_optab
9387 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
9388 || insn_data[icode1].operand[0].mode != intermediate_mode
9389 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
9390 == CODE_FOR_nothing))
9391 break;
9393 interm_types->quick_push (intermediate_type);
9394 (*multi_step_cvt)++;
9396 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
9397 return (!VECTOR_BOOLEAN_TYPE_P (vectype)
9398 || (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2
9399 == TYPE_VECTOR_SUBPARTS (narrow_vectype)));
9401 prev_mode = intermediate_mode;
9402 prev_type = intermediate_type;
9403 optab1 = interm_optab;
9406 interm_types->release ();
9407 return false;