[official-gcc.git] / gcc / tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "rtl.h"
30 #include "ssa.h"
31 #include "alias.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
34 #include "target.h"
35 #include "gimple-pretty-print.h"
36 #include "internal-fn.h"
37 #include "tree-eh.h"
38 #include "gimplify.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
41 #include "tree-cfg.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "cfgloop.h"
44 #include "tree-ssa-loop.h"
45 #include "tree-scalar-evolution.h"
46 #include "flags.h"
47 #include "insn-config.h"
48 #include "recog.h" /* FIXME: for insn_data */
49 #include "insn-codes.h"
50 #include "optabs-tree.h"
51 #include "diagnostic-core.h"
52 #include "tree-vectorizer.h"
53 #include "cgraph.h"
54 #include "builtins.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
61 tree
62 stmt_vectype (struct _stmt_vec_info *stmt_info)
64 return STMT_VINFO_VECTYPE (stmt_info);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
69 bool
70 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
72 gimple *stmt = STMT_VINFO_STMT (stmt_info);
73 basic_block bb = gimple_bb (stmt);
74 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
75 struct loop* loop;
77 if (!loop_vinfo)
78 return false;
80 loop = LOOP_VINFO_LOOP (loop_vinfo);
82 return (bb->loop_father == loop->inner);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
89 unsigned
90 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
91 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
92 int misalign, enum vect_cost_model_location where)
94 if (body_cost_vec)
96 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
97 add_stmt_info_to_vec (body_cost_vec, count, kind,
98 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
99 misalign);
100 return (unsigned)
101 (builtin_vectorization_cost (kind, vectype, misalign) * count);
104 else
106 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
107 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
108 void *target_cost_data;
110 if (loop_vinfo)
111 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
112 else
113 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
115 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
116 misalign, where);
120 /* Return a variable of type ELEM_TYPE[NELEMS]. */
122 static tree
123 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
125 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
126 "vect_array");
129 /* ARRAY is an array of vectors created by create_vector_array.
130 Return an SSA_NAME for the vector in index N. The reference
131 is part of the vectorization of STMT and the vector is associated
132 with scalar destination SCALAR_DEST. */
134 static tree
135 read_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
136 tree array, unsigned HOST_WIDE_INT n)
138 tree vect_type, vect, vect_name, array_ref;
139 gimple *new_stmt;
141 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
142 vect_type = TREE_TYPE (TREE_TYPE (array));
143 vect = vect_create_destination_var (scalar_dest, vect_type);
144 array_ref = build4 (ARRAY_REF, vect_type, array,
145 build_int_cst (size_type_node, n),
146 NULL_TREE, NULL_TREE);
148 new_stmt = gimple_build_assign (vect, array_ref);
149 vect_name = make_ssa_name (vect, new_stmt);
150 gimple_assign_set_lhs (new_stmt, vect_name);
151 vect_finish_stmt_generation (stmt, new_stmt, gsi);
153 return vect_name;
156 /* ARRAY is an array of vectors created by create_vector_array.
157 Emit code to store SSA_NAME VECT in index N of the array.
158 The store is part of the vectorization of STMT. */
160 static void
161 write_vector_array (gimple *stmt, gimple_stmt_iterator *gsi, tree vect,
162 tree array, unsigned HOST_WIDE_INT n)
164 tree array_ref;
165 gimple *new_stmt;
167 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
168 build_int_cst (size_type_node, n),
169 NULL_TREE, NULL_TREE);
171 new_stmt = gimple_build_assign (array_ref, vect);
172 vect_finish_stmt_generation (stmt, new_stmt, gsi);
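/* Illustrative note (not part of the original source): for an array of
   vectors VA built by create_vector_array, the two helpers above emit
   GIMPLE of roughly the form

     vectX_1 = VA[n];      <-- read_vector_array
     VA[n] = vectY_2;      <-- write_vector_array

   which is how individual vectors are moved in and out of the array
   operand used by the load/store-lanes code paths.  */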
175 /* PTR is a pointer to an array of type TYPE. Return a representation
176 of *PTR. The memory reference replaces those in FIRST_DR
177 (and its group). */
179 static tree
180 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
182 tree mem_ref, alias_ptr_type;
184 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
185 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
186 /* Arrays have the same alignment as their type. */
187 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
188 return mem_ref;
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193 /* Function vect_mark_relevant.
195 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
197 static void
198 vect_mark_relevant (vec<gimple *> *worklist, gimple *stmt,
199 enum vect_relevant relevant, bool live_p,
200 bool used_in_pattern)
202 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
203 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
204 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
205 gimple *pattern_stmt;
207 if (dump_enabled_p ())
208 dump_printf_loc (MSG_NOTE, vect_location,
209 "mark relevant %d, live %d.\n", relevant, live_p);
211 /* If this stmt is an original stmt in a pattern, we might need to mark its
212 related pattern stmt instead of the original stmt. However, such stmts
213 may have their own uses that are not in any pattern; in such cases the
214 stmt itself should be marked. */
215 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
217 bool found = false;
218 if (!used_in_pattern)
220 imm_use_iterator imm_iter;
221 use_operand_p use_p;
222 gimple *use_stmt;
223 tree lhs;
224 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
225 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
227 if (is_gimple_assign (stmt))
228 lhs = gimple_assign_lhs (stmt);
229 else
230 lhs = gimple_call_lhs (stmt);
232 /* This use is outside the pattern.  If LHS also has uses that are
233 pattern uses, we should mark the stmt itself, and not the pattern
234 stmt. */
235 if (lhs && TREE_CODE (lhs) == SSA_NAME)
236 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
238 if (is_gimple_debug (USE_STMT (use_p)))
239 continue;
240 use_stmt = USE_STMT (use_p);
242 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
243 continue;
245 if (vinfo_for_stmt (use_stmt)
246 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
248 found = true;
249 break;
254 if (!found)
256 /* This is the last stmt in a sequence that was detected as a
257 pattern that can potentially be vectorized. Don't mark the stmt
258 as relevant/live because it's not going to be vectorized.
259 Instead mark the pattern-stmt that replaces it. */
261 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
263 if (dump_enabled_p ())
264 dump_printf_loc (MSG_NOTE, vect_location,
265 "last stmt in pattern. don't mark"
266 " relevant/live.\n");
267 stmt_info = vinfo_for_stmt (pattern_stmt);
268 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
269 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
270 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
271 stmt = pattern_stmt;
275 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
276 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
277 STMT_VINFO_RELEVANT (stmt_info) = relevant;
279 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
280 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
282 if (dump_enabled_p ())
283 dump_printf_loc (MSG_NOTE, vect_location,
284 "already marked relevant/live.\n");
285 return;
288 worklist->safe_push (stmt);
292 /* Function vect_stmt_relevant_p.
294 Return true if STMT in loop that is represented by LOOP_VINFO is
295 "relevant for vectorization".
297 A stmt is considered "relevant for vectorization" if:
298 - it has uses outside the loop.
299 - it has vdefs (it alters memory).
300 - it is a control stmt in the loop (other than the loop exit condition).
302 CHECKME: what other side effects would the vectorizer allow? */
304 static bool
305 vect_stmt_relevant_p (gimple *stmt, loop_vec_info loop_vinfo,
306 enum vect_relevant *relevant, bool *live_p)
308 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
309 ssa_op_iter op_iter;
310 imm_use_iterator imm_iter;
311 use_operand_p use_p;
312 def_operand_p def_p;
314 *relevant = vect_unused_in_scope;
315 *live_p = false;
317 /* cond stmt other than loop exit cond. */
318 if (is_ctrl_stmt (stmt)
319 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
320 != loop_exit_ctrl_vec_info_type)
321 *relevant = vect_used_in_scope;
323 /* changing memory. */
324 if (gimple_code (stmt) != GIMPLE_PHI)
325 if (gimple_vdef (stmt)
326 && !gimple_clobber_p (stmt))
328 if (dump_enabled_p ())
329 dump_printf_loc (MSG_NOTE, vect_location,
330 "vec_stmt_relevant_p: stmt has vdefs.\n");
331 *relevant = vect_used_in_scope;
334 /* uses outside the loop. */
335 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
337 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
339 basic_block bb = gimple_bb (USE_STMT (use_p));
340 if (!flow_bb_inside_loop_p (loop, bb))
342 if (dump_enabled_p ())
343 dump_printf_loc (MSG_NOTE, vect_location,
344 "vec_stmt_relevant_p: used out of loop.\n");
346 if (is_gimple_debug (USE_STMT (use_p)))
347 continue;
349 /* We expect all such uses to be in the loop exit phis
350 (because of loop closed form) */
351 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
352 gcc_assert (bb == single_exit (loop)->dest);
354 *live_p = true;
359 return (*live_p || *relevant);
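/* Illustrative example (not part of the original source):

     for (i = 0; i < n; i++)
       {
         a[i] = b[i] + 1;    <-- has a vdef, so *relevant is set
         sum = sum + b[i];   <-- sum is used after the loop,
       }                         so *live_p is set for its def
     ... = sum;

   The induction i = i + 1 and the loop exit condition satisfy none of
   the three criteria; they only become relevant if a relevant stmt has a
   non-indexing use of them (see process_use below).  */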
363 /* Function exist_non_indexing_operands_for_use_p
365 USE is one of the uses attached to STMT. Check if USE is
366 used in STMT for anything other than indexing an array. */
368 static bool
369 exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
371 tree operand;
372 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
374 /* USE corresponds to some operand in STMT. If there is no data
375 reference in STMT, then any operand that corresponds to USE
376 is not indexing an array. */
377 if (!STMT_VINFO_DATA_REF (stmt_info))
378 return true;
380 /* STMT has a data_ref. FORNOW this means that it's of one of
381 the following forms:
382 -1- ARRAY_REF = var
383 -2- var = ARRAY_REF
384 (This should have been verified in analyze_data_refs).
386 'var' in the second case corresponds to a def, not a use,
387 so USE cannot correspond to any operands that are not used
388 for array indexing.
390 Therefore, all we need to check is if STMT falls into the
391 first case, and whether var corresponds to USE. */
393 if (!gimple_assign_copy_p (stmt))
395 if (is_gimple_call (stmt)
396 && gimple_call_internal_p (stmt))
397 switch (gimple_call_internal_fn (stmt))
399 case IFN_MASK_STORE:
400 operand = gimple_call_arg (stmt, 3);
401 if (operand == use)
402 return true;
403 /* FALLTHRU */
404 case IFN_MASK_LOAD:
405 operand = gimple_call_arg (stmt, 2);
406 if (operand == use)
407 return true;
408 break;
409 default:
410 break;
412 return false;
415 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
416 return false;
417 operand = gimple_assign_rhs1 (stmt);
418 if (TREE_CODE (operand) != SSA_NAME)
419 return false;
421 if (operand == use)
422 return true;
424 return false;
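/* Illustrative example (not part of the original source): for the stmt

     a[i_5] = x_7;

   the use i_5 only feeds the ARRAY_REF address computation, so this
   function returns false for it, whereas the stored value x_7 is a real
   (non-indexing) operand and the function returns true.  */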
429 /* Function process_use.
431 Inputs:
432 - a USE in STMT in a loop represented by LOOP_VINFO
433 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
434 that defined USE. This is done by calling mark_relevant and passing it
435 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
436 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
437 be performed.
439 Outputs:
440 Generally, LIVE_P and RELEVANT are used to define the liveness and
441 relevance info of the DEF_STMT of this USE:
442 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
443 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
444 Exceptions:
445 - case 1: If USE is used only for address computations (e.g. array indexing),
446 which does not need to be directly vectorized, then the liveness/relevance
447 of the respective DEF_STMT is left unchanged.
448 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
449 skip DEF_STMT because it has already been processed.
450 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
451 be modified accordingly.
453 Return true if everything is as expected. Return false otherwise. */
455 static bool
456 process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
457 enum vect_relevant relevant, vec<gimple *> *worklist,
458 bool force)
460 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
461 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
462 stmt_vec_info dstmt_vinfo;
463 basic_block bb, def_bb;
464 tree def;
465 gimple *def_stmt;
466 enum vect_def_type dt;
468 /* case 1: we are only interested in uses that need to be vectorized. Uses
469 that are used for address computation are not considered relevant. */
470 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
471 return true;
473 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
475 if (dump_enabled_p ())
476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
477 "not vectorized: unsupported use in stmt.\n");
478 return false;
481 if (!def_stmt || gimple_nop_p (def_stmt))
482 return true;
484 def_bb = gimple_bb (def_stmt);
485 if (!flow_bb_inside_loop_p (loop, def_bb))
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
489 return true;
492 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
493 DEF_STMT must have already been processed, because this should be the
494 only way that STMT, which is a reduction-phi, was put in the worklist,
495 as there should be no other uses for DEF_STMT in the loop. So we just
496 check that everything is as expected, and we are done. */
497 dstmt_vinfo = vinfo_for_stmt (def_stmt);
498 bb = gimple_bb (stmt);
499 if (gimple_code (stmt) == GIMPLE_PHI
500 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
501 && gimple_code (def_stmt) != GIMPLE_PHI
502 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
503 && bb->loop_father == def_bb->loop_father)
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE, vect_location,
507 "reduc-stmt defining reduc-phi in the same nest.\n");
508 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
509 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
510 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
511 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
512 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
513 return true;
516 /* case 3a: outer-loop stmt defining an inner-loop stmt:
517 outer-loop-header-bb:
518 d = def_stmt
519 inner-loop:
520 stmt # use (d)
521 outer-loop-tail-bb:
522 ... */
523 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
525 if (dump_enabled_p ())
526 dump_printf_loc (MSG_NOTE, vect_location,
527 "outer-loop def-stmt defining inner-loop stmt.\n");
529 switch (relevant)
531 case vect_unused_in_scope:
532 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
533 vect_used_in_scope : vect_unused_in_scope;
534 break;
536 case vect_used_in_outer_by_reduction:
537 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
538 relevant = vect_used_by_reduction;
539 break;
541 case vect_used_in_outer:
542 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
543 relevant = vect_used_in_scope;
544 break;
546 case vect_used_in_scope:
547 break;
549 default:
550 gcc_unreachable ();
554 /* case 3b: inner-loop stmt defining an outer-loop stmt:
555 outer-loop-header-bb:
557 inner-loop:
558 d = def_stmt
559 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
560 stmt # use (d) */
561 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
563 if (dump_enabled_p ())
564 dump_printf_loc (MSG_NOTE, vect_location,
565 "inner-loop def-stmt defining outer-loop stmt.\n");
567 switch (relevant)
569 case vect_unused_in_scope:
570 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
571 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
572 vect_used_in_outer_by_reduction : vect_unused_in_scope;
573 break;
575 case vect_used_by_reduction:
576 relevant = vect_used_in_outer_by_reduction;
577 break;
579 case vect_used_in_scope:
580 relevant = vect_used_in_outer;
581 break;
583 default:
584 gcc_unreachable ();
588 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
589 is_pattern_stmt_p (stmt_vinfo));
590 return true;
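/* Illustrative example (not part of the original source) of case 3a:

     outer-loop:  d_1 = ...              <-- DEF_STMT
       inner-loop:  ... = d_1 + x_2      <-- STMT

   If STMT is marked vect_used_in_outer, the switch above translates the
   relevance recorded for d_1's stmt to vect_used_in_scope before it is
   pushed on the worklist; case 3b performs the symmetric translation
   when an inner-loop def is used by an outer-loop stmt.  */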
594 /* Function vect_mark_stmts_to_be_vectorized.
596 Not all stmts in the loop need to be vectorized. For example:
598 for i...
599 for j...
600 1. T0 = i + j
601 2. T1 = a[T0]
603 3. j = j + 1
605 Stmts 1 and 3 do not need to be vectorized, because loop control and
606 addressing of vectorized data-refs are handled differently.
608 This pass detects such stmts. */
610 bool
611 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
613 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
614 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
615 unsigned int nbbs = loop->num_nodes;
616 gimple_stmt_iterator si;
617 gimple *stmt;
618 unsigned int i;
619 stmt_vec_info stmt_vinfo;
620 basic_block bb;
621 gimple *phi;
622 bool live_p;
623 enum vect_relevant relevant, tmp_relevant;
624 enum vect_def_type def_type;
626 if (dump_enabled_p ())
627 dump_printf_loc (MSG_NOTE, vect_location,
628 "=== vect_mark_stmts_to_be_vectorized ===\n");
630 auto_vec<gimple *, 64> worklist;
632 /* 1. Init worklist. */
633 for (i = 0; i < nbbs; i++)
635 bb = bbs[i];
636 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
638 phi = gsi_stmt (si);
639 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
642 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
645 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
646 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
648 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
650 stmt = gsi_stmt (si);
651 if (dump_enabled_p ())
653 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
654 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
657 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
658 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
662 /* 2. Process_worklist */
663 while (worklist.length () > 0)
665 use_operand_p use_p;
666 ssa_op_iter iter;
668 stmt = worklist.pop ();
669 if (dump_enabled_p ())
671 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
672 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
675 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
676 (DEF_STMT) as relevant/irrelevant and live/dead according to the
677 liveness and relevance properties of STMT. */
678 stmt_vinfo = vinfo_for_stmt (stmt);
679 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
680 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
682 /* Generally, the liveness and relevance properties of STMT are
683 propagated as is to the DEF_STMTs of its USEs:
684 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
685 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
687 One exception is when STMT has been identified as defining a reduction
688 variable; in this case we set the liveness/relevance as follows:
689 live_p = false
690 relevant = vect_used_by_reduction
691 This is because we distinguish between two kinds of relevant stmts -
692 those that are used by a reduction computation, and those that are
693 (also) used by a regular computation. This allows us later on to
694 identify stmts that are used solely by a reduction, for which the
695 order of the results they produce does not have to be kept. */
697 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
698 tmp_relevant = relevant;
699 switch (def_type)
701 case vect_reduction_def:
702 switch (tmp_relevant)
704 case vect_unused_in_scope:
705 relevant = vect_used_by_reduction;
706 break;
708 case vect_used_by_reduction:
709 if (gimple_code (stmt) == GIMPLE_PHI)
710 break;
711 /* fall through */
713 default:
714 if (dump_enabled_p ())
715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
716 "unsupported use of reduction.\n");
717 return false;
720 live_p = false;
721 break;
723 case vect_nested_cycle:
724 if (tmp_relevant != vect_unused_in_scope
725 && tmp_relevant != vect_used_in_outer_by_reduction
726 && tmp_relevant != vect_used_in_outer)
728 if (dump_enabled_p ())
729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
730 "unsupported use of nested cycle.\n");
732 return false;
735 live_p = false;
736 break;
738 case vect_double_reduction_def:
739 if (tmp_relevant != vect_unused_in_scope
740 && tmp_relevant != vect_used_by_reduction)
742 if (dump_enabled_p ())
743 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
744 "unsupported use of double reduction.\n");
746 return false;
749 live_p = false;
750 break;
752 default:
753 break;
756 if (is_pattern_stmt_p (stmt_vinfo))
758 /* Pattern statements are not inserted into the code, so
759 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
760 have to scan the RHS or function arguments instead. */
761 if (is_gimple_assign (stmt))
763 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
764 tree op = gimple_assign_rhs1 (stmt);
766 i = 1;
767 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
769 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
770 live_p, relevant, &worklist, false)
771 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
772 live_p, relevant, &worklist, false))
773 return false;
774 i = 2;
776 for (; i < gimple_num_ops (stmt); i++)
778 op = gimple_op (stmt, i);
779 if (TREE_CODE (op) == SSA_NAME
780 && !process_use (stmt, op, loop_vinfo, live_p, relevant,
781 &worklist, false))
782 return false;
785 else if (is_gimple_call (stmt))
787 for (i = 0; i < gimple_call_num_args (stmt); i++)
789 tree arg = gimple_call_arg (stmt, i);
790 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
791 &worklist, false))
792 return false;
796 else
797 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
799 tree op = USE_FROM_PTR (use_p);
800 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
801 &worklist, false))
802 return false;
805 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
807 tree off;
808 tree decl = vect_check_gather_scatter (stmt, loop_vinfo, NULL, &off, NULL);
809 gcc_assert (decl);
810 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
811 &worklist, true))
812 return false;
814 } /* while worklist */
816 return true;
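/* A minimal, self-contained sketch (not part of the original source) of the
   worklist propagation performed above.  All names are hypothetical; stmts
   are plain integers, and deps[s] lists the defining stmts of the
   non-indexing uses of s (exist_non_indexing_operands_for_use_p is modelled
   simply by leaving indexing-only uses out of the lists).  Seeding
   corresponds to vect_stmt_relevant_p, the pop/propagate loop to
   process_use.  */

#include <stdbool.h>
#include <stdio.h>

#define NSTMTS 5
#define MAXDEPS 4

static const int deps[NSTMTS][MAXDEPS] = {
  /* 0: i = i + 1       (induction)           */ { -1 },
  /* 1: t = a[i]        (i used for indexing) */ { -1 },
  /* 2: s = t * 2                             */ { 1, -1 },
  /* 3: b[i] = s        (i used for indexing) */ { 2, -1 },
  /* 4: if (i < n) ...  (loop exit condition) */ { 0, -1 },
};

int
main (void)
{
  bool relevant[NSTMTS] = { false, false, false, false, false };
  int worklist[NSTMTS];
  int top = 0;

  /* Seed: stmt 3 stores to memory (has a vdef), so it is relevant.  */
  relevant[3] = true;
  worklist[top++] = 3;

  /* Propagate relevance to the defining stmts of each relevant use.  */
  while (top > 0)
    {
      int s = worklist[--top];
      for (int k = 0; k < MAXDEPS && deps[s][k] >= 0; k++)
        if (!relevant[deps[s][k]])
          {
            relevant[deps[s][k]] = true;
            worklist[top++] = deps[s][k];
          }
    }

  /* Prints: stmts 1, 2 and 3 are relevant; the induction and the exit
     condition (0 and 4) are not, matching the comment before
     vect_mark_stmts_to_be_vectorized.  */
  for (int s = 0; s < NSTMTS; s++)
    printf ("stmt %d: %srelevant\n", s, relevant[s] ? "" : "not ");
  return 0;
}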
820 /* Function vect_model_simple_cost.
822 Models cost for simple operations, i.e. those that only emit ncopies of a
823 single op. Right now, this does not account for multiple insns that could
824 be generated for the single vector op. We will handle that shortly. */
826 void
827 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
828 enum vect_def_type *dt,
829 stmt_vector_for_cost *prologue_cost_vec,
830 stmt_vector_for_cost *body_cost_vec)
832 int i;
833 int inside_cost = 0, prologue_cost = 0;
835 /* The SLP costs were already calculated during SLP tree build. */
836 if (PURE_SLP_STMT (stmt_info))
837 return;
839 /* FORNOW: Assuming maximum 2 args per stmts. */
840 for (i = 0; i < 2; i++)
841 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
842 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
843 stmt_info, 0, vect_prologue);
845 /* Pass the inside-of-loop statements to the target-specific cost model. */
846 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
847 stmt_info, 0, vect_body);
849 if (dump_enabled_p ())
850 dump_printf_loc (MSG_NOTE, vect_location,
851 "vect_model_simple_cost: inside_cost = %d, "
852 "prologue_cost = %d .\n", inside_cost, prologue_cost);
856 /* Model cost for type demotion and promotion operations. PWR is normally
857 zero for single-step promotions and demotions. It will be one if
858 two-step promotion/demotion is required, and so on. Each additional
859 step doubles the number of instructions required. */
861 static void
862 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
863 enum vect_def_type *dt, int pwr)
865 int i, tmp;
866 int inside_cost = 0, prologue_cost = 0;
867 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
868 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
869 void *target_cost_data;
871 /* The SLP costs were already calculated during SLP tree build. */
872 if (PURE_SLP_STMT (stmt_info))
873 return;
875 if (loop_vinfo)
876 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
877 else
878 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
880 for (i = 0; i < pwr + 1; i++)
882 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
883 (i + 1) : i;
884 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
885 vec_promote_demote, stmt_info, 0,
886 vect_body);
889 /* FORNOW: Assuming maximum 2 args per stmts. */
890 for (i = 0; i < 2; i++)
891 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
892 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
893 stmt_info, 0, vect_prologue);
895 if (dump_enabled_p ())
896 dump_printf_loc (MSG_NOTE, vect_location,
897 "vect_model_promotion_demotion_cost: inside_cost = %d, "
898 "prologue_cost = %d .\n", inside_cost, prologue_cost);
901 /* Function vect_cost_group_size
903 For grouped load or store, return the group_size only if it is the first
904 load or store of a group, else return 1. This ensures that group size is
905 only returned once per group. */
907 static int
908 vect_cost_group_size (stmt_vec_info stmt_info)
910 gimple *first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
912 if (first_stmt == STMT_VINFO_STMT (stmt_info))
913 return GROUP_SIZE (stmt_info);
915 return 1;
919 /* Function vect_model_store_cost
921 Models cost for stores. In the case of grouped accesses, one access
922 has the overhead of the grouped access attributed to it. */
924 void
925 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
926 bool store_lanes_p, enum vect_def_type dt,
927 slp_tree slp_node,
928 stmt_vector_for_cost *prologue_cost_vec,
929 stmt_vector_for_cost *body_cost_vec)
931 int group_size;
932 unsigned int inside_cost = 0, prologue_cost = 0;
933 struct data_reference *first_dr;
934 gimple *first_stmt;
936 if (dt == vect_constant_def || dt == vect_external_def)
937 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
938 stmt_info, 0, vect_prologue);
940 /* Grouped access? */
941 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
943 if (slp_node)
945 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
946 group_size = 1;
948 else
950 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
951 group_size = vect_cost_group_size (stmt_info);
954 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
956 /* Not a grouped access. */
957 else
959 group_size = 1;
960 first_dr = STMT_VINFO_DATA_REF (stmt_info);
963 /* We assume that the cost of a single store-lanes instruction is
964 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
965 access is instead being provided by a permute-and-store operation,
966 include the cost of the permutes. */
967 if (!store_lanes_p && group_size > 1
968 && !STMT_VINFO_STRIDED_P (stmt_info))
970 /* Uses high and low interleave or shuffle operations for each
971 needed permute. */
972 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
973 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
974 stmt_info, 0, vect_body);
976 if (dump_enabled_p ())
977 dump_printf_loc (MSG_NOTE, vect_location,
978 "vect_model_store_cost: strided group_size = %d .\n",
979 group_size);
982 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
983 /* Costs of the stores. */
984 if (STMT_VINFO_STRIDED_P (stmt_info)
985 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
987 /* N scalar stores plus extracting the elements. */
988 inside_cost += record_stmt_cost (body_cost_vec,
989 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
990 scalar_store, stmt_info, 0, vect_body);
992 else
993 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
995 if (STMT_VINFO_STRIDED_P (stmt_info))
996 inside_cost += record_stmt_cost (body_cost_vec,
997 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
998 vec_to_scalar, stmt_info, 0, vect_body);
1000 if (dump_enabled_p ())
1001 dump_printf_loc (MSG_NOTE, vect_location,
1002 "vect_model_store_cost: inside_cost = %d, "
1003 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1007 /* Calculate cost of DR's memory access. */
1008 void
1009 vect_get_store_cost (struct data_reference *dr, int ncopies,
1010 unsigned int *inside_cost,
1011 stmt_vector_for_cost *body_cost_vec)
1013 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1014 gimple *stmt = DR_STMT (dr);
1015 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1017 switch (alignment_support_scheme)
1019 case dr_aligned:
1021 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1022 vector_store, stmt_info, 0,
1023 vect_body);
1025 if (dump_enabled_p ())
1026 dump_printf_loc (MSG_NOTE, vect_location,
1027 "vect_model_store_cost: aligned.\n");
1028 break;
1031 case dr_unaligned_supported:
1033 /* Here, we assign an additional cost for the unaligned store. */
1034 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1035 unaligned_store, stmt_info,
1036 DR_MISALIGNMENT (dr), vect_body);
1037 if (dump_enabled_p ())
1038 dump_printf_loc (MSG_NOTE, vect_location,
1039 "vect_model_store_cost: unaligned supported by "
1040 "hardware.\n");
1041 break;
1044 case dr_unaligned_unsupported:
1046 *inside_cost = VECT_MAX_COST;
1048 if (dump_enabled_p ())
1049 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1050 "vect_model_store_cost: unsupported access.\n");
1051 break;
1054 default:
1055 gcc_unreachable ();
1060 /* Function vect_model_load_cost
1062 Models cost for loads. In the case of grouped accesses, the last access
1063 has the overhead of the grouped access attributed to it. Since unaligned
1064 accesses are supported for loads, we also account for the costs of the
1065 access scheme chosen. */
1067 void
1068 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1069 bool load_lanes_p, slp_tree slp_node,
1070 stmt_vector_for_cost *prologue_cost_vec,
1071 stmt_vector_for_cost *body_cost_vec)
1073 int group_size;
1074 gimple *first_stmt;
1075 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1076 unsigned int inside_cost = 0, prologue_cost = 0;
1078 /* Grouped accesses? */
1079 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1080 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1082 group_size = vect_cost_group_size (stmt_info);
1083 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1085 /* Not a grouped access. */
1086 else
1088 group_size = 1;
1089 first_dr = dr;
1092 /* We assume that the cost of a single load-lanes instruction is
1093 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1094 access is instead being provided by a load-and-permute operation,
1095 include the cost of the permutes. */
1096 if (!load_lanes_p && group_size > 1
1097 && !STMT_VINFO_STRIDED_P (stmt_info))
1099 /* Uses even and odd extract operations or shuffle operations
1100 for each needed permute. */
1101 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1102 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1103 stmt_info, 0, vect_body);
1105 if (dump_enabled_p ())
1106 dump_printf_loc (MSG_NOTE, vect_location,
1107 "vect_model_load_cost: strided group_size = %d .\n",
1108 group_size);
1111 /* The loads themselves. */
1112 if (STMT_VINFO_STRIDED_P (stmt_info)
1113 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1115 /* N scalar loads plus gathering them into a vector. */
1116 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1117 inside_cost += record_stmt_cost (body_cost_vec,
1118 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1119 scalar_load, stmt_info, 0, vect_body);
1121 else
1122 vect_get_load_cost (first_dr, ncopies,
1123 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1124 || group_size > 1 || slp_node),
1125 &inside_cost, &prologue_cost,
1126 prologue_cost_vec, body_cost_vec, true);
1127 if (STMT_VINFO_STRIDED_P (stmt_info))
1128 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1129 stmt_info, 0, vect_body);
1131 if (dump_enabled_p ())
1132 dump_printf_loc (MSG_NOTE, vect_location,
1133 "vect_model_load_cost: inside_cost = %d, "
1134 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1138 /* Calculate cost of DR's memory access. */
1139 void
1140 vect_get_load_cost (struct data_reference *dr, int ncopies,
1141 bool add_realign_cost, unsigned int *inside_cost,
1142 unsigned int *prologue_cost,
1143 stmt_vector_for_cost *prologue_cost_vec,
1144 stmt_vector_for_cost *body_cost_vec,
1145 bool record_prologue_costs)
1147 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1148 gimple *stmt = DR_STMT (dr);
1149 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1151 switch (alignment_support_scheme)
1153 case dr_aligned:
1155 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1156 stmt_info, 0, vect_body);
1158 if (dump_enabled_p ())
1159 dump_printf_loc (MSG_NOTE, vect_location,
1160 "vect_model_load_cost: aligned.\n");
1162 break;
1164 case dr_unaligned_supported:
1166 /* Here, we assign an additional cost for the unaligned load. */
1167 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1168 unaligned_load, stmt_info,
1169 DR_MISALIGNMENT (dr), vect_body);
1171 if (dump_enabled_p ())
1172 dump_printf_loc (MSG_NOTE, vect_location,
1173 "vect_model_load_cost: unaligned supported by "
1174 "hardware.\n");
1176 break;
1178 case dr_explicit_realign:
1180 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1181 vector_load, stmt_info, 0, vect_body);
1182 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1183 vec_perm, stmt_info, 0, vect_body);
1185 /* FIXME: If the misalignment remains fixed across the iterations of
1186 the containing loop, the following cost should be added to the
1187 prologue costs. */
1188 if (targetm.vectorize.builtin_mask_for_load)
1189 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1190 stmt_info, 0, vect_body);
1192 if (dump_enabled_p ())
1193 dump_printf_loc (MSG_NOTE, vect_location,
1194 "vect_model_load_cost: explicit realign\n");
1196 break;
1198 case dr_explicit_realign_optimized:
1200 if (dump_enabled_p ())
1201 dump_printf_loc (MSG_NOTE, vect_location,
1202 "vect_model_load_cost: unaligned software "
1203 "pipelined.\n");
1205 /* Unaligned software pipeline has a load of an address, an initial
1206 load, and possibly a mask operation to "prime" the loop. However,
1207 if this is an access in a group of loads, which provide grouped
1208 access, then the above cost should only be considered for one
1209 access in the group. Inside the loop, there is a load op
1210 and a realignment op. */
1212 if (add_realign_cost && record_prologue_costs)
1214 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1215 vector_stmt, stmt_info,
1216 0, vect_prologue);
1217 if (targetm.vectorize.builtin_mask_for_load)
1218 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1219 vector_stmt, stmt_info,
1220 0, vect_prologue);
1223 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1224 stmt_info, 0, vect_body);
1225 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1226 stmt_info, 0, vect_body);
1228 if (dump_enabled_p ())
1229 dump_printf_loc (MSG_NOTE, vect_location,
1230 "vect_model_load_cost: explicit realign optimized"
1231 "\n");
1233 break;
1236 case dr_unaligned_unsupported:
1238 *inside_cost = VECT_MAX_COST;
1240 if (dump_enabled_p ())
1241 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1242 "vect_model_load_cost: unsupported access.\n");
1243 break;
1246 default:
1247 gcc_unreachable ();
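/* Illustrative summary (not part of the original source) of the cases
   above, for ncopies == 2 and unit costs: dr_aligned and
   dr_unaligned_supported charge 2 loads to the body; dr_explicit_realign
   charges 4 loads + 2 permutes (plus one mask stmt if the target provides
   builtin_mask_for_load); dr_explicit_realign_optimized charges 2 loads
   + 2 permutes in the body plus 2 (or 3) prologue stmts when
   ADD_REALIGN_COST and RECORD_PROLOGUE_COSTS hold; dr_unaligned_unsupported
   pins inside_cost to VECT_MAX_COST so the access is costed as
   prohibitively expensive.  */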
1251 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1252 the loop preheader for the vectorized stmt STMT. */
1254 static void
1255 vect_init_vector_1 (gimple *stmt, gimple *new_stmt, gimple_stmt_iterator *gsi)
1257 if (gsi)
1258 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1259 else
1261 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1262 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1264 if (loop_vinfo)
1266 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1267 basic_block new_bb;
1268 edge pe;
1270 if (nested_in_vect_loop_p (loop, stmt))
1271 loop = loop->inner;
1273 pe = loop_preheader_edge (loop);
1274 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1275 gcc_assert (!new_bb);
1277 else
1279 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1280 basic_block bb;
1281 gimple_stmt_iterator gsi_bb_start;
1283 gcc_assert (bb_vinfo);
1284 bb = BB_VINFO_BB (bb_vinfo);
1285 gsi_bb_start = gsi_after_labels (bb);
1286 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1290 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_NOTE, vect_location,
1293 "created new init_stmt: ");
1294 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1298 /* Function vect_init_vector.
1300 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1301 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1302 a vector type, a vector with all elements equal to VAL is created first.
1303 Place the initialization at GSI if it is not NULL. Otherwise, place the
1304 initialization at the loop preheader.
1305 Return the DEF of INIT_STMT.
1306 It will be used in the vectorization of STMT. */
1308 tree
1309 vect_init_vector (gimple *stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1311 tree new_var;
1312 gimple *init_stmt;
1313 tree vec_oprnd;
1314 tree new_temp;
1316 if (TREE_CODE (type) == VECTOR_TYPE
1317 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1319 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1321 if (CONSTANT_CLASS_P (val))
1322 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1323 else
1325 new_temp = make_ssa_name (TREE_TYPE (type));
1326 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1327 vect_init_vector_1 (stmt, init_stmt, gsi);
1328 val = new_temp;
1331 val = build_vector_from_val (type, val);
1334 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1335 init_stmt = gimple_build_assign (new_var, val);
1336 new_temp = make_ssa_name (new_var, init_stmt);
1337 gimple_assign_set_lhs (init_stmt, new_temp);
1338 vect_init_vector_1 (stmt, init_stmt, gsi);
1339 vec_oprnd = gimple_assign_lhs (init_stmt);
1340 return vec_oprnd;
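/* Illustrative example (not part of the original source): for a V4SI
   vector TYPE and the constant VAL == 3, vect_init_vector emits roughly

     cst_1 = { 3, 3, 3, 3 };

   in the loop preheader (when GSI is NULL) and returns the SSA name
   cst_1; the exact variable name is made up here for illustration.  */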
1344 /* Function vect_get_vec_def_for_operand.
1346 OP is an operand in STMT. This function returns a (vector) def that will be
1347 used in the vectorized stmt for STMT.
1349 In the case that OP is an SSA_NAME which is defined in the loop, then
1350 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1352 In case OP is an invariant or constant, a new stmt that creates a vector def
1353 needs to be introduced. */
1355 tree
1356 vect_get_vec_def_for_operand (tree op, gimple *stmt, tree *scalar_def)
1358 tree vec_oprnd;
1359 gimple *vec_stmt;
1360 gimple *def_stmt;
1361 stmt_vec_info def_stmt_info = NULL;
1362 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1363 unsigned int nunits;
1364 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1365 tree def;
1366 enum vect_def_type dt;
1367 bool is_simple_use;
1368 tree vector_type;
1370 if (dump_enabled_p ())
1372 dump_printf_loc (MSG_NOTE, vect_location,
1373 "vect_get_vec_def_for_operand: ");
1374 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1375 dump_printf (MSG_NOTE, "\n");
1378 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1379 &def_stmt, &def, &dt);
1380 gcc_assert (is_simple_use);
1381 if (dump_enabled_p ())
1383 int loc_printed = 0;
1384 if (def)
1386 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1387 loc_printed = 1;
1388 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1389 dump_printf (MSG_NOTE, "\n");
1391 if (def_stmt)
1393 if (loc_printed)
1394 dump_printf (MSG_NOTE, " def_stmt = ");
1395 else
1396 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1397 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1401 switch (dt)
1403 /* Case 1: operand is a constant. */
1404 case vect_constant_def:
1406 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1407 gcc_assert (vector_type);
1408 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1410 if (scalar_def)
1411 *scalar_def = op;
1413 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1414 if (dump_enabled_p ())
1415 dump_printf_loc (MSG_NOTE, vect_location,
1416 "Create vector_cst. nunits = %d\n", nunits);
1418 return vect_init_vector (stmt, op, vector_type, NULL);
1421 /* Case 2: operand is defined outside the loop - loop invariant. */
1422 case vect_external_def:
1424 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1425 gcc_assert (vector_type);
1427 if (scalar_def)
1428 *scalar_def = def;
1430 /* Create 'vec_inv = {inv,inv,..,inv}' */
1431 if (dump_enabled_p ())
1432 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1434 return vect_init_vector (stmt, def, vector_type, NULL);
1437 /* Case 3: operand is defined inside the loop. */
1438 case vect_internal_def:
1440 if (scalar_def)
1441 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1443 /* Get the def from the vectorized stmt. */
1444 def_stmt_info = vinfo_for_stmt (def_stmt);
1446 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1447 /* Get vectorized pattern statement. */
1448 if (!vec_stmt
1449 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1450 && !STMT_VINFO_RELEVANT (def_stmt_info))
1451 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1452 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1453 gcc_assert (vec_stmt);
1454 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1455 vec_oprnd = PHI_RESULT (vec_stmt);
1456 else if (is_gimple_call (vec_stmt))
1457 vec_oprnd = gimple_call_lhs (vec_stmt);
1458 else
1459 vec_oprnd = gimple_assign_lhs (vec_stmt);
1460 return vec_oprnd;
1463 /* Case 4: operand is defined by a loop header phi - reduction */
1464 case vect_reduction_def:
1465 case vect_double_reduction_def:
1466 case vect_nested_cycle:
1468 struct loop *loop;
1470 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1471 loop = (gimple_bb (def_stmt))->loop_father;
1473 /* Get the def before the loop */
1474 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1475 return get_initial_def_for_reduction (stmt, op, scalar_def);
1478 /* Case 5: operand is defined by loop-header phi - induction. */
1479 case vect_induction_def:
1481 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1483 /* Get the def from the vectorized stmt. */
1484 def_stmt_info = vinfo_for_stmt (def_stmt);
1485 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1486 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1487 vec_oprnd = PHI_RESULT (vec_stmt);
1488 else
1489 vec_oprnd = gimple_get_lhs (vec_stmt);
1490 return vec_oprnd;
1493 default:
1494 gcc_unreachable ();
1499 /* Function vect_get_vec_def_for_stmt_copy
1501 Return a vector-def for an operand. This function is used when the
1502 vectorized stmt to be created (by the caller to this function) is a "copy"
1503 created in case the vectorized result cannot fit in one vector, and several
1504 copies of the vector-stmt are required. In this case the vector-def is
1505 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1506 of the stmt that defines VEC_OPRND.
1507 DT is the type of the vector def VEC_OPRND.
1509 Context:
1510 In case the vectorization factor (VF) is bigger than the number
1511 of elements that can fit in a vectype (nunits), we have to generate
1512 more than one vector stmt to vectorize the scalar stmt. This situation
1513 arises when there are multiple data-types operated upon in the loop; the
1514 smallest data-type determines the VF, and as a result, when vectorizing
1515 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1516 vector stmt (each computing a vector of 'nunits' results, and together
1517 computing 'VF' results in each iteration). This function is called when
1518 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1519 which VF=16 and nunits=4, so the number of copies required is 4):
1521 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1523 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1524 VS1.1: vx.1 = memref1 VS1.2
1525 VS1.2: vx.2 = memref2 VS1.3
1526 VS1.3: vx.3 = memref3
1528 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1529 VSnew.1: vz1 = vx.1 + ... VSnew.2
1530 VSnew.2: vz2 = vx.2 + ... VSnew.3
1531 VSnew.3: vz3 = vx.3 + ...
1533 The vectorization of S1 is explained in vectorizable_load.
1534 The vectorization of S2:
1535 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1536 the function 'vect_get_vec_def_for_operand' is called to
1537 get the relevant vector-def for each operand of S2. For operand x it
1538 returns the vector-def 'vx.0'.
1540 To create the remaining copies of the vector-stmt (VSnew.j), this
1541 function is called to get the relevant vector-def for each operand. It is
1542 obtained from the respective VS1.j stmt, which is recorded in the
1543 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1545 For example, to obtain the vector-def 'vx.1' in order to create the
1546 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1547 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1548 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1549 and return its def ('vx.1').
1550 Overall, to create the above sequence this function will be called 3 times:
1551 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1552 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1553 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1555 tree
1556 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1558 gimple *vec_stmt_for_operand;
1559 stmt_vec_info def_stmt_info;
1561 /* Do nothing; can reuse same def. */
1562 if (dt == vect_external_def || dt == vect_constant_def )
1563 return vec_oprnd;
1565 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1566 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1567 gcc_assert (def_stmt_info);
1568 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1569 gcc_assert (vec_stmt_for_operand);
1570 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1571 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1572 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1573 else
1574 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1575 return vec_oprnd;
1579 /* Get vectorized definitions for the operands to create a copy of an original
1580 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1582 static void
1583 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1584 vec<tree> *vec_oprnds0,
1585 vec<tree> *vec_oprnds1)
1587 tree vec_oprnd = vec_oprnds0->pop ();
1589 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1590 vec_oprnds0->quick_push (vec_oprnd);
1592 if (vec_oprnds1 && vec_oprnds1->length ())
1594 vec_oprnd = vec_oprnds1->pop ();
1595 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1596 vec_oprnds1->quick_push (vec_oprnd);
1601 /* Get vectorized definitions for OP0 and OP1.
1602 REDUC_INDEX is the index of reduction operand in case of reduction,
1603 and -1 otherwise. */
1605 void
1606 vect_get_vec_defs (tree op0, tree op1, gimple *stmt,
1607 vec<tree> *vec_oprnds0,
1608 vec<tree> *vec_oprnds1,
1609 slp_tree slp_node, int reduc_index)
1611 if (slp_node)
1613 int nops = (op1 == NULL_TREE) ? 1 : 2;
1614 auto_vec<tree> ops (nops);
1615 auto_vec<vec<tree> > vec_defs (nops);
1617 ops.quick_push (op0);
1618 if (op1)
1619 ops.quick_push (op1);
1621 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1623 *vec_oprnds0 = vec_defs[0];
1624 if (op1)
1625 *vec_oprnds1 = vec_defs[1];
1627 else
1629 tree vec_oprnd;
1631 vec_oprnds0->create (1);
1632 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1633 vec_oprnds0->quick_push (vec_oprnd);
1635 if (op1)
1637 vec_oprnds1->create (1);
1638 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1639 vec_oprnds1->quick_push (vec_oprnd);
1645 /* Function vect_finish_stmt_generation.
1647 Insert a new stmt. */
1649 void
1650 vect_finish_stmt_generation (gimple *stmt, gimple *vec_stmt,
1651 gimple_stmt_iterator *gsi)
1653 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1654 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1655 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1657 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1659 if (!gsi_end_p (*gsi)
1660 && gimple_has_mem_ops (vec_stmt))
1662 gimple *at_stmt = gsi_stmt (*gsi);
1663 tree vuse = gimple_vuse (at_stmt);
1664 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1666 tree vdef = gimple_vdef (at_stmt);
1667 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1668 /* If we have an SSA vuse and insert a store, update virtual
1669 SSA form to avoid triggering the renamer. Do so only
1670 if we can easily see all uses - which is what almost always
1671 happens with the way vectorized stmts are inserted. */
1672 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1673 && ((is_gimple_assign (vec_stmt)
1674 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1675 || (is_gimple_call (vec_stmt)
1676 && !(gimple_call_flags (vec_stmt)
1677 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1679 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1680 gimple_set_vdef (vec_stmt, new_vdef);
1681 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1685 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1687 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1688 bb_vinfo));
1690 if (dump_enabled_p ())
1692 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1693 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1696 gimple_set_location (vec_stmt, gimple_location (stmt));
1698 /* While EH edges will generally prevent vectorization, stmt might
1699 e.g. be in a must-not-throw region. Ensure newly created stmts
1700 that could throw are part of the same region. */
1701 int lp_nr = lookup_stmt_eh_lp (stmt);
1702 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1703 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1706 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1707 a function declaration if the target has a vectorized version
1708 of the function, or NULL_TREE if the function cannot be vectorized. */
1710 tree
1711 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1713 tree fndecl = gimple_call_fndecl (call);
1715 /* We only handle functions that do not read or clobber memory -- i.e.
1716 const or novops ones. */
1717 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1718 return NULL_TREE;
1720 if (!fndecl
1721 || TREE_CODE (fndecl) != FUNCTION_DECL
1722 || !DECL_BUILT_IN (fndecl))
1723 return NULL_TREE;
1725 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1726 vectype_in);
1730 static tree permute_vec_elements (tree, tree, tree, gimple *,
1731 gimple_stmt_iterator *);
1734 /* Function vectorizable_mask_load_store.
1736 Check if STMT performs a conditional load or store that can be vectorized.
1737 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1738 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1739 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1741 static bool
1742 vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
1743 gimple **vec_stmt, slp_tree slp_node)
1745 tree vec_dest = NULL;
1746 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1747 stmt_vec_info prev_stmt_info;
1748 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1749 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1750 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1751 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1752 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1753 tree elem_type;
1754 gimple *new_stmt;
1755 tree dummy;
1756 tree dataref_ptr = NULL_TREE;
1757 gimple *ptr_incr;
1758 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1759 int ncopies;
1760 int i, j;
1761 bool inv_p;
1762 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1763 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1764 int gather_scale = 1;
1765 enum vect_def_type gather_dt = vect_unknown_def_type;
1766 bool is_store;
1767 tree mask;
1768 gimple *def_stmt;
1769 tree def;
1770 enum vect_def_type dt;
1772 if (slp_node != NULL)
1773 return false;
1775 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1776 gcc_assert (ncopies >= 1);
1778 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1779 mask = gimple_call_arg (stmt, 2);
1780 if (TYPE_PRECISION (TREE_TYPE (mask))
1781 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1782 return false;
1784 /* FORNOW. This restriction should be relaxed. */
1785 if (nested_in_vect_loop && ncopies > 1)
1787 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1789 "multiple types in nested loop.");
1790 return false;
1793 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1794 return false;
1796 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1797 return false;
1799 if (!STMT_VINFO_DATA_REF (stmt_info))
1800 return false;
1802 elem_type = TREE_TYPE (vectype);
1804 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1805 return false;
1807 if (STMT_VINFO_STRIDED_P (stmt_info))
1808 return false;
1810 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1812 gimple *def_stmt;
1813 tree def;
1814 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
1815 &gather_off, &gather_scale);
1816 gcc_assert (gather_decl);
1817 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1818 &def_stmt, &def, &gather_dt,
1819 &gather_off_vectype))
1821 if (dump_enabled_p ())
1822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1823 "gather index use not simple.");
1824 return false;
1827 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1828 tree masktype
1829 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1830 if (TREE_CODE (masktype) == INTEGER_TYPE)
1832 if (dump_enabled_p ())
1833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1834 "masked gather with integer mask not supported.");
1835 return false;
1838 else if (tree_int_cst_compare (nested_in_vect_loop
1839 ? STMT_VINFO_DR_STEP (stmt_info)
1840 : DR_STEP (dr), size_zero_node) <= 0)
1841 return false;
1842 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1843 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1844 return false;
1846 if (TREE_CODE (mask) != SSA_NAME)
1847 return false;
1849 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1850 &def_stmt, &def, &dt))
1851 return false;
1853 if (is_store)
1855 tree rhs = gimple_call_arg (stmt, 3);
1856 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1857 &def_stmt, &def, &dt))
1858 return false;
1861 if (!vec_stmt) /* transformation not required. */
1863 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1864 if (is_store)
1865 vect_model_store_cost (stmt_info, ncopies, false, dt,
1866 NULL, NULL, NULL);
1867 else
1868 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1869 return true;
1872 /** Transform. **/
1874 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1876 tree vec_oprnd0 = NULL_TREE, op;
1877 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1878 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1879 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1880 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1881 tree mask_perm_mask = NULL_TREE;
1882 edge pe = loop_preheader_edge (loop);
1883 gimple_seq seq;
1884 basic_block new_bb;
1885 enum { NARROW, NONE, WIDEN } modifier;
1886 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1888 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1889 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1890 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1891 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1892 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1893 scaletype = TREE_VALUE (arglist);
1894 gcc_checking_assert (types_compatible_p (srctype, rettype)
1895 && types_compatible_p (srctype, masktype));
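      /* The gather builtin may operate on a different number of elements
	 than the vector type of the load: with equal counts no adjustment
	 is needed (NONE); with twice as many offsets per offset vector,
	 every other copy permutes the high offsets into place (WIDEN); with
	 half as many, NCOPIES is doubled and pairs of gather results are
	 later merged by a permutation (NARROW).  */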
1897 if (nunits == gather_off_nunits)
1898 modifier = NONE;
1899 else if (nunits == gather_off_nunits / 2)
1901 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1902 modifier = WIDEN;
1904 for (i = 0; i < gather_off_nunits; ++i)
1905 sel[i] = i | nunits;
1907 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1909 else if (nunits == gather_off_nunits * 2)
1911 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1912 modifier = NARROW;
1914 for (i = 0; i < nunits; ++i)
1915 sel[i] = i < gather_off_nunits
1916 ? i : i + nunits - gather_off_nunits;
1918 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1919 ncopies *= 2;
1920 for (i = 0; i < nunits; ++i)
1921 sel[i] = i | gather_off_nunits;
1922 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1924 else
1925 gcc_unreachable ();
1927 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1929 ptr = fold_convert (ptrtype, gather_base);
1930 if (!is_gimple_min_invariant (ptr))
1932 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1933 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1934 gcc_assert (!new_bb);
1937 scale = build_int_cst (scaletype, gather_scale);
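      /* Emit NCOPIES gather calls.  Each call takes the (possibly
	 view-converted) offset vector, the vectorized mask both as source
	 operand and as mask operand, the base pointer and the scale; when
	 the builtin's return type differs from VECTYPE the result is
	 view-converted back.  */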
1939 prev_stmt_info = NULL;
1940 for (j = 0; j < ncopies; ++j)
1942 if (modifier == WIDEN && (j & 1))
1943 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1944 perm_mask, stmt, gsi);
1945 else if (j == 0)
1946 op = vec_oprnd0
1947 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1948 else
1949 op = vec_oprnd0
1950 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1952 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1954 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1955 == TYPE_VECTOR_SUBPARTS (idxtype));
1956 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1957 var = make_ssa_name (var);
1958 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1959 new_stmt
1960 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1961 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1962 op = var;
1965 if (mask_perm_mask && (j & 1))
1966 mask_op = permute_vec_elements (mask_op, mask_op,
1967 mask_perm_mask, stmt, gsi);
1968 else
1970 if (j == 0)
1971 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1972 else
1974 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1975 &def_stmt, &def, &dt);
1976 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1979 mask_op = vec_mask;
1980 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
1982 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
1983 == TYPE_VECTOR_SUBPARTS (masktype));
1984 var = vect_get_new_vect_var (masktype, vect_simple_var,
1985 NULL);
1986 var = make_ssa_name (var);
1987 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
1988 new_stmt
1989 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
1990 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1991 mask_op = var;
1995 new_stmt
1996 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
1997 scale);
1999 if (!useless_type_conversion_p (vectype, rettype))
2001 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2002 == TYPE_VECTOR_SUBPARTS (rettype));
2003 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2004 op = make_ssa_name (var, new_stmt);
2005 gimple_call_set_lhs (new_stmt, op);
2006 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2007 var = make_ssa_name (vec_dest);
2008 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2009 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2011 else
2013 var = make_ssa_name (vec_dest, new_stmt);
2014 gimple_call_set_lhs (new_stmt, var);
2017 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2019 if (modifier == NARROW)
2021 if ((j & 1) == 0)
2023 prev_res = var;
2024 continue;
2026 var = permute_vec_elements (prev_res, var,
2027 perm_mask, stmt, gsi);
2028 new_stmt = SSA_NAME_DEF_STMT (var);
2031 if (prev_stmt_info == NULL)
2032 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2033 else
2034 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2035 prev_stmt_info = vinfo_for_stmt (new_stmt);
2038 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2039 from the IL. */
2040 tree lhs = gimple_call_lhs (stmt);
2041 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2042 set_vinfo_for_stmt (new_stmt, stmt_info);
2043 set_vinfo_for_stmt (stmt, NULL);
2044 STMT_VINFO_STMT (stmt_info) = new_stmt;
2045 gsi_replace (gsi, new_stmt, true);
2046 return true;
2048 else if (is_store)
2050 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2051 prev_stmt_info = NULL;
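      /* Generate NCOPIES masked stores.  The first copy creates the
	 data-ref pointer and the initial vector defs for the stored value
	 and the mask; later copies bump the pointer by the vector size and
	 fetch the next defs via vect_get_vec_def_for_stmt_copy.  */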
2052 for (i = 0; i < ncopies; i++)
2054 unsigned align, misalign;
2056 if (i == 0)
2058 tree rhs = gimple_call_arg (stmt, 3);
2059 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2060 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2061 /* We should have caught mismatched types earlier. */
2062 gcc_assert (useless_type_conversion_p (vectype,
2063 TREE_TYPE (vec_rhs)));
2064 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2065 NULL_TREE, &dummy, gsi,
2066 &ptr_incr, false, &inv_p);
2067 gcc_assert (!inv_p);
2069 else
2071 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2072 &def, &dt);
2073 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2074 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2075 &def, &dt);
2076 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2077 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2078 TYPE_SIZE_UNIT (vectype));
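	  /* Record what is known about the pointer's alignment: full vector
	     alignment for an access known to be aligned, only the element
	     alignment when the misalignment is unknown (-1), otherwise the
	     exact misalignment.  */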
2081 align = TYPE_ALIGN_UNIT (vectype);
2082 if (aligned_access_p (dr))
2083 misalign = 0;
2084 else if (DR_MISALIGNMENT (dr) == -1)
2086 align = TYPE_ALIGN_UNIT (elem_type);
2087 misalign = 0;
2089 else
2090 misalign = DR_MISALIGNMENT (dr);
2091 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2092 misalign);
2093 new_stmt
2094 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2095 gimple_call_arg (stmt, 1),
2096 vec_mask, vec_rhs);
2097 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2098 if (i == 0)
2099 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2100 else
2101 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2102 prev_stmt_info = vinfo_for_stmt (new_stmt);
2105 else
2107 tree vec_mask = NULL_TREE;
2108 prev_stmt_info = NULL;
2109 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2110 for (i = 0; i < ncopies; i++)
2112 unsigned align, misalign;
2114 if (i == 0)
2116 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2117 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2118 NULL_TREE, &dummy, gsi,
2119 &ptr_incr, false, &inv_p);
2120 gcc_assert (!inv_p);
2122 else
2124 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2125 &def, &dt);
2126 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2127 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2128 TYPE_SIZE_UNIT (vectype));
2131 align = TYPE_ALIGN_UNIT (vectype);
2132 if (aligned_access_p (dr))
2133 misalign = 0;
2134 else if (DR_MISALIGNMENT (dr) == -1)
2136 align = TYPE_ALIGN_UNIT (elem_type);
2137 misalign = 0;
2139 else
2140 misalign = DR_MISALIGNMENT (dr);
2141 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2142 misalign);
2143 new_stmt
2144 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2145 gimple_call_arg (stmt, 1),
2146 vec_mask);
2147 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2148 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2149 if (i == 0)
2150 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2151 else
2152 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2153 prev_stmt_info = vinfo_for_stmt (new_stmt);
2157 if (!is_store)
2159 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2160 from the IL. */
2161 tree lhs = gimple_call_lhs (stmt);
2162 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2163 set_vinfo_for_stmt (new_stmt, stmt_info);
2164 set_vinfo_for_stmt (stmt, NULL);
2165 STMT_VINFO_STMT (stmt_info) = new_stmt;
2166 gsi_replace (gsi, new_stmt, true);
2169 return true;
2173 /* Function vectorizable_call.
2175 Check if GS performs a function call that can be vectorized.
2176 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2177 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2178 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2180 static bool
2181 vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
2182 slp_tree slp_node)
2184 gcall *stmt;
2185 tree vec_dest;
2186 tree scalar_dest;
2187 tree op, type;
2188 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2189 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2190 tree vectype_out, vectype_in;
2191 int nunits_in;
2192 int nunits_out;
2193 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2194 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2195 tree fndecl, new_temp, def, rhs_type;
2196 gimple *def_stmt;
2197 enum vect_def_type dt[3]
2198 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2199 gimple *new_stmt = NULL;
2200 int ncopies, j;
2201 vec<tree> vargs = vNULL;
2202 enum { NARROW, NONE, WIDEN } modifier;
2203 size_t i, nargs;
2204 tree lhs;
2206 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2207 return false;
2209 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2210 return false;
2212 /* Is GS a vectorizable call? */
2213 stmt = dyn_cast <gcall *> (gs);
2214 if (!stmt)
2215 return false;
2217 if (gimple_call_internal_p (stmt)
2218 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2219 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2220 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2221 slp_node);
2223 if (gimple_call_lhs (stmt) == NULL_TREE
2224 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2225 return false;
2227 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2229 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2231 /* Process function arguments. */
2232 rhs_type = NULL_TREE;
2233 vectype_in = NULL_TREE;
2234 nargs = gimple_call_num_args (stmt);
2236 /* Bail out if the function has more than three arguments; we do not have
2237 interesting builtin functions to vectorize with more than two arguments,
2238 except for fma. Having no arguments is also not good. */
2239 if (nargs == 0 || nargs > 3)
2240 return false;
2242 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2243 if (gimple_call_internal_p (stmt)
2244 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2246 nargs = 0;
2247 rhs_type = unsigned_type_node;
2250 for (i = 0; i < nargs; i++)
2252 tree opvectype;
2254 op = gimple_call_arg (stmt, i);
2256 /* We can only handle calls with arguments of the same type. */
2257 if (rhs_type
2258 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2260 if (dump_enabled_p ())
2261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2262 "argument types differ.\n");
2263 return false;
2265 if (!rhs_type)
2266 rhs_type = TREE_TYPE (op);
2268 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2269 &def_stmt, &def, &dt[i], &opvectype))
2271 if (dump_enabled_p ())
2272 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2273 "use not simple.\n");
2274 return false;
2277 if (!vectype_in)
2278 vectype_in = opvectype;
2279 else if (opvectype
2280 && opvectype != vectype_in)
2282 if (dump_enabled_p ())
2283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2284 "argument vector types differ.\n");
2285 return false;
2288 /* If all arguments are external or constant defs, use a vector type with
2289 the same size as the output vector type. */
2290 if (!vectype_in)
2291 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2292 if (vec_stmt)
2293 gcc_assert (vectype_in);
2294 if (!vectype_in)
2296 if (dump_enabled_p ())
2298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2299 "no vectype for scalar type ");
2300 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2301 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2304 return false;
2307 /* FORNOW */
2308 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2309 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
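  /* NARROW means the output vector holds twice as many (narrower) elements
     as each input vector, so every vectorized call consumes two vector defs
     per argument; WIDEN is the reverse and is not implemented by any target
     for calls (see the WIDEN case below).  */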
2310 if (nunits_in == nunits_out / 2)
2311 modifier = NARROW;
2312 else if (nunits_out == nunits_in)
2313 modifier = NONE;
2314 else if (nunits_out == nunits_in / 2)
2315 modifier = WIDEN;
2316 else
2317 return false;
2319 /* For now, we only vectorize functions if a target specific builtin
2320 is available. TODO -- in some cases, it might be profitable to
2321 insert the calls for pieces of the vector, in order to be able
2322 to vectorize other operations in the loop. */
2323 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2324 if (fndecl == NULL_TREE)
2326 if (gimple_call_internal_p (stmt)
2327 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2328 && !slp_node
2329 && loop_vinfo
2330 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2331 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2332 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2333 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2335 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2336 { 0, 1, 2, ... vf - 1 } vector. */
2337 gcc_assert (nargs == 0);
2339 else
2341 if (dump_enabled_p ())
2342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2343 "function is not vectorizable.\n");
2344 return false;
2348 gcc_assert (!gimple_vuse (stmt));
2350 if (slp_node || PURE_SLP_STMT (stmt_info))
2351 ncopies = 1;
2352 else if (modifier == NARROW)
2353 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2354 else
2355 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2357 /* Sanity check: make sure that at least one copy of the vectorized stmt
2358 needs to be generated. */
2359 gcc_assert (ncopies >= 1);
2361 if (!vec_stmt) /* transformation not required. */
2363 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2364 if (dump_enabled_p ())
2365 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2366 "\n");
2367 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2368 return true;
2371 /** Transform. **/
2373 if (dump_enabled_p ())
2374 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2376 /* Handle def. */
2377 scalar_dest = gimple_call_lhs (stmt);
2378 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2380 prev_stmt_info = NULL;
2381 switch (modifier)
2383 case NONE:
2384 for (j = 0; j < ncopies; ++j)
2386 /* Build argument list for the vectorized call. */
2387 if (j == 0)
2388 vargs.create (nargs);
2389 else
2390 vargs.truncate (0);
2392 if (slp_node)
2394 auto_vec<vec<tree> > vec_defs (nargs);
2395 vec<tree> vec_oprnds0;
2397 for (i = 0; i < nargs; i++)
2398 vargs.quick_push (gimple_call_arg (stmt, i));
2399 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2400 vec_oprnds0 = vec_defs[0];
2402 /* Arguments are ready. Create the new vector stmt. */
2403 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2405 size_t k;
2406 for (k = 0; k < nargs; k++)
2408 vec<tree> vec_oprndsk = vec_defs[k];
2409 vargs[k] = vec_oprndsk[i];
2411 new_stmt = gimple_build_call_vec (fndecl, vargs);
2412 new_temp = make_ssa_name (vec_dest, new_stmt);
2413 gimple_call_set_lhs (new_stmt, new_temp);
2414 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2415 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2418 for (i = 0; i < nargs; i++)
2420 vec<tree> vec_oprndsi = vec_defs[i];
2421 vec_oprndsi.release ();
2423 continue;
2426 for (i = 0; i < nargs; i++)
2428 op = gimple_call_arg (stmt, i);
2429 if (j == 0)
2430 vec_oprnd0
2431 = vect_get_vec_def_for_operand (op, stmt, NULL);
2432 else
2434 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2435 vec_oprnd0
2436 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2439 vargs.quick_push (vec_oprnd0);
2442 if (gimple_call_internal_p (stmt)
2443 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2445 tree *v = XALLOCAVEC (tree, nunits_out);
2446 int k;
2447 for (k = 0; k < nunits_out; ++k)
2448 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2449 tree cst = build_vector (vectype_out, v);
2450 tree new_var
2451 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2452 gimple *init_stmt = gimple_build_assign (new_var, cst);
2453 new_temp = make_ssa_name (new_var, init_stmt);
2454 gimple_assign_set_lhs (init_stmt, new_temp);
2455 vect_init_vector_1 (stmt, init_stmt, NULL);
2456 new_temp = make_ssa_name (vec_dest);
2457 new_stmt = gimple_build_assign (new_temp,
2458 gimple_assign_lhs (init_stmt));
2460 else
2462 new_stmt = gimple_build_call_vec (fndecl, vargs);
2463 new_temp = make_ssa_name (vec_dest, new_stmt);
2464 gimple_call_set_lhs (new_stmt, new_temp);
2466 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2468 if (j == 0)
2469 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2470 else
2471 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2473 prev_stmt_info = vinfo_for_stmt (new_stmt);
2476 break;
2478 case NARROW:
2479 for (j = 0; j < ncopies; ++j)
2481 /* Build argument list for the vectorized call. */
2482 if (j == 0)
2483 vargs.create (nargs * 2);
2484 else
2485 vargs.truncate (0);
2487 if (slp_node)
2489 auto_vec<vec<tree> > vec_defs (nargs);
2490 vec<tree> vec_oprnds0;
2492 for (i = 0; i < nargs; i++)
2493 vargs.quick_push (gimple_call_arg (stmt, i));
2494 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2495 vec_oprnds0 = vec_defs[0];
2497 /* Arguments are ready. Create the new vector stmt. */
2498 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2500 size_t k;
2501 vargs.truncate (0);
2502 for (k = 0; k < nargs; k++)
2504 vec<tree> vec_oprndsk = vec_defs[k];
2505 vargs.quick_push (vec_oprndsk[i]);
2506 vargs.quick_push (vec_oprndsk[i + 1]);
2508 new_stmt = gimple_build_call_vec (fndecl, vargs);
2509 new_temp = make_ssa_name (vec_dest, new_stmt);
2510 gimple_call_set_lhs (new_stmt, new_temp);
2511 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2512 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2515 for (i = 0; i < nargs; i++)
2517 vec<tree> vec_oprndsi = vec_defs[i];
2518 vec_oprndsi.release ();
2520 continue;
2523 for (i = 0; i < nargs; i++)
2525 op = gimple_call_arg (stmt, i);
2526 if (j == 0)
2528 vec_oprnd0
2529 = vect_get_vec_def_for_operand (op, stmt, NULL);
2530 vec_oprnd1
2531 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2533 else
2535 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2536 vec_oprnd0
2537 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2538 vec_oprnd1
2539 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2542 vargs.quick_push (vec_oprnd0);
2543 vargs.quick_push (vec_oprnd1);
2546 new_stmt = gimple_build_call_vec (fndecl, vargs);
2547 new_temp = make_ssa_name (vec_dest, new_stmt);
2548 gimple_call_set_lhs (new_stmt, new_temp);
2549 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2551 if (j == 0)
2552 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2553 else
2554 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2556 prev_stmt_info = vinfo_for_stmt (new_stmt);
2559 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2561 break;
2563 case WIDEN:
2564 /* No current target implements this case. */
2565 return false;
2568 vargs.release ();
2570 /* The call in STMT might prevent it from being removed by DCE.
2571 We cannot remove it here, however, because of the way the SSA name
2572 it defines is mapped to the new definition. So just replace the
2573 rhs of the statement with something harmless. */
2575 if (slp_node)
2576 return true;
2578 type = TREE_TYPE (scalar_dest);
2579 if (is_pattern_stmt_p (stmt_info))
2580 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2581 else
2582 lhs = gimple_call_lhs (stmt);
2584 if (gimple_call_internal_p (stmt)
2585 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2587 /* Replace uses of the lhs of the GOMP_SIMD_LANE call outside the loop
2588 with vf - 1 rather than 0, i.e. the last iteration of the
2589 vectorized loop. */
2590 imm_use_iterator iter;
2591 use_operand_p use_p;
2592 gimple *use_stmt;
2593 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2595 basic_block use_bb = gimple_bb (use_stmt);
2596 if (use_bb
2597 && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), use_bb))
2599 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2600 SET_USE (use_p, build_int_cst (TREE_TYPE (lhs),
2601 ncopies * nunits_out - 1));
2602 update_stmt (use_stmt);
2607 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2608 set_vinfo_for_stmt (new_stmt, stmt_info);
2609 set_vinfo_for_stmt (stmt, NULL);
2610 STMT_VINFO_STMT (stmt_info) = new_stmt;
2611 gsi_replace (gsi, new_stmt, false);
2613 return true;
2617 struct simd_call_arg_info
2619 tree vectype;
2620 tree op;
2621 enum vect_def_type dt;
2622 HOST_WIDE_INT linear_step;
2623 unsigned int align;
2624 bool simd_lane_linear;
2627 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
2628 is linear within simd lane (but not within whole loop), note it in
2629 *ARGINFO. */
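/* For example, an address computed as  base + (sizetype) lane * 8,  where
   LANE is the result of an IFN_GOMP_SIMD_LANE call, is linear within the
   simd lane with step 8 even though it is not a simple induction of the
   whole loop.  */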
2631 static void
2632 vect_simd_lane_linear (tree op, struct loop *loop,
2633 struct simd_call_arg_info *arginfo)
2635 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
2637 if (!is_gimple_assign (def_stmt)
2638 || gimple_assign_rhs_code (def_stmt) != POINTER_PLUS_EXPR
2639 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt)))
2640 return;
2642 tree base = gimple_assign_rhs1 (def_stmt);
2643 HOST_WIDE_INT linear_step = 0;
2644 tree v = gimple_assign_rhs2 (def_stmt);
2645 while (TREE_CODE (v) == SSA_NAME)
2647 tree t;
2648 def_stmt = SSA_NAME_DEF_STMT (v);
2649 if (is_gimple_assign (def_stmt))
2650 switch (gimple_assign_rhs_code (def_stmt))
2652 case PLUS_EXPR:
2653 t = gimple_assign_rhs2 (def_stmt);
2654 if (linear_step || TREE_CODE (t) != INTEGER_CST)
2655 return;
2656 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base), base, t);
2657 v = gimple_assign_rhs1 (def_stmt);
2658 continue;
2659 case MULT_EXPR:
2660 t = gimple_assign_rhs2 (def_stmt);
2661 if (linear_step || !tree_fits_shwi_p (t) || integer_zerop (t))
2662 return;
2663 linear_step = tree_to_shwi (t);
2664 v = gimple_assign_rhs1 (def_stmt);
2665 continue;
2666 CASE_CONVERT:
2667 t = gimple_assign_rhs1 (def_stmt);
2668 if (TREE_CODE (TREE_TYPE (t)) != INTEGER_TYPE
2669 || (TYPE_PRECISION (TREE_TYPE (v))
2670 < TYPE_PRECISION (TREE_TYPE (t))))
2671 return;
2672 if (!linear_step)
2673 linear_step = 1;
2674 v = t;
2675 continue;
2676 default:
2677 return;
2679 else if (is_gimple_call (def_stmt)
2680 && gimple_call_internal_p (def_stmt)
2681 && gimple_call_internal_fn (def_stmt) == IFN_GOMP_SIMD_LANE
2682 && loop->simduid
2683 && TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME
2684 && (SSA_NAME_VAR (gimple_call_arg (def_stmt, 0))
2685 == loop->simduid))
2687 if (!linear_step)
2688 linear_step = 1;
2689 arginfo->linear_step = linear_step;
2690 arginfo->op = base;
2691 arginfo->simd_lane_linear = true;
2692 return;
2697 /* Function vectorizable_simd_clone_call.
2699 Check if STMT performs a function call that can be vectorized
2700 by calling a simd clone of the function.
2701 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2702 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2703 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2705 static bool
2706 vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
2707 gimple **vec_stmt, slp_tree slp_node)
2709 tree vec_dest;
2710 tree scalar_dest;
2711 tree op, type;
2712 tree vec_oprnd0 = NULL_TREE;
2713 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2714 tree vectype;
2715 unsigned int nunits;
2716 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2717 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2718 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2719 tree fndecl, new_temp, def;
2720 gimple *def_stmt;
2721 gimple *new_stmt = NULL;
2722 int ncopies, j;
2723 vec<simd_call_arg_info> arginfo = vNULL;
2724 vec<tree> vargs = vNULL;
2725 size_t i, nargs;
2726 tree lhs, rtype, ratype;
2727 vec<constructor_elt, va_gc> *ret_ctor_elts;
2729 /* Is STMT a vectorizable call? */
2730 if (!is_gimple_call (stmt))
2731 return false;
2733 fndecl = gimple_call_fndecl (stmt);
2734 if (fndecl == NULL_TREE)
2735 return false;
2737 struct cgraph_node *node = cgraph_node::get (fndecl);
2738 if (node == NULL || node->simd_clones == NULL)
2739 return false;
2741 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2742 return false;
2744 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2745 return false;
2747 if (gimple_call_lhs (stmt)
2748 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2749 return false;
2751 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2753 vectype = STMT_VINFO_VECTYPE (stmt_info);
2755 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2756 return false;
2758 /* FORNOW */
2759 if (slp_node || PURE_SLP_STMT (stmt_info))
2760 return false;
2762 /* Process function arguments. */
2763 nargs = gimple_call_num_args (stmt);
2765 /* Bail out if the function has zero arguments. */
2766 if (nargs == 0)
2767 return false;
2769 arginfo.create (nargs);
2771 for (i = 0; i < nargs; i++)
2773 simd_call_arg_info thisarginfo;
2774 affine_iv iv;
2776 thisarginfo.linear_step = 0;
2777 thisarginfo.align = 0;
2778 thisarginfo.op = NULL_TREE;
2779 thisarginfo.simd_lane_linear = false;
2781 op = gimple_call_arg (stmt, i);
2782 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2783 &def_stmt, &def, &thisarginfo.dt,
2784 &thisarginfo.vectype)
2785 || thisarginfo.dt == vect_uninitialized_def)
2787 if (dump_enabled_p ())
2788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2789 "use not simple.\n");
2790 arginfo.release ();
2791 return false;
2794 if (thisarginfo.dt == vect_constant_def
2795 || thisarginfo.dt == vect_external_def)
2796 gcc_assert (thisarginfo.vectype == NULL_TREE);
2797 else
2798 gcc_assert (thisarginfo.vectype != NULL_TREE);
2800 /* For linear arguments, the analyze phase should have saved
2801 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2802 if (i * 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2803 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2])
2805 gcc_assert (vec_stmt);
2806 thisarginfo.linear_step
2807 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2]);
2808 thisarginfo.op
2809 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 1];
2810 thisarginfo.simd_lane_linear
2811 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 3]
2812 == boolean_true_node);
2813 /* If the loop has been peeled for alignment, we need to adjust it. */
2814 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2815 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2816 if (n1 != n2 && !thisarginfo.simd_lane_linear)
2818 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2819 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 3 + 2];
2820 tree opt = TREE_TYPE (thisarginfo.op);
2821 bias = fold_convert (TREE_TYPE (step), bias);
2822 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2823 thisarginfo.op
2824 = fold_build2 (POINTER_TYPE_P (opt)
2825 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2826 thisarginfo.op, bias);
2829 else if (!vec_stmt
2830 && thisarginfo.dt != vect_constant_def
2831 && thisarginfo.dt != vect_external_def
2832 && loop_vinfo
2833 && TREE_CODE (op) == SSA_NAME
2834 && simple_iv (loop, loop_containing_stmt (stmt), op,
2835 &iv, false)
2836 && tree_fits_shwi_p (iv.step))
2838 thisarginfo.linear_step = tree_to_shwi (iv.step);
2839 thisarginfo.op = iv.base;
2841 else if ((thisarginfo.dt == vect_constant_def
2842 || thisarginfo.dt == vect_external_def)
2843 && POINTER_TYPE_P (TREE_TYPE (op)))
2844 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2845 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
2846 linear too. */
2847 if (POINTER_TYPE_P (TREE_TYPE (op))
2848 && !thisarginfo.linear_step
2849 && !vec_stmt
2850 && thisarginfo.dt != vect_constant_def
2851 && thisarginfo.dt != vect_external_def
2852 && loop_vinfo
2853 && !slp_node
2854 && TREE_CODE (op) == SSA_NAME)
2855 vect_simd_lane_linear (op, loop, &thisarginfo);
2857 arginfo.quick_push (thisarginfo);
2860 unsigned int badness = 0;
2861 struct cgraph_node *bestn = NULL;
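  /* Pick the best simd clone for this call: clones whose simdlen exceeds
     the vectorization factor, or whose argument kinds, types, linear steps
     or alignment requirements do not match what was recorded in ARGINFO,
     are rejected; shorter simdlens and target-reported badness are
     penalized; inbranch clones are skipped for now.  */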
2862 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2863 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2864 else
2865 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2866 n = n->simdclone->next_clone)
2868 unsigned int this_badness = 0;
2869 if (n->simdclone->simdlen
2870 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2871 || n->simdclone->nargs != nargs)
2872 continue;
2873 if (n->simdclone->simdlen
2874 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2875 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2876 - exact_log2 (n->simdclone->simdlen)) * 1024;
2877 if (n->simdclone->inbranch)
2878 this_badness += 2048;
2879 int target_badness = targetm.simd_clone.usable (n);
2880 if (target_badness < 0)
2881 continue;
2882 this_badness += target_badness * 512;
2883 /* FORNOW: Have to add code to add the mask argument. */
2884 if (n->simdclone->inbranch)
2885 continue;
2886 for (i = 0; i < nargs; i++)
2888 switch (n->simdclone->args[i].arg_type)
2890 case SIMD_CLONE_ARG_TYPE_VECTOR:
2891 if (!useless_type_conversion_p
2892 (n->simdclone->args[i].orig_type,
2893 TREE_TYPE (gimple_call_arg (stmt, i))))
2894 i = -1;
2895 else if (arginfo[i].dt == vect_constant_def
2896 || arginfo[i].dt == vect_external_def
2897 || arginfo[i].linear_step)
2898 this_badness += 64;
2899 break;
2900 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2901 if (arginfo[i].dt != vect_constant_def
2902 && arginfo[i].dt != vect_external_def)
2903 i = -1;
2904 break;
2905 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2906 if (arginfo[i].dt == vect_constant_def
2907 || arginfo[i].dt == vect_external_def
2908 || (arginfo[i].linear_step
2909 != n->simdclone->args[i].linear_step))
2910 i = -1;
2911 break;
2912 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2913 /* FORNOW */
2914 i = -1;
2915 break;
2916 case SIMD_CLONE_ARG_TYPE_MASK:
2917 gcc_unreachable ();
2919 if (i == (size_t) -1)
2920 break;
2921 if (n->simdclone->args[i].alignment > arginfo[i].align)
2923 i = -1;
2924 break;
2926 if (arginfo[i].align)
2927 this_badness += (exact_log2 (arginfo[i].align)
2928 - exact_log2 (n->simdclone->args[i].alignment));
2930 if (i == (size_t) -1)
2931 continue;
2932 if (bestn == NULL || this_badness < badness)
2934 bestn = n;
2935 badness = this_badness;
2939 if (bestn == NULL)
2941 arginfo.release ();
2942 return false;
2945 for (i = 0; i < nargs; i++)
2946 if ((arginfo[i].dt == vect_constant_def
2947 || arginfo[i].dt == vect_external_def)
2948 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2950 arginfo[i].vectype
2951 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2952 i)));
2953 if (arginfo[i].vectype == NULL
2954 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2955 > bestn->simdclone->simdlen))
2957 arginfo.release ();
2958 return false;
2962 fndecl = bestn->decl;
2963 nunits = bestn->simdclone->simdlen;
2964 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2966 /* If the function isn't const, only allow it in simd loops where the user
2967 has asserted that at least nunits consecutive iterations can be
2968 performed using SIMD instructions. */
2969 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2970 && gimple_vuse (stmt))
2972 arginfo.release ();
2973 return false;
2976 /* Sanity check: make sure that at least one copy of the vectorized stmt
2977 needs to be generated. */
2978 gcc_assert (ncopies >= 1);
2980 if (!vec_stmt) /* transformation not required. */
2982 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2983 for (i = 0; i < nargs; i++)
2984 if (bestn->simdclone->args[i].arg_type
2985 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2987 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 3
2988 + 1);
2989 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2990 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2991 ? size_type_node : TREE_TYPE (arginfo[i].op);
2992 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2993 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2994 tree sll = arginfo[i].simd_lane_linear
2995 ? boolean_true_node : boolean_false_node;
2996 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (sll);
2998 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2999 if (dump_enabled_p ())
3000 dump_printf_loc (MSG_NOTE, vect_location,
3001 "=== vectorizable_simd_clone_call ===\n");
3002 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
3003 arginfo.release ();
3004 return true;
3007 /** Transform. **/
3009 if (dump_enabled_p ())
3010 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
3012 /* Handle def. */
3013 scalar_dest = gimple_call_lhs (stmt);
3014 vec_dest = NULL_TREE;
3015 rtype = NULL_TREE;
3016 ratype = NULL_TREE;
3017 if (scalar_dest)
3019 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3020 rtype = TREE_TYPE (TREE_TYPE (fndecl));
3021 if (TREE_CODE (rtype) == ARRAY_TYPE)
3023 ratype = rtype;
3024 rtype = TREE_TYPE (ratype);
3028 prev_stmt_info = NULL;
3029 for (j = 0; j < ncopies; ++j)
3031 /* Build argument list for the vectorized call. */
3032 if (j == 0)
3033 vargs.create (nargs);
3034 else
3035 vargs.truncate (0);
3037 for (i = 0; i < nargs; i++)
3039 unsigned int k, l, m, o;
3040 tree atype;
3041 op = gimple_call_arg (stmt, i);
3042 switch (bestn->simdclone->args[i].arg_type)
3044 case SIMD_CLONE_ARG_TYPE_VECTOR:
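	    /* The clone may take this argument in vectors of a different
	       width than ARGINFO's vectype: narrower pieces are extracted
	       with BIT_FIELD_REFs, wider ones are assembled from several
	       defs with a CONSTRUCTOR (or the def is passed through
	       directly when the widths already match).  */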
3045 atype = bestn->simdclone->args[i].vector_type;
3046 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
3047 for (m = j * o; m < (j + 1) * o; m++)
3049 if (TYPE_VECTOR_SUBPARTS (atype)
3050 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
3052 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
3053 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
3054 / TYPE_VECTOR_SUBPARTS (atype));
3055 gcc_assert ((k & (k - 1)) == 0);
3056 if (m == 0)
3057 vec_oprnd0
3058 = vect_get_vec_def_for_operand (op, stmt, NULL);
3059 else
3061 vec_oprnd0 = arginfo[i].op;
3062 if ((m & (k - 1)) == 0)
3063 vec_oprnd0
3064 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3065 vec_oprnd0);
3067 arginfo[i].op = vec_oprnd0;
3068 vec_oprnd0
3069 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
3070 size_int (prec),
3071 bitsize_int ((m & (k - 1)) * prec));
3072 new_stmt
3073 = gimple_build_assign (make_ssa_name (atype),
3074 vec_oprnd0);
3075 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3076 vargs.safe_push (gimple_assign_lhs (new_stmt));
3078 else
3080 k = (TYPE_VECTOR_SUBPARTS (atype)
3081 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
3082 gcc_assert ((k & (k - 1)) == 0);
3083 vec<constructor_elt, va_gc> *ctor_elts;
3084 if (k != 1)
3085 vec_alloc (ctor_elts, k);
3086 else
3087 ctor_elts = NULL;
3088 for (l = 0; l < k; l++)
3090 if (m == 0 && l == 0)
3091 vec_oprnd0
3092 = vect_get_vec_def_for_operand (op, stmt, NULL);
3093 else
3094 vec_oprnd0
3095 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3096 arginfo[i].op);
3097 arginfo[i].op = vec_oprnd0;
3098 if (k == 1)
3099 break;
3100 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3101 vec_oprnd0);
3103 if (k == 1)
3104 vargs.safe_push (vec_oprnd0);
3105 else
3107 vec_oprnd0 = build_constructor (atype, ctor_elts);
3108 new_stmt
3109 = gimple_build_assign (make_ssa_name (atype),
3110 vec_oprnd0);
3111 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3112 vargs.safe_push (gimple_assign_lhs (new_stmt));
3116 break;
3117 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3118 vargs.safe_push (op);
3119 break;
3120 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
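	    /* Unless the argument is already linear within the simd lane, a
	       linear argument is rebuilt inside the vector loop: the first
	       copy creates a PHI that starts at its base and advances by
	       linear_step * simdlen * ncopies per loop iteration; later
	       copies add linear_step * simdlen * j to the PHI result.  */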
3121 if (j == 0)
3123 gimple_seq stmts;
3124 arginfo[i].op
3125 = force_gimple_operand (arginfo[i].op, &stmts, true,
3126 NULL_TREE);
3127 if (stmts != NULL)
3129 basic_block new_bb;
3130 edge pe = loop_preheader_edge (loop);
3131 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3132 gcc_assert (!new_bb);
3134 if (arginfo[i].simd_lane_linear)
3136 vargs.safe_push (arginfo[i].op);
3137 break;
3139 tree phi_res = copy_ssa_name (op);
3140 gphi *new_phi = create_phi_node (phi_res, loop->header);
3141 set_vinfo_for_stmt (new_phi,
3142 new_stmt_vec_info (new_phi, loop_vinfo,
3143 NULL));
3144 add_phi_arg (new_phi, arginfo[i].op,
3145 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3146 enum tree_code code
3147 = POINTER_TYPE_P (TREE_TYPE (op))
3148 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3149 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3150 ? sizetype : TREE_TYPE (op);
3151 widest_int cst
3152 = wi::mul (bestn->simdclone->args[i].linear_step,
3153 ncopies * nunits);
3154 tree tcst = wide_int_to_tree (type, cst);
3155 tree phi_arg = copy_ssa_name (op);
3156 new_stmt
3157 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3158 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3159 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3160 set_vinfo_for_stmt (new_stmt,
3161 new_stmt_vec_info (new_stmt, loop_vinfo,
3162 NULL));
3163 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3164 UNKNOWN_LOCATION);
3165 arginfo[i].op = phi_res;
3166 vargs.safe_push (phi_res);
3168 else
3170 enum tree_code code
3171 = POINTER_TYPE_P (TREE_TYPE (op))
3172 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3173 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3174 ? sizetype : TREE_TYPE (op);
3175 widest_int cst
3176 = wi::mul (bestn->simdclone->args[i].linear_step,
3177 j * nunits);
3178 tree tcst = wide_int_to_tree (type, cst);
3179 new_temp = make_ssa_name (TREE_TYPE (op));
3180 new_stmt = gimple_build_assign (new_temp, code,
3181 arginfo[i].op, tcst);
3182 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3183 vargs.safe_push (new_temp);
3185 break;
3186 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3187 default:
3188 gcc_unreachable ();
3192 new_stmt = gimple_build_call_vec (fndecl, vargs);
3193 if (vec_dest)
3195 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3196 if (ratype)
3197 new_temp = create_tmp_var (ratype);
3198 else if (TYPE_VECTOR_SUBPARTS (vectype)
3199 == TYPE_VECTOR_SUBPARTS (rtype))
3200 new_temp = make_ssa_name (vec_dest, new_stmt);
3201 else
3202 new_temp = make_ssa_name (rtype, new_stmt);
3203 gimple_call_set_lhs (new_stmt, new_temp);
3205 vect_finish_stmt_generation (stmt, new_stmt, gsi);
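      /* Unpack the clone's return value.  When VECTYPE holds fewer elements
	 than the clone's simdlen, the returned vector (or array of vectors)
	 is split into VECTYPE-sized pieces with BIT_FIELD_REFs or MEM_REFs;
	 when it holds more, partial results from successive copies are
	 collected into a CONSTRUCTOR; a plain array return is loaded through
	 a MEM_REF.  Array temporaries are clobbered once they have been
	 read.  */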
3207 if (vec_dest)
3209 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3211 unsigned int k, l;
3212 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3213 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3214 gcc_assert ((k & (k - 1)) == 0);
3215 for (l = 0; l < k; l++)
3217 tree t;
3218 if (ratype)
3220 t = build_fold_addr_expr (new_temp);
3221 t = build2 (MEM_REF, vectype, t,
3222 build_int_cst (TREE_TYPE (t),
3223 l * prec / BITS_PER_UNIT));
3225 else
3226 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3227 size_int (prec), bitsize_int (l * prec));
3228 new_stmt
3229 = gimple_build_assign (make_ssa_name (vectype), t);
3230 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3231 if (j == 0 && l == 0)
3232 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3233 else
3234 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3236 prev_stmt_info = vinfo_for_stmt (new_stmt);
3239 if (ratype)
3241 tree clobber = build_constructor (ratype, NULL);
3242 TREE_THIS_VOLATILE (clobber) = 1;
3243 new_stmt = gimple_build_assign (new_temp, clobber);
3244 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3246 continue;
3248 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3250 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3251 / TYPE_VECTOR_SUBPARTS (rtype));
3252 gcc_assert ((k & (k - 1)) == 0);
3253 if ((j & (k - 1)) == 0)
3254 vec_alloc (ret_ctor_elts, k);
3255 if (ratype)
3257 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3258 for (m = 0; m < o; m++)
3260 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3261 size_int (m), NULL_TREE, NULL_TREE);
3262 new_stmt
3263 = gimple_build_assign (make_ssa_name (rtype), tem);
3264 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3265 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3266 gimple_assign_lhs (new_stmt));
3268 tree clobber = build_constructor (ratype, NULL);
3269 TREE_THIS_VOLATILE (clobber) = 1;
3270 new_stmt = gimple_build_assign (new_temp, clobber);
3271 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3273 else
3274 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3275 if ((j & (k - 1)) != k - 1)
3276 continue;
3277 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3278 new_stmt
3279 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3280 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3282 if ((unsigned) j == k - 1)
3283 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3284 else
3285 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3287 prev_stmt_info = vinfo_for_stmt (new_stmt);
3288 continue;
3290 else if (ratype)
3292 tree t = build_fold_addr_expr (new_temp);
3293 t = build2 (MEM_REF, vectype, t,
3294 build_int_cst (TREE_TYPE (t), 0));
3295 new_stmt
3296 = gimple_build_assign (make_ssa_name (vec_dest), t);
3297 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3298 tree clobber = build_constructor (ratype, NULL);
3299 TREE_THIS_VOLATILE (clobber) = 1;
3300 vect_finish_stmt_generation (stmt,
3301 gimple_build_assign (new_temp,
3302 clobber), gsi);
3306 if (j == 0)
3307 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3308 else
3309 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3311 prev_stmt_info = vinfo_for_stmt (new_stmt);
3314 vargs.release ();
3316 /* The call in STMT might prevent it from being removed by DCE.
3317 We cannot remove it here, however, because of the way the SSA name
3318 it defines is mapped to the new definition. So just replace the
3319 rhs of the statement with something harmless. */
3321 if (slp_node)
3322 return true;
3324 if (scalar_dest)
3326 type = TREE_TYPE (scalar_dest);
3327 if (is_pattern_stmt_p (stmt_info))
3328 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3329 else
3330 lhs = gimple_call_lhs (stmt);
3331 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3333 else
3334 new_stmt = gimple_build_nop ();
3335 set_vinfo_for_stmt (new_stmt, stmt_info);
3336 set_vinfo_for_stmt (stmt, NULL);
3337 STMT_VINFO_STMT (stmt_info) = new_stmt;
3338 gsi_replace (gsi, new_stmt, true);
3339 unlink_stmt_vdef (stmt);
3341 return true;
3345 /* Function vect_gen_widened_results_half
3347 Create a vector stmt whose code, operand type, and result variable are
3348 CODE, OP_TYPE, and VEC_DEST, and whose arguments are VEC_OPRND0 and
3349 VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3350 If CODE is a CALL_EXPR, a call to DECL needs to be created instead
3351 (DECL is a function-decl of a target builtin).
3352 STMT is the original scalar stmt that we are vectorizing. */
3354 static gimple *
3355 vect_gen_widened_results_half (enum tree_code code,
3356 tree decl,
3357 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3358 tree vec_dest, gimple_stmt_iterator *gsi,
3359 gimple *stmt)
3361 gimple *new_stmt;
3362 tree new_temp;
3364 /* Generate half of the widened result: */
3365 if (code == CALL_EXPR)
3367 /* Target specific support */
3368 if (op_type == binary_op)
3369 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3370 else
3371 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3372 new_temp = make_ssa_name (vec_dest, new_stmt);
3373 gimple_call_set_lhs (new_stmt, new_temp);
3375 else
3377 /* Generic support */
3378 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3379 if (op_type != binary_op)
3380 vec_oprnd1 = NULL;
3381 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3382 new_temp = make_ssa_name (vec_dest, new_stmt);
3383 gimple_assign_set_lhs (new_stmt, new_temp);
3385 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3387 return new_stmt;
3391 /* Get vectorized definitions for loop-based vectorization. For the first
3392 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3393 the scalar operand), and for the rest we get a copy with
3394 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3395 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3396 The vectors are collected into VEC_OPRNDS. */
3398 static void
3399 vect_get_loop_based_defs (tree *oprnd, gimple *stmt, enum vect_def_type dt,
3400 vec<tree> *vec_oprnds, int multi_step_cvt)
3402 tree vec_oprnd;
3404 /* Get first vector operand. */
3405 /* All the vector operands except the very first one (which is the scalar
3406 operand) are stmt copies. */
3407 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3408 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3409 else
3410 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3412 vec_oprnds->quick_push (vec_oprnd);
3414 /* Get second vector operand. */
3415 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3416 vec_oprnds->quick_push (vec_oprnd);
3418 *oprnd = vec_oprnd;
3420 /* For conversion in multiple steps, continue to get operands
3421 recursively. */
3422 if (multi_step_cvt)
3423 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3427 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3428 For multi-step conversions store the resulting vectors and call the function
3429 recursively. */
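/* E.g. a two-step demotion from int to char elements first packs pairs of
   int vectors into short vectors and then pairs of short vectors into char
   vectors, so each level of the recursion is left with half as many operand
   vectors as the previous one.  */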
3431 static void
3432 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3433 int multi_step_cvt, gimple *stmt,
3434 vec<tree> vec_dsts,
3435 gimple_stmt_iterator *gsi,
3436 slp_tree slp_node, enum tree_code code,
3437 stmt_vec_info *prev_stmt_info)
3439 unsigned int i;
3440 tree vop0, vop1, new_tmp, vec_dest;
3441 gimple *new_stmt;
3442 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3444 vec_dest = vec_dsts.pop ();
3446 for (i = 0; i < vec_oprnds->length (); i += 2)
3448 /* Create demotion operation. */
3449 vop0 = (*vec_oprnds)[i];
3450 vop1 = (*vec_oprnds)[i + 1];
3451 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3452 new_tmp = make_ssa_name (vec_dest, new_stmt);
3453 gimple_assign_set_lhs (new_stmt, new_tmp);
3454 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3456 if (multi_step_cvt)
3457 /* Store the resulting vector for next recursive call. */
3458 (*vec_oprnds)[i/2] = new_tmp;
3459 else
3461 /* This is the last step of the conversion sequence. Store the
3462 vectors in SLP_NODE or in the vector info of the scalar statement
3463 (or in the STMT_VINFO_RELATED_STMT chain). */
3464 if (slp_node)
3465 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3466 else
3468 if (!*prev_stmt_info)
3469 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3470 else
3471 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3473 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3478 /* For multi-step demotion operations we first generate demotion operations
3479 from the source type to the intermediate types, and then combine the
3480 results (stored in VEC_OPRNDS) with a demotion operation to the
3481 destination type. */
3482 if (multi_step_cvt)
3484 /* At each level of recursion we have half of the operands we had at the
3485 previous level. */
3486 vec_oprnds->truncate ((i+1)/2);
3487 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3488 stmt, vec_dsts, gsi, slp_node,
3489 VEC_PACK_TRUNC_EXPR,
3490 prev_stmt_info);
3493 vec_dsts.quick_push (vec_dest);
3497 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3498 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3499 the resulting vectors and call the function recursively. */
3501 static void
3502 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3503 vec<tree> *vec_oprnds1,
3504 gimple *stmt, tree vec_dest,
3505 gimple_stmt_iterator *gsi,
3506 enum tree_code code1,
3507 enum tree_code code2, tree decl1,
3508 tree decl2, int op_type)
3510 int i;
3511 tree vop0, vop1, new_tmp1, new_tmp2;
3512 gimple *new_stmt1, *new_stmt2;
3513 vec<tree> vec_tmp = vNULL;
3515 vec_tmp.create (vec_oprnds0->length () * 2);
3516 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3518 if (op_type == binary_op)
3519 vop1 = (*vec_oprnds1)[i];
3520 else
3521 vop1 = NULL_TREE;
3523 /* Generate the two halves of promotion operation. */
3524 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3525 op_type, vec_dest, gsi, stmt);
3526 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3527 op_type, vec_dest, gsi, stmt);
3528 if (is_gimple_call (new_stmt1))
3530 new_tmp1 = gimple_call_lhs (new_stmt1);
3531 new_tmp2 = gimple_call_lhs (new_stmt2);
3533 else
3535 new_tmp1 = gimple_assign_lhs (new_stmt1);
3536 new_tmp2 = gimple_assign_lhs (new_stmt2);
3539 /* Store the results for the next step. */
3540 vec_tmp.quick_push (new_tmp1);
3541 vec_tmp.quick_push (new_tmp2);
3544 vec_oprnds0->release ();
3545 *vec_oprnds0 = vec_tmp;
3549 /* Check if STMT performs a conversion operation that can be vectorized.
3550 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3551 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3552 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3554 static bool
3555 vectorizable_conversion (gimple *stmt, gimple_stmt_iterator *gsi,
3556 gimple **vec_stmt, slp_tree slp_node)
3558 tree vec_dest;
3559 tree scalar_dest;
3560 tree op0, op1 = NULL_TREE;
3561 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3562 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3563 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3564 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3565 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3566 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3567 tree new_temp;
3568 tree def;
3569 gimple *def_stmt;
3570 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3571 gimple *new_stmt = NULL;
3572 stmt_vec_info prev_stmt_info;
3573 int nunits_in;
3574 int nunits_out;
3575 tree vectype_out, vectype_in;
3576 int ncopies, i, j;
3577 tree lhs_type, rhs_type;
3578 enum { NARROW, NONE, WIDEN } modifier;
3579 vec<tree> vec_oprnds0 = vNULL;
3580 vec<tree> vec_oprnds1 = vNULL;
3581 tree vop0;
3582 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3583 int multi_step_cvt = 0;
3584 vec<tree> vec_dsts = vNULL;
3585 vec<tree> interm_types = vNULL;
3586 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3587 int op_type;
3588 machine_mode rhs_mode;
3589 unsigned short fltsz;
3591 /* Is STMT a vectorizable conversion? */
3593 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3594 return false;
3596 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3597 return false;
3599 if (!is_gimple_assign (stmt))
3600 return false;
3602 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3603 return false;
3605 code = gimple_assign_rhs_code (stmt);
3606 if (!CONVERT_EXPR_CODE_P (code)
3607 && code != FIX_TRUNC_EXPR
3608 && code != FLOAT_EXPR
3609 && code != WIDEN_MULT_EXPR
3610 && code != WIDEN_LSHIFT_EXPR)
3611 return false;
3613 op_type = TREE_CODE_LENGTH (code);
3615 /* Check types of lhs and rhs. */
3616 scalar_dest = gimple_assign_lhs (stmt);
3617 lhs_type = TREE_TYPE (scalar_dest);
3618 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3620 op0 = gimple_assign_rhs1 (stmt);
3621 rhs_type = TREE_TYPE (op0);
3623 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3624 && !((INTEGRAL_TYPE_P (lhs_type)
3625 && INTEGRAL_TYPE_P (rhs_type))
3626 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3627 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3628 return false;
3630 if ((INTEGRAL_TYPE_P (lhs_type)
3631 && (TYPE_PRECISION (lhs_type)
3632 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3633 || (INTEGRAL_TYPE_P (rhs_type)
3634 && (TYPE_PRECISION (rhs_type)
3635 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3637 if (dump_enabled_p ())
3638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3639 "type conversion to/from bit-precision unsupported."
3640 "\n");
3641 return false;
3644 /* Check the operands of the operation. */
3645 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3646 &def_stmt, &def, &dt[0], &vectype_in))
3648 if (dump_enabled_p ())
3649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3650 "use not simple.\n");
3651 return false;
3653 if (op_type == binary_op)
3655 bool ok;
3657 op1 = gimple_assign_rhs2 (stmt);
3658 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3659 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3660 OP1. */
3661 if (CONSTANT_CLASS_P (op0))
3662 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3663 &def_stmt, &def, &dt[1], &vectype_in);
3664 else
3665 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3666 &def, &dt[1]);
3668 if (!ok)
3670 if (dump_enabled_p ())
3671 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3672 "use not simple.\n");
3673 return false;
3677 /* If op0 is an external or constant def, use a vector type of
3678 the same size as the output vector type. */
3679 if (!vectype_in)
3680 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3681 if (vec_stmt)
3682 gcc_assert (vectype_in);
3683 if (!vectype_in)
3685 if (dump_enabled_p ())
3687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3688 "no vectype for scalar type ");
3689 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3690 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3693 return false;
3696 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3697 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
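  /* Fewer elements per input vector than per output vector means each
     element becomes narrower (NARROW, e.g. double -> float); equal counts
     need a simple conversion (NONE); more input elements per vector mean a
     widening conversion (WIDEN, e.g. float -> double).  */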
3698 if (nunits_in < nunits_out)
3699 modifier = NARROW;
3700 else if (nunits_out == nunits_in)
3701 modifier = NONE;
3702 else
3703 modifier = WIDEN;
3705 /* Multiple types in SLP are handled by creating the appropriate number of
3706 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3707 case of SLP. */
3708 if (slp_node || PURE_SLP_STMT (stmt_info))
3709 ncopies = 1;
3710 else if (modifier == NARROW)
3711 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3712 else
3713 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3715 /* Sanity check: make sure that at least one copy of the vectorized stmt
3716 needs to be generated. */
3717 gcc_assert (ncopies >= 1);
3719 /* Supportable by target? */
3720 switch (modifier)
3722 case NONE:
3723 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3724 return false;
3725 if (supportable_convert_operation (code, vectype_out, vectype_in,
3726 &decl1, &code1))
3727 break;
3728 /* FALLTHRU */
3729 unsupported:
3730 if (dump_enabled_p ())
3731 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3732 "conversion not supported by target.\n");
3733 return false;
3735 case WIDEN:
3736 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3737 &code1, &code2, &multi_step_cvt,
3738 &interm_types))
3740 /* Binary widening operation can only be supported directly by the
3741 architecture. */
3742 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3743 break;
3746 if (code != FLOAT_EXPR
3747 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3748 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3749 goto unsupported;
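      /* An integer -> float widening the target cannot do directly is
	 attempted as integer -> wider integer -> float: step through
	 successively wider integer modes, up to the size of the float type,
	 looking for a pair of supported operations.  */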
3751 rhs_mode = TYPE_MODE (rhs_type);
3752 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3753 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3754 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3755 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3757 cvt_type
3758 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3759 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3760 if (cvt_type == NULL_TREE)
3761 goto unsupported;
3763 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3765 if (!supportable_convert_operation (code, vectype_out,
3766 cvt_type, &decl1, &codecvt1))
3767 goto unsupported;
3769 else if (!supportable_widening_operation (code, stmt, vectype_out,
3770 cvt_type, &codecvt1,
3771 &codecvt2, &multi_step_cvt,
3772 &interm_types))
3773 continue;
3774 else
3775 gcc_assert (multi_step_cvt == 0);
3777 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3778 vectype_in, &code1, &code2,
3779 &multi_step_cvt, &interm_types))
3780 break;
3783 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3784 goto unsupported;
3786 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3787 codecvt2 = ERROR_MARK;
3788 else
3790 multi_step_cvt++;
3791 interm_types.safe_push (cvt_type);
3792 cvt_type = NULL_TREE;
3794 break;
3796 case NARROW:
3797 gcc_assert (op_type == unary_op);
3798 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3799 &code1, &multi_step_cvt,
3800 &interm_types))
3801 break;
3803 if (code != FIX_TRUNC_EXPR
3804 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3805 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3806 goto unsupported;
3808 rhs_mode = TYPE_MODE (rhs_type);
3809 cvt_type
3810 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3811 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3812 if (cvt_type == NULL_TREE)
3813 goto unsupported;
3814 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3815 &decl1, &codecvt1))
3816 goto unsupported;
3817 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3818 &code1, &multi_step_cvt,
3819 &interm_types))
3820 break;
3821 goto unsupported;
3823 default:
3824 gcc_unreachable ();
3827 if (!vec_stmt) /* transformation not required. */
3829 if (dump_enabled_p ())
3830 dump_printf_loc (MSG_NOTE, vect_location,
3831 "=== vectorizable_conversion ===\n");
3832 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3834 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3835 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3837 else if (modifier == NARROW)
3839 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3840 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3842 else
3844 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3845 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3847 interm_types.release ();
3848 return true;
3851 /** Transform. **/
3852 if (dump_enabled_p ())
3853 dump_printf_loc (MSG_NOTE, vect_location,
3854 "transform conversion. ncopies = %d.\n", ncopies);
3856 if (op_type == binary_op)
3858 if (CONSTANT_CLASS_P (op0))
3859 op0 = fold_convert (TREE_TYPE (op1), op0);
3860 else if (CONSTANT_CLASS_P (op1))
3861 op1 = fold_convert (TREE_TYPE (op0), op1);
3864 /* In case of multi-step conversion, we first generate conversion operations
3865 to the intermediate types, and then from those types to the final one.
3866 We create vector destinations for the intermediate type (TYPES) received
3867 from supportable_*_operation, and store them in the correct order
3868 for future use in vect_create_vectorized_*_stmts (). */
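/* An illustrative sketch (not from the original sources): widening a
vector of signed chars to floats on a target with no direct conversion
may first widen char -> short -> int and then convert int -> float;
a vector destination is created here for each of those intermediate
types. */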
3869 vec_dsts.create (multi_step_cvt + 1);
3870 vec_dest = vect_create_destination_var (scalar_dest,
3871 (cvt_type && modifier == WIDEN)
3872 ? cvt_type : vectype_out);
3873 vec_dsts.quick_push (vec_dest);
3875 if (multi_step_cvt)
3877 for (i = interm_types.length () - 1;
3878 interm_types.iterate (i, &intermediate_type); i--)
3880 vec_dest = vect_create_destination_var (scalar_dest,
3881 intermediate_type);
3882 vec_dsts.quick_push (vec_dest);
3886 if (cvt_type)
3887 vec_dest = vect_create_destination_var (scalar_dest,
3888 modifier == WIDEN
3889 ? vectype_out : cvt_type);
3891 if (!slp_node)
3893 if (modifier == WIDEN)
3895 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3896 if (op_type == binary_op)
3897 vec_oprnds1.create (1);
3899 else if (modifier == NARROW)
3900 vec_oprnds0.create (
3901 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3903 else if (code == WIDEN_LSHIFT_EXPR)
3904 vec_oprnds1.create (slp_node->vec_stmts_size);
3906 last_oprnd = op0;
3907 prev_stmt_info = NULL;
3908 switch (modifier)
3910 case NONE:
3911 for (j = 0; j < ncopies; j++)
3913 if (j == 0)
3914 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3915 -1);
3916 else
3917 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3919 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3921 /* Arguments are ready, create the new vector stmt. */
3922 if (code1 == CALL_EXPR)
3924 new_stmt = gimple_build_call (decl1, 1, vop0);
3925 new_temp = make_ssa_name (vec_dest, new_stmt);
3926 gimple_call_set_lhs (new_stmt, new_temp);
3928 else
3930 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3931 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3932 new_temp = make_ssa_name (vec_dest, new_stmt);
3933 gimple_assign_set_lhs (new_stmt, new_temp);
3936 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3937 if (slp_node)
3938 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3939 else
3941 if (!prev_stmt_info)
3942 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3943 else
3944 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3945 prev_stmt_info = vinfo_for_stmt (new_stmt);
3949 break;
3951 case WIDEN:
3952 /* In case the vectorization factor (VF) is bigger than the number
3953 of elements that we can fit in a vectype (nunits), we have to
3954 generate more than one vector stmt, i.e. we need to "unroll"
3955 the vector stmt by a factor VF/nunits. */
3956 for (j = 0; j < ncopies; j++)
3958 /* Handle uses. */
3959 if (j == 0)
3961 if (slp_node)
3963 if (code == WIDEN_LSHIFT_EXPR)
3965 unsigned int k;
3967 vec_oprnd1 = op1;
3968 /* Store vec_oprnd1 for every vector stmt to be created
3969 for SLP_NODE. We check during the analysis that all
3970 the shift arguments are the same. */
3971 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3972 vec_oprnds1.quick_push (vec_oprnd1);
3974 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3975 slp_node, -1);
3977 else
3978 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3979 &vec_oprnds1, slp_node, -1);
3981 else
3983 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3984 vec_oprnds0.quick_push (vec_oprnd0);
3985 if (op_type == binary_op)
3987 if (code == WIDEN_LSHIFT_EXPR)
3988 vec_oprnd1 = op1;
3989 else
3990 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3991 NULL);
3992 vec_oprnds1.quick_push (vec_oprnd1);
3996 else
3998 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3999 vec_oprnds0.truncate (0);
4000 vec_oprnds0.quick_push (vec_oprnd0);
4001 if (op_type == binary_op)
4003 if (code == WIDEN_LSHIFT_EXPR)
4004 vec_oprnd1 = op1;
4005 else
4006 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
4007 vec_oprnd1);
4008 vec_oprnds1.truncate (0);
4009 vec_oprnds1.quick_push (vec_oprnd1);
4013 /* Arguments are ready. Create the new vector stmts. */
4014 for (i = multi_step_cvt; i >= 0; i--)
4016 tree this_dest = vec_dsts[i];
4017 enum tree_code c1 = code1, c2 = code2;
4018 if (i == 0 && codecvt2 != ERROR_MARK)
4020 c1 = codecvt1;
4021 c2 = codecvt2;
4023 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
4024 &vec_oprnds1,
4025 stmt, this_dest, gsi,
4026 c1, c2, decl1, decl2,
4027 op_type);
4030 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4032 if (cvt_type)
4034 if (codecvt1 == CALL_EXPR)
4036 new_stmt = gimple_build_call (decl1, 1, vop0);
4037 new_temp = make_ssa_name (vec_dest, new_stmt);
4038 gimple_call_set_lhs (new_stmt, new_temp);
4040 else
4042 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4043 new_temp = make_ssa_name (vec_dest);
4044 new_stmt = gimple_build_assign (new_temp, codecvt1,
4045 vop0);
4048 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4050 else
4051 new_stmt = SSA_NAME_DEF_STMT (vop0);
4053 if (slp_node)
4054 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4055 else
4057 if (!prev_stmt_info)
4058 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4059 else
4060 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4061 prev_stmt_info = vinfo_for_stmt (new_stmt);
4066 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4067 break;
4069 case NARROW:
4070 /* In case the vectorization factor (VF) is bigger than the number
4071 of elements that we can fit in a vectype (nunits), we have to
4072 generate more than one vector stmt, i.e. we need to "unroll"
4073 the vector stmt by a factor VF/nunits. */
4074 for (j = 0; j < ncopies; j++)
4076 /* Handle uses. */
4077 if (slp_node)
4078 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4079 slp_node, -1);
4080 else
4082 vec_oprnds0.truncate (0);
4083 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
4084 vect_pow2 (multi_step_cvt) - 1);
4087 /* Arguments are ready. Create the new vector stmts. */
4088 if (cvt_type)
4089 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4091 if (codecvt1 == CALL_EXPR)
4093 new_stmt = gimple_build_call (decl1, 1, vop0);
4094 new_temp = make_ssa_name (vec_dest, new_stmt);
4095 gimple_call_set_lhs (new_stmt, new_temp);
4097 else
4099 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
4100 new_temp = make_ssa_name (vec_dest);
4101 new_stmt = gimple_build_assign (new_temp, codecvt1,
4102 vop0);
4105 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4106 vec_oprnds0[i] = new_temp;
4109 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4110 stmt, vec_dsts, gsi,
4111 slp_node, code1,
4112 &prev_stmt_info);
4115 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4116 break;
4119 vec_oprnds0.release ();
4120 vec_oprnds1.release ();
4121 vec_dsts.release ();
4122 interm_types.release ();
4124 return true;
4128 /* Function vectorizable_assignment.
4130 Check if STMT performs an assignment (copy) that can be vectorized.
4131 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4132 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4133 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
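/* An illustrative sketch (not from the original sources): in
for (i = 0; i < n; i++)
a[i] = (unsigned int) b[i];
where A and B have the same element size, each vector of B is simply
copied (via a VIEW_CONVERT_EXPR) into a vector of A. */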
4135 static bool
4136 vectorizable_assignment (gimple *stmt, gimple_stmt_iterator *gsi,
4137 gimple **vec_stmt, slp_tree slp_node)
4139 tree vec_dest;
4140 tree scalar_dest;
4141 tree op;
4142 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4143 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4144 tree new_temp;
4145 tree def;
4146 gimple *def_stmt;
4147 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4148 int ncopies;
4149 int i, j;
4150 vec<tree> vec_oprnds = vNULL;
4151 tree vop;
4152 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4153 gimple *new_stmt = NULL;
4154 stmt_vec_info prev_stmt_info = NULL;
4155 enum tree_code code;
4156 tree vectype_in;
4158 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4159 return false;
4161 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4162 return false;
4164 /* Is vectorizable assignment? */
4165 if (!is_gimple_assign (stmt))
4166 return false;
4168 scalar_dest = gimple_assign_lhs (stmt);
4169 if (TREE_CODE (scalar_dest) != SSA_NAME)
4170 return false;
4172 code = gimple_assign_rhs_code (stmt);
4173 if (gimple_assign_single_p (stmt)
4174 || code == PAREN_EXPR
4175 || CONVERT_EXPR_CODE_P (code))
4176 op = gimple_assign_rhs1 (stmt);
4177 else
4178 return false;
4180 if (code == VIEW_CONVERT_EXPR)
4181 op = TREE_OPERAND (op, 0);
4183 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4184 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4186 /* Multiple types in SLP are handled by creating the appropriate number of
4187 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4188 case of SLP. */
4189 if (slp_node || PURE_SLP_STMT (stmt_info))
4190 ncopies = 1;
4191 else
4192 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4194 gcc_assert (ncopies >= 1);
4196 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4197 &def_stmt, &def, &dt[0], &vectype_in))
4199 if (dump_enabled_p ())
4200 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4201 "use not simple.\n");
4202 return false;
4205 /* We can handle NOP_EXPR conversions that do not change the number
4206 of elements or the vector size. */
4207 if ((CONVERT_EXPR_CODE_P (code)
4208 || code == VIEW_CONVERT_EXPR)
4209 && (!vectype_in
4210 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4211 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4212 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4213 return false;
4215 /* We do not handle bit-precision changes. */
4216 if ((CONVERT_EXPR_CODE_P (code)
4217 || code == VIEW_CONVERT_EXPR)
4218 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4219 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4220 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4221 || ((TYPE_PRECISION (TREE_TYPE (op))
4222 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4223 /* But a conversion that does not change the bit-pattern is ok. */
4224 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4225 > TYPE_PRECISION (TREE_TYPE (op)))
4226 && TYPE_UNSIGNED (TREE_TYPE (op))))
4228 if (dump_enabled_p ())
4229 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4230 "type conversion to/from bit-precision "
4231 "unsupported.\n");
4232 return false;
4235 if (!vec_stmt) /* transformation not required. */
4237 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4238 if (dump_enabled_p ())
4239 dump_printf_loc (MSG_NOTE, vect_location,
4240 "=== vectorizable_assignment ===\n");
4241 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4242 return true;
4245 /** Transform. **/
4246 if (dump_enabled_p ())
4247 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4249 /* Handle def. */
4250 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4252 /* Handle use. */
4253 for (j = 0; j < ncopies; j++)
4255 /* Handle uses. */
4256 if (j == 0)
4257 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4258 else
4259 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4261 /* Arguments are ready. Create the new vector stmt. */
4262 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4264 if (CONVERT_EXPR_CODE_P (code)
4265 || code == VIEW_CONVERT_EXPR)
4266 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4267 new_stmt = gimple_build_assign (vec_dest, vop);
4268 new_temp = make_ssa_name (vec_dest, new_stmt);
4269 gimple_assign_set_lhs (new_stmt, new_temp);
4270 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4271 if (slp_node)
4272 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4275 if (slp_node)
4276 continue;
4278 if (j == 0)
4279 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4280 else
4281 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4283 prev_stmt_info = vinfo_for_stmt (new_stmt);
4286 vec_oprnds.release ();
4287 return true;
4291 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4292 either as shift by a scalar or by a vector. */
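/* For example (an illustrative use, not from the original sources):
vect_supportable_shift (LSHIFT_EXPR, short_integer_type_node) returns
true if the target can shift a vector of shorts either by one scalar
amount or by a vector of per-element amounts. */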
4294 bool
4295 vect_supportable_shift (enum tree_code code, tree scalar_type)
4298 machine_mode vec_mode;
4299 optab optab;
4300 int icode;
4301 tree vectype;
4303 vectype = get_vectype_for_scalar_type (scalar_type);
4304 if (!vectype)
4305 return false;
4307 optab = optab_for_tree_code (code, vectype, optab_scalar);
4308 if (!optab
4309 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4311 optab = optab_for_tree_code (code, vectype, optab_vector);
4312 if (!optab
4313 || (optab_handler (optab, TYPE_MODE (vectype))
4314 == CODE_FOR_nothing))
4315 return false;
4318 vec_mode = TYPE_MODE (vectype);
4319 icode = (int) optab_handler (optab, vec_mode);
4320 if (icode == CODE_FOR_nothing)
4321 return false;
4323 return true;
4327 /* Function vectorizable_shift.
4329 Check if STMT performs a shift operation that can be vectorized.
4330 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4331 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4332 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
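/* Illustrative sketches (not from the original sources) of the two
shapes distinguished below:
for (i = 0; i < n; i++) a[i] = b[i] << s;     scalar (invariant) amount
for (i = 0; i < n; i++) a[i] = b[i] << c[i];  vector (per-element) amount */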
4334 static bool
4335 vectorizable_shift (gimple *stmt, gimple_stmt_iterator *gsi,
4336 gimple **vec_stmt, slp_tree slp_node)
4338 tree vec_dest;
4339 tree scalar_dest;
4340 tree op0, op1 = NULL;
4341 tree vec_oprnd1 = NULL_TREE;
4342 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4343 tree vectype;
4344 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4345 enum tree_code code;
4346 machine_mode vec_mode;
4347 tree new_temp;
4348 optab optab;
4349 int icode;
4350 machine_mode optab_op2_mode;
4351 tree def;
4352 gimple *def_stmt;
4353 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4354 gimple *new_stmt = NULL;
4355 stmt_vec_info prev_stmt_info;
4356 int nunits_in;
4357 int nunits_out;
4358 tree vectype_out;
4359 tree op1_vectype;
4360 int ncopies;
4361 int j, i;
4362 vec<tree> vec_oprnds0 = vNULL;
4363 vec<tree> vec_oprnds1 = vNULL;
4364 tree vop0, vop1;
4365 unsigned int k;
4366 bool scalar_shift_arg = true;
4367 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4368 int vf;
4370 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4371 return false;
4373 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4374 return false;
4376 /* Is STMT a vectorizable binary/unary operation? */
4377 if (!is_gimple_assign (stmt))
4378 return false;
4380 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4381 return false;
4383 code = gimple_assign_rhs_code (stmt);
4385 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4386 || code == RROTATE_EXPR))
4387 return false;
4389 scalar_dest = gimple_assign_lhs (stmt);
4390 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4391 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4392 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4394 if (dump_enabled_p ())
4395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4396 "bit-precision shifts not supported.\n");
4397 return false;
4400 op0 = gimple_assign_rhs1 (stmt);
4401 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4402 &def_stmt, &def, &dt[0], &vectype))
4404 if (dump_enabled_p ())
4405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4406 "use not simple.\n");
4407 return false;
4409 /* If op0 is an external or constant def, use a vector type with
4410 the same size as the output vector type. */
4411 if (!vectype)
4412 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4413 if (vec_stmt)
4414 gcc_assert (vectype);
4415 if (!vectype)
4417 if (dump_enabled_p ())
4418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4419 "no vectype for scalar type\n");
4420 return false;
4423 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4424 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4425 if (nunits_out != nunits_in)
4426 return false;
4428 op1 = gimple_assign_rhs2 (stmt);
4429 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4430 &def, &dt[1], &op1_vectype))
4432 if (dump_enabled_p ())
4433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4434 "use not simple.\n");
4435 return false;
4438 if (loop_vinfo)
4439 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4440 else
4441 vf = 1;
4443 /* Multiple types in SLP are handled by creating the appropriate number of
4444 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4445 case of SLP. */
4446 if (slp_node || PURE_SLP_STMT (stmt_info))
4447 ncopies = 1;
4448 else
4449 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4451 gcc_assert (ncopies >= 1);
4453 /* Determine whether the shift amount is a vector or a scalar. If the
4454 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4456 if ((dt[1] == vect_internal_def
4457 || dt[1] == vect_induction_def)
4458 && !slp_node)
4459 scalar_shift_arg = false;
4460 else if (dt[1] == vect_constant_def
4461 || dt[1] == vect_external_def
4462 || dt[1] == vect_internal_def)
4464 /* In SLP, we need to check whether the shift count is the same in
4465 all the stmts of the group; in loops, if it is a constant or
4466 invariant, it is always a scalar shift. */
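/* E.g. (a sketch): for the SLP group { a[0] = b[0] << s; a[1] = b[1] << s; }
the count S is common to the whole group, so the scalar-shift path can
still be used; differing counts force the vector/vector path. */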
4467 if (slp_node)
4469 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4470 gimple *slpstmt;
4472 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4473 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4474 scalar_shift_arg = false;
4477 else
4479 if (dump_enabled_p ())
4480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4481 "operand mode requires invariant argument.\n");
4482 return false;
4485 /* Vector shifted by vector. */
4486 if (!scalar_shift_arg)
4488 optab = optab_for_tree_code (code, vectype, optab_vector);
4489 if (dump_enabled_p ())
4490 dump_printf_loc (MSG_NOTE, vect_location,
4491 "vector/vector shift/rotate found.\n");
4493 if (!op1_vectype)
4494 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4495 if (op1_vectype == NULL_TREE
4496 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4498 if (dump_enabled_p ())
4499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4500 "unusable type for last operand in"
4501 " vector/vector shift/rotate.\n");
4502 return false;
4505 /* See if the machine has a vector-by-scalar shift insn, and if not,
4506 see if it has a vector-by-vector shift insn. */
4507 else
4509 optab = optab_for_tree_code (code, vectype, optab_scalar);
4510 if (optab
4511 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4513 if (dump_enabled_p ())
4514 dump_printf_loc (MSG_NOTE, vect_location,
4515 "vector/scalar shift/rotate found.\n");
4517 else
4519 optab = optab_for_tree_code (code, vectype, optab_vector);
4520 if (optab
4521 && (optab_handler (optab, TYPE_MODE (vectype))
4522 != CODE_FOR_nothing))
4524 scalar_shift_arg = false;
4526 if (dump_enabled_p ())
4527 dump_printf_loc (MSG_NOTE, vect_location,
4528 "vector/vector shift/rotate found.\n");
4530 /* Unlike the other binary operators, shifts/rotates have
4531 the rhs being int, instead of the same type as the lhs,
4532 so make sure the scalar is the right type if we are
4533 dealing with vectors of long long/long/short/char. */
4534 if (dt[1] == vect_constant_def)
4535 op1 = fold_convert (TREE_TYPE (vectype), op1);
4536 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4537 TREE_TYPE (op1)))
4539 if (slp_node
4540 && TYPE_MODE (TREE_TYPE (vectype))
4541 != TYPE_MODE (TREE_TYPE (op1)))
4543 if (dump_enabled_p ())
4544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4545 "unusable type for last operand in"
4546 " vector/vector shift/rotate.\n");
4547 return false;
4549 if (vec_stmt && !slp_node)
4551 op1 = fold_convert (TREE_TYPE (vectype), op1);
4552 op1 = vect_init_vector (stmt, op1,
4553 TREE_TYPE (vectype), NULL);
4560 /* Supportable by target? */
4561 if (!optab)
4563 if (dump_enabled_p ())
4564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4565 "no optab.\n");
4566 return false;
4568 vec_mode = TYPE_MODE (vectype);
4569 icode = (int) optab_handler (optab, vec_mode);
4570 if (icode == CODE_FOR_nothing)
4572 if (dump_enabled_p ())
4573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4574 "op not supported by target.\n");
4575 /* Check only during analysis. */
4576 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4577 || (vf < vect_min_worthwhile_factor (code)
4578 && !vec_stmt))
4579 return false;
4580 if (dump_enabled_p ())
4581 dump_printf_loc (MSG_NOTE, vect_location,
4582 "proceeding using word mode.\n");
4585 /* Worthwhile without SIMD support? Check only during analysis. */
4586 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4587 && vf < vect_min_worthwhile_factor (code)
4588 && !vec_stmt)
4590 if (dump_enabled_p ())
4591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4592 "not worthwhile without SIMD support.\n");
4593 return false;
4596 if (!vec_stmt) /* transformation not required. */
4598 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4599 if (dump_enabled_p ())
4600 dump_printf_loc (MSG_NOTE, vect_location,
4601 "=== vectorizable_shift ===\n");
4602 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4603 return true;
4606 /** Transform. **/
4608 if (dump_enabled_p ())
4609 dump_printf_loc (MSG_NOTE, vect_location,
4610 "transform binary/unary operation.\n");
4612 /* Handle def. */
4613 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4615 prev_stmt_info = NULL;
4616 for (j = 0; j < ncopies; j++)
4618 /* Handle uses. */
4619 if (j == 0)
4621 if (scalar_shift_arg)
4623 /* Vector shl and shr insn patterns can be defined with scalar
4624 operand 2 (shift operand). In this case, use constant or loop
4625 invariant op1 directly, without extending it to vector mode
4626 first. */
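/* E.g. (a sketch): if the chosen insn's operand 2 mode is an integer
rather than a vector mode, the invariant count S from "b[i] << s" is
used directly as the shift operand. */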
4627 optab_op2_mode = insn_data[icode].operand[2].mode;
4628 if (!VECTOR_MODE_P (optab_op2_mode))
4630 if (dump_enabled_p ())
4631 dump_printf_loc (MSG_NOTE, vect_location,
4632 "operand 1 using scalar mode.\n");
4633 vec_oprnd1 = op1;
4634 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4635 vec_oprnds1.quick_push (vec_oprnd1);
4636 if (slp_node)
4638 /* Store vec_oprnd1 for every vector stmt to be created
4639 for SLP_NODE. We check during the analysis that all
4640 the shift arguments are the same.
4641 TODO: Allow different constants for different vector
4642 stmts generated for an SLP instance. */
4643 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4644 vec_oprnds1.quick_push (vec_oprnd1);
4649 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4650 (a special case for certain kinds of vector shifts); otherwise,
4651 operand 1 should be of a vector type (the usual case). */
4652 if (vec_oprnd1)
4653 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4654 slp_node, -1);
4655 else
4656 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4657 slp_node, -1);
4659 else
4660 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4662 /* Arguments are ready. Create the new vector stmt. */
4663 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4665 vop1 = vec_oprnds1[i];
4666 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4667 new_temp = make_ssa_name (vec_dest, new_stmt);
4668 gimple_assign_set_lhs (new_stmt, new_temp);
4669 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4670 if (slp_node)
4671 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4674 if (slp_node)
4675 continue;
4677 if (j == 0)
4678 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4679 else
4680 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4681 prev_stmt_info = vinfo_for_stmt (new_stmt);
4684 vec_oprnds0.release ();
4685 vec_oprnds1.release ();
4687 return true;
4691 /* Function vectorizable_operation.
4693 Check if STMT performs a binary, unary or ternary operation that can
4694 be vectorized.
4695 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4696 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4697 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
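/* An illustrative sketch (not from the original sources): a statement
such as
a[i] = b[i] + c[i];
in a vectorized loop is turned here into one vector addition per copy;
the unrolling bookkeeping is described in the large comment further
below. */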
4699 static bool
4700 vectorizable_operation (gimple *stmt, gimple_stmt_iterator *gsi,
4701 gimple **vec_stmt, slp_tree slp_node)
4703 tree vec_dest;
4704 tree scalar_dest;
4705 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4706 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4707 tree vectype;
4708 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4709 enum tree_code code;
4710 machine_mode vec_mode;
4711 tree new_temp;
4712 int op_type;
4713 optab optab;
4714 bool target_support_p;
4715 tree def;
4716 gimple *def_stmt;
4717 enum vect_def_type dt[3]
4718 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4719 gimple *new_stmt = NULL;
4720 stmt_vec_info prev_stmt_info;
4721 int nunits_in;
4722 int nunits_out;
4723 tree vectype_out;
4724 int ncopies;
4725 int j, i;
4726 vec<tree> vec_oprnds0 = vNULL;
4727 vec<tree> vec_oprnds1 = vNULL;
4728 vec<tree> vec_oprnds2 = vNULL;
4729 tree vop0, vop1, vop2;
4730 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4731 int vf;
4733 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4734 return false;
4736 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4737 return false;
4739 /* Is STMT a vectorizable binary/unary operation? */
4740 if (!is_gimple_assign (stmt))
4741 return false;
4743 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4744 return false;
4746 code = gimple_assign_rhs_code (stmt);
4748 /* For pointer addition, we should use the normal plus for
4749 the vector addition. */
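/* E.g. (a sketch): a POINTER_PLUS_EXPR computing "p + i" each iteration
is vectorized as an ordinary PLUS_EXPR on the corresponding vectors;
there is no pointer-plus at the vector level. */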
4750 if (code == POINTER_PLUS_EXPR)
4751 code = PLUS_EXPR;
4753 /* Support only unary, binary or ternary operations. */
4754 op_type = TREE_CODE_LENGTH (code);
4755 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4757 if (dump_enabled_p ())
4758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4759 "num. args = %d (not unary/binary/ternary op).\n",
4760 op_type);
4761 return false;
4764 scalar_dest = gimple_assign_lhs (stmt);
4765 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4767 /* Most operations cannot handle bit-precision types without extra
4768 truncations. */
4769 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4770 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4771 /* Exceptions are bitwise binary operations. */
4772 && code != BIT_IOR_EXPR
4773 && code != BIT_XOR_EXPR
4774 && code != BIT_AND_EXPR)
4776 if (dump_enabled_p ())
4777 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4778 "bit-precision arithmetic not supported.\n");
4779 return false;
4782 op0 = gimple_assign_rhs1 (stmt);
4783 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4784 &def_stmt, &def, &dt[0], &vectype))
4786 if (dump_enabled_p ())
4787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4788 "use not simple.\n");
4789 return false;
4791 /* If op0 is an external or constant def, use a vector type with
4792 the same size as the output vector type. */
4793 if (!vectype)
4794 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4795 if (vec_stmt)
4796 gcc_assert (vectype);
4797 if (!vectype)
4799 if (dump_enabled_p ())
4801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4802 "no vectype for scalar type ");
4803 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4804 TREE_TYPE (op0));
4805 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4808 return false;
4811 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4812 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4813 if (nunits_out != nunits_in)
4814 return false;
4816 if (op_type == binary_op || op_type == ternary_op)
4818 op1 = gimple_assign_rhs2 (stmt);
4819 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4820 &def, &dt[1]))
4822 if (dump_enabled_p ())
4823 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4824 "use not simple.\n");
4825 return false;
4828 if (op_type == ternary_op)
4830 op2 = gimple_assign_rhs3 (stmt);
4831 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4832 &def, &dt[2]))
4834 if (dump_enabled_p ())
4835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4836 "use not simple.\n");
4837 return false;
4841 if (loop_vinfo)
4842 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4843 else
4844 vf = 1;
4846 /* Multiple types in SLP are handled by creating the appropriate number of
4847 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4848 case of SLP. */
4849 if (slp_node || PURE_SLP_STMT (stmt_info))
4850 ncopies = 1;
4851 else
4852 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4854 gcc_assert (ncopies >= 1);
4856 /* Shifts are handled in vectorizable_shift (). */
4857 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4858 || code == RROTATE_EXPR)
4859 return false;
4861 /* Supportable by target? */
4863 vec_mode = TYPE_MODE (vectype);
4864 if (code == MULT_HIGHPART_EXPR)
4865 target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
4866 else
4868 optab = optab_for_tree_code (code, vectype, optab_default);
4869 if (!optab)
4871 if (dump_enabled_p ())
4872 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4873 "no optab.\n");
4874 return false;
4876 target_support_p = (optab_handler (optab, vec_mode)
4877 != CODE_FOR_nothing);
4880 if (!target_support_p)
4882 if (dump_enabled_p ())
4883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4884 "op not supported by target.\n");
4885 /* Check only during analysis. */
4886 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4887 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4888 return false;
4889 if (dump_enabled_p ())
4890 dump_printf_loc (MSG_NOTE, vect_location,
4891 "proceeding using word mode.\n");
4894 /* Worthwhile without SIMD support? Check only during analysis. */
4895 if (!VECTOR_MODE_P (vec_mode)
4896 && !vec_stmt
4897 && vf < vect_min_worthwhile_factor (code))
4899 if (dump_enabled_p ())
4900 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4901 "not worthwhile without SIMD support.\n");
4902 return false;
4905 if (!vec_stmt) /* transformation not required. */
4907 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4908 if (dump_enabled_p ())
4909 dump_printf_loc (MSG_NOTE, vect_location,
4910 "=== vectorizable_operation ===\n");
4911 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4912 return true;
4915 /** Transform. **/
4917 if (dump_enabled_p ())
4918 dump_printf_loc (MSG_NOTE, vect_location,
4919 "transform binary/unary operation.\n");
4921 /* Handle def. */
4922 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4924 /* In case the vectorization factor (VF) is bigger than the number
4925 of elements that we can fit in a vectype (nunits), we have to generate
4926 more than one vector stmt, i.e. we need to "unroll" the
4927 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4928 from one copy of the vector stmt to the next, in the field
4929 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4930 stages to find the correct vector defs to be used when vectorizing
4931 stmts that use the defs of the current stmt. The example below
4932 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4933 we need to create 4 vectorized stmts):
4935 before vectorization:
4936 RELATED_STMT VEC_STMT
4937 S1: x = memref - -
4938 S2: z = x + 1 - -
4940 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4941 there):
4942 RELATED_STMT VEC_STMT
4943 VS1_0: vx0 = memref0 VS1_1 -
4944 VS1_1: vx1 = memref1 VS1_2 -
4945 VS1_2: vx2 = memref2 VS1_3 -
4946 VS1_3: vx3 = memref3 - -
4947 S1: x = load - VS1_0
4948 S2: z = x + 1 - -
4950 step2: vectorize stmt S2 (done here):
4951 To vectorize stmt S2 we first need to find the relevant vector
4952 def for the first operand 'x'. This is, as usual, obtained from
4953 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4954 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4955 relevant vector def 'vx0'. Having found 'vx0' we can generate
4956 the vector stmt VS2_0, and as usual, record it in the
4957 STMT_VINFO_VEC_STMT of stmt S2.
4958 When creating the second copy (VS2_1), we obtain the relevant vector
4959 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4960 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4961 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4962 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4963 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4964 chain of stmts and pointers:
4965 RELATED_STMT VEC_STMT
4966 VS1_0: vx0 = memref0 VS1_1 -
4967 VS1_1: vx1 = memref1 VS1_2 -
4968 VS1_2: vx2 = memref2 VS1_3 -
4969 VS1_3: vx3 = memref3 - -
4970 S1: x = load - VS1_0
4971 VS2_0: vz0 = vx0 + v1 VS2_1 -
4972 VS2_1: vz1 = vx1 + v1 VS2_2 -
4973 VS2_2: vz2 = vx2 + v1 VS2_3 -
4974 VS2_3: vz3 = vx3 + v1 - -
4975 S2: z = x + 1 - VS2_0 */
4977 prev_stmt_info = NULL;
4978 for (j = 0; j < ncopies; j++)
4980 /* Handle uses. */
4981 if (j == 0)
4983 if (op_type == binary_op || op_type == ternary_op)
4984 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4985 slp_node, -1);
4986 else
4987 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4988 slp_node, -1);
4989 if (op_type == ternary_op)
4991 vec_oprnds2.create (1);
4992 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4993 stmt,
4994 NULL));
4997 else
4999 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
5000 if (op_type == ternary_op)
5002 tree vec_oprnd = vec_oprnds2.pop ();
5003 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
5004 vec_oprnd));
5008 /* Arguments are ready. Create the new vector stmt. */
5009 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
5011 vop1 = ((op_type == binary_op || op_type == ternary_op)
5012 ? vec_oprnds1[i] : NULL_TREE);
5013 vop2 = ((op_type == ternary_op)
5014 ? vec_oprnds2[i] : NULL_TREE);
5015 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
5016 new_temp = make_ssa_name (vec_dest, new_stmt);
5017 gimple_assign_set_lhs (new_stmt, new_temp);
5018 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5019 if (slp_node)
5020 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5023 if (slp_node)
5024 continue;
5026 if (j == 0)
5027 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5028 else
5029 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5030 prev_stmt_info = vinfo_for_stmt (new_stmt);
5033 vec_oprnds0.release ();
5034 vec_oprnds1.release ();
5035 vec_oprnds2.release ();
5037 return true;
5040 /* A helper function to ensure data reference DR's base alignment
5041 for STMT_INFO. */
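/* For instance (a sketch): if the vector type requires 16-byte alignment
but the underlying "static int a[N]" was only 4-byte aligned, the
declaration's alignment is raised here (and marked user-aligned) so that
aligned vector accesses to it are valid. */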
5043 static void
5044 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
5046 if (!dr->aux)
5047 return;
5049 if (DR_VECT_AUX (dr)->base_misaligned)
5051 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5052 tree base_decl = DR_VECT_AUX (dr)->base_decl;
5054 if (decl_in_symtab_p (base_decl))
5055 symtab_node::get (base_decl)->increase_alignment (TYPE_ALIGN (vectype));
5056 else
5058 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
5059 DECL_USER_ALIGN (base_decl) = 1;
5061 DR_VECT_AUX (dr)->base_misaligned = false;
5066 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
5067 reversal of the vector elements. If that is impossible to do,
5068 returns NULL. */
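/* E.g. for a 4-element vector the mask built below is { 3, 2, 1, 0 },
i.e. element I of the result is element NUNITS - 1 - I of the input. */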
5070 static tree
5071 perm_mask_for_reverse (tree vectype)
5073 int i, nunits;
5074 unsigned char *sel;
5076 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5077 sel = XALLOCAVEC (unsigned char, nunits);
5079 for (i = 0; i < nunits; ++i)
5080 sel[i] = nunits - 1 - i;
5082 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
5083 return NULL_TREE;
5084 return vect_gen_perm_mask_checked (vectype, sel);
5087 /* Function vectorizable_store.
5089 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
5090 can be vectorized.
5091 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5092 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5093 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
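/* An illustrative sketch (not from the original sources) of the simplest
case handled here:
for (i = 0; i < n; i++)
a[i] = x[i] + 1;
the store to a[i] becomes one vector store per copy; grouped, strided
and scatter stores are handled by the more involved paths below. */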
5095 static bool
5096 vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
5097 slp_tree slp_node)
5099 tree scalar_dest;
5100 tree data_ref;
5101 tree op;
5102 tree vec_oprnd = NULL_TREE;
5103 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5104 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5105 tree elem_type;
5106 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5107 struct loop *loop = NULL;
5108 machine_mode vec_mode;
5109 tree dummy;
5110 enum dr_alignment_support alignment_support_scheme;
5111 tree def;
5112 gimple *def_stmt;
5113 enum vect_def_type dt;
5114 stmt_vec_info prev_stmt_info = NULL;
5115 tree dataref_ptr = NULL_TREE;
5116 tree dataref_offset = NULL_TREE;
5117 gimple *ptr_incr = NULL;
5118 int ncopies;
5119 int j;
5120 gimple *next_stmt, *first_stmt = NULL;
5121 bool grouped_store = false;
5122 bool store_lanes_p = false;
5123 unsigned int group_size, i;
5124 vec<tree> dr_chain = vNULL;
5125 vec<tree> oprnds = vNULL;
5126 vec<tree> result_chain = vNULL;
5127 bool inv_p;
5128 bool negative = false;
5129 tree offset = NULL_TREE;
5130 vec<tree> vec_oprnds = vNULL;
5131 bool slp = (slp_node != NULL);
5132 unsigned int vec_num;
5133 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5134 tree aggr_type;
5135 tree scatter_base = NULL_TREE, scatter_off = NULL_TREE;
5136 tree scatter_off_vectype = NULL_TREE, scatter_decl = NULL_TREE;
5137 int scatter_scale = 1;
5138 enum vect_def_type scatter_idx_dt = vect_unknown_def_type;
5139 enum vect_def_type scatter_src_dt = vect_unknown_def_type;
5140 gimple *new_stmt;
5142 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5143 return false;
5145 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5146 return false;
5148 /* Is vectorizable store? */
5150 if (!is_gimple_assign (stmt))
5151 return false;
5153 scalar_dest = gimple_assign_lhs (stmt);
5154 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5155 && is_pattern_stmt_p (stmt_info))
5156 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5157 if (TREE_CODE (scalar_dest) != ARRAY_REF
5158 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5159 && TREE_CODE (scalar_dest) != INDIRECT_REF
5160 && TREE_CODE (scalar_dest) != COMPONENT_REF
5161 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5162 && TREE_CODE (scalar_dest) != REALPART_EXPR
5163 && TREE_CODE (scalar_dest) != MEM_REF)
5164 return false;
5166 gcc_assert (gimple_assign_single_p (stmt));
5168 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5169 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5171 if (loop_vinfo)
5172 loop = LOOP_VINFO_LOOP (loop_vinfo);
5174 /* Multiple types in SLP are handled by creating the appropriate number of
5175 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5176 case of SLP. */
5177 if (slp || PURE_SLP_STMT (stmt_info))
5178 ncopies = 1;
5179 else
5180 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5182 gcc_assert (ncopies >= 1);
5184 /* FORNOW. This restriction should be relaxed. */
5185 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5187 if (dump_enabled_p ())
5188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5189 "multiple types in nested loop.\n");
5190 return false;
5193 op = gimple_assign_rhs1 (stmt);
5194 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5195 &def, &dt))
5197 if (dump_enabled_p ())
5198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5199 "use not simple.\n");
5200 return false;
5203 elem_type = TREE_TYPE (vectype);
5204 vec_mode = TYPE_MODE (vectype);
5206 /* FORNOW. In some cases we can vectorize even if the data-type is not
5207 supported (e.g. array initialization with 0). */
5208 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5209 return false;
5211 if (!STMT_VINFO_DATA_REF (stmt_info))
5212 return false;
5214 if (!STMT_VINFO_STRIDED_P (stmt_info))
5216 negative =
5217 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5218 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5219 size_zero_node) < 0;
5220 if (negative && ncopies > 1)
5222 if (dump_enabled_p ())
5223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5224 "multiple types with negative step.\n");
5225 return false;
5227 if (negative)
5229 gcc_assert (!grouped_store);
5230 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5231 if (alignment_support_scheme != dr_aligned
5232 && alignment_support_scheme != dr_unaligned_supported)
5234 if (dump_enabled_p ())
5235 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5236 "negative step but alignment required.\n");
5237 return false;
5239 if (dt != vect_constant_def
5240 && dt != vect_external_def
5241 && !perm_mask_for_reverse (vectype))
5243 if (dump_enabled_p ())
5244 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5245 "negative step and reversing not supported.\n");
5246 return false;
5251 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5253 grouped_store = true;
5254 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5255 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5256 if (!slp
5257 && !PURE_SLP_STMT (stmt_info)
5258 && !STMT_VINFO_STRIDED_P (stmt_info))
5260 if (vect_store_lanes_supported (vectype, group_size))
5261 store_lanes_p = true;
5262 else if (!vect_grouped_store_supported (vectype, group_size))
5263 return false;
5266 if (STMT_VINFO_STRIDED_P (stmt_info)
5267 && (slp || PURE_SLP_STMT (stmt_info))
5268 && (group_size > nunits
5269 || nunits % group_size != 0))
5271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5272 "unhandled strided group store\n");
5273 return false;
5276 if (first_stmt == stmt)
5278 /* STMT is the leader of the group. Check the operands of all the
5279 stmts of the group. */
5280 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5281 while (next_stmt)
5283 gcc_assert (gimple_assign_single_p (next_stmt));
5284 op = gimple_assign_rhs1 (next_stmt);
5285 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5286 &def_stmt, &def, &dt))
5288 if (dump_enabled_p ())
5289 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5290 "use not simple.\n");
5291 return false;
5293 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5298 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5300 gimple *def_stmt;
5301 tree def;
5302 scatter_decl = vect_check_gather_scatter (stmt, loop_vinfo, &scatter_base,
5303 &scatter_off, &scatter_scale);
5304 gcc_assert (scatter_decl);
5305 if (!vect_is_simple_use_1 (scatter_off, NULL, loop_vinfo, bb_vinfo,
5306 &def_stmt, &def, &scatter_idx_dt,
5307 &scatter_off_vectype))
5309 if (dump_enabled_p ())
5310 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5311 "scatter index use not simple.");
5312 return false;
5316 if (!vec_stmt) /* transformation not required. */
5318 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5319 /* The SLP costs are calculated during SLP analysis. */
5320 if (!PURE_SLP_STMT (stmt_info))
5321 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5322 NULL, NULL, NULL);
5323 return true;
5326 /** Transform. **/
5328 ensure_base_align (stmt_info, dr);
5330 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5332 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE, op, src;
5333 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (scatter_decl));
5334 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5335 tree ptr, mask, var, scale, perm_mask = NULL_TREE;
5336 edge pe = loop_preheader_edge (loop);
5337 gimple_seq seq;
5338 basic_block new_bb;
5339 enum { NARROW, NONE, WIDEN } modifier;
5340 int scatter_off_nunits = TYPE_VECTOR_SUBPARTS (scatter_off_vectype);
5342 if (nunits == (unsigned int) scatter_off_nunits)
5343 modifier = NONE;
5344 else if (nunits == (unsigned int) scatter_off_nunits / 2)
5346 unsigned char *sel = XALLOCAVEC (unsigned char, scatter_off_nunits);
5347 modifier = WIDEN;
5349 for (i = 0; i < (unsigned int) scatter_off_nunits; ++i)
5350 sel[i] = i | nunits;
5352 perm_mask = vect_gen_perm_mask_checked (scatter_off_vectype, sel);
5353 gcc_assert (perm_mask != NULL_TREE);
5355 else if (nunits == (unsigned int) scatter_off_nunits * 2)
5357 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5358 modifier = NARROW;
5360 for (i = 0; i < (unsigned int) nunits; ++i)
5361 sel[i] = i | scatter_off_nunits;
5363 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5364 gcc_assert (perm_mask != NULL_TREE);
5365 ncopies *= 2;
5367 else
5368 gcc_unreachable ();
5370 rettype = TREE_TYPE (TREE_TYPE (scatter_decl));
5371 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5372 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5373 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5374 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5375 scaletype = TREE_VALUE (arglist);
5377 gcc_checking_assert (TREE_CODE (masktype) == INTEGER_TYPE
5378 && TREE_CODE (rettype) == VOID_TYPE);
5380 ptr = fold_convert (ptrtype, scatter_base);
5381 if (!is_gimple_min_invariant (ptr))
5383 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5384 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5385 gcc_assert (!new_bb);
5388 /* Currently we support only unconditional scatter stores,
5389 so mask should be all ones. */
5390 mask = build_int_cst (masktype, -1);
5391 mask = vect_init_vector (stmt, mask, masktype, NULL);
5393 scale = build_int_cst (scaletype, scatter_scale);
5395 prev_stmt_info = NULL;
5396 for (j = 0; j < ncopies; ++j)
5398 if (j == 0)
5400 src = vec_oprnd1
5401 = vect_get_vec_def_for_operand (gimple_assign_rhs1 (stmt), stmt, NULL);
5402 op = vec_oprnd0
5403 = vect_get_vec_def_for_operand (scatter_off, stmt, NULL);
5405 else if (modifier != NONE && (j & 1))
5407 if (modifier == WIDEN)
5409 src = vec_oprnd1
5410 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5411 op = permute_vec_elements (vec_oprnd0, vec_oprnd0, perm_mask,
5412 stmt, gsi);
5414 else if (modifier == NARROW)
5416 src = permute_vec_elements (vec_oprnd1, vec_oprnd1, perm_mask,
5417 stmt, gsi);
5418 op = vec_oprnd0
5419 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5421 else
5422 gcc_unreachable ();
5424 else
5426 src = vec_oprnd1
5427 = vect_get_vec_def_for_stmt_copy (scatter_src_dt, vec_oprnd1);
5428 op = vec_oprnd0
5429 = vect_get_vec_def_for_stmt_copy (scatter_idx_dt, vec_oprnd0);
5432 if (!useless_type_conversion_p (srctype, TREE_TYPE (src)))
5434 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src))
5435 == TYPE_VECTOR_SUBPARTS (srctype));
5436 var = vect_get_new_vect_var (srctype, vect_simple_var, NULL);
5437 var = make_ssa_name (var);
5438 src = build1 (VIEW_CONVERT_EXPR, srctype, src);
5439 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, src);
5440 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5441 src = var;
5444 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5446 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5447 == TYPE_VECTOR_SUBPARTS (idxtype));
5448 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5449 var = make_ssa_name (var);
5450 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5451 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5452 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5453 op = var;
5456 new_stmt
5457 = gimple_build_call (scatter_decl, 5, ptr, mask, op, src, scale);
5459 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5461 if (prev_stmt_info == NULL)
5462 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5463 else
5464 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5465 prev_stmt_info = vinfo_for_stmt (new_stmt);
5467 return true;
5470 if (grouped_store)
5472 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5473 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5475 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5477 /* FORNOW */
5478 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5480 /* We vectorize all the stmts of the interleaving group when we
5481 reach the last stmt in the group. */
5482 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5483 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5484 && !slp)
5486 *vec_stmt = NULL;
5487 return true;
5490 if (slp)
5492 grouped_store = false;
5493 /* VEC_NUM is the number of vect stmts to be created for this
5494 group. */
5495 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5496 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5497 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5498 op = gimple_assign_rhs1 (first_stmt);
5500 else
5501 /* VEC_NUM is the number of vect stmts to be created for this
5502 group. */
5503 vec_num = group_size;
5505 else
5507 first_stmt = stmt;
5508 first_dr = dr;
5509 group_size = vec_num = 1;
5512 if (dump_enabled_p ())
5513 dump_printf_loc (MSG_NOTE, vect_location,
5514 "transform store. ncopies = %d\n", ncopies);
5516 if (STMT_VINFO_STRIDED_P (stmt_info))
5518 gimple_stmt_iterator incr_gsi;
5519 bool insert_after;
5520 gimple *incr;
5521 tree offvar;
5522 tree ivstep;
5523 tree running_off;
5524 gimple_seq stmts = NULL;
5525 tree stride_base, stride_step, alias_off;
5526 tree vec_oprnd;
5527 unsigned int g;
5529 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
5531 stride_base
5532 = fold_build_pointer_plus
5533 (unshare_expr (DR_BASE_ADDRESS (first_dr)),
5534 size_binop (PLUS_EXPR,
5535 convert_to_ptrofftype (unshare_expr (DR_OFFSET (first_dr))),
5536 convert_to_ptrofftype (DR_INIT(first_dr))));
5537 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (first_dr)));
5539 /* For a store with loop-invariant (but other than power-of-2)
5540 stride (i.e. not a grouped access) like so:
5542 for (i = 0; i < n; i += stride)
5543 array[i] = ...;
5545 we generate a new induction variable and new stores from
5546 the components of the (vectorized) rhs:
5548 for (j = 0; ; j += VF*stride)
5549 vectemp = ...;
5550 tmp1 = vectemp[0];
5551 array[j] = tmp1;
5552 tmp2 = vectemp[1];
5553 array[j + stride] = tmp2;
5557 unsigned nstores = nunits;
5558 tree ltype = elem_type;
5559 if (slp)
5561 nstores = nunits / group_size;
5562 if (group_size < nunits)
5563 ltype = build_vector_type (elem_type, group_size);
5564 else
5565 ltype = vectype;
5566 ltype = build_aligned_type (ltype, TYPE_ALIGN (elem_type));
5567 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5568 group_size = 1;
5571 ivstep = stride_step;
5572 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
5573 build_int_cst (TREE_TYPE (ivstep),
5574 ncopies * nstores));
5576 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
5578 create_iv (stride_base, ivstep, NULL,
5579 loop, &incr_gsi, insert_after,
5580 &offvar, NULL);
5581 incr = gsi_stmt (incr_gsi);
5582 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
5584 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
5585 if (stmts)
5586 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5588 prev_stmt_info = NULL;
5589 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
5590 next_stmt = first_stmt;
5591 for (g = 0; g < group_size; g++)
5593 running_off = offvar;
5594 if (g)
5596 tree size = TYPE_SIZE_UNIT (ltype);
5597 tree pos = fold_build2 (MULT_EXPR, sizetype, size_int (g),
5598 size);
5599 tree newoff = copy_ssa_name (running_off, NULL);
5600 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5601 running_off, pos);
5602 vect_finish_stmt_generation (stmt, incr, gsi);
5603 running_off = newoff;
5605 for (j = 0; j < ncopies; j++)
5607 /* We've set op and dt above, from gimple_assign_rhs1(stmt),
5608 and first_stmt == stmt. */
5609 if (j == 0)
5611 if (slp)
5613 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds, NULL,
5614 slp_node, -1);
5615 vec_oprnd = vec_oprnds[0];
5617 else
5619 gcc_assert (gimple_assign_single_p (next_stmt));
5620 op = gimple_assign_rhs1 (next_stmt);
5621 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5622 NULL);
5625 else
5627 if (slp)
5628 vec_oprnd = vec_oprnds[j];
5629 else
5631 vect_is_simple_use (vec_oprnd, NULL, loop_vinfo,
5632 bb_vinfo, &def_stmt, &def, &dt);
5633 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
5637 for (i = 0; i < nstores; i++)
5639 tree newref, newoff;
5640 gimple *incr, *assign;
5641 tree size = TYPE_SIZE (ltype);
5642 /* Extract the i'th component. */
5643 tree pos = fold_build2 (MULT_EXPR, bitsizetype,
5644 bitsize_int (i), size);
5645 tree elem = fold_build3 (BIT_FIELD_REF, ltype, vec_oprnd,
5646 size, pos);
5648 elem = force_gimple_operand_gsi (gsi, elem, true,
5649 NULL_TREE, true,
5650 GSI_SAME_STMT);
5652 newref = build2 (MEM_REF, ltype,
5653 running_off, alias_off);
5655 /* And store it to *running_off. */
5656 assign = gimple_build_assign (newref, elem);
5657 vect_finish_stmt_generation (stmt, assign, gsi);
5659 newoff = copy_ssa_name (running_off, NULL);
5660 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
5661 running_off, stride_step);
5662 vect_finish_stmt_generation (stmt, incr, gsi);
5664 running_off = newoff;
5665 if (g == group_size - 1
5666 && !slp)
5668 if (j == 0 && i == 0)
5669 STMT_VINFO_VEC_STMT (stmt_info)
5670 = *vec_stmt = assign;
5671 else
5672 STMT_VINFO_RELATED_STMT (prev_stmt_info) = assign;
5673 prev_stmt_info = vinfo_for_stmt (assign);
5677 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5679 return true;
5682 dr_chain.create (group_size);
5683 oprnds.create (group_size);
5685 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5686 gcc_assert (alignment_support_scheme);
5687 /* Targets with store-lane instructions must not require explicit
5688 realignment. */
5689 gcc_assert (!store_lanes_p
5690 || alignment_support_scheme == dr_aligned
5691 || alignment_support_scheme == dr_unaligned_supported);
5693 if (negative)
5694 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5696 if (store_lanes_p)
5697 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5698 else
5699 aggr_type = vectype;
5701 /* In case the vectorization factor (VF) is bigger than the number
5702 of elements that we can fit in a vectype (nunits), we have to generate
5703 more than one vector stmt - i.e. - we need to "unroll" the
5704 vector stmt by a factor VF/nunits. For more details see documentation in
5705 vect_get_vec_def_for_copy_stmt. */
5707 /* In case of interleaving (non-unit grouped access):
5709 S1: &base + 2 = x2
5710 S2: &base = x0
5711 S3: &base + 1 = x1
5712 S4: &base + 3 = x3
5714 We create vectorized stores starting from the base address (the access of
5715 the first stmt in the chain, S2 in the above example) when the last store
5716 stmt of the chain (S4) is reached:
5718 VS1: &base = vx2
5719 VS2: &base + vec_size*1 = vx0
5720 VS3: &base + vec_size*2 = vx1
5721 VS4: &base + vec_size*3 = vx3
5723 Then permutation statements are generated:
5725 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5726 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5729 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5730 (the order of the data-refs in the output of vect_permute_store_chain
5731 corresponds to the order of scalar stmts in the interleaving chain - see
5732 the documentation of vect_permute_store_chain()).
5734 In case of both multiple types and interleaving, above vector stores and
5735 permutation stmts are created for every copy. The result vector stmts are
5736 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5737 STMT_VINFO_RELATED_STMT for the next copies.
5740 prev_stmt_info = NULL;
5741 for (j = 0; j < ncopies; j++)
5744 if (j == 0)
5746 if (slp)
5748 /* Get vectorized arguments for SLP_NODE. */
5749 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5750 NULL, slp_node, -1);
5752 vec_oprnd = vec_oprnds[0];
5754 else
5756 /* For interleaved stores we collect vectorized defs for all the
5757 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5758 used as an input to vect_permute_store_chain(), and OPRNDS as
5759 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5761 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5762 OPRNDS are of size 1. */
5763 next_stmt = first_stmt;
5764 for (i = 0; i < group_size; i++)
5766 /* Since gaps are not supported for interleaved stores,
5767 GROUP_SIZE is the exact number of stmts in the chain.
5768 Therefore, NEXT_STMT can't be NULL_TREE. In case that
5769 there is no interleaving, GROUP_SIZE is 1, and only one
5770 iteration of the loop will be executed. */
5771 gcc_assert (next_stmt
5772 && gimple_assign_single_p (next_stmt));
5773 op = gimple_assign_rhs1 (next_stmt);
5775 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5776 NULL);
5777 dr_chain.quick_push (vec_oprnd);
5778 oprnds.quick_push (vec_oprnd);
5779 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5783 /* We should have caught mismatched types earlier. */
5784 gcc_assert (useless_type_conversion_p (vectype,
5785 TREE_TYPE (vec_oprnd)));
5786 bool simd_lane_access_p
5787 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5788 if (simd_lane_access_p
5789 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5790 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5791 && integer_zerop (DR_OFFSET (first_dr))
5792 && integer_zerop (DR_INIT (first_dr))
5793 && alias_sets_conflict_p (get_alias_set (aggr_type),
5794 get_alias_set (DR_REF (first_dr))))
5796 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5797 dataref_offset = build_int_cst (reference_alias_ptr_type
5798 (DR_REF (first_dr)), 0);
5799 inv_p = false;
5801 else
5802 dataref_ptr
5803 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5804 simd_lane_access_p ? loop : NULL,
5805 offset, &dummy, gsi, &ptr_incr,
5806 simd_lane_access_p, &inv_p);
5807 gcc_assert (bb_vinfo || !inv_p);
5809 else
5811 /* For interleaved stores we created vectorized defs for all the
5812 defs stored in OPRNDS in the previous iteration (previous copy).
5813 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5814 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5815 next copy.
5816 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5817 OPRNDS are of size 1. */
5818 for (i = 0; i < group_size; i++)
5820 op = oprnds[i];
5821 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5822 &def, &dt);
5823 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5824 dr_chain[i] = vec_oprnd;
5825 oprnds[i] = vec_oprnd;
5827 if (dataref_offset)
5828 dataref_offset
5829 = int_const_binop (PLUS_EXPR, dataref_offset,
5830 TYPE_SIZE_UNIT (aggr_type));
5831 else
5832 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5833 TYPE_SIZE_UNIT (aggr_type));
5836 if (store_lanes_p)
5838 tree vec_array;
5840 /* Combine all the vectors into an array. */
5841 vec_array = create_vector_array (vectype, vec_num);
5842 for (i = 0; i < vec_num; i++)
5844 vec_oprnd = dr_chain[i];
5845 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5848 /* Emit:
5849 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5850 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5851 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5852 gimple_call_set_lhs (new_stmt, data_ref);
5853 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5855 else
5857 new_stmt = NULL;
5858 if (grouped_store)
5860 if (j == 0)
5861 result_chain.create (group_size);
5862 /* Permute. */
5863 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5864 &result_chain);
5867 next_stmt = first_stmt;
5868 for (i = 0; i < vec_num; i++)
5870 unsigned align, misalign;
5872 if (i > 0)
5873 /* Bump the vector pointer. */
5874 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5875 stmt, NULL_TREE);
5877 if (slp)
5878 vec_oprnd = vec_oprnds[i];
5879 else if (grouped_store)
5880 /* For grouped stores vectorized defs are interleaved in
5881 vect_permute_store_chain(). */
5882 vec_oprnd = result_chain[i];
5884 data_ref = fold_build2 (MEM_REF, TREE_TYPE (vec_oprnd),
5885 dataref_ptr,
5886 dataref_offset
5887 ? dataref_offset
5888 : build_int_cst (reference_alias_ptr_type
5889 (DR_REF (first_dr)), 0));
5890 align = TYPE_ALIGN_UNIT (vectype);
5891 if (aligned_access_p (first_dr))
5892 misalign = 0;
5893 else if (DR_MISALIGNMENT (first_dr) == -1)
5895 if (DR_VECT_AUX (first_dr)->base_element_aligned)
5896 align = TYPE_ALIGN_UNIT (elem_type);
5897 else
5898 align = get_object_alignment (DR_REF (first_dr))
5899 / BITS_PER_UNIT;
5900 misalign = 0;
5901 TREE_TYPE (data_ref)
5902 = build_aligned_type (TREE_TYPE (data_ref),
5903 align * BITS_PER_UNIT);
5905 else
5907 TREE_TYPE (data_ref)
5908 = build_aligned_type (TREE_TYPE (data_ref),
5909 TYPE_ALIGN (elem_type));
5910 misalign = DR_MISALIGNMENT (first_dr);
5912 if (dataref_offset == NULL_TREE
5913 && TREE_CODE (dataref_ptr) == SSA_NAME)
5914 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5915 misalign);
5917 if (negative
5918 && dt != vect_constant_def
5919 && dt != vect_external_def)
5921 tree perm_mask = perm_mask_for_reverse (vectype);
5922 tree perm_dest
5923 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5924 vectype);
5925 tree new_temp = make_ssa_name (perm_dest);
5927 /* Generate the permute statement. */
5928 gimple *perm_stmt
5929 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5930 vec_oprnd, perm_mask);
5931 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5933 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5934 vec_oprnd = new_temp;
5937 /* Arguments are ready. Create the new vector stmt. */
5938 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5939 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5941 if (slp)
5942 continue;
5944 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5945 if (!next_stmt)
5946 break;
5949 if (!slp)
5951 if (j == 0)
5952 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5953 else
5954 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5955 prev_stmt_info = vinfo_for_stmt (new_stmt);
5959 dr_chain.release ();
5960 oprnds.release ();
5961 result_chain.release ();
5962 vec_oprnds.release ();
5964 return true;
5967 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5968 VECTOR_CST mask. No checks are made that the target platform supports the
5969 mask, so callers may wish to test can_vec_perm_p separately, or use
5970 vect_gen_perm_mask_checked. */
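/* For illustration (hypothetical selector): for a 4-element vector type,
   a reversing permutation would use sel = {3, 2, 1, 0}; the result is a
   VECTOR_CST of the matching integer mask type holding those values.  */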
5972 tree
5973 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5975 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5976 int i, nunits;
5978 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5980 mask_elt_type = lang_hooks.types.type_for_mode
5981 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5982 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5984 mask_elts = XALLOCAVEC (tree, nunits);
5985 for (i = nunits - 1; i >= 0; i--)
5986 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5987 mask_vec = build_vector (mask_type, mask_elts);
5989 return mask_vec;
5992 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5993 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5995 tree
5996 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5998 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5999 return vect_gen_perm_mask_any (vectype, sel);
6002 /* Given vector variables X and Y that were generated for the scalar
6003 STMT, generate instructions to permute the vector elements of X and Y
6004 using permutation mask MASK_VEC, insert them at *GSI and return the
6005 permuted vector variable. */
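/* For illustration (hypothetical operands): permute_vec_elements (vx1, vx2,
   mask, stmt, gsi) emits
     _N = VEC_PERM_EXPR <vx1, vx2, mask>;
   at *GSI and returns the new SSA name _N.  */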
6007 static tree
6008 permute_vec_elements (tree x, tree y, tree mask_vec, gimple *stmt,
6009 gimple_stmt_iterator *gsi)
6011 tree vectype = TREE_TYPE (x);
6012 tree perm_dest, data_ref;
6013 gimple *perm_stmt;
6015 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
6016 data_ref = make_ssa_name (perm_dest);
6018 /* Generate the permute statement. */
6019 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
6020 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
6022 return data_ref;
6025 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
6026 inserting them on the loop's preheader edge. Returns true if we
6027 were successful in doing so (and thus STMT can then be moved),
6028 otherwise returns false. */
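/* For illustration (hypothetical gimple): if STMT is
     _2 = *p_1;
   and p_1 is defined inside LOOP by
     p_1 = base_3 + 16;
   with base_3 defined outside the loop, that definition is moved to the
   preheader edge so STMT itself can later be hoisted as an invariant
   load.  */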
6030 static bool
6031 hoist_defs_of_uses (gimple *stmt, struct loop *loop)
6033 ssa_op_iter i;
6034 tree op;
6035 bool any = false;
6037 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6039 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6040 if (!gimple_nop_p (def_stmt)
6041 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6043 /* Make sure we don't need to recurse. While we could do
6044 so in simple cases, when there are more complex use webs
6045 we don't have an easy way to preserve stmt order to fulfil
6046 dependencies within them. */
6047 tree op2;
6048 ssa_op_iter i2;
6049 if (gimple_code (def_stmt) == GIMPLE_PHI)
6050 return false;
6051 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
6053 gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
6054 if (!gimple_nop_p (def_stmt2)
6055 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
6056 return false;
6058 any = true;
6062 if (!any)
6063 return true;
6065 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
6067 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
6068 if (!gimple_nop_p (def_stmt)
6069 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
6071 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
6072 gsi_remove (&gsi, false);
6073 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
6077 return true;
6080 /* vectorizable_load.
6082 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
6083 can be vectorized.
6084 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6085 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6086 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
6088 static bool
6089 vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
6090 slp_tree slp_node, slp_instance slp_node_instance)
6092 tree scalar_dest;
6093 tree vec_dest = NULL;
6094 tree data_ref = NULL;
6095 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6096 stmt_vec_info prev_stmt_info;
6097 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6098 struct loop *loop = NULL;
6099 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
6100 bool nested_in_vect_loop = false;
6101 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
6102 tree elem_type;
6103 tree new_temp;
6104 machine_mode mode;
6105 gimple *new_stmt = NULL;
6106 tree dummy;
6107 enum dr_alignment_support alignment_support_scheme;
6108 tree dataref_ptr = NULL_TREE;
6109 tree dataref_offset = NULL_TREE;
6110 gimple *ptr_incr = NULL;
6111 int ncopies;
6112 int i, j, group_size = -1, group_gap_adj;
6113 tree msq = NULL_TREE, lsq;
6114 tree offset = NULL_TREE;
6115 tree byte_offset = NULL_TREE;
6116 tree realignment_token = NULL_TREE;
6117 gphi *phi = NULL;
6118 vec<tree> dr_chain = vNULL;
6119 bool grouped_load = false;
6120 bool load_lanes_p = false;
6121 gimple *first_stmt;
6122 bool inv_p;
6123 bool negative = false;
6124 bool compute_in_loop = false;
6125 struct loop *at_loop;
6126 int vec_num;
6127 bool slp = (slp_node != NULL);
6128 bool slp_perm = false;
6129 enum tree_code code;
6130 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6131 int vf;
6132 tree aggr_type;
6133 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
6134 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
6135 int gather_scale = 1;
6136 enum vect_def_type gather_dt = vect_unknown_def_type;
6138 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6139 return false;
6141 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
6142 return false;
6144 /* Is vectorizable load? */
6145 if (!is_gimple_assign (stmt))
6146 return false;
6148 scalar_dest = gimple_assign_lhs (stmt);
6149 if (TREE_CODE (scalar_dest) != SSA_NAME)
6150 return false;
6152 code = gimple_assign_rhs_code (stmt);
6153 if (code != ARRAY_REF
6154 && code != BIT_FIELD_REF
6155 && code != INDIRECT_REF
6156 && code != COMPONENT_REF
6157 && code != IMAGPART_EXPR
6158 && code != REALPART_EXPR
6159 && code != MEM_REF
6160 && TREE_CODE_CLASS (code) != tcc_declaration)
6161 return false;
6163 if (!STMT_VINFO_DATA_REF (stmt_info))
6164 return false;
6166 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6167 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6169 if (loop_vinfo)
6171 loop = LOOP_VINFO_LOOP (loop_vinfo);
6172 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
6173 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6175 else
6176 vf = 1;
6178 /* Multiple types in SLP are handled by creating the appropriate number of
6179 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6180 case of SLP. */
6181 if (slp || PURE_SLP_STMT (stmt_info))
6182 ncopies = 1;
6183 else
6184 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6186 gcc_assert (ncopies >= 1);
6188 /* FORNOW. This restriction should be relaxed. */
6189 if (nested_in_vect_loop && ncopies > 1)
6191 if (dump_enabled_p ())
6192 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6193 "multiple types in nested loop.\n");
6194 return false;
6197 /* Invalidate assumptions made by dependence analysis when vectorization
6198 on the unrolled body effectively re-orders stmts. */
6199 if (ncopies > 1
6200 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6201 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6202 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6204 if (dump_enabled_p ())
6205 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6206 "cannot perform implicit CSE when unrolling "
6207 "with negative dependence distance\n");
6208 return false;
6211 elem_type = TREE_TYPE (vectype);
6212 mode = TYPE_MODE (vectype);
6214 /* FORNOW. In some cases we can vectorize even if the data-type is not
6215 supported (e.g. data copies). */
6216 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
6218 if (dump_enabled_p ())
6219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6220 "Aligned load, but unsupported type.\n");
6221 return false;
6224 /* Check if the load is a part of an interleaving chain. */
6225 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
6227 grouped_load = true;
6228 /* FORNOW */
6229 gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
6231 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6233 /* If this is single-element interleaving with an element distance
6234 that leaves unused vector loads around, punt - we at least create
6235 very sub-optimal code in that case (and blow up memory,
6236 see PR65518). */
6237 if (first_stmt == stmt
6238 && !GROUP_NEXT_ELEMENT (stmt_info)
6239 && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
6241 if (dump_enabled_p ())
6242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6243 "single-element interleaving not supported "
6244 "for non-adjacent vector loads\n");
6245 return false;
6248 if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6249 slp_perm = true;
6251 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6252 if (!slp
6253 && !PURE_SLP_STMT (stmt_info)
6254 && !STMT_VINFO_STRIDED_P (stmt_info))
6256 if (vect_load_lanes_supported (vectype, group_size))
6257 load_lanes_p = true;
6258 else if (!vect_grouped_load_supported (vectype, group_size))
6259 return false;
6262 /* Invalidate assumptions made by dependence analysis when vectorization
6263 on the unrolled body effectively re-orders stmts. */
6264 if (!PURE_SLP_STMT (stmt_info)
6265 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
6266 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
6267 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
6269 if (dump_enabled_p ())
6270 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6271 "cannot perform implicit CSE when performing "
6272 "group loads with negative dependence distance\n");
6273 return false;
6276 /* Similarly, when the stmt is a load that is both part of an SLP
6277 instance and a loop vectorized stmt via the same-dr mechanism,
6278 we have to give up. */
6279 if (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)
6280 && (STMT_SLP_TYPE (stmt_info)
6281 != STMT_SLP_TYPE (vinfo_for_stmt
6282 (STMT_VINFO_GROUP_SAME_DR_STMT (stmt_info)))))
6284 if (dump_enabled_p ())
6285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6286 "conflicting SLP types for CSEd load\n");
6287 return false;
6292 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6294 gimple *def_stmt;
6295 tree def;
6296 gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
6297 &gather_off, &gather_scale);
6298 gcc_assert (gather_decl);
6299 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
6300 &def_stmt, &def, &gather_dt,
6301 &gather_off_vectype))
6303 if (dump_enabled_p ())
6304 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6305 "gather index use not simple.\n");
6306 return false;
6309 else if (STMT_VINFO_STRIDED_P (stmt_info))
6311 if ((grouped_load
6312 && (slp || PURE_SLP_STMT (stmt_info)))
6313 && (group_size > nunits
6314 || nunits % group_size != 0))
6316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6317 "unhandled strided group load\n");
6318 return false;
6321 else
6323 negative = tree_int_cst_compare (nested_in_vect_loop
6324 ? STMT_VINFO_DR_STEP (stmt_info)
6325 : DR_STEP (dr),
6326 size_zero_node) < 0;
6327 if (negative && ncopies > 1)
6329 if (dump_enabled_p ())
6330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6331 "multiple types with negative step.\n");
6332 return false;
6335 if (negative)
6337 if (grouped_load)
6339 if (dump_enabled_p ())
6340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6341 "negative step for group load not supported"
6342 "\n");
6343 return false;
6345 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
6346 if (alignment_support_scheme != dr_aligned
6347 && alignment_support_scheme != dr_unaligned_supported)
6349 if (dump_enabled_p ())
6350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6351 "negative step but alignment required.\n");
6352 return false;
6354 if (!perm_mask_for_reverse (vectype))
6356 if (dump_enabled_p ())
6357 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6358 "negative step and reversing not supported."
6359 "\n");
6360 return false;
6365 if (!vec_stmt) /* transformation not required. */
6367 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
6368 /* The SLP costs are calculated during SLP analysis. */
6369 if (!PURE_SLP_STMT (stmt_info))
6370 vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
6371 NULL, NULL, NULL);
6372 return true;
6375 if (dump_enabled_p ())
6376 dump_printf_loc (MSG_NOTE, vect_location,
6377 "transform load. ncopies = %d\n", ncopies);
6379 /** Transform. **/
6381 ensure_base_align (stmt_info, dr);
6383 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
6385 tree vec_oprnd0 = NULL_TREE, op;
6386 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
6387 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
6388 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
6389 edge pe = loop_preheader_edge (loop);
6390 gimple_seq seq;
6391 basic_block new_bb;
6392 enum { NARROW, NONE, WIDEN } modifier;
6393 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
6395 if (nunits == gather_off_nunits)
6396 modifier = NONE;
6397 else if (nunits == gather_off_nunits / 2)
6399 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
6400 modifier = WIDEN;
6402 for (i = 0; i < gather_off_nunits; ++i)
6403 sel[i] = i | nunits;
6405 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
6407 else if (nunits == gather_off_nunits * 2)
6409 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
6410 modifier = NARROW;
6412 for (i = 0; i < nunits; ++i)
6413 sel[i] = i < gather_off_nunits
6414 ? i : i + nunits - gather_off_nunits;
6416 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
6417 ncopies *= 2;
6419 else
6420 gcc_unreachable ();
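/* For illustration (hypothetical vector widths): with a 4-element vectype
   and an 8-element offset vector the modifier is WIDEN and the mask
   {4, 5, 6, 7, 4, 5, 6, 7} selects the upper half of the offsets for every
   second copy; with an 8-element vectype and a 4-element offset vector the
   modifier is NARROW, ncopies is doubled, and the mask
   {0, 1, 2, 3, 8, 9, 10, 11} concatenates the low halves of two gather
   results into one full vector.  */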
6422 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
6423 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6424 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6425 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6426 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
6427 scaletype = TREE_VALUE (arglist);
6428 gcc_checking_assert (types_compatible_p (srctype, rettype));
6430 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6432 ptr = fold_convert (ptrtype, gather_base);
6433 if (!is_gimple_min_invariant (ptr))
6435 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
6436 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
6437 gcc_assert (!new_bb);
6440 /* Currently we support only unconditional gather loads,
6441 so mask should be all ones. */
6442 if (TREE_CODE (masktype) == INTEGER_TYPE)
6443 mask = build_int_cst (masktype, -1);
6444 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
6446 mask = build_int_cst (TREE_TYPE (masktype), -1);
6447 mask = build_vector_from_val (masktype, mask);
6448 mask = vect_init_vector (stmt, mask, masktype, NULL);
6450 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
6452 REAL_VALUE_TYPE r;
6453 long tmp[6];
6454 for (j = 0; j < 6; ++j)
6455 tmp[j] = -1;
6456 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
6457 mask = build_real (TREE_TYPE (masktype), r);
6458 mask = build_vector_from_val (masktype, mask);
6459 mask = vect_init_vector (stmt, mask, masktype, NULL);
6461 else
6462 gcc_unreachable ();
6464 scale = build_int_cst (scaletype, gather_scale);
6466 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
6467 merge = build_int_cst (TREE_TYPE (rettype), 0);
6468 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
6470 REAL_VALUE_TYPE r;
6471 long tmp[6];
6472 for (j = 0; j < 6; ++j)
6473 tmp[j] = 0;
6474 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
6475 merge = build_real (TREE_TYPE (rettype), r);
6477 else
6478 gcc_unreachable ();
6479 merge = build_vector_from_val (rettype, merge);
6480 merge = vect_init_vector (stmt, merge, rettype, NULL);
6482 prev_stmt_info = NULL;
6483 for (j = 0; j < ncopies; ++j)
6485 if (modifier == WIDEN && (j & 1))
6486 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
6487 perm_mask, stmt, gsi);
6488 else if (j == 0)
6489 op = vec_oprnd0
6490 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
6491 else
6492 op = vec_oprnd0
6493 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
6495 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
6497 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
6498 == TYPE_VECTOR_SUBPARTS (idxtype));
6499 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
6500 var = make_ssa_name (var);
6501 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
6502 new_stmt
6503 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6504 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6505 op = var;
6508 new_stmt
6509 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6511 if (!useless_type_conversion_p (vectype, rettype))
6513 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6514 == TYPE_VECTOR_SUBPARTS (rettype));
6515 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6516 op = make_ssa_name (var, new_stmt);
6517 gimple_call_set_lhs (new_stmt, op);
6518 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6519 var = make_ssa_name (vec_dest);
6520 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6521 new_stmt
6522 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6524 else
6526 var = make_ssa_name (vec_dest, new_stmt);
6527 gimple_call_set_lhs (new_stmt, var);
6530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6532 if (modifier == NARROW)
6534 if ((j & 1) == 0)
6536 prev_res = var;
6537 continue;
6539 var = permute_vec_elements (prev_res, var,
6540 perm_mask, stmt, gsi);
6541 new_stmt = SSA_NAME_DEF_STMT (var);
6544 if (prev_stmt_info == NULL)
6545 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6546 else
6547 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6548 prev_stmt_info = vinfo_for_stmt (new_stmt);
6550 return true;
6552 else if (STMT_VINFO_STRIDED_P (stmt_info))
6554 gimple_stmt_iterator incr_gsi;
6555 bool insert_after;
6556 gimple *incr;
6557 tree offvar;
6558 tree ivstep;
6559 tree running_off;
6560 vec<constructor_elt, va_gc> *v = NULL;
6561 gimple_seq stmts = NULL;
6562 tree stride_base, stride_step, alias_off;
6564 gcc_assert (!nested_in_vect_loop);
6566 if (slp && grouped_load)
6567 first_dr = STMT_VINFO_DATA_REF
6568 (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
6569 else
6570 first_dr = dr;
6572 stride_base
6573 = fold_build_pointer_plus
6574 (DR_BASE_ADDRESS (first_dr),
6575 size_binop (PLUS_EXPR,
6576 convert_to_ptrofftype (DR_OFFSET (first_dr)),
6577 convert_to_ptrofftype (DR_INIT (first_dr))));
6578 stride_step = fold_convert (sizetype, DR_STEP (first_dr));
6580 /* For a load with loop-invariant (but other than power-of-2)
6581 stride (i.e. not a grouped access) like so:
6583 for (i = 0; i < n; i += stride)
6584 ... = array[i];
6586 we generate a new induction variable and new accesses to
6587 form a new vector (or vectors, depending on ncopies):
6589 for (j = 0; ; j += VF*stride)
6590 tmp1 = array[j];
6591 tmp2 = array[j + stride];
6593 vectemp = {tmp1, tmp2, ...}
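/* For illustration (hypothetical numbers): in the SLP case below, a V4SI
   vectype with group_size 2 gives nloads = 4 / 2 = 2 and a 2-element ltype,
   so each result vector is assembled from two 2-element loads taken
   stride_step apart and combined through a CONSTRUCTOR.  */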
6596 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (stride_step), stride_step,
6597 build_int_cst (TREE_TYPE (stride_step), vf));
6599 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6601 create_iv (unshare_expr (stride_base), unshare_expr (ivstep), NULL,
6602 loop, &incr_gsi, insert_after,
6603 &offvar, NULL);
6604 incr = gsi_stmt (incr_gsi);
6605 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6607 stride_step = force_gimple_operand (unshare_expr (stride_step),
6608 &stmts, true, NULL_TREE);
6609 if (stmts)
6610 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6612 prev_stmt_info = NULL;
6613 running_off = offvar;
6614 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (first_dr)), 0);
6615 int nloads = nunits;
6616 tree ltype = TREE_TYPE (vectype);
6617 auto_vec<tree> dr_chain;
6618 if (slp)
6620 nloads = nunits / group_size;
6621 if (group_size < nunits)
6622 ltype = build_vector_type (TREE_TYPE (vectype), group_size);
6623 else
6624 ltype = vectype;
6625 ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
6626 ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6627 if (slp_perm)
6628 dr_chain.create (ncopies);
6630 for (j = 0; j < ncopies; j++)
6632 tree vec_inv;
6634 if (nloads > 1)
6636 vec_alloc (v, nloads);
6637 for (i = 0; i < nloads; i++)
6639 tree newref, newoff;
6640 gimple *incr;
6641 newref = build2 (MEM_REF, ltype, running_off, alias_off);
6643 newref = force_gimple_operand_gsi (gsi, newref, true,
6644 NULL_TREE, true,
6645 GSI_SAME_STMT);
6646 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6647 newoff = copy_ssa_name (running_off);
6648 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6649 running_off, stride_step);
6650 vect_finish_stmt_generation (stmt, incr, gsi);
6652 running_off = newoff;
6655 vec_inv = build_constructor (vectype, v);
6656 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6657 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6659 else
6661 new_stmt = gimple_build_assign (make_ssa_name (ltype),
6662 build2 (MEM_REF, ltype,
6663 running_off, alias_off));
6664 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6666 tree newoff = copy_ssa_name (running_off);
6667 gimple *incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6668 running_off, stride_step);
6669 vect_finish_stmt_generation (stmt, incr, gsi);
6671 running_off = newoff;
6674 if (slp)
6676 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6677 if (slp_perm)
6678 dr_chain.quick_push (gimple_assign_lhs (new_stmt));
6680 else
6682 if (j == 0)
6683 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6684 else
6685 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6686 prev_stmt_info = vinfo_for_stmt (new_stmt);
6689 if (slp_perm)
6690 vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6691 slp_node_instance, false);
6692 return true;
6695 if (grouped_load)
6697 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6698 if (slp
6699 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6700 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6701 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6703 /* Check if the chain of loads is already vectorized. */
6704 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6705 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6706 ??? But we can only do so if there is exactly one
6707 as we have no way to get at the rest. Leave the CSE
6708 opportunity alone.
6709 ??? With the group load eventually participating
6710 in multiple different permutations (having multiple
6711 slp nodes which refer to the same group) the CSE
6712 is even wrong code. See PR56270. */
6713 && !slp)
6715 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6716 return true;
6718 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6719 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6720 group_gap_adj = 0;
6722 /* VEC_NUM is the number of vect stmts to be created for this group. */
6723 if (slp)
6725 grouped_load = false;
6726 /* For SLP permutation support we need to load the whole group,
6727 not only the number of vector stmts the permutation result
6728 fits in. */
6729 if (slp_perm)
6730 vec_num = (group_size * vf + nunits - 1) / nunits;
6731 else
6732 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6733 group_gap_adj = vf * group_size - nunits * vec_num;
6735 else
6736 vec_num = group_size;
6738 else
6740 first_stmt = stmt;
6741 first_dr = dr;
6742 group_size = vec_num = 1;
6743 group_gap_adj = 0;
6746 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6747 gcc_assert (alignment_support_scheme);
6748 /* Targets with load-lane instructions must not require explicit
6749 realignment. */
6750 gcc_assert (!load_lanes_p
6751 || alignment_support_scheme == dr_aligned
6752 || alignment_support_scheme == dr_unaligned_supported);
6754 /* In case the vectorization factor (VF) is bigger than the number
6755 of elements that we can fit in a vectype (nunits), we have to generate
6756 more than one vector stmt - i.e. - we need to "unroll" the
6757 vector stmt by a factor VF/nunits. In doing so, we record a pointer
6758 from one copy of the vector stmt to the next, in the field
6759 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6760 stages to find the correct vector defs to be used when vectorizing
6761 stmts that use the defs of the current stmt. The example below
6762 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6763 need to create 4 vectorized stmts):
6765 before vectorization:
6766 RELATED_STMT VEC_STMT
6767 S1: x = memref - -
6768 S2: z = x + 1 - -
6770 step 1: vectorize stmt S1:
6771 We first create the vector stmt VS1_0, and, as usual, record a
6772 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6773 Next, we create the vector stmt VS1_1, and record a pointer to
6774 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6775 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6776 stmts and pointers:
6777 RELATED_STMT VEC_STMT
6778 VS1_0: vx0 = memref0 VS1_1 -
6779 VS1_1: vx1 = memref1 VS1_2 -
6780 VS1_2: vx2 = memref2 VS1_3 -
6781 VS1_3: vx3 = memref3 - -
6782 S1: x = load - VS1_0
6783 S2: z = x + 1 - -
6785 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6786 information we recorded in the RELATED_STMT field is used to vectorize
6787 stmt S2. */
6789 /* In case of interleaving (non-unit grouped access):
6791 S1: x2 = &base + 2
6792 S2: x0 = &base
6793 S3: x1 = &base + 1
6794 S4: x3 = &base + 3
6796 Vectorized loads are created in the order of memory accesses
6797 starting from the access of the first stmt of the chain:
6799 VS1: vx0 = &base
6800 VS2: vx1 = &base + vec_size*1
6801 VS3: vx2 = &base + vec_size*2
6802 VS4: vx3 = &base + vec_size*3
6804 Then permutation statements are generated:
6806 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6807 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6810 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6811 (the order of the data-refs in the output of vect_permute_load_chain
6812 corresponds to the order of scalar stmts in the interleaving chain - see
6813 the documentation of vect_permute_load_chain()).
6814 The generation of permutation stmts and recording them in
6815 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6817 In case of both multiple types and interleaving, the vector loads and
6818 permutation stmts above are created for every copy. The result vector
6819 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6820 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6822 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6823 on a target that supports unaligned accesses (dr_unaligned_supported)
6824 we generate the following code:
6825 p = initial_addr;
6826 indx = 0;
6827 loop {
6828 p = p + indx * vectype_size;
6829 vec_dest = *(p);
6830 indx = indx + 1;
6833 Otherwise, the data reference is potentially unaligned on a target that
6834 does not support unaligned accesses (dr_explicit_realign_optimized) -
6835 then generate the following code, in which the data in each iteration is
6836 obtained by two vector loads, one from the previous iteration, and one
6837 from the current iteration:
6838 p1 = initial_addr;
6839 msq_init = *(floor(p1))
6840 p2 = initial_addr + VS - 1;
6841 realignment_token = call target_builtin;
6842 indx = 0;
6843 loop {
6844 p2 = p2 + indx * vectype_size
6845 lsq = *(floor(p2))
6846 vec_dest = realign_load (msq, lsq, realignment_token)
6847 indx = indx + 1;
6848 msq = lsq;
6849 } */
6851 /* If the misalignment remains the same throughout the execution of the
6852 loop, we can create the init_addr and permutation mask at the loop
6853 preheader. Otherwise, it needs to be created inside the loop.
6854 This can only occur when vectorizing memory accesses in the inner-loop
6855 nested within an outer-loop that is being vectorized. */
6857 if (nested_in_vect_loop
6858 && (TREE_INT_CST_LOW (DR_STEP (dr))
6859 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6861 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6862 compute_in_loop = true;
6865 if ((alignment_support_scheme == dr_explicit_realign_optimized
6866 || alignment_support_scheme == dr_explicit_realign)
6867 && !compute_in_loop)
6869 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6870 alignment_support_scheme, NULL_TREE,
6871 &at_loop);
6872 if (alignment_support_scheme == dr_explicit_realign_optimized)
6874 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6875 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6876 size_one_node);
6879 else
6880 at_loop = loop;
6882 if (negative)
6883 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6885 if (load_lanes_p)
6886 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6887 else
6888 aggr_type = vectype;
6890 prev_stmt_info = NULL;
6891 for (j = 0; j < ncopies; j++)
6893 /* 1. Create the vector or array pointer update chain. */
6894 if (j == 0)
6896 bool simd_lane_access_p
6897 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6898 if (simd_lane_access_p
6899 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6900 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6901 && integer_zerop (DR_OFFSET (first_dr))
6902 && integer_zerop (DR_INIT (first_dr))
6903 && alias_sets_conflict_p (get_alias_set (aggr_type),
6904 get_alias_set (DR_REF (first_dr)))
6905 && (alignment_support_scheme == dr_aligned
6906 || alignment_support_scheme == dr_unaligned_supported))
6908 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6909 dataref_offset = build_int_cst (reference_alias_ptr_type
6910 (DR_REF (first_dr)), 0);
6911 inv_p = false;
6913 else
6914 dataref_ptr
6915 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6916 offset, &dummy, gsi, &ptr_incr,
6917 simd_lane_access_p, &inv_p,
6918 byte_offset);
6920 else if (dataref_offset)
6921 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6922 TYPE_SIZE_UNIT (aggr_type));
6923 else
6924 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6925 TYPE_SIZE_UNIT (aggr_type));
6927 if (grouped_load || slp_perm)
6928 dr_chain.create (vec_num);
6930 if (load_lanes_p)
6932 tree vec_array;
6934 vec_array = create_vector_array (vectype, vec_num);
6936 /* Emit:
6937 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6938 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6939 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6940 gimple_call_set_lhs (new_stmt, vec_array);
6941 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6943 /* Extract each vector into an SSA_NAME. */
6944 for (i = 0; i < vec_num; i++)
6946 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6947 vec_array, i);
6948 dr_chain.quick_push (new_temp);
6951 /* Record the mapping between SSA_NAMEs and statements. */
6952 vect_record_grouped_load_vectors (stmt, dr_chain);
6954 else
6956 for (i = 0; i < vec_num; i++)
6958 if (i > 0)
6959 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6960 stmt, NULL_TREE);
6962 /* 2. Create the vector-load in the loop. */
6963 switch (alignment_support_scheme)
6965 case dr_aligned:
6966 case dr_unaligned_supported:
6968 unsigned int align, misalign;
6970 data_ref
6971 = fold_build2 (MEM_REF, vectype, dataref_ptr,
6972 dataref_offset
6973 ? dataref_offset
6974 : build_int_cst (reference_alias_ptr_type
6975 (DR_REF (first_dr)), 0));
6976 align = TYPE_ALIGN_UNIT (vectype);
6977 if (alignment_support_scheme == dr_aligned)
6979 gcc_assert (aligned_access_p (first_dr));
6980 misalign = 0;
6982 else if (DR_MISALIGNMENT (first_dr) == -1)
6984 if (DR_VECT_AUX (first_dr)->base_element_aligned)
6985 align = TYPE_ALIGN_UNIT (elem_type);
6986 else
6987 align = (get_object_alignment (DR_REF (first_dr))
6988 / BITS_PER_UNIT);
6989 misalign = 0;
6990 TREE_TYPE (data_ref)
6991 = build_aligned_type (TREE_TYPE (data_ref),
6992 align * BITS_PER_UNIT);
6994 else
6996 TREE_TYPE (data_ref)
6997 = build_aligned_type (TREE_TYPE (data_ref),
6998 TYPE_ALIGN (elem_type));
6999 misalign = DR_MISALIGNMENT (first_dr);
7001 if (dataref_offset == NULL_TREE
7002 && TREE_CODE (dataref_ptr) == SSA_NAME)
7003 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
7004 align, misalign);
7005 break;
7007 case dr_explicit_realign:
7009 tree ptr, bump;
7011 tree vs = size_int (TYPE_VECTOR_SUBPARTS (vectype));
7013 if (compute_in_loop)
7014 msq = vect_setup_realignment (first_stmt, gsi,
7015 &realignment_token,
7016 dr_explicit_realign,
7017 dataref_ptr, NULL);
7019 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7020 ptr = copy_ssa_name (dataref_ptr);
7021 else
7022 ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
7023 new_stmt = gimple_build_assign
7024 (ptr, BIT_AND_EXPR, dataref_ptr,
7025 build_int_cst
7026 (TREE_TYPE (dataref_ptr),
7027 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7028 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7029 data_ref
7030 = build2 (MEM_REF, vectype, ptr,
7031 build_int_cst (reference_alias_ptr_type
7032 (DR_REF (first_dr)), 0));
7033 vec_dest = vect_create_destination_var (scalar_dest,
7034 vectype);
7035 new_stmt = gimple_build_assign (vec_dest, data_ref);
7036 new_temp = make_ssa_name (vec_dest, new_stmt);
7037 gimple_assign_set_lhs (new_stmt, new_temp);
7038 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
7039 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
7040 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7041 msq = new_temp;
7043 bump = size_binop (MULT_EXPR, vs,
7044 TYPE_SIZE_UNIT (elem_type));
7045 bump = size_binop (MINUS_EXPR, bump, size_one_node);
7046 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
7047 new_stmt = gimple_build_assign
7048 (NULL_TREE, BIT_AND_EXPR, ptr,
7049 build_int_cst
7050 (TREE_TYPE (ptr),
7051 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7052 ptr = copy_ssa_name (ptr, new_stmt);
7053 gimple_assign_set_lhs (new_stmt, ptr);
7054 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7055 data_ref
7056 = build2 (MEM_REF, vectype, ptr,
7057 build_int_cst (reference_alias_ptr_type
7058 (DR_REF (first_dr)), 0));
7059 break;
7061 case dr_explicit_realign_optimized:
7062 if (TREE_CODE (dataref_ptr) == SSA_NAME)
7063 new_temp = copy_ssa_name (dataref_ptr);
7064 else
7065 new_temp = make_ssa_name (TREE_TYPE (dataref_ptr));
7066 new_stmt = gimple_build_assign
7067 (new_temp, BIT_AND_EXPR, dataref_ptr,
7068 build_int_cst
7069 (TREE_TYPE (dataref_ptr),
7070 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
7071 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7072 data_ref
7073 = build2 (MEM_REF, vectype, new_temp,
7074 build_int_cst (reference_alias_ptr_type
7075 (DR_REF (first_dr)), 0));
7076 break;
7077 default:
7078 gcc_unreachable ();
7080 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7081 new_stmt = gimple_build_assign (vec_dest, data_ref);
7082 new_temp = make_ssa_name (vec_dest, new_stmt);
7083 gimple_assign_set_lhs (new_stmt, new_temp);
7084 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7086 /* 3. Handle explicit realignment if necessary/supported.
7087 Create in loop:
7088 vec_dest = realign_load (msq, lsq, realignment_token) */
7089 if (alignment_support_scheme == dr_explicit_realign_optimized
7090 || alignment_support_scheme == dr_explicit_realign)
7092 lsq = gimple_assign_lhs (new_stmt);
7093 if (!realignment_token)
7094 realignment_token = dataref_ptr;
7095 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7096 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
7097 msq, lsq, realignment_token);
7098 new_temp = make_ssa_name (vec_dest, new_stmt);
7099 gimple_assign_set_lhs (new_stmt, new_temp);
7100 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7102 if (alignment_support_scheme == dr_explicit_realign_optimized)
7104 gcc_assert (phi);
7105 if (i == vec_num - 1 && j == ncopies - 1)
7106 add_phi_arg (phi, lsq,
7107 loop_latch_edge (containing_loop),
7108 UNKNOWN_LOCATION);
7109 msq = lsq;
7113 /* 4. Handle invariant-load. */
7114 if (inv_p && !bb_vinfo)
7116 gcc_assert (!grouped_load);
7117 /* If we have versioned for aliasing or the loop doesn't
7118 have any data dependencies that would preclude this,
7119 then we are sure this is a loop invariant load and
7120 thus we can insert it on the preheader edge. */
7121 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
7122 && !nested_in_vect_loop
7123 && hoist_defs_of_uses (stmt, loop))
7125 if (dump_enabled_p ())
7127 dump_printf_loc (MSG_NOTE, vect_location,
7128 "hoisting out of the vectorized "
7129 "loop: ");
7130 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7132 tree tem = copy_ssa_name (scalar_dest);
7133 gsi_insert_on_edge_immediate
7134 (loop_preheader_edge (loop),
7135 gimple_build_assign (tem,
7136 unshare_expr
7137 (gimple_assign_rhs1 (stmt))));
7138 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
7140 else
7142 gimple_stmt_iterator gsi2 = *gsi;
7143 gsi_next (&gsi2);
7144 new_temp = vect_init_vector (stmt, scalar_dest,
7145 vectype, &gsi2);
7147 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7148 set_vinfo_for_stmt (new_stmt,
7149 new_stmt_vec_info (new_stmt, loop_vinfo,
7150 bb_vinfo));
7153 if (negative)
7155 tree perm_mask = perm_mask_for_reverse (vectype);
7156 new_temp = permute_vec_elements (new_temp, new_temp,
7157 perm_mask, stmt, gsi);
7158 new_stmt = SSA_NAME_DEF_STMT (new_temp);
7161 /* Collect vector loads and later create their permutation in
7162 vect_transform_grouped_load (). */
7163 if (grouped_load || slp_perm)
7164 dr_chain.quick_push (new_temp);
7166 /* Store vector loads in the corresponding SLP_NODE. */
7167 if (slp && !slp_perm)
7168 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7170 /* Bump the vector pointer to account for a gap or for excess
7171 elements loaded for a permuted SLP load. */
7172 if (group_gap_adj != 0)
7174 bool ovf;
7175 tree bump
7176 = wide_int_to_tree (sizetype,
7177 wi::smul (TYPE_SIZE_UNIT (elem_type),
7178 group_gap_adj, &ovf));
7179 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
7180 stmt, bump);
7184 if (slp && !slp_perm)
7185 continue;
7187 if (slp_perm)
7189 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
7190 slp_node_instance, false))
7192 dr_chain.release ();
7193 return false;
7196 else
7198 if (grouped_load)
7200 if (!load_lanes_p)
7201 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
7202 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7204 else
7206 if (j == 0)
7207 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7208 else
7209 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7210 prev_stmt_info = vinfo_for_stmt (new_stmt);
7213 dr_chain.release ();
7216 return true;
7219 /* Function vect_is_simple_cond.
7221 Input:
7222 LOOP - the loop that is being vectorized.
7223 COND - Condition that is checked for simple use.
7225 Output:
7226 *COMP_VECTYPE - the vector type for the comparison.
7228 Returns whether a COND can be vectorized. Checks whether
7229 condition operands are supportable using vect_is_simple_use. */
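/* For illustration (hypothetical operands): for COND a_5 < b_7 with both
   operands integer SSA names, each operand is checked with
   vect_is_simple_use_1 and *COMP_VECTYPE is set to the vector type of the
   first operand that has one.  */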
7231 static bool
7232 vect_is_simple_cond (tree cond, gimple *stmt, loop_vec_info loop_vinfo,
7233 bb_vec_info bb_vinfo, tree *comp_vectype)
7235 tree lhs, rhs;
7236 tree def;
7237 enum vect_def_type dt;
7238 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
7240 if (!COMPARISON_CLASS_P (cond))
7241 return false;
7243 lhs = TREE_OPERAND (cond, 0);
7244 rhs = TREE_OPERAND (cond, 1);
7246 if (TREE_CODE (lhs) == SSA_NAME)
7248 gimple *lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
7249 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
7250 &lhs_def_stmt, &def, &dt, &vectype1))
7251 return false;
7253 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
7254 && TREE_CODE (lhs) != FIXED_CST)
7255 return false;
7257 if (TREE_CODE (rhs) == SSA_NAME)
7259 gimple *rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
7260 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
7261 &rhs_def_stmt, &def, &dt, &vectype2))
7262 return false;
7264 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
7265 && TREE_CODE (rhs) != FIXED_CST)
7266 return false;
7268 *comp_vectype = vectype1 ? vectype1 : vectype2;
7269 return true;
7272 /* vectorizable_condition.
7274 Check if STMT is a conditional modify expression that can be vectorized.
7275 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7276 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
7277 at GSI.
7279 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
7280 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
7281 else clause if it is 2).
7283 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
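/* For illustration (hypothetical gimple): a scalar statement
     x_1 = a_2 < b_3 ? c_4 : d_5;
   is transformed below into
     vx_1 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;
   built from the vector defs of the comparison operands and of the
   then/else clauses.  */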
7285 bool
7286 vectorizable_condition (gimple *stmt, gimple_stmt_iterator *gsi,
7287 gimple **vec_stmt, tree reduc_def, int reduc_index,
7288 slp_tree slp_node)
7290 tree scalar_dest = NULL_TREE;
7291 tree vec_dest = NULL_TREE;
7292 tree cond_expr, then_clause, else_clause;
7293 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7294 tree comp_vectype = NULL_TREE;
7295 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
7296 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
7297 tree vec_compare, vec_cond_expr;
7298 tree new_temp;
7299 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7300 tree def;
7301 enum vect_def_type dt, dts[4];
7302 int ncopies;
7303 enum tree_code code;
7304 stmt_vec_info prev_stmt_info = NULL;
7305 int i, j;
7306 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7307 vec<tree> vec_oprnds0 = vNULL;
7308 vec<tree> vec_oprnds1 = vNULL;
7309 vec<tree> vec_oprnds2 = vNULL;
7310 vec<tree> vec_oprnds3 = vNULL;
7311 tree vec_cmp_type;
7313 if (reduc_index && STMT_SLP_TYPE (stmt_info))
7314 return false;
7316 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
7317 return false;
7319 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
7320 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
7321 && reduc_def))
7322 return false;
7324 /* FORNOW: not yet supported. */
7325 if (STMT_VINFO_LIVE_P (stmt_info))
7327 if (dump_enabled_p ())
7328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7329 "value used after loop.\n");
7330 return false;
7333 /* Is vectorizable conditional operation? */
7334 if (!is_gimple_assign (stmt))
7335 return false;
7337 code = gimple_assign_rhs_code (stmt);
7339 if (code != COND_EXPR)
7340 return false;
7342 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7343 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
7345 if (slp_node || PURE_SLP_STMT (stmt_info))
7346 ncopies = 1;
7347 else
7348 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
7350 gcc_assert (ncopies >= 1);
7351 if (reduc_index && ncopies > 1)
7352 return false; /* FORNOW */
7354 cond_expr = gimple_assign_rhs1 (stmt);
7355 then_clause = gimple_assign_rhs2 (stmt);
7356 else_clause = gimple_assign_rhs3 (stmt);
7358 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
7359 &comp_vectype)
7360 || !comp_vectype)
7361 return false;
7363 if (TREE_CODE (then_clause) == SSA_NAME)
7365 gimple *then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
7366 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
7367 &then_def_stmt, &def, &dt))
7368 return false;
7370 else if (TREE_CODE (then_clause) != INTEGER_CST
7371 && TREE_CODE (then_clause) != REAL_CST
7372 && TREE_CODE (then_clause) != FIXED_CST)
7373 return false;
7375 if (TREE_CODE (else_clause) == SSA_NAME)
7377 gimple *else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
7378 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
7379 &else_def_stmt, &def, &dt))
7380 return false;
7382 else if (TREE_CODE (else_clause) != INTEGER_CST
7383 && TREE_CODE (else_clause) != REAL_CST
7384 && TREE_CODE (else_clause) != FIXED_CST)
7385 return false;
7387 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
7388 /* The result of a vector comparison should be of signed type. */
7389 tree cmp_type = build_nonstandard_integer_type (prec, 0);
7390 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
7391 if (vec_cmp_type == NULL_TREE)
7392 return false;
7394 if (!vec_stmt)
7396 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
7397 return expand_vec_cond_expr_p (vectype, comp_vectype);
7400 /* Transform. */
7402 if (!slp_node)
7404 vec_oprnds0.create (1);
7405 vec_oprnds1.create (1);
7406 vec_oprnds2.create (1);
7407 vec_oprnds3.create (1);
7410 /* Handle def. */
7411 scalar_dest = gimple_assign_lhs (stmt);
7412 vec_dest = vect_create_destination_var (scalar_dest, vectype);
7414 /* Handle cond expr. */
7415 for (j = 0; j < ncopies; j++)
7417 gassign *new_stmt = NULL;
7418 if (j == 0)
7420 if (slp_node)
7422 auto_vec<tree, 4> ops;
7423 auto_vec<vec<tree>, 4> vec_defs;
7425 ops.safe_push (TREE_OPERAND (cond_expr, 0));
7426 ops.safe_push (TREE_OPERAND (cond_expr, 1));
7427 ops.safe_push (then_clause);
7428 ops.safe_push (else_clause);
7429 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
7430 vec_oprnds3 = vec_defs.pop ();
7431 vec_oprnds2 = vec_defs.pop ();
7432 vec_oprnds1 = vec_defs.pop ();
7433 vec_oprnds0 = vec_defs.pop ();
7435 ops.release ();
7436 vec_defs.release ();
7438 else
7440 gimple *gtemp;
7441 vec_cond_lhs =
7442 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
7443 stmt, NULL);
7444 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
7445 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
7447 vec_cond_rhs =
7448 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
7449 stmt, NULL);
7450 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
7451 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
7452 if (reduc_index == 1)
7453 vec_then_clause = reduc_def;
7454 else
7456 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
7457 stmt, NULL);
7458 vect_is_simple_use (then_clause, stmt, loop_vinfo,
7459 NULL, &gtemp, &def, &dts[2]);
7461 if (reduc_index == 2)
7462 vec_else_clause = reduc_def;
7463 else
7465 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
7466 stmt, NULL);
7467 vect_is_simple_use (else_clause, stmt, loop_vinfo,
7468 NULL, &gtemp, &def, &dts[3]);
7472 else
7474 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
7475 vec_oprnds0.pop ());
7476 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
7477 vec_oprnds1.pop ());
7478 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
7479 vec_oprnds2.pop ());
7480 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
7481 vec_oprnds3.pop ());
7484 if (!slp_node)
7486 vec_oprnds0.quick_push (vec_cond_lhs);
7487 vec_oprnds1.quick_push (vec_cond_rhs);
7488 vec_oprnds2.quick_push (vec_then_clause);
7489 vec_oprnds3.quick_push (vec_else_clause);
7492 /* Arguments are ready. Create the new vector stmt. */
7493 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
7495 vec_cond_rhs = vec_oprnds1[i];
7496 vec_then_clause = vec_oprnds2[i];
7497 vec_else_clause = vec_oprnds3[i];
7499 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
7500 vec_cond_lhs, vec_cond_rhs);
7501 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
7502 vec_compare, vec_then_clause, vec_else_clause);
7504 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
7505 new_temp = make_ssa_name (vec_dest, new_stmt);
7506 gimple_assign_set_lhs (new_stmt, new_temp);
7507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
7508 if (slp_node)
7509 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
7512 if (slp_node)
7513 continue;
7515 if (j == 0)
7516 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
7517 else
7518 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
7520 prev_stmt_info = vinfo_for_stmt (new_stmt);
7523 vec_oprnds0.release ();
7524 vec_oprnds1.release ();
7525 vec_oprnds2.release ();
7526 vec_oprnds3.release ();
7528 return true;
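/* As an illustration of the transform above (a generic sketch with made-up
   SSA names): a scalar statement

     x_1 = a_2 < b_3 ? c_4 : d_5;

   becomes a single vector statement in which the comparison, carried out
   in the signed vector type computed above (vec_cmp_type), is embedded as
   the first operand of a VEC_COND_EXPR:

     vx_6 = VEC_COND_EXPR <va_7 < vb_8, vc_9, vd_10>;  */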
7532 /* Make sure the statement is vectorizable. */
7534 bool
7535 vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
7537 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7538 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
7539 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
7540 bool ok;
7541 tree scalar_type, vectype;
7542 gimple *pattern_stmt;
7543 gimple_seq pattern_def_seq;
7545 if (dump_enabled_p ())
7547 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
7548 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7551 if (gimple_has_volatile_ops (stmt))
7553 if (dump_enabled_p ())
7554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7555 "not vectorized: stmt has volatile operands\n");
7557 return false;
7560 /* Skip stmts that do not need to be vectorized. In loops this is expected
7561 to include:
7562 - the COND_EXPR which is the loop exit condition
7563 - any LABEL_EXPRs in the loop
7564 - computations that are used only for array indexing or loop control.
7565 In basic blocks we only analyze statements that are a part of some SLP
7566 instance, therefore, all the statements are relevant.
7568 A pattern statement needs to be analyzed instead of the original statement
7569 if the original statement is not relevant. Otherwise, we analyze both
7570 statements. In basic blocks we are called from some SLP instance
7571 traversal, so we don't analyze pattern stmts instead of the original
7572 ones; the pattern stmts will already be part of the SLP instance. */
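  /* For instance, in a loop such as

       for (i = 0; i < n; i++)
         a[i] = b[i] + 1;

     the exit test i < n and the increment of i only control the loop and
     index the arrays, so they are not vectorized themselves; only the load
     from b[i], the addition and the store to a[i] are.  */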
7574 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7575 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7576 && !STMT_VINFO_LIVE_P (stmt_info))
7578 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7579 && pattern_stmt
7580 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7581 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7583 /* Analyze PATTERN_STMT instead of the original stmt. */
7584 stmt = pattern_stmt;
7585 stmt_info = vinfo_for_stmt (pattern_stmt);
7586 if (dump_enabled_p ())
7588 dump_printf_loc (MSG_NOTE, vect_location,
7589 "==> examining pattern statement: ");
7590 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7593 else
7595 if (dump_enabled_p ())
7596 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7598 return true;
7601 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7602 && node == NULL
7603 && pattern_stmt
7604 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7605 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7607 /* Analyze PATTERN_STMT too. */
7608 if (dump_enabled_p ())
7610 dump_printf_loc (MSG_NOTE, vect_location,
7611 "==> examining pattern statement: ");
7612 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7615 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7616 return false;
7619 if (is_pattern_stmt_p (stmt_info)
7620 && node == NULL
7621 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7623 gimple_stmt_iterator si;
7625 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7627 gimple *pattern_def_stmt = gsi_stmt (si);
7628 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7629 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7631 /* Analyze def stmt of STMT if it's a pattern stmt. */
7632 if (dump_enabled_p ())
7634 dump_printf_loc (MSG_NOTE, vect_location,
7635 "==> examining pattern def statement: ");
7636 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7639 if (!vect_analyze_stmt (pattern_def_stmt,
7640 need_to_vectorize, node))
7641 return false;
7646 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7648 case vect_internal_def:
7649 break;
7651 case vect_reduction_def:
7652 case vect_nested_cycle:
7653 gcc_assert (!bb_vinfo
7654 && (relevance == vect_used_in_outer
7655 || relevance == vect_used_in_outer_by_reduction
7656 || relevance == vect_used_by_reduction
7657 || relevance == vect_unused_in_scope));
7658 break;
7660 case vect_induction_def:
7661 case vect_constant_def:
7662 case vect_external_def:
7663 case vect_unknown_def_type:
7664 default:
7665 gcc_unreachable ();
7668 if (bb_vinfo)
7670 gcc_assert (PURE_SLP_STMT (stmt_info));
7672 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7673 if (dump_enabled_p ())
7675 dump_printf_loc (MSG_NOTE, vect_location,
7676 "get vectype for scalar type: ");
7677 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7678 dump_printf (MSG_NOTE, "\n");
7681 vectype = get_vectype_for_scalar_type (scalar_type);
7682 if (!vectype)
7684 if (dump_enabled_p ())
7686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7687 "not SLPed: unsupported data-type ");
7688 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7689 scalar_type);
7690 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7692 return false;
7695 if (dump_enabled_p ())
7697 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7698 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7699 dump_printf (MSG_NOTE, "\n");
7702 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7705 if (STMT_VINFO_RELEVANT_P (stmt_info))
7707 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7708 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7709 || (is_gimple_call (stmt)
7710 && gimple_call_lhs (stmt) == NULL_TREE));
7711 *need_to_vectorize = true;
7714 if (PURE_SLP_STMT (stmt_info) && !node)
7716 dump_printf_loc (MSG_NOTE, vect_location,
7717 "handled only by SLP analysis\n");
7718 return true;
7721 ok = true;
7722 if (!bb_vinfo
7723 && (STMT_VINFO_RELEVANT_P (stmt_info)
7724 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7725 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7726 || vectorizable_conversion (stmt, NULL, NULL, node)
7727 || vectorizable_shift (stmt, NULL, NULL, node)
7728 || vectorizable_operation (stmt, NULL, NULL, node)
7729 || vectorizable_assignment (stmt, NULL, NULL, node)
7730 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7731 || vectorizable_call (stmt, NULL, NULL, node)
7732 || vectorizable_store (stmt, NULL, NULL, node)
7733 || vectorizable_reduction (stmt, NULL, NULL, node)
7734 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7735 else
7737 if (bb_vinfo)
7738 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7739 || vectorizable_conversion (stmt, NULL, NULL, node)
7740 || vectorizable_shift (stmt, NULL, NULL, node)
7741 || vectorizable_operation (stmt, NULL, NULL, node)
7742 || vectorizable_assignment (stmt, NULL, NULL, node)
7743 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7744 || vectorizable_call (stmt, NULL, NULL, node)
7745 || vectorizable_store (stmt, NULL, NULL, node)
7746 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7749 if (!ok)
7751 if (dump_enabled_p ())
7753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7754 "not vectorized: relevant stmt not ");
7755 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7756 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7759 return false;
7762 if (bb_vinfo)
7763 return true;
7765 /* Stmts that are (also) "live" (i.e., that are used outside the loop)
7766 need extra handling, except for vectorizable reductions. */
7767 if (STMT_VINFO_LIVE_P (stmt_info)
7768 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7769 ok = vectorizable_live_operation (stmt, NULL, NULL);
7771 if (!ok)
7773 if (dump_enabled_p ())
7775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7776 "not vectorized: live stmt not ");
7777 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7778 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7781 return false;
7784 return true;
7788 /* Function vect_transform_stmt.
7790 Create a vectorized stmt to replace STMT, and insert it at GSI. */
7792 bool
7793 vect_transform_stmt (gimple *stmt, gimple_stmt_iterator *gsi,
7794 bool *grouped_store, slp_tree slp_node,
7795 slp_instance slp_node_instance)
7797 bool is_store = false;
7798 gimple *vec_stmt = NULL;
7799 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7800 bool done;
7802 gimple *old_vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
7804 switch (STMT_VINFO_TYPE (stmt_info))
7806 case type_demotion_vec_info_type:
7807 case type_promotion_vec_info_type:
7808 case type_conversion_vec_info_type:
7809 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7810 gcc_assert (done);
7811 break;
7813 case induc_vec_info_type:
7814 gcc_assert (!slp_node);
7815 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7816 gcc_assert (done);
7817 break;
7819 case shift_vec_info_type:
7820 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7821 gcc_assert (done);
7822 break;
7824 case op_vec_info_type:
7825 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7826 gcc_assert (done);
7827 break;
7829 case assignment_vec_info_type:
7830 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7831 gcc_assert (done);
7832 break;
7834 case load_vec_info_type:
7835 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7836 slp_node_instance);
7837 gcc_assert (done);
7838 break;
7840 case store_vec_info_type:
7841 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7842 gcc_assert (done);
7843 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7845 /* In case of interleaving, the whole chain is vectorized when the
7846 last store in the chain is reached. Store stmts before the last
7847 one are skipped, and their vec_stmt_info shouldn't be freed
7848 meanwhile; see the example after this function. */
7849 *grouped_store = true;
7850 if (STMT_VINFO_VEC_STMT (stmt_info))
7851 is_store = true;
7853 else
7854 is_store = true;
7855 break;
7857 case condition_vec_info_type:
7858 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7859 gcc_assert (done);
7860 break;
7862 case call_vec_info_type:
7863 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7864 stmt = gsi_stmt (*gsi);
7865 if (is_gimple_call (stmt)
7866 && gimple_call_internal_p (stmt)
7867 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7868 is_store = true;
7869 break;
7871 case call_simd_clone_vec_info_type:
7872 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7873 stmt = gsi_stmt (*gsi);
7874 break;
7876 case reduc_vec_info_type:
7877 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7878 gcc_assert (done);
7879 break;
7881 default:
7882 if (!STMT_VINFO_LIVE_P (stmt_info))
7884 if (dump_enabled_p ())
7885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7886 "stmt not supported.\n");
7887 gcc_unreachable ();
7891 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
7892 This would break hybrid SLP vectorization. */
7893 if (slp_node)
7894 gcc_assert (!vec_stmt
7895 && STMT_VINFO_VEC_STMT (stmt_info) == old_vec_stmt);
7897 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7898 is being vectorized, but outside the immediately enclosing loop. */
7899 if (vec_stmt
7900 && STMT_VINFO_LOOP_VINFO (stmt_info)
7901 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7902 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7903 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7904 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7905 || STMT_VINFO_RELEVANT (stmt_info) ==
7906 vect_used_in_outer_by_reduction))
7908 struct loop *innerloop = LOOP_VINFO_LOOP (
7909 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7910 imm_use_iterator imm_iter;
7911 use_operand_p use_p;
7912 tree scalar_dest;
7913 gimple *exit_phi;
7915 if (dump_enabled_p ())
7916 dump_printf_loc (MSG_NOTE, vect_location,
7917 "Record the vdef for outer-loop vectorization.\n");
7919 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7920 (to be used when vectorizing outer-loop stmts that use the DEF of
7921 STMT). */
7922 if (gimple_code (stmt) == GIMPLE_PHI)
7923 scalar_dest = PHI_RESULT (stmt);
7924 else
7925 scalar_dest = gimple_assign_lhs (stmt);
7927 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7929 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7931 exit_phi = USE_STMT (use_p);
7932 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7937 /* Handle stmts whose DEF is used outside the loop-nest that is
7938 being vectorized. */
7939 if (STMT_VINFO_LIVE_P (stmt_info)
7940 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7942 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7943 gcc_assert (done);
7946 if (vec_stmt)
7947 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7949 return is_store;
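/* Example for the grouped-store handling in the store_vec_info_type case
   above (a generic sketch): in

     for (i = 0; i < n; i++)
       {
         a[2*i]     = x[i];
         a[2*i + 1] = y[i];
       }

   the two stores form one interleaving group.  vectorizable_store only
   emits the vector stores when it is called for the last store of the
   group; for the earlier members it generates nothing, and their
   stmt_vec_infos are kept until the whole chain has been vectorized.  */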
7953 /* Remove a group of stores (for SLP or interleaving), free their
7954 stmt_vec_info. */
7956 void
7957 vect_remove_stores (gimple *first_stmt)
7959 gimple *next = first_stmt;
7960 gimple *tmp;
7961 gimple_stmt_iterator next_si;
7963 while (next)
7965 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7967 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7968 if (is_pattern_stmt_p (stmt_info))
7969 next = STMT_VINFO_RELATED_STMT (stmt_info);
7970 /* Free the attached stmt_vec_info and remove the stmt. */
7971 next_si = gsi_for_stmt (next);
7972 unlink_stmt_vdef (next);
7973 gsi_remove (&next_si, true);
7974 release_defs (next);
7975 free_stmt_vec_info (next);
7976 next = tmp;
7981 /* Function new_stmt_vec_info.
7983 Create and initialize a new stmt_vec_info struct for STMT. */
7985 stmt_vec_info
7986 new_stmt_vec_info (gimple *stmt, loop_vec_info loop_vinfo,
7987 bb_vec_info bb_vinfo)
7989 stmt_vec_info res;
7990 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7992 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7993 STMT_VINFO_STMT (res) = stmt;
7994 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7995 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7996 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7997 STMT_VINFO_LIVE_P (res) = false;
7998 STMT_VINFO_VECTYPE (res) = NULL;
7999 STMT_VINFO_VEC_STMT (res) = NULL;
8000 STMT_VINFO_VECTORIZABLE (res) = true;
8001 STMT_VINFO_IN_PATTERN_P (res) = false;
8002 STMT_VINFO_RELATED_STMT (res) = NULL;
8003 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
8004 STMT_VINFO_DATA_REF (res) = NULL;
8006 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
8007 STMT_VINFO_DR_OFFSET (res) = NULL;
8008 STMT_VINFO_DR_INIT (res) = NULL;
8009 STMT_VINFO_DR_STEP (res) = NULL;
8010 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
8012 if (gimple_code (stmt) == GIMPLE_PHI
8013 && is_loop_header_bb_p (gimple_bb (stmt)))
8014 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
8015 else
8016 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
8018 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
8019 STMT_SLP_TYPE (res) = loop_vect;
8020 GROUP_FIRST_ELEMENT (res) = NULL;
8021 GROUP_NEXT_ELEMENT (res) = NULL;
8022 GROUP_SIZE (res) = 0;
8023 GROUP_STORE_COUNT (res) = 0;
8024 GROUP_GAP (res) = 0;
8025 GROUP_SAME_DR_STMT (res) = NULL;
8027 return res;
8031 /* Create a vector for stmt_vec_info structs. */
8033 void
8034 init_stmt_vec_info_vec (void)
8036 gcc_assert (!stmt_vec_info_vec.exists ());
8037 stmt_vec_info_vec.create (50);
8041 /* Free the stmt_vec_info vector. */
8043 void
8044 free_stmt_vec_info_vec (void)
8046 unsigned int i;
8047 vec_void_p info;
8048 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
8049 if (info != NULL)
8050 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
8051 gcc_assert (stmt_vec_info_vec.exists ());
8052 stmt_vec_info_vec.release ();
8056 /* Free stmt vectorization related info. */
8058 void
8059 free_stmt_vec_info (gimple *stmt)
8061 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8063 if (!stmt_info)
8064 return;
8066 /* Check if this statement has a related "pattern stmt"
8067 (introduced by the vectorizer during the pattern recognition
8068 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
8069 too. */
8070 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
8072 stmt_vec_info patt_info
8073 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8074 if (patt_info)
8076 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
8077 gimple *patt_stmt = STMT_VINFO_STMT (patt_info);
8078 gimple_set_bb (patt_stmt, NULL);
8079 tree lhs = gimple_get_lhs (patt_stmt);
8080 if (TREE_CODE (lhs) == SSA_NAME)
8081 release_ssa_name (lhs);
8082 if (seq)
8084 gimple_stmt_iterator si;
8085 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
8087 gimple *seq_stmt = gsi_stmt (si);
8088 gimple_set_bb (seq_stmt, NULL);
8089 lhs = gimple_get_lhs (seq_stmt);
8090 if (TREE_CODE (lhs) == SSA_NAME)
8091 release_ssa_name (lhs);
8092 free_stmt_vec_info (seq_stmt);
8095 free_stmt_vec_info (patt_stmt);
8099 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
8100 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
8101 set_vinfo_for_stmt (stmt, NULL);
8102 free (stmt_info);
8106 /* Function get_vectype_for_scalar_type_and_size.
8108 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
8109 by the target. */
8111 static tree
8112 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
8114 machine_mode inner_mode = TYPE_MODE (scalar_type);
8115 machine_mode simd_mode;
8116 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
8117 int nunits;
8118 tree vectype;
8120 if (nbytes == 0)
8121 return NULL_TREE;
8123 if (GET_MODE_CLASS (inner_mode) != MODE_INT
8124 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
8125 return NULL_TREE;
8127 /* For vector types of elements whose mode precision doesn't
8128 match their type's precision we use an element type of mode
8129 precision. The vectorization routines will have to make sure
8130 they support the proper result truncation/extension.
8131 We also make sure to build vector types with INTEGER_TYPE
8132 component type only. */
8133 if (INTEGRAL_TYPE_P (scalar_type)
8134 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
8135 || TREE_CODE (scalar_type) != INTEGER_TYPE))
8136 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
8137 TYPE_UNSIGNED (scalar_type));
8139 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
8140 When the component mode passes the above test simply use a type
8141 corresponding to that mode. The theory is that any use that
8142 would cause problems with this will disable vectorization anyway. */
8143 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
8144 && !INTEGRAL_TYPE_P (scalar_type))
8145 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
8147 /* We can't build a vector type of elements with alignment bigger than
8148 their size. */
8149 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
8150 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
8151 TYPE_UNSIGNED (scalar_type));
8153 /* If we fell back to using the mode, fail if there was
8154 no scalar type for it. */
8155 if (scalar_type == NULL_TREE)
8156 return NULL_TREE;
8158 /* If no size was supplied use the mode the target prefers. Otherwise
8159 look up a vector mode of the specified size. */
8160 if (size == 0)
8161 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
8162 else
8163 simd_mode = mode_for_vector (inner_mode, size / nbytes);
8164 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
8165 if (nunits <= 1)
8166 return NULL_TREE;
8168 vectype = build_vector_type (scalar_type, nunits);
8170 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
8171 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
8172 return NULL_TREE;
8174 return vectype;
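/* Worked example for the sizing logic above (illustrative; the modes
   involved depend on the target): for SCALAR_TYPE == int with SImode
   (4 bytes) and SIZE == 16, mode_for_vector (SImode, 16 / 4) yields
   V4SImode, nunits is 16 / 4 == 4, and a V4SI vector type is returned.
   With SIZE == 0 the target's preferred SIMD mode for SImode is used
   instead.  */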
8177 unsigned int current_vector_size;
8179 /* Function get_vectype_for_scalar_type.
8181 Returns the vector type corresponding to SCALAR_TYPE as supported
8182 by the target. */
8184 tree
8185 get_vectype_for_scalar_type (tree scalar_type)
8187 tree vectype;
8188 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
8189 current_vector_size);
8190 if (vectype
8191 && current_vector_size == 0)
8192 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
8193 return vectype;
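/* Note on current_vector_size (illustrative numbers): the first successful
   call records the size of the vector type chosen via the target's
   preferred SIMD mode -- e.g. 16 bytes -- and subsequent calls then request
   vector types of that same size, so one vectorization attempt uses vector
   types of a uniform size.  */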
8196 /* Function get_same_sized_vectype
8198 Returns a vector type corresponding to SCALAR_TYPE, of the same
8199 size as VECTOR_TYPE, if supported by the target. */
8201 tree
8202 get_same_sized_vectype (tree scalar_type, tree vector_type)
8204 return get_vectype_for_scalar_type_and_size
8205 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
8208 /* Function vect_is_simple_use.
8210 Input:
8211 LOOP_VINFO - the vect info of the loop that is being vectorized.
8212 BB_VINFO - the vect info of the basic block that is being vectorized.
8213 OPERAND - operand of STMT in the loop or bb.
8214 DEF - the defining stmt in case OPERAND is an SSA_NAME.
8216 Returns whether a stmt with OPERAND can be vectorized.
8217 For loops, supportable operands are constants, loop invariants, and operands
8218 that are defined by the current iteration of the loop. Unsupportable
8219 operands are those that are defined by a previous iteration of the loop (as
8220 is the case in reduction/induction computations).
8221 For basic blocks, supportable operands are constants and bb invariants.
8222 For now, operands defined outside the basic block are not supported. */
8224 bool
8225 vect_is_simple_use (tree operand, gimple *stmt, loop_vec_info loop_vinfo,
8226 bb_vec_info bb_vinfo, gimple **def_stmt,
8227 tree *def, enum vect_def_type *dt)
8229 *def_stmt = NULL;
8230 *def = NULL_TREE;
8231 *dt = vect_unknown_def_type;
8233 if (dump_enabled_p ())
8235 dump_printf_loc (MSG_NOTE, vect_location,
8236 "vect_is_simple_use: operand ");
8237 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
8238 dump_printf (MSG_NOTE, "\n");
8241 if (CONSTANT_CLASS_P (operand))
8243 *dt = vect_constant_def;
8244 return true;
8247 if (is_gimple_min_invariant (operand))
8249 *def = operand;
8250 *dt = vect_external_def;
8251 return true;
8254 if (TREE_CODE (operand) != SSA_NAME)
8256 if (dump_enabled_p ())
8257 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8258 "not ssa-name.\n");
8259 return false;
8262 if (SSA_NAME_IS_DEFAULT_DEF (operand))
8264 *def = operand;
8265 *dt = vect_external_def;
8266 return true;
8269 *def_stmt = SSA_NAME_DEF_STMT (operand);
8270 if (dump_enabled_p ())
8272 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
8273 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
8276 basic_block bb = gimple_bb (*def_stmt);
8277 if ((loop_vinfo && !flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo), bb))
8278 || (bb_vinfo
8279 && (bb != BB_VINFO_BB (bb_vinfo)
8280 || gimple_code (*def_stmt) == GIMPLE_PHI)))
8281 *dt = vect_external_def;
8282 else
8284 stmt_vec_info stmt_vinfo = vinfo_for_stmt (*def_stmt);
8285 if (bb_vinfo && !STMT_VINFO_VECTORIZABLE (stmt_vinfo))
8286 *dt = vect_external_def;
8287 else
8288 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
8291 if (dump_enabled_p ())
8293 dump_printf_loc (MSG_NOTE, vect_location, "type of def: ");
8294 switch (*dt)
8296 case vect_uninitialized_def:
8297 dump_printf (MSG_NOTE, "uninitialized\n");
8298 break;
8299 case vect_constant_def:
8300 dump_printf (MSG_NOTE, "constant\n");
8301 break;
8302 case vect_external_def:
8303 dump_printf (MSG_NOTE, "external\n");
8304 break;
8305 case vect_internal_def:
8306 dump_printf (MSG_NOTE, "internal\n");
8307 break;
8308 case vect_induction_def:
8309 dump_printf (MSG_NOTE, "induction\n");
8310 break;
8311 case vect_reduction_def:
8312 dump_printf (MSG_NOTE, "reduction\n");
8313 break;
8314 case vect_double_reduction_def:
8315 dump_printf (MSG_NOTE, "double reduction\n");
8316 break;
8317 case vect_nested_cycle:
8318 dump_printf (MSG_NOTE, "nested cycle\n");
8319 break;
8320 case vect_unknown_def_type:
8321 dump_printf (MSG_NOTE, "unknown\n");
8322 break;
8326 if (*dt == vect_unknown_def_type
8327 || (stmt
8328 && *dt == vect_double_reduction_def
8329 && gimple_code (stmt) != GIMPLE_PHI))
8331 if (dump_enabled_p ())
8332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8333 "Unsupported pattern.\n");
8334 return false;
8337 switch (gimple_code (*def_stmt))
8339 case GIMPLE_PHI:
8340 *def = gimple_phi_result (*def_stmt);
8341 break;
8343 case GIMPLE_ASSIGN:
8344 *def = gimple_assign_lhs (*def_stmt);
8345 break;
8347 case GIMPLE_CALL:
8348 *def = gimple_call_lhs (*def_stmt);
8349 if (*def != NULL)
8350 break;
8351 /* FALLTHRU */
8352 default:
8353 if (dump_enabled_p ())
8354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
8355 "unsupported defining stmt:\n");
8356 return false;
8359 return true;
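/* As an illustration (a generic sketch with made-up SSA names): when
   analyzing the operands of  x_3 = a_1 + n_2  in a loop being vectorized,
   a constant such as 5 is classified as vect_constant_def, an operand like
   n_2 defined before the loop as vect_external_def, and an operand like
   a_1 defined by another statement of the loop as vect_internal_def (or
   induction, reduction, etc., according to its definition statement's
   STMT_VINFO_DEF_TYPE).  */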
8362 /* Function vect_is_simple_use_1.
8364 Same as vect_is_simple_use but also determines the vector operand
8365 type of OPERAND and stores it to *VECTYPE. If the definition of
8366 OPERAND is vect_uninitialized_def, vect_constant_def or
8367 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
8368 is responsible for computing the best suited vector type for the
8369 scalar operand. */
8371 bool
8372 vect_is_simple_use_1 (tree operand, gimple *stmt, loop_vec_info loop_vinfo,
8373 bb_vec_info bb_vinfo, gimple **def_stmt,
8374 tree *def, enum vect_def_type *dt, tree *vectype)
8376 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
8377 def, dt))
8378 return false;
8380 /* Now get a vector type if the def is internal, otherwise supply
8381 NULL_TREE and leave it up to the caller to figure out a proper
8382 type for the use stmt. */
8383 if (*dt == vect_internal_def
8384 || *dt == vect_induction_def
8385 || *dt == vect_reduction_def
8386 || *dt == vect_double_reduction_def
8387 || *dt == vect_nested_cycle)
8389 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
8391 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
8392 && !STMT_VINFO_RELEVANT (stmt_info)
8393 && !STMT_VINFO_LIVE_P (stmt_info))
8394 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
8396 *vectype = STMT_VINFO_VECTYPE (stmt_info);
8397 gcc_assert (*vectype != NULL_TREE);
8399 else if (*dt == vect_uninitialized_def
8400 || *dt == vect_constant_def
8401 || *dt == vect_external_def)
8402 *vectype = NULL_TREE;
8403 else
8404 gcc_unreachable ();
8406 return true;
8410 /* Function supportable_widening_operation
8412 Check whether an operation represented by the code CODE is a
8413 widening operation that is supported by the target platform in
8414 vector form (i.e., when operating on arguments of type VECTYPE_IN
8415 producing a result of type VECTYPE_OUT).
8417 Widening operations we currently support are NOP (CONVERT), FLOAT,
8418 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these are supported
8419 by the target platform either directly (via vector tree-codes), or via
8420 target builtins.
8422 Output:
8423 - CODE1 and CODE2 are codes of vector operations to be used when
8424 vectorizing the operation, if available.
8425 - MULTI_STEP_CVT determines the number of required intermediate steps in
8426 case of multi-step conversion (like char->short->int - in that case
8427 MULTI_STEP_CVT will be 1).
8428 - INTERM_TYPES contains the intermediate type required to perform the
8429 widening operation (short in the above example). */
8431 bool
8432 supportable_widening_operation (enum tree_code code, gimple *stmt,
8433 tree vectype_out, tree vectype_in,
8434 enum tree_code *code1, enum tree_code *code2,
8435 int *multi_step_cvt,
8436 vec<tree> *interm_types)
8438 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
8439 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
8440 struct loop *vect_loop = NULL;
8441 machine_mode vec_mode;
8442 enum insn_code icode1, icode2;
8443 optab optab1, optab2;
8444 tree vectype = vectype_in;
8445 tree wide_vectype = vectype_out;
8446 enum tree_code c1, c2;
8447 int i;
8448 tree prev_type, intermediate_type;
8449 machine_mode intermediate_mode, prev_mode;
8450 optab optab3, optab4;
8452 *multi_step_cvt = 0;
8453 if (loop_info)
8454 vect_loop = LOOP_VINFO_LOOP (loop_info);
8456 switch (code)
8458 case WIDEN_MULT_EXPR:
8459 /* The result of a vectorized widening operation usually requires
8460 two vectors (because the widened results do not fit into one vector).
8461 The generated vector results would normally be expected to be
8462 generated in the same order as in the original scalar computation,
8463 i.e. if 8 results are generated in each vector iteration, they are
8464 to be organized as follows:
8465 vect1: [res1,res2,res3,res4],
8466 vect2: [res5,res6,res7,res8].
8468 However, in the special case that the result of the widening
8469 operation is used in a reduction computation only, the order doesn't
8470 matter (because when vectorizing a reduction we change the order of
8471 the computation). Some targets can take advantage of this and
8472 generate more efficient code. For example, targets like Altivec,
8473 that support widen_mult using a sequence of {mult_even,mult_odd}
8474 generate the following vectors:
8475 vect1: [res1,res3,res5,res7],
8476 vect2: [res2,res4,res6,res8].
8478 When vectorizing outer-loops, we execute the inner-loop sequentially
8479 (each vectorized inner-loop iteration contributes to VF outer-loop
8480 iterations in parallel). We therefore don't allow changing the
8481 order of the computation in the inner-loop during outer-loop
8482 vectorization. */
8483 /* TODO: Another case in which order doesn't *really* matter is when we
8484 widen and then contract again, e.g. (short)((int)x * y >> 8).
8485 Normally, pack_trunc performs an even/odd permute, whereas the
8486 repack from an even/odd expansion would be an interleave, which
8487 would be significantly simpler for e.g. AVX2. */
8488 /* In any case, in order to avoid duplicating the code below, recurse
8489 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
8490 are properly set up for the caller. If we fail, we'll continue with
8491 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
8492 if (vect_loop
8493 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
8494 && !nested_in_vect_loop_p (vect_loop, stmt)
8495 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
8496 stmt, vectype_out, vectype_in,
8497 code1, code2, multi_step_cvt,
8498 interm_types))
8500 /* Elements in a vector with vect_used_by_reduction property cannot
8501 be reordered if the use chain with this property does not have the
8502 same operation. One such example is s += a * b, where elements
8503 in a and b cannot be reordered. Here we check if the vector defined
8504 by STMT is only directly used in the reduction statement. */
8505 tree lhs = gimple_assign_lhs (stmt);
8506 use_operand_p dummy;
8507 gimple *use_stmt;
8508 stmt_vec_info use_stmt_info = NULL;
8509 if (single_imm_use (lhs, &dummy, &use_stmt)
8510 && (use_stmt_info = vinfo_for_stmt (use_stmt))
8511 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
8512 return true;
8514 c1 = VEC_WIDEN_MULT_LO_EXPR;
8515 c2 = VEC_WIDEN_MULT_HI_EXPR;
8516 break;
8518 case VEC_WIDEN_MULT_EVEN_EXPR:
8519 /* Support the recursion induced just above. */
8520 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
8521 c2 = VEC_WIDEN_MULT_ODD_EXPR;
8522 break;
8524 case WIDEN_LSHIFT_EXPR:
8525 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
8526 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
8527 break;
8529 CASE_CONVERT:
8530 c1 = VEC_UNPACK_LO_EXPR;
8531 c2 = VEC_UNPACK_HI_EXPR;
8532 break;
8534 case FLOAT_EXPR:
8535 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
8536 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
8537 break;
8539 case FIX_TRUNC_EXPR:
8540 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
8541 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
8542 computing the operation. */
8543 return false;
8545 default:
8546 gcc_unreachable ();
8549 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
8550 std::swap (c1, c2);
8552 if (code == FIX_TRUNC_EXPR)
8554 /* The signedness is determined from the output operand. */
8555 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8556 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
8558 else
8560 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8561 optab2 = optab_for_tree_code (c2, vectype, optab_default);
8564 if (!optab1 || !optab2)
8565 return false;
8567 vec_mode = TYPE_MODE (vectype);
8568 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
8569 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
8570 return false;
8572 *code1 = c1;
8573 *code2 = c2;
8575 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8576 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8577 return true;
8579 /* Check if it's a multi-step conversion that can be done using intermediate
8580 types. */
8582 prev_type = vectype;
8583 prev_mode = vec_mode;
8585 if (!CONVERT_EXPR_CODE_P (code))
8586 return false;
8588 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8589 intermediate steps in the promotion sequence. We try
8590 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8591 not. */
8592 interm_types->create (MAX_INTERM_CVT_STEPS);
8593 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8595 intermediate_mode = insn_data[icode1].operand[0].mode;
8596 intermediate_type
8597 = lang_hooks.types.type_for_mode (intermediate_mode,
8598 TYPE_UNSIGNED (prev_type));
8599 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8600 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8602 if (!optab3 || !optab4
8603 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8604 || insn_data[icode1].operand[0].mode != intermediate_mode
8605 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8606 || insn_data[icode2].operand[0].mode != intermediate_mode
8607 || ((icode1 = optab_handler (optab3, intermediate_mode))
8608 == CODE_FOR_nothing)
8609 || ((icode2 = optab_handler (optab4, intermediate_mode))
8610 == CODE_FOR_nothing))
8611 break;
8613 interm_types->quick_push (intermediate_type);
8614 (*multi_step_cvt)++;
8616 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8617 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8618 return true;
8620 prev_type = intermediate_type;
8621 prev_mode = intermediate_mode;
8624 interm_types->release ();
8625 return false;
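/* Illustrative sketch of a multi-step widening conversion (the exact modes
   depend on the target): for a char -> int conversion with V16QI inputs,
   the first step unpacks chars to shorts and the second unpacks shorts to
   ints, so MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short vector
   type:

     vs_lo = VEC_UNPACK_LO_EXPR <vc>;      // V16QI -> V8HI
     vs_hi = VEC_UNPACK_HI_EXPR <vc>;
     vi_0  = VEC_UNPACK_LO_EXPR <vs_lo>;   // V8HI  -> V4SI
     vi_1  = VEC_UNPACK_HI_EXPR <vs_lo>;
     ...                                   // likewise for vs_hi  */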
8629 /* Function supportable_narrowing_operation
8631 Check whether an operation represented by the code CODE is a
8632 narrowing operation that is supported by the target platform in
8633 vector form (i.e., when operating on arguments of type VECTYPE_IN
8634 and producing a result of type VECTYPE_OUT).
8636 Narrowing operations we currently support are NOP (CONVERT) and
8637 FIX_TRUNC. This function checks if these operations are supported by
8638 the target platform directly via vector tree-codes.
8640 Output:
8641 - CODE1 is the code of a vector operation to be used when
8642 vectorizing the operation, if available.
8643 - MULTI_STEP_CVT determines the number of required intermediate steps in
8644 case of multi-step conversion (like int->short->char - in that case
8645 MULTI_STEP_CVT will be 1).
8646 - INTERM_TYPES contains the intermediate type required to perform the
8647 narrowing operation (short in the above example). */
8649 bool
8650 supportable_narrowing_operation (enum tree_code code,
8651 tree vectype_out, tree vectype_in,
8652 enum tree_code *code1, int *multi_step_cvt,
8653 vec<tree> *interm_types)
8655 machine_mode vec_mode;
8656 enum insn_code icode1;
8657 optab optab1, interm_optab;
8658 tree vectype = vectype_in;
8659 tree narrow_vectype = vectype_out;
8660 enum tree_code c1;
8661 tree intermediate_type;
8662 machine_mode intermediate_mode, prev_mode;
8663 int i;
8664 bool uns;
8666 *multi_step_cvt = 0;
8667 switch (code)
8669 CASE_CONVERT:
8670 c1 = VEC_PACK_TRUNC_EXPR;
8671 break;
8673 case FIX_TRUNC_EXPR:
8674 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8675 break;
8677 case FLOAT_EXPR:
8678 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8679 tree code and optabs used for computing the operation. */
8680 return false;
8682 default:
8683 gcc_unreachable ();
8686 if (code == FIX_TRUNC_EXPR)
8687 /* The signedness is determined from the output operand. */
8688 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8689 else
8690 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8692 if (!optab1)
8693 return false;
8695 vec_mode = TYPE_MODE (vectype);
8696 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8697 return false;
8699 *code1 = c1;
8701 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8702 return true;
8704 /* Check if it's a multi-step conversion that can be done using intermediate
8705 types. */
8706 prev_mode = vec_mode;
8707 if (code == FIX_TRUNC_EXPR)
8708 uns = TYPE_UNSIGNED (vectype_out);
8709 else
8710 uns = TYPE_UNSIGNED (vectype);
8712 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8713 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8714 costly than signed. */
8715 if (code == FIX_TRUNC_EXPR && uns)
8717 enum insn_code icode2;
8719 intermediate_type
8720 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8721 interm_optab
8722 = optab_for_tree_code (c1, intermediate_type, optab_default);
8723 if (interm_optab != unknown_optab
8724 && (icode2 = optab_handler (interm_optab, vec_mode)) != CODE_FOR_nothing
8725 && insn_data[icode1].operand[0].mode
8726 == insn_data[icode2].operand[0].mode)
8728 uns = false;
8729 optab1 = interm_optab;
8730 icode1 = icode2;
8734 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8735 intermediate steps in the demotion sequence. We try
8736 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8737 interm_types->create (MAX_INTERM_CVT_STEPS);
8738 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8740 intermediate_mode = insn_data[icode1].operand[0].mode;
8741 intermediate_type
8742 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8743 interm_optab
8744 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8745 optab_default);
8746 if (!interm_optab
8747 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8748 || insn_data[icode1].operand[0].mode != intermediate_mode
8749 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8750 == CODE_FOR_nothing))
8751 break;
8753 interm_types->quick_push (intermediate_type);
8754 (*multi_step_cvt)++;
8756 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8757 return true;
8759 prev_mode = intermediate_mode;
8760 optab1 = interm_optab;
8763 interm_types->release ();
8764 return false;
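/* Illustrative sketch of a multi-step narrowing conversion (the exact modes
   depend on the target): for an int -> char conversion with V4SI inputs,
   the first step packs ints to shorts and the second packs shorts to chars,
   so MULTI_STEP_CVT is 1 and INTERM_TYPES holds the short vector type:

     vs_0 = VEC_PACK_TRUNC_EXPR <vi_0, vi_1>;   // 2 x V4SI -> V8HI
     vs_1 = VEC_PACK_TRUNC_EXPR <vi_2, vi_3>;
     vc   = VEC_PACK_TRUNC_EXPR <vs_0, vs_1>;   // 2 x V8HI -> V16QI  */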