gcc/tree-vect-stmts.c
1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "dumpfile.h"
26 #include "tm.h"
27 #include "hash-set.h"
28 #include "machmode.h"
29 #include "vec.h"
30 #include "double-int.h"
31 #include "input.h"
32 #include "alias.h"
33 #include "symtab.h"
34 #include "wide-int.h"
35 #include "inchash.h"
36 #include "tree.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "target.h"
40 #include "predict.h"
41 #include "hard-reg-set.h"
42 #include "input.h"
43 #include "function.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "basic-block.h"
47 #include "gimple-pretty-print.h"
48 #include "tree-ssa-alias.h"
49 #include "internal-fn.h"
50 #include "tree-eh.h"
51 #include "gimple-expr.h"
52 #include "is-a.h"
53 #include "gimple.h"
54 #include "gimplify.h"
55 #include "gimple-iterator.h"
56 #include "gimplify-me.h"
57 #include "gimple-ssa.h"
58 #include "tree-cfg.h"
59 #include "tree-phinodes.h"
60 #include "ssa-iterators.h"
61 #include "stringpool.h"
62 #include "tree-ssanames.h"
63 #include "tree-ssa-loop-manip.h"
64 #include "cfgloop.h"
65 #include "tree-ssa-loop.h"
66 #include "tree-scalar-evolution.h"
67 #include "expr.h"
68 #include "recog.h" /* FIXME: for insn_data */
69 #include "insn-codes.h"
70 #include "optabs.h"
71 #include "diagnostic-core.h"
72 #include "tree-vectorizer.h"
73 #include "dumpfile.h"
74 #include "hash-map.h"
75 #include "plugin-api.h"
76 #include "ipa-ref.h"
77 #include "cgraph.h"
78 #include "builtins.h"
80 /* For lang_hooks.types.type_for_mode. */
81 #include "langhooks.h"
83 /* Return the vectorized type for the given statement. */
85 tree
86 stmt_vectype (struct _stmt_vec_info *stmt_info)
88 return STMT_VINFO_VECTYPE (stmt_info);
91 /* Return TRUE iff the given statement is in an inner loop relative to
92 the loop being vectorized. */
93 bool
94 stmt_in_inner_loop_p (struct _stmt_vec_info *stmt_info)
96 gimple stmt = STMT_VINFO_STMT (stmt_info);
97 basic_block bb = gimple_bb (stmt);
98 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
99 struct loop* loop;
101 if (!loop_vinfo)
102 return false;
104 loop = LOOP_VINFO_LOOP (loop_vinfo);
106 return (bb->loop_father == loop->inner);
109 /* Record the cost of a statement, either by directly informing the
110 target model or by saving it in a vector for later processing.
111 Return a preliminary estimate of the statement's cost. */
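/* As an illustration (taken from vect_model_simple_cost below), the
   inside-of-loop cost of a simple vector statement is recorded with:

     inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
                                     stmt_info, 0, vect_body);

   When BODY_COST_VEC is non-NULL the cost is queued in that vector and a
   preliminary estimate is returned; otherwise it is handed straight to the
   target via add_stmt_cost.  */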
113 unsigned
114 record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
115 enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
116 int misalign, enum vect_cost_model_location where)
118 if (body_cost_vec)
120 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
121 add_stmt_info_to_vec (body_cost_vec, count, kind,
122 stmt_info ? STMT_VINFO_STMT (stmt_info) : NULL,
123 misalign);
124 return (unsigned)
125 (builtin_vectorization_cost (kind, vectype, misalign) * count);
128 else
130 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
131 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
132 void *target_cost_data;
134 if (loop_vinfo)
135 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
136 else
137 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
139 return add_stmt_cost (target_cost_data, count, kind, stmt_info,
140 misalign, where);
144 /* Return a variable of type ELEM_TYPE[NELEMS]. */
146 static tree
147 create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
149 return create_tmp_var (build_array_type_nelts (elem_type, nelems),
150 "vect_array");
153 /* ARRAY is an array of vectors created by create_vector_array.
154 Return an SSA_NAME for the vector in index N. The reference
155 is part of the vectorization of STMT and the vector is associated
156 with scalar destination SCALAR_DEST. */
158 static tree
159 read_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree scalar_dest,
160 tree array, unsigned HOST_WIDE_INT n)
162 tree vect_type, vect, vect_name, array_ref;
163 gimple new_stmt;
165 gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
166 vect_type = TREE_TYPE (TREE_TYPE (array));
167 vect = vect_create_destination_var (scalar_dest, vect_type);
168 array_ref = build4 (ARRAY_REF, vect_type, array,
169 build_int_cst (size_type_node, n),
170 NULL_TREE, NULL_TREE);
172 new_stmt = gimple_build_assign (vect, array_ref);
173 vect_name = make_ssa_name (vect, new_stmt);
174 gimple_assign_set_lhs (new_stmt, vect_name);
175 vect_finish_stmt_generation (stmt, new_stmt, gsi);
177 return vect_name;
180 /* ARRAY is an array of vectors created by create_vector_array.
181 Emit code to store SSA_NAME VECT in index N of the array.
182 The store is part of the vectorization of STMT. */
184 static void
185 write_vector_array (gimple stmt, gimple_stmt_iterator *gsi, tree vect,
186 tree array, unsigned HOST_WIDE_INT n)
188 tree array_ref;
189 gimple new_stmt;
191 array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
192 build_int_cst (size_type_node, n),
193 NULL_TREE, NULL_TREE);
195 new_stmt = gimple_build_assign (array_ref, vect);
196 vect_finish_stmt_generation (stmt, new_stmt, gsi);
199 /* PTR is a pointer to an array of type TYPE. Return a representation
200 of *PTR. The memory reference replaces those in FIRST_DR
201 (and its group). */
203 static tree
204 create_array_ref (tree type, tree ptr, struct data_reference *first_dr)
206 tree mem_ref, alias_ptr_type;
208 alias_ptr_type = reference_alias_ptr_type (DR_REF (first_dr));
209 mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
210 /* Arrays have the same alignment as their type. */
211 set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
212 return mem_ref;
215 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
217 /* Function vect_mark_relevant.
219 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
221 static void
222 vect_mark_relevant (vec<gimple> *worklist, gimple stmt,
223 enum vect_relevant relevant, bool live_p,
224 bool used_in_pattern)
226 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
227 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
228 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
229 gimple pattern_stmt;
231 if (dump_enabled_p ())
232 dump_printf_loc (MSG_NOTE, vect_location,
233 "mark relevant %d, live %d.\n", relevant, live_p);
235 /* If this stmt is an original stmt in a pattern, we might need to mark its
236 related pattern stmt instead of the original stmt. However, such stmts
237 may have their own uses that are not in any pattern; in such cases the
238 stmt itself should be marked. */
239 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
241 bool found = false;
242 if (!used_in_pattern)
244 imm_use_iterator imm_iter;
245 use_operand_p use_p;
246 gimple use_stmt;
247 tree lhs;
248 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
249 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
251 if (is_gimple_assign (stmt))
252 lhs = gimple_assign_lhs (stmt);
253 else
254 lhs = gimple_call_lhs (stmt);
256 /* This use is not part of the pattern. If LHS has other uses that are
257 pattern uses, we should mark the stmt itself, and not the pattern
258 stmt. */
259 if (lhs && TREE_CODE (lhs) == SSA_NAME)
260 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
262 if (is_gimple_debug (USE_STMT (use_p)))
263 continue;
264 use_stmt = USE_STMT (use_p);
266 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
267 continue;
269 if (vinfo_for_stmt (use_stmt)
270 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
272 found = true;
273 break;
278 if (!found)
280 /* This is the last stmt in a sequence that was detected as a
281 pattern that can potentially be vectorized. Don't mark the stmt
282 as relevant/live because it's not going to be vectorized.
283 Instead mark the pattern-stmt that replaces it. */
285 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
287 if (dump_enabled_p ())
288 dump_printf_loc (MSG_NOTE, vect_location,
289 "last stmt in pattern. don't mark"
290 " relevant/live.\n");
291 stmt_info = vinfo_for_stmt (pattern_stmt);
292 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
293 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
294 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
295 stmt = pattern_stmt;
299 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
300 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
301 STMT_VINFO_RELEVANT (stmt_info) = relevant;
303 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
304 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
306 if (dump_enabled_p ())
307 dump_printf_loc (MSG_NOTE, vect_location,
308 "already marked relevant/live.\n");
309 return;
312 worklist->safe_push (stmt);
316 /* Function vect_stmt_relevant_p.
318 Return true if STMT in loop that is represented by LOOP_VINFO is
319 "relevant for vectorization".
321 A stmt is considered "relevant for vectorization" if:
322 - it has uses outside the loop.
323 - it has vdefs (it alters memory).
324 - it is a control stmt in the loop (except for the exit condition).
326 CHECKME: what other side effects would the vectorizer allow? */
328 static bool
329 vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
330 enum vect_relevant *relevant, bool *live_p)
332 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
333 ssa_op_iter op_iter;
334 imm_use_iterator imm_iter;
335 use_operand_p use_p;
336 def_operand_p def_p;
338 *relevant = vect_unused_in_scope;
339 *live_p = false;
341 /* cond stmt other than loop exit cond. */
342 if (is_ctrl_stmt (stmt)
343 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt))
344 != loop_exit_ctrl_vec_info_type)
345 *relevant = vect_used_in_scope;
347 /* changing memory. */
348 if (gimple_code (stmt) != GIMPLE_PHI)
349 if (gimple_vdef (stmt)
350 && !gimple_clobber_p (stmt))
352 if (dump_enabled_p ())
353 dump_printf_loc (MSG_NOTE, vect_location,
354 "vec_stmt_relevant_p: stmt has vdefs.\n");
355 *relevant = vect_used_in_scope;
358 /* uses outside the loop. */
359 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
361 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
363 basic_block bb = gimple_bb (USE_STMT (use_p));
364 if (!flow_bb_inside_loop_p (loop, bb))
366 if (dump_enabled_p ())
367 dump_printf_loc (MSG_NOTE, vect_location,
368 "vec_stmt_relevant_p: used out of loop.\n");
370 if (is_gimple_debug (USE_STMT (use_p)))
371 continue;
373 /* We expect all such uses to be in the loop exit phis
374 (because of loop closed form) */
375 gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
376 gcc_assert (bb == single_exit (loop)->dest);
378 *live_p = true;
383 return (*live_p || *relevant);
387 /* Function exist_non_indexing_operands_for_use_p
389 USE is one of the uses attached to STMT. Check if USE is
390 used in STMT for anything other than indexing an array. */
392 static bool
393 exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
395 tree operand;
396 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
398 /* USE corresponds to some operand in STMT. If there is no data
399 reference in STMT, then any operand that corresponds to USE
400 is not indexing an array. */
401 if (!STMT_VINFO_DATA_REF (stmt_info))
402 return true;
404 /* STMT has a data_ref. FORNOW this means that it has one of
405 the following forms:
406 -1- ARRAY_REF = var
407 -2- var = ARRAY_REF
408 (This should have been verified in analyze_data_refs).
410 'var' in the second case corresponds to a def, not a use,
411 so USE cannot correspond to any operands that are not used
412 for array indexing.
414 Therefore, all we need to check is if STMT falls into the
415 first case, and whether var corresponds to USE. */
417 if (!gimple_assign_copy_p (stmt))
419 if (is_gimple_call (stmt)
420 && gimple_call_internal_p (stmt))
421 switch (gimple_call_internal_fn (stmt))
423 case IFN_MASK_STORE:
424 operand = gimple_call_arg (stmt, 3);
425 if (operand == use)
426 return true;
427 /* FALLTHRU */
428 case IFN_MASK_LOAD:
429 operand = gimple_call_arg (stmt, 2);
430 if (operand == use)
431 return true;
432 break;
433 default:
434 break;
436 return false;
439 if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
440 return false;
441 operand = gimple_assign_rhs1 (stmt);
442 if (TREE_CODE (operand) != SSA_NAME)
443 return false;
445 if (operand == use)
446 return true;
448 return false;
453 Function process_use.
455 Inputs:
456 - a USE in STMT in a loop represented by LOOP_VINFO
457 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
458 that defined USE. This is done by calling mark_relevant and passing it
459 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
460 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
461 be performed.
463 Outputs:
464 Generally, LIVE_P and RELEVANT are used to define the liveness and
465 relevance info of the DEF_STMT of this USE:
466 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
467 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
468 Exceptions:
469 - case 1: If USE is used only for address computations (e.g. array indexing),
470 which does not need to be directly vectorized, then the liveness/relevance
471 of the respective DEF_STMT is left unchanged.
472 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
473 skip DEF_STMT because it has already been processed.
474 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
475 be modified accordingly.
477 Return true if everything is as expected. Return false otherwise. */
479 static bool
480 process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
481 enum vect_relevant relevant, vec<gimple> *worklist,
482 bool force)
484 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
485 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
486 stmt_vec_info dstmt_vinfo;
487 basic_block bb, def_bb;
488 tree def;
489 gimple def_stmt;
490 enum vect_def_type dt;
492 /* case 1: we are only interested in uses that need to be vectorized. Uses
493 that are used for address computation are not considered relevant. */
494 if (!force && !exist_non_indexing_operands_for_use_p (use, stmt))
495 return true;
497 if (!vect_is_simple_use (use, stmt, loop_vinfo, NULL, &def_stmt, &def, &dt))
499 if (dump_enabled_p ())
500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
501 "not vectorized: unsupported use in stmt.\n");
502 return false;
505 if (!def_stmt || gimple_nop_p (def_stmt))
506 return true;
508 def_bb = gimple_bb (def_stmt);
509 if (!flow_bb_inside_loop_p (loop, def_bb))
511 if (dump_enabled_p ())
512 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt is out of loop.\n");
513 return true;
516 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
517 DEF_STMT must have already been processed, because this should be the
518 only way that STMT, which is a reduction-phi, was put in the worklist,
519 as there should be no other uses for DEF_STMT in the loop. So we just
520 check that everything is as expected, and we are done. */
521 dstmt_vinfo = vinfo_for_stmt (def_stmt);
522 bb = gimple_bb (stmt);
523 if (gimple_code (stmt) == GIMPLE_PHI
524 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
525 && gimple_code (def_stmt) != GIMPLE_PHI
526 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
527 && bb->loop_father == def_bb->loop_father)
529 if (dump_enabled_p ())
530 dump_printf_loc (MSG_NOTE, vect_location,
531 "reduc-stmt defining reduc-phi in the same nest.\n");
532 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
533 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
534 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
535 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
536 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope);
537 return true;
540 /* case 3a: outer-loop stmt defining an inner-loop stmt:
541 outer-loop-header-bb:
542 d = def_stmt
543 inner-loop:
544 stmt # use (d)
545 outer-loop-tail-bb:
546 ... */
547 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
549 if (dump_enabled_p ())
550 dump_printf_loc (MSG_NOTE, vect_location,
551 "outer-loop def-stmt defining inner-loop stmt.\n");
553 switch (relevant)
555 case vect_unused_in_scope:
556 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
557 vect_used_in_scope : vect_unused_in_scope;
558 break;
560 case vect_used_in_outer_by_reduction:
561 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
562 relevant = vect_used_by_reduction;
563 break;
565 case vect_used_in_outer:
566 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
567 relevant = vect_used_in_scope;
568 break;
570 case vect_used_in_scope:
571 break;
573 default:
574 gcc_unreachable ();
578 /* case 3b: inner-loop stmt defining an outer-loop stmt:
579 outer-loop-header-bb:
581 inner-loop:
582 d = def_stmt
583 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
584 stmt # use (d) */
585 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
587 if (dump_enabled_p ())
588 dump_printf_loc (MSG_NOTE, vect_location,
589 "inner-loop def-stmt defining outer-loop stmt.\n");
591 switch (relevant)
593 case vect_unused_in_scope:
594 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
595 || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
596 vect_used_in_outer_by_reduction : vect_unused_in_scope;
597 break;
599 case vect_used_by_reduction:
600 relevant = vect_used_in_outer_by_reduction;
601 break;
603 case vect_used_in_scope:
604 relevant = vect_used_in_outer;
605 break;
607 default:
608 gcc_unreachable ();
612 vect_mark_relevant (worklist, def_stmt, relevant, live_p,
613 is_pattern_stmt_p (stmt_vinfo));
614 return true;
618 /* Function vect_mark_stmts_to_be_vectorized.
620 Not all stmts in the loop need to be vectorized. For example:
622 for i...
623 for j...
624 1. T0 = i + j
625 2. T1 = a[T0]
627 3. j = j + 1
629 Stmts 1 and 3 do not need to be vectorized, because loop control and
630 addressing of vectorized data-refs are handled differently.
632 This pass detects such stmts. */
634 bool
635 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
637 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
638 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
639 unsigned int nbbs = loop->num_nodes;
640 gimple_stmt_iterator si;
641 gimple stmt;
642 unsigned int i;
643 stmt_vec_info stmt_vinfo;
644 basic_block bb;
645 gimple phi;
646 bool live_p;
647 enum vect_relevant relevant, tmp_relevant;
648 enum vect_def_type def_type;
650 if (dump_enabled_p ())
651 dump_printf_loc (MSG_NOTE, vect_location,
652 "=== vect_mark_stmts_to_be_vectorized ===\n");
654 auto_vec<gimple, 64> worklist;
656 /* 1. Init worklist. */
657 for (i = 0; i < nbbs; i++)
659 bb = bbs[i];
660 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
662 phi = gsi_stmt (si);
663 if (dump_enabled_p ())
665 dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? ");
666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
667 dump_printf (MSG_NOTE, "\n");
670 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
671 vect_mark_relevant (&worklist, phi, relevant, live_p, false);
673 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
675 stmt = gsi_stmt (si);
676 if (dump_enabled_p ())
678 dump_printf_loc (MSG_NOTE, vect_location, "init: stmt relevant? ");
679 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
680 dump_printf (MSG_NOTE, "\n");
683 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
684 vect_mark_relevant (&worklist, stmt, relevant, live_p, false);
688 /* 2. Process_worklist */
689 while (worklist.length () > 0)
691 use_operand_p use_p;
692 ssa_op_iter iter;
694 stmt = worklist.pop ();
695 if (dump_enabled_p ())
697 dump_printf_loc (MSG_NOTE, vect_location, "worklist: examine stmt: ");
698 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
699 dump_printf (MSG_NOTE, "\n");
702 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
703 (DEF_STMT) as relevant/irrelevant and live/dead according to the
704 liveness and relevance properties of STMT. */
705 stmt_vinfo = vinfo_for_stmt (stmt);
706 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
707 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
709 /* Generally, the liveness and relevance properties of STMT are
710 propagated as is to the DEF_STMTs of its USEs:
711 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
712 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
714 One exception is when STMT has been identified as defining a reduction
715 variable; in this case we set the liveness/relevance as follows:
716 live_p = false
717 relevant = vect_used_by_reduction
718 This is because we distinguish between two kinds of relevant stmts -
719 those that are used by a reduction computation, and those that are
720 (also) used by a regular computation. This allows us later on to
721 identify stmts that are used solely by a reduction, and therefore the
722 order of the results that they produce does not have to be kept. */
724 def_type = STMT_VINFO_DEF_TYPE (stmt_vinfo);
725 tmp_relevant = relevant;
726 switch (def_type)
728 case vect_reduction_def:
729 switch (tmp_relevant)
731 case vect_unused_in_scope:
732 relevant = vect_used_by_reduction;
733 break;
735 case vect_used_by_reduction:
736 if (gimple_code (stmt) == GIMPLE_PHI)
737 break;
738 /* fall through */
740 default:
741 if (dump_enabled_p ())
742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
743 "unsupported use of reduction.\n");
744 return false;
747 live_p = false;
748 break;
750 case vect_nested_cycle:
751 if (tmp_relevant != vect_unused_in_scope
752 && tmp_relevant != vect_used_in_outer_by_reduction
753 && tmp_relevant != vect_used_in_outer)
755 if (dump_enabled_p ())
756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
757 "unsupported use of nested cycle.\n");
759 return false;
762 live_p = false;
763 break;
765 case vect_double_reduction_def:
766 if (tmp_relevant != vect_unused_in_scope
767 && tmp_relevant != vect_used_by_reduction)
769 if (dump_enabled_p ())
770 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
771 "unsupported use of double reduction.\n");
773 return false;
776 live_p = false;
777 break;
779 default:
780 break;
783 if (is_pattern_stmt_p (stmt_vinfo))
785 /* Pattern statements are not inserted into the code, so
786 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
787 have to scan the RHS or function arguments instead. */
788 if (is_gimple_assign (stmt))
790 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
791 tree op = gimple_assign_rhs1 (stmt);
793 i = 1;
794 if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
796 if (!process_use (stmt, TREE_OPERAND (op, 0), loop_vinfo,
797 live_p, relevant, &worklist, false)
798 || !process_use (stmt, TREE_OPERAND (op, 1), loop_vinfo,
799 live_p, relevant, &worklist, false))
800 return false;
801 i = 2;
803 for (; i < gimple_num_ops (stmt); i++)
805 op = gimple_op (stmt, i);
806 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
807 &worklist, false))
808 return false;
811 else if (is_gimple_call (stmt))
813 for (i = 0; i < gimple_call_num_args (stmt); i++)
815 tree arg = gimple_call_arg (stmt, i);
816 if (!process_use (stmt, arg, loop_vinfo, live_p, relevant,
817 &worklist, false))
818 return false;
822 else
823 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
825 tree op = USE_FROM_PTR (use_p);
826 if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
827 &worklist, false))
828 return false;
831 if (STMT_VINFO_GATHER_P (stmt_vinfo))
833 tree off;
834 tree decl = vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
835 gcc_assert (decl);
836 if (!process_use (stmt, off, loop_vinfo, live_p, relevant,
837 &worklist, true))
838 return false;
840 } /* while worklist */
842 return true;
846 /* Function vect_model_simple_cost.
848 Models cost for simple operations, i.e. those that only emit ncopies of a
849 single op. Right now, this does not account for multiple insns that could
850 be generated for the single vector op. We will handle that shortly. */
852 void
853 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
854 enum vect_def_type *dt,
855 stmt_vector_for_cost *prologue_cost_vec,
856 stmt_vector_for_cost *body_cost_vec)
858 int i;
859 int inside_cost = 0, prologue_cost = 0;
861 /* The SLP costs were already calculated during SLP tree build. */
862 if (PURE_SLP_STMT (stmt_info))
863 return;
865 /* FORNOW: Assuming maximum 2 args per stmt. */
866 for (i = 0; i < 2; i++)
867 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
868 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
869 stmt_info, 0, vect_prologue);
871 /* Pass the inside-of-loop statements to the target-specific cost model. */
872 inside_cost = record_stmt_cost (body_cost_vec, ncopies, vector_stmt,
873 stmt_info, 0, vect_body);
875 if (dump_enabled_p ())
876 dump_printf_loc (MSG_NOTE, vect_location,
877 "vect_model_simple_cost: inside_cost = %d, "
878 "prologue_cost = %d .\n", inside_cost, prologue_cost);
882 /* Model cost for type demotion and promotion operations. PWR is normally
883 zero for single-step promotions and demotions. It will be one if
884 two-step promotion/demotion is required, and so on. Each additional
885 step doubles the number of instructions required. */
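/* For example, with the computation below, a two-step promotion (PWR == 1)
   is costed as vect_pow2 (1) + vect_pow2 (2) = 6 vec_promote_demote
   operations, whereas a two-step demotion is costed as
   vect_pow2 (0) + vect_pow2 (1) = 3.  */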
887 static void
888 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
889 enum vect_def_type *dt, int pwr)
891 int i, tmp;
892 int inside_cost = 0, prologue_cost = 0;
893 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
894 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
895 void *target_cost_data;
897 /* The SLP costs were already calculated during SLP tree build. */
898 if (PURE_SLP_STMT (stmt_info))
899 return;
901 if (loop_vinfo)
902 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
903 else
904 target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
906 for (i = 0; i < pwr + 1; i++)
908 tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
909 (i + 1) : i;
910 inside_cost += add_stmt_cost (target_cost_data, vect_pow2 (tmp),
911 vec_promote_demote, stmt_info, 0,
912 vect_body);
915 /* FORNOW: Assuming maximum 2 args per stmt. */
916 for (i = 0; i < 2; i++)
917 if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
918 prologue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
919 stmt_info, 0, vect_prologue);
921 if (dump_enabled_p ())
922 dump_printf_loc (MSG_NOTE, vect_location,
923 "vect_model_promotion_demotion_cost: inside_cost = %d, "
924 "prologue_cost = %d .\n", inside_cost, prologue_cost);
927 /* Function vect_cost_group_size
929 For grouped load or store, return the group_size only if it is the first
930 load or store of a group, else return 1. This ensures that group size is
931 only returned once per group. */
933 static int
934 vect_cost_group_size (stmt_vec_info stmt_info)
936 gimple first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
938 if (first_stmt == STMT_VINFO_STMT (stmt_info))
939 return GROUP_SIZE (stmt_info);
941 return 1;
945 /* Function vect_model_store_cost
947 Models cost for stores. In the case of grouped accesses, one access
948 has the overhead of the grouped access attributed to it. */
950 void
951 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
952 bool store_lanes_p, enum vect_def_type dt,
953 slp_tree slp_node,
954 stmt_vector_for_cost *prologue_cost_vec,
955 stmt_vector_for_cost *body_cost_vec)
957 int group_size;
958 unsigned int inside_cost = 0, prologue_cost = 0;
959 struct data_reference *first_dr;
960 gimple first_stmt;
962 /* The SLP costs were already calculated during SLP tree build. */
963 if (PURE_SLP_STMT (stmt_info))
964 return;
966 if (dt == vect_constant_def || dt == vect_external_def)
967 prologue_cost += record_stmt_cost (prologue_cost_vec, 1, scalar_to_vec,
968 stmt_info, 0, vect_prologue);
970 /* Grouped access? */
971 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
973 if (slp_node)
975 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
976 group_size = 1;
978 else
980 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
981 group_size = vect_cost_group_size (stmt_info);
984 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
986 /* Not a grouped access. */
987 else
989 group_size = 1;
990 first_dr = STMT_VINFO_DATA_REF (stmt_info);
993 /* We assume that the cost of a single store-lanes instruction is
994 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
995 access is instead being provided by a permute-and-store operation,
996 include the cost of the permutes. */
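/* For example, with GROUP_SIZE == 4 and NCOPIES == 1 the code below
   charges 1 * ceil_log2 (4) * 4 = 8 vec_perm statements in addition to
   the stores themselves.  */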
997 if (!store_lanes_p && group_size > 1)
999 /* Uses high and low interleave or shuffle operations for each
1000 needed permute. */
1001 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1002 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1003 stmt_info, 0, vect_body);
1005 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_NOTE, vect_location,
1007 "vect_model_store_cost: strided group_size = %d .\n",
1008 group_size);
1011 /* Costs of the stores. */
1012 vect_get_store_cost (first_dr, ncopies, &inside_cost, body_cost_vec);
1014 if (dump_enabled_p ())
1015 dump_printf_loc (MSG_NOTE, vect_location,
1016 "vect_model_store_cost: inside_cost = %d, "
1017 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1021 /* Calculate cost of DR's memory access. */
1022 void
1023 vect_get_store_cost (struct data_reference *dr, int ncopies,
1024 unsigned int *inside_cost,
1025 stmt_vector_for_cost *body_cost_vec)
1027 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1028 gimple stmt = DR_STMT (dr);
1029 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1031 switch (alignment_support_scheme)
1033 case dr_aligned:
1035 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1036 vector_store, stmt_info, 0,
1037 vect_body);
1039 if (dump_enabled_p ())
1040 dump_printf_loc (MSG_NOTE, vect_location,
1041 "vect_model_store_cost: aligned.\n");
1042 break;
1045 case dr_unaligned_supported:
1047 /* Here, we assign an additional cost for the unaligned store. */
1048 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1049 unaligned_store, stmt_info,
1050 DR_MISALIGNMENT (dr), vect_body);
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_NOTE, vect_location,
1053 "vect_model_store_cost: unaligned supported by "
1054 "hardware.\n");
1055 break;
1058 case dr_unaligned_unsupported:
1060 *inside_cost = VECT_MAX_COST;
1062 if (dump_enabled_p ())
1063 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1064 "vect_model_store_cost: unsupported access.\n");
1065 break;
1068 default:
1069 gcc_unreachable ();
1074 /* Function vect_model_load_cost
1076 Models cost for loads. In the case of grouped accesses, the last access
1077 has the overhead of the grouped access attributed to it. Since unaligned
1078 accesses are supported for loads, we also account for the costs of the
1079 access scheme chosen. */
1081 void
1082 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies,
1083 bool load_lanes_p, slp_tree slp_node,
1084 stmt_vector_for_cost *prologue_cost_vec,
1085 stmt_vector_for_cost *body_cost_vec)
1087 int group_size;
1088 gimple first_stmt;
1089 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
1090 unsigned int inside_cost = 0, prologue_cost = 0;
1092 /* The SLP costs were already calculated during SLP tree build. */
1093 if (PURE_SLP_STMT (stmt_info))
1094 return;
1096 /* Grouped accesses? */
1097 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
1098 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && first_stmt && !slp_node)
1100 group_size = vect_cost_group_size (stmt_info);
1101 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
1103 /* Not a grouped access. */
1104 else
1106 group_size = 1;
1107 first_dr = dr;
1110 /* We assume that the cost of a single load-lanes instruction is
1111 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1112 access is instead being provided by a load-and-permute operation,
1113 include the cost of the permutes. */
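/* As in vect_model_store_cost, e.g. GROUP_SIZE == 8 and NCOPIES == 2
   would charge 2 * ceil_log2 (8) * 8 = 48 vec_perm statements on top of
   the loads.  */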
1114 if (!load_lanes_p && group_size > 1)
1116 /* Uses even and odd extract or shuffle operations
1117 for each needed permute. */
1118 int nstmts = ncopies * ceil_log2 (group_size) * group_size;
1119 inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
1120 stmt_info, 0, vect_body);
1122 if (dump_enabled_p ())
1123 dump_printf_loc (MSG_NOTE, vect_location,
1124 "vect_model_load_cost: strided group_size = %d .\n",
1125 group_size);
1128 /* The loads themselves. */
1129 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1131 /* N scalar loads plus gathering them into a vector. */
1132 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1133 inside_cost += record_stmt_cost (body_cost_vec,
1134 ncopies * TYPE_VECTOR_SUBPARTS (vectype),
1135 scalar_load, stmt_info, 0, vect_body);
1136 inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_construct,
1137 stmt_info, 0, vect_body);
1139 else
1140 vect_get_load_cost (first_dr, ncopies,
1141 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1142 || group_size > 1 || slp_node),
1143 &inside_cost, &prologue_cost,
1144 prologue_cost_vec, body_cost_vec, true);
1146 if (dump_enabled_p ())
1147 dump_printf_loc (MSG_NOTE, vect_location,
1148 "vect_model_load_cost: inside_cost = %d, "
1149 "prologue_cost = %d .\n", inside_cost, prologue_cost);
1153 /* Calculate cost of DR's memory access. */
1154 void
1155 vect_get_load_cost (struct data_reference *dr, int ncopies,
1156 bool add_realign_cost, unsigned int *inside_cost,
1157 unsigned int *prologue_cost,
1158 stmt_vector_for_cost *prologue_cost_vec,
1159 stmt_vector_for_cost *body_cost_vec,
1160 bool record_prologue_costs)
1162 int alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
1163 gimple stmt = DR_STMT (dr);
1164 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1166 switch (alignment_support_scheme)
1168 case dr_aligned:
1170 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1171 stmt_info, 0, vect_body);
1173 if (dump_enabled_p ())
1174 dump_printf_loc (MSG_NOTE, vect_location,
1175 "vect_model_load_cost: aligned.\n");
1177 break;
1179 case dr_unaligned_supported:
1181 /* Here, we assign an additional cost for the unaligned load. */
1182 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1183 unaligned_load, stmt_info,
1184 DR_MISALIGNMENT (dr), vect_body);
1186 if (dump_enabled_p ())
1187 dump_printf_loc (MSG_NOTE, vect_location,
1188 "vect_model_load_cost: unaligned supported by "
1189 "hardware.\n");
1191 break;
1193 case dr_explicit_realign:
1195 *inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
1196 vector_load, stmt_info, 0, vect_body);
1197 *inside_cost += record_stmt_cost (body_cost_vec, ncopies,
1198 vec_perm, stmt_info, 0, vect_body);
1200 /* FIXME: If the misalignment remains fixed across the iterations of
1201 the containing loop, the following cost should be added to the
1202 prologue costs. */
1203 if (targetm.vectorize.builtin_mask_for_load)
1204 *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
1205 stmt_info, 0, vect_body);
1207 if (dump_enabled_p ())
1208 dump_printf_loc (MSG_NOTE, vect_location,
1209 "vect_model_load_cost: explicit realign\n");
1211 break;
1213 case dr_explicit_realign_optimized:
1215 if (dump_enabled_p ())
1216 dump_printf_loc (MSG_NOTE, vect_location,
1217 "vect_model_load_cost: unaligned software "
1218 "pipelined.\n");
1220 /* Unaligned software pipeline has a load of an address, an initial
1221 load, and possibly a mask operation to "prime" the loop. However,
1222 if this is an access in a group of loads, which provide grouped
1223 access, then the above cost should only be considered for one
1224 access in the group. Inside the loop, there is a load op
1225 and a realignment op. */
1227 if (add_realign_cost && record_prologue_costs)
1229 *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
1230 vector_stmt, stmt_info,
1231 0, vect_prologue);
1232 if (targetm.vectorize.builtin_mask_for_load)
1233 *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
1234 vector_stmt, stmt_info,
1235 0, vect_prologue);
1238 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
1239 stmt_info, 0, vect_body);
1240 *inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
1241 stmt_info, 0, vect_body);
1243 if (dump_enabled_p ())
1244 dump_printf_loc (MSG_NOTE, vect_location,
1245 "vect_model_load_cost: explicit realign optimized"
1246 "\n");
1248 break;
1251 case dr_unaligned_unsupported:
1253 *inside_cost = VECT_MAX_COST;
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1257 "vect_model_load_cost: unsupported access.\n");
1258 break;
1261 default:
1262 gcc_unreachable ();
1266 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1267 the loop preheader for the vectorized stmt STMT. */
1269 static void
1270 vect_init_vector_1 (gimple stmt, gimple new_stmt, gimple_stmt_iterator *gsi)
1272 if (gsi)
1273 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1274 else
1276 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1277 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1279 if (loop_vinfo)
1281 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1282 basic_block new_bb;
1283 edge pe;
1285 if (nested_in_vect_loop_p (loop, stmt))
1286 loop = loop->inner;
1288 pe = loop_preheader_edge (loop);
1289 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
1290 gcc_assert (!new_bb);
1292 else
1294 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1295 basic_block bb;
1296 gimple_stmt_iterator gsi_bb_start;
1298 gcc_assert (bb_vinfo);
1299 bb = BB_VINFO_BB (bb_vinfo);
1300 gsi_bb_start = gsi_after_labels (bb);
1301 gsi_insert_before (&gsi_bb_start, new_stmt, GSI_SAME_STMT);
1305 if (dump_enabled_p ())
1307 dump_printf_loc (MSG_NOTE, vect_location,
1308 "created new init_stmt: ");
1309 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
1310 dump_printf (MSG_NOTE, "\n");
1314 /* Function vect_init_vector.
1316 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1317 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1318 vector type, a vector with all elements equal to VAL is created first.
1319 Place the initialization at GSI if it is not NULL. Otherwise, place the
1320 initialization at the loop preheader.
1321 Return the DEF of INIT_STMT.
1322 It will be used in the vectorization of STMT. */
1324 tree
1325 vect_init_vector (gimple stmt, tree val, tree type, gimple_stmt_iterator *gsi)
1327 tree new_var;
1328 gimple init_stmt;
1329 tree vec_oprnd;
1330 tree new_temp;
1332 if (TREE_CODE (type) == VECTOR_TYPE
1333 && TREE_CODE (TREE_TYPE (val)) != VECTOR_TYPE)
1335 if (!types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
1337 if (CONSTANT_CLASS_P (val))
1338 val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (type), val);
1339 else
1341 new_temp = make_ssa_name (TREE_TYPE (type));
1342 init_stmt = gimple_build_assign (new_temp, NOP_EXPR, val);
1343 vect_init_vector_1 (stmt, init_stmt, gsi);
1344 val = new_temp;
1347 val = build_vector_from_val (type, val);
1350 new_var = vect_get_new_vect_var (type, vect_simple_var, "cst_");
1351 init_stmt = gimple_build_assign (new_var, val);
1352 new_temp = make_ssa_name (new_var, init_stmt);
1353 gimple_assign_set_lhs (init_stmt, new_temp);
1354 vect_init_vector_1 (stmt, init_stmt, gsi);
1355 vec_oprnd = gimple_assign_lhs (init_stmt);
1356 return vec_oprnd;
1360 /* Function vect_get_vec_def_for_operand.
1362 OP is an operand in STMT. This function returns a (vector) def that will be
1363 used in the vectorized stmt for STMT.
1365 In the case that OP is an SSA_NAME which is defined in the loop, then
1366 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1368 In case OP is an invariant or constant, a new stmt that creates a vector def
1369 needs to be introduced. */
1371 tree
1372 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1374 tree vec_oprnd;
1375 gimple vec_stmt;
1376 gimple def_stmt;
1377 stmt_vec_info def_stmt_info = NULL;
1378 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1379 unsigned int nunits;
1380 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1381 tree def;
1382 enum vect_def_type dt;
1383 bool is_simple_use;
1384 tree vector_type;
1386 if (dump_enabled_p ())
1388 dump_printf_loc (MSG_NOTE, vect_location,
1389 "vect_get_vec_def_for_operand: ");
1390 dump_generic_expr (MSG_NOTE, TDF_SLIM, op);
1391 dump_printf (MSG_NOTE, "\n");
1394 is_simple_use = vect_is_simple_use (op, stmt, loop_vinfo, NULL,
1395 &def_stmt, &def, &dt);
1396 gcc_assert (is_simple_use);
1397 if (dump_enabled_p ())
1399 int loc_printed = 0;
1400 if (def)
1402 dump_printf_loc (MSG_NOTE, vect_location, "def = ");
1403 loc_printed = 1;
1404 dump_generic_expr (MSG_NOTE, TDF_SLIM, def);
1405 dump_printf (MSG_NOTE, "\n");
1407 if (def_stmt)
1409 if (loc_printed)
1410 dump_printf (MSG_NOTE, " def_stmt = ");
1411 else
1412 dump_printf_loc (MSG_NOTE, vect_location, " def_stmt = ");
1413 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1414 dump_printf (MSG_NOTE, "\n");
1418 switch (dt)
1420 /* Case 1: operand is a constant. */
1421 case vect_constant_def:
1423 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1424 gcc_assert (vector_type);
1425 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1427 if (scalar_def)
1428 *scalar_def = op;
1430 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1431 if (dump_enabled_p ())
1432 dump_printf_loc (MSG_NOTE, vect_location,
1433 "Create vector_cst. nunits = %d\n", nunits);
1435 return vect_init_vector (stmt, op, vector_type, NULL);
1438 /* Case 2: operand is defined outside the loop - loop invariant. */
1439 case vect_external_def:
1441 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1442 gcc_assert (vector_type);
1444 if (scalar_def)
1445 *scalar_def = def;
1447 /* Create 'vec_inv = {inv,inv,..,inv}' */
1448 if (dump_enabled_p ())
1449 dump_printf_loc (MSG_NOTE, vect_location, "Create vector_inv.\n");
1451 return vect_init_vector (stmt, def, vector_type, NULL);
1454 /* Case 3: operand is defined inside the loop. */
1455 case vect_internal_def:
1457 if (scalar_def)
1458 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1460 /* Get the def from the vectorized stmt. */
1461 def_stmt_info = vinfo_for_stmt (def_stmt);
1463 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1464 /* Get vectorized pattern statement. */
1465 if (!vec_stmt
1466 && STMT_VINFO_IN_PATTERN_P (def_stmt_info)
1467 && !STMT_VINFO_RELEVANT (def_stmt_info))
1468 vec_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1469 STMT_VINFO_RELATED_STMT (def_stmt_info)));
1470 gcc_assert (vec_stmt);
1471 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1472 vec_oprnd = PHI_RESULT (vec_stmt);
1473 else if (is_gimple_call (vec_stmt))
1474 vec_oprnd = gimple_call_lhs (vec_stmt);
1475 else
1476 vec_oprnd = gimple_assign_lhs (vec_stmt);
1477 return vec_oprnd;
1480 /* Case 4: operand is defined by a loop header phi - reduction */
1481 case vect_reduction_def:
1482 case vect_double_reduction_def:
1483 case vect_nested_cycle:
1485 struct loop *loop;
1487 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1488 loop = (gimple_bb (def_stmt))->loop_father;
1490 /* Get the def before the loop */
1491 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1492 return get_initial_def_for_reduction (stmt, op, scalar_def);
1495 /* Case 5: operand is defined by loop-header phi - induction. */
1496 case vect_induction_def:
1498 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1500 /* Get the def from the vectorized stmt. */
1501 def_stmt_info = vinfo_for_stmt (def_stmt);
1502 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1503 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1504 vec_oprnd = PHI_RESULT (vec_stmt);
1505 else
1506 vec_oprnd = gimple_get_lhs (vec_stmt);
1507 return vec_oprnd;
1510 default:
1511 gcc_unreachable ();
1516 /* Function vect_get_vec_def_for_stmt_copy
1518 Return a vector-def for an operand. This function is used when the
1519 vectorized stmt to be created (by the caller to this function) is a "copy"
1520 created in case the vectorized result cannot fit in one vector, and several
1521 copies of the vector-stmt are required. In this case the vector-def is
1522 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1523 of the stmt that defines VEC_OPRND.
1524 DT is the type of the vector def VEC_OPRND.
1526 Context:
1527 In case the vectorization factor (VF) is bigger than the number
1528 of elements that can fit in a vectype (nunits), we have to generate
1529 more than one vector stmt to vectorize the scalar stmt. This situation
1530 arises when there are multiple data-types operated upon in the loop; the
1531 smallest data-type determines the VF, and as a result, when vectorizing
1532 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1533 vector stmt (each computing a vector of 'nunits' results, and together
1534 computing 'VF' results in each iteration). This function is called when
1535 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1536 which VF=16 and nunits=4, so the number of copies required is 4):
1538 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1540 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1541 VS1.1: vx.1 = memref1 VS1.2
1542 VS1.2: vx.2 = memref2 VS1.3
1543 VS1.3: vx.3 = memref3
1545 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1546 VSnew.1: vz1 = vx.1 + ... VSnew.2
1547 VSnew.2: vz2 = vx.2 + ... VSnew.3
1548 VSnew.3: vz3 = vx.3 + ...
1550 The vectorization of S1 is explained in vectorizable_load.
1551 The vectorization of S2:
1552 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1553 the function 'vect_get_vec_def_for_operand' is called to
1554 get the relevant vector-def for each operand of S2. For operand x it
1555 returns the vector-def 'vx.0'.
1557 To create the remaining copies of the vector-stmt (VSnew.j), this
1558 function is called to get the relevant vector-def for each operand. It is
1559 obtained from the respective VS1.j stmt, which is recorded in the
1560 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1562 For example, to obtain the vector-def 'vx.1' in order to create the
1563 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1564 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1565 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1566 and return its def ('vx.1').
1567 Overall, to create the above sequence this function will be called 3 times:
1568 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1569 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1570 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1572 tree
1573 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1575 gimple vec_stmt_for_operand;
1576 stmt_vec_info def_stmt_info;
1578 /* Do nothing; can reuse same def. */
1579 if (dt == vect_external_def || dt == vect_constant_def )
1580 return vec_oprnd;
1582 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1583 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1584 gcc_assert (def_stmt_info);
1585 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1586 gcc_assert (vec_stmt_for_operand);
1587 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1588 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
1589 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
1590 else
1591 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
1592 return vec_oprnd;
1596 /* Get vectorized definitions for the operands to create a copy of an original
1597 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1599 static void
1600 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1601 vec<tree> *vec_oprnds0,
1602 vec<tree> *vec_oprnds1)
1604 tree vec_oprnd = vec_oprnds0->pop ();
1606 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1607 vec_oprnds0->quick_push (vec_oprnd);
1609 if (vec_oprnds1 && vec_oprnds1->length ())
1611 vec_oprnd = vec_oprnds1->pop ();
1612 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1613 vec_oprnds1->quick_push (vec_oprnd);
1618 /* Get vectorized definitions for OP0 and OP1.
1619 REDUC_INDEX is the index of reduction operand in case of reduction,
1620 and -1 otherwise. */
1622 void
1623 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
1624 vec<tree> *vec_oprnds0,
1625 vec<tree> *vec_oprnds1,
1626 slp_tree slp_node, int reduc_index)
1628 if (slp_node)
1630 int nops = (op1 == NULL_TREE) ? 1 : 2;
1631 auto_vec<tree> ops (nops);
1632 auto_vec<vec<tree> > vec_defs (nops);
1634 ops.quick_push (op0);
1635 if (op1)
1636 ops.quick_push (op1);
1638 vect_get_slp_defs (ops, slp_node, &vec_defs, reduc_index);
1640 *vec_oprnds0 = vec_defs[0];
1641 if (op1)
1642 *vec_oprnds1 = vec_defs[1];
1644 else
1646 tree vec_oprnd;
1648 vec_oprnds0->create (1);
1649 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1650 vec_oprnds0->quick_push (vec_oprnd);
1652 if (op1)
1654 vec_oprnds1->create (1);
1655 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1656 vec_oprnds1->quick_push (vec_oprnd);
1662 /* Function vect_finish_stmt_generation.
1664 Insert a new stmt. */
1666 void
1667 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
1668 gimple_stmt_iterator *gsi)
1670 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1671 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1672 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1674 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
1676 if (!gsi_end_p (*gsi)
1677 && gimple_has_mem_ops (vec_stmt))
1679 gimple at_stmt = gsi_stmt (*gsi);
1680 tree vuse = gimple_vuse (at_stmt);
1681 if (vuse && TREE_CODE (vuse) == SSA_NAME)
1683 tree vdef = gimple_vdef (at_stmt);
1684 gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
1685 /* If we have an SSA vuse and insert a store, update virtual
1686 SSA form to avoid triggering the renamer. Do so only
1687 if we can easily see all uses - which is what almost always
1688 happens with the way vectorized stmts are inserted. */
1689 if ((vdef && TREE_CODE (vdef) == SSA_NAME)
1690 && ((is_gimple_assign (vec_stmt)
1691 && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
1692 || (is_gimple_call (vec_stmt)
1693 && !(gimple_call_flags (vec_stmt)
1694 & (ECF_CONST|ECF_PURE|ECF_NOVOPS)))))
1696 tree new_vdef = copy_ssa_name (vuse, vec_stmt);
1697 gimple_set_vdef (vec_stmt, new_vdef);
1698 SET_USE (gimple_vuse_op (at_stmt), new_vdef);
1702 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
1704 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo,
1705 bb_vinfo));
1707 if (dump_enabled_p ())
1709 dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: ");
1710 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vec_stmt, 0);
1711 dump_printf (MSG_NOTE, "\n");
1714 gimple_set_location (vec_stmt, gimple_location (stmt));
1716 /* While EH edges will generally prevent vectorization, stmt might
1717 e.g. be in a must-not-throw region. Ensure newly created stmts
1718 that could throw are part of the same region. */
1719 int lp_nr = lookup_stmt_eh_lp (stmt);
1720 if (lp_nr != 0 && stmt_could_throw_p (vec_stmt))
1721 add_stmt_to_eh_lp (vec_stmt, lp_nr);
1724 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1725 a function declaration if the target has a vectorized version
1726 of the function, or NULL_TREE if the function cannot be vectorized. */
1728 tree
1729 vectorizable_function (gcall *call, tree vectype_out, tree vectype_in)
1731 tree fndecl = gimple_call_fndecl (call);
1733 /* We only handle functions that do not read or clobber memory -- i.e.
1734 const or novops ones. */
1735 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
1736 return NULL_TREE;
1738 if (!fndecl
1739 || TREE_CODE (fndecl) != FUNCTION_DECL
1740 || !DECL_BUILT_IN (fndecl))
1741 return NULL_TREE;
1743 return targetm.vectorize.builtin_vectorized_function (fndecl, vectype_out,
1744 vectype_in);
1748 static tree permute_vec_elements (tree, tree, tree, gimple,
1749 gimple_stmt_iterator *);
1752 /* Function vectorizable_mask_load_store.
1754 Check if STMT performs a conditional load or store that can be vectorized.
1755 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1756 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
1757 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1759 static bool
1760 vectorizable_mask_load_store (gimple stmt, gimple_stmt_iterator *gsi,
1761 gimple *vec_stmt, slp_tree slp_node)
1763 tree vec_dest = NULL;
1764 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1765 stmt_vec_info prev_stmt_info;
1766 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1767 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1768 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1769 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1770 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1771 tree elem_type;
1772 gimple new_stmt;
1773 tree dummy;
1774 tree dataref_ptr = NULL_TREE;
1775 gimple ptr_incr;
1776 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1777 int ncopies;
1778 int i, j;
1779 bool inv_p;
1780 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
1781 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
1782 int gather_scale = 1;
1783 enum vect_def_type gather_dt = vect_unknown_def_type;
1784 bool is_store;
1785 tree mask;
1786 gimple def_stmt;
1787 tree def;
1788 enum vect_def_type dt;
1790 if (slp_node != NULL)
1791 return false;
1793 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1794 gcc_assert (ncopies >= 1);
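  /* E.g. with a vectorization factor of 16 and a four-element vectype,
     NCOPIES is 4 and four copies of the masked load/store are generated
     (cf. the description in vect_get_vec_def_for_stmt_copy above).  */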
1796 is_store = gimple_call_internal_fn (stmt) == IFN_MASK_STORE;
1797 mask = gimple_call_arg (stmt, 2);
1798 if (TYPE_PRECISION (TREE_TYPE (mask))
1799 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))))
1800 return false;
1802 /* FORNOW. This restriction should be relaxed. */
1803 if (nested_in_vect_loop && ncopies > 1)
1805 if (dump_enabled_p ())
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1807 "multiple types in nested loop.");
1808 return false;
1811 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1812 return false;
1814 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
1815 return false;
1817 if (!STMT_VINFO_DATA_REF (stmt_info))
1818 return false;
1820 elem_type = TREE_TYPE (vectype);
1822 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1823 return false;
1825 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1826 return false;
1828 if (STMT_VINFO_GATHER_P (stmt_info))
1830 gimple def_stmt;
1831 tree def;
1832 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
1833 &gather_off, &gather_scale);
1834 gcc_assert (gather_decl);
1835 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, NULL,
1836 &def_stmt, &def, &gather_dt,
1837 &gather_off_vectype))
1839 if (dump_enabled_p ())
1840 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1841 "gather index use not simple.");
1842 return false;
1845 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1846 tree masktype
1847 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
1848 if (TREE_CODE (masktype) == INTEGER_TYPE)
1850 if (dump_enabled_p ())
1851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1852 "masked gather with integer mask not supported.");
1853 return false;
1856 else if (tree_int_cst_compare (nested_in_vect_loop
1857 ? STMT_VINFO_DR_STEP (stmt_info)
1858 : DR_STEP (dr), size_zero_node) <= 0)
1859 return false;
1860 else if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1861 || !can_vec_mask_load_store_p (TYPE_MODE (vectype), !is_store))
1862 return false;
1864 if (TREE_CODE (mask) != SSA_NAME)
1865 return false;
1867 if (!vect_is_simple_use (mask, stmt, loop_vinfo, NULL,
1868 &def_stmt, &def, &dt))
1869 return false;
1871 if (is_store)
1873 tree rhs = gimple_call_arg (stmt, 3);
1874 if (!vect_is_simple_use (rhs, stmt, loop_vinfo, NULL,
1875 &def_stmt, &def, &dt))
1876 return false;
1879 if (!vec_stmt) /* transformation not required. */
1881 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
1882 if (is_store)
1883 vect_model_store_cost (stmt_info, ncopies, false, dt,
1884 NULL, NULL, NULL);
1885 else
1886 vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
1887 return true;
1890 /** Transform. **/
1892 if (STMT_VINFO_GATHER_P (stmt_info))
1894 tree vec_oprnd0 = NULL_TREE, op;
1895 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
1896 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
1897 tree ptr, vec_mask = NULL_TREE, mask_op = NULL_TREE, var, scale;
1898 tree perm_mask = NULL_TREE, prev_res = NULL_TREE;
1899 tree mask_perm_mask = NULL_TREE;
1900 edge pe = loop_preheader_edge (loop);
1901 gimple_seq seq;
1902 basic_block new_bb;
1903 enum { NARROW, NONE, WIDEN } modifier;
1904 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
1906 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
1907 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1908 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1909 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1910 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
1911 scaletype = TREE_VALUE (arglist);
1912 gcc_checking_assert (types_compatible_p (srctype, rettype)
1913 && types_compatible_p (srctype, masktype));
1915 if (nunits == gather_off_nunits)
1916 modifier = NONE;
1917 else if (nunits == gather_off_nunits / 2)
1919 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
1920 modifier = WIDEN;
1922 for (i = 0; i < gather_off_nunits; ++i)
1923 sel[i] = i | nunits;
1925 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
1927 else if (nunits == gather_off_nunits * 2)
1929 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
1930 modifier = NARROW;
1932 for (i = 0; i < nunits; ++i)
1933 sel[i] = i < gather_off_nunits
1934 ? i : i + nunits - gather_off_nunits;
1936 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
1937 ncopies *= 2;
1938 for (i = 0; i < nunits; ++i)
1939 sel[i] = i | gather_off_nunits;
1940 mask_perm_mask = vect_gen_perm_mask_checked (masktype, sel);
1942 else
1943 gcc_unreachable ();
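      /* For example (values purely illustrative): with nunits == 4 and
         gather_off_nunits == 8 (WIDEN) the select vector built above is
         { 4, 5, 6, 7, 4, 5, 6, 7 }, so odd copies reuse the upper half of
         the offset vector.  With nunits == 8 and gather_off_nunits == 4
         (NARROW) the data permutation { 0, 1, 2, 3, 8, 9, 10, 11 }
         concatenates the first gather_off_nunits lanes of two consecutive
         gather results, and the mask permutation { 4, 5, 6, 7, 4, 5, 6, 7 }
         picks the upper half of the mask vector for the odd copies.  */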
1945 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
1947 ptr = fold_convert (ptrtype, gather_base);
1948 if (!is_gimple_min_invariant (ptr))
1950 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
1951 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1952 gcc_assert (!new_bb);
1955 scale = build_int_cst (scaletype, gather_scale);
1957 prev_stmt_info = NULL;
1958 for (j = 0; j < ncopies; ++j)
1960 if (modifier == WIDEN && (j & 1))
1961 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
1962 perm_mask, stmt, gsi);
1963 else if (j == 0)
1964 op = vec_oprnd0
1965 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
1966 else
1967 op = vec_oprnd0
1968 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
1970 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
1972 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
1973 == TYPE_VECTOR_SUBPARTS (idxtype));
1974 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
1975 var = make_ssa_name (var);
1976 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
1977 new_stmt
1978 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
1979 vect_finish_stmt_generation (stmt, new_stmt, gsi);
1980 op = var;
1983 if (mask_perm_mask && (j & 1))
1984 mask_op = permute_vec_elements (mask_op, mask_op,
1985 mask_perm_mask, stmt, gsi);
1986 else
1988 if (j == 0)
1989 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
1990 else
1992 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL,
1993 &def_stmt, &def, &dt);
1994 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
1997 mask_op = vec_mask;
1998 if (!useless_type_conversion_p (masktype, TREE_TYPE (vec_mask)))
2000 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op))
2001 == TYPE_VECTOR_SUBPARTS (masktype));
2002 var = vect_get_new_vect_var (masktype, vect_simple_var,
2003 NULL);
2004 var = make_ssa_name (var);
2005 mask_op = build1 (VIEW_CONVERT_EXPR, masktype, mask_op);
2006 new_stmt
2007 = gimple_build_assign (var, VIEW_CONVERT_EXPR, mask_op);
2008 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2009 mask_op = var;
2013 new_stmt
2014 = gimple_build_call (gather_decl, 5, mask_op, ptr, op, mask_op,
2015 scale);
2017 if (!useless_type_conversion_p (vectype, rettype))
2019 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
2020 == TYPE_VECTOR_SUBPARTS (rettype));
2021 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
2022 op = make_ssa_name (var, new_stmt);
2023 gimple_call_set_lhs (new_stmt, op);
2024 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2025 var = make_ssa_name (vec_dest);
2026 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
2027 new_stmt = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
2029 else
2031 var = make_ssa_name (vec_dest, new_stmt);
2032 gimple_call_set_lhs (new_stmt, var);
2035 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2037 if (modifier == NARROW)
2039 if ((j & 1) == 0)
2041 prev_res = var;
2042 continue;
2044 var = permute_vec_elements (prev_res, var,
2045 perm_mask, stmt, gsi);
2046 new_stmt = SSA_NAME_DEF_STMT (var);
2049 if (prev_stmt_info == NULL)
2050 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2051 else
2052 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2053 prev_stmt_info = vinfo_for_stmt (new_stmt);
2056 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2057 from the IL. */
2058 tree lhs = gimple_call_lhs (stmt);
2059 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2060 set_vinfo_for_stmt (new_stmt, stmt_info);
2061 set_vinfo_for_stmt (stmt, NULL);
2062 STMT_VINFO_STMT (stmt_info) = new_stmt;
2063 gsi_replace (gsi, new_stmt, true);
2064 return true;
2066 else if (is_store)
2068 tree vec_rhs = NULL_TREE, vec_mask = NULL_TREE;
2069 prev_stmt_info = NULL;
2070 for (i = 0; i < ncopies; i++)
2072 unsigned align, misalign;
2074 if (i == 0)
2076 tree rhs = gimple_call_arg (stmt, 3);
2077 vec_rhs = vect_get_vec_def_for_operand (rhs, stmt, NULL);
2078 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2079 /* We should have caught mismatched types earlier. */
2080 gcc_assert (useless_type_conversion_p (vectype,
2081 TREE_TYPE (vec_rhs)));
2082 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2083 NULL_TREE, &dummy, gsi,
2084 &ptr_incr, false, &inv_p);
2085 gcc_assert (!inv_p);
2087 else
2089 vect_is_simple_use (vec_rhs, NULL, loop_vinfo, NULL, &def_stmt,
2090 &def, &dt);
2091 vec_rhs = vect_get_vec_def_for_stmt_copy (dt, vec_rhs);
2092 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2093 &def, &dt);
2094 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2095 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2096 TYPE_SIZE_UNIT (vectype));
2099 align = TYPE_ALIGN_UNIT (vectype);
2100 if (aligned_access_p (dr))
2101 misalign = 0;
2102 else if (DR_MISALIGNMENT (dr) == -1)
2104 align = TYPE_ALIGN_UNIT (elem_type);
2105 misalign = 0;
2107 else
2108 misalign = DR_MISALIGNMENT (dr);
2109 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2110 misalign);
2111 new_stmt
2112 = gimple_build_call_internal (IFN_MASK_STORE, 4, dataref_ptr,
2113 gimple_call_arg (stmt, 1),
2114 vec_mask, vec_rhs);
2115 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2116 if (i == 0)
2117 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2118 else
2119 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2120 prev_stmt_info = vinfo_for_stmt (new_stmt);
2123 else
2125 tree vec_mask = NULL_TREE;
2126 prev_stmt_info = NULL;
2127 vec_dest = vect_create_destination_var (gimple_call_lhs (stmt), vectype);
2128 for (i = 0; i < ncopies; i++)
2130 unsigned align, misalign;
2132 if (i == 0)
2134 vec_mask = vect_get_vec_def_for_operand (mask, stmt, NULL);
2135 dataref_ptr = vect_create_data_ref_ptr (stmt, vectype, NULL,
2136 NULL_TREE, &dummy, gsi,
2137 &ptr_incr, false, &inv_p);
2138 gcc_assert (!inv_p);
2140 else
2142 vect_is_simple_use (vec_mask, NULL, loop_vinfo, NULL, &def_stmt,
2143 &def, &dt);
2144 vec_mask = vect_get_vec_def_for_stmt_copy (dt, vec_mask);
2145 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
2146 TYPE_SIZE_UNIT (vectype));
2149 align = TYPE_ALIGN_UNIT (vectype);
2150 if (aligned_access_p (dr))
2151 misalign = 0;
2152 else if (DR_MISALIGNMENT (dr) == -1)
2154 align = TYPE_ALIGN_UNIT (elem_type);
2155 misalign = 0;
2157 else
2158 misalign = DR_MISALIGNMENT (dr);
2159 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
2160 misalign);
2161 new_stmt
2162 = gimple_build_call_internal (IFN_MASK_LOAD, 3, dataref_ptr,
2163 gimple_call_arg (stmt, 1),
2164 vec_mask);
2165 gimple_call_set_lhs (new_stmt, make_ssa_name (vec_dest));
2166 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2167 if (i == 0)
2168 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2169 else
2170 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2171 prev_stmt_info = vinfo_for_stmt (new_stmt);
2175 if (!is_store)
2177 /* Ensure that even with -fno-tree-dce the scalar MASK_LOAD is removed
2178 from the IL. */
2179 tree lhs = gimple_call_lhs (stmt);
2180 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
2181 set_vinfo_for_stmt (new_stmt, stmt_info);
2182 set_vinfo_for_stmt (stmt, NULL);
2183 STMT_VINFO_STMT (stmt_info) = new_stmt;
2184 gsi_replace (gsi, new_stmt, true);
2187 return true;
2191 /* Function vectorizable_call.
2193 Check if GS performs a function call that can be vectorized.
2194 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2195 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2196 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
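/* As an illustrative sketch (the vector builtin name is hypothetical):
   a loop statement

     y = sqrtf (x);

   for which vectorizable_function returns a target builtin for the
   (vectype_out, vectype_in) pair is replaced, per copy, by

     vect_y = __builtin_vec_sqrtf (vect_x);

   while calls with no such builtin (and which are not IFN_GOMP_SIMD_LANE)
   are rejected below.  */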
2198 static bool
2199 vectorizable_call (gimple gs, gimple_stmt_iterator *gsi, gimple *vec_stmt,
2200 slp_tree slp_node)
2202 gcall *stmt;
2203 tree vec_dest;
2204 tree scalar_dest;
2205 tree op, type;
2206 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2207 stmt_vec_info stmt_info = vinfo_for_stmt (gs), prev_stmt_info;
2208 tree vectype_out, vectype_in;
2209 int nunits_in;
2210 int nunits_out;
2211 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2212 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2213 tree fndecl, new_temp, def, rhs_type;
2214 gimple def_stmt;
2215 enum vect_def_type dt[3]
2216 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
2217 gimple new_stmt = NULL;
2218 int ncopies, j;
2219 vec<tree> vargs = vNULL;
2220 enum { NARROW, NONE, WIDEN } modifier;
2221 size_t i, nargs;
2222 tree lhs;
2224 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2225 return false;
2227 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2228 return false;
2230 /* Is GS a vectorizable call? */
2231 stmt = dyn_cast <gcall *> (gs);
2232 if (!stmt)
2233 return false;
2235 if (gimple_call_internal_p (stmt)
2236 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2237 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
2238 return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
2239 slp_node);
2241 if (gimple_call_lhs (stmt) == NULL_TREE
2242 || TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2243 return false;
2245 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2247 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
2249 /* Process function arguments. */
2250 rhs_type = NULL_TREE;
2251 vectype_in = NULL_TREE;
2252 nargs = gimple_call_num_args (stmt);
2254 /* Bail out if the function has more than three arguments; we do not have
2255 interesting builtin functions to vectorize with more than two arguments
2256 except for fma. Having no arguments is not good either. */
2257 if (nargs == 0 || nargs > 3)
2258 return false;
2260 /* Ignore the argument of IFN_GOMP_SIMD_LANE; it is magic. */
2261 if (gimple_call_internal_p (stmt)
2262 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2264 nargs = 0;
2265 rhs_type = unsigned_type_node;
2268 for (i = 0; i < nargs; i++)
2270 tree opvectype;
2272 op = gimple_call_arg (stmt, i);
2274 /* We can only handle calls with arguments of the same type. */
2275 if (rhs_type
2276 && !types_compatible_p (rhs_type, TREE_TYPE (op)))
2278 if (dump_enabled_p ())
2279 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2280 "argument types differ.\n");
2281 return false;
2283 if (!rhs_type)
2284 rhs_type = TREE_TYPE (op);
2286 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2287 &def_stmt, &def, &dt[i], &opvectype))
2289 if (dump_enabled_p ())
2290 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2291 "use not simple.\n");
2292 return false;
2295 if (!vectype_in)
2296 vectype_in = opvectype;
2297 else if (opvectype
2298 && opvectype != vectype_in)
2300 if (dump_enabled_p ())
2301 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2302 "argument vector types differ.\n");
2303 return false;
2306 /* If all arguments are external or constant defs, use a vector type with
2307 the same size as the output vector type. */
2308 if (!vectype_in)
2309 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
2310 if (vec_stmt)
2311 gcc_assert (vectype_in);
2312 if (!vectype_in)
2314 if (dump_enabled_p ())
2316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2317 "no vectype for scalar type ");
2318 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
2319 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2322 return false;
2325 /* FORNOW */
2326 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2327 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2328 if (nunits_in == nunits_out / 2)
2329 modifier = NARROW;
2330 else if (nunits_out == nunits_in)
2331 modifier = NONE;
2332 else if (nunits_out == nunits_in / 2)
2333 modifier = WIDEN;
2334 else
2335 return false;
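  /* E.g. (illustrative): a call whose input vectors hold 4 elements and
     whose output vectors hold 8 is classified as NARROW, and each
     vectorized call below consumes two input vectors per output vector;
     the opposite ratio is WIDEN, which no current target implements.  */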
2337 /* For now, we only vectorize functions if a target-specific builtin
2338 is available. TODO -- in some cases, it might be profitable to
2339 insert the calls for pieces of the vector, in order to be able
2340 to vectorize other operations in the loop. */
2341 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
2342 if (fndecl == NULL_TREE)
2344 if (gimple_call_internal_p (stmt)
2345 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
2346 && !slp_node
2347 && loop_vinfo
2348 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2349 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
2350 && LOOP_VINFO_LOOP (loop_vinfo)->simduid
2351 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
2353 /* We can handle IFN_GOMP_SIMD_LANE by returning a
2354 { 0, 1, 2, ... vf - 1 } vector. */
2355 gcc_assert (nargs == 0);
2357 else
2359 if (dump_enabled_p ())
2360 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2361 "function is not vectorizable.\n");
2362 return false;
2366 gcc_assert (!gimple_vuse (stmt));
2368 if (slp_node || PURE_SLP_STMT (stmt_info))
2369 ncopies = 1;
2370 else if (modifier == NARROW)
2371 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2372 else
2373 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2375 /* Sanity check: make sure that at least one copy of the vectorized stmt
2376 needs to be generated. */
2377 gcc_assert (ncopies >= 1);
2379 if (!vec_stmt) /* transformation not required. */
2381 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_NOTE, vect_location, "=== vectorizable_call ==="
2384 "\n");
2385 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
2386 return true;
2389 /** Transform. **/
2391 if (dump_enabled_p ())
2392 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2394 /* Handle def. */
2395 scalar_dest = gimple_call_lhs (stmt);
2396 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2398 prev_stmt_info = NULL;
2399 switch (modifier)
2401 case NONE:
2402 for (j = 0; j < ncopies; ++j)
2404 /* Build argument list for the vectorized call. */
2405 if (j == 0)
2406 vargs.create (nargs);
2407 else
2408 vargs.truncate (0);
2410 if (slp_node)
2412 auto_vec<vec<tree> > vec_defs (nargs);
2413 vec<tree> vec_oprnds0;
2415 for (i = 0; i < nargs; i++)
2416 vargs.quick_push (gimple_call_arg (stmt, i));
2417 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2418 vec_oprnds0 = vec_defs[0];
2420 /* Arguments are ready. Create the new vector stmt. */
2421 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_oprnd0)
2423 size_t k;
2424 for (k = 0; k < nargs; k++)
2426 vec<tree> vec_oprndsk = vec_defs[k];
2427 vargs[k] = vec_oprndsk[i];
2429 new_stmt = gimple_build_call_vec (fndecl, vargs);
2430 new_temp = make_ssa_name (vec_dest, new_stmt);
2431 gimple_call_set_lhs (new_stmt, new_temp);
2432 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2433 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2436 for (i = 0; i < nargs; i++)
2438 vec<tree> vec_oprndsi = vec_defs[i];
2439 vec_oprndsi.release ();
2441 continue;
2444 for (i = 0; i < nargs; i++)
2446 op = gimple_call_arg (stmt, i);
2447 if (j == 0)
2448 vec_oprnd0
2449 = vect_get_vec_def_for_operand (op, stmt, NULL);
2450 else
2452 vec_oprnd0 = gimple_call_arg (new_stmt, i);
2453 vec_oprnd0
2454 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2457 vargs.quick_push (vec_oprnd0);
2460 if (gimple_call_internal_p (stmt)
2461 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE)
2463 tree *v = XALLOCAVEC (tree, nunits_out);
2464 int k;
2465 for (k = 0; k < nunits_out; ++k)
2466 v[k] = build_int_cst (unsigned_type_node, j * nunits_out + k);
2467 tree cst = build_vector (vectype_out, v);
2468 tree new_var
2469 = vect_get_new_vect_var (vectype_out, vect_simple_var, "cst_");
2470 gimple init_stmt = gimple_build_assign (new_var, cst);
2471 new_temp = make_ssa_name (new_var, init_stmt);
2472 gimple_assign_set_lhs (init_stmt, new_temp);
2473 vect_init_vector_1 (stmt, init_stmt, NULL);
2474 new_temp = make_ssa_name (vec_dest);
2475 new_stmt = gimple_build_assign (new_temp,
2476 gimple_assign_lhs (init_stmt));
2478 else
2480 new_stmt = gimple_build_call_vec (fndecl, vargs);
2481 new_temp = make_ssa_name (vec_dest, new_stmt);
2482 gimple_call_set_lhs (new_stmt, new_temp);
2484 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2486 if (j == 0)
2487 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2488 else
2489 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2491 prev_stmt_info = vinfo_for_stmt (new_stmt);
2494 break;
2496 case NARROW:
2497 for (j = 0; j < ncopies; ++j)
2499 /* Build argument list for the vectorized call. */
2500 if (j == 0)
2501 vargs.create (nargs * 2);
2502 else
2503 vargs.truncate (0);
2505 if (slp_node)
2507 auto_vec<vec<tree> > vec_defs (nargs);
2508 vec<tree> vec_oprnds0;
2510 for (i = 0; i < nargs; i++)
2511 vargs.quick_push (gimple_call_arg (stmt, i));
2512 vect_get_slp_defs (vargs, slp_node, &vec_defs, -1);
2513 vec_oprnds0 = vec_defs[0];
2515 /* Arguments are ready. Create the new vector stmt. */
2516 for (i = 0; vec_oprnds0.iterate (i, &vec_oprnd0); i += 2)
2518 size_t k;
2519 vargs.truncate (0);
2520 for (k = 0; k < nargs; k++)
2522 vec<tree> vec_oprndsk = vec_defs[k];
2523 vargs.quick_push (vec_oprndsk[i]);
2524 vargs.quick_push (vec_oprndsk[i + 1]);
2526 new_stmt = gimple_build_call_vec (fndecl, vargs);
2527 new_temp = make_ssa_name (vec_dest, new_stmt);
2528 gimple_call_set_lhs (new_stmt, new_temp);
2529 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2530 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
2533 for (i = 0; i < nargs; i++)
2535 vec<tree> vec_oprndsi = vec_defs[i];
2536 vec_oprndsi.release ();
2538 continue;
2541 for (i = 0; i < nargs; i++)
2543 op = gimple_call_arg (stmt, i);
2544 if (j == 0)
2546 vec_oprnd0
2547 = vect_get_vec_def_for_operand (op, stmt, NULL);
2548 vec_oprnd1
2549 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2551 else
2553 vec_oprnd1 = gimple_call_arg (new_stmt, 2*i + 1);
2554 vec_oprnd0
2555 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd1);
2556 vec_oprnd1
2557 = vect_get_vec_def_for_stmt_copy (dt[i], vec_oprnd0);
2560 vargs.quick_push (vec_oprnd0);
2561 vargs.quick_push (vec_oprnd1);
2564 new_stmt = gimple_build_call_vec (fndecl, vargs);
2565 new_temp = make_ssa_name (vec_dest, new_stmt);
2566 gimple_call_set_lhs (new_stmt, new_temp);
2567 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2569 if (j == 0)
2570 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2571 else
2572 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2574 prev_stmt_info = vinfo_for_stmt (new_stmt);
2577 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2579 break;
2581 case WIDEN:
2582 /* No current target implements this case. */
2583 return false;
2586 vargs.release ();
2588 /* The call in STMT might prevent it from being removed in dce.
2589 We cannot, however, remove it here, because of the way the ssa name
2590 it defines is mapped to the new definition. So just replace the
2591 rhs of the statement with something harmless. */
2593 if (slp_node)
2594 return true;
2596 type = TREE_TYPE (scalar_dest);
2597 if (is_pattern_stmt_p (stmt_info))
2598 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
2599 else
2600 lhs = gimple_call_lhs (stmt);
2601 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
2602 set_vinfo_for_stmt (new_stmt, stmt_info);
2603 set_vinfo_for_stmt (stmt, NULL);
2604 STMT_VINFO_STMT (stmt_info) = new_stmt;
2605 gsi_replace (gsi, new_stmt, false);
2607 return true;
2611 struct simd_call_arg_info
2613 tree vectype;
2614 tree op;
2615 enum vect_def_type dt;
2616 HOST_WIDE_INT linear_step;
2617 unsigned int align;
2620 /* Function vectorizable_simd_clone_call.
2622 Check if STMT performs a function call that can be vectorized
2623 by calling a simd clone of the function.
2624 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2625 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2626 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
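/* Illustrative sketch (declaration hypothetical): given

     #pragma omp declare simd uniform (n)
     float foo (float x, int n);

   a call  y = foo (a[i], n);  in a vectorizable loop is rewritten below
   into a call to the best-matching simd clone of foo, taking a vector of
   x values and the scalar n and producing a vector of results, with one
   such call emitted per copy (ncopies).  */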
2628 static bool
2629 vectorizable_simd_clone_call (gimple stmt, gimple_stmt_iterator *gsi,
2630 gimple *vec_stmt, slp_tree slp_node)
2632 tree vec_dest;
2633 tree scalar_dest;
2634 tree op, type;
2635 tree vec_oprnd0 = NULL_TREE;
2636 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2637 tree vectype;
2638 unsigned int nunits;
2639 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2640 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2641 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
2642 tree fndecl, new_temp, def;
2643 gimple def_stmt;
2644 gimple new_stmt = NULL;
2645 int ncopies, j;
2646 vec<simd_call_arg_info> arginfo = vNULL;
2647 vec<tree> vargs = vNULL;
2648 size_t i, nargs;
2649 tree lhs, rtype, ratype;
2650 vec<constructor_elt, va_gc> *ret_ctor_elts;
2652 /* Is STMT a vectorizable call? */
2653 if (!is_gimple_call (stmt))
2654 return false;
2656 fndecl = gimple_call_fndecl (stmt);
2657 if (fndecl == NULL_TREE)
2658 return false;
2660 struct cgraph_node *node = cgraph_node::get (fndecl);
2661 if (node == NULL || node->simd_clones == NULL)
2662 return false;
2664 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
2665 return false;
2667 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
2668 return false;
2670 if (gimple_call_lhs (stmt)
2671 && TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
2672 return false;
2674 gcc_checking_assert (!stmt_can_throw_internal (stmt));
2676 vectype = STMT_VINFO_VECTYPE (stmt_info);
2678 if (loop_vinfo && nested_in_vect_loop_p (loop, stmt))
2679 return false;
2681 /* FORNOW */
2682 if (slp_node || PURE_SLP_STMT (stmt_info))
2683 return false;
2685 /* Process function arguments. */
2686 nargs = gimple_call_num_args (stmt);
2688 /* Bail out if the function has zero arguments. */
2689 if (nargs == 0)
2690 return false;
2692 arginfo.create (nargs);
2694 for (i = 0; i < nargs; i++)
2696 simd_call_arg_info thisarginfo;
2697 affine_iv iv;
2699 thisarginfo.linear_step = 0;
2700 thisarginfo.align = 0;
2701 thisarginfo.op = NULL_TREE;
2703 op = gimple_call_arg (stmt, i);
2704 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
2705 &def_stmt, &def, &thisarginfo.dt,
2706 &thisarginfo.vectype)
2707 || thisarginfo.dt == vect_uninitialized_def)
2709 if (dump_enabled_p ())
2710 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2711 "use not simple.\n");
2712 arginfo.release ();
2713 return false;
2716 if (thisarginfo.dt == vect_constant_def
2717 || thisarginfo.dt == vect_external_def)
2718 gcc_assert (thisarginfo.vectype == NULL_TREE);
2719 else
2720 gcc_assert (thisarginfo.vectype != NULL_TREE);
2722 /* For linear arguments, the analysis phase should have saved
2723 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
2724 if (i * 2 + 3 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info).length ()
2725 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2])
2727 gcc_assert (vec_stmt);
2728 thisarginfo.linear_step
2729 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2]);
2730 thisarginfo.op
2731 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 1];
2732 /* If the loop has been peeled for alignment, we need to adjust it. */
2733 tree n1 = LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo);
2734 tree n2 = LOOP_VINFO_NITERS (loop_vinfo);
2735 if (n1 != n2)
2737 tree bias = fold_build2 (MINUS_EXPR, TREE_TYPE (n1), n1, n2);
2738 tree step = STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[i * 2 + 2];
2739 tree opt = TREE_TYPE (thisarginfo.op);
2740 bias = fold_convert (TREE_TYPE (step), bias);
2741 bias = fold_build2 (MULT_EXPR, TREE_TYPE (step), bias, step);
2742 thisarginfo.op
2743 = fold_build2 (POINTER_TYPE_P (opt)
2744 ? POINTER_PLUS_EXPR : PLUS_EXPR, opt,
2745 thisarginfo.op, bias);
2748 else if (!vec_stmt
2749 && thisarginfo.dt != vect_constant_def
2750 && thisarginfo.dt != vect_external_def
2751 && loop_vinfo
2752 && TREE_CODE (op) == SSA_NAME
2753 && simple_iv (loop, loop_containing_stmt (stmt), op,
2754 &iv, false)
2755 && tree_fits_shwi_p (iv.step))
2757 thisarginfo.linear_step = tree_to_shwi (iv.step);
2758 thisarginfo.op = iv.base;
2760 else if ((thisarginfo.dt == vect_constant_def
2761 || thisarginfo.dt == vect_external_def)
2762 && POINTER_TYPE_P (TREE_TYPE (op)))
2763 thisarginfo.align = get_pointer_alignment (op) / BITS_PER_UNIT;
2765 arginfo.quick_push (thisarginfo);
2768 unsigned int badness = 0;
2769 struct cgraph_node *bestn = NULL;
2770 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info).exists ())
2771 bestn = cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info)[0]);
2772 else
2773 for (struct cgraph_node *n = node->simd_clones; n != NULL;
2774 n = n->simdclone->next_clone)
2776 unsigned int this_badness = 0;
2777 if (n->simdclone->simdlen
2778 > (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2779 || n->simdclone->nargs != nargs)
2780 continue;
2781 if (n->simdclone->simdlen
2782 < (unsigned) LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2783 this_badness += (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
2784 - exact_log2 (n->simdclone->simdlen)) * 1024;
2785 if (n->simdclone->inbranch)
2786 this_badness += 2048;
2787 int target_badness = targetm.simd_clone.usable (n);
2788 if (target_badness < 0)
2789 continue;
2790 this_badness += target_badness * 512;
2791 /* FORNOW: Have to add code to add the mask argument. */
2792 if (n->simdclone->inbranch)
2793 continue;
2794 for (i = 0; i < nargs; i++)
2796 switch (n->simdclone->args[i].arg_type)
2798 case SIMD_CLONE_ARG_TYPE_VECTOR:
2799 if (!useless_type_conversion_p
2800 (n->simdclone->args[i].orig_type,
2801 TREE_TYPE (gimple_call_arg (stmt, i))))
2802 i = -1;
2803 else if (arginfo[i].dt == vect_constant_def
2804 || arginfo[i].dt == vect_external_def
2805 || arginfo[i].linear_step)
2806 this_badness += 64;
2807 break;
2808 case SIMD_CLONE_ARG_TYPE_UNIFORM:
2809 if (arginfo[i].dt != vect_constant_def
2810 && arginfo[i].dt != vect_external_def)
2811 i = -1;
2812 break;
2813 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
2814 if (arginfo[i].dt == vect_constant_def
2815 || arginfo[i].dt == vect_external_def
2816 || (arginfo[i].linear_step
2817 != n->simdclone->args[i].linear_step))
2818 i = -1;
2819 break;
2820 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
2821 /* FORNOW */
2822 i = -1;
2823 break;
2824 case SIMD_CLONE_ARG_TYPE_MASK:
2825 gcc_unreachable ();
2827 if (i == (size_t) -1)
2828 break;
2829 if (n->simdclone->args[i].alignment > arginfo[i].align)
2831 i = -1;
2832 break;
2834 if (arginfo[i].align)
2835 this_badness += (exact_log2 (arginfo[i].align)
2836 - exact_log2 (n->simdclone->args[i].alignment));
2838 if (i == (size_t) -1)
2839 continue;
2840 if (bestn == NULL || this_badness < badness)
2842 bestn = n;
2843 badness = this_badness;
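      /* Worked example of the scoring above: with a vectorization factor
         of 8, a usable not-inbranch clone with simdlen 4 and no argument
         penalties scores (exact_log2 (8) - exact_log2 (4)) * 1024 == 1024,
         so an otherwise equal simdlen-8 clone, scoring 0, is preferred.  */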
2847 if (bestn == NULL)
2849 arginfo.release ();
2850 return false;
2853 for (i = 0; i < nargs; i++)
2854 if ((arginfo[i].dt == vect_constant_def
2855 || arginfo[i].dt == vect_external_def)
2856 && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR)
2858 arginfo[i].vectype
2859 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt,
2860 i)));
2861 if (arginfo[i].vectype == NULL
2862 || (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2863 > bestn->simdclone->simdlen))
2865 arginfo.release ();
2866 return false;
2870 fndecl = bestn->decl;
2871 nunits = bestn->simdclone->simdlen;
2872 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2874 /* If the function isn't const, only allow it in simd loops where the user
2875 has asserted that at least nunits consecutive iterations can be
2876 performed using SIMD instructions. */
2877 if ((loop == NULL || (unsigned) loop->safelen < nunits)
2878 && gimple_vuse (stmt))
2880 arginfo.release ();
2881 return false;
2884 /* Sanity check: make sure that at least one copy of the vectorized stmt
2885 needs to be generated. */
2886 gcc_assert (ncopies >= 1);
2888 if (!vec_stmt) /* transformation not required. */
2890 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (bestn->decl);
2891 for (i = 0; i < nargs; i++)
2892 if (bestn->simdclone->args[i].arg_type
2893 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP)
2895 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_grow_cleared (i * 2
2896 + 1);
2897 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (arginfo[i].op);
2898 tree lst = POINTER_TYPE_P (TREE_TYPE (arginfo[i].op))
2899 ? size_type_node : TREE_TYPE (arginfo[i].op);
2900 tree ls = build_int_cst (lst, arginfo[i].linear_step);
2901 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).safe_push (ls);
2903 STMT_VINFO_TYPE (stmt_info) = call_simd_clone_vec_info_type;
2904 if (dump_enabled_p ())
2905 dump_printf_loc (MSG_NOTE, vect_location,
2906 "=== vectorizable_simd_clone_call ===\n");
2907 /* vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL); */
2908 arginfo.release ();
2909 return true;
2912 /** Transform. **/
2914 if (dump_enabled_p ())
2915 dump_printf_loc (MSG_NOTE, vect_location, "transform call.\n");
2917 /* Handle def. */
2918 scalar_dest = gimple_call_lhs (stmt);
2919 vec_dest = NULL_TREE;
2920 rtype = NULL_TREE;
2921 ratype = NULL_TREE;
2922 if (scalar_dest)
2924 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2925 rtype = TREE_TYPE (TREE_TYPE (fndecl));
2926 if (TREE_CODE (rtype) == ARRAY_TYPE)
2928 ratype = rtype;
2929 rtype = TREE_TYPE (ratype);
2933 prev_stmt_info = NULL;
2934 for (j = 0; j < ncopies; ++j)
2936 /* Build argument list for the vectorized call. */
2937 if (j == 0)
2938 vargs.create (nargs);
2939 else
2940 vargs.truncate (0);
2942 for (i = 0; i < nargs; i++)
2944 unsigned int k, l, m, o;
2945 tree atype;
2946 op = gimple_call_arg (stmt, i);
2947 switch (bestn->simdclone->args[i].arg_type)
2949 case SIMD_CLONE_ARG_TYPE_VECTOR:
2950 atype = bestn->simdclone->args[i].vector_type;
2951 o = nunits / TYPE_VECTOR_SUBPARTS (atype);
2952 for (m = j * o; m < (j + 1) * o; m++)
2954 if (TYPE_VECTOR_SUBPARTS (atype)
2955 < TYPE_VECTOR_SUBPARTS (arginfo[i].vectype))
2957 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (atype));
2958 k = (TYPE_VECTOR_SUBPARTS (arginfo[i].vectype)
2959 / TYPE_VECTOR_SUBPARTS (atype));
2960 gcc_assert ((k & (k - 1)) == 0);
2961 if (m == 0)
2962 vec_oprnd0
2963 = vect_get_vec_def_for_operand (op, stmt, NULL);
2964 else
2966 vec_oprnd0 = arginfo[i].op;
2967 if ((m & (k - 1)) == 0)
2968 vec_oprnd0
2969 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
2970 vec_oprnd0);
2972 arginfo[i].op = vec_oprnd0;
2973 vec_oprnd0
2974 = build3 (BIT_FIELD_REF, atype, vec_oprnd0,
2975 size_int (prec),
2976 bitsize_int ((m & (k - 1)) * prec));
2977 new_stmt
2978 = gimple_build_assign (make_ssa_name (atype),
2979 vec_oprnd0);
2980 vect_finish_stmt_generation (stmt, new_stmt, gsi);
2981 vargs.safe_push (gimple_assign_lhs (new_stmt));
2983 else
2985 k = (TYPE_VECTOR_SUBPARTS (atype)
2986 / TYPE_VECTOR_SUBPARTS (arginfo[i].vectype));
2987 gcc_assert ((k & (k - 1)) == 0);
2988 vec<constructor_elt, va_gc> *ctor_elts;
2989 if (k != 1)
2990 vec_alloc (ctor_elts, k);
2991 else
2992 ctor_elts = NULL;
2993 for (l = 0; l < k; l++)
2995 if (m == 0 && l == 0)
2996 vec_oprnd0
2997 = vect_get_vec_def_for_operand (op, stmt, NULL);
2998 else
2999 vec_oprnd0
3000 = vect_get_vec_def_for_stmt_copy (arginfo[i].dt,
3001 arginfo[i].op);
3002 arginfo[i].op = vec_oprnd0;
3003 if (k == 1)
3004 break;
3005 CONSTRUCTOR_APPEND_ELT (ctor_elts, NULL_TREE,
3006 vec_oprnd0);
3008 if (k == 1)
3009 vargs.safe_push (vec_oprnd0);
3010 else
3012 vec_oprnd0 = build_constructor (atype, ctor_elts);
3013 new_stmt
3014 = gimple_build_assign (make_ssa_name (atype),
3015 vec_oprnd0);
3016 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3017 vargs.safe_push (gimple_assign_lhs (new_stmt));
3021 break;
3022 case SIMD_CLONE_ARG_TYPE_UNIFORM:
3023 vargs.safe_push (op);
3024 break;
3025 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP:
3026 if (j == 0)
3028 gimple_seq stmts;
3029 arginfo[i].op
3030 = force_gimple_operand (arginfo[i].op, &stmts, true,
3031 NULL_TREE);
3032 if (stmts != NULL)
3034 basic_block new_bb;
3035 edge pe = loop_preheader_edge (loop);
3036 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3037 gcc_assert (!new_bb);
3039 tree phi_res = copy_ssa_name (op);
3040 gphi *new_phi = create_phi_node (phi_res, loop->header);
3041 set_vinfo_for_stmt (new_phi,
3042 new_stmt_vec_info (new_phi, loop_vinfo,
3043 NULL));
3044 add_phi_arg (new_phi, arginfo[i].op,
3045 loop_preheader_edge (loop), UNKNOWN_LOCATION);
3046 enum tree_code code
3047 = POINTER_TYPE_P (TREE_TYPE (op))
3048 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3049 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3050 ? sizetype : TREE_TYPE (op);
3051 widest_int cst
3052 = wi::mul (bestn->simdclone->args[i].linear_step,
3053 ncopies * nunits);
3054 tree tcst = wide_int_to_tree (type, cst);
3055 tree phi_arg = copy_ssa_name (op);
3056 new_stmt
3057 = gimple_build_assign (phi_arg, code, phi_res, tcst);
3058 gimple_stmt_iterator si = gsi_after_labels (loop->header);
3059 gsi_insert_after (&si, new_stmt, GSI_NEW_STMT);
3060 set_vinfo_for_stmt (new_stmt,
3061 new_stmt_vec_info (new_stmt, loop_vinfo,
3062 NULL));
3063 add_phi_arg (new_phi, phi_arg, loop_latch_edge (loop),
3064 UNKNOWN_LOCATION);
3065 arginfo[i].op = phi_res;
3066 vargs.safe_push (phi_res);
3068 else
3070 enum tree_code code
3071 = POINTER_TYPE_P (TREE_TYPE (op))
3072 ? POINTER_PLUS_EXPR : PLUS_EXPR;
3073 tree type = POINTER_TYPE_P (TREE_TYPE (op))
3074 ? sizetype : TREE_TYPE (op);
3075 widest_int cst
3076 = wi::mul (bestn->simdclone->args[i].linear_step,
3077 j * nunits);
3078 tree tcst = wide_int_to_tree (type, cst);
3079 new_temp = make_ssa_name (TREE_TYPE (op));
3080 new_stmt = gimple_build_assign (new_temp, code,
3081 arginfo[i].op, tcst);
3082 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3083 vargs.safe_push (new_temp);
3085 break;
3086 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP:
3087 default:
3088 gcc_unreachable ();
3092 new_stmt = gimple_build_call_vec (fndecl, vargs);
3093 if (vec_dest)
3095 gcc_assert (ratype || TYPE_VECTOR_SUBPARTS (rtype) == nunits);
3096 if (ratype)
3097 new_temp = create_tmp_var (ratype);
3098 else if (TYPE_VECTOR_SUBPARTS (vectype)
3099 == TYPE_VECTOR_SUBPARTS (rtype))
3100 new_temp = make_ssa_name (vec_dest, new_stmt);
3101 else
3102 new_temp = make_ssa_name (rtype, new_stmt);
3103 gimple_call_set_lhs (new_stmt, new_temp);
3105 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3107 if (vec_dest)
3109 if (TYPE_VECTOR_SUBPARTS (vectype) < nunits)
3111 unsigned int k, l;
3112 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (vectype));
3113 k = nunits / TYPE_VECTOR_SUBPARTS (vectype);
3114 gcc_assert ((k & (k - 1)) == 0);
3115 for (l = 0; l < k; l++)
3117 tree t;
3118 if (ratype)
3120 t = build_fold_addr_expr (new_temp);
3121 t = build2 (MEM_REF, vectype, t,
3122 build_int_cst (TREE_TYPE (t),
3123 l * prec / BITS_PER_UNIT));
3125 else
3126 t = build3 (BIT_FIELD_REF, vectype, new_temp,
3127 size_int (prec), bitsize_int (l * prec));
3128 new_stmt
3129 = gimple_build_assign (make_ssa_name (vectype), t);
3130 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3131 if (j == 0 && l == 0)
3132 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3133 else
3134 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3136 prev_stmt_info = vinfo_for_stmt (new_stmt);
3139 if (ratype)
3141 tree clobber = build_constructor (ratype, NULL);
3142 TREE_THIS_VOLATILE (clobber) = 1;
3143 new_stmt = gimple_build_assign (new_temp, clobber);
3144 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3146 continue;
3148 else if (TYPE_VECTOR_SUBPARTS (vectype) > nunits)
3150 unsigned int k = (TYPE_VECTOR_SUBPARTS (vectype)
3151 / TYPE_VECTOR_SUBPARTS (rtype));
3152 gcc_assert ((k & (k - 1)) == 0);
3153 if ((j & (k - 1)) == 0)
3154 vec_alloc (ret_ctor_elts, k);
3155 if (ratype)
3157 unsigned int m, o = nunits / TYPE_VECTOR_SUBPARTS (rtype);
3158 for (m = 0; m < o; m++)
3160 tree tem = build4 (ARRAY_REF, rtype, new_temp,
3161 size_int (m), NULL_TREE, NULL_TREE);
3162 new_stmt
3163 = gimple_build_assign (make_ssa_name (rtype), tem);
3164 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3165 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE,
3166 gimple_assign_lhs (new_stmt));
3168 tree clobber = build_constructor (ratype, NULL);
3169 TREE_THIS_VOLATILE (clobber) = 1;
3170 new_stmt = gimple_build_assign (new_temp, clobber);
3171 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3173 else
3174 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts, NULL_TREE, new_temp);
3175 if ((j & (k - 1)) != k - 1)
3176 continue;
3177 vec_oprnd0 = build_constructor (vectype, ret_ctor_elts);
3178 new_stmt
3179 = gimple_build_assign (make_ssa_name (vec_dest), vec_oprnd0);
3180 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3182 if ((unsigned) j == k - 1)
3183 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3184 else
3185 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3187 prev_stmt_info = vinfo_for_stmt (new_stmt);
3188 continue;
3190 else if (ratype)
3192 tree t = build_fold_addr_expr (new_temp);
3193 t = build2 (MEM_REF, vectype, t,
3194 build_int_cst (TREE_TYPE (t), 0));
3195 new_stmt
3196 = gimple_build_assign (make_ssa_name (vec_dest), t);
3197 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3198 tree clobber = build_constructor (ratype, NULL);
3199 TREE_THIS_VOLATILE (clobber) = 1;
3200 vect_finish_stmt_generation (stmt,
3201 gimple_build_assign (new_temp,
3202 clobber), gsi);
3206 if (j == 0)
3207 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3208 else
3209 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3211 prev_stmt_info = vinfo_for_stmt (new_stmt);
3214 vargs.release ();
3216 /* The call in STMT might prevent it from being removed in dce.
3217 We cannot, however, remove it here, because of the way the ssa name
3218 it defines is mapped to the new definition. So just replace the
3219 rhs of the statement with something harmless. */
3221 if (slp_node)
3222 return true;
3224 if (scalar_dest)
3226 type = TREE_TYPE (scalar_dest);
3227 if (is_pattern_stmt_p (stmt_info))
3228 lhs = gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info));
3229 else
3230 lhs = gimple_call_lhs (stmt);
3231 new_stmt = gimple_build_assign (lhs, build_zero_cst (type));
3233 else
3234 new_stmt = gimple_build_nop ();
3235 set_vinfo_for_stmt (new_stmt, stmt_info);
3236 set_vinfo_for_stmt (stmt, NULL);
3237 STMT_VINFO_STMT (stmt_info) = new_stmt;
3238 gsi_replace (gsi, new_stmt, true);
3239 unlink_stmt_vdef (stmt);
3241 return true;
3245 /* Function vect_gen_widened_results_half
3247 Create a vector stmt whose code, number of arguments, and result
3248 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3249 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
3250 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3251 needs to be created (DECL is a function-decl of a target-builtin).
3252 STMT is the original scalar stmt that we are vectorizing. */
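/* For instance (illustrative), with CODE == VEC_UNPACK_LO_EXPR this emits
   roughly

     half_lo = VEC_UNPACK_LO_EXPR <vec_oprnd0>;

   whereas for CODE == CALL_EXPR a call to DECL is built on the operands;
   either way the statement produces one half of the widened result.  */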
3254 static gimple
3255 vect_gen_widened_results_half (enum tree_code code,
3256 tree decl,
3257 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3258 tree vec_dest, gimple_stmt_iterator *gsi,
3259 gimple stmt)
3261 gimple new_stmt;
3262 tree new_temp;
3264 /* Generate half of the widened result: */
3265 if (code == CALL_EXPR)
3267 /* Target specific support */
3268 if (op_type == binary_op)
3269 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3270 else
3271 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3272 new_temp = make_ssa_name (vec_dest, new_stmt);
3273 gimple_call_set_lhs (new_stmt, new_temp);
3275 else
3277 /* Generic support */
3278 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3279 if (op_type != binary_op)
3280 vec_oprnd1 = NULL;
3281 new_stmt = gimple_build_assign (vec_dest, code, vec_oprnd0, vec_oprnd1);
3282 new_temp = make_ssa_name (vec_dest, new_stmt);
3283 gimple_assign_set_lhs (new_stmt, new_temp);
3285 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3287 return new_stmt;
3291 /* Get vectorized definitions for loop-based vectorization. For the first
3292 operand we call vect_get_vec_def_for_operand() (with OPRND containing
3293 scalar operand), and for the rest we get a copy with
3294 vect_get_vec_def_for_stmt_copy() using the previous vector definition
3295 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
3296 The vectors are collected into VEC_OPRNDS. */
3298 static void
3299 vect_get_loop_based_defs (tree *oprnd, gimple stmt, enum vect_def_type dt,
3300 vec<tree> *vec_oprnds, int multi_step_cvt)
3302 tree vec_oprnd;
3304 /* Get first vector operand. */
3305 /* All the vector operands except the very first one (that is, the scalar oprnd)
3306 are stmt copies. */
3307 if (TREE_CODE (TREE_TYPE (*oprnd)) != VECTOR_TYPE)
3308 vec_oprnd = vect_get_vec_def_for_operand (*oprnd, stmt, NULL);
3309 else
3310 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, *oprnd);
3312 vec_oprnds->quick_push (vec_oprnd);
3314 /* Get second vector operand. */
3315 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
3316 vec_oprnds->quick_push (vec_oprnd);
3318 *oprnd = vec_oprnd;
3320 /* For conversion in multiple steps, continue to get operands
3321 recursively. */
3322 if (multi_step_cvt)
3323 vect_get_loop_based_defs (oprnd, stmt, dt, vec_oprnds, multi_step_cvt - 1);
3327 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
3328 For multi-step conversions store the resulting vectors and call the function
3329 recursively. */
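/* For instance (illustrative), when demoting int to short each step emits

     vect_dest = VEC_PACK_TRUNC_EXPR <vop0, vop1>;

   pairing the operands so that every step halves the number of vectors
   until the destination type is reached.  */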
3331 static void
3332 vect_create_vectorized_demotion_stmts (vec<tree> *vec_oprnds,
3333 int multi_step_cvt, gimple stmt,
3334 vec<tree> vec_dsts,
3335 gimple_stmt_iterator *gsi,
3336 slp_tree slp_node, enum tree_code code,
3337 stmt_vec_info *prev_stmt_info)
3339 unsigned int i;
3340 tree vop0, vop1, new_tmp, vec_dest;
3341 gimple new_stmt;
3342 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3344 vec_dest = vec_dsts.pop ();
3346 for (i = 0; i < vec_oprnds->length (); i += 2)
3348 /* Create demotion operation. */
3349 vop0 = (*vec_oprnds)[i];
3350 vop1 = (*vec_oprnds)[i + 1];
3351 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
3352 new_tmp = make_ssa_name (vec_dest, new_stmt);
3353 gimple_assign_set_lhs (new_stmt, new_tmp);
3354 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3356 if (multi_step_cvt)
3357 /* Store the resulting vector for next recursive call. */
3358 (*vec_oprnds)[i/2] = new_tmp;
3359 else
3361 /* This is the last step of the conversion sequence. Store the
3362 vectors in SLP_NODE or in the vector info of the scalar statement
3363 (or in the STMT_VINFO_RELATED_STMT chain). */
3364 if (slp_node)
3365 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3366 else
3368 if (!*prev_stmt_info)
3369 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3370 else
3371 STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
3373 *prev_stmt_info = vinfo_for_stmt (new_stmt);
3378 /* For multi-step demotion operations we first generate demotion operations
3379 from the source type to the intermediate types, and then combine the
3380 results (stored in VEC_OPRNDS) with a demotion operation to the destination
3381 type. */
3382 if (multi_step_cvt)
3384 /* At each level of recursion we have half of the operands we had at the
3385 previous level. */
3386 vec_oprnds->truncate ((i+1)/2);
3387 vect_create_vectorized_demotion_stmts (vec_oprnds, multi_step_cvt - 1,
3388 stmt, vec_dsts, gsi, slp_node,
3389 VEC_PACK_TRUNC_EXPR,
3390 prev_stmt_info);
3393 vec_dsts.quick_push (vec_dest);
3397 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
3398 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
3399 the resulting vectors and call the function recursively. */
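/* For instance (illustrative), when widening short to int with
   CODE1/CODE2 being VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, each input
   vector VOP0 yields the pair

     new_tmp1 = VEC_UNPACK_LO_EXPR <vop0>;
     new_tmp2 = VEC_UNPACK_HI_EXPR <vop0>;

   doubling the number of vectors at each step.  */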
3401 static void
3402 vect_create_vectorized_promotion_stmts (vec<tree> *vec_oprnds0,
3403 vec<tree> *vec_oprnds1,
3404 gimple stmt, tree vec_dest,
3405 gimple_stmt_iterator *gsi,
3406 enum tree_code code1,
3407 enum tree_code code2, tree decl1,
3408 tree decl2, int op_type)
3410 int i;
3411 tree vop0, vop1, new_tmp1, new_tmp2;
3412 gimple new_stmt1, new_stmt2;
3413 vec<tree> vec_tmp = vNULL;
3415 vec_tmp.create (vec_oprnds0->length () * 2);
3416 FOR_EACH_VEC_ELT (*vec_oprnds0, i, vop0)
3418 if (op_type == binary_op)
3419 vop1 = (*vec_oprnds1)[i];
3420 else
3421 vop1 = NULL_TREE;
3423 /* Generate the two halves of promotion operation. */
3424 new_stmt1 = vect_gen_widened_results_half (code1, decl1, vop0, vop1,
3425 op_type, vec_dest, gsi, stmt);
3426 new_stmt2 = vect_gen_widened_results_half (code2, decl2, vop0, vop1,
3427 op_type, vec_dest, gsi, stmt);
3428 if (is_gimple_call (new_stmt1))
3430 new_tmp1 = gimple_call_lhs (new_stmt1);
3431 new_tmp2 = gimple_call_lhs (new_stmt2);
3433 else
3435 new_tmp1 = gimple_assign_lhs (new_stmt1);
3436 new_tmp2 = gimple_assign_lhs (new_stmt2);
3439 /* Store the results for the next step. */
3440 vec_tmp.quick_push (new_tmp1);
3441 vec_tmp.quick_push (new_tmp2);
3444 vec_oprnds0->release ();
3445 *vec_oprnds0 = vec_tmp;
3449 /* Check if STMT performs a conversion operation that can be vectorized.
3450 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3451 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3452 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
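/* As a rough illustration: vectorizing  d[i] = (double) n[i]  with int
   n[] on a target whose vectors hold 8 ints and 4 doubles is a WIDEN
   conversion; each input vector of 8 ints produces two vectors of 4
   doubles, possibly via an intermediate type when the target needs a
   multi-step conversion.  */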
3454 static bool
3455 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3456 gimple *vec_stmt, slp_tree slp_node)
3458 tree vec_dest;
3459 tree scalar_dest;
3460 tree op0, op1 = NULL_TREE;
3461 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3462 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3463 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3464 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3465 enum tree_code codecvt1 = ERROR_MARK, codecvt2 = ERROR_MARK;
3466 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3467 tree new_temp;
3468 tree def;
3469 gimple def_stmt;
3470 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3471 gimple new_stmt = NULL;
3472 stmt_vec_info prev_stmt_info;
3473 int nunits_in;
3474 int nunits_out;
3475 tree vectype_out, vectype_in;
3476 int ncopies, i, j;
3477 tree lhs_type, rhs_type;
3478 enum { NARROW, NONE, WIDEN } modifier;
3479 vec<tree> vec_oprnds0 = vNULL;
3480 vec<tree> vec_oprnds1 = vNULL;
3481 tree vop0;
3482 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3483 int multi_step_cvt = 0;
3484 vec<tree> vec_dsts = vNULL;
3485 vec<tree> interm_types = vNULL;
3486 tree last_oprnd, intermediate_type, cvt_type = NULL_TREE;
3487 int op_type;
3488 machine_mode rhs_mode;
3489 unsigned short fltsz;
3491 /* Is STMT a vectorizable conversion? */
3493 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
3494 return false;
3496 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
3497 return false;
3499 if (!is_gimple_assign (stmt))
3500 return false;
3502 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3503 return false;
3505 code = gimple_assign_rhs_code (stmt);
3506 if (!CONVERT_EXPR_CODE_P (code)
3507 && code != FIX_TRUNC_EXPR
3508 && code != FLOAT_EXPR
3509 && code != WIDEN_MULT_EXPR
3510 && code != WIDEN_LSHIFT_EXPR)
3511 return false;
3513 op_type = TREE_CODE_LENGTH (code);
3515 /* Check types of lhs and rhs. */
3516 scalar_dest = gimple_assign_lhs (stmt);
3517 lhs_type = TREE_TYPE (scalar_dest);
3518 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3520 op0 = gimple_assign_rhs1 (stmt);
3521 rhs_type = TREE_TYPE (op0);
3523 if ((code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3524 && !((INTEGRAL_TYPE_P (lhs_type)
3525 && INTEGRAL_TYPE_P (rhs_type))
3526 || (SCALAR_FLOAT_TYPE_P (lhs_type)
3527 && SCALAR_FLOAT_TYPE_P (rhs_type))))
3528 return false;
3530 if ((INTEGRAL_TYPE_P (lhs_type)
3531 && (TYPE_PRECISION (lhs_type)
3532 != GET_MODE_PRECISION (TYPE_MODE (lhs_type))))
3533 || (INTEGRAL_TYPE_P (rhs_type)
3534 && (TYPE_PRECISION (rhs_type)
3535 != GET_MODE_PRECISION (TYPE_MODE (rhs_type)))))
3537 if (dump_enabled_p ())
3538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3539 "type conversion to/from bit-precision unsupported."
3540 "\n");
3541 return false;
3544 /* Check the operands of the operation. */
3545 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
3546 &def_stmt, &def, &dt[0], &vectype_in))
3548 if (dump_enabled_p ())
3549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3550 "use not simple.\n");
3551 return false;
3553 if (op_type == binary_op)
3555 bool ok;
3557 op1 = gimple_assign_rhs2 (stmt);
3558 gcc_assert (code == WIDEN_MULT_EXPR || code == WIDEN_LSHIFT_EXPR);
3559 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
3560 OP1. */
3561 if (CONSTANT_CLASS_P (op0))
3562 ok = vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo,
3563 &def_stmt, &def, &dt[1], &vectype_in);
3564 else
3565 ok = vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
3566 &def, &dt[1]);
3568 if (!ok)
3570 if (dump_enabled_p ())
3571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3572 "use not simple.\n");
3573 return false;
3577 /* If op0 is an external or constant def, use a vector type of
3578 the same size as the output vector type. */
3579 if (!vectype_in)
3580 vectype_in = get_same_sized_vectype (rhs_type, vectype_out);
3581 if (vec_stmt)
3582 gcc_assert (vectype_in);
3583 if (!vectype_in)
3585 if (dump_enabled_p ())
3587 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3588 "no vectype for scalar type ");
3589 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, rhs_type);
3590 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3593 return false;
3596 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3597 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3598 if (nunits_in < nunits_out)
3599 modifier = NARROW;
3600 else if (nunits_out == nunits_in)
3601 modifier = NONE;
3602 else
3603 modifier = WIDEN;
3605 /* Multiple types in SLP are handled by creating the appropriate number of
3606 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3607 case of SLP. */
3608 if (slp_node || PURE_SLP_STMT (stmt_info))
3609 ncopies = 1;
3610 else if (modifier == NARROW)
3611 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3612 else
3613 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3615 /* Sanity check: make sure that at least one copy of the vectorized stmt
3616 needs to be generated. */
3617 gcc_assert (ncopies >= 1);
3619 /* Supportable by target? */
3620 switch (modifier)
3622 case NONE:
3623 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3624 return false;
3625 if (supportable_convert_operation (code, vectype_out, vectype_in,
3626 &decl1, &code1))
3627 break;
3628 /* FALLTHRU */
3629 unsupported:
3630 if (dump_enabled_p ())
3631 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3632 "conversion not supported by target.\n");
3633 return false;
3635 case WIDEN:
3636 if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
3637 &code1, &code2, &multi_step_cvt,
3638 &interm_types))
3640 /* Binary widening operation can only be supported directly by the
3641 architecture. */
3642 gcc_assert (!(multi_step_cvt && op_type == binary_op));
3643 break;
3646 if (code != FLOAT_EXPR
3647 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3648 <= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3649 goto unsupported;
3651 rhs_mode = TYPE_MODE (rhs_type);
3652 fltsz = GET_MODE_SIZE (TYPE_MODE (lhs_type));
3653 for (rhs_mode = GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type));
3654 rhs_mode != VOIDmode && GET_MODE_SIZE (rhs_mode) <= fltsz;
3655 rhs_mode = GET_MODE_2XWIDER_MODE (rhs_mode))
3657 cvt_type
3658 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3659 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3660 if (cvt_type == NULL_TREE)
3661 goto unsupported;
3663 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3665 if (!supportable_convert_operation (code, vectype_out,
3666 cvt_type, &decl1, &codecvt1))
3667 goto unsupported;
3669 else if (!supportable_widening_operation (code, stmt, vectype_out,
3670 cvt_type, &codecvt1,
3671 &codecvt2, &multi_step_cvt,
3672 &interm_types))
3673 continue;
3674 else
3675 gcc_assert (multi_step_cvt == 0);
3677 if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
3678 vectype_in, &code1, &code2,
3679 &multi_step_cvt, &interm_types))
3680 break;
3683 if (rhs_mode == VOIDmode || GET_MODE_SIZE (rhs_mode) > fltsz)
3684 goto unsupported;
3686 if (GET_MODE_SIZE (rhs_mode) == fltsz)
3687 codecvt2 = ERROR_MARK;
3688 else
3690 multi_step_cvt++;
3691 interm_types.safe_push (cvt_type);
3692 cvt_type = NULL_TREE;
3694 break;
3696 case NARROW:
3697 gcc_assert (op_type == unary_op);
3698 if (supportable_narrowing_operation (code, vectype_out, vectype_in,
3699 &code1, &multi_step_cvt,
3700 &interm_types))
3701 break;
3703 if (code != FIX_TRUNC_EXPR
3704 || (GET_MODE_SIZE (TYPE_MODE (lhs_type))
3705 >= GET_MODE_SIZE (TYPE_MODE (rhs_type))))
3706 goto unsupported;
3708 rhs_mode = TYPE_MODE (rhs_type);
3709 cvt_type
3710 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0);
3711 cvt_type = get_same_sized_vectype (cvt_type, vectype_in);
3712 if (cvt_type == NULL_TREE)
3713 goto unsupported;
3714 if (!supportable_convert_operation (code, cvt_type, vectype_in,
3715 &decl1, &codecvt1))
3716 goto unsupported;
3717 if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type,
3718 &code1, &multi_step_cvt,
3719 &interm_types))
3720 break;
3721 goto unsupported;
3723 default:
3724 gcc_unreachable ();
3727 if (!vec_stmt) /* transformation not required. */
3729 if (dump_enabled_p ())
3730 dump_printf_loc (MSG_NOTE, vect_location,
3731 "=== vectorizable_conversion ===\n");
3732 if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
3734 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3735 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
3737 else if (modifier == NARROW)
3739 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3740 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3742 else
3744 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3745 vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
3747 interm_types.release ();
3748 return true;
3751 /** Transform. **/
3752 if (dump_enabled_p ())
3753 dump_printf_loc (MSG_NOTE, vect_location,
3754 "transform conversion. ncopies = %d.\n", ncopies);
3756 if (op_type == binary_op)
3758 if (CONSTANT_CLASS_P (op0))
3759 op0 = fold_convert (TREE_TYPE (op1), op0);
3760 else if (CONSTANT_CLASS_P (op1))
3761 op1 = fold_convert (TREE_TYPE (op0), op1);
3764 /* In case of multi-step conversion, we first generate conversion operations
3765 to the intermediate types, and then from those types to the final one.
3766 We create vector destinations for the intermediate type (TYPES) received
3767 from supportable_*_operation, and store them in the correct order
3768 for future use in vect_create_vectorized_*_stmts (). */
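   /* Illustrative example (not part of the original sources): assuming a
      target that supports V4SI -> V2DI unpacking and V2DI -> V2DF
      conversion, an int -> double FLOAT_EXPR is vectorized in two steps,
      roughly:

        VS1: v2di_lo = unpack_lo <v4si_x>
        VS2: v2di_hi = unpack_hi <v4si_x>
        VS3: v2df_0  = (vector double) v2di_lo
        VS4: v2df_1  = (vector double) v2di_hi

      with the intermediate V2DI destination coming from CVT_TYPE computed
      during the analysis above.  */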
3769 vec_dsts.create (multi_step_cvt + 1);
3770 vec_dest = vect_create_destination_var (scalar_dest,
3771 (cvt_type && modifier == WIDEN)
3772 ? cvt_type : vectype_out);
3773 vec_dsts.quick_push (vec_dest);
3775 if (multi_step_cvt)
3777 for (i = interm_types.length () - 1;
3778 interm_types.iterate (i, &intermediate_type); i--)
3780 vec_dest = vect_create_destination_var (scalar_dest,
3781 intermediate_type);
3782 vec_dsts.quick_push (vec_dest);
3786 if (cvt_type)
3787 vec_dest = vect_create_destination_var (scalar_dest,
3788 modifier == WIDEN
3789 ? vectype_out : cvt_type);
3791 if (!slp_node)
3793 if (modifier == WIDEN)
3795 vec_oprnds0.create (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1);
3796 if (op_type == binary_op)
3797 vec_oprnds1.create (1);
3799 else if (modifier == NARROW)
3800 vec_oprnds0.create (
3801 2 * (multi_step_cvt ? vect_pow2 (multi_step_cvt) : 1));
3803 else if (code == WIDEN_LSHIFT_EXPR)
3804 vec_oprnds1.create (slp_node->vec_stmts_size);
3806 last_oprnd = op0;
3807 prev_stmt_info = NULL;
3808 switch (modifier)
3810 case NONE:
3811 for (j = 0; j < ncopies; j++)
3813 if (j == 0)
3814 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node,
3815 -1);
3816 else
3817 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3819 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3821 /* Arguments are ready. Create the new vector stmt. */
3822 if (code1 == CALL_EXPR)
3824 new_stmt = gimple_build_call (decl1, 1, vop0);
3825 new_temp = make_ssa_name (vec_dest, new_stmt);
3826 gimple_call_set_lhs (new_stmt, new_temp);
3828 else
3830 gcc_assert (TREE_CODE_LENGTH (code1) == unary_op);
3831 new_stmt = gimple_build_assign (vec_dest, code1, vop0);
3832 new_temp = make_ssa_name (vec_dest, new_stmt);
3833 gimple_assign_set_lhs (new_stmt, new_temp);
3836 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3837 if (slp_node)
3838 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3841 if (j == 0)
3842 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3843 else
3844 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3845 prev_stmt_info = vinfo_for_stmt (new_stmt);
3847 break;
3849 case WIDEN:
3850 /* In case the vectorization factor (VF) is bigger than the number
3851 of elements that we can fit in a vectype (nunits), we have to
3852 generate more than one vector stmt - i.e. - we need to "unroll"
3853 the vector stmt by a factor VF/nunits. */
3854 for (j = 0; j < ncopies; j++)
3856 /* Handle uses. */
3857 if (j == 0)
3859 if (slp_node)
3861 if (code == WIDEN_LSHIFT_EXPR)
3863 unsigned int k;
3865 vec_oprnd1 = op1;
3866 /* Store vec_oprnd1 for every vector stmt to be created
3867 for SLP_NODE. We check during the analysis that all
3868 the shift arguments are the same. */
3869 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
3870 vec_oprnds1.quick_push (vec_oprnd1);
3872 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3873 slp_node, -1);
3875 else
3876 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0,
3877 &vec_oprnds1, slp_node, -1);
3879 else
3881 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3882 vec_oprnds0.quick_push (vec_oprnd0);
3883 if (op_type == binary_op)
3885 if (code == WIDEN_LSHIFT_EXPR)
3886 vec_oprnd1 = op1;
3887 else
3888 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt,
3889 NULL);
3890 vec_oprnds1.quick_push (vec_oprnd1);
3894 else
3896 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3897 vec_oprnds0.truncate (0);
3898 vec_oprnds0.quick_push (vec_oprnd0);
3899 if (op_type == binary_op)
3901 if (code == WIDEN_LSHIFT_EXPR)
3902 vec_oprnd1 = op1;
3903 else
3904 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1],
3905 vec_oprnd1);
3906 vec_oprnds1.truncate (0);
3907 vec_oprnds1.quick_push (vec_oprnd1);
3911 /* Arguments are ready. Create the new vector stmts. */
3912 for (i = multi_step_cvt; i >= 0; i--)
3914 tree this_dest = vec_dsts[i];
3915 enum tree_code c1 = code1, c2 = code2;
3916 if (i == 0 && codecvt2 != ERROR_MARK)
3918 c1 = codecvt1;
3919 c2 = codecvt2;
3921 vect_create_vectorized_promotion_stmts (&vec_oprnds0,
3922 &vec_oprnds1,
3923 stmt, this_dest, gsi,
3924 c1, c2, decl1, decl2,
3925 op_type);
3928 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3930 if (cvt_type)
3932 if (codecvt1 == CALL_EXPR)
3934 new_stmt = gimple_build_call (decl1, 1, vop0);
3935 new_temp = make_ssa_name (vec_dest, new_stmt);
3936 gimple_call_set_lhs (new_stmt, new_temp);
3938 else
3940 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3941 new_temp = make_ssa_name (vec_dest);
3942 new_stmt = gimple_build_assign (new_temp, codecvt1,
3943 vop0);
3946 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3948 else
3949 new_stmt = SSA_NAME_DEF_STMT (vop0);
3951 if (slp_node)
3952 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
3953 else
3955 if (!prev_stmt_info)
3956 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3957 else
3958 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3959 prev_stmt_info = vinfo_for_stmt (new_stmt);
3964 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3965 break;
3967 case NARROW:
3968 /* In case the vectorization factor (VF) is bigger than the number
3969 of elements that we can fit in a vectype (nunits), we have to
3970 generate more than one vector stmt - i.e. - we need to "unroll"
3971 the vector stmt by a factor VF/nunits. */
3972 for (j = 0; j < ncopies; j++)
3974 /* Handle uses. */
3975 if (slp_node)
3976 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3977 slp_node, -1);
3978 else
3980 vec_oprnds0.truncate (0);
3981 vect_get_loop_based_defs (&last_oprnd, stmt, dt[0], &vec_oprnds0,
3982 vect_pow2 (multi_step_cvt) - 1);
3985 /* Arguments are ready. Create the new vector stmts. */
3986 if (cvt_type)
3987 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
3989 if (codecvt1 == CALL_EXPR)
3991 new_stmt = gimple_build_call (decl1, 1, vop0);
3992 new_temp = make_ssa_name (vec_dest, new_stmt);
3993 gimple_call_set_lhs (new_stmt, new_temp);
3995 else
3997 gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op);
3998 new_temp = make_ssa_name (vec_dest);
3999 new_stmt = gimple_build_assign (new_temp, codecvt1,
4000 vop0);
4003 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4004 vec_oprnds0[i] = new_temp;
4007 vect_create_vectorized_demotion_stmts (&vec_oprnds0, multi_step_cvt,
4008 stmt, vec_dsts, gsi,
4009 slp_node, code1,
4010 &prev_stmt_info);
4013 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4014 break;
4017 vec_oprnds0.release ();
4018 vec_oprnds1.release ();
4019 vec_dsts.release ();
4020 interm_types.release ();
4022 return true;
4026 /* Function vectorizable_assignment.
4028 Check if STMT performs an assignment (copy) that can be vectorized.
4029 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4030 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4031 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4033 static bool
4034 vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
4035 gimple *vec_stmt, slp_tree slp_node)
4037 tree vec_dest;
4038 tree scalar_dest;
4039 tree op;
4040 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4041 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4042 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4043 tree new_temp;
4044 tree def;
4045 gimple def_stmt;
4046 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4047 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4048 int ncopies;
4049 int i, j;
4050 vec<tree> vec_oprnds = vNULL;
4051 tree vop;
4052 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4053 gimple new_stmt = NULL;
4054 stmt_vec_info prev_stmt_info = NULL;
4055 enum tree_code code;
4056 tree vectype_in;
4058 /* Multiple types in SLP are handled by creating the appropriate number of
4059 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4060 case of SLP. */
4061 if (slp_node || PURE_SLP_STMT (stmt_info))
4062 ncopies = 1;
4063 else
4064 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4066 gcc_assert (ncopies >= 1);
4068 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4069 return false;
4071 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4072 return false;
4074 /* Is vectorizable assignment? */
4075 if (!is_gimple_assign (stmt))
4076 return false;
4078 scalar_dest = gimple_assign_lhs (stmt);
4079 if (TREE_CODE (scalar_dest) != SSA_NAME)
4080 return false;
4082 code = gimple_assign_rhs_code (stmt);
4083 if (gimple_assign_single_p (stmt)
4084 || code == PAREN_EXPR
4085 || CONVERT_EXPR_CODE_P (code))
4086 op = gimple_assign_rhs1 (stmt);
4087 else
4088 return false;
4090 if (code == VIEW_CONVERT_EXPR)
4091 op = TREE_OPERAND (op, 0);
4093 if (!vect_is_simple_use_1 (op, stmt, loop_vinfo, bb_vinfo,
4094 &def_stmt, &def, &dt[0], &vectype_in))
4096 if (dump_enabled_p ())
4097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4098 "use not simple.\n");
4099 return false;
4102 /* We can handle NOP_EXPR conversions that do not change the number
4103 of elements or the vector size. */
4104 if ((CONVERT_EXPR_CODE_P (code)
4105 || code == VIEW_CONVERT_EXPR)
4106 && (!vectype_in
4107 || TYPE_VECTOR_SUBPARTS (vectype_in) != nunits
4108 || (GET_MODE_SIZE (TYPE_MODE (vectype))
4109 != GET_MODE_SIZE (TYPE_MODE (vectype_in)))))
4110 return false;
4112 /* We do not handle bit-precision changes. */
4113 if ((CONVERT_EXPR_CODE_P (code)
4114 || code == VIEW_CONVERT_EXPR)
4115 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4116 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4117 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4118 || ((TYPE_PRECISION (TREE_TYPE (op))
4119 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op))))))
4120 /* But a conversion that does not change the bit-pattern is ok. */
4121 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4122 > TYPE_PRECISION (TREE_TYPE (op)))
4123 && TYPE_UNSIGNED (TREE_TYPE (op))))
4125 if (dump_enabled_p ())
4126 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4127 "type conversion to/from bit-precision "
4128 "unsupported.\n");
4129 return false;
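   /* Illustrative example (not part of the original comment): widening an
      'unsigned char' value into a 13-bit bit-field type only zero-extends,
      so the bit pattern is preserved and the conversion is accepted by the
      exception above; truncating a 32-bit 'int' into the same 13-bit type
      would require extra masking and is rejected.  */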
4132 if (!vec_stmt) /* transformation not required. */
4134 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
4135 if (dump_enabled_p ())
4136 dump_printf_loc (MSG_NOTE, vect_location,
4137 "=== vectorizable_assignment ===\n");
4138 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4139 return true;
4142 /** Transform. **/
4143 if (dump_enabled_p ())
4144 dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");
4146 /* Handle def. */
4147 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4149 /* Handle use. */
4150 for (j = 0; j < ncopies; j++)
4152 /* Handle uses. */
4153 if (j == 0)
4154 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node, -1);
4155 else
4156 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds, NULL);
4158 /* Arguments are ready. Create the new vector stmt. */
4159 FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
4161 if (CONVERT_EXPR_CODE_P (code)
4162 || code == VIEW_CONVERT_EXPR)
4163 vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
4164 new_stmt = gimple_build_assign (vec_dest, vop);
4165 new_temp = make_ssa_name (vec_dest, new_stmt);
4166 gimple_assign_set_lhs (new_stmt, new_temp);
4167 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4168 if (slp_node)
4169 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4172 if (slp_node)
4173 continue;
4175 if (j == 0)
4176 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4177 else
4178 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4180 prev_stmt_info = vinfo_for_stmt (new_stmt);
4183 vec_oprnds.release ();
4184 return true;
4188 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
4189 either as shift by a scalar or by a vector. */
4191 bool
4192 vect_supportable_shift (enum tree_code code, tree scalar_type)
4195 machine_mode vec_mode;
4196 optab optab;
4197 int icode;
4198 tree vectype;
4200 vectype = get_vectype_for_scalar_type (scalar_type);
4201 if (!vectype)
4202 return false;
4204 optab = optab_for_tree_code (code, vectype, optab_scalar);
4205 if (!optab
4206 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
4208 optab = optab_for_tree_code (code, vectype, optab_vector);
4209 if (!optab
4210 || (optab_handler (optab, TYPE_MODE (vectype))
4211 == CODE_FOR_nothing))
4212 return false;
4215 vec_mode = TYPE_MODE (vectype);
4216 icode = (int) optab_handler (optab, vec_mode);
4217 if (icode == CODE_FOR_nothing)
4218 return false;
4220 return true;
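/* Illustrative usage sketch (hypothetical caller, not from the original
   sources): a pattern that wants to synthesize a shift can first verify
   that the shift is vectorizable for the scalar type at hand:

     if (!vect_supportable_shift (RSHIFT_EXPR, itype))
       return NULL;

   where ITYPE stands for the scalar type of the candidate operand.  */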
4224 /* Function vectorizable_shift.
4226 Check if STMT performs a shift operation that can be vectorized.
4227 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4228 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4229 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4231 static bool
4232 vectorizable_shift (gimple stmt, gimple_stmt_iterator *gsi,
4233 gimple *vec_stmt, slp_tree slp_node)
4235 tree vec_dest;
4236 tree scalar_dest;
4237 tree op0, op1 = NULL;
4238 tree vec_oprnd1 = NULL_TREE;
4239 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4240 tree vectype;
4241 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4242 enum tree_code code;
4243 machine_mode vec_mode;
4244 tree new_temp;
4245 optab optab;
4246 int icode;
4247 machine_mode optab_op2_mode;
4248 tree def;
4249 gimple def_stmt;
4250 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4251 gimple new_stmt = NULL;
4252 stmt_vec_info prev_stmt_info;
4253 int nunits_in;
4254 int nunits_out;
4255 tree vectype_out;
4256 tree op1_vectype;
4257 int ncopies;
4258 int j, i;
4259 vec<tree> vec_oprnds0 = vNULL;
4260 vec<tree> vec_oprnds1 = vNULL;
4261 tree vop0, vop1;
4262 unsigned int k;
4263 bool scalar_shift_arg = true;
4264 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4265 int vf;
4267 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4268 return false;
4270 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4271 return false;
4273 /* Is STMT a vectorizable binary/unary operation? */
4274 if (!is_gimple_assign (stmt))
4275 return false;
4277 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4278 return false;
4280 code = gimple_assign_rhs_code (stmt);
4282 if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4283 || code == RROTATE_EXPR))
4284 return false;
4286 scalar_dest = gimple_assign_lhs (stmt);
4287 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4288 if (TYPE_PRECISION (TREE_TYPE (scalar_dest))
4289 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4291 if (dump_enabled_p ())
4292 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4293 "bit-precision shifts not supported.\n");
4294 return false;
4297 op0 = gimple_assign_rhs1 (stmt);
4298 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4299 &def_stmt, &def, &dt[0], &vectype))
4301 if (dump_enabled_p ())
4302 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4303 "use not simple.\n");
4304 return false;
4306 /* If op0 is an external or constant def use a vector type with
4307 the same size as the output vector type. */
4308 if (!vectype)
4309 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4310 if (vec_stmt)
4311 gcc_assert (vectype);
4312 if (!vectype)
4314 if (dump_enabled_p ())
4315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4316 "no vectype for scalar type\n");
4317 return false;
4320 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4321 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4322 if (nunits_out != nunits_in)
4323 return false;
4325 op1 = gimple_assign_rhs2 (stmt);
4326 if (!vect_is_simple_use_1 (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4327 &def, &dt[1], &op1_vectype))
4329 if (dump_enabled_p ())
4330 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4331 "use not simple.\n");
4332 return false;
4335 if (loop_vinfo)
4336 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4337 else
4338 vf = 1;
4340 /* Multiple types in SLP are handled by creating the appropriate number of
4341 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4342 case of SLP. */
4343 if (slp_node || PURE_SLP_STMT (stmt_info))
4344 ncopies = 1;
4345 else
4346 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4348 gcc_assert (ncopies >= 1);
4350 /* Determine whether the shift amount is a vector, or scalar. If the
4351 shift/rotate amount is a vector, use the vector/vector shift optabs. */
4353 if (dt[1] == vect_internal_def && !slp_node)
4354 scalar_shift_arg = false;
4355 else if (dt[1] == vect_constant_def
4356 || dt[1] == vect_external_def
4357 || dt[1] == vect_internal_def)
4359 /* In SLP, we need to check whether the shift count is the same;
4360 in loops, if it is a constant or invariant, it is always
4361 a scalar shift. */
4362 if (slp_node)
4364 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4365 gimple slpstmt;
4367 FOR_EACH_VEC_ELT (stmts, k, slpstmt)
4368 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
4369 scalar_shift_arg = false;
4372 else
4374 if (dump_enabled_p ())
4375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4376 "operand mode requires invariant argument.\n");
4377 return false;
4380 /* Vector shifted by vector. */
4381 if (!scalar_shift_arg)
4383 optab = optab_for_tree_code (code, vectype, optab_vector);
4384 if (dump_enabled_p ())
4385 dump_printf_loc (MSG_NOTE, vect_location,
4386 "vector/vector shift/rotate found.\n");
4388 if (!op1_vectype)
4389 op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
4390 if (op1_vectype == NULL_TREE
4391 || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
4393 if (dump_enabled_p ())
4394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4395 "unusable type for last operand in"
4396 " vector/vector shift/rotate.\n");
4397 return false;
4400 /* See if the machine has a vector shifted by scalar insn, and if not,
4401 see if it has a vector shifted by vector insn. */
4402 else
4404 optab = optab_for_tree_code (code, vectype, optab_scalar);
4405 if (optab
4406 && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
4408 if (dump_enabled_p ())
4409 dump_printf_loc (MSG_NOTE, vect_location,
4410 "vector/scalar shift/rotate found.\n");
4412 else
4414 optab = optab_for_tree_code (code, vectype, optab_vector);
4415 if (optab
4416 && (optab_handler (optab, TYPE_MODE (vectype))
4417 != CODE_FOR_nothing))
4419 scalar_shift_arg = false;
4421 if (dump_enabled_p ())
4422 dump_printf_loc (MSG_NOTE, vect_location,
4423 "vector/vector shift/rotate found.\n");
4425 /* Unlike the other binary operators, shifts/rotates have
4426 the rhs being int, instead of the same type as the lhs,
4427 so make sure the scalar is the right type if we are
4428 dealing with vectors of long long/long/short/char. */
4429 if (dt[1] == vect_constant_def)
4430 op1 = fold_convert (TREE_TYPE (vectype), op1);
4431 else if (!useless_type_conversion_p (TREE_TYPE (vectype),
4432 TREE_TYPE (op1)))
4434 if (slp_node
4435 && TYPE_MODE (TREE_TYPE (vectype))
4436 != TYPE_MODE (TREE_TYPE (op1)))
4438 if (dump_enabled_p ())
4439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4440 "unusable type for last operand in"
4441 " vector/vector shift/rotate.\n");
4442 return false;
4444 if (vec_stmt && !slp_node)
4446 op1 = fold_convert (TREE_TYPE (vectype), op1);
4447 op1 = vect_init_vector (stmt, op1,
4448 TREE_TYPE (vectype), NULL);
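	      /* Illustrative example (not part of the original sources):
		 for a V8HI shift whose count is a loop-invariant 'int'
		 SSA name, the count is narrowed to 'short int' above and
		 vect_init_vector broadcasts it into a V8HI vector so the
		 vector/vector shift pattern can consume it.  */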
4455 /* Supportable by target? */
4456 if (!optab)
4458 if (dump_enabled_p ())
4459 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4460 "no optab.\n");
4461 return false;
4463 vec_mode = TYPE_MODE (vectype);
4464 icode = (int) optab_handler (optab, vec_mode);
4465 if (icode == CODE_FOR_nothing)
4467 if (dump_enabled_p ())
4468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4469 "op not supported by target.\n");
4470 /* Check only during analysis. */
4471 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4472 || (vf < vect_min_worthwhile_factor (code)
4473 && !vec_stmt))
4474 return false;
4475 if (dump_enabled_p ())
4476 dump_printf_loc (MSG_NOTE, vect_location,
4477 "proceeding using word mode.\n");
4480 /* Worthwhile without SIMD support? Check only during analysis. */
4481 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
4482 && vf < vect_min_worthwhile_factor (code)
4483 && !vec_stmt)
4485 if (dump_enabled_p ())
4486 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4487 "not worthwhile without SIMD support.\n");
4488 return false;
4491 if (!vec_stmt) /* transformation not required. */
4493 STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
4494 if (dump_enabled_p ())
4495 dump_printf_loc (MSG_NOTE, vect_location,
4496 "=== vectorizable_shift ===\n");
4497 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4498 return true;
4501 /** Transform. **/
4503 if (dump_enabled_p ())
4504 dump_printf_loc (MSG_NOTE, vect_location,
4505 "transform binary/unary operation.\n");
4507 /* Handle def. */
4508 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4510 prev_stmt_info = NULL;
4511 for (j = 0; j < ncopies; j++)
4513 /* Handle uses. */
4514 if (j == 0)
4516 if (scalar_shift_arg)
4518 /* Vector shl and shr insn patterns can be defined with scalar
4519 operand 2 (shift operand). In this case, use constant or loop
4520 invariant op1 directly, without extending it to vector mode
4521 first. */
4522 optab_op2_mode = insn_data[icode].operand[2].mode;
4523 if (!VECTOR_MODE_P (optab_op2_mode))
4525 if (dump_enabled_p ())
4526 dump_printf_loc (MSG_NOTE, vect_location,
4527 "operand 1 using scalar mode.\n");
4528 vec_oprnd1 = op1;
4529 vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : 1);
4530 vec_oprnds1.quick_push (vec_oprnd1);
4531 if (slp_node)
4533 /* Store vec_oprnd1 for every vector stmt to be created
4534 for SLP_NODE. We check during the analysis that all
4535 the shift arguments are the same.
4536 TODO: Allow different constants for different vector
4537 stmts generated for an SLP instance. */
4538 for (k = 0; k < slp_node->vec_stmts_size - 1; k++)
4539 vec_oprnds1.quick_push (vec_oprnd1);
4544 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
4545 (a special case for certain kinds of vector shifts); otherwise,
4546 operand 1 should be of a vector type (the usual case). */
4547 if (vec_oprnd1)
4548 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4549 slp_node, -1);
4550 else
4551 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4552 slp_node, -1);
4554 else
4555 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4557 /* Arguments are ready. Create the new vector stmt. */
4558 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4560 vop1 = vec_oprnds1[i];
4561 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
4562 new_temp = make_ssa_name (vec_dest, new_stmt);
4563 gimple_assign_set_lhs (new_stmt, new_temp);
4564 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4565 if (slp_node)
4566 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4569 if (slp_node)
4570 continue;
4572 if (j == 0)
4573 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4574 else
4575 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4576 prev_stmt_info = vinfo_for_stmt (new_stmt);
4579 vec_oprnds0.release ();
4580 vec_oprnds1.release ();
4582 return true;
4586 /* Function vectorizable_operation.
4588 Check if STMT performs a binary, unary or ternary operation that can
4589 be vectorized.
4590 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4591 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4592 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4594 static bool
4595 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
4596 gimple *vec_stmt, slp_tree slp_node)
4598 tree vec_dest;
4599 tree scalar_dest;
4600 tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
4601 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4602 tree vectype;
4603 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4604 enum tree_code code;
4605 machine_mode vec_mode;
4606 tree new_temp;
4607 int op_type;
4608 optab optab;
4609 int icode;
4610 tree def;
4611 gimple def_stmt;
4612 enum vect_def_type dt[3]
4613 = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
4614 gimple new_stmt = NULL;
4615 stmt_vec_info prev_stmt_info;
4616 int nunits_in;
4617 int nunits_out;
4618 tree vectype_out;
4619 int ncopies;
4620 int j, i;
4621 vec<tree> vec_oprnds0 = vNULL;
4622 vec<tree> vec_oprnds1 = vNULL;
4623 vec<tree> vec_oprnds2 = vNULL;
4624 tree vop0, vop1, vop2;
4625 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
4626 int vf;
4628 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
4629 return false;
4631 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
4632 return false;
4634 /* Is STMT a vectorizable binary/unary operation? */
4635 if (!is_gimple_assign (stmt))
4636 return false;
4638 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4639 return false;
4641 code = gimple_assign_rhs_code (stmt);
4643 /* For pointer addition, we should use the normal plus for
4644 the vector addition. */
4645 if (code == POINTER_PLUS_EXPR)
4646 code = PLUS_EXPR;
4648 /* Support only unary, binary or ternary operations. */
4649 op_type = TREE_CODE_LENGTH (code);
4650 if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
4652 if (dump_enabled_p ())
4653 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4654 "num. args = %d (not unary/binary/ternary op).\n",
4655 op_type);
4656 return false;
4659 scalar_dest = gimple_assign_lhs (stmt);
4660 vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4662 /* Most operations cannot handle bit-precision types without extra
4663 truncations. */
4664 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest))
4665 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest))))
4666 /* Exception are bitwise binary operations. */
4667 && code != BIT_IOR_EXPR
4668 && code != BIT_XOR_EXPR
4669 && code != BIT_AND_EXPR)
4671 if (dump_enabled_p ())
4672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4673 "bit-precision arithmetic not supported.\n");
4674 return false;
4677 op0 = gimple_assign_rhs1 (stmt);
4678 if (!vect_is_simple_use_1 (op0, stmt, loop_vinfo, bb_vinfo,
4679 &def_stmt, &def, &dt[0], &vectype))
4681 if (dump_enabled_p ())
4682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4683 "use not simple.\n");
4684 return false;
4686 /* If op0 is an external or constant def use a vector type with
4687 the same size as the output vector type. */
4688 if (!vectype)
4689 vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
4690 if (vec_stmt)
4691 gcc_assert (vectype);
4692 if (!vectype)
4694 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4697 "no vectype for scalar type ");
4698 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
4699 TREE_TYPE (op0));
4700 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4703 return false;
4706 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4707 nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
4708 if (nunits_out != nunits_in)
4709 return false;
4711 if (op_type == binary_op || op_type == ternary_op)
4713 op1 = gimple_assign_rhs2 (stmt);
4714 if (!vect_is_simple_use (op1, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4715 &def, &dt[1]))
4717 if (dump_enabled_p ())
4718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4719 "use not simple.\n");
4720 return false;
4723 if (op_type == ternary_op)
4725 op2 = gimple_assign_rhs3 (stmt);
4726 if (!vect_is_simple_use (op2, stmt, loop_vinfo, bb_vinfo, &def_stmt,
4727 &def, &dt[2]))
4729 if (dump_enabled_p ())
4730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4731 "use not simple.\n");
4732 return false;
4736 if (loop_vinfo)
4737 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4738 else
4739 vf = 1;
4741 /* Multiple types in SLP are handled by creating the appropriate number of
4742 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4743 case of SLP. */
4744 if (slp_node || PURE_SLP_STMT (stmt_info))
4745 ncopies = 1;
4746 else
4747 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4749 gcc_assert (ncopies >= 1);
4751 /* Shifts are handled in vectorizable_shift (). */
4752 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
4753 || code == RROTATE_EXPR)
4754 return false;
4756 /* Supportable by target? */
4758 vec_mode = TYPE_MODE (vectype);
4759 if (code == MULT_HIGHPART_EXPR)
4761 if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
4762 icode = LAST_INSN_CODE;
4763 else
4764 icode = CODE_FOR_nothing;
4766 else
4768 optab = optab_for_tree_code (code, vectype, optab_default);
4769 if (!optab)
4771 if (dump_enabled_p ())
4772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4773 "no optab.\n");
4774 return false;
4776 icode = (int) optab_handler (optab, vec_mode);
4779 if (icode == CODE_FOR_nothing)
4781 if (dump_enabled_p ())
4782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4783 "op not supported by target.\n");
4784 /* Check only during analysis. */
4785 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4786 || (!vec_stmt && vf < vect_min_worthwhile_factor (code)))
4787 return false;
4788 if (dump_enabled_p ())
4789 dump_printf_loc (MSG_NOTE, vect_location,
4790 "proceeding using word mode.\n");
4793 /* Worthwhile without SIMD support? Check only during analysis. */
4794 if (!VECTOR_MODE_P (vec_mode)
4795 && !vec_stmt
4796 && vf < vect_min_worthwhile_factor (code))
4798 if (dump_enabled_p ())
4799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4800 "not worthwhile without SIMD support.\n");
4801 return false;
4804 if (!vec_stmt) /* transformation not required. */
4806 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
4807 if (dump_enabled_p ())
4808 dump_printf_loc (MSG_NOTE, vect_location,
4809 "=== vectorizable_operation ===\n");
4810 vect_model_simple_cost (stmt_info, ncopies, dt, NULL, NULL);
4811 return true;
4814 /** Transform. **/
4816 if (dump_enabled_p ())
4817 dump_printf_loc (MSG_NOTE, vect_location,
4818 "transform binary/unary operation.\n");
4820 /* Handle def. */
4821 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4823 /* In case the vectorization factor (VF) is bigger than the number
4824 of elements that we can fit in a vectype (nunits), we have to generate
4825 more than one vector stmt - i.e. - we need to "unroll" the
4826 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4827 from one copy of the vector stmt to the next, in the field
4828 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4829 stages to find the correct vector defs to be used when vectorizing
4830 stmts that use the defs of the current stmt. The example below
4831 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
4832 we need to create 4 vectorized stmts):
4834 before vectorization:
4835 RELATED_STMT VEC_STMT
4836 S1: x = memref - -
4837 S2: z = x + 1 - -
4839 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
4840 there):
4841 RELATED_STMT VEC_STMT
4842 VS1_0: vx0 = memref0 VS1_1 -
4843 VS1_1: vx1 = memref1 VS1_2 -
4844 VS1_2: vx2 = memref2 VS1_3 -
4845 VS1_3: vx3 = memref3 - -
4846 S1: x = load - VS1_0
4847 S2: z = x + 1 - -
4849 step2: vectorize stmt S2 (done here):
4850 To vectorize stmt S2 we first need to find the relevant vector
4851 def for the first operand 'x'. This is, as usual, obtained from
4852 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
4853 that defines 'x' (S1). This way we find the stmt VS1_0, and the
4854 relevant vector def 'vx0'. Having found 'vx0' we can generate
4855 the vector stmt VS2_0, and as usual, record it in the
4856 STMT_VINFO_VEC_STMT of stmt S2.
4857 When creating the second copy (VS2_1), we obtain the relevant vector
4858 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
4859 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
4860 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
4861 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
4862 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
4863 chain of stmts and pointers:
4864 RELATED_STMT VEC_STMT
4865 VS1_0: vx0 = memref0 VS1_1 -
4866 VS1_1: vx1 = memref1 VS1_2 -
4867 VS1_2: vx2 = memref2 VS1_3 -
4868 VS1_3: vx3 = memref3 - -
4869 S1: x = load - VS1_0
4870 VS2_0: vz0 = vx0 + v1 VS2_1 -
4871 VS2_1: vz1 = vx1 + v1 VS2_2 -
4872 VS2_2: vz2 = vx2 + v1 VS2_3 -
4873 VS2_3: vz3 = vx3 + v1 - -
4874 S2: z = x + 1 - VS2_0 */
4876 prev_stmt_info = NULL;
4877 for (j = 0; j < ncopies; j++)
4879 /* Handle uses. */
4880 if (j == 0)
4882 if (op_type == binary_op || op_type == ternary_op)
4883 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4884 slp_node, -1);
4885 else
4886 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4887 slp_node, -1);
4888 if (op_type == ternary_op)
4890 vec_oprnds2.create (1);
4891 vec_oprnds2.quick_push (vect_get_vec_def_for_operand (op2,
4892 stmt,
4893 NULL));
4896 else
4898 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4899 if (op_type == ternary_op)
4901 tree vec_oprnd = vec_oprnds2.pop ();
4902 vec_oprnds2.quick_push (vect_get_vec_def_for_stmt_copy (dt[2],
4903 vec_oprnd));
4907 /* Arguments are ready. Create the new vector stmt. */
4908 FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
4910 vop1 = ((op_type == binary_op || op_type == ternary_op)
4911 ? vec_oprnds1[i] : NULL_TREE);
4912 vop2 = ((op_type == ternary_op)
4913 ? vec_oprnds2[i] : NULL_TREE);
4914 new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
4915 new_temp = make_ssa_name (vec_dest, new_stmt);
4916 gimple_assign_set_lhs (new_stmt, new_temp);
4917 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4918 if (slp_node)
4919 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
4922 if (slp_node)
4923 continue;
4925 if (j == 0)
4926 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4927 else
4928 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4929 prev_stmt_info = vinfo_for_stmt (new_stmt);
4932 vec_oprnds0.release ();
4933 vec_oprnds1.release ();
4934 vec_oprnds2.release ();
4936 return true;
4939 /* A helper function to ensure data reference DR's base alignment
4940 for STMT_INFO. */
4942 static void
4943 ensure_base_align (stmt_vec_info stmt_info, struct data_reference *dr)
4945 if (!dr->aux)
4946 return;
4948 if (((dataref_aux *)dr->aux)->base_misaligned)
4950 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4951 tree base_decl = ((dataref_aux *)dr->aux)->base_decl;
4953 DECL_ALIGN (base_decl) = TYPE_ALIGN (vectype);
4954 DECL_USER_ALIGN (base_decl) = 1;
4955 ((dataref_aux *)dr->aux)->base_misaligned = false;
4960 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4961 reversal of the vector elements. If that is impossible to do,
4962 returns NULL. */
4964 static tree
4965 perm_mask_for_reverse (tree vectype)
4967 int i, nunits;
4968 unsigned char *sel;
4970 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4971 sel = XALLOCAVEC (unsigned char, nunits);
4973 for (i = 0; i < nunits; ++i)
4974 sel[i] = nunits - 1 - i;
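  /* For example, for a four-element vector the selector built above is
     { 3, 2, 1, 0 }, i.e. the mask that swaps the vector end for end.  */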
4976 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
4977 return NULL_TREE;
4978 return vect_gen_perm_mask_checked (vectype, sel);
4981 /* Function vectorizable_store.
4983 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
4984 can be vectorized.
4985 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4986 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4987 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4989 static bool
4990 vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
4991 slp_tree slp_node)
4993 tree scalar_dest;
4994 tree data_ref;
4995 tree op;
4996 tree vec_oprnd = NULL_TREE;
4997 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4998 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4999 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5000 tree elem_type;
5001 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5002 struct loop *loop = NULL;
5003 machine_mode vec_mode;
5004 tree dummy;
5005 enum dr_alignment_support alignment_support_scheme;
5006 tree def;
5007 gimple def_stmt;
5008 enum vect_def_type dt;
5009 stmt_vec_info prev_stmt_info = NULL;
5010 tree dataref_ptr = NULL_TREE;
5011 tree dataref_offset = NULL_TREE;
5012 gimple ptr_incr = NULL;
5013 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5014 int ncopies;
5015 int j;
5016 gimple next_stmt, first_stmt = NULL;
5017 bool grouped_store = false;
5018 bool store_lanes_p = false;
5019 unsigned int group_size, i;
5020 vec<tree> dr_chain = vNULL;
5021 vec<tree> oprnds = vNULL;
5022 vec<tree> result_chain = vNULL;
5023 bool inv_p;
5024 bool negative = false;
5025 tree offset = NULL_TREE;
5026 vec<tree> vec_oprnds = vNULL;
5027 bool slp = (slp_node != NULL);
5028 unsigned int vec_num;
5029 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5030 tree aggr_type;
5032 if (loop_vinfo)
5033 loop = LOOP_VINFO_LOOP (loop_vinfo);
5035 /* Multiple types in SLP are handled by creating the appropriate number of
5036 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5037 case of SLP. */
5038 if (slp || PURE_SLP_STMT (stmt_info))
5039 ncopies = 1;
5040 else
5041 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5043 gcc_assert (ncopies >= 1);
5045 /* FORNOW. This restriction should be relaxed. */
5046 if (loop && nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
5048 if (dump_enabled_p ())
5049 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5050 "multiple types in nested loop.\n");
5051 return false;
5054 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5055 return false;
5057 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5058 return false;
5060 /* Is vectorizable store? */
5062 if (!is_gimple_assign (stmt))
5063 return false;
5065 scalar_dest = gimple_assign_lhs (stmt);
5066 if (TREE_CODE (scalar_dest) == VIEW_CONVERT_EXPR
5067 && is_pattern_stmt_p (stmt_info))
5068 scalar_dest = TREE_OPERAND (scalar_dest, 0);
5069 if (TREE_CODE (scalar_dest) != ARRAY_REF
5070 && TREE_CODE (scalar_dest) != BIT_FIELD_REF
5071 && TREE_CODE (scalar_dest) != INDIRECT_REF
5072 && TREE_CODE (scalar_dest) != COMPONENT_REF
5073 && TREE_CODE (scalar_dest) != IMAGPART_EXPR
5074 && TREE_CODE (scalar_dest) != REALPART_EXPR
5075 && TREE_CODE (scalar_dest) != MEM_REF)
5076 return false;
5078 gcc_assert (gimple_assign_single_p (stmt));
5079 op = gimple_assign_rhs1 (stmt);
5080 if (!vect_is_simple_use (op, stmt, loop_vinfo, bb_vinfo, &def_stmt,
5081 &def, &dt))
5083 if (dump_enabled_p ())
5084 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5085 "use not simple.\n");
5086 return false;
5089 elem_type = TREE_TYPE (vectype);
5090 vec_mode = TYPE_MODE (vectype);
5092 /* FORNOW. In some cases can vectorize even if data-type not supported
5093 (e.g. - array initialization with 0). */
5094 if (optab_handler (mov_optab, vec_mode) == CODE_FOR_nothing)
5095 return false;
5097 if (!STMT_VINFO_DATA_REF (stmt_info))
5098 return false;
5100 negative =
5101 tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
5102 ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
5103 size_zero_node) < 0;
5104 if (negative && ncopies > 1)
5106 if (dump_enabled_p ())
5107 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5108 "multiple types with negative step.\n");
5109 return false;
5112 if (negative)
5114 gcc_assert (!grouped_store);
5115 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5116 if (alignment_support_scheme != dr_aligned
5117 && alignment_support_scheme != dr_unaligned_supported)
5119 if (dump_enabled_p ())
5120 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5121 "negative step but alignment required.\n");
5122 return false;
5124 if (dt != vect_constant_def
5125 && dt != vect_external_def
5126 && !perm_mask_for_reverse (vectype))
5128 if (dump_enabled_p ())
5129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5130 "negative step and reversing not supported.\n");
5131 return false;
5135 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5137 grouped_store = true;
5138 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5139 if (!slp && !PURE_SLP_STMT (stmt_info))
5141 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5142 if (vect_store_lanes_supported (vectype, group_size))
5143 store_lanes_p = true;
5144 else if (!vect_grouped_store_supported (vectype, group_size))
5145 return false;
5148 if (first_stmt == stmt)
5150 /* STMT is the leader of the group. Check the operands of all the
5151 stmts of the group. */
5152 next_stmt = GROUP_NEXT_ELEMENT (stmt_info);
5153 while (next_stmt)
5155 gcc_assert (gimple_assign_single_p (next_stmt));
5156 op = gimple_assign_rhs1 (next_stmt);
5157 if (!vect_is_simple_use (op, next_stmt, loop_vinfo, bb_vinfo,
5158 &def_stmt, &def, &dt))
5160 if (dump_enabled_p ())
5161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5162 "use not simple.\n");
5163 return false;
5165 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5170 if (!vec_stmt) /* transformation not required. */
5172 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
5173 vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
5174 NULL, NULL, NULL);
5175 return true;
5178 /** Transform. **/
5180 ensure_base_align (stmt_info, dr);
5182 if (grouped_store)
5184 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5185 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5187 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
5189 /* FORNOW */
5190 gcc_assert (!loop || !nested_in_vect_loop_p (loop, stmt));
5192 /* We vectorize all the stmts of the interleaving group when we
5193 reach the last stmt in the group. */
5194 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
5195 < GROUP_SIZE (vinfo_for_stmt (first_stmt))
5196 && !slp)
5198 *vec_stmt = NULL;
5199 return true;
5202 if (slp)
5204 grouped_store = false;
5205 /* VEC_NUM is the number of vect stmts to be created for this
5206 group. */
5207 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5208 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
5209 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5210 op = gimple_assign_rhs1 (first_stmt);
5212 else
5213 /* VEC_NUM is the number of vect stmts to be created for this
5214 group. */
5215 vec_num = group_size;
5217 else
5219 first_stmt = stmt;
5220 first_dr = dr;
5221 group_size = vec_num = 1;
5224 if (dump_enabled_p ())
5225 dump_printf_loc (MSG_NOTE, vect_location,
5226 "transform store. ncopies = %d\n", ncopies);
5228 dr_chain.create (group_size);
5229 oprnds.create (group_size);
5231 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
5232 gcc_assert (alignment_support_scheme);
5233 /* Targets with store-lane instructions must not require explicit
5234 realignment. */
5235 gcc_assert (!store_lanes_p
5236 || alignment_support_scheme == dr_aligned
5237 || alignment_support_scheme == dr_unaligned_supported);
5239 if (negative)
5240 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
5242 if (store_lanes_p)
5243 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
5244 else
5245 aggr_type = vectype;
5247 /* In case the vectorization factor (VF) is bigger than the number
5248 of elements that we can fit in a vectype (nunits), we have to generate
5249 more than one vector stmt - i.e. - we need to "unroll" the
5250 vector stmt by a factor VF/nunits. For more details see documentation in
5251 vect_get_vec_def_for_copy_stmt. */
5253 /* In case of interleaving (non-unit grouped access):
5255 S1: &base + 2 = x2
5256 S2: &base = x0
5257 S3: &base + 1 = x1
5258 S4: &base + 3 = x3
5260 We create vectorized stores starting from base address (the access of the
5261 first stmt in the chain (S2 in the above example), when the last store stmt
5262 of the chain (S4) is reached:
5264 VS1: &base = vx2
5265 VS2: &base + vec_size*1 = vx0
5266 VS3: &base + vec_size*2 = vx1
5267 VS4: &base + vec_size*3 = vx3
5269 Then permutation statements are generated:
5271 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
5272 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
5275 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5276 (the order of the data-refs in the output of vect_permute_store_chain
5277 corresponds to the order of scalar stmts in the interleaving chain - see
5278 the documentation of vect_permute_store_chain()).
5280 In case of both multiple types and interleaving, above vector stores and
5281 permutation stmts are created for every copy. The result vector stmts are
5282 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5283 STMT_VINFO_RELATED_STMT for the next copies.
5286 prev_stmt_info = NULL;
5287 for (j = 0; j < ncopies; j++)
5289 gimple new_stmt;
5291 if (j == 0)
5293 if (slp)
5295 /* Get vectorized arguments for SLP_NODE. */
5296 vect_get_vec_defs (op, NULL_TREE, stmt, &vec_oprnds,
5297 NULL, slp_node, -1);
5299 vec_oprnd = vec_oprnds[0];
5301 else
5303 /* For interleaved stores we collect vectorized defs for all the
5304 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
5305 used as an input to vect_permute_store_chain(), and OPRNDS as
5306 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
5308 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5309 OPRNDS are of size 1. */
5310 next_stmt = first_stmt;
5311 for (i = 0; i < group_size; i++)
5313 /* Since gaps are not supported for interleaved stores,
5314 GROUP_SIZE is the exact number of stmts in the chain.
5315 Therefore, NEXT_STMT can't be NULL_TREE. If
5316 there is no interleaving, GROUP_SIZE is 1, and only one
5317 iteration of the loop will be executed. */
5318 gcc_assert (next_stmt
5319 && gimple_assign_single_p (next_stmt));
5320 op = gimple_assign_rhs1 (next_stmt);
5322 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
5323 NULL);
5324 dr_chain.quick_push (vec_oprnd);
5325 oprnds.quick_push (vec_oprnd);
5326 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5330 /* We should have caught mismatched types earlier. */
5331 gcc_assert (useless_type_conversion_p (vectype,
5332 TREE_TYPE (vec_oprnd)));
5333 bool simd_lane_access_p
5334 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
5335 if (simd_lane_access_p
5336 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
5337 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
5338 && integer_zerop (DR_OFFSET (first_dr))
5339 && integer_zerop (DR_INIT (first_dr))
5340 && alias_sets_conflict_p (get_alias_set (aggr_type),
5341 get_alias_set (DR_REF (first_dr))))
5343 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
5344 dataref_offset = build_int_cst (reference_alias_ptr_type
5345 (DR_REF (first_dr)), 0);
5346 inv_p = false;
5348 else
5349 dataref_ptr
5350 = vect_create_data_ref_ptr (first_stmt, aggr_type,
5351 simd_lane_access_p ? loop : NULL,
5352 offset, &dummy, gsi, &ptr_incr,
5353 simd_lane_access_p, &inv_p);
5354 gcc_assert (bb_vinfo || !inv_p);
5356 else
5358 /* For interleaved stores we created vectorized defs for all the
5359 defs stored in OPRNDS in the previous iteration (previous copy).
5360 DR_CHAIN is then used as an input to vect_permute_store_chain(),
5361 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
5362 next copy.
5363 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
5364 OPRNDS are of size 1. */
5365 for (i = 0; i < group_size; i++)
5367 op = oprnds[i];
5368 vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo, &def_stmt,
5369 &def, &dt);
5370 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
5371 dr_chain[i] = vec_oprnd;
5372 oprnds[i] = vec_oprnd;
5374 if (dataref_offset)
5375 dataref_offset
5376 = int_const_binop (PLUS_EXPR, dataref_offset,
5377 TYPE_SIZE_UNIT (aggr_type));
5378 else
5379 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
5380 TYPE_SIZE_UNIT (aggr_type));
5383 if (store_lanes_p)
5385 tree vec_array;
5387 /* Combine all the vectors into an array. */
5388 vec_array = create_vector_array (vectype, vec_num);
5389 for (i = 0; i < vec_num; i++)
5391 vec_oprnd = dr_chain[i];
5392 write_vector_array (stmt, gsi, vec_oprnd, vec_array, i);
5395 /* Emit:
5396 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
5397 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
5398 new_stmt = gimple_build_call_internal (IFN_STORE_LANES, 1, vec_array);
5399 gimple_call_set_lhs (new_stmt, data_ref);
5400 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5402 else
5404 new_stmt = NULL;
5405 if (grouped_store)
5407 if (j == 0)
5408 result_chain.create (group_size);
5409 /* Permute. */
5410 vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
5411 &result_chain);
5414 next_stmt = first_stmt;
5415 for (i = 0; i < vec_num; i++)
5417 unsigned align, misalign;
5419 if (i > 0)
5420 /* Bump the vector pointer. */
5421 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
5422 stmt, NULL_TREE);
5424 if (slp)
5425 vec_oprnd = vec_oprnds[i];
5426 else if (grouped_store)
5427 /* For grouped stores vectorized defs are interleaved in
5428 vect_permute_store_chain(). */
5429 vec_oprnd = result_chain[i];
5431 data_ref = build2 (MEM_REF, TREE_TYPE (vec_oprnd), dataref_ptr,
5432 dataref_offset
5433 ? dataref_offset
5434 : build_int_cst (reference_alias_ptr_type
5435 (DR_REF (first_dr)), 0));
5436 align = TYPE_ALIGN_UNIT (vectype);
5437 if (aligned_access_p (first_dr))
5438 misalign = 0;
5439 else if (DR_MISALIGNMENT (first_dr) == -1)
5441 TREE_TYPE (data_ref)
5442 = build_aligned_type (TREE_TYPE (data_ref),
5443 TYPE_ALIGN (elem_type));
5444 align = TYPE_ALIGN_UNIT (elem_type);
5445 misalign = 0;
5447 else
5449 TREE_TYPE (data_ref)
5450 = build_aligned_type (TREE_TYPE (data_ref),
5451 TYPE_ALIGN (elem_type));
5452 misalign = DR_MISALIGNMENT (first_dr);
5454 if (dataref_offset == NULL_TREE)
5455 set_ptr_info_alignment (get_ptr_info (dataref_ptr), align,
5456 misalign);
5458 if (negative
5459 && dt != vect_constant_def
5460 && dt != vect_external_def)
5462 tree perm_mask = perm_mask_for_reverse (vectype);
5463 tree perm_dest
5464 = vect_create_destination_var (gimple_assign_rhs1 (stmt),
5465 vectype);
5466 tree new_temp = make_ssa_name (perm_dest);
5468 /* Generate the permute statement. */
5469 gimple perm_stmt
5470 = gimple_build_assign (new_temp, VEC_PERM_EXPR, vec_oprnd,
5471 vec_oprnd, perm_mask);
5472 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5474 perm_stmt = SSA_NAME_DEF_STMT (new_temp);
5475 vec_oprnd = new_temp;
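	      /* Illustrative note (not part of the original sources): for a
		 four-element vector { a, b, c, d } the permute above yields
		 { d, c, b, a }, so the elements of a negative-step (backward)
		 scalar access end up in increasing memory order in the
		 vector store below.  */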
5478 /* Arguments are ready. Create the new vector stmt. */
5479 new_stmt = gimple_build_assign (data_ref, vec_oprnd);
5480 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5482 if (slp)
5483 continue;
5485 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5486 if (!next_stmt)
5487 break;
5490 if (!slp)
5492 if (j == 0)
5493 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5494 else
5495 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5496 prev_stmt_info = vinfo_for_stmt (new_stmt);
5500 dr_chain.release ();
5501 oprnds.release ();
5502 result_chain.release ();
5503 vec_oprnds.release ();
5505 return true;
5508 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
5509 VECTOR_CST mask. No checks are made that the target platform supports the
5510 mask, so callers may wish to test can_vec_perm_p separately, or use
5511 vect_gen_perm_mask_checked. */
5513 tree
5514 vect_gen_perm_mask_any (tree vectype, const unsigned char *sel)
5516 tree mask_elt_type, mask_type, mask_vec, *mask_elts;
5517 int i, nunits;
5519 nunits = TYPE_VECTOR_SUBPARTS (vectype);
5521 mask_elt_type = lang_hooks.types.type_for_mode
5522 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
5523 mask_type = get_vectype_for_scalar_type (mask_elt_type);
5525 mask_elts = XALLOCAVEC (tree, nunits);
5526 for (i = nunits - 1; i >= 0; i--)
5527 mask_elts[i] = build_int_cst (mask_elt_type, sel[i]);
5528 mask_vec = build_vector (mask_type, mask_elts);
5530 return mask_vec;
5533 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_p,
5534 i.e. that the target supports the pattern _for arbitrary input vectors_. */
5536 tree
5537 vect_gen_perm_mask_checked (tree vectype, const unsigned char *sel)
5539 gcc_assert (can_vec_perm_p (TYPE_MODE (vectype), false, sel));
5540 return vect_gen_perm_mask_any (vectype, sel);
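/* Illustrative usage sketch (hypothetical values, not from the original
   sources): to select the even elements of the concatenation of two V4SI
   vectors, a caller could build

     unsigned char sel[4] = { 0, 2, 4, 6 };
     tree mask = vect_gen_perm_mask_checked (v4si_type, sel);

   and emit VEC_PERM_EXPR <x, y, mask>, provided can_vec_perm_p holds for
   the target (which the checked variant asserts).  */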
5543 /* Given vector variables X and Y that were generated for the scalar
5544 STMT, generate instructions to permute the vector elements of X and Y
5545 using permutation mask MASK_VEC, insert them at *GSI and return the
5546 permuted vector variable. */
5548 static tree
5549 permute_vec_elements (tree x, tree y, tree mask_vec, gimple stmt,
5550 gimple_stmt_iterator *gsi)
5552 tree vectype = TREE_TYPE (x);
5553 tree perm_dest, data_ref;
5554 gimple perm_stmt;
5556 perm_dest = vect_create_destination_var (gimple_get_lhs (stmt), vectype);
5557 data_ref = make_ssa_name (perm_dest);
5559 /* Generate the permute statement. */
5560 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
5561 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5563 return data_ref;
5566 /* Hoist the definitions of all SSA uses on STMT out of the loop LOOP,
5567 inserting them on the loop's preheader edge. Returns true if we
5568 were successful in doing so (and thus STMT can then be moved),
5569 otherwise returns false. */
5571 static bool
5572 hoist_defs_of_uses (gimple stmt, struct loop *loop)
5574 ssa_op_iter i;
5575 tree op;
5576 bool any = false;
5578 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5580 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5581 if (!gimple_nop_p (def_stmt)
5582 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5584 /* Make sure we don't need to recurse. While we could do
5585 so in simple cases, when there are more complex use webs
5586 we don't have an easy way to preserve stmt order to fulfil
5587 dependencies within them. */
5588 tree op2;
5589 ssa_op_iter i2;
5590 if (gimple_code (def_stmt) == GIMPLE_PHI)
5591 return false;
5592 FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
5594 gimple def_stmt2 = SSA_NAME_DEF_STMT (op2);
5595 if (!gimple_nop_p (def_stmt2)
5596 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))
5597 return false;
5599 any = true;
5603 if (!any)
5604 return true;
5606 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_USE)
5608 gimple def_stmt = SSA_NAME_DEF_STMT (op);
5609 if (!gimple_nop_p (def_stmt)
5610 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
5612 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5613 gsi_remove (&gsi, false);
5614 gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
5618 return true;
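/* Illustrative example (not part of the original sources): if STMT is the
   load '_2 = *_1' and its address '_1 = &a + 4' is computed only from
   operands defined outside LOOP, hoist_defs_of_uses moves that address
   computation to the preheader edge, after which STMT itself can be moved
   as described in the comment above.  */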
5621 /* vectorizable_load.
5623 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
5624 can be vectorized.
5625 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5626 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5627 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5629 static bool
5630 vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
5631 slp_tree slp_node, slp_instance slp_node_instance)
5633 tree scalar_dest;
5634 tree vec_dest = NULL;
5635 tree data_ref = NULL;
5636 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5637 stmt_vec_info prev_stmt_info;
5638 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5639 struct loop *loop = NULL;
5640 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
5641 bool nested_in_vect_loop = false;
5642 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
5643 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5644 tree elem_type;
5645 tree new_temp;
5646 machine_mode mode;
5647 gimple new_stmt = NULL;
5648 tree dummy;
5649 enum dr_alignment_support alignment_support_scheme;
5650 tree dataref_ptr = NULL_TREE;
5651 tree dataref_offset = NULL_TREE;
5652 gimple ptr_incr = NULL;
5653 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5654 int ncopies;
5655 int i, j, group_size, group_gap;
5656 tree msq = NULL_TREE, lsq;
5657 tree offset = NULL_TREE;
5658 tree byte_offset = NULL_TREE;
5659 tree realignment_token = NULL_TREE;
5660 gphi *phi = NULL;
5661 vec<tree> dr_chain = vNULL;
5662 bool grouped_load = false;
5663 bool load_lanes_p = false;
5664 gimple first_stmt;
5665 bool inv_p;
5666 bool negative = false;
5667 bool compute_in_loop = false;
5668 struct loop *at_loop;
5669 int vec_num;
5670 bool slp = (slp_node != NULL);
5671 bool slp_perm = false;
5672 enum tree_code code;
5673 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
5674 int vf;
5675 tree aggr_type;
5676 tree gather_base = NULL_TREE, gather_off = NULL_TREE;
5677 tree gather_off_vectype = NULL_TREE, gather_decl = NULL_TREE;
5678 int gather_scale = 1;
5679 enum vect_def_type gather_dt = vect_unknown_def_type;
5681 if (loop_vinfo)
5683 loop = LOOP_VINFO_LOOP (loop_vinfo);
5684 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5685 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5687 else
5688 vf = 1;
5690 /* Multiple types in SLP are handled by creating the appropriate number of
5691 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5692 case of SLP. */
5693 if (slp || PURE_SLP_STMT (stmt_info))
5694 ncopies = 1;
5695 else
5696 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
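/* For example (illustrative numbers), with a vectorization factor of 8
   and 4 elements per vector this yields ncopies = 8 / 4 = 2, i.e. two
   vector stmts are created for the scalar load.  */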
5698 gcc_assert (ncopies >= 1);
5700 /* FORNOW. This restriction should be relaxed. */
5701 if (nested_in_vect_loop && ncopies > 1)
5703 if (dump_enabled_p ())
5704 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5705 "multiple types in nested loop.\n");
5706 return false;
5709 /* Invalidate assumptions made by dependence analysis when vectorization
5710 on the unrolled body effectively re-orders stmts. */
5711 if (ncopies > 1
5712 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5713 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5714 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5716 if (dump_enabled_p ())
5717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5718 "cannot perform implicit CSE when unrolling "
5719 "with negative dependence distance\n");
5720 return false;
5723 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
5724 return false;
5726 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def)
5727 return false;
5729 /* Is vectorizable load? */
5730 if (!is_gimple_assign (stmt))
5731 return false;
5733 scalar_dest = gimple_assign_lhs (stmt);
5734 if (TREE_CODE (scalar_dest) != SSA_NAME)
5735 return false;
5737 code = gimple_assign_rhs_code (stmt);
5738 if (code != ARRAY_REF
5739 && code != BIT_FIELD_REF
5740 && code != INDIRECT_REF
5741 && code != COMPONENT_REF
5742 && code != IMAGPART_EXPR
5743 && code != REALPART_EXPR
5744 && code != MEM_REF
5745 && TREE_CODE_CLASS (code) != tcc_declaration)
5746 return false;
5748 if (!STMT_VINFO_DATA_REF (stmt_info))
5749 return false;
5751 elem_type = TREE_TYPE (vectype);
5752 mode = TYPE_MODE (vectype);
5754 /* FORNOW. In some cases can vectorize even if data-type not supported
5755 (e.g. - data copies). */
5756 if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
5758 if (dump_enabled_p ())
5759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5760 "Aligned load, but unsupported type.\n");
5761 return false;
5764 /* Check if the load is a part of an interleaving chain. */
5765 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
5767 grouped_load = true;
5768 /* FORNOW */
5769 gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
5771 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
5772 if (!slp && !PURE_SLP_STMT (stmt_info))
5774 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
5775 if (vect_load_lanes_supported (vectype, group_size))
5776 load_lanes_p = true;
5777 else if (!vect_grouped_load_supported (vectype, group_size))
5778 return false;
5781 /* Invalidate assumptions made by dependence analysis when vectorization
5782 on the unrolled body effectively re-orders stmts. */
5783 if (!PURE_SLP_STMT (stmt_info)
5784 && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
5785 && ((unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5786 > STMT_VINFO_MIN_NEG_DIST (stmt_info)))
5788 if (dump_enabled_p ())
5789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5790 "cannot perform implicit CSE when performing "
5791 "group loads with negative dependence distance\n");
5792 return false;
5797 if (STMT_VINFO_GATHER_P (stmt_info))
5799 gimple def_stmt;
5800 tree def;
5801 gather_decl = vect_check_gather (stmt, loop_vinfo, &gather_base,
5802 &gather_off, &gather_scale);
5803 gcc_assert (gather_decl);
5804 if (!vect_is_simple_use_1 (gather_off, NULL, loop_vinfo, bb_vinfo,
5805 &def_stmt, &def, &gather_dt,
5806 &gather_off_vectype))
5808 if (dump_enabled_p ())
5809 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5810 "gather index use not simple.\n");
5811 return false;
5814 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
5816 else
5818 negative = tree_int_cst_compare (nested_in_vect_loop
5819 ? STMT_VINFO_DR_STEP (stmt_info)
5820 : DR_STEP (dr),
5821 size_zero_node) < 0;
5822 if (negative && ncopies > 1)
5824 if (dump_enabled_p ())
5825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5826 "multiple types with negative step.\n");
5827 return false;
5830 if (negative)
5832 if (grouped_load)
5834 if (dump_enabled_p ())
5835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5836 "negative step for group load not supported"
5837 "\n");
5838 return false;
5840 alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
5841 if (alignment_support_scheme != dr_aligned
5842 && alignment_support_scheme != dr_unaligned_supported)
5844 if (dump_enabled_p ())
5845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5846 "negative step but alignment required.\n");
5847 return false;
5849 if (!perm_mask_for_reverse (vectype))
5851 if (dump_enabled_p ())
5852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5853 "negative step and reversing not supported."
5854 "\n");
5855 return false;
5860 if (!vec_stmt) /* transformation not required. */
5862 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5863 vect_model_load_cost (stmt_info, ncopies, load_lanes_p, NULL, NULL, NULL);
5864 return true;
5867 if (dump_enabled_p ())
5868 dump_printf_loc (MSG_NOTE, vect_location,
5869 "transform load. ncopies = %d\n", ncopies);
5871 /** Transform. **/
5873 ensure_base_align (stmt_info, dr);
5875 if (STMT_VINFO_GATHER_P (stmt_info))
5877 tree vec_oprnd0 = NULL_TREE, op;
5878 tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gather_decl));
5879 tree rettype, srctype, ptrtype, idxtype, masktype, scaletype;
5880 tree ptr, mask, var, scale, merge, perm_mask = NULL_TREE, prev_res = NULL_TREE;
5881 edge pe = loop_preheader_edge (loop);
5882 gimple_seq seq;
5883 basic_block new_bb;
5884 enum { NARROW, NONE, WIDEN } modifier;
5885 int gather_off_nunits = TYPE_VECTOR_SUBPARTS (gather_off_vectype);
5887 if (nunits == gather_off_nunits)
5888 modifier = NONE;
5889 else if (nunits == gather_off_nunits / 2)
5891 unsigned char *sel = XALLOCAVEC (unsigned char, gather_off_nunits);
5892 modifier = WIDEN;
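/* Illustration with assumed sizes: for nunits == 4 and
   gather_off_nunits == 8 the loop below builds the selector
   { 4, 5, 6, 7, 4, 5, 6, 7 }, so the odd-numbered copies pick up
   the high half of the offset vector.  */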
5894 for (i = 0; i < gather_off_nunits; ++i)
5895 sel[i] = i | nunits;
5897 perm_mask = vect_gen_perm_mask_checked (gather_off_vectype, sel);
5899 else if (nunits == gather_off_nunits * 2)
5901 unsigned char *sel = XALLOCAVEC (unsigned char, nunits);
5902 modifier = NARROW;
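/* Illustration with assumed sizes: for nunits == 8 and
   gather_off_nunits == 4 the loop below builds the selector
   { 0, 1, 2, 3, 8, 9, 10, 11 }, i.e. elements 0-3 of the first
   operand followed by elements 0-3 of the second; it is used below
   to combine two successive gather results into one vector of
   VECTYPE, which is why ncopies is doubled.  */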
5904 for (i = 0; i < nunits; ++i)
5905 sel[i] = i < gather_off_nunits
5906 ? i : i + nunits - gather_off_nunits;
5908 perm_mask = vect_gen_perm_mask_checked (vectype, sel);
5909 ncopies *= 2;
5911 else
5912 gcc_unreachable ();
5914 rettype = TREE_TYPE (TREE_TYPE (gather_decl));
5915 srctype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5916 ptrtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5917 idxtype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5918 masktype = TREE_VALUE (arglist); arglist = TREE_CHAIN (arglist);
5919 scaletype = TREE_VALUE (arglist);
5920 gcc_checking_assert (types_compatible_p (srctype, rettype));
5922 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5924 ptr = fold_convert (ptrtype, gather_base);
5925 if (!is_gimple_min_invariant (ptr))
5927 ptr = force_gimple_operand (ptr, &seq, true, NULL_TREE);
5928 new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
5929 gcc_assert (!new_bb);
5932 /* Currently we support only unconditional gather loads,
5933 so mask should be all ones. */
5934 if (TREE_CODE (masktype) == INTEGER_TYPE)
5935 mask = build_int_cst (masktype, -1);
5936 else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
5938 mask = build_int_cst (TREE_TYPE (masktype), -1);
5939 mask = build_vector_from_val (masktype, mask);
5940 mask = vect_init_vector (stmt, mask, masktype, NULL);
5942 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype)))
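/* For a floating-point mask type, build a value whose target bit
   pattern is all ones, mirroring the all-ones integer mask above.  */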
5944 REAL_VALUE_TYPE r;
5945 long tmp[6];
5946 for (j = 0; j < 6; ++j)
5947 tmp[j] = -1;
5948 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (masktype)));
5949 mask = build_real (TREE_TYPE (masktype), r);
5950 mask = build_vector_from_val (masktype, mask);
5951 mask = vect_init_vector (stmt, mask, masktype, NULL);
5953 else
5954 gcc_unreachable ();
5956 scale = build_int_cst (scaletype, gather_scale);
5958 if (TREE_CODE (TREE_TYPE (rettype)) == INTEGER_TYPE)
5959 merge = build_int_cst (TREE_TYPE (rettype), 0);
5960 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (rettype)))
5962 REAL_VALUE_TYPE r;
5963 long tmp[6];
5964 for (j = 0; j < 6; ++j)
5965 tmp[j] = 0;
5966 real_from_target (&r, tmp, TYPE_MODE (TREE_TYPE (rettype)));
5967 merge = build_real (TREE_TYPE (rettype), r);
5969 else
5970 gcc_unreachable ();
5971 merge = build_vector_from_val (rettype, merge);
5972 merge = vect_init_vector (stmt, merge, rettype, NULL);
5974 prev_stmt_info = NULL;
5975 for (j = 0; j < ncopies; ++j)
5977 if (modifier == WIDEN && (j & 1))
5978 op = permute_vec_elements (vec_oprnd0, vec_oprnd0,
5979 perm_mask, stmt, gsi);
5980 else if (j == 0)
5981 op = vec_oprnd0
5982 = vect_get_vec_def_for_operand (gather_off, stmt, NULL);
5983 else
5984 op = vec_oprnd0
5985 = vect_get_vec_def_for_stmt_copy (gather_dt, vec_oprnd0);
5987 if (!useless_type_conversion_p (idxtype, TREE_TYPE (op)))
5989 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op))
5990 == TYPE_VECTOR_SUBPARTS (idxtype));
5991 var = vect_get_new_vect_var (idxtype, vect_simple_var, NULL);
5992 var = make_ssa_name (var);
5993 op = build1 (VIEW_CONVERT_EXPR, idxtype, op);
5994 new_stmt
5995 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
5996 vect_finish_stmt_generation (stmt, new_stmt, gsi);
5997 op = var;
6000 new_stmt
6001 = gimple_build_call (gather_decl, 5, merge, ptr, op, mask, scale);
6003 if (!useless_type_conversion_p (vectype, rettype))
6005 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype)
6006 == TYPE_VECTOR_SUBPARTS (rettype));
6007 var = vect_get_new_vect_var (rettype, vect_simple_var, NULL);
6008 op = make_ssa_name (var, new_stmt);
6009 gimple_call_set_lhs (new_stmt, op);
6010 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6011 var = make_ssa_name (vec_dest);
6012 op = build1 (VIEW_CONVERT_EXPR, vectype, op);
6013 new_stmt
6014 = gimple_build_assign (var, VIEW_CONVERT_EXPR, op);
6016 else
6018 var = make_ssa_name (vec_dest, new_stmt);
6019 gimple_call_set_lhs (new_stmt, var);
6022 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6024 if (modifier == NARROW)
6026 if ((j & 1) == 0)
6028 prev_res = var;
6029 continue;
6031 var = permute_vec_elements (prev_res, var,
6032 perm_mask, stmt, gsi);
6033 new_stmt = SSA_NAME_DEF_STMT (var);
6036 if (prev_stmt_info == NULL)
6037 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6038 else
6039 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6040 prev_stmt_info = vinfo_for_stmt (new_stmt);
6042 return true;
6044 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
6046 gimple_stmt_iterator incr_gsi;
6047 bool insert_after;
6048 gimple incr;
6049 tree offvar;
6050 tree ivstep;
6051 tree running_off;
6052 vec<constructor_elt, va_gc> *v = NULL;
6053 gimple_seq stmts = NULL;
6054 tree stride_base, stride_step, alias_off;
6056 gcc_assert (!nested_in_vect_loop);
6058 stride_base
6059 = fold_build_pointer_plus
6060 (unshare_expr (DR_BASE_ADDRESS (dr)),
6061 size_binop (PLUS_EXPR,
6062 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr))),
6063 convert_to_ptrofftype (DR_INIT (dr))));
6064 stride_step = fold_convert (sizetype, unshare_expr (DR_STEP (dr)));
6066 /* For a load with loop-invariant (but other than power-of-2)
6067 stride (i.e. not a grouped access) like so:
6069 for (i = 0; i < n; i += stride)
6070 ... = array[i];
6072 we generate a new induction variable and new accesses to
6073 form a new vector (or vectors, depending on ncopies):
6075 for (j = 0; ; j += VF*stride)
6076 tmp1 = array[j];
6077 tmp2 = array[j + stride];
6079 vectemp = {tmp1, tmp2, ...}
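/* Concretely (illustrative values), with four elements per vector one
   copy gathers array[j], array[j + stride], array[j + 2*stride] and
   array[j + 3*stride], with RUNNING_OFF bumped by STRIDE_STEP bytes
   between the element loads.  */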
6082 ivstep = stride_step;
6083 ivstep = fold_build2 (MULT_EXPR, TREE_TYPE (ivstep), ivstep,
6084 build_int_cst (TREE_TYPE (ivstep), vf));
6086 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
6088 create_iv (stride_base, ivstep, NULL,
6089 loop, &incr_gsi, insert_after,
6090 &offvar, NULL);
6091 incr = gsi_stmt (incr_gsi);
6092 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
6094 stride_step = force_gimple_operand (stride_step, &stmts, true, NULL_TREE);
6095 if (stmts)
6096 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
6098 prev_stmt_info = NULL;
6099 running_off = offvar;
6100 alias_off = build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0);
6101 for (j = 0; j < ncopies; j++)
6103 tree vec_inv;
6105 vec_alloc (v, nunits);
6106 for (i = 0; i < nunits; i++)
6108 tree newref, newoff;
6109 gimple incr;
6110 newref = build2 (MEM_REF, TREE_TYPE (vectype),
6111 running_off, alias_off);
6113 newref = force_gimple_operand_gsi (gsi, newref, true,
6114 NULL_TREE, true,
6115 GSI_SAME_STMT);
6116 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, newref);
6117 newoff = copy_ssa_name (running_off);
6118 incr = gimple_build_assign (newoff, POINTER_PLUS_EXPR,
6119 running_off, stride_step);
6120 vect_finish_stmt_generation (stmt, incr, gsi);
6122 running_off = newoff;
6125 vec_inv = build_constructor (vectype, v);
6126 new_temp = vect_init_vector (stmt, vec_inv, vectype, gsi);
6127 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6129 if (j == 0)
6130 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6131 else
6132 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6133 prev_stmt_info = vinfo_for_stmt (new_stmt);
6135 return true;
6138 if (grouped_load)
6140 first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
6141 if (slp
6142 && !SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()
6143 && first_stmt != SLP_TREE_SCALAR_STMTS (slp_node)[0])
6144 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
6146 /* Check if the chain of loads is already vectorized. */
6147 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt))
6148 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
6149 ??? But we can only do so if there is exactly one
6150 as we have no way to get at the rest. Leave the CSE
6151 opportunity alone.
6152 ??? With the group load eventually participating
6153 in multiple different permutations (having multiple
6154 slp nodes which refer to the same group) the CSE
6155 is even wrong code. See PR56270. */
6156 && !slp)
6158 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6159 return true;
6161 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
6162 group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
6164 /* VEC_NUM is the number of vect stmts to be created for this group. */
6165 if (slp)
6167 grouped_load = false;
6168 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6169 if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
6170 slp_perm = true;
6171 group_gap = GROUP_GAP (vinfo_for_stmt (first_stmt));
6173 else
6175 vec_num = group_size;
6176 group_gap = 0;
6179 else
6181 first_stmt = stmt;
6182 first_dr = dr;
6183 group_size = vec_num = 1;
6184 group_gap = 0;
6187 alignment_support_scheme = vect_supportable_dr_alignment (first_dr, false);
6188 gcc_assert (alignment_support_scheme);
6189 /* Targets with load-lane instructions must not require explicit
6190 realignment. */
6191 gcc_assert (!load_lanes_p
6192 || alignment_support_scheme == dr_aligned
6193 || alignment_support_scheme == dr_unaligned_supported);
6195 /* In case the vectorization factor (VF) is bigger than the number
6196 of elements that we can fit in a vectype (nunits), we have to generate
6197 more than one vector stmt - i.e., we need to "unroll" the
6198 vector stmt by a factor of VF/nunits. In doing so, we record a pointer
6199 from one copy of the vector stmt to the next, in the field
6200 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
6201 stages to find the correct vector defs to be used when vectorizing
6202 stmts that use the defs of the current stmt. The example below
6203 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
6204 need to create 4 vectorized stmts):
6206 before vectorization:
6207 RELATED_STMT VEC_STMT
6208 S1: x = memref - -
6209 S2: z = x + 1 - -
6211 step 1: vectorize stmt S1:
6212 We first create the vector stmt VS1_0, and, as usual, record a
6213 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
6214 Next, we create the vector stmt VS1_1, and record a pointer to
6215 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
6216 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
6217 stmts and pointers:
6218 RELATED_STMT VEC_STMT
6219 VS1_0: vx0 = memref0 VS1_1 -
6220 VS1_1: vx1 = memref1 VS1_2 -
6221 VS1_2: vx2 = memref2 VS1_3 -
6222 VS1_3: vx3 = memref3 - -
6223 S1: x = load - VS1_0
6224 S2: z = x + 1 - -
6226 See the documentation of vect_get_vec_def_for_stmt_copy for how the
6227 information recorded in the RELATED_STMT field is used to vectorize
6228 stmt S2. */
6230 /* In case of interleaving (non-unit grouped access):
6232 S1: x2 = &base + 2
6233 S2: x0 = &base
6234 S3: x1 = &base + 1
6235 S4: x3 = &base + 3
6237 Vectorized loads are created in the order of memory accesses
6238 starting from the access of the first stmt of the chain:
6240 VS1: vx0 = &base
6241 VS2: vx1 = &base + vec_size*1
6242 VS3: vx3 = &base + vec_size*2
6243 VS4: vx4 = &base + vec_size*3
6245 Then permutation statements are generated:
6247 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
6248 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
6251 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6252 (the order of the data-refs in the output of vect_permute_load_chain
6253 corresponds to the order of scalar stmts in the interleaving chain - see
6254 the documentation of vect_permute_load_chain()).
6255 The generation of permutation stmts and recording them in
6256 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
6258 In case of both multiple types and interleaving, the vector loads and
6259 permutation stmts above are created for every copy. The result vector
6260 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
6261 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
6263 /* If the data reference is aligned (dr_aligned) or potentially unaligned
6264 on a target that supports unaligned accesses (dr_unaligned_supported)
6265 we generate the following code:
6266 p = initial_addr;
6267 indx = 0;
6268 loop {
6269 p = p + indx * vectype_size;
6270 vec_dest = *(p);
6271 indx = indx + 1;
6274 Otherwise, the data reference is potentially unaligned on a target that
6275 does not support unaligned accesses (dr_explicit_realign_optimized) -
6276 then generate the following code, in which the data in each iteration is
6277 obtained by two vector loads, one from the previous iteration, and one
6278 from the current iteration:
6279 p1 = initial_addr;
6280 msq_init = *(floor(p1))
6281 p2 = initial_addr + VS - 1;
6282 realignment_token = call target_builtin;
6283 indx = 0;
6284 loop {
6285 p2 = p2 + indx * vectype_size
6286 lsq = *(floor(p2))
6287 vec_dest = realign_load (msq, lsq, realignment_token)
6288 indx = indx + 1;
6289 msq = lsq;
6290 } */
6292 /* If the misalignment remains the same throughout the execution of the
6293 loop, we can create the init_addr and permutation mask at the loop
6294 preheader. Otherwise, it needs to be created inside the loop.
6295 This can only occur when vectorizing memory accesses in the inner-loop
6296 nested within an outer-loop that is being vectorized. */
6298 if (nested_in_vect_loop
6299 && (TREE_INT_CST_LOW (DR_STEP (dr))
6300 % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
6302 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
6303 compute_in_loop = true;
6306 if ((alignment_support_scheme == dr_explicit_realign_optimized
6307 || alignment_support_scheme == dr_explicit_realign)
6308 && !compute_in_loop)
6310 msq = vect_setup_realignment (first_stmt, gsi, &realignment_token,
6311 alignment_support_scheme, NULL_TREE,
6312 &at_loop);
6313 if (alignment_support_scheme == dr_explicit_realign_optimized)
6315 phi = as_a <gphi *> (SSA_NAME_DEF_STMT (msq));
6316 byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
6317 size_one_node);
6320 else
6321 at_loop = loop;
6323 if (negative)
6324 offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
6326 if (load_lanes_p)
6327 aggr_type = build_array_type_nelts (elem_type, vec_num * nunits);
6328 else
6329 aggr_type = vectype;
6331 prev_stmt_info = NULL;
6332 for (j = 0; j < ncopies; j++)
6334 /* 1. Create the vector or array pointer update chain. */
6335 if (j == 0)
6337 bool simd_lane_access_p
6338 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info);
6339 if (simd_lane_access_p
6340 && TREE_CODE (DR_BASE_ADDRESS (first_dr)) == ADDR_EXPR
6341 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr), 0))
6342 && integer_zerop (DR_OFFSET (first_dr))
6343 && integer_zerop (DR_INIT (first_dr))
6344 && alias_sets_conflict_p (get_alias_set (aggr_type),
6345 get_alias_set (DR_REF (first_dr)))
6346 && (alignment_support_scheme == dr_aligned
6347 || alignment_support_scheme == dr_unaligned_supported))
6349 dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr));
6350 dataref_offset = build_int_cst (reference_alias_ptr_type
6351 (DR_REF (first_dr)), 0);
6352 inv_p = false;
6354 else
6355 dataref_ptr
6356 = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
6357 offset, &dummy, gsi, &ptr_incr,
6358 simd_lane_access_p, &inv_p,
6359 byte_offset);
6361 else if (dataref_offset)
6362 dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
6363 TYPE_SIZE_UNIT (aggr_type));
6364 else
6365 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
6366 TYPE_SIZE_UNIT (aggr_type));
6368 if (grouped_load || slp_perm)
6369 dr_chain.create (vec_num);
6371 if (load_lanes_p)
6373 tree vec_array;
6375 vec_array = create_vector_array (vectype, vec_num);
6377 /* Emit:
6378 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
6379 data_ref = create_array_ref (aggr_type, dataref_ptr, first_dr);
6380 new_stmt = gimple_build_call_internal (IFN_LOAD_LANES, 1, data_ref);
6381 gimple_call_set_lhs (new_stmt, vec_array);
6382 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6384 /* Extract each vector into an SSA_NAME. */
6385 for (i = 0; i < vec_num; i++)
6387 new_temp = read_vector_array (stmt, gsi, scalar_dest,
6388 vec_array, i);
6389 dr_chain.quick_push (new_temp);
6392 /* Record the mapping between SSA_NAMEs and statements. */
6393 vect_record_grouped_load_vectors (stmt, dr_chain);
6395 else
6397 for (i = 0; i < vec_num; i++)
6399 if (i > 0)
6400 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6401 stmt, NULL_TREE);
6403 /* 2. Create the vector-load in the loop. */
6404 switch (alignment_support_scheme)
6406 case dr_aligned:
6407 case dr_unaligned_supported:
6409 unsigned int align, misalign;
6411 data_ref
6412 = build2 (MEM_REF, vectype, dataref_ptr,
6413 dataref_offset
6414 ? dataref_offset
6415 : build_int_cst (reference_alias_ptr_type
6416 (DR_REF (first_dr)), 0));
6417 align = TYPE_ALIGN_UNIT (vectype);
6418 if (alignment_support_scheme == dr_aligned)
6420 gcc_assert (aligned_access_p (first_dr));
6421 misalign = 0;
6423 else if (DR_MISALIGNMENT (first_dr) == -1)
6425 TREE_TYPE (data_ref)
6426 = build_aligned_type (TREE_TYPE (data_ref),
6427 TYPE_ALIGN (elem_type));
6428 align = TYPE_ALIGN_UNIT (elem_type);
6429 misalign = 0;
6431 else
6433 TREE_TYPE (data_ref)
6434 = build_aligned_type (TREE_TYPE (data_ref),
6435 TYPE_ALIGN (elem_type));
6436 misalign = DR_MISALIGNMENT (first_dr);
6438 if (dataref_offset == NULL_TREE)
6439 set_ptr_info_alignment (get_ptr_info (dataref_ptr),
6440 align, misalign);
6441 break;
6443 case dr_explicit_realign:
6445 tree ptr, bump;
6446 tree vs_minus_1;
6448 vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
6450 if (compute_in_loop)
6451 msq = vect_setup_realignment (first_stmt, gsi,
6452 &realignment_token,
6453 dr_explicit_realign,
6454 dataref_ptr, NULL);
6456 ptr = copy_ssa_name (dataref_ptr);
6457 new_stmt = gimple_build_assign
6458 (ptr, BIT_AND_EXPR, dataref_ptr,
6459 build_int_cst
6460 (TREE_TYPE (dataref_ptr),
6461 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6462 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6463 data_ref
6464 = build2 (MEM_REF, vectype, ptr,
6465 build_int_cst (reference_alias_ptr_type
6466 (DR_REF (first_dr)), 0));
6467 vec_dest = vect_create_destination_var (scalar_dest,
6468 vectype);
6469 new_stmt = gimple_build_assign (vec_dest, data_ref);
6470 new_temp = make_ssa_name (vec_dest, new_stmt);
6471 gimple_assign_set_lhs (new_stmt, new_temp);
6472 gimple_set_vdef (new_stmt, gimple_vdef (stmt));
6473 gimple_set_vuse (new_stmt, gimple_vuse (stmt));
6474 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6475 msq = new_temp;
6477 bump = size_binop (MULT_EXPR, vs_minus_1,
6478 TYPE_SIZE_UNIT (elem_type));
6479 ptr = bump_vector_ptr (dataref_ptr, NULL, gsi, stmt, bump);
6480 new_stmt = gimple_build_assign
6481 (NULL_TREE, BIT_AND_EXPR, ptr,
6482 build_int_cst
6483 (TREE_TYPE (ptr),
6484 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6485 ptr = copy_ssa_name (dataref_ptr, new_stmt);
6486 gimple_assign_set_lhs (new_stmt, ptr);
6487 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6488 data_ref
6489 = build2 (MEM_REF, vectype, ptr,
6490 build_int_cst (reference_alias_ptr_type
6491 (DR_REF (first_dr)), 0));
6492 break;
6494 case dr_explicit_realign_optimized:
6495 new_temp = copy_ssa_name (dataref_ptr);
6496 new_stmt = gimple_build_assign
6497 (new_temp, BIT_AND_EXPR, dataref_ptr,
6498 build_int_cst
6499 (TREE_TYPE (dataref_ptr),
6500 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
6501 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6502 data_ref
6503 = build2 (MEM_REF, vectype, new_temp,
6504 build_int_cst (reference_alias_ptr_type
6505 (DR_REF (first_dr)), 0));
6506 break;
6507 default:
6508 gcc_unreachable ();
6510 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6511 new_stmt = gimple_build_assign (vec_dest, data_ref);
6512 new_temp = make_ssa_name (vec_dest, new_stmt);
6513 gimple_assign_set_lhs (new_stmt, new_temp);
6514 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6516 /* 3. Handle explicit realignment if necessary/supported.
6517 Create in loop:
6518 vec_dest = realign_load (msq, lsq, realignment_token) */
6519 if (alignment_support_scheme == dr_explicit_realign_optimized
6520 || alignment_support_scheme == dr_explicit_realign)
6522 lsq = gimple_assign_lhs (new_stmt);
6523 if (!realignment_token)
6524 realignment_token = dataref_ptr;
6525 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6526 new_stmt = gimple_build_assign (vec_dest, REALIGN_LOAD_EXPR,
6527 msq, lsq, realignment_token);
6528 new_temp = make_ssa_name (vec_dest, new_stmt);
6529 gimple_assign_set_lhs (new_stmt, new_temp);
6530 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6532 if (alignment_support_scheme == dr_explicit_realign_optimized)
6534 gcc_assert (phi);
6535 if (i == vec_num - 1 && j == ncopies - 1)
6536 add_phi_arg (phi, lsq,
6537 loop_latch_edge (containing_loop),
6538 UNKNOWN_LOCATION);
6539 msq = lsq;
6543 /* 4. Handle invariant-load. */
6544 if (inv_p && !bb_vinfo)
6546 gcc_assert (!grouped_load);
6547 /* If we have versioned for aliasing or the loop doesn't
6548 have any data dependencies that would preclude this,
6549 then we are sure this is a loop invariant load and
6550 thus we can insert it on the preheader edge. */
6551 if (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
6552 && !nested_in_vect_loop
6553 && hoist_defs_of_uses (stmt, loop))
6555 if (dump_enabled_p ())
6557 dump_printf_loc (MSG_NOTE, vect_location,
6558 "hoisting out of the vectorized "
6559 "loop: ");
6560 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6561 dump_printf (MSG_NOTE, "\n");
6563 tree tem = copy_ssa_name (scalar_dest);
6564 gsi_insert_on_edge_immediate
6565 (loop_preheader_edge (loop),
6566 gimple_build_assign (tem,
6567 unshare_expr
6568 (gimple_assign_rhs1 (stmt))));
6569 new_temp = vect_init_vector (stmt, tem, vectype, NULL);
6571 else
6573 gimple_stmt_iterator gsi2 = *gsi;
6574 gsi_next (&gsi2);
6575 new_temp = vect_init_vector (stmt, scalar_dest,
6576 vectype, &gsi2);
6578 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6579 set_vinfo_for_stmt (new_stmt,
6580 new_stmt_vec_info (new_stmt, loop_vinfo,
6581 bb_vinfo));
6584 if (negative)
6586 tree perm_mask = perm_mask_for_reverse (vectype);
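/* The mask reverses the element order, e.g. { 3, 2, 1, 0 } for a
   four-element vector (illustrative), so the elements of a
   negative-step load end up in the original scalar order.  */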
6587 new_temp = permute_vec_elements (new_temp, new_temp,
6588 perm_mask, stmt, gsi);
6589 new_stmt = SSA_NAME_DEF_STMT (new_temp);
6592 /* Collect vector loads and later create their permutation in
6593 vect_transform_grouped_load (). */
6594 if (grouped_load || slp_perm)
6595 dr_chain.quick_push (new_temp);
6597 /* Store vector loads in the corresponding SLP_NODE. */
6598 if (slp && !slp_perm)
6599 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6601 /* Bump the vector pointer to account for a gap. */
6602 if (slp && group_gap != 0)
6604 tree bump = size_binop (MULT_EXPR,
6605 TYPE_SIZE_UNIT (elem_type),
6606 size_int (group_gap));
6607 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
6608 stmt, bump);
6612 if (slp && !slp_perm)
6613 continue;
6615 if (slp_perm)
6617 if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf,
6618 slp_node_instance, false))
6620 dr_chain.release ();
6621 return false;
6624 else
6626 if (grouped_load)
6628 if (!load_lanes_p)
6629 vect_transform_grouped_load (stmt, dr_chain, group_size, gsi);
6630 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
6632 else
6634 if (j == 0)
6635 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6636 else
6637 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6638 prev_stmt_info = vinfo_for_stmt (new_stmt);
6641 dr_chain.release ();
6644 return true;
6647 /* Function vect_is_simple_cond.
6649 Input:
6650 LOOP - the loop that is being vectorized.
6651 COND - Condition that is checked for simple use.
6653 Output:
6654 *COMP_VECTYPE - the vector type for the comparison.
6656 Returns whether a COND can be vectorized. Checks whether the
6657 condition operands are supportable using vect_is_simple_use. */
6659 static bool
6660 vect_is_simple_cond (tree cond, gimple stmt, loop_vec_info loop_vinfo,
6661 bb_vec_info bb_vinfo, tree *comp_vectype)
6663 tree lhs, rhs;
6664 tree def;
6665 enum vect_def_type dt;
6666 tree vectype1 = NULL_TREE, vectype2 = NULL_TREE;
6668 if (!COMPARISON_CLASS_P (cond))
6669 return false;
6671 lhs = TREE_OPERAND (cond, 0);
6672 rhs = TREE_OPERAND (cond, 1);
6674 if (TREE_CODE (lhs) == SSA_NAME)
6676 gimple lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
6677 if (!vect_is_simple_use_1 (lhs, stmt, loop_vinfo, bb_vinfo,
6678 &lhs_def_stmt, &def, &dt, &vectype1))
6679 return false;
6681 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
6682 && TREE_CODE (lhs) != FIXED_CST)
6683 return false;
6685 if (TREE_CODE (rhs) == SSA_NAME)
6687 gimple rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
6688 if (!vect_is_simple_use_1 (rhs, stmt, loop_vinfo, bb_vinfo,
6689 &rhs_def_stmt, &def, &dt, &vectype2))
6690 return false;
6692 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
6693 && TREE_CODE (rhs) != FIXED_CST)
6694 return false;
6696 *comp_vectype = vectype1 ? vectype1 : vectype2;
6697 return true;
6700 /* vectorizable_condition.
6702 Check if STMT is conditional modify expression that can be vectorized.
6703 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
6704 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
6705 at GSI.
6707 When STMT is vectorized as a nested cycle, REDUC_DEF is the vector variable
6708 to be used at REDUC_INDEX (in the then clause if REDUC_INDEX is 1, and in
6709 the else clause if it is 2).
6711 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
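/* As an illustrative sketch (hypothetical SSA names), a scalar stmt

     x_1 = a_2 < b_3 ? c_4 : d_5;

   is replaced by a vector stmt of the form

     vx_1 = VEC_COND_EXPR <va_2 < vb_3, vc_4, vd_5>;

   where the comparison is built with the comparison vector type and
   the selection with the vector type of the result.  */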
6713 bool
6714 vectorizable_condition (gimple stmt, gimple_stmt_iterator *gsi,
6715 gimple *vec_stmt, tree reduc_def, int reduc_index,
6716 slp_tree slp_node)
6718 tree scalar_dest = NULL_TREE;
6719 tree vec_dest = NULL_TREE;
6720 tree cond_expr, then_clause, else_clause;
6721 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6722 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6723 tree comp_vectype = NULL_TREE;
6724 tree vec_cond_lhs = NULL_TREE, vec_cond_rhs = NULL_TREE;
6725 tree vec_then_clause = NULL_TREE, vec_else_clause = NULL_TREE;
6726 tree vec_compare, vec_cond_expr;
6727 tree new_temp;
6728 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6729 tree def;
6730 enum vect_def_type dt, dts[4];
6731 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
6732 int ncopies;
6733 enum tree_code code;
6734 stmt_vec_info prev_stmt_info = NULL;
6735 int i, j;
6736 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6737 vec<tree> vec_oprnds0 = vNULL;
6738 vec<tree> vec_oprnds1 = vNULL;
6739 vec<tree> vec_oprnds2 = vNULL;
6740 vec<tree> vec_oprnds3 = vNULL;
6741 tree vec_cmp_type;
6743 if (slp_node || PURE_SLP_STMT (stmt_info))
6744 ncopies = 1;
6745 else
6746 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
6748 gcc_assert (ncopies >= 1);
6749 if (reduc_index && ncopies > 1)
6750 return false; /* FORNOW */
6752 if (reduc_index && STMT_SLP_TYPE (stmt_info))
6753 return false;
6755 if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
6756 return false;
6758 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
6759 && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle
6760 && reduc_def))
6761 return false;
6763 /* FORNOW: not yet supported. */
6764 if (STMT_VINFO_LIVE_P (stmt_info))
6766 if (dump_enabled_p ())
6767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6768 "value used after loop.\n");
6769 return false;
6772 /* Is vectorizable conditional operation? */
6773 if (!is_gimple_assign (stmt))
6774 return false;
6776 code = gimple_assign_rhs_code (stmt);
6778 if (code != COND_EXPR)
6779 return false;
6781 cond_expr = gimple_assign_rhs1 (stmt);
6782 then_clause = gimple_assign_rhs2 (stmt);
6783 else_clause = gimple_assign_rhs3 (stmt);
6785 if (!vect_is_simple_cond (cond_expr, stmt, loop_vinfo, bb_vinfo,
6786 &comp_vectype)
6787 || !comp_vectype)
6788 return false;
6790 if (TREE_CODE (then_clause) == SSA_NAME)
6792 gimple then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6793 if (!vect_is_simple_use (then_clause, stmt, loop_vinfo, bb_vinfo,
6794 &then_def_stmt, &def, &dt))
6795 return false;
6797 else if (TREE_CODE (then_clause) != INTEGER_CST
6798 && TREE_CODE (then_clause) != REAL_CST
6799 && TREE_CODE (then_clause) != FIXED_CST)
6800 return false;
6802 if (TREE_CODE (else_clause) == SSA_NAME)
6804 gimple else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6805 if (!vect_is_simple_use (else_clause, stmt, loop_vinfo, bb_vinfo,
6806 &else_def_stmt, &def, &dt))
6807 return false;
6809 else if (TREE_CODE (else_clause) != INTEGER_CST
6810 && TREE_CODE (else_clause) != REAL_CST
6811 && TREE_CODE (else_clause) != FIXED_CST)
6812 return false;
6814 unsigned int prec = GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype)));
6815 /* The result of a vector comparison should be signed type. */
6816 tree cmp_type = build_nonstandard_integer_type (prec, 0);
6817 vec_cmp_type = get_same_sized_vectype (cmp_type, vectype);
6818 if (vec_cmp_type == NULL_TREE)
6819 return false;
6821 if (!vec_stmt)
6823 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6824 return expand_vec_cond_expr_p (vectype, comp_vectype);
6827 /* Transform. */
6829 if (!slp_node)
6831 vec_oprnds0.create (1);
6832 vec_oprnds1.create (1);
6833 vec_oprnds2.create (1);
6834 vec_oprnds3.create (1);
6837 /* Handle def. */
6838 scalar_dest = gimple_assign_lhs (stmt);
6839 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6841 /* Handle cond expr. */
6842 for (j = 0; j < ncopies; j++)
6844 gassign *new_stmt = NULL;
6845 if (j == 0)
6847 if (slp_node)
6849 auto_vec<tree, 4> ops;
6850 auto_vec<vec<tree>, 4> vec_defs;
6852 ops.safe_push (TREE_OPERAND (cond_expr, 0));
6853 ops.safe_push (TREE_OPERAND (cond_expr, 1));
6854 ops.safe_push (then_clause);
6855 ops.safe_push (else_clause);
6856 vect_get_slp_defs (ops, slp_node, &vec_defs, -1);
6857 vec_oprnds3 = vec_defs.pop ();
6858 vec_oprnds2 = vec_defs.pop ();
6859 vec_oprnds1 = vec_defs.pop ();
6860 vec_oprnds0 = vec_defs.pop ();
6862 ops.release ();
6863 vec_defs.release ();
6865 else
6867 gimple gtemp;
6868 vec_cond_lhs =
6869 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0),
6870 stmt, NULL);
6871 vect_is_simple_use (TREE_OPERAND (cond_expr, 0), stmt,
6872 loop_vinfo, NULL, &gtemp, &def, &dts[0]);
6874 vec_cond_rhs =
6875 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1),
6876 stmt, NULL);
6877 vect_is_simple_use (TREE_OPERAND (cond_expr, 1), stmt,
6878 loop_vinfo, NULL, &gtemp, &def, &dts[1]);
6879 if (reduc_index == 1)
6880 vec_then_clause = reduc_def;
6881 else
6883 vec_then_clause = vect_get_vec_def_for_operand (then_clause,
6884 stmt, NULL);
6885 vect_is_simple_use (then_clause, stmt, loop_vinfo,
6886 NULL, &gtemp, &def, &dts[2]);
6888 if (reduc_index == 2)
6889 vec_else_clause = reduc_def;
6890 else
6892 vec_else_clause = vect_get_vec_def_for_operand (else_clause,
6893 stmt, NULL);
6894 vect_is_simple_use (else_clause, stmt, loop_vinfo,
6895 NULL, &gtemp, &def, &dts[3]);
6899 else
6901 vec_cond_lhs = vect_get_vec_def_for_stmt_copy (dts[0],
6902 vec_oprnds0.pop ());
6903 vec_cond_rhs = vect_get_vec_def_for_stmt_copy (dts[1],
6904 vec_oprnds1.pop ());
6905 vec_then_clause = vect_get_vec_def_for_stmt_copy (dts[2],
6906 vec_oprnds2.pop ());
6907 vec_else_clause = vect_get_vec_def_for_stmt_copy (dts[3],
6908 vec_oprnds3.pop ());
6911 if (!slp_node)
6913 vec_oprnds0.quick_push (vec_cond_lhs);
6914 vec_oprnds1.quick_push (vec_cond_rhs);
6915 vec_oprnds2.quick_push (vec_then_clause);
6916 vec_oprnds3.quick_push (vec_else_clause);
6919 /* Arguments are ready. Create the new vector stmt. */
6920 FOR_EACH_VEC_ELT (vec_oprnds0, i, vec_cond_lhs)
6922 vec_cond_rhs = vec_oprnds1[i];
6923 vec_then_clause = vec_oprnds2[i];
6924 vec_else_clause = vec_oprnds3[i];
6926 vec_compare = build2 (TREE_CODE (cond_expr), vec_cmp_type,
6927 vec_cond_lhs, vec_cond_rhs);
6928 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6929 vec_compare, vec_then_clause, vec_else_clause);
6931 new_stmt = gimple_build_assign (vec_dest, vec_cond_expr);
6932 new_temp = make_ssa_name (vec_dest, new_stmt);
6933 gimple_assign_set_lhs (new_stmt, new_temp);
6934 vect_finish_stmt_generation (stmt, new_stmt, gsi);
6935 if (slp_node)
6936 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6939 if (slp_node)
6940 continue;
6942 if (j == 0)
6943 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
6944 else
6945 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
6947 prev_stmt_info = vinfo_for_stmt (new_stmt);
6950 vec_oprnds0.release ();
6951 vec_oprnds1.release ();
6952 vec_oprnds2.release ();
6953 vec_oprnds3.release ();
6955 return true;
6959 /* Make sure the statement is vectorizable. */
6961 bool
6962 vect_analyze_stmt (gimple stmt, bool *need_to_vectorize, slp_tree node)
6964 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6965 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
6966 enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
6967 bool ok;
6968 tree scalar_type, vectype;
6969 gimple pattern_stmt;
6970 gimple_seq pattern_def_seq;
6972 if (dump_enabled_p ())
6974 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: ");
6975 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6976 dump_printf (MSG_NOTE, "\n");
6979 if (gimple_has_volatile_ops (stmt))
6981 if (dump_enabled_p ())
6982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6983 "not vectorized: stmt has volatile operands\n");
6985 return false;
6988 /* Skip stmts that do not need to be vectorized. In loops this is expected
6989 to include:
6990 - the COND_EXPR which is the loop exit condition
6991 - any LABEL_EXPRs in the loop
6992 - computations that are used only for array indexing or loop control.
6993 In basic blocks we only analyze statements that are a part of some SLP
6994 instance, therefore, all the statements are relevant.
6996 The pattern statement needs to be analyzed instead of the original statement
6997 if the original statement is not relevant. Otherwise, we analyze both
6998 statements. In basic blocks we are called from some SLP instance
6999 traversal; don't analyze pattern stmts instead, since the pattern stmts
7000 will already be part of the SLP instance. */
7002 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
7003 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7004 && !STMT_VINFO_LIVE_P (stmt_info))
7006 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7007 && pattern_stmt
7008 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7009 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7011 /* Analyze PATTERN_STMT instead of the original stmt. */
7012 stmt = pattern_stmt;
7013 stmt_info = vinfo_for_stmt (pattern_stmt);
7014 if (dump_enabled_p ())
7016 dump_printf_loc (MSG_NOTE, vect_location,
7017 "==> examining pattern statement: ");
7018 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7019 dump_printf (MSG_NOTE, "\n");
7022 else
7024 if (dump_enabled_p ())
7025 dump_printf_loc (MSG_NOTE, vect_location, "irrelevant.\n");
7027 return true;
7030 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7031 && node == NULL
7032 && pattern_stmt
7033 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7034 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7036 /* Analyze PATTERN_STMT too. */
7037 if (dump_enabled_p ())
7039 dump_printf_loc (MSG_NOTE, vect_location,
7040 "==> examining pattern statement: ");
7041 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7042 dump_printf (MSG_NOTE, "\n");
7045 if (!vect_analyze_stmt (pattern_stmt, need_to_vectorize, node))
7046 return false;
7049 if (is_pattern_stmt_p (stmt_info)
7050 && node == NULL
7051 && (pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)))
7053 gimple_stmt_iterator si;
7055 for (si = gsi_start (pattern_def_seq); !gsi_end_p (si); gsi_next (&si))
7057 gimple pattern_def_stmt = gsi_stmt (si);
7058 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt))
7059 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt)))
7061 /* Analyze def stmt of STMT if it's a pattern stmt. */
7062 if (dump_enabled_p ())
7064 dump_printf_loc (MSG_NOTE, vect_location,
7065 "==> examining pattern def statement: ");
7066 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_def_stmt, 0);
7067 dump_printf (MSG_NOTE, "\n");
7070 if (!vect_analyze_stmt (pattern_def_stmt,
7071 need_to_vectorize, node))
7072 return false;
7077 switch (STMT_VINFO_DEF_TYPE (stmt_info))
7079 case vect_internal_def:
7080 break;
7082 case vect_reduction_def:
7083 case vect_nested_cycle:
7084 gcc_assert (!bb_vinfo && (relevance == vect_used_in_outer
7085 || relevance == vect_used_in_outer_by_reduction
7086 || relevance == vect_unused_in_scope));
7087 break;
7089 case vect_induction_def:
7090 case vect_constant_def:
7091 case vect_external_def:
7092 case vect_unknown_def_type:
7093 default:
7094 gcc_unreachable ();
7097 if (bb_vinfo)
7099 gcc_assert (PURE_SLP_STMT (stmt_info));
7101 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
7102 if (dump_enabled_p ())
7104 dump_printf_loc (MSG_NOTE, vect_location,
7105 "get vectype for scalar type: ");
7106 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
7107 dump_printf (MSG_NOTE, "\n");
7110 vectype = get_vectype_for_scalar_type (scalar_type);
7111 if (!vectype)
7113 if (dump_enabled_p ())
7115 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7116 "not SLPed: unsupported data-type ");
7117 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
7118 scalar_type);
7119 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7121 return false;
7124 if (dump_enabled_p ())
7126 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
7127 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
7128 dump_printf (MSG_NOTE, "\n");
7131 STMT_VINFO_VECTYPE (stmt_info) = vectype;
7134 if (STMT_VINFO_RELEVANT_P (stmt_info))
7136 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
7137 gcc_assert (STMT_VINFO_VECTYPE (stmt_info)
7138 || (is_gimple_call (stmt)
7139 && gimple_call_lhs (stmt) == NULL_TREE));
7140 *need_to_vectorize = true;
7143 ok = true;
7144 if (!bb_vinfo
7145 && (STMT_VINFO_RELEVANT_P (stmt_info)
7146 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
7147 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, NULL)
7148 || vectorizable_conversion (stmt, NULL, NULL, NULL)
7149 || vectorizable_shift (stmt, NULL, NULL, NULL)
7150 || vectorizable_operation (stmt, NULL, NULL, NULL)
7151 || vectorizable_assignment (stmt, NULL, NULL, NULL)
7152 || vectorizable_load (stmt, NULL, NULL, NULL, NULL)
7153 || vectorizable_call (stmt, NULL, NULL, NULL)
7154 || vectorizable_store (stmt, NULL, NULL, NULL)
7155 || vectorizable_reduction (stmt, NULL, NULL, NULL)
7156 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, NULL));
7157 else
7159 if (bb_vinfo)
7160 ok = (vectorizable_simd_clone_call (stmt, NULL, NULL, node)
7161 || vectorizable_conversion (stmt, NULL, NULL, node)
7162 || vectorizable_shift (stmt, NULL, NULL, node)
7163 || vectorizable_operation (stmt, NULL, NULL, node)
7164 || vectorizable_assignment (stmt, NULL, NULL, node)
7165 || vectorizable_load (stmt, NULL, NULL, node, NULL)
7166 || vectorizable_call (stmt, NULL, NULL, node)
7167 || vectorizable_store (stmt, NULL, NULL, node)
7168 || vectorizable_condition (stmt, NULL, NULL, NULL, 0, node));
7171 if (!ok)
7173 if (dump_enabled_p ())
7175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7176 "not vectorized: relevant stmt not ");
7177 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7178 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7179 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7182 return false;
7185 if (bb_vinfo)
7186 return true;
7188 /* Stmts that are (also) "live" (i.e., used outside of the loop)
7189 need extra handling, except for vectorizable reductions. */
7190 if (STMT_VINFO_LIVE_P (stmt_info)
7191 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7192 ok = vectorizable_live_operation (stmt, NULL, NULL);
7194 if (!ok)
7196 if (dump_enabled_p ())
7198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7199 "not vectorized: live stmt not ");
7200 dump_printf (MSG_MISSED_OPTIMIZATION, "supported: ");
7201 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
7202 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
7205 return false;
7208 return true;
7212 /* Function vect_transform_stmt.
7214 Create a vectorized stmt to replace STMT, and insert it at BSI. */
7216 bool
7217 vect_transform_stmt (gimple stmt, gimple_stmt_iterator *gsi,
7218 bool *grouped_store, slp_tree slp_node,
7219 slp_instance slp_node_instance)
7221 bool is_store = false;
7222 gimple vec_stmt = NULL;
7223 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7224 bool done;
7226 switch (STMT_VINFO_TYPE (stmt_info))
7228 case type_demotion_vec_info_type:
7229 case type_promotion_vec_info_type:
7230 case type_conversion_vec_info_type:
7231 done = vectorizable_conversion (stmt, gsi, &vec_stmt, slp_node);
7232 gcc_assert (done);
7233 break;
7235 case induc_vec_info_type:
7236 gcc_assert (!slp_node);
7237 done = vectorizable_induction (stmt, gsi, &vec_stmt);
7238 gcc_assert (done);
7239 break;
7241 case shift_vec_info_type:
7242 done = vectorizable_shift (stmt, gsi, &vec_stmt, slp_node);
7243 gcc_assert (done);
7244 break;
7246 case op_vec_info_type:
7247 done = vectorizable_operation (stmt, gsi, &vec_stmt, slp_node);
7248 gcc_assert (done);
7249 break;
7251 case assignment_vec_info_type:
7252 done = vectorizable_assignment (stmt, gsi, &vec_stmt, slp_node);
7253 gcc_assert (done);
7254 break;
7256 case load_vec_info_type:
7257 done = vectorizable_load (stmt, gsi, &vec_stmt, slp_node,
7258 slp_node_instance);
7259 gcc_assert (done);
7260 break;
7262 case store_vec_info_type:
7263 done = vectorizable_store (stmt, gsi, &vec_stmt, slp_node);
7264 gcc_assert (done);
7265 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) && !slp_node)
7267 /* In case of interleaving, the whole chain is vectorized when the
7268 last store in the chain is reached. Store stmts before the last
7269 one are skipped, and their vec_stmt_info shouldn't be freed
7270 meanwhile. */
7271 *grouped_store = true;
7272 if (STMT_VINFO_VEC_STMT (stmt_info))
7273 is_store = true;
7275 else
7276 is_store = true;
7277 break;
7279 case condition_vec_info_type:
7280 done = vectorizable_condition (stmt, gsi, &vec_stmt, NULL, 0, slp_node);
7281 gcc_assert (done);
7282 break;
7284 case call_vec_info_type:
7285 done = vectorizable_call (stmt, gsi, &vec_stmt, slp_node);
7286 stmt = gsi_stmt (*gsi);
7287 if (is_gimple_call (stmt)
7288 && gimple_call_internal_p (stmt)
7289 && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
7290 is_store = true;
7291 break;
7293 case call_simd_clone_vec_info_type:
7294 done = vectorizable_simd_clone_call (stmt, gsi, &vec_stmt, slp_node);
7295 stmt = gsi_stmt (*gsi);
7296 break;
7298 case reduc_vec_info_type:
7299 done = vectorizable_reduction (stmt, gsi, &vec_stmt, slp_node);
7300 gcc_assert (done);
7301 break;
7303 default:
7304 if (!STMT_VINFO_LIVE_P (stmt_info))
7306 if (dump_enabled_p ())
7307 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7308 "stmt not supported.\n");
7309 gcc_unreachable ();
7313 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
7314 is being vectorized, but outside the immediately enclosing loop. */
7315 if (vec_stmt
7316 && STMT_VINFO_LOOP_VINFO (stmt_info)
7317 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
7318 STMT_VINFO_LOOP_VINFO (stmt_info)), stmt)
7319 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type
7320 && (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_outer
7321 || STMT_VINFO_RELEVANT (stmt_info) ==
7322 vect_used_in_outer_by_reduction))
7324 struct loop *innerloop = LOOP_VINFO_LOOP (
7325 STMT_VINFO_LOOP_VINFO (stmt_info))->inner;
7326 imm_use_iterator imm_iter;
7327 use_operand_p use_p;
7328 tree scalar_dest;
7329 gimple exit_phi;
7331 if (dump_enabled_p ())
7332 dump_printf_loc (MSG_NOTE, vect_location,
7333 "Record the vdef for outer-loop vectorization.\n");
7335 /* Find the relevant loop-exit phi-node, and record the vec_stmt there
7336 (to be used when vectorizing outer-loop stmts that use the DEF of
7337 STMT). */
7338 if (gimple_code (stmt) == GIMPLE_PHI)
7339 scalar_dest = PHI_RESULT (stmt);
7340 else
7341 scalar_dest = gimple_assign_lhs (stmt);
7343 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
7345 if (!flow_bb_inside_loop_p (innerloop, gimple_bb (USE_STMT (use_p))))
7347 exit_phi = USE_STMT (use_p);
7348 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi)) = vec_stmt;
7353 /* Handle stmts whose DEF is used outside the loop-nest that is
7354 being vectorized. */
7355 if (STMT_VINFO_LIVE_P (stmt_info)
7356 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
7358 done = vectorizable_live_operation (stmt, gsi, &vec_stmt);
7359 gcc_assert (done);
7362 if (vec_stmt)
7363 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
7365 return is_store;
7369 /* Remove a group of stores (for SLP or interleaving), free their
7370 stmt_vec_info. */
7372 void
7373 vect_remove_stores (gimple first_stmt)
7375 gimple next = first_stmt;
7376 gimple tmp;
7377 gimple_stmt_iterator next_si;
7379 while (next)
7381 stmt_vec_info stmt_info = vinfo_for_stmt (next);
7383 tmp = GROUP_NEXT_ELEMENT (stmt_info);
7384 if (is_pattern_stmt_p (stmt_info))
7385 next = STMT_VINFO_RELATED_STMT (stmt_info);
7386 /* Free the attached stmt_vec_info and remove the stmt. */
7387 next_si = gsi_for_stmt (next);
7388 unlink_stmt_vdef (next);
7389 gsi_remove (&next_si, true);
7390 release_defs (next);
7391 free_stmt_vec_info (next);
7392 next = tmp;
7397 /* Function new_stmt_vec_info.
7399 Create and initialize a new stmt_vec_info struct for STMT. */
7401 stmt_vec_info
7402 new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
7403 bb_vec_info bb_vinfo)
7405 stmt_vec_info res;
7406 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
7408 STMT_VINFO_TYPE (res) = undef_vec_info_type;
7409 STMT_VINFO_STMT (res) = stmt;
7410 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
7411 STMT_VINFO_BB_VINFO (res) = bb_vinfo;
7412 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope;
7413 STMT_VINFO_LIVE_P (res) = false;
7414 STMT_VINFO_VECTYPE (res) = NULL;
7415 STMT_VINFO_VEC_STMT (res) = NULL;
7416 STMT_VINFO_VECTORIZABLE (res) = true;
7417 STMT_VINFO_IN_PATTERN_P (res) = false;
7418 STMT_VINFO_RELATED_STMT (res) = NULL;
7419 STMT_VINFO_PATTERN_DEF_SEQ (res) = NULL;
7420 STMT_VINFO_DATA_REF (res) = NULL;
7422 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
7423 STMT_VINFO_DR_OFFSET (res) = NULL;
7424 STMT_VINFO_DR_INIT (res) = NULL;
7425 STMT_VINFO_DR_STEP (res) = NULL;
7426 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
7428 if (gimple_code (stmt) == GIMPLE_PHI
7429 && is_loop_header_bb_p (gimple_bb (stmt)))
7430 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
7431 else
7432 STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
7434 STMT_VINFO_SAME_ALIGN_REFS (res).create (0);
7435 STMT_SLP_TYPE (res) = loop_vect;
7436 GROUP_FIRST_ELEMENT (res) = NULL;
7437 GROUP_NEXT_ELEMENT (res) = NULL;
7438 GROUP_SIZE (res) = 0;
7439 GROUP_STORE_COUNT (res) = 0;
7440 GROUP_GAP (res) = 0;
7441 GROUP_SAME_DR_STMT (res) = NULL;
7443 return res;
7447 /* Create a vector for stmt_vec_info. */
7449 void
7450 init_stmt_vec_info_vec (void)
7452 gcc_assert (!stmt_vec_info_vec.exists ());
7453 stmt_vec_info_vec.create (50);
7457 /* Free the vector of stmt_vec_info. */
7459 void
7460 free_stmt_vec_info_vec (void)
7462 unsigned int i;
7463 vec_void_p info;
7464 FOR_EACH_VEC_ELT (stmt_vec_info_vec, i, info)
7465 if (info != NULL)
7466 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info) info));
7467 gcc_assert (stmt_vec_info_vec.exists ());
7468 stmt_vec_info_vec.release ();
7472 /* Free stmt vectorization related info. */
7474 void
7475 free_stmt_vec_info (gimple stmt)
7477 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7479 if (!stmt_info)
7480 return;
7482 /* Check if this statement has a related "pattern stmt"
7483 (introduced by the vectorizer during the pattern recognition
7484 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
7485 too. */
7486 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7488 stmt_vec_info patt_info
7489 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7490 if (patt_info)
7492 gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (patt_info);
7493 gimple patt_stmt = STMT_VINFO_STMT (patt_info);
7494 gimple_set_bb (patt_stmt, NULL);
7495 tree lhs = gimple_get_lhs (patt_stmt);
7496 if (TREE_CODE (lhs) == SSA_NAME)
7497 release_ssa_name (lhs);
7498 if (seq)
7500 gimple_stmt_iterator si;
7501 for (si = gsi_start (seq); !gsi_end_p (si); gsi_next (&si))
7503 gimple seq_stmt = gsi_stmt (si);
7504 gimple_set_bb (seq_stmt, NULL);
7505 lhs = gimple_get_lhs (seq_stmt);
7506 if (TREE_CODE (lhs) == SSA_NAME)
7507 release_ssa_name (lhs);
7508 free_stmt_vec_info (seq_stmt);
7511 free_stmt_vec_info (patt_stmt);
7515 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release ();
7516 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release ();
7517 set_vinfo_for_stmt (stmt, NULL);
7518 free (stmt_info);
7522 /* Function get_vectype_for_scalar_type_and_size.
7524 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
7525 by the target. */
7527 static tree
7528 get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
7530 machine_mode inner_mode = TYPE_MODE (scalar_type);
7531 machine_mode simd_mode;
7532 unsigned int nbytes = GET_MODE_SIZE (inner_mode);
7533 int nunits;
7534 tree vectype;
7536 if (nbytes == 0)
7537 return NULL_TREE;
7539 if (GET_MODE_CLASS (inner_mode) != MODE_INT
7540 && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
7541 return NULL_TREE;
7543 /* For vector types of elements whose mode precision doesn't
7544 match their type's precision we use an element type of mode
7545 precision. The vectorization routines will have to make sure
7546 they support the proper result truncation/extension.
7547 We also make sure to build vector types with INTEGER_TYPE
7548 component type only. */
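/* E.g. (illustrative) a C _Bool typically has TYPE_PRECISION 1 but
   QImode, so an 8-bit unsigned INTEGER_TYPE is used as the vector
   element type instead.  */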
7549 if (INTEGRAL_TYPE_P (scalar_type)
7550 && (GET_MODE_BITSIZE (inner_mode) != TYPE_PRECISION (scalar_type)
7551 || TREE_CODE (scalar_type) != INTEGER_TYPE))
7552 scalar_type = build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode),
7553 TYPE_UNSIGNED (scalar_type));
7555 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
7556 When the component mode passes the above test simply use a type
7557 corresponding to that mode. The theory is that any use that
7558 would cause problems with this will disable vectorization anyway. */
7559 else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
7560 && !INTEGRAL_TYPE_P (scalar_type))
7561 scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
7563 /* We can't build a vector type of elements with alignment bigger than
7564 their size. */
7565 else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
7566 scalar_type = lang_hooks.types.type_for_mode (inner_mode,
7567 TYPE_UNSIGNED (scalar_type));
7569 /* If we fell back to using the mode, fail if there was
7570 no scalar type for it. */
7571 if (scalar_type == NULL_TREE)
7572 return NULL_TREE;
7574 /* If no size was supplied, use the mode the target prefers. Otherwise
7575 look up a vector mode of the specified size. */
7576 if (size == 0)
7577 simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode);
7578 else
7579 simd_mode = mode_for_vector (inner_mode, size / nbytes);
7580 nunits = GET_MODE_SIZE (simd_mode) / nbytes;
7581 if (nunits <= 1)
7582 return NULL_TREE;
7584 vectype = build_vector_type (scalar_type, nunits);
7586 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
7587 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
7588 return NULL_TREE;
7590 return vectype;
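/* Worked example (assuming a target where int has SImode, 4 bytes, and a
   16-byte V4SImode vector mode is available):

       get_vectype_for_scalar_type_and_size (integer_type_node, 16)

   computes nbytes = 4, calls mode_for_vector (SImode, 16 / 4), obtains
   nunits = 16 / 4 = 4 and returns the type vector(4) int, whose mode is
   V4SImode.  With SIZE == 0 the target's preferred_simd_mode hook picks
   the vector mode instead.  */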
7593 unsigned int current_vector_size;
7595 /* Function get_vectype_for_scalar_type.
7597 Returns the vector type corresponding to SCALAR_TYPE as supported
7598 by the target. */
7600 tree
7601 get_vectype_for_scalar_type (tree scalar_type)
7603 tree vectype;
7604 vectype = get_vectype_for_scalar_type_and_size (scalar_type,
7605 current_vector_size);
7606 if (vectype
7607 && current_vector_size == 0)
7608 current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
7609 return vectype;
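/* The first successful lookup latches current_vector_size for the rest of
   the current vectorization attempt.  A minimal sketch, assuming a target
   whose preferred SIMD mode for SImode is the 16-byte V4SImode and which
   also provides a two-element double mode:

       tree v_int = get_vectype_for_scalar_type (integer_type_node);
       tree v_dbl = get_vectype_for_scalar_type (double_type_node);

   The first call sets current_vector_size to 16 and yields vector(4) int;
   the second is then answered with the same-size vector(2) double rather
   than with whatever preferred_simd_mode would pick for DFmode.  */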
7612 /* Function get_same_sized_vectype
7614 Returns a vector type corresponding to SCALAR_TYPE with the same
7615 size as VECTOR_TYPE, if supported by the target. */
7617 tree
7618 get_same_sized_vectype (tree scalar_type, tree vector_type)
7620 return get_vectype_for_scalar_type_and_size
7621 (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type)));
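/* For example, given SCALAR_TYPE short and a 16-byte VECTOR_TYPE such as
   vector(4) float, this would yield vector(8) short int, provided the
   target has a 16-byte integer vector mode (an assumption; otherwise
   NULL_TREE is returned).  */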
7624 /* Function vect_is_simple_use.
7626 Input:
7627 LOOP_VINFO - the vect info of the loop that is being vectorized.
7628 BB_VINFO - the vect info of the basic block that is being vectorized.
7629 OPERAND - operand of STMT in the loop or bb.
7630 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7632 Returns whether a stmt with OPERAND can be vectorized.
7633 For loops, supportable operands are constants, loop invariants, and operands
7634 that are defined by the current iteration of the loop. Unsupportable
7635 operands are those that are defined by a previous iteration of the loop (as
7636 is the case in reduction/induction computations).
7637 For basic blocks, supportable operands are constants and bb invariants.
7638 For now, operands defined outside the basic block are not supported. */
7640 bool
7641 vect_is_simple_use (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7642 bb_vec_info bb_vinfo, gimple *def_stmt,
7643 tree *def, enum vect_def_type *dt)
7645 basic_block bb;
7646 stmt_vec_info stmt_vinfo;
7647 struct loop *loop = NULL;
7649 if (loop_vinfo)
7650 loop = LOOP_VINFO_LOOP (loop_vinfo);
7652 *def_stmt = NULL;
7653 *def = NULL_TREE;
7655 if (dump_enabled_p ())
7657 dump_printf_loc (MSG_NOTE, vect_location,
7658 "vect_is_simple_use: operand ");
7659 dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
7660 dump_printf (MSG_NOTE, "\n");
7663 if (CONSTANT_CLASS_P (operand))
7665 *dt = vect_constant_def;
7666 return true;
7669 if (is_gimple_min_invariant (operand))
7671 *def = operand;
7672 *dt = vect_external_def;
7673 return true;
7676 if (TREE_CODE (operand) == PAREN_EXPR)
7678 if (dump_enabled_p ())
7679 dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
7680 operand = TREE_OPERAND (operand, 0);
7683 if (TREE_CODE (operand) != SSA_NAME)
7685 if (dump_enabled_p ())
7686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7687 "not ssa-name.\n");
7688 return false;
7691 *def_stmt = SSA_NAME_DEF_STMT (operand);
7692 if (*def_stmt == NULL)
7694 if (dump_enabled_p ())
7695 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7696 "no def_stmt.\n");
7697 return false;
7700 if (dump_enabled_p ())
7702 dump_printf_loc (MSG_NOTE, vect_location, "def_stmt: ");
7703 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, *def_stmt, 0);
7704 dump_printf (MSG_NOTE, "\n");
7707 /* An empty stmt is expected only in the case of a function argument
7708 (otherwise we expect a PHI node or a GIMPLE_ASSIGN). */
7709 if (gimple_nop_p (*def_stmt))
7711 *def = operand;
7712 *dt = vect_external_def;
7713 return true;
7716 bb = gimple_bb (*def_stmt);
7718 if ((loop && !flow_bb_inside_loop_p (loop, bb))
7719 || (!loop && bb != BB_VINFO_BB (bb_vinfo))
7720 || (!loop && gimple_code (*def_stmt) == GIMPLE_PHI))
7721 *dt = vect_external_def;
7722 else
7724 stmt_vinfo = vinfo_for_stmt (*def_stmt);
7725 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
7728 if (*dt == vect_unknown_def_type
7729 || (stmt
7730 && *dt == vect_double_reduction_def
7731 && gimple_code (stmt) != GIMPLE_PHI))
7733 if (dump_enabled_p ())
7734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7735 "Unsupported pattern.\n");
7736 return false;
7739 if (dump_enabled_p ())
7740 dump_printf_loc (MSG_NOTE, vect_location, "type of def: %d.\n", *dt);
7742 switch (gimple_code (*def_stmt))
7744 case GIMPLE_PHI:
7745 *def = gimple_phi_result (*def_stmt);
7746 break;
7748 case GIMPLE_ASSIGN:
7749 *def = gimple_assign_lhs (*def_stmt);
7750 break;
7752 case GIMPLE_CALL:
7753 *def = gimple_call_lhs (*def_stmt);
7754 if (*def != NULL)
7755 break;
7756 /* FALLTHRU */
7757 default:
7758 if (dump_enabled_p ())
7759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7760 "unsupported defining stmt:\n");
7761 return false;
7764 return true;
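/* A small worked example of the classification above: in a loop

       for (i = 0; i < n; i++)
         a[i] = b[i] * c;

   the SSA name holding c (defined before the loop) is vect_external_def,
   a constant multiplier would be vect_constant_def, and the load result
   b[i] computed in the current iteration is vect_internal_def; for values
   carried around the loop by PHIs, the def type recorded in the defining
   stmt's STMT_VINFO_DEF_TYPE (e.g. vect_reduction_def or
   vect_induction_def) is returned instead.  */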
7767 /* Function vect_is_simple_use_1.
7769 Same as vect_is_simple_use but also determines the vector operand
7770 type of OPERAND and stores it in *VECTYPE. If the definition of
7771 OPERAND is vect_uninitialized_def, vect_constant_def or
7772 vect_external_def, *VECTYPE will be set to NULL_TREE and the caller
7773 is responsible for computing the best suited vector type for the
7774 scalar operand. */
7776 bool
7777 vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
7778 bb_vec_info bb_vinfo, gimple *def_stmt,
7779 tree *def, enum vect_def_type *dt, tree *vectype)
7781 if (!vect_is_simple_use (operand, stmt, loop_vinfo, bb_vinfo, def_stmt,
7782 def, dt))
7783 return false;
7785 /* Now get a vector type if the def is internal, otherwise supply
7786 NULL_TREE and leave it up to the caller to figure out a proper
7787 type for the use stmt. */
7788 if (*dt == vect_internal_def
7789 || *dt == vect_induction_def
7790 || *dt == vect_reduction_def
7791 || *dt == vect_double_reduction_def
7792 || *dt == vect_nested_cycle)
7794 stmt_vec_info stmt_info = vinfo_for_stmt (*def_stmt);
7796 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7797 && !STMT_VINFO_RELEVANT (stmt_info)
7798 && !STMT_VINFO_LIVE_P (stmt_info))
7799 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
7801 *vectype = STMT_VINFO_VECTYPE (stmt_info);
7802 gcc_assert (*vectype != NULL_TREE);
7804 else if (*dt == vect_uninitialized_def
7805 || *dt == vect_constant_def
7806 || *dt == vect_external_def)
7807 *vectype = NULL_TREE;
7808 else
7809 gcc_unreachable ();
7811 return true;
7815 /* Function supportable_widening_operation
7817 Check whether an operation represented by the code CODE is a
7818 widening operation that is supported by the target platform in
7819 vector form (i.e., when operating on arguments of type VECTYPE_IN
7820 producing a result of type VECTYPE_OUT).
7822 Widening operations we currently support are NOP (CONVERT), FLOAT,
7823 WIDEN_MULT and WIDEN_LSHIFT. This function checks if these operations
7824 are supported by the target platform either directly (via vector
7825 tree-codes), or via target builtins.
7827 Output:
7828 - CODE1 and CODE2 are codes of vector operations to be used when
7829 vectorizing the operation, if available.
7830 - MULTI_STEP_CVT determines the number of required intermediate steps in
7831 case of multi-step conversion (like char->short->int - in that case
7832 MULTI_STEP_CVT will be 1).
7833 - INTERM_TYPES contains the intermediate type required to perform the
7834 widening operation (short in the above example). */
7836 bool
7837 supportable_widening_operation (enum tree_code code, gimple stmt,
7838 tree vectype_out, tree vectype_in,
7839 enum tree_code *code1, enum tree_code *code2,
7840 int *multi_step_cvt,
7841 vec<tree> *interm_types)
7843 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7844 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
7845 struct loop *vect_loop = NULL;
7846 machine_mode vec_mode;
7847 enum insn_code icode1, icode2;
7848 optab optab1, optab2;
7849 tree vectype = vectype_in;
7850 tree wide_vectype = vectype_out;
7851 enum tree_code c1, c2;
7852 int i;
7853 tree prev_type, intermediate_type;
7854 machine_mode intermediate_mode, prev_mode;
7855 optab optab3, optab4;
7857 *multi_step_cvt = 0;
7858 if (loop_info)
7859 vect_loop = LOOP_VINFO_LOOP (loop_info);
7861 switch (code)
7863 case WIDEN_MULT_EXPR:
7864 /* The result of a vectorized widening operation usually requires
7865 two vectors (because the widened results do not fit into one vector).
7866 The generated vector results would normally be expected to be
7867 generated in the same order as in the original scalar computation,
7868 i.e. if 8 results are generated in each vector iteration, they are
7869 to be organized as follows:
7870 vect1: [res1,res2,res3,res4],
7871 vect2: [res5,res6,res7,res8].
7873 However, in the special case that the result of the widening
7874 operation is used in a reduction computation only, the order doesn't
7875 matter (because when vectorizing a reduction we change the order of
7876 the computation). Some targets can take advantage of this and
7877 generate more efficient code. For example, targets like Altivec,
7878 that support widen_mult using a sequence of {mult_even,mult_odd}
7879 generate the following vectors:
7880 vect1: [res1,res3,res5,res7],
7881 vect2: [res2,res4,res6,res8].
7883 When vectorizing outer-loops, we execute the inner-loop sequentially
7884 (each vectorized inner-loop iteration contributes to VF outer-loop
7885 iterations in parallel). We therefore don't allow changing the
7886 order of the computation in the inner-loop during outer-loop
7887 vectorization. */
7888 /* TODO: Another case in which order doesn't *really* matter is when we
7889 widen and then contract again, e.g. (short)((int)x * y >> 8).
7890 Normally, pack_trunc performs an even/odd permute, whereas the
7891 repack from an even/odd expansion would be an interleave, which
7892 would be significantly simpler for e.g. AVX2. */
7893 /* In any case, in order to avoid duplicating the code below, recurse
7894 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
7895 are properly set up for the caller. If we fail, we'll continue with
7896 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
7897 if (vect_loop
7898 && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
7899 && !nested_in_vect_loop_p (vect_loop, stmt)
7900 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
7901 stmt, vectype_out, vectype_in,
7902 code1, code2, multi_step_cvt,
7903 interm_types))
7905 /* Elements in a vector with the vect_used_by_reduction property cannot
7906 be reordered if the use chain with this property does not have the
7907 same operation. One such example is s += a * b, where elements
7908 in a and b cannot be reordered. Here we check if the vector defined
7909 by STMT is only directly used in the reduction statement. */
7910 tree lhs = gimple_assign_lhs (stmt);
7911 use_operand_p dummy;
7912 gimple use_stmt;
7913 stmt_vec_info use_stmt_info = NULL;
7914 if (single_imm_use (lhs, &dummy, &use_stmt)
7915 && (use_stmt_info = vinfo_for_stmt (use_stmt))
7916 && STMT_VINFO_DEF_TYPE (use_stmt_info) == vect_reduction_def)
7917 return true;
7919 c1 = VEC_WIDEN_MULT_LO_EXPR;
7920 c2 = VEC_WIDEN_MULT_HI_EXPR;
7921 break;
7923 case VEC_WIDEN_MULT_EVEN_EXPR:
7924 /* Support the recursion induced just above. */
7925 c1 = VEC_WIDEN_MULT_EVEN_EXPR;
7926 c2 = VEC_WIDEN_MULT_ODD_EXPR;
7927 break;
7929 case WIDEN_LSHIFT_EXPR:
7930 c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
7931 c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
7932 break;
7934 CASE_CONVERT:
7935 c1 = VEC_UNPACK_LO_EXPR;
7936 c2 = VEC_UNPACK_HI_EXPR;
7937 break;
7939 case FLOAT_EXPR:
7940 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
7941 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
7942 break;
7944 case FIX_TRUNC_EXPR:
7945 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
7946 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
7947 computing the operation. */
7948 return false;
7950 default:
7951 gcc_unreachable ();
7954 if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
7956 enum tree_code ctmp = c1;
7957 c1 = c2;
7958 c2 = ctmp;
7961 if (code == FIX_TRUNC_EXPR)
7963 /* The signedness is determined from the output operand. */
7964 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
7965 optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
7967 else
7969 optab1 = optab_for_tree_code (c1, vectype, optab_default);
7970 optab2 = optab_for_tree_code (c2, vectype, optab_default);
7973 if (!optab1 || !optab2)
7974 return false;
7976 vec_mode = TYPE_MODE (vectype);
7977 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
7978 || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
7979 return false;
7981 *code1 = c1;
7982 *code2 = c2;
7984 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
7985 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
7986 return true;
7988 /* Check if it's a multi-step conversion that can be done using intermediate
7989 types. */
7991 prev_type = vectype;
7992 prev_mode = vec_mode;
7994 if (!CONVERT_EXPR_CODE_P (code))
7995 return false;
7997 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
7998 intermediate steps in the promotion sequence. We try
7999 MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
8000 not. */
8001 interm_types->create (MAX_INTERM_CVT_STEPS);
8002 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8004 intermediate_mode = insn_data[icode1].operand[0].mode;
8005 intermediate_type
8006 = lang_hooks.types.type_for_mode (intermediate_mode,
8007 TYPE_UNSIGNED (prev_type));
8008 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
8009 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
8011 if (!optab3 || !optab4
8012 || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
8013 || insn_data[icode1].operand[0].mode != intermediate_mode
8014 || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
8015 || insn_data[icode2].operand[0].mode != intermediate_mode
8016 || ((icode1 = optab_handler (optab3, intermediate_mode))
8017 == CODE_FOR_nothing)
8018 || ((icode2 = optab_handler (optab4, intermediate_mode))
8019 == CODE_FOR_nothing))
8020 break;
8022 interm_types->quick_push (intermediate_type);
8023 (*multi_step_cvt)++;
8025 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
8026 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
8027 return true;
8029 prev_type = intermediate_type;
8030 prev_mode = intermediate_mode;
8033 interm_types->release ();
8034 return false;
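/* Multi-step example (a sketch; mode names assume a 16-byte vector target
   such as SSE2): widening char -> int with VECTYPE_IN of mode V16QImode and
   VECTYPE_OUT of mode V4SImode is not a single unpack, so the loop above
   tries the intermediate type vector(8) short: CODE1/CODE2 stay
   VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR, INTERM_TYPES is { vector(8) short }
   and MULTI_STEP_CVT is 1, meaning each input vector is unpacked into two
   short vectors, each of which is unpacked again into two int vectors.  */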
8038 /* Function supportable_narrowing_operation
8040 Check whether an operation represented by the code CODE is a
8041 narrowing operation that is supported by the target platform in
8042 vector form (i.e., when operating on arguments of type VECTYPE_IN
8043 and producing a result of type VECTYPE_OUT).
8045 Narrowing operations we currently support are NOP (CONVERT) and
8046 FIX_TRUNC. This function checks if these operations are supported by
8047 the target platform directly via vector tree-codes.
8049 Output:
8050 - CODE1 is the code of a vector operation to be used when
8051 vectorizing the operation, if available.
8052 - MULTI_STEP_CVT determines the number of required intermediate steps in
8053 case of multi-step conversion (like int->short->char - in that case
8054 MULTI_STEP_CVT will be 1).
8055 - INTERM_TYPES contains the intermediate type required to perform the
8056 narrowing operation (short in the above example). */
8058 bool
8059 supportable_narrowing_operation (enum tree_code code,
8060 tree vectype_out, tree vectype_in,
8061 enum tree_code *code1, int *multi_step_cvt,
8062 vec<tree> *interm_types)
8064 machine_mode vec_mode;
8065 enum insn_code icode1;
8066 optab optab1, interm_optab;
8067 tree vectype = vectype_in;
8068 tree narrow_vectype = vectype_out;
8069 enum tree_code c1;
8070 tree intermediate_type;
8071 machine_mode intermediate_mode, prev_mode;
8072 int i;
8073 bool uns;
8075 *multi_step_cvt = 0;
8076 switch (code)
8078 CASE_CONVERT:
8079 c1 = VEC_PACK_TRUNC_EXPR;
8080 break;
8082 case FIX_TRUNC_EXPR:
8083 c1 = VEC_PACK_FIX_TRUNC_EXPR;
8084 break;
8086 case FLOAT_EXPR:
8087 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
8088 tree code and optabs used for computing the operation. */
8089 return false;
8091 default:
8092 gcc_unreachable ();
8095 if (code == FIX_TRUNC_EXPR)
8096 /* The signedness is determined from the output operand. */
8097 optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
8098 else
8099 optab1 = optab_for_tree_code (c1, vectype, optab_default);
8101 if (!optab1)
8102 return false;
8104 vec_mode = TYPE_MODE (vectype);
8105 if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
8106 return false;
8108 *code1 = c1;
8110 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8111 return true;
8113 /* Check if it's a multi-step conversion that can be done using intermediate
8114 types. */
8115 prev_mode = vec_mode;
8116 if (code == FIX_TRUNC_EXPR)
8117 uns = TYPE_UNSIGNED (vectype_out);
8118 else
8119 uns = TYPE_UNSIGNED (vectype);
8121 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
8122 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
8123 costly than signed. */
8124 if (code == FIX_TRUNC_EXPR && uns)
8126 enum insn_code icode2;
8128 intermediate_type
8129 = lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
8130 interm_optab
8131 = optab_for_tree_code (c1, intermediate_type, optab_default);
8132 if (interm_optab != unknown_optab
8133 && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
8134 && insn_data[icode1].operand[0].mode
8135 == insn_data[icode2].operand[0].mode)
8137 uns = false;
8138 optab1 = interm_optab;
8139 icode1 = icode2;
8143 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
8144 intermediate steps in the narrowing sequence. We try
8145 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
8146 interm_types->create (MAX_INTERM_CVT_STEPS);
8147 for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
8149 intermediate_mode = insn_data[icode1].operand[0].mode;
8150 intermediate_type
8151 = lang_hooks.types.type_for_mode (intermediate_mode, uns);
8152 interm_optab
8153 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
8154 optab_default);
8155 if (!interm_optab
8156 || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
8157 || insn_data[icode1].operand[0].mode != intermediate_mode
8158 || ((icode1 = optab_handler (interm_optab, intermediate_mode))
8159 == CODE_FOR_nothing))
8160 break;
8162 interm_types->quick_push (intermediate_type);
8163 (*multi_step_cvt)++;
8165 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
8166 return true;
8168 prev_mode = intermediate_mode;
8169 optab1 = interm_optab;
8172 interm_types->release ();
8173 return false;
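/* Multi-step example (mirroring the int -> short -> char case above, with
   mode names assuming a 16-byte vector target): narrowing int -> char with
   VECTYPE_IN of mode V4SImode and VECTYPE_OUT of mode V16QImode needs one
   intermediate step, so CODE1 is VEC_PACK_TRUNC_EXPR, INTERM_TYPES is
   { vector(8) short } and MULTI_STEP_CVT is 1: pairs of int vectors are
   first packed into short vectors, and pairs of those are packed again into
   char vectors.  */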